git.cameronkatri.com Git - mandoc.git/blob - term.c
ascii_xxx -> chars_xxx (intended to hold more than just ascii encoding).
[mandoc.git] / term.c
1 /* $Id: term.c,v 1.101 2009/09/17 07:41:28 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #include <assert.h>
18 #include <err.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22
23 #include "chars.h"
24 #include "term.h"
25 #include "man.h"
26 #include "mdoc.h"
27
/* Per-format renderers invoked by terminal_man() and terminal_mdoc(). */
extern	void	  man_run(struct termp *p, const struct man *man);
extern	void	  mdoc_run(struct termp *p, const struct mdoc *mdoc);

/* Allocation and release of the terminal context. */
static	struct termp *term_alloc(enum termenc enc);
static	void	  term_free(struct termp *p);

/* Escape-sequence decoding and line-buffer output helpers. */
static	void	  do_escaped(struct termp *p, const char **word);
static	void	  do_special(struct termp *p,
			const char *word, size_t len);
static	void	  do_reserved(struct termp *p,
			const char *word, size_t len);
static	void	  buffer(struct termp *p, char c);
static	void	  encode(struct termp *p, char c);
43
44
45 void *
46 ascii_alloc(void)
47 {
48
49 return(term_alloc(TERMENC_ASCII));
50 }
51
52
53 void
54 terminal_man(void *arg, const struct man *man)
55 {
56 struct termp *p;
57
58 p = (struct termp *)arg;
59 if (NULL == p->symtab)
60 p->symtab = chars_init(CHARS_ASCII);
61
62 man_run(p, man);
63 }
64
65
66 void
67 terminal_mdoc(void *arg, const struct mdoc *mdoc)
68 {
69 struct termp *p;
70
71 p = (struct termp *)arg;
72 if (NULL == p->symtab)
73 p->symtab = chars_init(CHARS_ASCII);
74
75 mdoc_run(p, mdoc);
76 }
77
78
/*
 * Public counterpart of term_free(): release the terminal context that
 * is passed in as an opaque pointer.
 */
void
terminal_free(void *arg)
{
	struct termp	*term = (struct termp *)arg;

	term_free(term);
}
85
86
87 static void
88 term_free(struct termp *p)
89 {
90
91 if (p->buf)
92 free(p->buf);
93 if (TERMENC_ASCII == p->enc && p->symtab)
94 chars_free(p->symtab);
95
96 free(p);
97 }
98
99
100 static struct termp *
101 term_alloc(enum termenc enc)
102 {
103 struct termp *p;
104
105 if (NULL == (p = malloc(sizeof(struct termp))))
106 return(NULL);
107 bzero(p, sizeof(struct termp));
108 p->maxrmargin = 78;
109 p->enc = enc;
110 return(p);
111 }
112
113
114 /*
115 * Flush a line of text. A "line" is loosely defined as being something
116 * that should be followed by a newline, regardless of whether it's
117 * broken apart by newlines getting there. A line can also be a
118 * fragment of a columnar list.
119 *
120 * Specifically, a line is whatever's in p->buf of length p->col, which
121 * is zeroed after this function returns.
122 *
123 * The usage of termp:flags is as follows:
124 *
125 * - TERMP_NOLPAD: when beginning to write the line, don't left-pad the
126 * offset value. This is useful when doing columnar lists where the
127 * prior column has right-padded.
128 *
129 * - TERMP_NOBREAK: this is the most important and is used when making
130 * columns. In short: don't print a newline and instead pad to the
131 * right margin. Used in conjunction with TERMP_NOLPAD.
132 *
133 * - TERMP_TWOSPACE: when padding, make sure there are at least two
134 * space characters of padding. Otherwise, rather break the line.
135 *
136 * - TERMP_DANGLE: don't newline when TERMP_NOBREAK is specified and
137 * the line is overrun, and don't pad-right if it's underrun.
138 *
139 * - TERMP_HANG: like TERMP_DANGLE, but doesn't newline when
140 * overruning, instead save the position and continue at that point
141 * when the next invocation.
142 *
143 * In-line line breaking:
144 *
145 * If TERMP_NOBREAK is specified and the line overruns the right
146 * margin, it will break and pad-right to the right margin after
147 * writing. If maxrmargin is violated, it will break and continue
148 * writing from the right-margin, which will lead to the above
149 * scenario upon exit.
150 *
151 * Otherwise, the line will break at the right margin. Extremely long
152 * lines will cause the system to emit a warning (TODO: hyphenate, if
153 * possible).
154 */
155 void
156 term_flushln(struct termp *p)
157 {
158 int i, j;
159 size_t vbl, vsz, vis, maxvis, mmax, bp;
160 static int overstep = 0;
161
162 /*
163 * First, establish the maximum columns of "visible" content.
164 * This is usually the difference between the right-margin and
165 * an indentation, but can be, for tagged lists or columns, a
166 * small set of values.
167 */
168
169 assert(p->offset < p->rmargin);
170 assert((int)(p->rmargin - p->offset) - overstep > 0);
171
172 maxvis = /* LINTED */
173 p->rmargin - p->offset - overstep;
174 mmax = /* LINTED */
175 p->maxrmargin - p->offset - overstep;
176
177 bp = TERMP_NOBREAK & p->flags ? mmax : maxvis;
178 vis = 0;
179 overstep = 0;
180
181 /*
182 * If in the standard case (left-justified), then begin with our
183 * indentation, otherwise (columns, etc.) just start spitting
184 * out text.
185 */
186
187 if ( ! (p->flags & TERMP_NOLPAD))
188 /* LINTED */
189 for (j = 0; j < (int)p->offset; j++)
190 putchar(' ');
191
192 for (i = 0; i < (int)p->col; i++) {
193 /*
194 * Count up visible word characters. Control sequences
195 * (starting with the CSI) aren't counted. A space
196 * generates a non-printing word, which is valid (the
197 * space is printed according to regular spacing rules).
198 */
199
200 /* LINTED */
201 for (j = i, vsz = 0; j < (int)p->col; j++) {
202 if (j && ' ' == p->buf[j])
203 break;
204 else if (8 == p->buf[j])
205 vsz--;
206 else
207 vsz++;
208 }
209
210 /*
211 * Choose the number of blanks to prepend: no blank at the
212 * beginning of a line, one between words -- but do not
213 * actually write them yet.
214 */
215 vbl = (size_t)(0 == vis ? 0 : 1);
216
217 /*
218 * Find out whether we would exceed the right margin.
219 * If so, break to the next line. (TODO: hyphenate)
220 * Otherwise, write the chosen number of blanks now.
221 */
222 if (vis && vis + vbl + vsz > bp) {
223 putchar('\n');
224 if (TERMP_NOBREAK & p->flags) {
225 for (j = 0; j < (int)p->rmargin; j++)
226 putchar(' ');
227 vis = p->rmargin - p->offset;
228 } else {
229 for (j = 0; j < (int)p->offset; j++)
230 putchar(' ');
231 vis = 0;
232 }
233 } else {
234 for (j = 0; j < (int)vbl; j++)
235 putchar(' ');
236 vis += vbl;
237 }
238
239 /*
240 * Finally, write out the word.
241 */
242 for ( ; i < (int)p->col; i++) {
243 if (' ' == p->buf[i])
244 break;
245 putchar(p->buf[i]);
246 }
247 vis += vsz;
248 }
249 p->col = 0;
250
251 if ( ! (TERMP_NOBREAK & p->flags)) {
252 putchar('\n');
253 return;
254 }
255
256 if (TERMP_HANG & p->flags) {
257 /* We need one blank after the tag. */
258 overstep = /* LINTED */
259 vis - maxvis + 1;
260
261 /*
262 * Behave exactly the same way as groff:
263 * If we have overstepped the margin, temporarily move
264 * it to the right and flag the rest of the line to be
265 * shorter.
266 * If we landed right at the margin, be happy.
267 * If we are one step before the margin, temporarily
268 * move it one step LEFT and flag the rest of the line
269 * to be longer.
270 */
271 if (overstep >= -1) {
272 assert((int)maxvis + overstep >= 0);
273 /* LINTED */
274 maxvis += overstep;
275 } else
276 overstep = 0;
277
278 } else if (TERMP_DANGLE & p->flags)
279 return;
280
281 /* Right-pad. */
282 if (maxvis > vis + /* LINTED */
283 ((TERMP_TWOSPACE & p->flags) ? 1 : 0))
284 for ( ; vis < maxvis; vis++)
285 putchar(' ');
286 else { /* ...or newline break. */
287 putchar('\n');
288 for (i = 0; i < (int)p->rmargin; i++)
289 putchar(' ');
290 }
291 }
292
293
294 /*
295 * A newline only breaks an existing line; it won't assert vertical
296 * space. All data in the output buffer is flushed prior to the newline
297 * assertion.
298 */
299 void
300 term_newln(struct termp *p)
301 {
302
303 p->flags |= TERMP_NOSPACE;
304 if (0 == p->col) {
305 p->flags &= ~TERMP_NOLPAD;
306 return;
307 }
308 term_flushln(p);
309 p->flags &= ~TERMP_NOLPAD;
310 }
311
312
/*
 * Produce vertical space: one completely empty line between lines.
 * Pending output is flushed via term_newln() first, then a single
 * newline is written.  Calling this twice yields two empty lines, and
 * so on.
 */
void
term_vspace(struct termp *p)
{

	term_newln(p);

	/* The empty line itself. */
	putchar('\n');
}
326
327
328 static void
329 do_special(struct termp *p, const char *word, size_t len)
330 {
331 const char *rhs;
332 size_t sz;
333 int i;
334
335 rhs = chars_a2ascii(p->symtab, word, len, &sz);
336
337 if (NULL == rhs) {
338 #if 0
339 fputs("Unknown special character: ", stderr);
340 for (i = 0; i < (int)len; i++)
341 fputc(word[i], stderr);
342 fputc('\n', stderr);
343 #endif
344 return;
345 }
346 for (i = 0; i < (int)sz; i++)
347 encode(p, rhs[i]);
348 }
349
350
351 static void
352 do_reserved(struct termp *p, const char *word, size_t len)
353 {
354 const char *rhs;
355 size_t sz;
356 int i;
357
358 rhs = chars_a2res(p->symtab, word, len, &sz);
359
360 if (NULL == rhs) {
361 #if 0
362 fputs("Unknown reserved word: ", stderr);
363 for (i = 0; i < (int)len; i++)
364 fputc(word[i], stderr);
365 fputc('\n', stderr);
366 #endif
367 return;
368 }
369 for (i = 0; i < (int)sz; i++)
370 encode(p, rhs[i]);
371 }
372
373
374 /*
375 * Handle an escape sequence: determine its length and pass it to the
376 * escape-symbol look table. Note that we assume mdoc(3) has validated
377 * the escape sequence (we assert upon badly-formed escape sequences).
378 */
379 static void
380 do_escaped(struct termp *p, const char **word)
381 {
382 int j, type;
383 const char *wp;
384
385 wp = *word;
386 type = 1;
387
388 if (0 == *(++wp)) {
389 *word = wp;
390 return;
391 }
392
393 if ('(' == *wp) {
394 wp++;
395 if (0 == *wp || 0 == *(wp + 1)) {
396 *word = 0 == *wp ? wp : wp + 1;
397 return;
398 }
399
400 do_special(p, wp, 2);
401 *word = ++wp;
402 return;
403
404 } else if ('*' == *wp) {
405 if (0 == *(++wp)) {
406 *word = wp;
407 return;
408 }
409
410 switch (*wp) {
411 case ('('):
412 wp++;
413 if (0 == *wp || 0 == *(wp + 1)) {
414 *word = 0 == *wp ? wp : wp + 1;
415 return;
416 }
417
418 do_reserved(p, wp, 2);
419 *word = ++wp;
420 return;
421 case ('['):
422 type = 0;
423 break;
424 default:
425 do_reserved(p, wp, 1);
426 *word = wp;
427 return;
428 }
429
430 } else if ('f' == *wp) {
431 if (0 == *(++wp)) {
432 *word = wp;
433 return;
434 }
435
436 switch (*wp) {
437 case ('B'):
438 p->bold++;
439 break;
440 case ('I'):
441 p->under++;
442 break;
443 case ('P'):
444 /* FALLTHROUGH */
445 case ('R'):
446 p->bold = p->under = 0;
447 break;
448 default:
449 break;
450 }
451
452 *word = wp;
453 return;
454
455 } else if ('[' != *wp) {
456 do_special(p, wp, 1);
457 *word = wp;
458 return;
459 }
460
461 wp++;
462 for (j = 0; *wp && ']' != *wp; wp++, j++)
463 /* Loop... */ ;
464
465 if (0 == *wp) {
466 *word = wp;
467 return;
468 }
469
470 if (type)
471 do_special(p, wp - j, (size_t)j);
472 else
473 do_reserved(p, wp - j, (size_t)j);
474 *word = wp;
475 }
476
477
478 /*
479 * Handle pwords, partial words, which may be either a single word or a
480 * phrase that cannot be broken down (such as a literal string). This
481 * handles word styling.
482 */
483 void
484 term_word(struct termp *p, const char *word)
485 {
486 const char *sv;
487
488 sv = word;
489
490 if (word[0] && 0 == word[1])
491 switch (word[0]) {
492 case('.'):
493 /* FALLTHROUGH */
494 case(','):
495 /* FALLTHROUGH */
496 case(';'):
497 /* FALLTHROUGH */
498 case(':'):
499 /* FALLTHROUGH */
500 case('?'):
501 /* FALLTHROUGH */
502 case('!'):
503 /* FALLTHROUGH */
504 case(')'):
505 /* FALLTHROUGH */
506 case(']'):
507 /* FALLTHROUGH */
508 case('}'):
509 if ( ! (TERMP_IGNDELIM & p->flags))
510 p->flags |= TERMP_NOSPACE;
511 break;
512 default:
513 break;
514 }
515
516 if ( ! (TERMP_NOSPACE & p->flags))
517 buffer(p, ' ');
518
519 if ( ! (p->flags & TERMP_NONOSPACE))
520 p->flags &= ~TERMP_NOSPACE;
521
522 for ( ; *word; word++)
523 if ('\\' != *word)
524 encode(p, *word);
525 else
526 do_escaped(p, &word);
527
528 if (sv[0] && 0 == sv[1])
529 switch (sv[0]) {
530 case('('):
531 /* FALLTHROUGH */
532 case('['):
533 /* FALLTHROUGH */
534 case('{'):
535 p->flags |= TERMP_NOSPACE;
536 break;
537 default:
538 break;
539 }
540 }
541
542
543 /*
544 * Insert a single character into the line-buffer. If the buffer's
545 * space is exceeded, then allocate more space by doubling the buffer
546 * size.
547 */
548 static void
549 buffer(struct termp *p, char c)
550 {
551 size_t s;
552
553 if (p->col + 1 >= p->maxcols) {
554 if (0 == p->maxcols)
555 p->maxcols = 256;
556 s = p->maxcols * 2;
557 p->buf = realloc(p->buf, s);
558 if (NULL == p->buf)
559 err(1, "realloc"); /* FIXME: shouldn't be here! */
560 p->maxcols = s;
561 }
562 p->buf[(int)(p->col)++] = c;
563 }
564
565
566 static void
567 encode(struct termp *p, char c)
568 {
569
570 if (' ' != c) {
571 if (p->bold) {
572 buffer(p, c);
573 buffer(p, 8);
574 }
575 if (p->under) {
576 buffer(p, '_');
577 buffer(p, 8);
578 }
579 }
580 buffer(p, c);
581 }