]> git.cameronkatri.com Git - mandoc.git/blob - term.c
973ad2c2b1c7d3f66081b614fac9048d6e5e41bc
[mandoc.git] / term.c
1 /* $Id: term.c,v 1.84 2009/07/14 15:16:41 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #include <assert.h>
18 #include <err.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22
23 #include "term.h"
24 #include "man.h"
25 #include "mdoc.h"
26
/*
 * Front-end drivers for the two supported macro packages.  They are
 * declared here directly rather than through a header.
 */
extern	int		  man_run(struct termp *p, const struct man *man);
extern	int		  mdoc_run(struct termp *p, const struct mdoc *mdoc);

/* Context life-cycle. */
static	struct termp	 *term_alloc(enum termenc enc);
static	void		  term_free(struct termp *p);

/* Word and escape-sequence processing. */
static	void		  term_pword(struct termp *p, const char *word,
				int len);
static	void		  term_pescape(struct termp *p, const char *word,
				int *i, int len);
static	void		  term_nescape(struct termp *p, const char *word,
				size_t len);

/* Output-buffer primitives. */
static	void		  term_chara(struct termp *p, char c);
static	void		  term_encodea(struct termp *p, char c);

/* Single-character delimiter predicates. */
static	int		  term_isopendelim(const char *p, int len);
static	int		  term_isclosedelim(const char *p, int len);
43
44
45 void *
46 ascii_alloc(void)
47 {
48
49 return(term_alloc(TERMENC_ASCII));
50 }
51
52
53 int
54 terminal_man(void *arg, const struct man *man)
55 {
56 struct termp *p;
57
58 p = (struct termp *)arg;
59 if (NULL == p->symtab)
60 p->symtab = term_ascii2htab();
61
62 return(man_run(p, man));
63 }
64
65
66 int
67 terminal_mdoc(void *arg, const struct mdoc *mdoc)
68 {
69 struct termp *p;
70
71 p = (struct termp *)arg;
72 if (NULL == p->symtab)
73 p->symtab = term_ascii2htab();
74
75 return(mdoc_run(p, mdoc));
76 }
77
78
/*
 * Public destructor: dispose of an output context previously handed
 * out as an opaque pointer.
 */
void
terminal_free(void *arg)
{
	struct termp	*p;

	p = arg;
	term_free(p);
}
85
86
87 static void
88 term_free(struct termp *p)
89 {
90
91 if (p->buf)
92 free(p->buf);
93 if (TERMENC_ASCII == p->enc && p->symtab)
94 term_asciifree(p->symtab);
95
96 free(p);
97 }
98
99
100 static struct termp *
101 term_alloc(enum termenc enc)
102 {
103 struct termp *p;
104
105 if (NULL == (p = malloc(sizeof(struct termp))))
106 err(1, "malloc");
107 bzero(p, sizeof(struct termp));
108 p->maxrmargin = 78;
109 p->enc = enc;
110 return(p);
111 }
112
113
/*
 * Tell whether the word at `p', `len' bytes long, is a lone closing
 * delimiter, i.e. exactly one of  . , ; : ? ! ) ] }
 * Returns 1 if so and 0 otherwise; any word whose length is not one
 * is never a delimiter.
 */
static int
term_isclosedelim(const char *p, int len)
{
	static const char	 closers[] = ".,;:?!)]}";
	const char		*cp;

	if (1 != len)
		return(0);

	for (cp = closers; '\0' != *cp; cp++)
		if (*cp == *p)
			return(1);

	return(0);
}
146
147
/*
 * Tell whether the word at `p', `len' bytes long, is a lone opening
 * delimiter, i.e. exactly one of  ( [ {
 * Returns 1 if so and 0 otherwise; any word whose length is not one
 * is never a delimiter.
 */
static int
term_isopendelim(const char *p, int len)
{
	static const char	 openers[] = "([{";
	const char		*cp;

	if (1 != len)
		return(0);

	for (cp = openers; '\0' != *cp; cp++)
		if (*cp == *p)
			return(1);

	return(0);
}
168
169
170 /*
171 * Flush a line of text. A "line" is loosely defined as being something
172 * that should be followed by a newline, regardless of whether it's
173 * broken apart by newlines getting there. A line can also be a
174 * fragment of a columnar list.
175 *
176 * Specifically, a line is whatever's in p->buf of length p->col, which
177 * is zeroed after this function returns.
178 *
179 * The usage of termp:flags is as follows:
180 *
181 * - TERMP_NOLPAD: when beginning to write the line, don't left-pad the
182 * offset value. This is useful when doing columnar lists where the
183 * prior column has right-padded.
184 *
185 * - TERMP_NOBREAK: this is the most important and is used when making
186 * columns. In short: don't print a newline and instead pad to the
187 * right margin. Used in conjunction with TERMP_NOLPAD.
188 *
189 * - TERMP_DANGLE: don't newline when TERMP_NOBREAK is specified and
190 * the line is overrun, and don't pad-right if it's underrun.
191 *
192 * - TERMP_HANG: like TERMP_DANGLE, but doesn't newline when
193 * overruning, instead save the position and continue at that point
194 * when the next invocation.
195 *
196 * In-line line breaking:
197 *
198 * If TERMP_NOBREAK is specified and the line overruns the right
199 * margin, it will break and pad-right to the right margin after
200 * writing. If maxrmargin is violated, it will break and continue
201 * writing from the right-margin, which will lead to the above
202 * scenario upon exit.
203 *
204 * Otherwise, the line will break at the right margin. Extremely long
205 * lines will cause the system to emit a warning (TODO: hyphenate, if
206 * possible).
207 *
208 * FIXME: newline breaks occur (in groff) also occur when a single
209 * space follows a NOBREAK!
210 */
211 void
212 term_flushln(struct termp *p)
213 {
214 int i, j;
215 size_t vbl, vsz, vis, maxvis, mmax, bp;
216 static int sv = -1;
217
218 /*
219 * First, establish the maximum columns of "visible" content.
220 * This is usually the difference between the right-margin and
221 * an indentation, but can be, for tagged lists or columns, a
222 * small set of values.
223 */
224
225 assert(p->offset < p->rmargin);
226 maxvis = p->rmargin - p->offset;
227 mmax = p->maxrmargin - p->offset;
228 bp = TERMP_NOBREAK & p->flags ? mmax : maxvis;
229 vis = 0;
230
231 if (sv >= 0) {
232 vis = (size_t)sv;
233 sv = -1;
234 }
235
236 /*
237 * If in the standard case (left-justified), then begin with our
238 * indentation, otherwise (columns, etc.) just start spitting
239 * out text.
240 */
241
242 if ( ! (p->flags & TERMP_NOLPAD))
243 /* LINTED */
244 for (j = 0; j < (int)p->offset; j++)
245 putchar(' ');
246
247 for (i = 0; i < (int)p->col; i++) {
248 /*
249 * Count up visible word characters. Control sequences
250 * (starting with the CSI) aren't counted. A space
251 * generates a non-printing word, which is valid (the
252 * space is printed according to regular spacing rules).
253 */
254
255 /* LINTED */
256 for (j = i, vsz = 0; j < (int)p->col; j++) {
257 if (' ' == p->buf[j])
258 break;
259 else if (8 == p->buf[j])
260 j += 1;
261 else
262 vsz++;
263 }
264
265 /*
266 * Choose the number of blanks to prepend: no blank at the
267 * beginning of a line, one between words -- but do not
268 * actually write them yet.
269 */
270 vbl = (size_t)(0 == vis ? 0 : 1);
271
272 /*
273 * Find out whether we would exceed the right margin.
274 * If so, break to the next line. (TODO: hyphenate)
275 * Otherwise, write the chosen number of blanks now.
276 */
277 if (vis && vis + vbl + vsz > bp) {
278 putchar('\n');
279 if (TERMP_NOBREAK & p->flags) {
280 for (j = 0; j < (int)p->rmargin; j++)
281 putchar(' ');
282 vis = p->rmargin - p->offset;
283 } else {
284 for (j = 0; j < (int)p->offset; j++)
285 putchar(' ');
286 vis = 0;
287 }
288 } else {
289 for (j = 0; j < (int)vbl; j++)
290 putchar(' ');
291 vis += vbl;
292 }
293
294 /*
295 * Finally, write out the word.
296 */
297 for ( ; i < (int)p->col; i++) {
298 if (' ' == p->buf[i])
299 break;
300 putchar(p->buf[i]);
301 }
302 vis += vsz;
303 }
304
305 /*
306 * If we've overstepped our maximum visible no-break space, then
307 * cause a newline and offset at the right margin.
308 */
309
310 if ((TERMP_NOBREAK & p->flags) && vis >= maxvis) {
311 if ( ! (TERMP_DANGLE & p->flags) &&
312 ! (TERMP_HANG & p->flags)) {
313 putchar('\n');
314 for (i = 0; i < (int)p->rmargin; i++)
315 putchar(' ');
316 }
317 if (TERMP_HANG & p->flags)
318 sv = vis - maxvis;
319 p->col = 0;
320 return;
321 }
322
323 /*
324 * If we're not to right-marginalise it (newline), then instead
325 * pad to the right margin and stay off.
326 */
327
328 if (p->flags & TERMP_NOBREAK) {
329 if ( ! (TERMP_DANGLE & p->flags))
330 for ( ; vis < maxvis; vis++)
331 putchar(' ');
332 } else
333 putchar('\n');
334
335 p->col = 0;
336 }
337
338
339 /*
340 * A newline only breaks an existing line; it won't assert vertical
341 * space. All data in the output buffer is flushed prior to the newline
342 * assertion.
343 */
344 void
345 term_newln(struct termp *p)
346 {
347
348 p->flags |= TERMP_NOSPACE;
349 if (0 == p->col) {
350 p->flags &= ~TERMP_NOLPAD;
351 return;
352 }
353 term_flushln(p);
354 p->flags &= ~TERMP_NOLPAD;
355 }
356
357
/*
 * Assert one line of vertical space: finish the pending line through
 * term_newln(), then write a bare newline.  Calls accumulate; two
 * consecutive calls yield two blank lines, and so on.
 */
void
term_vspace(struct termp *p)
{

	/* Any buffered text goes out first. */
	term_newln(p);

	/* The blank line itself. */
	putchar('\n');
}
371
372
373 /*
374 * Break apart a word into "pwords" (partial-words, usually from
375 * breaking up a phrase into individual words) and, eventually, put them
376 * into the output buffer. If we're a literal word, then don't break up
377 * the word and put it verbatim into the output buffer.
378 */
379 void
380 term_word(struct termp *p, const char *word)
381 {
382 int i, j, len;
383
384 len = (int)strlen(word);
385
386 if (p->flags & TERMP_LITERAL) {
387 term_pword(p, word, len);
388 return;
389 }
390
391 /* LINTED */
392 for (j = i = 0; i < len; i++) {
393 if (' ' != word[i]) {
394 j++;
395 continue;
396 }
397
398 /* Escaped spaces don't delimit... */
399 if (i && ' ' == word[i] && '\\' == word[i - 1]) {
400 j++;
401 continue;
402 }
403
404 if (0 == j)
405 continue;
406 assert(i >= j);
407 term_pword(p, &word[i - j], j);
408 j = 0;
409 }
410 if (j > 0) {
411 assert(i >= j);
412 term_pword(p, &word[i - j], j);
413 }
414 }
415
416
417 /*
418 * Determine the symbol indicated by an escape sequences, that is, one
419 * starting with a backslash. Once done, we pass this value into the
420 * output buffer by way of the symbol table.
421 */
422 static void
423 term_nescape(struct termp *p, const char *word, size_t len)
424 {
425 const char *rhs;
426 size_t sz;
427 int i;
428
429 rhs = term_a2ascii(p->symtab, word, len, &sz);
430 if (rhs)
431 for (i = 0; i < (int)sz; i++)
432 term_encodea(p, rhs[i]);
433 }
434
435
436 /*
437 * Handle an escape sequence: determine its length and pass it to the
438 * escape-symbol look table. Note that we assume mdoc(3) has validated
439 * the escape sequence (we assert upon badly-formed escape sequences).
440 */
441 static void
442 term_pescape(struct termp *p, const char *word, int *i, int len)
443 {
444 int j;
445
446 if (++(*i) >= len)
447 return;
448
449 if ('(' == word[*i]) {
450 (*i)++;
451 if (*i + 1 >= len)
452 return;
453
454 term_nescape(p, &word[*i], 2);
455 (*i)++;
456 return;
457
458 } else if ('*' == word[*i]) {
459 (*i)++;
460 if (*i >= len)
461 return;
462
463 switch (word[*i]) {
464 case ('('):
465 (*i)++;
466 if (*i + 1 >= len)
467 return;
468
469 term_nescape(p, &word[*i], 2);
470 (*i)++;
471 return;
472 case ('['):
473 break;
474 default:
475 term_nescape(p, &word[*i], 1);
476 return;
477 }
478
479 } else if ('f' == word[*i]) {
480 (*i)++;
481 if (*i >= len)
482 return;
483 switch (word[*i]) {
484 case ('B'):
485 p->flags |= TERMP_BOLD;
486 break;
487 case ('I'):
488 p->flags |= TERMP_UNDER;
489 break;
490 case ('P'):
491 /* FALLTHROUGH */
492 case ('R'):
493 p->flags &= ~TERMP_STYLE;
494 break;
495 default:
496 break;
497 }
498 return;
499
500 } else if ('[' != word[*i]) {
501 term_nescape(p, &word[*i], 1);
502 return;
503 }
504
505 (*i)++;
506 for (j = 0; word[*i] && ']' != word[*i]; (*i)++, j++)
507 /* Loop... */ ;
508
509 if (0 == word[*i])
510 return;
511
512 term_nescape(p, &word[*i - j], (size_t)j);
513 }
514
515
516 /*
517 * Handle pwords, partial words, which may be either a single word or a
518 * phrase that cannot be broken down (such as a literal string). This
519 * handles word styling.
520 */
521 static void
522 term_pword(struct termp *p, const char *word, int len)
523 {
524 int i;
525
526 if (term_isclosedelim(word, len))
527 if ( ! (TERMP_IGNDELIM & p->flags))
528 p->flags |= TERMP_NOSPACE;
529
530 if ( ! (TERMP_NOSPACE & p->flags))
531 term_chara(p, ' ');
532
533 if ( ! (p->flags & TERMP_NONOSPACE))
534 p->flags &= ~TERMP_NOSPACE;
535
536 /*
537 * If ANSI (word-length styling), then apply our style now,
538 * before the word.
539 */
540
541 for (i = 0; i < len; i++)
542 if ('\\' == word[i])
543 term_pescape(p, word, &i, len);
544 else
545 term_encodea(p, word[i]);
546
547 if (term_isopendelim(word, len))
548 p->flags |= TERMP_NOSPACE;
549 }
550
551
552 /*
553 * Insert a single character into the line-buffer. If the buffer's
554 * space is exceeded, then allocate more space by doubling the buffer
555 * size.
556 */
557 static void
558 term_chara(struct termp *p, char c)
559 {
560 size_t s;
561
562 if (p->col + 1 >= p->maxcols) {
563 if (0 == p->maxcols)
564 p->maxcols = 256;
565 s = p->maxcols * 2;
566 p->buf = realloc(p->buf, s);
567 if (NULL == p->buf)
568 err(1, "realloc");
569 p->maxcols = s;
570 }
571 p->buf[(int)(p->col)++] = c;
572 }
573
574
575 static void
576 term_encodea(struct termp *p, char c)
577 {
578
579 if (TERMP_STYLE & p->flags) {
580 if (TERMP_BOLD & p->flags) {
581 term_chara(p, c);
582 term_chara(p, 8);
583 }
584 if (TERMP_UNDER & p->flags) {
585 term_chara(p, '_');
586 term_chara(p, 8);
587 }
588 }
589 term_chara(p, c);
590 }