]> git.cameronkatri.com Git - mandoc.git/blob - term.c
Avoid running the "width" termp callback for each whitespace.
[mandoc.git] / term.c
1 /* $Id: term.c,v 1.164 2010/07/25 22:56:47 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <sys/types.h>
23
24 #include <assert.h>
25 #include <ctype.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31 #include "mandoc.h"
32 #include "chars.h"
33 #include "out.h"
34 #include "term.h"
35 #include "main.h"
36
/* Forward declarations of the file-local helpers defined further down. */
static	void	  spec(struct termp *p, enum roffdeco d,
			const char *word, size_t len);
static	void	  res(struct termp *p, const char *word, size_t len);
static	void	  buffera(struct termp *p, const char *word, size_t sz);
static	void	  bufferc(struct termp *p, char c);
static	void	  adjbuf(struct termp *p, size_t sz);
static	void	  encode(struct termp *p, const char *word, size_t sz);
44
45
46 void
47 term_free(struct termp *p)
48 {
49
50 if (p->buf)
51 free(p->buf);
52 if (p->symtab)
53 chars_free(p->symtab);
54
55 free(p);
56 }
57
58
59 void
60 term_begin(struct termp *p, term_margin head,
61 term_margin foot, const void *arg)
62 {
63
64 p->headf = head;
65 p->footf = foot;
66 p->argf = arg;
67 (*p->begin)(p);
68 }
69
70
71 void
72 term_end(struct termp *p)
73 {
74
75 (*p->end)(p);
76 }
77
78
79 struct termp *
80 term_alloc(enum termenc enc)
81 {
82 struct termp *p;
83
84 p = calloc(1, sizeof(struct termp));
85 if (NULL == p) {
86 perror(NULL);
87 exit(EXIT_FAILURE);
88 }
89
90 p->enc = enc;
91 return(p);
92 }
93
94
95 /*
96 * Flush a line of text. A "line" is loosely defined as being something
97 * that should be followed by a newline, regardless of whether it's
98 * broken apart by newlines getting there. A line can also be a
99 * fragment of a columnar list (`Bl -tag' or `Bl -column'), which does
100 * not have a trailing newline.
101 *
102 * The following flags may be specified:
103 *
104 * - TERMP_NOLPAD: when beginning to write the line, don't left-pad the
105 * offset value. This is useful when doing columnar lists where the
106 * prior column has right-padded.
107 *
108 * - TERMP_NOBREAK: this is the most important and is used when making
109 * columns. In short: don't print a newline and instead pad to the
110 * right margin. Used in conjunction with TERMP_NOLPAD.
111 *
112 * - TERMP_TWOSPACE: when padding, make sure there are at least two
113 * space characters of padding. Otherwise, rather break the line.
114 *
115 * - TERMP_DANGLE: don't newline when TERMP_NOBREAK is specified and
116 * the line is overrun, and don't pad-right if it's underrun.
117 *
118 * - TERMP_HANG: like TERMP_DANGLE, but doesn't newline when
119 * overruning, instead save the position and continue at that point
120 * when the next invocation.
121 *
122 * In-line line breaking:
123 *
124 * If TERMP_NOBREAK is specified and the line overruns the right
125 * margin, it will break and pad-right to the right margin after
126 * writing. If maxrmargin is violated, it will break and continue
127 * writing from the right-margin, which will lead to the above scenario
128 * upon exit. Otherwise, the line will break at the right margin.
129 */
130 void
131 term_flushln(struct termp *p)
132 {
133 int i; /* current input position in p->buf */
134 size_t vis; /* current visual position on output */
135 size_t vbl; /* number of blanks to prepend to output */
136 size_t vend; /* end of word visual position on output */
137 size_t bp; /* visual right border position */
138 int j; /* temporary loop index for p->buf */
139 int jhy; /* last hyph before overflow w/r/t j */
140 size_t maxvis; /* output position of visible boundary */
141 size_t mmax; /* used in calculating bp */
142
143 /*
144 * First, establish the maximum columns of "visible" content.
145 * This is usually the difference between the right-margin and
146 * an indentation, but can be, for tagged lists or columns, a
147 * small set of values.
148 */
149
150 assert(p->offset < p->rmargin);
151
152 maxvis = (int)(p->rmargin - p->offset) - p->overstep < 0 ?
153 /* LINTED */
154 0 : p->rmargin - p->offset - p->overstep;
155 mmax = (int)(p->maxrmargin - p->offset) - p->overstep < 0 ?
156 /* LINTED */
157 0 : p->maxrmargin - p->offset - p->overstep;
158
159 bp = TERMP_NOBREAK & p->flags ? mmax : maxvis;
160
161 /*
162 * Indent the first line of a paragraph.
163 */
164 vbl = p->flags & TERMP_NOLPAD ? 0 : p->offset;
165
166 vis = vend = i = 0;
167
168 while (i < (int)p->col) {
169 /*
170 * Handle literal tab characters: collapse all
171 * subsequent tabs into a single huge set of spaces.
172 */
173 for (j = i; j < (int)p->col; j++) {
174 if ('\t' != p->buf[j])
175 break;
176 vend = (vis / p->tabwidth + 1) * p->tabwidth;
177 vbl += vend - vis;
178 vis = vend;
179 }
180
181 /*
182 * Count up visible word characters. Control sequences
183 * (starting with the CSI) aren't counted. A space
184 * generates a non-printing word, which is valid (the
185 * space is printed according to regular spacing rules).
186 */
187
188 /* LINTED */
189 for (jhy = 0; j < (int)p->col; j++) {
190 if ((j && ' ' == p->buf[j]) || '\t' == p->buf[j])
191 break;
192
193 /* Back over the the last printed character. */
194 if (8 == p->buf[j]) {
195 assert(j);
196 vend -= (*p->width)(p, p->buf[j - 1]);
197 continue;
198 }
199
200 /* Regular word. */
201 /* Break at the hyphen point if we overrun. */
202 if (vend > vis && vend < bp &&
203 ASCII_HYPH == p->buf[j])
204 jhy = j;
205
206 vend += (*p->width)(p, p->buf[j]);
207 }
208
209 /*
210 * Find out whether we would exceed the right margin.
211 * If so, break to the next line.
212 */
213 if (vend > bp && 0 == jhy && vis > 0) {
214 vend -= vis;
215 (*p->endline)(p);
216 if (TERMP_NOBREAK & p->flags) {
217 p->viscol = p->rmargin;
218 (*p->advance)(p, p->rmargin);
219 vend += p->rmargin - p->offset;
220 } else {
221 p->viscol = 0;
222 vbl = p->offset;
223 }
224
225 /* Remove the p->overstep width. */
226
227 bp += (int)/* LINTED */
228 p->overstep;
229 p->overstep = 0;
230 }
231
232 /*
233 * Skip leading tabs, they were handled above.
234 */
235 while (i < (int)p->col && '\t' == p->buf[i])
236 i++;
237
238 /* Write out the [remaining] word. */
239 for ( ; i < (int)p->col; i++) {
240 if (vend > bp && jhy > 0 && i > jhy)
241 break;
242 if ('\t' == p->buf[i])
243 break;
244 if (' ' == p->buf[i]) {
245 j = i;
246 while (' ' == p->buf[i])
247 i++;
248 vbl += (i - j) * (*p->width)(p, ' ');
249 break;
250 }
251 if (ASCII_NBRSP == p->buf[i]) {
252 vbl += (*p->width)(p, ' ');
253 continue;
254 }
255
256 /*
257 * Now we definitely know there will be
258 * printable characters to output,
259 * so write preceding white space now.
260 */
261 if (vbl) {
262 (*p->advance)(p, vbl);
263 p->viscol += vbl;
264 vbl = 0;
265 }
266
267 if (ASCII_HYPH == p->buf[i]) {
268 (*p->letter)(p, '-');
269 p->viscol += (*p->width)(p, '-');
270 } else {
271 (*p->letter)(p, p->buf[i]);
272 p->viscol += (*p->width)(p, p->buf[i]);
273 }
274 }
275 vend += vbl;
276 vis = vend;
277 }
278
279 p->col = 0;
280 p->overstep = 0;
281
282 if ( ! (TERMP_NOBREAK & p->flags)) {
283 p->viscol = 0;
284 (*p->endline)(p);
285 return;
286 }
287
288 if (TERMP_HANG & p->flags) {
289 /* We need one blank after the tag. */
290 p->overstep = /* LINTED */
291 vis - maxvis + (*p->width)(p, ' ');
292
293 /*
294 * Behave exactly the same way as groff:
295 * If we have overstepped the margin, temporarily move
296 * it to the right and flag the rest of the line to be
297 * shorter.
298 * If we landed right at the margin, be happy.
299 * If we are one step before the margin, temporarily
300 * move it one step LEFT and flag the rest of the line
301 * to be longer.
302 */
303 if (p->overstep >= -1) {
304 assert((int)maxvis + p->overstep >= 0);
305 /* LINTED */
306 maxvis += p->overstep;
307 } else
308 p->overstep = 0;
309
310 } else if (TERMP_DANGLE & p->flags)
311 return;
312
313 /* Right-pad. */
314 if (maxvis > vis + /* LINTED */
315 ((TERMP_TWOSPACE & p->flags) ?
316 (*p->width)(p, ' ') : 0)) {
317 p->viscol += maxvis - vis;
318 (*p->advance)(p, maxvis - vis);
319 vis += (maxvis - vis);
320 } else { /* ...or newline break. */
321 (*p->endline)(p);
322 p->viscol = p->rmargin;
323 (*p->advance)(p, p->rmargin);
324 }
325 }
326
327
328 /*
329 * A newline only breaks an existing line; it won't assert vertical
330 * space. All data in the output buffer is flushed prior to the newline
331 * assertion.
332 */
333 void
334 term_newln(struct termp *p)
335 {
336
337 p->flags |= TERMP_NOSPACE;
338 if (0 == p->col && 0 == p->viscol) {
339 p->flags &= ~TERMP_NOLPAD;
340 return;
341 }
342 term_flushln(p);
343 p->flags &= ~TERMP_NOLPAD;
344 }
345
346
347 /*
348 * Asserts a vertical space (a full, empty line-break between lines).
349 * Note that if used twice, this will cause two blank spaces and so on.
350 * All data in the output buffer is flushed prior to the newline
351 * assertion.
352 */
353 void
354 term_vspace(struct termp *p)
355 {
356
357 term_newln(p);
358 p->viscol = 0;
359 (*p->endline)(p);
360 }
361
362
363 static void
364 spec(struct termp *p, enum roffdeco d, const char *word, size_t len)
365 {
366 const char *rhs;
367 size_t sz;
368
369 rhs = chars_spec2str(p->symtab, word, len, &sz);
370 if (rhs)
371 encode(p, rhs, sz);
372 else if (DECO_SSPECIAL == d)
373 encode(p, word, len);
374 }
375
376
377 static void
378 res(struct termp *p, const char *word, size_t len)
379 {
380 const char *rhs;
381 size_t sz;
382
383 rhs = chars_res2str(p->symtab, word, len, &sz);
384 if (rhs)
385 encode(p, rhs, sz);
386 }
387
388
389 void
390 term_fontlast(struct termp *p)
391 {
392 enum termfont f;
393
394 f = p->fontl;
395 p->fontl = p->fontq[p->fonti];
396 p->fontq[p->fonti] = f;
397 }
398
399
400 void
401 term_fontrepl(struct termp *p, enum termfont f)
402 {
403
404 p->fontl = p->fontq[p->fonti];
405 p->fontq[p->fonti] = f;
406 }
407
408
409 void
410 term_fontpush(struct termp *p, enum termfont f)
411 {
412
413 assert(p->fonti + 1 < 10);
414 p->fontl = p->fontq[p->fonti];
415 p->fontq[++p->fonti] = f;
416 }
417
418
419 const void *
420 term_fontq(struct termp *p)
421 {
422
423 return(&p->fontq[p->fonti]);
424 }
425
426
427 enum termfont
428 term_fonttop(struct termp *p)
429 {
430
431 return(p->fontq[p->fonti]);
432 }
433
434
435 void
436 term_fontpopq(struct termp *p, const void *key)
437 {
438
439 while (p->fonti >= 0 && key != &p->fontq[p->fonti])
440 p->fonti--;
441 assert(p->fonti >= 0);
442 }
443
444
445 void
446 term_fontpop(struct termp *p)
447 {
448
449 assert(p->fonti);
450 p->fonti--;
451 }
452
453
454 /*
455 * Handle pwords, partial words, which may be either a single word or a
456 * phrase that cannot be broken down (such as a literal string). This
457 * handles word styling.
458 */
459 void
460 term_word(struct termp *p, const char *word)
461 {
462 const char *sv, *seq;
463 int sz;
464 size_t ssz;
465 enum roffdeco deco;
466
467 sv = word;
468
469 if (word[0] && '\0' == word[1])
470 switch (word[0]) {
471 case('.'):
472 /* FALLTHROUGH */
473 case(','):
474 /* FALLTHROUGH */
475 case(';'):
476 /* FALLTHROUGH */
477 case(':'):
478 /* FALLTHROUGH */
479 case('?'):
480 /* FALLTHROUGH */
481 case('!'):
482 /* FALLTHROUGH */
483 case(')'):
484 /* FALLTHROUGH */
485 case(']'):
486 if ( ! (TERMP_IGNDELIM & p->flags))
487 p->flags |= TERMP_NOSPACE;
488 break;
489 default:
490 break;
491 }
492
493 if ( ! (TERMP_NOSPACE & p->flags)) {
494 if ( ! (TERMP_KEEP & p->flags)) {
495 if (TERMP_PREKEEP & p->flags)
496 p->flags |= TERMP_KEEP;
497 bufferc(p, ' ');
498 if (TERMP_SENTENCE & p->flags)
499 bufferc(p, ' ');
500 } else
501 bufferc(p, ASCII_NBRSP);
502 }
503
504 if ( ! (p->flags & TERMP_NONOSPACE))
505 p->flags &= ~TERMP_NOSPACE;
506
507 p->flags &= ~TERMP_SENTENCE;
508
509 while (*word) {
510 if ((ssz = strcspn(word, "\\")) > 0)
511 encode(p, word, ssz);
512
513 word += ssz;
514 if ('\\' != *word)
515 continue;
516
517 seq = ++word;
518 sz = a2roffdeco(&deco, &seq, &ssz);
519
520 switch (deco) {
521 case (DECO_RESERVED):
522 res(p, seq, ssz);
523 break;
524 case (DECO_SPECIAL):
525 /* FALLTHROUGH */
526 case (DECO_SSPECIAL):
527 spec(p, deco, seq, ssz);
528 break;
529 case (DECO_BOLD):
530 term_fontrepl(p, TERMFONT_BOLD);
531 break;
532 case (DECO_ITALIC):
533 term_fontrepl(p, TERMFONT_UNDER);
534 break;
535 case (DECO_ROMAN):
536 term_fontrepl(p, TERMFONT_NONE);
537 break;
538 case (DECO_PREVIOUS):
539 term_fontlast(p);
540 break;
541 default:
542 break;
543 }
544
545 word += sz;
546 if (DECO_NOSPACE == deco && '\0' == *word)
547 p->flags |= TERMP_NOSPACE;
548 }
549
550 /*
551 * Note that we don't process the pipe: the parser sees it as
552 * punctuation, but we don't in terms of typography.
553 */
554 if (sv[0] && '\0' == sv[1])
555 switch (sv[0]) {
556 case('('):
557 /* FALLTHROUGH */
558 case('['):
559 p->flags |= TERMP_NOSPACE;
560 break;
561 default:
562 break;
563 }
564 }
565
566
567 static void
568 adjbuf(struct termp *p, size_t sz)
569 {
570
571 if (0 == p->maxcols)
572 p->maxcols = 1024;
573 while (sz >= p->maxcols)
574 p->maxcols <<= 2;
575
576 p->buf = realloc(p->buf, p->maxcols);
577 if (NULL == p->buf) {
578 perror(NULL);
579 exit(EXIT_FAILURE);
580 }
581 }
582
583
584 static void
585 buffera(struct termp *p, const char *word, size_t sz)
586 {
587
588 if (p->col + sz >= p->maxcols)
589 adjbuf(p, p->col + sz);
590
591 memcpy(&p->buf[(int)p->col], word, sz);
592 p->col += sz;
593 }
594
595
596 static void
597 bufferc(struct termp *p, char c)
598 {
599
600 if (p->col + 1 >= p->maxcols)
601 adjbuf(p, p->col + 1);
602
603 p->buf[(int)p->col++] = c;
604 }
605
606
607 static void
608 encode(struct termp *p, const char *word, size_t sz)
609 {
610 enum termfont f;
611 int i;
612
613 /*
614 * Encode and buffer a string of characters. If the current
615 * font mode is unset, buffer directly, else encode then buffer
616 * character by character.
617 */
618
619 if (TERMFONT_NONE == (f = term_fonttop(p))) {
620 buffera(p, word, sz);
621 return;
622 }
623
624 for (i = 0; i < (int)sz; i++) {
625 if ( ! isgraph((u_char)word[i])) {
626 bufferc(p, word[i]);
627 continue;
628 }
629
630 if (TERMFONT_UNDER == f)
631 bufferc(p, '_');
632 else
633 bufferc(p, word[i]);
634
635 bufferc(p, 8);
636 bufferc(p, word[i]);
637 }
638 }
639
640
641 size_t
642 term_len(const struct termp *p, size_t sz)
643 {
644
645 return((*p->width)(p, ' ') * sz);
646 }
647
648
649 size_t
650 term_strlen(const struct termp *p, const char *cp)
651 {
652 size_t sz;
653
654 for (sz = 0; *cp; cp++)
655 sz += (*p->width)(p, *cp);
656
657 return(sz);
658 }
659
660
661 /* ARGSUSED */
662 size_t
663 term_vspan(const struct termp *p, const struct roffsu *su)
664 {
665 double r;
666
667 switch (su->unit) {
668 case (SCALE_CM):
669 r = su->scale * 2;
670 break;
671 case (SCALE_IN):
672 r = su->scale * 6;
673 break;
674 case (SCALE_PC):
675 r = su->scale;
676 break;
677 case (SCALE_PT):
678 r = su->scale / 8;
679 break;
680 case (SCALE_MM):
681 r = su->scale / 1000;
682 break;
683 case (SCALE_VS):
684 r = su->scale;
685 break;
686 default:
687 r = su->scale - 1;
688 break;
689 }
690
691 if (r < 0.0)
692 r = 0.0;
693 return(/* LINTED */(size_t)
694 r);
695 }
696
697
698 size_t
699 term_hspan(const struct termp *p, const struct roffsu *su)
700 {
701 double v;
702
703 v = ((*p->hspan)(p, su));
704 if (v < 0.0)
705 v = 0.0;
706 return((size_t) /* LINTED */
707 v);
708 }