]> git.cameronkatri.com Git - mandoc.git/blob - term.c
Pushed normalisation of scaling units into term_hspan().
[mandoc.git] / term.c
1 /* $Id: term.c,v 1.156 2010/06/30 12:30:36 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/types.h>
22
23 #include <assert.h>
24 #include <ctype.h>
25 #include <stdint.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29
30 #include "mandoc.h"
31 #include "chars.h"
32 #include "out.h"
33 #include "term.h"
34 #include "main.h"
35
36 static void spec(struct termp *, const char *, size_t);
37 static void res(struct termp *, const char *, size_t);
38 static void buffera(struct termp *, const char *, size_t);
39 static void bufferc(struct termp *, char);
40 static void adjbuf(struct termp *p, size_t);
41 static void encode(struct termp *, const char *, size_t);
42
43
44 void
45 term_free(struct termp *p)
46 {
47
48 if (p->buf)
49 free(p->buf);
50 if (p->symtab)
51 chars_free(p->symtab);
52
53 free(p);
54 }
55
56
57 void
58 term_begin(struct termp *p, term_margin head,
59 term_margin foot, const void *arg)
60 {
61
62 p->headf = head;
63 p->footf = foot;
64 p->argf = arg;
65 (*p->begin)(p);
66 }
67
68
69 void
70 term_end(struct termp *p)
71 {
72
73 (*p->end)(p);
74 }
75
76
77 struct termp *
78 term_alloc(enum termenc enc)
79 {
80 struct termp *p;
81
82 p = calloc(1, sizeof(struct termp));
83 if (NULL == p) {
84 perror(NULL);
85 exit(EXIT_FAILURE);
86 }
87
88 p->enc = enc;
89 return(p);
90 }
91
92
93 /*
94 * Flush a line of text. A "line" is loosely defined as being something
95 * that should be followed by a newline, regardless of whether it's
96 * broken apart by newlines getting there. A line can also be a
97 * fragment of a columnar list (`Bl -tag' or `Bl -column'), which does
98 * not have a trailing newline.
99 *
100 * The following flags may be specified:
101 *
102 * - TERMP_NOLPAD: when beginning to write the line, don't left-pad the
103 * offset value. This is useful when doing columnar lists where the
104 * prior column has right-padded.
105 *
106 * - TERMP_NOBREAK: this is the most important and is used when making
107 * columns. In short: don't print a newline and instead pad to the
108 * right margin. Used in conjunction with TERMP_NOLPAD.
109 *
110 * - TERMP_TWOSPACE: when padding, make sure there are at least two
111 * space characters of padding. Otherwise, rather break the line.
112 *
113 * - TERMP_DANGLE: don't newline when TERMP_NOBREAK is specified and
114 * the line is overrun, and don't pad-right if it's underrun.
115 *
116 * - TERMP_HANG: like TERMP_DANGLE, but doesn't newline when
117 * overruning, instead save the position and continue at that point
118 * when the next invocation.
119 *
120 * In-line line breaking:
121 *
122 * If TERMP_NOBREAK is specified and the line overruns the right
123 * margin, it will break and pad-right to the right margin after
124 * writing. If maxrmargin is violated, it will break and continue
125 * writing from the right-margin, which will lead to the above scenario
126 * upon exit. Otherwise, the line will break at the right margin.
127 */
128 void
129 term_flushln(struct termp *p)
130 {
131 int i; /* current input position in p->buf */
132 size_t vis; /* current visual position on output */
133 size_t vbl; /* number of blanks to prepend to output */
134 size_t vend; /* end of word visual position on output */
135 size_t bp; /* visual right border position */
136 int j; /* temporary loop index for p->buf */
137 int jhy; /* last hyph before overflow w/r/t j */
138 size_t maxvis; /* output position of visible boundary */
139 size_t mmax; /* used in calculating bp */
140
141 /*
142 * First, establish the maximum columns of "visible" content.
143 * This is usually the difference between the right-margin and
144 * an indentation, but can be, for tagged lists or columns, a
145 * small set of values.
146 */
147
148 assert(p->offset < p->rmargin);
149
150 maxvis = (int)(p->rmargin - p->offset) - p->overstep < 0 ?
151 /* LINTED */
152 0 : p->rmargin - p->offset - p->overstep;
153 mmax = (int)(p->maxrmargin - p->offset) - p->overstep < 0 ?
154 /* LINTED */
155 0 : p->maxrmargin - p->offset - p->overstep;
156
157 bp = TERMP_NOBREAK & p->flags ? mmax : maxvis;
158
159 /*
160 * Indent the first line of a paragraph.
161 */
162 vbl = p->flags & TERMP_NOLPAD ? 0 : p->offset;
163
164 vis = vend = i = 0;
165
166 while (i < (int)p->col) {
167 /*
168 * Handle literal tab characters: collapse all
169 * subsequent tabs into a single huge set of spaces.
170 */
171 for (j = i; j < (int)p->col; j++) {
172 if ('\t' != p->buf[j])
173 break;
174 vend = (vis / p->tabwidth + 1) * p->tabwidth;
175 vbl += vend - vis;
176 vis = vend;
177 }
178
179 /*
180 * Count up visible word characters. Control sequences
181 * (starting with the CSI) aren't counted. A space
182 * generates a non-printing word, which is valid (the
183 * space is printed according to regular spacing rules).
184 */
185
186 /* LINTED */
187 for (jhy = 0; j < (int)p->col; j++) {
188 if ((j && ' ' == p->buf[j]) || '\t' == p->buf[j])
189 break;
190
191 /* Back over the the last printed character. */
192 if (8 == p->buf[j]) {
193 assert(j);
194 vend -= (*p->width)(p, p->buf[j - 1]);
195 continue;
196 }
197
198 /* Regular word. */
199 /* Break at the hyphen point if we overrun. */
200 if (vend > vis && vend < bp &&
201 ASCII_HYPH == p->buf[j])
202 jhy = j;
203
204 vend += (*p->width)(p, p->buf[j]);
205 }
206
207 /*
208 * Find out whether we would exceed the right margin.
209 * If so, break to the next line.
210 */
211 if (vend > bp && 0 == jhy && vis > 0) {
212 vend -= vis;
213 (*p->endline)(p);
214 if (TERMP_NOBREAK & p->flags) {
215 p->viscol = p->rmargin;
216 (*p->advance)(p, p->rmargin);
217 vend += p->rmargin - p->offset;
218 } else {
219 p->viscol = 0;
220 vbl = p->offset;
221 }
222
223 /* Remove the p->overstep width. */
224
225 bp += (int)/* LINTED */
226 p->overstep;
227 p->overstep = 0;
228 }
229
230 /*
231 * Skip leading tabs, they were handled above.
232 */
233 while (i < (int)p->col && '\t' == p->buf[i])
234 i++;
235
236 /* Write out the [remaining] word. */
237 for ( ; i < (int)p->col; i++) {
238 if (vend > bp && jhy > 0 && i > jhy)
239 break;
240 if ('\t' == p->buf[i])
241 break;
242 if (' ' == p->buf[i]) {
243 while (' ' == p->buf[i]) {
244 vbl += (*p->width)(p, p->buf[i]);
245 i++;
246 }
247 break;
248 }
249 if (ASCII_NBRSP == p->buf[i]) {
250 vbl += (*p->width)(p, ' ');
251 continue;
252 }
253
254 /*
255 * Now we definitely know there will be
256 * printable characters to output,
257 * so write preceding white space now.
258 */
259 if (vbl) {
260 (*p->advance)(p, vbl);
261 p->viscol += vbl;
262 vbl = 0;
263 }
264
265 if (ASCII_HYPH == p->buf[i]) {
266 (*p->letter)(p, '-');
267 p->viscol += (*p->width)(p, '-');
268 } else {
269 (*p->letter)(p, p->buf[i]);
270 p->viscol += (*p->width)(p, p->buf[i]);
271 }
272 }
273 vend += vbl;
274 vis = vend;
275 }
276
277 p->col = 0;
278 p->overstep = 0;
279
280 if ( ! (TERMP_NOBREAK & p->flags)) {
281 p->viscol = 0;
282 (*p->endline)(p);
283 return;
284 }
285
286 if (TERMP_HANG & p->flags) {
287 /* We need one blank after the tag. */
288 p->overstep = /* LINTED */
289 vis - maxvis + (*p->width)(p, ' ');
290
291 /*
292 * Behave exactly the same way as groff:
293 * If we have overstepped the margin, temporarily move
294 * it to the right and flag the rest of the line to be
295 * shorter.
296 * If we landed right at the margin, be happy.
297 * If we are one step before the margin, temporarily
298 * move it one step LEFT and flag the rest of the line
299 * to be longer.
300 */
301 if (p->overstep >= -1) {
302 assert((int)maxvis + p->overstep >= 0);
303 /* LINTED */
304 maxvis += p->overstep;
305 } else
306 p->overstep = 0;
307
308 } else if (TERMP_DANGLE & p->flags)
309 return;
310
311 /* Right-pad. */
312 if (maxvis > vis + /* LINTED */
313 ((TERMP_TWOSPACE & p->flags) ?
314 (*p->width)(p, ' ') : 0)) {
315 p->viscol += maxvis - vis;
316 (*p->advance)(p, maxvis - vis);
317 vis += (maxvis - vis);
318 } else { /* ...or newline break. */
319 (*p->endline)(p);
320 p->viscol = p->rmargin;
321 (*p->advance)(p, p->rmargin);
322 }
323 }
324
325
326 /*
327 * A newline only breaks an existing line; it won't assert vertical
328 * space. All data in the output buffer is flushed prior to the newline
329 * assertion.
330 */
331 void
332 term_newln(struct termp *p)
333 {
334
335 p->flags |= TERMP_NOSPACE;
336 if (0 == p->col && 0 == p->viscol) {
337 p->flags &= ~TERMP_NOLPAD;
338 return;
339 }
340 term_flushln(p);
341 p->flags &= ~TERMP_NOLPAD;
342 }
343
344
345 /*
346 * Asserts a vertical space (a full, empty line-break between lines).
347 * Note that if used twice, this will cause two blank spaces and so on.
348 * All data in the output buffer is flushed prior to the newline
349 * assertion.
350 */
351 void
352 term_vspace(struct termp *p)
353 {
354
355 term_newln(p);
356 p->viscol = 0;
357 (*p->endline)(p);
358 }
359
360
361 static void
362 spec(struct termp *p, const char *word, size_t len)
363 {
364 const char *rhs;
365 size_t sz;
366
367 rhs = chars_a2ascii(p->symtab, word, len, &sz);
368 if (rhs)
369 encode(p, rhs, sz);
370 }
371
372
373 static void
374 res(struct termp *p, const char *word, size_t len)
375 {
376 const char *rhs;
377 size_t sz;
378
379 rhs = chars_a2res(p->symtab, word, len, &sz);
380 if (rhs)
381 encode(p, rhs, sz);
382 }
383
384
385 void
386 term_fontlast(struct termp *p)
387 {
388 enum termfont f;
389
390 f = p->fontl;
391 p->fontl = p->fontq[p->fonti];
392 p->fontq[p->fonti] = f;
393 }
394
395
396 void
397 term_fontrepl(struct termp *p, enum termfont f)
398 {
399
400 p->fontl = p->fontq[p->fonti];
401 p->fontq[p->fonti] = f;
402 }
403
404
405 void
406 term_fontpush(struct termp *p, enum termfont f)
407 {
408
409 assert(p->fonti + 1 < 10);
410 p->fontl = p->fontq[p->fonti];
411 p->fontq[++p->fonti] = f;
412 }
413
414
415 const void *
416 term_fontq(struct termp *p)
417 {
418
419 return(&p->fontq[p->fonti]);
420 }
421
422
423 enum termfont
424 term_fonttop(struct termp *p)
425 {
426
427 return(p->fontq[p->fonti]);
428 }
429
430
431 void
432 term_fontpopq(struct termp *p, const void *key)
433 {
434
435 while (p->fonti >= 0 && key != &p->fontq[p->fonti])
436 p->fonti--;
437 assert(p->fonti >= 0);
438 }
439
440
441 void
442 term_fontpop(struct termp *p)
443 {
444
445 assert(p->fonti);
446 p->fonti--;
447 }
448
449
450 /*
451 * Handle pwords, partial words, which may be either a single word or a
452 * phrase that cannot be broken down (such as a literal string). This
453 * handles word styling.
454 */
455 void
456 term_word(struct termp *p, const char *word)
457 {
458 const char *sv, *seq;
459 int sz;
460 size_t ssz;
461 enum roffdeco deco;
462
463 sv = word;
464
465 if (word[0] && '\0' == word[1])
466 switch (word[0]) {
467 case('.'):
468 /* FALLTHROUGH */
469 case(','):
470 /* FALLTHROUGH */
471 case(';'):
472 /* FALLTHROUGH */
473 case(':'):
474 /* FALLTHROUGH */
475 case('?'):
476 /* FALLTHROUGH */
477 case('!'):
478 /* FALLTHROUGH */
479 case(')'):
480 /* FALLTHROUGH */
481 case(']'):
482 if ( ! (TERMP_IGNDELIM & p->flags))
483 p->flags |= TERMP_NOSPACE;
484 break;
485 default:
486 break;
487 }
488
489 if ( ! (TERMP_NOSPACE & p->flags)) {
490 if ( ! (TERMP_KEEP & p->flags)) {
491 if (TERMP_PREKEEP & p->flags)
492 p->flags |= TERMP_KEEP;
493 bufferc(p, ' ');
494 if (TERMP_SENTENCE & p->flags)
495 bufferc(p, ' ');
496 } else
497 bufferc(p, ASCII_NBRSP);
498 }
499
500 if ( ! (p->flags & TERMP_NONOSPACE))
501 p->flags &= ~TERMP_NOSPACE;
502
503 p->flags &= ~TERMP_SENTENCE;
504
505 /* FIXME: use strcspn. */
506
507 while (*word) {
508 if ('\\' != *word) {
509 encode(p, word, 1);
510 word++;
511 continue;
512 }
513
514 seq = ++word;
515 sz = a2roffdeco(&deco, &seq, &ssz);
516
517 switch (deco) {
518 case (DECO_RESERVED):
519 res(p, seq, ssz);
520 break;
521 case (DECO_SPECIAL):
522 spec(p, seq, ssz);
523 break;
524 case (DECO_BOLD):
525 term_fontrepl(p, TERMFONT_BOLD);
526 break;
527 case (DECO_ITALIC):
528 term_fontrepl(p, TERMFONT_UNDER);
529 break;
530 case (DECO_ROMAN):
531 term_fontrepl(p, TERMFONT_NONE);
532 break;
533 case (DECO_PREVIOUS):
534 term_fontlast(p);
535 break;
536 default:
537 break;
538 }
539
540 word += sz;
541 if (DECO_NOSPACE == deco && '\0' == *word)
542 p->flags |= TERMP_NOSPACE;
543 }
544
545 /*
546 * Note that we don't process the pipe: the parser sees it as
547 * punctuation, but we don't in terms of typography.
548 */
549 if (sv[0] && 0 == sv[1])
550 switch (sv[0]) {
551 case('('):
552 /* FALLTHROUGH */
553 case('['):
554 p->flags |= TERMP_NOSPACE;
555 break;
556 default:
557 break;
558 }
559 }
560
561
562 static void
563 adjbuf(struct termp *p, size_t sz)
564 {
565
566 if (0 == p->maxcols)
567 p->maxcols = 1024;
568 while (sz >= p->maxcols)
569 p->maxcols <<= 2;
570
571 p->buf = realloc(p->buf, p->maxcols);
572 if (NULL == p->buf) {
573 perror(NULL);
574 exit(EXIT_FAILURE);
575 }
576 }
577
578
579 static void
580 buffera(struct termp *p, const char *word, size_t sz)
581 {
582
583 if (p->col + sz >= p->maxcols)
584 adjbuf(p, p->col + sz);
585
586 memcpy(&p->buf[(int)p->col], word, sz);
587 p->col += sz;
588 }
589
590
591 static void
592 bufferc(struct termp *p, char c)
593 {
594
595 if (p->col + 1 >= p->maxcols)
596 adjbuf(p, p->col + 1);
597
598 p->buf[(int)p->col++] = c;
599 }
600
601
602 static void
603 encode(struct termp *p, const char *word, size_t sz)
604 {
605 enum termfont f;
606 int i;
607
608 /*
609 * Encode and buffer a string of characters. If the current
610 * font mode is unset, buffer directly, else encode then buffer
611 * character by character.
612 */
613
614 if (TERMFONT_NONE == (f = term_fonttop(p))) {
615 buffera(p, word, sz);
616 return;
617 }
618
619 for (i = 0; i < (int)sz; i++) {
620 if ( ! isgraph((u_char)word[i])) {
621 bufferc(p, word[i]);
622 continue;
623 }
624
625 if (TERMFONT_UNDER == f)
626 bufferc(p, '_');
627 else
628 bufferc(p, word[i]);
629
630 bufferc(p, 8);
631 bufferc(p, word[i]);
632 }
633 }
634
635
636 size_t
637 term_len(const struct termp *p, size_t sz)
638 {
639
640 return((*p->width)(p, ' ') * sz);
641 }
642
643
644 size_t
645 term_strlen(const struct termp *p, const char *cp)
646 {
647 size_t sz;
648
649 for (sz = 0; *cp; cp++)
650 sz += (*p->width)(p, *cp);
651
652 return(sz);
653 }
654
655
656 size_t
657 term_vspan(const struct termp *p, const struct roffsu *su)
658 {
659 double r;
660
661 switch (su->unit) {
662 case (SCALE_CM):
663 r = su->scale * 2;
664 break;
665 case (SCALE_IN):
666 r = su->scale * 6;
667 break;
668 case (SCALE_PC):
669 r = su->scale;
670 break;
671 case (SCALE_PT):
672 r = su->scale / 8;
673 break;
674 case (SCALE_MM):
675 r = su->scale / 1000;
676 break;
677 case (SCALE_VS):
678 r = su->scale;
679 break;
680 default:
681 r = su->scale - 1;
682 break;
683 }
684
685 if (r < 0.0)
686 r = 0.0;
687 return(/* LINTED */(size_t)
688 r);
689 }
690
691
692 size_t
693 term_hspan(const struct termp *p, const struct roffsu *su)
694 {
695 double v;
696
697 v = ((*p->hspan)(p, su));
698 if (v < 0.0)
699 v = 0.0;
700 return((size_t) /* LINTED */
701 v);
702 }