]> git.cameronkatri.com Git - mandoc.git/blob - term.c
Basic implementation of .Bk/.Ek; from OpenBSD.
[mandoc.git] / term.c
1 /* $Id: term.c,v 1.151 2010/06/27 01:26:20 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/types.h>
22
23 #include <assert.h>
24 #include <ctype.h>
25 #include <stdint.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29
30 #include "mandoc.h"
31 #include "chars.h"
32 #include "out.h"
33 #include "term.h"
34 #include "main.h"
35
/*
 * File-local helpers: escape-sequence lookup (spec, res), raw output
 * buffering (buffera, bufferc, adjbuf) and font-aware buffering (encode).
 */
static	void	 spec(struct termp *p, const char *word, size_t len);
static	void	 res(struct termp *p, const char *word, size_t len);
static	void	 buffera(struct termp *p, const char *word, size_t sz);
static	void	 bufferc(struct termp *p, char c);
static	void	 adjbuf(struct termp *p, size_t sz);
static	void	 encode(struct termp *p, const char *word, size_t sz);
42
43
44 void
45 term_free(struct termp *p)
46 {
47
48 if (p->buf)
49 free(p->buf);
50 if (p->symtab)
51 chars_free(p->symtab);
52
53 free(p);
54 }
55
56
57 void
58 term_begin(struct termp *p, term_margin head,
59 term_margin foot, const void *arg)
60 {
61
62 p->headf = head;
63 p->footf = foot;
64 p->argf = arg;
65 (*p->begin)(p);
66 }
67
68
69 void
70 term_end(struct termp *p)
71 {
72
73 (*p->end)(p);
74 }
75
76
77 struct termp *
78 term_alloc(enum termenc enc)
79 {
80 struct termp *p;
81
82 p = calloc(1, sizeof(struct termp));
83 if (NULL == p) {
84 perror(NULL);
85 exit(EXIT_FAILURE);
86 }
87
88 p->enc = enc;
89 return(p);
90 }
91
92
93 /*
94 * Flush a line of text. A "line" is loosely defined as being something
95 * that should be followed by a newline, regardless of whether it's
96 * broken apart by newlines getting there. A line can also be a
97 * fragment of a columnar list (`Bl -tag' or `Bl -column'), which does
98 * not have a trailing newline.
99 *
100 * The following flags may be specified:
101 *
102 * - TERMP_NOLPAD: when beginning to write the line, don't left-pad the
103 * offset value. This is useful when doing columnar lists where the
104 * prior column has right-padded.
105 *
106 * - TERMP_NOBREAK: this is the most important and is used when making
107 * columns. In short: don't print a newline and instead pad to the
108 * right margin. Used in conjunction with TERMP_NOLPAD.
109 *
110 * - TERMP_TWOSPACE: when padding, make sure there are at least two
111 * space characters of padding. Otherwise, rather break the line.
112 *
113 * - TERMP_DANGLE: don't newline when TERMP_NOBREAK is specified and
114 * the line is overrun, and don't pad-right if it's underrun.
115 *
116 * - TERMP_HANG: like TERMP_DANGLE, but doesn't newline when
117 * overruning, instead save the position and continue at that point
118 * when the next invocation.
119 *
120 * In-line line breaking:
121 *
122 * If TERMP_NOBREAK is specified and the line overruns the right
123 * margin, it will break and pad-right to the right margin after
124 * writing. If maxrmargin is violated, it will break and continue
125 * writing from the right-margin, which will lead to the above scenario
126 * upon exit. Otherwise, the line will break at the right margin.
127 */
128 void
129 term_flushln(struct termp *p)
130 {
131 int i; /* current input position in p->buf */
132 size_t vis; /* current visual position on output */
133 size_t vbl; /* number of blanks to prepend to output */
134 size_t vend; /* end of word visual position on output */
135 size_t bp; /* visual right border position */
136 int j; /* temporary loop index */
137 int jhy; /* last hyphen before line overflow */
138 size_t maxvis, mmax;
139
140 /*
141 * First, establish the maximum columns of "visible" content.
142 * This is usually the difference between the right-margin and
143 * an indentation, but can be, for tagged lists or columns, a
144 * small set of values.
145 */
146
147 assert(p->offset < p->rmargin);
148
149 maxvis = (int)(p->rmargin - p->offset) - p->overstep < 0 ?
150 /* LINTED */
151 0 : p->rmargin - p->offset - p->overstep;
152 mmax = (int)(p->maxrmargin - p->offset) - p->overstep < 0 ?
153 /* LINTED */
154 0 : p->maxrmargin - p->offset - p->overstep;
155
156 bp = TERMP_NOBREAK & p->flags ? mmax : maxvis;
157
158 /*
159 * Indent the first line of a paragraph.
160 */
161 vbl = p->flags & TERMP_NOLPAD ? 0 : p->offset;
162
163 /*
164 * FIXME: if bp is zero, we still output the first word before
165 * breaking the line.
166 */
167
168 vis = vend = i = 0;
169 while (i < (int)p->col) {
170
171 /*
172 * Handle literal tab characters.
173 */
174 for (j = i; j < (int)p->col; j++) {
175 if ('\t' != p->buf[j])
176 break;
177 vend = (vis/p->tabwidth+1)*p->tabwidth;
178 vbl += vend - vis;
179 vis = vend;
180 }
181
182 /*
183 * Count up visible word characters. Control sequences
184 * (starting with the CSI) aren't counted. A space
185 * generates a non-printing word, which is valid (the
186 * space is printed according to regular spacing rules).
187 */
188
189 /* LINTED */
190 for (jhy = 0; j < (int)p->col; j++) {
191 if ((j && ' ' == p->buf[j]) || '\t' == p->buf[j])
192 break;
193 if (8 != p->buf[j]) {
194 if (vend > vis && vend < bp &&
195 ASCII_HYPH == p->buf[j])
196 jhy = j;
197 vend++;
198 } else
199 vend--;
200 }
201
202 /*
203 * Find out whether we would exceed the right margin.
204 * If so, break to the next line.
205 */
206 if (vend > bp && 0 == jhy && vis > 0) {
207 vend -= vis;
208 (*p->endline)(p);
209 if (TERMP_NOBREAK & p->flags) {
210 p->viscol = p->rmargin;
211 (*p->advance)(p, p->rmargin);
212 vend += p->rmargin - p->offset;
213 } else {
214 p->viscol = 0;
215 vbl = p->offset;
216 }
217
218 /* Remove the p->overstep width. */
219
220 bp += (int)/* LINTED */
221 p->overstep;
222 p->overstep = 0;
223 }
224
225 /*
226 * Skip leading tabs, they were handled above.
227 */
228 while (i < (int)p->col && '\t' == p->buf[i])
229 i++;
230
231 /* Write out the [remaining] word. */
232 for ( ; i < (int)p->col; i++) {
233 if (vend > bp && jhy > 0 && i > jhy)
234 break;
235 if ('\t' == p->buf[i])
236 break;
237 if (' ' == p->buf[i]) {
238 while (' ' == p->buf[i]) {
239 vbl++;
240 i++;
241 }
242 break;
243 }
244 if (ASCII_NBRSP == p->buf[i]) {
245 vbl++;
246 continue;
247 }
248
249 /*
250 * Now we definitely know there will be
251 * printable characters to output,
252 * so write preceding white space now.
253 */
254 if (vbl) {
255 (*p->advance)(p, vbl);
256 p->viscol += vbl;
257 vbl = 0;
258 }
259
260 if (ASCII_HYPH == p->buf[i])
261 (*p->letter)(p, '-');
262 else
263 (*p->letter)(p, p->buf[i]);
264
265 p->viscol += 1;
266 }
267 vend += vbl;
268 vis = vend;
269 }
270
271 p->col = 0;
272 p->overstep = 0;
273
274 if ( ! (TERMP_NOBREAK & p->flags)) {
275 p->viscol = 0;
276 (*p->endline)(p);
277 return;
278 }
279
280 if (TERMP_HANG & p->flags) {
281 /* We need one blank after the tag. */
282 p->overstep = /* LINTED */
283 vis - maxvis + 1;
284
285 /*
286 * Behave exactly the same way as groff:
287 * If we have overstepped the margin, temporarily move
288 * it to the right and flag the rest of the line to be
289 * shorter.
290 * If we landed right at the margin, be happy.
291 * If we are one step before the margin, temporarily
292 * move it one step LEFT and flag the rest of the line
293 * to be longer.
294 */
295 if (p->overstep >= -1) {
296 assert((int)maxvis + p->overstep >= 0);
297 /* LINTED */
298 maxvis += p->overstep;
299 } else
300 p->overstep = 0;
301
302 } else if (TERMP_DANGLE & p->flags)
303 return;
304
305 /* Right-pad. */
306 if (maxvis > vis + /* LINTED */
307 ((TERMP_TWOSPACE & p->flags) ? 1 : 0)) {
308 p->viscol += maxvis - vis;
309 (*p->advance)(p, maxvis - vis);
310 vis += (maxvis - vis);
311 } else { /* ...or newline break. */
312 (*p->endline)(p);
313 p->viscol = p->rmargin;
314 (*p->advance)(p, p->rmargin);
315 }
316 }
317
318
319 /*
320 * A newline only breaks an existing line; it won't assert vertical
321 * space. All data in the output buffer is flushed prior to the newline
322 * assertion.
323 */
324 void
325 term_newln(struct termp *p)
326 {
327
328 p->flags |= TERMP_NOSPACE;
329 if (0 == p->col && 0 == p->viscol) {
330 p->flags &= ~TERMP_NOLPAD;
331 return;
332 }
333 term_flushln(p);
334 p->flags &= ~TERMP_NOLPAD;
335 }
336
337
338 /*
339 * Asserts a vertical space (a full, empty line-break between lines).
340 * Note that if used twice, this will cause two blank spaces and so on.
341 * All data in the output buffer is flushed prior to the newline
342 * assertion.
343 */
344 void
345 term_vspace(struct termp *p)
346 {
347
348 term_newln(p);
349 p->viscol = 0;
350 (*p->endline)(p);
351 }
352
353
354 static void
355 spec(struct termp *p, const char *word, size_t len)
356 {
357 const char *rhs;
358 size_t sz;
359
360 rhs = chars_a2ascii(p->symtab, word, len, &sz);
361 if (rhs)
362 encode(p, rhs, sz);
363 }
364
365
366 static void
367 res(struct termp *p, const char *word, size_t len)
368 {
369 const char *rhs;
370 size_t sz;
371
372 rhs = chars_a2res(p->symtab, word, len, &sz);
373 if (rhs)
374 encode(p, rhs, sz);
375 }
376
377
378 void
379 term_fontlast(struct termp *p)
380 {
381 enum termfont f;
382
383 f = p->fontl;
384 p->fontl = p->fontq[p->fonti];
385 p->fontq[p->fonti] = f;
386 }
387
388
389 void
390 term_fontrepl(struct termp *p, enum termfont f)
391 {
392
393 p->fontl = p->fontq[p->fonti];
394 p->fontq[p->fonti] = f;
395 }
396
397
398 void
399 term_fontpush(struct termp *p, enum termfont f)
400 {
401
402 assert(p->fonti + 1 < 10);
403 p->fontl = p->fontq[p->fonti];
404 p->fontq[++p->fonti] = f;
405 }
406
407
408 const void *
409 term_fontq(struct termp *p)
410 {
411
412 return(&p->fontq[p->fonti]);
413 }
414
415
416 enum termfont
417 term_fonttop(struct termp *p)
418 {
419
420 return(p->fontq[p->fonti]);
421 }
422
423
424 void
425 term_fontpopq(struct termp *p, const void *key)
426 {
427
428 while (p->fonti >= 0 && key != &p->fontq[p->fonti])
429 p->fonti--;
430 assert(p->fonti >= 0);
431 }
432
433
434 void
435 term_fontpop(struct termp *p)
436 {
437
438 assert(p->fonti);
439 p->fonti--;
440 }
441
442
443 /*
444 * Handle pwords, partial words, which may be either a single word or a
445 * phrase that cannot be broken down (such as a literal string). This
446 * handles word styling.
447 */
448 void
449 term_word(struct termp *p, const char *word)
450 {
451 const char *sv, *seq;
452 int sz;
453 size_t ssz;
454 enum roffdeco deco;
455
456 sv = word;
457
458 if (word[0] && '\0' == word[1])
459 switch (word[0]) {
460 case('.'):
461 /* FALLTHROUGH */
462 case(','):
463 /* FALLTHROUGH */
464 case(';'):
465 /* FALLTHROUGH */
466 case(':'):
467 /* FALLTHROUGH */
468 case('?'):
469 /* FALLTHROUGH */
470 case('!'):
471 /* FALLTHROUGH */
472 case(')'):
473 /* FALLTHROUGH */
474 case(']'):
475 if ( ! (TERMP_IGNDELIM & p->flags))
476 p->flags |= TERMP_NOSPACE;
477 break;
478 default:
479 break;
480 }
481
482 if ( ! (TERMP_NOSPACE & p->flags)) {
483 if ( ! (TERMP_KEEP & p->flags)) {
484 if (TERMP_PREKEEP & p->flags)
485 p->flags |= TERMP_KEEP;
486 bufferc(p, ' ');
487 if (TERMP_SENTENCE & p->flags)
488 bufferc(p, ' ');
489 } else
490 bufferc(p, ASCII_NBRSP);
491 }
492
493 if ( ! (p->flags & TERMP_NONOSPACE))
494 p->flags &= ~TERMP_NOSPACE;
495
496 p->flags &= ~TERMP_SENTENCE;
497
498 /* FIXME: use strcspn. */
499
500 while (*word) {
501 if ('\\' != *word) {
502 encode(p, word, 1);
503 word++;
504 continue;
505 }
506
507 seq = ++word;
508 sz = a2roffdeco(&deco, &seq, &ssz);
509
510 switch (deco) {
511 case (DECO_RESERVED):
512 res(p, seq, ssz);
513 break;
514 case (DECO_SPECIAL):
515 spec(p, seq, ssz);
516 break;
517 case (DECO_BOLD):
518 term_fontrepl(p, TERMFONT_BOLD);
519 break;
520 case (DECO_ITALIC):
521 term_fontrepl(p, TERMFONT_UNDER);
522 break;
523 case (DECO_ROMAN):
524 term_fontrepl(p, TERMFONT_NONE);
525 break;
526 case (DECO_PREVIOUS):
527 term_fontlast(p);
528 break;
529 default:
530 break;
531 }
532
533 word += sz;
534 if (DECO_NOSPACE == deco && '\0' == *word)
535 p->flags |= TERMP_NOSPACE;
536 }
537
538 /*
539 * Note that we don't process the pipe: the parser sees it as
540 * punctuation, but we don't in terms of typography.
541 */
542 if (sv[0] && 0 == sv[1])
543 switch (sv[0]) {
544 case('('):
545 /* FALLTHROUGH */
546 case('['):
547 p->flags |= TERMP_NOSPACE;
548 break;
549 default:
550 break;
551 }
552 }
553
554
555 static void
556 adjbuf(struct termp *p, size_t sz)
557 {
558
559 if (0 == p->maxcols)
560 p->maxcols = 1024;
561 while (sz >= p->maxcols)
562 p->maxcols <<= 2;
563
564 p->buf = realloc(p->buf, p->maxcols);
565 if (NULL == p->buf) {
566 perror(NULL);
567 exit(EXIT_FAILURE);
568 }
569 }
570
571
572 static void
573 buffera(struct termp *p, const char *word, size_t sz)
574 {
575
576 if (p->col + sz >= p->maxcols)
577 adjbuf(p, p->col + sz);
578
579 memcpy(&p->buf[(int)p->col], word, sz);
580 p->col += sz;
581 }
582
583
584 static void
585 bufferc(struct termp *p, char c)
586 {
587
588 if (p->col + 1 >= p->maxcols)
589 adjbuf(p, p->col + 1);
590
591 p->buf[(int)p->col++] = c;
592 }
593
594
595 static void
596 encode(struct termp *p, const char *word, size_t sz)
597 {
598 enum termfont f;
599 int i;
600
601 /*
602 * Encode and buffer a string of characters. If the current
603 * font mode is unset, buffer directly, else encode then buffer
604 * character by character.
605 */
606
607 if (TERMFONT_NONE == (f = term_fonttop(p))) {
608 buffera(p, word, sz);
609 return;
610 }
611
612 for (i = 0; i < (int)sz; i++) {
613 if ( ! isgraph((u_char)word[i])) {
614 bufferc(p, word[i]);
615 continue;
616 }
617
618 if (TERMFONT_UNDER == f)
619 bufferc(p, '_');
620 else
621 bufferc(p, word[i]);
622
623 bufferc(p, 8);
624 bufferc(p, word[i]);
625 }
626 }
627
628
629 size_t
630 term_len(const struct termp *p, size_t sz)
631 {
632
633 return((*p->width)(p, ' ') * sz);
634 }
635
636
637 size_t
638 term_strlen(const struct termp *p, const char *cp)
639 {
640 size_t sz;
641
642 for (sz = 0; *cp; cp++)
643 sz += (*p->width)(p, *cp);
644
645 return(sz);
646 }
647
648
649 size_t
650 term_vspan(const struct termp *p, const struct roffsu *su)
651 {
652 double r;
653
654 switch (su->unit) {
655 case (SCALE_CM):
656 r = su->scale * 2;
657 break;
658 case (SCALE_IN):
659 r = su->scale * 6;
660 break;
661 case (SCALE_PC):
662 r = su->scale;
663 break;
664 case (SCALE_PT):
665 r = su->scale / 8;
666 break;
667 case (SCALE_MM):
668 r = su->scale / 1000;
669 break;
670 case (SCALE_VS):
671 r = su->scale;
672 break;
673 default:
674 r = su->scale - 1;
675 break;
676 }
677
678 if (r < 0.0)
679 r = 0.0;
680 return(/* LINTED */(size_t)
681 r);
682 }
683
684
685 size_t
686 term_hspan(const struct termp *p, const struct roffsu *su)
687 {
688 double r;
689
690 /* XXX: CM, IN, and PT are approximations. */
691
692 switch (su->unit) {
693 case (SCALE_CM):
694 r = 4 * su->scale;
695 break;
696 case (SCALE_IN):
697 /* XXX: this is an approximation. */
698 r = 10 * su->scale;
699 break;
700 case (SCALE_PC):
701 r = (10 * su->scale) / 6;
702 break;
703 case (SCALE_PT):
704 r = (10 * su->scale) / 72;
705 break;
706 case (SCALE_MM):
707 r = su->scale / 1000; /* FIXME: double-check. */
708 break;
709 case (SCALE_VS):
710 r = su->scale * 2 - 1; /* FIXME: double-check. */
711 break;
712 default:
713 r = su->scale;
714 break;
715 }
716
717 if (r < 0.0)
718 r = 0.0;
719 return((size_t)/* LINTED */
720 r);
721 }
722
723