]> git.cameronkatri.com Git - mandoc.git/blob - term.c
Churn-ish check-in getting mdoc_parseln() and man_parseln() to accept a
[mandoc.git] / term.c
1 /* $Id: term.c,v 1.150 2010/06/26 15:36:37 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/types.h>
22
23 #include <assert.h>
24 #include <ctype.h>
25 #include <stdint.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29
30 #include "mandoc.h"
31 #include "chars.h"
32 #include "out.h"
33 #include "term.h"
34 #include "main.h"
35
/* Forward declarations of the file-local helpers defined below. */
static	void		  adjbuf(struct termp *p, size_t sz);
static	void		  buffera(struct termp *p, const char *word, size_t sz);
static	void		  bufferc(struct termp *p, char c);
static	void		  encode(struct termp *p, const char *word, size_t sz);
static	void		  res(struct termp *p, const char *word, size_t len);
static	void		  spec(struct termp *p, const char *word, size_t len);
42
43
44 void
45 term_free(struct termp *p)
46 {
47
48 if (p->buf)
49 free(p->buf);
50 if (p->symtab)
51 chars_free(p->symtab);
52
53 free(p);
54 }
55
56
57 void
58 term_begin(struct termp *p, term_margin head,
59 term_margin foot, const void *arg)
60 {
61
62 p->headf = head;
63 p->footf = foot;
64 p->argf = arg;
65 (*p->begin)(p);
66 }
67
68
69 void
70 term_end(struct termp *p)
71 {
72
73 (*p->end)(p);
74 }
75
76
77 struct termp *
78 term_alloc(enum termenc enc)
79 {
80 struct termp *p;
81
82 p = calloc(1, sizeof(struct termp));
83 if (NULL == p) {
84 perror(NULL);
85 exit(EXIT_FAILURE);
86 }
87
88 p->enc = enc;
89 return(p);
90 }
91
92
93 /*
94 * Flush a line of text. A "line" is loosely defined as being something
95 * that should be followed by a newline, regardless of whether it's
96 * broken apart by newlines getting there. A line can also be a
97 * fragment of a columnar list (`Bl -tag' or `Bl -column'), which does
98 * not have a trailing newline.
99 *
100 * The following flags may be specified:
101 *
102 * - TERMP_NOLPAD: when beginning to write the line, don't left-pad the
103 * offset value. This is useful when doing columnar lists where the
104 * prior column has right-padded.
105 *
106 * - TERMP_NOBREAK: this is the most important and is used when making
107 * columns. In short: don't print a newline and instead pad to the
108 * right margin. Used in conjunction with TERMP_NOLPAD.
109 *
110 * - TERMP_TWOSPACE: when padding, make sure there are at least two
111 * space characters of padding. Otherwise, rather break the line.
112 *
113 * - TERMP_DANGLE: don't newline when TERMP_NOBREAK is specified and
114 * the line is overrun, and don't pad-right if it's underrun.
115 *
116 * - TERMP_HANG: like TERMP_DANGLE, but doesn't newline when
117 * overruning, instead save the position and continue at that point
118 * when the next invocation.
119 *
120 * In-line line breaking:
121 *
122 * If TERMP_NOBREAK is specified and the line overruns the right
123 * margin, it will break and pad-right to the right margin after
124 * writing. If maxrmargin is violated, it will break and continue
125 * writing from the right-margin, which will lead to the above scenario
126 * upon exit. Otherwise, the line will break at the right margin.
127 */
128 void
129 term_flushln(struct termp *p)
130 {
131 int i; /* current input position in p->buf */
132 size_t vis; /* current visual position on output */
133 size_t vbl; /* number of blanks to prepend to output */
134 size_t vend; /* end of word visual position on output */
135 size_t bp; /* visual right border position */
136 int j; /* temporary loop index */
137 int jhy; /* last hyphen before line overflow */
138 size_t maxvis, mmax;
139
140 /*
141 * First, establish the maximum columns of "visible" content.
142 * This is usually the difference between the right-margin and
143 * an indentation, but can be, for tagged lists or columns, a
144 * small set of values.
145 */
146
147 assert(p->offset < p->rmargin);
148
149 maxvis = (int)(p->rmargin - p->offset) - p->overstep < 0 ?
150 /* LINTED */
151 0 : p->rmargin - p->offset - p->overstep;
152 mmax = (int)(p->maxrmargin - p->offset) - p->overstep < 0 ?
153 /* LINTED */
154 0 : p->maxrmargin - p->offset - p->overstep;
155
156 bp = TERMP_NOBREAK & p->flags ? mmax : maxvis;
157
158 /*
159 * Indent the first line of a paragraph.
160 */
161 vbl = p->flags & TERMP_NOLPAD ? 0 : p->offset;
162
163 /*
164 * FIXME: if bp is zero, we still output the first word before
165 * breaking the line.
166 */
167
168 vis = vend = i = 0;
169 while (i < (int)p->col) {
170
171 /*
172 * Handle literal tab characters.
173 */
174 for (j = i; j < (int)p->col; j++) {
175 if ('\t' != p->buf[j])
176 break;
177 vend = (vis/p->tabwidth+1)*p->tabwidth;
178 vbl += vend - vis;
179 vis = vend;
180 }
181
182 /*
183 * Count up visible word characters. Control sequences
184 * (starting with the CSI) aren't counted. A space
185 * generates a non-printing word, which is valid (the
186 * space is printed according to regular spacing rules).
187 */
188
189 /* LINTED */
190 for (jhy = 0; j < (int)p->col; j++) {
191 if ((j && ' ' == p->buf[j]) || '\t' == p->buf[j])
192 break;
193 if (8 != p->buf[j]) {
194 if (vend > vis && vend < bp &&
195 ASCII_HYPH == p->buf[j])
196 jhy = j;
197 vend++;
198 } else
199 vend--;
200 }
201
202 /*
203 * Find out whether we would exceed the right margin.
204 * If so, break to the next line.
205 */
206 if (vend > bp && 0 == jhy && vis > 0) {
207 vend -= vis;
208 (*p->endline)(p);
209 if (TERMP_NOBREAK & p->flags) {
210 p->viscol = p->rmargin;
211 (*p->advance)(p, p->rmargin);
212 vend += p->rmargin - p->offset;
213 } else {
214 p->viscol = 0;
215 vbl = p->offset;
216 }
217
218 /* Remove the p->overstep width. */
219
220 bp += (int)/* LINTED */
221 p->overstep;
222 p->overstep = 0;
223 }
224
225 /*
226 * Skip leading tabs, they were handled above.
227 */
228 while (i < (int)p->col && '\t' == p->buf[i])
229 i++;
230
231 /* Write out the [remaining] word. */
232 for ( ; i < (int)p->col; i++) {
233 if (vend > bp && jhy > 0 && i > jhy)
234 break;
235 if ('\t' == p->buf[i])
236 break;
237 if (' ' == p->buf[i]) {
238 while (' ' == p->buf[i]) {
239 vbl++;
240 i++;
241 }
242 break;
243 }
244 if (ASCII_NBRSP == p->buf[i]) {
245 vbl++;
246 continue;
247 }
248
249 /*
250 * Now we definitely know there will be
251 * printable characters to output,
252 * so write preceding white space now.
253 */
254 if (vbl) {
255 (*p->advance)(p, vbl);
256 p->viscol += vbl;
257 vbl = 0;
258 }
259
260 if (ASCII_HYPH == p->buf[i])
261 (*p->letter)(p, '-');
262 else
263 (*p->letter)(p, p->buf[i]);
264
265 p->viscol += 1;
266 }
267 vend += vbl;
268 vis = vend;
269 }
270
271 p->col = 0;
272 p->overstep = 0;
273
274 if ( ! (TERMP_NOBREAK & p->flags)) {
275 p->viscol = 0;
276 (*p->endline)(p);
277 return;
278 }
279
280 if (TERMP_HANG & p->flags) {
281 /* We need one blank after the tag. */
282 p->overstep = /* LINTED */
283 vis - maxvis + 1;
284
285 /*
286 * Behave exactly the same way as groff:
287 * If we have overstepped the margin, temporarily move
288 * it to the right and flag the rest of the line to be
289 * shorter.
290 * If we landed right at the margin, be happy.
291 * If we are one step before the margin, temporarily
292 * move it one step LEFT and flag the rest of the line
293 * to be longer.
294 */
295 if (p->overstep >= -1) {
296 assert((int)maxvis + p->overstep >= 0);
297 /* LINTED */
298 maxvis += p->overstep;
299 } else
300 p->overstep = 0;
301
302 } else if (TERMP_DANGLE & p->flags)
303 return;
304
305 /* Right-pad. */
306 if (maxvis > vis + /* LINTED */
307 ((TERMP_TWOSPACE & p->flags) ? 1 : 0)) {
308 p->viscol += maxvis - vis;
309 (*p->advance)(p, maxvis - vis);
310 vis += (maxvis - vis);
311 } else { /* ...or newline break. */
312 (*p->endline)(p);
313 p->viscol = p->rmargin;
314 (*p->advance)(p, p->rmargin);
315 }
316 }
317
318
319 /*
320 * A newline only breaks an existing line; it won't assert vertical
321 * space. All data in the output buffer is flushed prior to the newline
322 * assertion.
323 */
324 void
325 term_newln(struct termp *p)
326 {
327
328 p->flags |= TERMP_NOSPACE;
329 if (0 == p->col && 0 == p->viscol) {
330 p->flags &= ~TERMP_NOLPAD;
331 return;
332 }
333 term_flushln(p);
334 p->flags &= ~TERMP_NOLPAD;
335 }
336
337
338 /*
339 * Asserts a vertical space (a full, empty line-break between lines).
340 * Note that if used twice, this will cause two blank spaces and so on.
341 * All data in the output buffer is flushed prior to the newline
342 * assertion.
343 */
344 void
345 term_vspace(struct termp *p)
346 {
347
348 term_newln(p);
349 p->viscol = 0;
350 (*p->endline)(p);
351 }
352
353
354 static void
355 spec(struct termp *p, const char *word, size_t len)
356 {
357 const char *rhs;
358 size_t sz;
359
360 rhs = chars_a2ascii(p->symtab, word, len, &sz);
361 if (rhs)
362 encode(p, rhs, sz);
363 }
364
365
366 static void
367 res(struct termp *p, const char *word, size_t len)
368 {
369 const char *rhs;
370 size_t sz;
371
372 rhs = chars_a2res(p->symtab, word, len, &sz);
373 if (rhs)
374 encode(p, rhs, sz);
375 }
376
377
378 void
379 term_fontlast(struct termp *p)
380 {
381 enum termfont f;
382
383 f = p->fontl;
384 p->fontl = p->fontq[p->fonti];
385 p->fontq[p->fonti] = f;
386 }
387
388
389 void
390 term_fontrepl(struct termp *p, enum termfont f)
391 {
392
393 p->fontl = p->fontq[p->fonti];
394 p->fontq[p->fonti] = f;
395 }
396
397
398 void
399 term_fontpush(struct termp *p, enum termfont f)
400 {
401
402 assert(p->fonti + 1 < 10);
403 p->fontl = p->fontq[p->fonti];
404 p->fontq[++p->fonti] = f;
405 }
406
407
408 const void *
409 term_fontq(struct termp *p)
410 {
411
412 return(&p->fontq[p->fonti]);
413 }
414
415
416 enum termfont
417 term_fonttop(struct termp *p)
418 {
419
420 return(p->fontq[p->fonti]);
421 }
422
423
424 void
425 term_fontpopq(struct termp *p, const void *key)
426 {
427
428 while (p->fonti >= 0 && key != &p->fontq[p->fonti])
429 p->fonti--;
430 assert(p->fonti >= 0);
431 }
432
433
434 void
435 term_fontpop(struct termp *p)
436 {
437
438 assert(p->fonti);
439 p->fonti--;
440 }
441
442
443 /*
444 * Handle pwords, partial words, which may be either a single word or a
445 * phrase that cannot be broken down (such as a literal string). This
446 * handles word styling.
447 */
448 void
449 term_word(struct termp *p, const char *word)
450 {
451 const char *sv, *seq;
452 int sz;
453 size_t ssz;
454 enum roffdeco deco;
455
456 sv = word;
457
458 if (word[0] && '\0' == word[1])
459 switch (word[0]) {
460 case('.'):
461 /* FALLTHROUGH */
462 case(','):
463 /* FALLTHROUGH */
464 case(';'):
465 /* FALLTHROUGH */
466 case(':'):
467 /* FALLTHROUGH */
468 case('?'):
469 /* FALLTHROUGH */
470 case('!'):
471 /* FALLTHROUGH */
472 case(')'):
473 /* FALLTHROUGH */
474 case(']'):
475 if ( ! (TERMP_IGNDELIM & p->flags))
476 p->flags |= TERMP_NOSPACE;
477 break;
478 default:
479 break;
480 }
481
482 if ( ! (TERMP_NOSPACE & p->flags)) {
483 bufferc(p, ' ');
484 if (TERMP_SENTENCE & p->flags)
485 bufferc(p, ' ');
486 }
487
488 if ( ! (p->flags & TERMP_NONOSPACE))
489 p->flags &= ~TERMP_NOSPACE;
490
491 p->flags &= ~TERMP_SENTENCE;
492
493 /* FIXME: use strcspn. */
494
495 while (*word) {
496 if ('\\' != *word) {
497 encode(p, word, 1);
498 word++;
499 continue;
500 }
501
502 seq = ++word;
503 sz = a2roffdeco(&deco, &seq, &ssz);
504
505 switch (deco) {
506 case (DECO_RESERVED):
507 res(p, seq, ssz);
508 break;
509 case (DECO_SPECIAL):
510 spec(p, seq, ssz);
511 break;
512 case (DECO_BOLD):
513 term_fontrepl(p, TERMFONT_BOLD);
514 break;
515 case (DECO_ITALIC):
516 term_fontrepl(p, TERMFONT_UNDER);
517 break;
518 case (DECO_ROMAN):
519 term_fontrepl(p, TERMFONT_NONE);
520 break;
521 case (DECO_PREVIOUS):
522 term_fontlast(p);
523 break;
524 default:
525 break;
526 }
527
528 word += sz;
529 if (DECO_NOSPACE == deco && '\0' == *word)
530 p->flags |= TERMP_NOSPACE;
531 }
532
533 /*
534 * Note that we don't process the pipe: the parser sees it as
535 * punctuation, but we don't in terms of typography.
536 */
537 if (sv[0] && 0 == sv[1])
538 switch (sv[0]) {
539 case('('):
540 /* FALLTHROUGH */
541 case('['):
542 p->flags |= TERMP_NOSPACE;
543 break;
544 default:
545 break;
546 }
547 }
548
549
550 static void
551 adjbuf(struct termp *p, size_t sz)
552 {
553
554 if (0 == p->maxcols)
555 p->maxcols = 1024;
556 while (sz >= p->maxcols)
557 p->maxcols <<= 2;
558
559 p->buf = realloc(p->buf, p->maxcols);
560 if (NULL == p->buf) {
561 perror(NULL);
562 exit(EXIT_FAILURE);
563 }
564 }
565
566
567 static void
568 buffera(struct termp *p, const char *word, size_t sz)
569 {
570
571 if (p->col + sz >= p->maxcols)
572 adjbuf(p, p->col + sz);
573
574 memcpy(&p->buf[(int)p->col], word, sz);
575 p->col += sz;
576 }
577
578
579 static void
580 bufferc(struct termp *p, char c)
581 {
582
583 if (p->col + 1 >= p->maxcols)
584 adjbuf(p, p->col + 1);
585
586 p->buf[(int)p->col++] = c;
587 }
588
589
590 static void
591 encode(struct termp *p, const char *word, size_t sz)
592 {
593 enum termfont f;
594 int i;
595
596 /*
597 * Encode and buffer a string of characters. If the current
598 * font mode is unset, buffer directly, else encode then buffer
599 * character by character.
600 */
601
602 if (TERMFONT_NONE == (f = term_fonttop(p))) {
603 buffera(p, word, sz);
604 return;
605 }
606
607 for (i = 0; i < (int)sz; i++) {
608 if ( ! isgraph((u_char)word[i])) {
609 bufferc(p, word[i]);
610 continue;
611 }
612
613 if (TERMFONT_UNDER == f)
614 bufferc(p, '_');
615 else
616 bufferc(p, word[i]);
617
618 bufferc(p, 8);
619 bufferc(p, word[i]);
620 }
621 }
622
623
624 size_t
625 term_len(const struct termp *p, size_t sz)
626 {
627
628 return((*p->width)(p, ' ') * sz);
629 }
630
631
632 size_t
633 term_strlen(const struct termp *p, const char *cp)
634 {
635 size_t sz;
636
637 for (sz = 0; *cp; cp++)
638 sz += (*p->width)(p, *cp);
639
640 return(sz);
641 }
642
643
644 size_t
645 term_vspan(const struct termp *p, const struct roffsu *su)
646 {
647 double r;
648
649 switch (su->unit) {
650 case (SCALE_CM):
651 r = su->scale * 2;
652 break;
653 case (SCALE_IN):
654 r = su->scale * 6;
655 break;
656 case (SCALE_PC):
657 r = su->scale;
658 break;
659 case (SCALE_PT):
660 r = su->scale / 8;
661 break;
662 case (SCALE_MM):
663 r = su->scale / 1000;
664 break;
665 case (SCALE_VS):
666 r = su->scale;
667 break;
668 default:
669 r = su->scale - 1;
670 break;
671 }
672
673 if (r < 0.0)
674 r = 0.0;
675 return(/* LINTED */(size_t)
676 r);
677 }
678
679
680 size_t
681 term_hspan(const struct termp *p, const struct roffsu *su)
682 {
683 double r;
684
685 /* XXX: CM, IN, and PT are approximations. */
686
687 switch (su->unit) {
688 case (SCALE_CM):
689 r = 4 * su->scale;
690 break;
691 case (SCALE_IN):
692 /* XXX: this is an approximation. */
693 r = 10 * su->scale;
694 break;
695 case (SCALE_PC):
696 r = (10 * su->scale) / 6;
697 break;
698 case (SCALE_PT):
699 r = (10 * su->scale) / 72;
700 break;
701 case (SCALE_MM):
702 r = su->scale / 1000; /* FIXME: double-check. */
703 break;
704 case (SCALE_VS):
705 r = su->scale * 2 - 1; /* FIXME: double-check. */
706 break;
707 default:
708 r = su->scale;
709 break;
710 }
711
712 if (r < 0.0)
713 r = 0.0;
714 return((size_t)/* LINTED */
715 r);
716 }
717
718