]> git.cameronkatri.com Git - mandoc.git/blob - term.c
Clarified special chars/predefined chars in mandoc_char.7.
[mandoc.git] / term.c
1 /* $Id: term.c,v 1.95 2009/07/27 12:35:54 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #include <assert.h>
18 #include <err.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22
23 #include "term.h"
24 #include "man.h"
25 #include "mdoc.h"
26
/* Page drivers; declared here, not defined in this part of the file. */
extern	int	  man_run(struct termp *p, const struct man *man);
extern	int	  mdoc_run(struct termp *p, const struct mdoc *mdoc);

/* Creation and destruction of the terminal state. */
static	struct termp *term_alloc(enum termenc enc);
static	void	  term_free(struct termp *p);

/* Escape-sequence decoding. */
static	void	  do_escaped(struct termp *p, const char **word);
static	void	  do_special(struct termp *p,
			const char *word, size_t len);
static	void	  do_reserved(struct termp *p,
			const char *word, size_t len);

/* Line-buffer output. */
static	void	  buffer(struct termp *p, char c);
static	void	  encode(struct termp *p, char c);

/* Single-character delimiter classification. */
static	int	  isopendelim(const char *p);
static	int	  isclosedelim(const char *p);
44
45
46 void *
47 ascii_alloc(void)
48 {
49
50 return(term_alloc(TERMENC_ASCII));
51 }
52
53
54 int
55 terminal_man(void *arg, const struct man *man)
56 {
57 struct termp *p;
58
59 p = (struct termp *)arg;
60 if (NULL == p->symtab)
61 p->symtab = term_ascii2htab();
62
63 return(man_run(p, man));
64 }
65
66
67 int
68 terminal_mdoc(void *arg, const struct mdoc *mdoc)
69 {
70 struct termp *p;
71
72 p = (struct termp *)arg;
73 if (NULL == p->symtab)
74 p->symtab = term_ascii2htab();
75
76 return(mdoc_run(p, mdoc));
77 }
78
79
/*
 * Public counterpart of term_free(): release the terminal state that
 * was handed out as an opaque pointer.
 */
void
terminal_free(void *arg)
{
	struct termp	*p = arg;

	term_free(p);
}
86
87
88 static void
89 term_free(struct termp *p)
90 {
91
92 if (p->buf)
93 free(p->buf);
94 if (TERMENC_ASCII == p->enc && p->symtab)
95 term_asciifree(p->symtab);
96
97 free(p);
98 }
99
100
101 static struct termp *
102 term_alloc(enum termenc enc)
103 {
104 struct termp *p;
105
106 if (NULL == (p = malloc(sizeof(struct termp))))
107 err(1, "malloc");
108 bzero(p, sizeof(struct termp));
109 p->maxrmargin = 78;
110 p->enc = enc;
111 return(p);
112 }
113
114
/*
 * Return 1 if `p` is a string of exactly one character and that
 * character is one of  . , ; : ? ! ) ] }  -- otherwise return 0.
 */
static int
isclosedelim(const char *p)
{

	/* Empty strings and strings longer than one byte never match. */
	if ('\0' == p[0] || '\0' != p[1])
		return(0);

	return(NULL != strchr(".,;:?!)]}", p[0]));
}
147
148
/*
 * Return 1 if `p` is a string of exactly one character and that
 * character is one of  ( [ {  -- otherwise return 0.
 */
static int
isopendelim(const char *p)
{

	/* Empty strings and strings longer than one byte never match. */
	if ('\0' == p[0] || '\0' != p[1])
		return(0);

	return(NULL != strchr("([{", p[0]));
}
169
170
171 /*
172 * Flush a line of text. A "line" is loosely defined as being something
173 * that should be followed by a newline, regardless of whether it's
174 * broken apart by newlines getting there. A line can also be a
175 * fragment of a columnar list.
176 *
177 * Specifically, a line is whatever's in p->buf of length p->col, which
178 * is zeroed after this function returns.
179 *
180 * The usage of termp:flags is as follows:
181 *
182 * - TERMP_NOLPAD: when beginning to write the line, don't left-pad the
183 * offset value. This is useful when doing columnar lists where the
184 * prior column has right-padded.
185 *
186 * - TERMP_NOBREAK: this is the most important and is used when making
187 * columns. In short: don't print a newline and instead pad to the
188 * right margin. Used in conjunction with TERMP_NOLPAD.
189 *
190 * - TERMP_TWOSPACE: when padding, make sure there are at least two
191 * space characters of padding. Otherwise, rather break the line.
192 *
193 * - TERMP_DANGLE: don't newline when TERMP_NOBREAK is specified and
194 * the line is overrun, and don't pad-right if it's underrun.
195 *
196 * - TERMP_HANG: like TERMP_DANGLE, but doesn't newline when
197 * overruning, instead save the position and continue at that point
198 * when the next invocation.
199 *
200 * In-line line breaking:
201 *
202 * If TERMP_NOBREAK is specified and the line overruns the right
203 * margin, it will break and pad-right to the right margin after
204 * writing. If maxrmargin is violated, it will break and continue
205 * writing from the right-margin, which will lead to the above
206 * scenario upon exit.
207 *
208 * Otherwise, the line will break at the right margin. Extremely long
209 * lines will cause the system to emit a warning (TODO: hyphenate, if
210 * possible).
211 */
212 void
213 term_flushln(struct termp *p)
214 {
215 int i, j;
216 size_t vbl, vsz, vis, maxvis, mmax, bp;
217 static int overstep = 0;
218
219 /*
220 * First, establish the maximum columns of "visible" content.
221 * This is usually the difference between the right-margin and
222 * an indentation, but can be, for tagged lists or columns, a
223 * small set of values.
224 */
225
226 assert(p->offset < p->rmargin);
227 assert((int)(p->rmargin - p->offset) - overstep > 0);
228
229 maxvis = /* LINTED */
230 p->rmargin - p->offset - overstep;
231 mmax = /* LINTED */
232 p->maxrmargin - p->offset - overstep;
233
234 bp = TERMP_NOBREAK & p->flags ? mmax : maxvis;
235 vis = 0;
236 overstep = 0;
237
238 /*
239 * If in the standard case (left-justified), then begin with our
240 * indentation, otherwise (columns, etc.) just start spitting
241 * out text.
242 */
243
244 if ( ! (p->flags & TERMP_NOLPAD))
245 /* LINTED */
246 for (j = 0; j < (int)p->offset; j++)
247 putchar(' ');
248
249 for (i = 0; i < (int)p->col; i++) {
250 /*
251 * Count up visible word characters. Control sequences
252 * (starting with the CSI) aren't counted. A space
253 * generates a non-printing word, which is valid (the
254 * space is printed according to regular spacing rules).
255 */
256
257 /* LINTED */
258 for (j = i, vsz = 0; j < (int)p->col; j++) {
259 if (j && ' ' == p->buf[j])
260 break;
261 else if (8 == p->buf[j])
262 vsz--;
263 else
264 vsz++;
265 }
266
267 /*
268 * Choose the number of blanks to prepend: no blank at the
269 * beginning of a line, one between words -- but do not
270 * actually write them yet.
271 */
272 vbl = (size_t)(0 == vis ? 0 : 1);
273
274 /*
275 * Find out whether we would exceed the right margin.
276 * If so, break to the next line. (TODO: hyphenate)
277 * Otherwise, write the chosen number of blanks now.
278 */
279 if (vis && vis + vbl + vsz > bp) {
280 putchar('\n');
281 if (TERMP_NOBREAK & p->flags) {
282 for (j = 0; j < (int)p->rmargin; j++)
283 putchar(' ');
284 vis = p->rmargin - p->offset;
285 } else {
286 for (j = 0; j < (int)p->offset; j++)
287 putchar(' ');
288 vis = 0;
289 }
290 } else {
291 for (j = 0; j < (int)vbl; j++)
292 putchar(' ');
293 vis += vbl;
294 }
295
296 /*
297 * Finally, write out the word.
298 */
299 for ( ; i < (int)p->col; i++) {
300 if (' ' == p->buf[i])
301 break;
302 putchar(p->buf[i]);
303 }
304 vis += vsz;
305 }
306 p->col = 0;
307
308 if ( ! (TERMP_NOBREAK & p->flags)) {
309 putchar('\n');
310 return;
311 }
312
313 if (TERMP_HANG & p->flags) {
314 /* We need one blank after the tag. */
315 overstep = /* LINTED */
316 vis - maxvis + 1;
317
318 /*
319 * Behave exactly the same way as groff:
320 * If we have overstepped the margin, temporarily move
321 * it to the right and flag the rest of the line to be
322 * shorter.
323 * If we landed right at the margin, be happy.
324 * If we are one step before the margin, temporarily
325 * move it one step LEFT and flag the rest of the line
326 * to be longer.
327 */
328 if (overstep >= -1) {
329 assert((int)maxvis + overstep >= 0);
330 /* LINTED */
331 maxvis += overstep;
332 } else
333 overstep = 0;
334
335 } else if (TERMP_DANGLE & p->flags)
336 return;
337
338 /* Right-pad. */
339 if (maxvis > vis + /* LINTED */
340 ((TERMP_TWOSPACE & p->flags) ? 1 : 0))
341 for ( ; vis < maxvis; vis++)
342 putchar(' ');
343 else { /* ...or newline break. */
344 putchar('\n');
345 for (i = 0; i < (int)p->rmargin; i++)
346 putchar(' ');
347 }
348 }
349
350
351 /*
352 * A newline only breaks an existing line; it won't assert vertical
353 * space. All data in the output buffer is flushed prior to the newline
354 * assertion.
355 */
356 void
357 term_newln(struct termp *p)
358 {
359
360 p->flags |= TERMP_NOSPACE;
361 if (0 == p->col) {
362 p->flags &= ~TERMP_NOLPAD;
363 return;
364 }
365 term_flushln(p);
366 p->flags &= ~TERMP_NOLPAD;
367 }
368
369
/*
 * Assert one line of vertical space: finish the current line via
 * term_newln(), then write an additional newline.  Calling this
 * twice in a row therefore yields two blank lines, and so on.
 */
void
term_vspace(struct termp *p)
{

	term_newln(p);
	(void)putchar('\n');
}
383
384
385 static void
386 do_special(struct termp *p, const char *word, size_t len)
387 {
388 const char *rhs;
389 size_t sz;
390 int i;
391
392 rhs = term_a2ascii(p->symtab, word, len, &sz);
393
394 if (NULL == rhs)
395 return;
396 for (i = 0; i < (int)sz; i++)
397 encode(p, rhs[i]);
398 }
399
400
401 static void
402 do_reserved(struct termp *p, const char *word, size_t len)
403 {
404 const char *rhs;
405 size_t sz;
406 int i;
407
408 rhs = term_a2res(p->symtab, word, len, &sz);
409
410 if (NULL == rhs)
411 return;
412 for (i = 0; i < (int)sz; i++)
413 encode(p, rhs[i]);
414 }
415
416
417 /*
418 * Handle an escape sequence: determine its length and pass it to the
419 * escape-symbol look table. Note that we assume mdoc(3) has validated
420 * the escape sequence (we assert upon badly-formed escape sequences).
421 */
422 static void
423 do_escaped(struct termp *p, const char **word)
424 {
425 int j;
426 const char *wp;
427
428 wp = *word;
429
430 if (0 == *(++wp)) {
431 *word = wp;
432 return;
433 }
434
435 if ('(' == *wp) {
436 wp++;
437 if (0 == *wp || 0 == *(wp + 1)) {
438 *word = 0 == *wp ? wp : wp + 1;
439 return;
440 }
441
442 do_special(p, wp, 2);
443 *word = ++wp;
444 return;
445
446 } else if ('*' == *wp) {
447 if (0 == *(++wp)) {
448 *word = wp;
449 return;
450 }
451
452 switch (*wp) {
453 case ('('):
454 wp++;
455 if (0 == *wp || 0 == *(wp + 1)) {
456 *word = 0 == *wp ? wp : wp + 1;
457 return;
458 }
459
460 do_reserved(p, wp, 2);
461 *word = ++wp;
462 return;
463 case ('['):
464 break;
465 default:
466 do_reserved(p, wp, 1);
467 *word = wp;
468 return;
469 }
470
471 } else if ('f' == *wp) {
472 if (0 == *(++wp)) {
473 *word = wp;
474 return;
475 }
476
477 switch (*wp) {
478 case ('B'):
479 p->flags |= TERMP_BOLD;
480 break;
481 case ('I'):
482 p->flags |= TERMP_UNDER;
483 break;
484 case ('P'):
485 /* FALLTHROUGH */
486 case ('R'):
487 p->flags &= ~TERMP_STYLE;
488 break;
489 default:
490 break;
491 }
492
493 *word = wp;
494 return;
495
496 } else if ('[' != *wp) {
497 do_special(p, wp, 1);
498 *word = wp;
499 return;
500 }
501
502 wp++;
503 for (j = 0; *wp && ']' != *wp; wp++, j++)
504 /* Loop... */ ;
505
506 if (0 == *wp) {
507 *word = wp;
508 return;
509 }
510
511 do_special(p, wp - j, (size_t)j);
512 *word = wp;
513 }
514
515
516 /*
517 * Handle pwords, partial words, which may be either a single word or a
518 * phrase that cannot be broken down (such as a literal string). This
519 * handles word styling.
520 */
521 void
522 term_word(struct termp *p, const char *word)
523 {
524 const char *sv;
525
526 if (isclosedelim(word))
527 if ( ! (TERMP_IGNDELIM & p->flags))
528 p->flags |= TERMP_NOSPACE;
529
530 if ( ! (TERMP_NOSPACE & p->flags))
531 buffer(p, ' ');
532
533 if ( ! (p->flags & TERMP_NONOSPACE))
534 p->flags &= ~TERMP_NOSPACE;
535
536 for (sv = word; *word; word++)
537 if ('\\' != *word)
538 encode(p, *word);
539 else
540 do_escaped(p, &word);
541
542 if (isopendelim(sv))
543 p->flags |= TERMP_NOSPACE;
544 }
545
546
547 /*
548 * Insert a single character into the line-buffer. If the buffer's
549 * space is exceeded, then allocate more space by doubling the buffer
550 * size.
551 */
552 static void
553 buffer(struct termp *p, char c)
554 {
555 size_t s;
556
557 if (p->col + 1 >= p->maxcols) {
558 if (0 == p->maxcols)
559 p->maxcols = 256;
560 s = p->maxcols * 2;
561 p->buf = realloc(p->buf, s);
562 if (NULL == p->buf)
563 err(1, "realloc");
564 p->maxcols = s;
565 }
566 p->buf[(int)(p->col)++] = c;
567 }
568
569
570 static void
571 encode(struct termp *p, char c)
572 {
573
574 if (' ' != c && TERMP_STYLE & p->flags) {
575 if (TERMP_BOLD & p->flags) {
576 buffer(p, c);
577 buffer(p, 8);
578 }
579 if (TERMP_UNDER & p->flags) {
580 buffer(p, '_');
581 buffer(p, 8);
582 }
583 }
584 buffer(p, c);
585 }