term.c

   1 /*      $Id: term.c,v 1.83 2009/06/22 13:13:10 kristaps Exp $ */
   2 /*
   3  * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
   4  *
   5  * Permission to use, copy, modify, and distribute this software for any
   6  * purpose with or without fee is hereby granted, provided that the above
   7  * copyright notice and this permission notice appear in all copies.
   8  *
   9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  16  */
  17 #include <assert.h>
  18 #include <err.h>
  19 #include <stdio.h>
  20 #include <stdlib.h>
  21 #include <string.h>
  22
  23 #include "term.h"
  24 #include "man.h"
  25 #include "mdoc.h"
  26
  27 extern  int               man_run(struct termp *,
  28                                 const struct man *);
  29 extern  int               mdoc_run(struct termp *,
  30                                 const struct mdoc *);
  31
  32 static  struct termp     *term_alloc(enum termenc);
  33 static  void              term_free(struct termp *);
  34 static  void              term_pword(struct termp *, const char *, int);
  35 static  void              term_pescape(struct termp *,
  36                                 const char *, int *, int);
  37 static  void              term_nescape(struct termp *,
  38                                 const char *, size_t);
  39 static  void              term_chara(struct termp *, char);
  40 static  void              term_encodea(struct termp *, char);
  41 static  int               term_isopendelim(const char *, int);
  42 static  int               term_isclosedelim(const char *, int);
  43
  44
  45 void *
  46 ascii_alloc(void)
  47 {
  48
  49         return(term_alloc(TERMENC_ASCII));
  50 }
  51
  52
  53 int
  54 terminal_man(void *arg, const struct man *man)
  55 {
  56         struct termp    *p;
  57
  58         p = (struct termp *)arg;
  59         if (NULL == p->symtab)
  60                 p->symtab = term_ascii2htab();
  61
  62         return(man_run(p, man));
  63 }
  64
  65
  66 int
  67 terminal_mdoc(void *arg, const struct mdoc *mdoc)
  68 {
  69         struct termp    *p;
  70
  71         p = (struct termp *)arg;
  72         if (NULL == p->symtab)
  73                 p->symtab = term_ascii2htab();
  74
  75         return(mdoc_run(p, mdoc));
  76 }
  77
  78
  79 void
  80 terminal_free(void *arg)
  81 {
  82
  83         term_free((struct termp *)arg);
  84 }
  85
  86
  87 static void
  88 term_free(struct termp *p)
  89 {
  90
  91         if (p->buf)
  92                 free(p->buf);
  93         if (TERMENC_ASCII == p->enc && p->symtab)
  94                 term_asciifree(p->symtab);
  95
  96         free(p);
  97 }
  98
  99
 100 static struct termp *
 101 term_alloc(enum termenc enc)
 102 {
 103         struct termp *p;
 104
 105         if (NULL == (p = malloc(sizeof(struct termp))))
 106                 err(1, "malloc");
 107         bzero(p, sizeof(struct termp));
 108         p->maxrmargin = 78;
 109         p->enc = enc;
 110         return(p);
 111 }
 112
 113
 114 static int
 115 term_isclosedelim(const char *p, int len)
 116 {
 117
 118         if (1 != len)
 119                 return(0);
 120
 121         switch (*p) {
 122         case('.'):
 123                 /* FALLTHROUGH */
 124         case(','):
 125                 /* FALLTHROUGH */
 126         case(';'):
 127                 /* FALLTHROUGH */
 128         case(':'):
 129                 /* FALLTHROUGH */
 130         case('?'):
 131                 /* FALLTHROUGH */
 132         case('!'):
 133                 /* FALLTHROUGH */
 134         case(')'):
 135                 /* FALLTHROUGH */
 136         case(']'):
 137                 /* FALLTHROUGH */
 138         case('}'):
 139                 return(1);
 140         default:
 141                 break;
 142         }
 143
 144         return(0);
 145 }
 146
 147
 148 static int
 149 term_isopendelim(const char *p, int len)
 150 {
 151
 152         if (1 != len)
 153                 return(0);
 154
 155         switch (*p) {
 156         case('('):
 157                 /* FALLTHROUGH */
 158         case('['):
 159                 /* FALLTHROUGH */
 160         case('{'):
 161                 return(1);
 162         default:
 163                 break;
 164         }
 165
 166         return(0);
 167 }
 168
 169
 170 /*
 171  * Flush a line of text.  A "line" is loosely defined as being something
 172  * that should be followed by a newline, regardless of whether it's
 173  * broken apart by newlines getting there.  A line can also be a
 174  * fragment of a columnar list.
 175  *
 176  * Specifically, a line is whatever's in p->buf of length p->col, which
 177  * is zeroed after this function returns.
 178  *
 179  * The variables TERMP_NOLPAD, TERMP_LITERAL and TERMP_NOBREAK are of
 180  * critical importance here.  Their behaviour follows:
 181  *
 182  *  - TERMP_NOLPAD: when beginning to write the line, don't left-pad the
 183  *    offset value.  This is useful when doing columnar lists where the
 184  *    prior column has right-padded.
 185  *
 186  *  - TERMP_NOBREAK: this is the most important and is used when making
 187  *    columns.  In short: don't print a newline and instead pad to the
 188  *    right margin.  Used in conjunction with TERMP_NOLPAD.
 189  *
 190  *  - TERMP_NONOBREAK: don't newline when TERMP_NOBREAK is specified.
 191  *
 192  *  In-line line breaking:
 193  *
 194  *  If TERMP_NOBREAK is specified and the line overruns the right
 195  *  margin, it will break and pad-right to the right margin after
 196  *  writing.  If maxrmargin is violated, it will break and continue
 197  *  writing from the right-margin, which will lead to the above
 198  *  scenario upon exit.
 199  *
 200  *  Otherwise, the line will break at the right margin.  Extremely long
 201  *  lines will cause the system to emit a warning (TODO: hyphenate, if
 202  *  possible).
 203  *
 204  *  FIXME: newline breaks occur (in groff) also occur when a single
 205  *  space follows a NOBREAK!
 206  */
 207 void
 208 term_flushln(struct termp *p)
 209 {
 210         int              i, j;
 211         size_t           vbl, vsz, vis, maxvis, mmax, bp;
 212
 213         /*
 214          * First, establish the maximum columns of "visible" content.
 215          * This is usually the difference between the right-margin and
 216          * an indentation, but can be, for tagged lists or columns, a
 217          * small set of values.
 218          */
 219
 220         assert(p->offset < p->rmargin);
 221         maxvis = p->rmargin - p->offset;
 222         mmax = p->maxrmargin - p->offset;
 223         bp = TERMP_NOBREAK & p->flags ? mmax : maxvis;
 224         vis = 0;
 225
 226         /*
 227          * If in the standard case (left-justified), then begin with our
 228          * indentation, otherwise (columns, etc.) just start spitting
 229          * out text.
 230          */
 231
 232         if ( ! (p->flags & TERMP_NOLPAD))
 233                 /* LINTED */
 234                 for (j = 0; j < (int)p->offset; j++)
 235                         putchar(' ');
 236
 237         for (i = 0; i < (int)p->col; i++) {
 238                 /*
 239                  * Count up visible word characters.  Control sequences
 240                  * (starting with the CSI) aren't counted.  A space
 241                  * generates a non-printing word, which is valid (the
 242                  * space is printed according to regular spacing rules).
 243                  */
 244
 245                 /* LINTED */
 246                 for (j = i, vsz = 0; j < (int)p->col; j++) {
 247                         if (' ' == p->buf[j])
 248                                 break;
 249                         else if (8 == p->buf[j])
 250                                 j += 1;
 251                         else
 252                                 vsz++;
 253                 }
 254
 255                 /*
 256                  * Choose the number of blanks to prepend: no blank at the
 257                  * beginning of a line, one between words -- but do not
 258                  * actually write them yet.
 259                  */
 260                 vbl = (size_t)(0 == vis ? 0 : 1);
 261
 262                 /*
 263                  * Find out whether we would exceed the right margin.
 264                  * If so, break to the next line.  (TODO: hyphenate)
 265                  * Otherwise, write the chosen number of blanks now.
 266                  */
 267                 if (vis && vis + vbl + vsz > bp) {
 268                         putchar('\n');
 269                         if (TERMP_NOBREAK & p->flags) {
 270                                 for (j = 0; j < (int)p->rmargin; j++)
 271                                         putchar(' ');
 272                                 vis = p->rmargin - p->offset;
 273                         } else {
 274                                 for (j = 0; j < (int)p->offset; j++)
 275                                         putchar(' ');
 276                                 vis = 0;
 277                         }
 278                 } else {
 279                         for (j = 0; j < (int)vbl; j++)
 280                                 putchar(' ');
 281                         vis += vbl;
 282                 }
 283
 284                 /*
 285                  * Finally, write out the word.
 286                  */
 287                 for ( ; i < (int)p->col; i++) {
 288                         if (' ' == p->buf[i])
 289                                 break;
 290                         putchar(p->buf[i]);
 291                 }
 292                 vis += vsz;
 293         }
 294
 295         /*
 296          * If we've overstepped our maximum visible no-break space, then
 297          * cause a newline and offset at the right margin.
 298          */
 299
 300         if ((TERMP_NOBREAK & p->flags) && vis >= maxvis) {
 301                 if ( ! (TERMP_NONOBREAK & p->flags)) {
 302                         putchar('\n');
 303                         for (i = 0; i < (int)p->rmargin; i++)
 304                                 putchar(' ');
 305                 }
 306                 p->col = 0;
 307                 return;
 308         }
 309
 310         /*
 311          * If we're not to right-marginalise it (newline), then instead
 312          * pad to the right margin and stay off.
 313          */
 314
 315         if (p->flags & TERMP_NOBREAK) {
 316                 if ( ! (TERMP_NONOBREAK & p->flags))
 317                         for ( ; vis < maxvis; vis++)
 318                                 putchar(' ');
 319         } else
 320                 putchar('\n');
 321
 322         p->col = 0;
 323 }
 324
 325
 326 /*
 327  * A newline only breaks an existing line; it won't assert vertical
 328  * space.  All data in the output buffer is flushed prior to the newline
 329  * assertion.
 330  */
 331 void
 332 term_newln(struct termp *p)
 333 {
 334
 335         p->flags |= TERMP_NOSPACE;
 336         if (0 == p->col) {
 337                 p->flags &= ~TERMP_NOLPAD;
 338                 return;
 339         }
 340         term_flushln(p);
 341         p->flags &= ~TERMP_NOLPAD;
 342 }
 343
 344
 345 /*
 346  * Asserts a vertical space (a full, empty line-break between lines).
 347  * Note that if used twice, this will cause two blank spaces and so on.
 348  * All data in the output buffer is flushed prior to the newline
 349  * assertion.
 350  */
 351 void
 352 term_vspace(struct termp *p)
 353 {
 354
 355         term_newln(p);
 356         putchar('\n');
 357 }
 358
 359
 360 /*
 361  * Break apart a word into "pwords" (partial-words, usually from
 362  * breaking up a phrase into individual words) and, eventually, put them
 363  * into the output buffer.  If we're a literal word, then don't break up
 364  * the word and put it verbatim into the output buffer.
 365  */
 366 void
 367 term_word(struct termp *p, const char *word)
 368 {
 369         int              i, j, len;
 370
 371         len = (int)strlen(word);
 372
 373         if (p->flags & TERMP_LITERAL) {
 374                 term_pword(p, word, len);
 375                 return;
 376         }
 377
 378         /* LINTED */
 379         for (j = i = 0; i < len; i++) {
 380                 if (' ' != word[i]) {
 381                         j++;
 382                         continue;
 383                 }
 384
 385                 /* Escaped spaces don't delimit... */
 386                 if (i && ' ' == word[i] && '\\' == word[i - 1]) {
 387                         j++;
 388                         continue;
 389                 }
 390
 391                 if (0 == j)
 392                         continue;
 393                 assert(i >= j);
 394                 term_pword(p, &word[i - j], j);
 395                 j = 0;
 396         }
 397         if (j > 0) {
 398                 assert(i >= j);
 399                 term_pword(p, &word[i - j], j);
 400         }
 401 }
 402
 403
 404 /*
 405  * Determine the symbol indicated by an escape sequences, that is, one
 406  * starting with a backslash.  Once done, we pass this value into the
 407  * output buffer by way of the symbol table.
 408  */
 409 static void
 410 term_nescape(struct termp *p, const char *word, size_t len)
 411 {
 412         const char      *rhs;
 413         size_t           sz;
 414         int              i;
 415
 416         rhs = term_a2ascii(p->symtab, word, len, &sz);
 417         if (rhs)
 418                 for (i = 0; i < (int)sz; i++)
 419                         term_encodea(p, rhs[i]);
 420 }
 421
 422
 423 /*
 424  * Handle an escape sequence: determine its length and pass it to the
 425  * escape-symbol look table.  Note that we assume mdoc(3) has validated
 426  * the escape sequence (we assert upon badly-formed escape sequences).
 427  */
 428 static void
 429 term_pescape(struct termp *p, const char *word, int *i, int len)
 430 {
 431         int              j;
 432
 433         if (++(*i) >= len)
 434                 return;
 435
 436         if ('(' == word[*i]) {
 437                 (*i)++;
 438                 if (*i + 1 >= len)
 439                         return;
 440
 441                 term_nescape(p, &word[*i], 2);
 442                 (*i)++;
 443                 return;
 444
 445         } else if ('*' == word[*i]) {
 446                 (*i)++;
 447                 if (*i >= len)
 448                         return;
 449
 450                 switch (word[*i]) {
 451                 case ('('):
 452                         (*i)++;
 453                         if (*i + 1 >= len)
 454                                 return;
 455
 456                         term_nescape(p, &word[*i], 2);
 457                         (*i)++;
 458                         return;
 459                 case ('['):
 460                         break;
 461                 default:
 462                         term_nescape(p, &word[*i], 1);
 463                         return;
 464                 }
 465
 466         } else if ('f' == word[*i]) {
 467                 (*i)++;
 468                 if (*i >= len)
 469                         return;
 470                 switch (word[*i]) {
 471                 case ('B'):
 472                         p->flags |= TERMP_BOLD;
 473                         break;
 474                 case ('I'):
 475                         p->flags |= TERMP_UNDER;
 476                         break;
 477                 case ('P'):
 478                         /* FALLTHROUGH */
 479                 case ('R'):
 480                         p->flags &= ~TERMP_STYLE;
 481                         break;
 482                 default:
 483                         break;
 484                 }
 485                 return;
 486
 487         } else if ('[' != word[*i]) {
 488                 term_nescape(p, &word[*i], 1);
 489                 return;
 490         }
 491
 492         (*i)++;
 493         for (j = 0; word[*i] && ']' != word[*i]; (*i)++, j++)
 494                 /* Loop... */ ;
 495
 496         if (0 == word[*i])
 497                 return;
 498
 499         term_nescape(p, &word[*i - j], (size_t)j);
 500 }
 501
 502
 503 /*
 504  * Handle pwords, partial words, which may be either a single word or a
 505  * phrase that cannot be broken down (such as a literal string).  This
 506  * handles word styling.
 507  */
 508 static void
 509 term_pword(struct termp *p, const char *word, int len)
 510 {
 511         int              i;
 512
 513         if (term_isclosedelim(word, len))
 514                 if ( ! (TERMP_IGNDELIM & p->flags))
 515                         p->flags |= TERMP_NOSPACE;
 516
 517         if ( ! (TERMP_NOSPACE & p->flags))
 518                 term_chara(p, ' ');
 519
 520         if ( ! (p->flags & TERMP_NONOSPACE))
 521                 p->flags &= ~TERMP_NOSPACE;
 522
 523         /*
 524          * If ANSI (word-length styling), then apply our style now,
 525          * before the word.
 526          */
 527
 528         for (i = 0; i < len; i++)
 529                 if ('\\' == word[i])
 530                         term_pescape(p, word, &i, len);
 531                 else
 532                         term_encodea(p, word[i]);
 533
 534         if (term_isopendelim(word, len))
 535                 p->flags |= TERMP_NOSPACE;
 536 }
 537
 538
 539 /*
 540  * Insert a single character into the line-buffer.  If the buffer's
 541  * space is exceeded, then allocate more space by doubling the buffer
 542  * size.
 543  */
 544 static void
 545 term_chara(struct termp *p, char c)
 546 {
 547         size_t           s;
 548
 549         if (p->col + 1 >= p->maxcols) {
 550                 if (0 == p->maxcols)
 551                         p->maxcols = 256;
 552                 s = p->maxcols * 2;
 553                 p->buf = realloc(p->buf, s);
 554                 if (NULL == p->buf)
 555                         err(1, "realloc");
 556                 p->maxcols = s;
 557         }
 558         p->buf[(int)(p->col)++] = c;
 559 }
 560
 561
 562 static void
 563 term_encodea(struct termp *p, char c)
 564 {
 565
 566         if (TERMP_STYLE & p->flags) {
 567                 if (TERMP_BOLD & p->flags) {
 568                         term_chara(p, c);
 569                         term_chara(p, 8);
 570                 }
 571                 if (TERMP_UNDER & p->flags) {
 572                         term_chara(p, '_');
 573                         term_chara(p, 8);
 574                 }
 575         }
 576         term_chara(p, c);
 577 }