term.c

   1 /*      $Id: term.c,v 1.84 2009/07/14 15:16:41 kristaps Exp $ */
   2 /*
   3  * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
   4  *
   5  * Permission to use, copy, modify, and distribute this software for any
   6  * purpose with or without fee is hereby granted, provided that the above
   7  * copyright notice and this permission notice appear in all copies.
   8  *
   9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  16  */
  17 #include <assert.h>
  18 #include <err.h>
  19 #include <stdio.h>
  20 #include <stdlib.h>
  21 #include <string.h>
  22
  23 #include "term.h"
  24 #include "man.h"
  25 #include "mdoc.h"
  26
  27 extern  int               man_run(struct termp *,
  28                                 const struct man *);
  29 extern  int               mdoc_run(struct termp *,
  30                                 const struct mdoc *);
  31
  32 static  struct termp     *term_alloc(enum termenc);
  33 static  void              term_free(struct termp *);
  34 static  void              term_pword(struct termp *, const char *, int);
  35 static  void              term_pescape(struct termp *,
  36                                 const char *, int *, int);
  37 static  void              term_nescape(struct termp *,
  38                                 const char *, size_t);
  39 static  void              term_chara(struct termp *, char);
  40 static  void              term_encodea(struct termp *, char);
  41 static  int               term_isopendelim(const char *, int);
  42 static  int               term_isclosedelim(const char *, int);
  43
  44
  45 void *
  46 ascii_alloc(void)
  47 {
  48
  49         return(term_alloc(TERMENC_ASCII));
  50 }
  51
  52
  53 int
  54 terminal_man(void *arg, const struct man *man)
  55 {
  56         struct termp    *p;
  57
  58         p = (struct termp *)arg;
  59         if (NULL == p->symtab)
  60                 p->symtab = term_ascii2htab();
  61
  62         return(man_run(p, man));
  63 }
  64
  65
  66 int
  67 terminal_mdoc(void *arg, const struct mdoc *mdoc)
  68 {
  69         struct termp    *p;
  70
  71         p = (struct termp *)arg;
  72         if (NULL == p->symtab)
  73                 p->symtab = term_ascii2htab();
  74
  75         return(mdoc_run(p, mdoc));
  76 }
  77
  78
  79 void
  80 terminal_free(void *arg)
  81 {
  82
  83         term_free((struct termp *)arg);
  84 }
  85
  86
  87 static void
  88 term_free(struct termp *p)
  89 {
  90
  91         if (p->buf)
  92                 free(p->buf);
  93         if (TERMENC_ASCII == p->enc && p->symtab)
  94                 term_asciifree(p->symtab);
  95
  96         free(p);
  97 }
  98
  99
 100 static struct termp *
 101 term_alloc(enum termenc enc)
 102 {
 103         struct termp *p;
 104
 105         if (NULL == (p = malloc(sizeof(struct termp))))
 106                 err(1, "malloc");
 107         bzero(p, sizeof(struct termp));
 108         p->maxrmargin = 78;
 109         p->enc = enc;
 110         return(p);
 111 }
 112
 113
 114 static int
 115 term_isclosedelim(const char *p, int len)
 116 {
 117
 118         if (1 != len)
 119                 return(0);
 120
 121         switch (*p) {
 122         case('.'):
 123                 /* FALLTHROUGH */
 124         case(','):
 125                 /* FALLTHROUGH */
 126         case(';'):
 127                 /* FALLTHROUGH */
 128         case(':'):
 129                 /* FALLTHROUGH */
 130         case('?'):
 131                 /* FALLTHROUGH */
 132         case('!'):
 133                 /* FALLTHROUGH */
 134         case(')'):
 135                 /* FALLTHROUGH */
 136         case(']'):
 137                 /* FALLTHROUGH */
 138         case('}'):
 139                 return(1);
 140         default:
 141                 break;
 142         }
 143
 144         return(0);
 145 }
 146
 147
 148 static int
 149 term_isopendelim(const char *p, int len)
 150 {
 151
 152         if (1 != len)
 153                 return(0);
 154
 155         switch (*p) {
 156         case('('):
 157                 /* FALLTHROUGH */
 158         case('['):
 159                 /* FALLTHROUGH */
 160         case('{'):
 161                 return(1);
 162         default:
 163                 break;
 164         }
 165
 166         return(0);
 167 }
 168
 169
/*
 * Flush a line of text.  A "line" is loosely defined as being something
 * that should be followed by a newline, regardless of whether it's
 * broken apart by newlines getting there.  A line can also be a
 * fragment of a columnar list.
 *
 * Specifically, a line is whatever's in p->buf of length p->col; p->col
 * is reset to zero before this function returns.  Output goes to
 * standard output via putchar().
 *
 * The usage of termp:flags is as follows:
 *
 *  - TERMP_NOLPAD: when beginning to write the line, don't left-pad the
 *    offset value.  This is useful when doing columnar lists where the
 *    prior column has right-padded.
 *
 *  - TERMP_NOBREAK: this is the most important and is used when making
 *    columns.  In short: don't print a newline and instead pad to the
 *    right margin.  Used in conjunction with TERMP_NOLPAD.
 *
 *  - TERMP_DANGLE: don't newline when TERMP_NOBREAK is specified and
 *    the line is overrun, and don't pad-right if it's underrun.
 *
 *  - TERMP_HANG: like TERMP_DANGLE, but doesn't newline when
 *    overrunning; instead, save the amount of overrun and start
 *    counting from there on the next invocation.  The saved value
 *    lives in a function-static variable, so it is shared by every
 *    struct termp.
 *
 *  In-line line breaking:
 *
 *  If TERMP_NOBREAK is specified and the line overruns the right
 *  margin, it will break and pad-right to the right margin after
 *  writing.  If maxrmargin is violated, it will break and continue
 *  writing from the right-margin, which will lead to the above
 *  scenario upon exit.
 *
 *  Otherwise, the line will break at the right margin.  The first word
 *  on an output line is never broken, so a word wider than the
 *  available width simply overruns the margin (TODO: hyphenate, if
 *  possible).
 *
 *  FIXME: in groff, newline breaks also occur when a single space
 *  follows a NOBREAK!
 */
void
term_flushln(struct termp *p)
{
        int              i, j;
        size_t           vbl, vsz, vis, maxvis, mmax, bp;
        /* Overrun carried over from a previous TERMP_HANG flush; -1 if none. */
        static int       sv = -1;

        /*
         * First, establish the maximum columns of "visible" content.
         * This is usually the difference between the right-margin and
         * an indentation, but can be, for tagged lists or columns, a
         * small set of values.
         */

        assert(p->offset < p->rmargin);
        maxvis = p->rmargin - p->offset;
        mmax = p->maxrmargin - p->offset;
        /* Break point: the wider maxrmargin limit applies under NOBREAK. */
        bp = TERMP_NOBREAK & p->flags ? mmax : maxvis;
        vis = 0;

        /* Consume the saved overrun (once) as the starting column count. */
        if (sv >= 0) {
                vis = (size_t)sv;
                sv = -1;
        }

        /*
         * If in the standard case (left-justified), then begin with our
         * indentation, otherwise (columns, etc.) just start spitting
         * out text.
         */

        if ( ! (p->flags & TERMP_NOLPAD))
                /* LINTED */
                for (j = 0; j < (int)p->offset; j++)
                        putchar(' ');

        for (i = 0; i < (int)p->col; i++) {
                /*
                 * Count up visible word characters.  A backspace (8)
                 * and the character following it are skipped without
                 * being counted, so an overstruck pair as written by
                 * term_encodea() counts as one column.  A space
                 * generates a non-printing word, which is valid (the
                 * space is printed according to regular spacing rules).
                 */

                /* LINTED */
                for (j = i, vsz = 0; j < (int)p->col; j++) {
                        if (' ' == p->buf[j])
                                break;
                        else if (8 == p->buf[j])
                                j += 1;
                        else
                                vsz++;
                }

                /*
                 * Choose the number of blanks to prepend: no blank at the
                 * beginning of a line, one between words -- but do not
                 * actually write them yet.
                 */
                vbl = (size_t)(0 == vis ? 0 : 1);

                /*
                 * Find out whether we would exceed the right margin.
                 * If so, break to the next line.  (TODO: hyphenate)
                 * Otherwise, write the chosen number of blanks now.
                 * The leading `vis' test means the first word on an
                 * output line is never moved to a new line.
                 */
                if (vis && vis + vbl + vsz > bp) {
                        putchar('\n');
                        if (TERMP_NOBREAK & p->flags) {
                                /*
                                 * Continue at the right margin and
                                 * count the line as already holding
                                 * rmargin - offset visible columns.
                                 */
                                for (j = 0; j < (int)p->rmargin; j++)
                                        putchar(' ');
                                vis = p->rmargin - p->offset;
                        } else {
                                /* Continue at the regular indentation. */
                                for (j = 0; j < (int)p->offset; j++)
                                        putchar(' ');
                                vis = 0;
                        }
                } else {
                        for (j = 0; j < (int)vbl; j++)
                                putchar(' ');
                        vis += vbl;
                }

                /*
                 * Finally, write out the word.  This leaves `i' on the
                 * terminating space (or at p->col); the outer loop's
                 * increment then steps over that space.
                 */
                for ( ; i < (int)p->col; i++) {
                        if (' ' == p->buf[i])
                                break;
                        putchar(p->buf[i]);
                }
                vis += vsz;
        }

        /*
         * If we've overstepped our maximum visible no-break space, then
         * cause a newline and offset at the right margin.  With
         * TERMP_DANGLE or TERMP_HANG no newline is written; TERMP_HANG
         * additionally saves the overrun for the next invocation.
         */

        if ((TERMP_NOBREAK & p->flags) && vis >= maxvis) {
                if ( ! (TERMP_DANGLE & p->flags) &&
                                ! (TERMP_HANG & p->flags)) {
                        putchar('\n');
                        for (i = 0; i < (int)p->rmargin; i++)
                                putchar(' ');
                }
                if (TERMP_HANG & p->flags)
                        sv = vis - maxvis;
                p->col = 0;
                return;
        }

        /*
         * If we're not to right-marginalise it (newline), then instead
         * pad to the right margin and stay off.
         */

        if (p->flags & TERMP_NOBREAK) {
                if ( ! (TERMP_DANGLE & p->flags))
                        for ( ; vis < maxvis; vis++)
                                putchar(' ');
        } else
                putchar('\n');

        p->col = 0;
}
 337
 338
 339 /*
 340  * A newline only breaks an existing line; it won't assert vertical
 341  * space.  All data in the output buffer is flushed prior to the newline
 342  * assertion.
 343  */
 344 void
 345 term_newln(struct termp *p)
 346 {
 347
 348         p->flags |= TERMP_NOSPACE;
 349         if (0 == p->col) {
 350                 p->flags &= ~TERMP_NOLPAD;
 351                 return;
 352         }
 353         term_flushln(p);
 354         p->flags &= ~TERMP_NOLPAD;
 355 }
 356
 357
 358 /*
 359  * Asserts a vertical space (a full, empty line-break between lines).
 360  * Note that if used twice, this will cause two blank spaces and so on.
 361  * All data in the output buffer is flushed prior to the newline
 362  * assertion.
 363  */
 364 void
 365 term_vspace(struct termp *p)
 366 {
 367
 368         term_newln(p);
 369         putchar('\n');
 370 }
 371
 372
 373 /*
 374  * Break apart a word into "pwords" (partial-words, usually from
 375  * breaking up a phrase into individual words) and, eventually, put them
 376  * into the output buffer.  If we're a literal word, then don't break up
 377  * the word and put it verbatim into the output buffer.
 378  */
 379 void
 380 term_word(struct termp *p, const char *word)
 381 {
 382         int              i, j, len;
 383
 384         len = (int)strlen(word);
 385
 386         if (p->flags & TERMP_LITERAL) {
 387                 term_pword(p, word, len);
 388                 return;
 389         }
 390
 391         /* LINTED */
 392         for (j = i = 0; i < len; i++) {
 393                 if (' ' != word[i]) {
 394                         j++;
 395                         continue;
 396                 }
 397
 398                 /* Escaped spaces don't delimit... */
 399                 if (i && ' ' == word[i] && '\\' == word[i - 1]) {
 400                         j++;
 401                         continue;
 402                 }
 403
 404                 if (0 == j)
 405                         continue;
 406                 assert(i >= j);
 407                 term_pword(p, &word[i - j], j);
 408                 j = 0;
 409         }
 410         if (j > 0) {
 411                 assert(i >= j);
 412                 term_pword(p, &word[i - j], j);
 413         }
 414 }
 415
 416
 417 /*
 418  * Determine the symbol indicated by an escape sequences, that is, one
 419  * starting with a backslash.  Once done, we pass this value into the
 420  * output buffer by way of the symbol table.
 421  */
 422 static void
 423 term_nescape(struct termp *p, const char *word, size_t len)
 424 {
 425         const char      *rhs;
 426         size_t           sz;
 427         int              i;
 428
 429         rhs = term_a2ascii(p->symtab, word, len, &sz);
 430         if (rhs)
 431                 for (i = 0; i < (int)sz; i++)
 432                         term_encodea(p, rhs[i]);
 433 }
 434
 435
 436 /*
 437  * Handle an escape sequence: determine its length and pass it to the
 438  * escape-symbol look table.  Note that we assume mdoc(3) has validated
 439  * the escape sequence (we assert upon badly-formed escape sequences).
 440  */
 441 static void
 442 term_pescape(struct termp *p, const char *word, int *i, int len)
 443 {
 444         int              j;
 445
 446         if (++(*i) >= len)
 447                 return;
 448
 449         if ('(' == word[*i]) {
 450                 (*i)++;
 451                 if (*i + 1 >= len)
 452                         return;
 453
 454                 term_nescape(p, &word[*i], 2);
 455                 (*i)++;
 456                 return;
 457
 458         } else if ('*' == word[*i]) {
 459                 (*i)++;
 460                 if (*i >= len)
 461                         return;
 462
 463                 switch (word[*i]) {
 464                 case ('('):
 465                         (*i)++;
 466                         if (*i + 1 >= len)
 467                                 return;
 468
 469                         term_nescape(p, &word[*i], 2);
 470                         (*i)++;
 471                         return;
 472                 case ('['):
 473                         break;
 474                 default:
 475                         term_nescape(p, &word[*i], 1);
 476                         return;
 477                 }
 478
 479         } else if ('f' == word[*i]) {
 480                 (*i)++;
 481                 if (*i >= len)
 482                         return;
 483                 switch (word[*i]) {
 484                 case ('B'):
 485                         p->flags |= TERMP_BOLD;
 486                         break;
 487                 case ('I'):
 488                         p->flags |= TERMP_UNDER;
 489                         break;
 490                 case ('P'):
 491                         /* FALLTHROUGH */
 492                 case ('R'):
 493                         p->flags &= ~TERMP_STYLE;
 494                         break;
 495                 default:
 496                         break;
 497                 }
 498                 return;
 499
 500         } else if ('[' != word[*i]) {
 501                 term_nescape(p, &word[*i], 1);
 502                 return;
 503         }
 504
 505         (*i)++;
 506         for (j = 0; word[*i] && ']' != word[*i]; (*i)++, j++)
 507                 /* Loop... */ ;
 508
 509         if (0 == word[*i])
 510                 return;
 511
 512         term_nescape(p, &word[*i - j], (size_t)j);
 513 }
 514
 515
 516 /*
 517  * Handle pwords, partial words, which may be either a single word or a
 518  * phrase that cannot be broken down (such as a literal string).  This
 519  * handles word styling.
 520  */
 521 static void
 522 term_pword(struct termp *p, const char *word, int len)
 523 {
 524         int              i;
 525
 526         if (term_isclosedelim(word, len))
 527                 if ( ! (TERMP_IGNDELIM & p->flags))
 528                         p->flags |= TERMP_NOSPACE;
 529
 530         if ( ! (TERMP_NOSPACE & p->flags))
 531                 term_chara(p, ' ');
 532
 533         if ( ! (p->flags & TERMP_NONOSPACE))
 534                 p->flags &= ~TERMP_NOSPACE;
 535
 536         /*
 537          * If ANSI (word-length styling), then apply our style now,
 538          * before the word.
 539          */
 540
 541         for (i = 0; i < len; i++)
 542                 if ('\\' == word[i])
 543                         term_pescape(p, word, &i, len);
 544                 else
 545                         term_encodea(p, word[i]);
 546
 547         if (term_isopendelim(word, len))
 548                 p->flags |= TERMP_NOSPACE;
 549 }
 550
 551
 552 /*
 553  * Insert a single character into the line-buffer.  If the buffer's
 554  * space is exceeded, then allocate more space by doubling the buffer
 555  * size.
 556  */
 557 static void
 558 term_chara(struct termp *p, char c)
 559 {
 560         size_t           s;
 561
 562         if (p->col + 1 >= p->maxcols) {
 563                 if (0 == p->maxcols)
 564                         p->maxcols = 256;
 565                 s = p->maxcols * 2;
 566                 p->buf = realloc(p->buf, s);
 567                 if (NULL == p->buf)
 568                         err(1, "realloc");
 569                 p->maxcols = s;
 570         }
 571         p->buf[(int)(p->col)++] = c;
 572 }
 573
 574
 575 static void
 576 term_encodea(struct termp *p, char c)
 577 {
 578
 579         if (TERMP_STYLE & p->flags) {
 580                 if (TERMP_BOLD & p->flags) {
 581                         term_chara(p, c);
 582                         term_chara(p, 8);
 583                 }
 584                 if (TERMP_UNDER & p->flags) {
 585                         term_chara(p, '_');
 586                         term_chara(p, 8);
 587                 }
 588         }
 589         term_chara(p, c);
 590 }