term.c

   1 /*      $Id: term.c,v 1.193 2011/05/17 14:38:34 kristaps Exp $ */
   2 /*
   3  * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
   4  * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
   5  *
   6  * Permission to use, copy, modify, and distribute this software for any
   7  * purpose with or without fee is hereby granted, provided that the above
   8  * copyright notice and this permission notice appear in all copies.
   9  *
  10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17  */
  18 #ifdef HAVE_CONFIG_H
  19 #include "config.h"
  20 #endif
  21
  22 #include <sys/types.h>
  23
  24 #include <assert.h>
  25 #include <ctype.h>
  26 #include <stdint.h>
  27 #include <stdio.h>
  28 #include <stdlib.h>
  29 #include <string.h>
  30
  31 #include "mandoc.h"
  32 #include "out.h"
  33 #include "term.h"
  34 #include "main.h"
  35
  36 static  void             adjbuf(struct termp *p, int);
  37 static  void             bufferc(struct termp *, char);
  38 static  void             encode(struct termp *, const char *, size_t);
  39
  40 void
  41 term_free(struct termp *p)
  42 {
  43
  44         if (p->buf)
  45                 free(p->buf);
  46         if (p->symtab)
  47                 mchars_free(p->symtab);
  48
  49         free(p);
  50 }
  51
  52
  53 void
  54 term_begin(struct termp *p, term_margin head,
  55                 term_margin foot, const void *arg)
  56 {
  57
  58         p->headf = head;
  59         p->footf = foot;
  60         p->argf = arg;
  61         (*p->begin)(p);
  62 }
  63
  64
  65 void
  66 term_end(struct termp *p)
  67 {
  68
  69         (*p->end)(p);
  70 }
  71
  72 /*
  73  * Flush a line of text.  A "line" is loosely defined as being something
  74  * that should be followed by a newline, regardless of whether it's
  75  * broken apart by newlines getting there.  A line can also be a
  76  * fragment of a columnar list (`Bl -tag' or `Bl -column'), which does
  77  * not have a trailing newline.
  78  *
  79  * The following flags may be specified:
  80  *
  81  *  - TERMP_NOLPAD: when beginning to write the line, don't left-pad the
  82  *    offset value.  This is useful when doing columnar lists where the
  83  *    prior column has right-padded.
  84  *
  85  *  - TERMP_NOBREAK: this is the most important and is used when making
  86  *    columns.  In short: don't print a newline and instead pad to the
  87  *    right margin.  Used in conjunction with TERMP_NOLPAD.
  88  *
  89  *  - TERMP_TWOSPACE: when padding, make sure there are at least two
  90  *    space characters of padding.  Otherwise, rather break the line.
  91  *
  92  *  - TERMP_DANGLE: don't newline when TERMP_NOBREAK is specified and
  93  *    the line is overrun, and don't pad-right if it's underrun.
  94  *
  95  *  - TERMP_HANG: like TERMP_DANGLE, but doesn't newline when
  96  *    overruning, instead save the position and continue at that point
  97  *    when the next invocation.
  98  *
  99  *  In-line line breaking:
 100  *
 101  *  If TERMP_NOBREAK is specified and the line overruns the right
 102  *  margin, it will break and pad-right to the right margin after
 103  *  writing.  If maxrmargin is violated, it will break and continue
 104  *  writing from the right-margin, which will lead to the above scenario
 105  *  upon exit.  Otherwise, the line will break at the right margin.
 106  */
 107 void
 108 term_flushln(struct termp *p)
 109 {
 110         int              i;     /* current input position in p->buf */
 111         size_t           vis;   /* current visual position on output */
 112         size_t           vbl;   /* number of blanks to prepend to output */
 113         size_t           vend;  /* end of word visual position on output */
 114         size_t           bp;    /* visual right border position */
 115         size_t           dv;    /* temporary for visual pos calculations */
 116         int              j;     /* temporary loop index for p->buf */
 117         int              jhy;   /* last hyph before overflow w/r/t j */
 118         size_t           maxvis; /* output position of visible boundary */
 119         size_t           mmax; /* used in calculating bp */
 120
 121         /*
 122          * First, establish the maximum columns of "visible" content.
 123          * This is usually the difference between the right-margin and
 124          * an indentation, but can be, for tagged lists or columns, a
 125          * small set of values.
 126          */
 127         assert  (p->rmargin >= p->offset);
 128         dv     = p->rmargin - p->offset;
 129         maxvis = (int)dv > p->overstep ? dv - (size_t)p->overstep : 0;
 130         dv     = p->maxrmargin - p->offset;
 131         mmax   = (int)dv > p->overstep ? dv - (size_t)p->overstep : 0;
 132
 133         bp = TERMP_NOBREAK & p->flags ? mmax : maxvis;
 134
 135         /*
 136          * Indent the first line of a paragraph.
 137          */
 138         vbl = p->flags & TERMP_NOLPAD ? (size_t)0 : p->offset;
 139
 140         vis = vend = 0;
 141         i = 0;
 142
 143         while (i < p->col) {
 144                 /*
 145                  * Handle literal tab characters: collapse all
 146                  * subsequent tabs into a single huge set of spaces.
 147                  */
 148                 while (i < p->col && '\t' == p->buf[i]) {
 149                         vend = (vis / p->tabwidth + 1) * p->tabwidth;
 150                         vbl += vend - vis;
 151                         vis = vend;
 152                         i++;
 153                 }
 154
 155                 /*
 156                  * Count up visible word characters.  Control sequences
 157                  * (starting with the CSI) aren't counted.  A space
 158                  * generates a non-printing word, which is valid (the
 159                  * space is printed according to regular spacing rules).
 160                  */
 161
 162                 for (j = i, jhy = 0; j < p->col; j++) {
 163                         if ((j && ' ' == p->buf[j]) || '\t' == p->buf[j])
 164                                 break;
 165
 166                         /* Back over the the last printed character. */
 167                         if (8 == p->buf[j]) {
 168                                 assert(j);
 169                                 vend -= (*p->width)(p, p->buf[j - 1]);
 170                                 continue;
 171                         }
 172
 173                         /* Regular word. */
 174                         /* Break at the hyphen point if we overrun. */
 175                         if (vend > vis && vend < bp &&
 176                                         ASCII_HYPH == p->buf[j])
 177                                 jhy = j;
 178
 179                         vend += (*p->width)(p, p->buf[j]);
 180                 }
 181
 182                 /*
 183                  * Find out whether we would exceed the right margin.
 184                  * If so, break to the next line.
 185                  */
 186                 if (vend > bp && 0 == jhy && vis > 0) {
 187                         vend -= vis;
 188                         (*p->endline)(p);
 189                         if (TERMP_NOBREAK & p->flags) {
 190                                 p->viscol = p->rmargin;
 191                                 (*p->advance)(p, p->rmargin);
 192                                 vend += p->rmargin - p->offset;
 193                         } else {
 194                                 p->viscol = 0;
 195                                 vbl = p->offset;
 196                         }
 197
 198                         /* Remove the p->overstep width. */
 199
 200                         bp += (size_t)p->overstep;
 201                         p->overstep = 0;
 202                 }
 203
 204                 /* Write out the [remaining] word. */
 205                 for ( ; i < p->col; i++) {
 206                         if (vend > bp && jhy > 0 && i > jhy)
 207                                 break;
 208                         if ('\t' == p->buf[i])
 209                                 break;
 210                         if (' ' == p->buf[i]) {
 211                                 j = i;
 212                                 while (' ' == p->buf[i])
 213                                         i++;
 214                                 dv = (size_t)(i - j) * (*p->width)(p, ' ');
 215                                 vbl += dv;
 216                                 vend += dv;
 217                                 break;
 218                         }
 219                         if (ASCII_NBRSP == p->buf[i]) {
 220                                 vbl += (*p->width)(p, ' ');
 221                                 continue;
 222                         }
 223
 224                         /*
 225                          * Now we definitely know there will be
 226                          * printable characters to output,
 227                          * so write preceding white space now.
 228                          */
 229                         if (vbl) {
 230                                 (*p->advance)(p, vbl);
 231                                 p->viscol += vbl;
 232                                 vbl = 0;
 233                         }
 234
 235                         if (ASCII_HYPH == p->buf[i]) {
 236                                 (*p->letter)(p, '-');
 237                                 p->viscol += (*p->width)(p, '-');
 238                         } else {
 239                                 (*p->letter)(p, p->buf[i]);
 240                                 p->viscol += (*p->width)(p, p->buf[i]);
 241                         }
 242                 }
 243                 vis = vend;
 244         }
 245
 246         /*
 247          * If there was trailing white space, it was not printed;
 248          * so reset the cursor position accordingly.
 249          */
 250         vis -= vbl;
 251
 252         p->col = 0;
 253         p->overstep = 0;
 254
 255         if ( ! (TERMP_NOBREAK & p->flags)) {
 256                 p->viscol = 0;
 257                 (*p->endline)(p);
 258                 return;
 259         }
 260
 261         if (TERMP_HANG & p->flags) {
 262                 /* We need one blank after the tag. */
 263                 p->overstep = (int)(vis - maxvis + (*p->width)(p, ' '));
 264
 265                 /*
 266                  * Behave exactly the same way as groff:
 267                  * If we have overstepped the margin, temporarily move
 268                  * it to the right and flag the rest of the line to be
 269                  * shorter.
 270                  * If we landed right at the margin, be happy.
 271                  * If we are one step before the margin, temporarily
 272                  * move it one step LEFT and flag the rest of the line
 273                  * to be longer.
 274                  */
 275                 if (p->overstep >= -1) {
 276                         assert((int)maxvis + p->overstep >= 0);
 277                         maxvis += (size_t)p->overstep;
 278                 } else
 279                         p->overstep = 0;
 280
 281         } else if (TERMP_DANGLE & p->flags)
 282                 return;
 283
 284         /* Right-pad. */
 285         if (maxvis > vis +
 286             ((TERMP_TWOSPACE & p->flags) ? (*p->width)(p, ' ') : 0)) {
 287                 p->viscol += maxvis - vis;
 288                 (*p->advance)(p, maxvis - vis);
 289                 vis += (maxvis - vis);
 290         } else {        /* ...or newline break. */
 291                 (*p->endline)(p);
 292                 p->viscol = p->rmargin;
 293                 (*p->advance)(p, p->rmargin);
 294         }
 295 }
 296
 297
 298 /*
 299  * A newline only breaks an existing line; it won't assert vertical
 300  * space.  All data in the output buffer is flushed prior to the newline
 301  * assertion.
 302  */
 303 void
 304 term_newln(struct termp *p)
 305 {
 306
 307         p->flags |= TERMP_NOSPACE;
 308         if (0 == p->col && 0 == p->viscol) {
 309                 p->flags &= ~TERMP_NOLPAD;
 310                 return;
 311         }
 312         term_flushln(p);
 313         p->flags &= ~TERMP_NOLPAD;
 314 }
 315
 316
 317 /*
 318  * Asserts a vertical space (a full, empty line-break between lines).
 319  * Note that if used twice, this will cause two blank spaces and so on.
 320  * All data in the output buffer is flushed prior to the newline
 321  * assertion.
 322  */
 323 void
 324 term_vspace(struct termp *p)
 325 {
 326
 327         term_newln(p);
 328         p->viscol = 0;
 329         (*p->endline)(p);
 330 }
 331
 332 void
 333 term_fontlast(struct termp *p)
 334 {
 335         enum termfont    f;
 336
 337         f = p->fontl;
 338         p->fontl = p->fontq[p->fonti];
 339         p->fontq[p->fonti] = f;
 340 }
 341
 342
 343 void
 344 term_fontrepl(struct termp *p, enum termfont f)
 345 {
 346
 347         p->fontl = p->fontq[p->fonti];
 348         p->fontq[p->fonti] = f;
 349 }
 350
 351
 352 void
 353 term_fontpush(struct termp *p, enum termfont f)
 354 {
 355
 356         assert(p->fonti + 1 < 10);
 357         p->fontl = p->fontq[p->fonti];
 358         p->fontq[++p->fonti] = f;
 359 }
 360
 361
 362 const void *
 363 term_fontq(struct termp *p)
 364 {
 365
 366         return(&p->fontq[p->fonti]);
 367 }
 368
 369
 370 enum termfont
 371 term_fonttop(struct termp *p)
 372 {
 373
 374         return(p->fontq[p->fonti]);
 375 }
 376
 377
 378 void
 379 term_fontpopq(struct termp *p, const void *key)
 380 {
 381
 382         while (p->fonti >= 0 && key != &p->fontq[p->fonti])
 383                 p->fonti--;
 384         assert(p->fonti >= 0);
 385 }
 386
 387
 388 void
 389 term_fontpop(struct termp *p)
 390 {
 391
 392         assert(p->fonti);
 393         p->fonti--;
 394 }
 395
 396 /*
 397  * Handle pwords, partial words, which may be either a single word or a
 398  * phrase that cannot be broken down (such as a literal string).  This
 399  * handles word styling.
 400  */
 401 void
 402 term_word(struct termp *p, const char *word)
 403 {
 404         const char      *seq, *cp;
 405         char             c;
 406         int              sz;
 407         size_t           ssz;
 408         enum mandoc_esc  esc;
 409
 410         if ( ! (TERMP_NOSPACE & p->flags)) {
 411                 if ( ! (TERMP_KEEP & p->flags)) {
 412                         if (TERMP_PREKEEP & p->flags)
 413                                 p->flags |= TERMP_KEEP;
 414                         bufferc(p, ' ');
 415                         if (TERMP_SENTENCE & p->flags)
 416                                 bufferc(p, ' ');
 417                 } else
 418                         bufferc(p, ASCII_NBRSP);
 419         }
 420
 421         if ( ! (p->flags & TERMP_NONOSPACE))
 422                 p->flags &= ~TERMP_NOSPACE;
 423         else
 424                 p->flags |= TERMP_NOSPACE;
 425
 426         p->flags &= ~(TERMP_SENTENCE | TERMP_IGNDELIM);
 427
 428         while ('\0' != *word) {
 429                 if ((ssz = strcspn(word, "\\")) > 0)
 430                         encode(p, word, ssz);
 431
 432                 word += (int)ssz;
 433                 if ('\\' != *word)
 434                         continue;
 435
 436                 word++;
 437                 esc = mandoc_escape(&word, &seq, &sz);
 438                 if (ESCAPE_ERROR == esc)
 439                         break;
 440
 441                 switch (esc) {
 442                 case (ESCAPE_UNICODE):
 443                         encode(p, "?", 1);
 444                         break;
 445                 case (ESCAPE_NUMBERED):
 446                         if ('\0' != (c = mchars_num2char(seq, sz)))
 447                                 encode(p, &c, 1);
 448                         break;
 449                 case (ESCAPE_PREDEF):
 450                         cp = mchars_res2str(p->symtab, seq, sz, &ssz);
 451                         if (NULL != cp)
 452                                 encode(p, cp, ssz);
 453                         break;
 454                 case (ESCAPE_SPECIAL):
 455                         cp = mchars_spec2str(p->symtab, seq, sz, &ssz);
 456                         if (NULL != cp)
 457                                 encode(p, cp, ssz);
 458                         else if (1 == ssz)
 459                                 encode(p, seq, sz);
 460                         break;
 461                 case (ESCAPE_FONTBOLD):
 462                         term_fontrepl(p, TERMFONT_BOLD);
 463                         break;
 464                 case (ESCAPE_FONTITALIC):
 465                         term_fontrepl(p, TERMFONT_UNDER);
 466                         break;
 467                 case (ESCAPE_FONTROMAN):
 468                         term_fontrepl(p, TERMFONT_NONE);
 469                         break;
 470                 case (ESCAPE_FONTPREV):
 471                         term_fontlast(p);
 472                         break;
 473                 case (ESCAPE_NOSPACE):
 474                         if ('\0' == *word)
 475                                 p->flags |= TERMP_NOSPACE;
 476                         break;
 477                 default:
 478                         break;
 479                 }
 480         }
 481 }
 482
 483 static void
 484 adjbuf(struct termp *p, int sz)
 485 {
 486
 487         if (0 == p->maxcols)
 488                 p->maxcols = 1024;
 489         while (sz >= p->maxcols)
 490                 p->maxcols <<= 2;
 491
 492         p->buf = mandoc_realloc
 493                 (p->buf, sizeof(int) * (size_t)p->maxcols);
 494 }
 495
 496 static void
 497 bufferc(struct termp *p, char c)
 498 {
 499
 500         if (p->col + 1 >= p->maxcols)
 501                 adjbuf(p, p->col + 1);
 502
 503         p->buf[p->col++] = c;
 504 }
 505
 506 static void
 507 encode(struct termp *p, const char *word, size_t sz)
 508 {
 509         enum termfont     f;
 510         int               i, len;
 511
 512         /* LINTED */
 513         len = sz;
 514
 515         /*
 516          * Encode and buffer a string of characters.  If the current
 517          * font mode is unset, buffer directly, else encode then buffer
 518          * character by character.
 519          */
 520
 521         if (TERMFONT_NONE == (f = term_fonttop(p))) {
 522                 if (p->col + len >= p->maxcols)
 523                         adjbuf(p, p->col + len);
 524                 for (i = 0; i < len; i++)
 525                         p->buf[p->col++] = word[i];
 526                 return;
 527         }
 528
 529         /* Pre-buffer, assuming worst-case. */
 530
 531         if (p->col + 1 + (len * 3) >= p->maxcols)
 532                 adjbuf(p, p->col + 1 + (len * 3));
 533
 534         for (i = 0; i < len; i++) {
 535                 if ( ! isgraph((unsigned char)word[i])) {
 536                         p->buf[p->col++] = word[i];
 537                         continue;
 538                 }
 539
 540                 if (TERMFONT_UNDER == f)
 541                         p->buf[p->col++] = '_';
 542                 else
 543                         p->buf[p->col++] = word[i];
 544
 545                 p->buf[p->col++] = 8;
 546                 p->buf[p->col++] = word[i];
 547         }
 548 }
 549
 550 size_t
 551 term_len(const struct termp *p, size_t sz)
 552 {
 553
 554         return((*p->width)(p, ' ') * sz);
 555 }
 556
 557
 558 size_t
 559 term_strlen(const struct termp *p, const char *cp)
 560 {
 561         size_t           sz, rsz, i;
 562         int              ssz, c;
 563         const char      *seq, *rhs;
 564         static const char rej[] = { '\\', ASCII_HYPH, ASCII_NBRSP, '\0' };
 565
 566         /*
 567          * Account for escaped sequences within string length
 568          * calculations.  This follows the logic in term_word() as we
 569          * must calculate the width of produced strings.
 570          */
 571
 572         sz = 0;
 573         while ('\0' != *cp) {
 574                 rsz = strcspn(cp, rej);
 575                 for (i = 0; i < rsz; i++)
 576                         sz += (*p->width)(p, *cp++);
 577
 578                 c = 0;
 579                 switch (*cp) {
 580                 case ('\\'):
 581                         cp++;
 582                         rhs = NULL;
 583                         switch (mandoc_escape(&cp, &seq, &ssz)) {
 584                         case (ESCAPE_ERROR):
 585                                 return(sz);
 586                         case (ESCAPE_UNICODE):
 587                                 c = '?';
 588                                 /* FALLTHROUGH */
 589                         case (ESCAPE_NUMBERED):
 590                                 if ('\0' != c)
 591                                         c = mchars_num2char(seq, ssz);
 592                                 if ('\0' != c)
 593                                         sz += (*p->width)(p, c);
 594                                 break;
 595                         case (ESCAPE_PREDEF):
 596                                 rhs = mchars_res2str
 597                                         (p->symtab, seq, ssz, &rsz);
 598                                 break;
 599                         case (ESCAPE_SPECIAL):
 600                                 rhs = mchars_spec2str
 601                                         (p->symtab, seq, ssz, &rsz);
 602
 603                                 if (ssz != 1 || rhs)
 604                                         break;
 605
 606                                 rhs = seq;
 607                                 rsz = ssz;
 608                                 break;
 609                         default:
 610                                 break;
 611                         }
 612
 613                         if (NULL == rhs)
 614                                 break;
 615
 616                         for (i = 0; i < rsz; i++)
 617                                 sz += (*p->width)(p, *rhs++);
 618                         break;
 619                 case (ASCII_NBRSP):
 620                         sz += (*p->width)(p, ' ');
 621                         cp++;
 622                         break;
 623                 case (ASCII_HYPH):
 624                         sz += (*p->width)(p, '-');
 625                         cp++;
 626                         break;
 627                 default:
 628                         break;
 629                 }
 630         }
 631
 632         return(sz);
 633 }
 634
 635 /* ARGSUSED */
 636 size_t
 637 term_vspan(const struct termp *p, const struct roffsu *su)
 638 {
 639         double           r;
 640
 641         switch (su->unit) {
 642         case (SCALE_CM):
 643                 r = su->scale * 2;
 644                 break;
 645         case (SCALE_IN):
 646                 r = su->scale * 6;
 647                 break;
 648         case (SCALE_PC):
 649                 r = su->scale;
 650                 break;
 651         case (SCALE_PT):
 652                 r = su->scale / 8;
 653                 break;
 654         case (SCALE_MM):
 655                 r = su->scale / 1000;
 656                 break;
 657         case (SCALE_VS):
 658                 r = su->scale;
 659                 break;
 660         default:
 661                 r = su->scale - 1;
 662                 break;
 663         }
 664
 665         if (r < 0.0)
 666                 r = 0.0;
 667         return(/* LINTED */(size_t)
 668                         r);
 669 }
 670
 671 size_t
 672 term_hspan(const struct termp *p, const struct roffsu *su)
 673 {
 674         double           v;
 675
 676         v = ((*p->hspan)(p, su));
 677         if (v < 0.0)
 678                 v = 0.0;
 679         return((size_t) /* LINTED */
 680                         v);
 681 }