term.c

   1 /* $Id: term.c,v 1.286 2022/04/27 13:41:13 schwarze Exp $ */
   2 /*
   3  * Copyright (c) 2010-2022 Ingo Schwarze <schwarze@openbsd.org>
   4  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
   5  *
   6  * Permission to use, copy, modify, and distribute this software for any
   7  * purpose with or without fee is hereby granted, provided that the above
   8  * copyright notice and this permission notice appear in all copies.
   9  *
  10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
  11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
  13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17  */
  18 #include "config.h"
  19
  20 #include <sys/types.h>
  21
  22 #include <assert.h>
  23 #include <ctype.h>
  24 #include <stdint.h>
  25 #include <stdio.h>
  26 #include <stdlib.h>
  27 #include <string.h>
  28
  29 #include "mandoc.h"
  30 #include "mandoc_aux.h"
  31 #include "out.h"
  32 #include "term.h"
  33 #include "main.h"
  34
  35 static  size_t           cond_width(const struct termp *, int, int *);
  36 static  void             adjbuf(struct termp_col *, size_t);
  37 static  void             bufferc(struct termp *, char);
  38 static  void             encode(struct termp *, const char *, size_t);
  39 static  void             encode1(struct termp *, int);
  40 static  void             endline(struct termp *);
  41 static  void             term_field(struct termp *, size_t, size_t);
  42 static  void             term_fill(struct termp *, size_t *, size_t *,
  43                                 size_t);
  44
  45
  46 void
  47 term_setcol(struct termp *p, size_t maxtcol)
  48 {
  49         if (maxtcol > p->maxtcol) {
  50                 p->tcols = mandoc_recallocarray(p->tcols,
  51                     p->maxtcol, maxtcol, sizeof(*p->tcols));
  52                 p->maxtcol = maxtcol;
  53         }
  54         p->lasttcol = maxtcol - 1;
  55         p->tcol = p->tcols;
  56 }
  57
  58 void
  59 term_free(struct termp *p)
  60 {
  61         term_tab_free();
  62         for (p->tcol = p->tcols; p->tcol < p->tcols + p->maxtcol; p->tcol++)
  63                 free(p->tcol->buf);
  64         free(p->tcols);
  65         free(p->fontq);
  66         free(p);
  67 }
  68
  69 void
  70 term_begin(struct termp *p, term_margin head,
  71                 term_margin foot, const struct roff_meta *arg)
  72 {
  73
  74         p->headf = head;
  75         p->footf = foot;
  76         p->argf = arg;
  77         (*p->begin)(p);
  78 }
  79
  80 void
  81 term_end(struct termp *p)
  82 {
  83
  84         (*p->end)(p);
  85 }
  86
  87 /*
  88  * Flush a chunk of text.  By default, break the output line each time
  89  * the right margin is reached, and continue output on the next line
  90  * at the same offset as the chunk itself.  By default, also break the
  91  * output line at the end of the chunk.  There are many flags modifying
  92  * this behaviour, see the comments in the body of the function.
  93  */
  94 void
  95 term_flushln(struct termp *p)
  96 {
  97         size_t   vbl;      /* Number of blanks to prepend to the output. */
  98         size_t   vbr;      /* Actual visual position of the end of field. */
  99         size_t   vfield;   /* Desired visual field width. */
 100         size_t   vtarget;  /* Desired visual position of the right margin. */
 101         size_t   ic;       /* Character position in the input buffer. */
 102         size_t   nbr;      /* Number of characters to print in this field. */
 103
 104         /*
 105          * Normally, start writing at the left margin, but with the
 106          * NOPAD flag, start writing at the current position instead.
 107          */
 108
 109         vbl = (p->flags & TERMP_NOPAD) || p->tcol->offset < p->viscol ?
 110             0 : p->tcol->offset - p->viscol;
 111         if (p->minbl && vbl < p->minbl)
 112                 vbl = p->minbl;
 113
 114         if ((p->flags & TERMP_MULTICOL) == 0)
 115                 p->tcol->col = 0;
 116
 117         /* Loop over output lines. */
 118
 119         for (;;) {
 120                 vfield = p->tcol->rmargin > p->viscol + vbl ?
 121                     p->tcol->rmargin - p->viscol - vbl : 0;
 122
 123                 /*
 124                  * Normally, break the line at the the right margin
 125                  * of the field, but with the NOBREAK flag, only
 126                  * break it at the max right margin of the screen,
 127                  * and with the BRNEVER flag, never break it at all.
 128                  */
 129
 130                 vtarget = (p->flags & TERMP_NOBREAK) == 0 ? vfield :
 131                     p->maxrmargin > p->viscol + vbl ?
 132                     p->maxrmargin - p->viscol - vbl : 0;
 133
 134                 /*
 135                  * Figure out how much text will fit in the field.
 136                  * If there is whitespace only, print nothing.
 137                  */
 138
 139                 term_fill(p, &nbr, &vbr,
 140                     p->flags & TERMP_BRNEVER ? SIZE_MAX : vtarget);
 141                 if (nbr == 0)
 142                         break;
 143
 144                 /*
 145                  * With the CENTER or RIGHT flag, increase the indentation
 146                  * to center the text between the left and right margins
 147                  * or to adjust it to the right margin, respectively.
 148                  */
 149
 150                 if (vbr < vtarget) {
 151                         if (p->flags & TERMP_CENTER)
 152                                 vbl += (vtarget - vbr) / 2;
 153                         else if (p->flags & TERMP_RIGHT)
 154                                 vbl += vtarget - vbr;
 155                 }
 156
 157                 /* Finally, print the field content. */
 158
 159                 term_field(p, vbl, nbr);
 160
 161                 /*
 162                  * If there is no text left in the field, exit the loop.
 163                  * If the BRTRSP flag is set, consider trailing
 164                  * whitespace significant when deciding whether
 165                  * the field fits or not.
 166                  */
 167
 168                 for (ic = p->tcol->col; ic < p->tcol->lastcol; ic++) {
 169                         switch (p->tcol->buf[ic]) {
 170                         case '\t':
 171                                 if (p->flags & TERMP_BRTRSP)
 172                                         vbr = term_tab_next(vbr);
 173                                 continue;
 174                         case ' ':
 175                                 if (p->flags & TERMP_BRTRSP)
 176                                         vbr += (*p->width)(p, ' ');
 177                                 continue;
 178                         case '\n':
 179                         case ASCII_BREAK:
 180                                 continue;
 181                         default:
 182                                 break;
 183                         }
 184                         break;
 185                 }
 186                 if (ic == p->tcol->lastcol)
 187                         break;
 188
 189                 /*
 190                  * At the location of an automtic line break, input
 191                  * space characters are consumed by the line break.
 192                  */
 193
 194                 while (p->tcol->col < p->tcol->lastcol &&
 195                     p->tcol->buf[p->tcol->col] == ' ')
 196                         p->tcol->col++;
 197
 198                 /*
 199                  * In multi-column mode, leave the rest of the text
 200                  * in the buffer to be handled by a subsequent
 201                  * invocation, such that the other columns of the
 202                  * table can be handled first.
 203                  * In single-column mode, simply break the line.
 204                  */
 205
 206                 if (p->flags & TERMP_MULTICOL)
 207                         return;
 208
 209                 endline(p);
 210                 p->viscol = 0;
 211
 212                 /*
 213                  * Normally, start the next line at the same indentation
 214                  * as this one, but with the BRIND flag, start it at the
 215                  * right margin instead.  This is used together with
 216                  * NOBREAK for the tags in various kinds of tagged lists.
 217                  */
 218
 219                 vbl = p->flags & TERMP_BRIND ?
 220                     p->tcol->rmargin : p->tcol->offset;
 221         }
 222
 223         /* Reset output state in preparation for the next field. */
 224
 225         p->col = p->tcol->col = p->tcol->lastcol = 0;
 226         p->minbl = p->trailspace;
 227         p->flags &= ~(TERMP_BACKAFTER | TERMP_BACKBEFORE | TERMP_NOPAD);
 228
 229         if (p->flags & TERMP_MULTICOL)
 230                 return;
 231
 232         /*
 233          * The HANG flag means that the next field
 234          * always follows on the same line.
 235          * The NOBREAK flag means that the next field
 236          * follows on the same line unless the field was overrun.
 237          * Normally, break the line at the end of each field.
 238          */
 239
 240         if ((p->flags & TERMP_HANG) == 0 &&
 241             ((p->flags & TERMP_NOBREAK) == 0 ||
 242              vbr + term_len(p, p->trailspace) > vfield))
 243                 endline(p);
 244 }
 245
 246 /*
 247  * Store the number of input characters to print in this field in *nbr
 248  * and their total visual width to print in *vbr.
 249  * If there is only whitespace in the field, both remain zero.
 250  * The desired visual width of the field is provided by vtarget.
 251  * If the first word is longer, the field will be overrun.
 252  */
 253 static void
 254 term_fill(struct termp *p, size_t *nbr, size_t *vbr, size_t vtarget)
 255 {
 256         size_t   ic;        /* Character position in the input buffer. */
 257         size_t   vis;       /* Visual position of the current character. */
 258         size_t   vn;        /* Visual position of the next character. */
 259         int      breakline; /* Break at the end of this word. */
 260         int      graph;     /* Last character was non-blank. */
 261
 262         *nbr = *vbr = vis = 0;
 263         breakline = graph = 0;
 264         for (ic = p->tcol->col; ic < p->tcol->lastcol; ic++) {
 265                 switch (p->tcol->buf[ic]) {
 266                 case '\b':  /* Escape \o (overstrike) or backspace markup. */
 267                         assert(ic > 0);
 268                         vis -= (*p->width)(p, p->tcol->buf[ic - 1]);
 269                         continue;
 270
 271                 case '\t':  /* Normal ASCII whitespace. */
 272                 case ' ':
 273                 case ASCII_BREAK:  /* Escape \: (breakpoint). */
 274                         switch (p->tcol->buf[ic]) {
 275                         case '\t':
 276                                 vn = term_tab_next(vis);
 277                                 break;
 278                         case ' ':
 279                                 vn = vis + (*p->width)(p, ' ');
 280                                 break;
 281                         case ASCII_BREAK:
 282                                 vn = vis;
 283                                 break;
 284                         default:
 285                                 abort();
 286                         }
 287                         /* Can break at the end of a word. */
 288                         if (breakline || vn > vtarget)
 289                                 break;
 290                         if (graph) {
 291                                 *nbr = ic;
 292                                 *vbr = vis;
 293                                 graph = 0;
 294                         }
 295                         vis = vn;
 296                         continue;
 297
 298                 case '\n':  /* Escape \p (break at the end of the word). */
 299                         breakline = 1;
 300                         continue;
 301
 302                 case ASCII_HYPH:  /* Breakable hyphen. */
 303                         graph = 1;
 304                         /*
 305                          * We are about to decide whether to break the
 306                          * line or not, so we no longer need this hyphen
 307                          * to be marked as breakable.  Put back a real
 308                          * hyphen such that we get the correct width.
 309                          */
 310                         p->tcol->buf[ic] = '-';
 311                         vis += (*p->width)(p, '-');
 312                         if (vis > vtarget) {
 313                                 ic++;
 314                                 break;
 315                         }
 316                         *nbr = ic + 1;
 317                         *vbr = vis;
 318                         continue;
 319
 320                 case ASCII_NBRSP:  /* Non-breakable space. */
 321                         p->tcol->buf[ic] = ' ';
 322                         /* FALLTHROUGH */
 323                 default:  /* Printable character. */
 324                         graph = 1;
 325                         vis += (*p->width)(p, p->tcol->buf[ic]);
 326                         if (vis > vtarget && *nbr > 0)
 327                                 return;
 328                         continue;
 329                 }
 330                 break;
 331         }
 332
 333         /*
 334          * If the last word extends to the end of the field without any
 335          * trailing whitespace, the loop could not check yet whether it
 336          * can remain on this line.  So do the check now.
 337          */
 338
 339         if (graph && (vis <= vtarget || *nbr == 0)) {
 340                 *nbr = ic;
 341                 *vbr = vis;
 342         }
 343 }
 344
 345 /*
 346  * Print the contents of one field
 347  * with an indentation of        vbl      visual columns,
 348  * and an input string length of nbr      characters.
 349  */
 350 static void
 351 term_field(struct termp *p, size_t vbl, size_t nbr)
 352 {
 353         size_t   ic;    /* Character position in the input buffer. */
 354         size_t   vis;   /* Visual position of the current character. */
 355         size_t   dv;    /* Visual width of the current character. */
 356         size_t   vn;    /* Visual position of the next character. */
 357
 358         vis = 0;
 359         for (ic = p->tcol->col; ic < nbr; ic++) {
 360
 361                 /*
 362                  * To avoid the printing of trailing whitespace,
 363                  * do not print whitespace right away, only count it.
 364                  */
 365
 366                 switch (p->tcol->buf[ic]) {
 367                 case '\n':
 368                 case ASCII_BREAK:
 369                         continue;
 370                 case '\t':
 371                         vn = term_tab_next(vis);
 372                         vbl += vn - vis;
 373                         vis = vn;
 374                         continue;
 375                 case ' ':
 376                 case ASCII_NBRSP:
 377                         dv = (*p->width)(p, ' ');
 378                         vbl += dv;
 379                         vis += dv;
 380                         continue;
 381                 default:
 382                         break;
 383                 }
 384
 385                 /*
 386                  * We found a non-blank character to print,
 387                  * so write preceding white space now.
 388                  */
 389
 390                 if (vbl > 0) {
 391                         (*p->advance)(p, vbl);
 392                         p->viscol += vbl;
 393                         vbl = 0;
 394                 }
 395
 396                 /* Print the character and adjust the visual position. */
 397
 398                 (*p->letter)(p, p->tcol->buf[ic]);
 399                 if (p->tcol->buf[ic] == '\b') {
 400                         dv = (*p->width)(p, p->tcol->buf[ic - 1]);
 401                         p->viscol -= dv;
 402                         vis -= dv;
 403                 } else {
 404                         dv = (*p->width)(p, p->tcol->buf[ic]);
 405                         p->viscol += dv;
 406                         vis += dv;
 407                 }
 408         }
 409         p->tcol->col = nbr;
 410 }
 411
 412 static void
 413 endline(struct termp *p)
 414 {
 415         if ((p->flags & (TERMP_NEWMC | TERMP_ENDMC)) == TERMP_ENDMC) {
 416                 p->mc = NULL;
 417                 p->flags &= ~TERMP_ENDMC;
 418         }
 419         if (p->mc != NULL) {
 420                 if (p->viscol && p->maxrmargin >= p->viscol)
 421                         (*p->advance)(p, p->maxrmargin - p->viscol + 1);
 422                 p->flags |= TERMP_NOBUF | TERMP_NOSPACE;
 423                 term_word(p, p->mc);
 424                 p->flags &= ~(TERMP_NOBUF | TERMP_NEWMC);
 425         }
 426         p->viscol = 0;
 427         p->minbl = 0;
 428         (*p->endline)(p);
 429 }
 430
 431 /*
 432  * A newline only breaks an existing line; it won't assert vertical
 433  * space.  All data in the output buffer is flushed prior to the newline
 434  * assertion.
 435  */
 436 void
 437 term_newln(struct termp *p)
 438 {
 439
 440         p->flags |= TERMP_NOSPACE;
 441         if (p->tcol->lastcol || p->viscol)
 442                 term_flushln(p);
 443 }
 444
 445 /*
 446  * Asserts a vertical space (a full, empty line-break between lines).
 447  * Note that if used twice, this will cause two blank spaces and so on.
 448  * All data in the output buffer is flushed prior to the newline
 449  * assertion.
 450  */
 451 void
 452 term_vspace(struct termp *p)
 453 {
 454
 455         term_newln(p);
 456         p->viscol = 0;
 457         p->minbl = 0;
 458         if (0 < p->skipvsp)
 459                 p->skipvsp--;
 460         else
 461                 (*p->endline)(p);
 462 }
 463
 464 /* Swap current and previous font; for \fP and .ft P */
 465 void
 466 term_fontlast(struct termp *p)
 467 {
 468         enum termfont    f;
 469
 470         f = p->fontl;
 471         p->fontl = p->fontq[p->fonti];
 472         p->fontq[p->fonti] = f;
 473 }
 474
 475 /* Set font, save current, discard previous; for \f, .ft, .B etc. */
 476 void
 477 term_fontrepl(struct termp *p, enum termfont f)
 478 {
 479
 480         p->fontl = p->fontq[p->fonti];
 481         p->fontq[p->fonti] = f;
 482 }
 483
 484 /* Set font, save previous. */
 485 void
 486 term_fontpush(struct termp *p, enum termfont f)
 487 {
 488
 489         p->fontl = p->fontq[p->fonti];
 490         if (++p->fonti == p->fontsz) {
 491                 p->fontsz += 8;
 492                 p->fontq = mandoc_reallocarray(p->fontq,
 493                     p->fontsz, sizeof(*p->fontq));
 494         }
 495         p->fontq[p->fonti] = f;
 496 }
 497
 498 /* Flush to make the saved pointer current again. */
 499 void
 500 term_fontpopq(struct termp *p, int i)
 501 {
 502
 503         assert(i >= 0);
 504         if (p->fonti > i)
 505                 p->fonti = i;
 506 }
 507
 508 /* Pop one font off the stack. */
 509 void
 510 term_fontpop(struct termp *p)
 511 {
 512
 513         assert(p->fonti);
 514         p->fonti--;
 515 }
 516
 517 /*
 518  * Handle pwords, partial words, which may be either a single word or a
 519  * phrase that cannot be broken down (such as a literal string).  This
 520  * handles word styling.
 521  */
 522 void
 523 term_word(struct termp *p, const char *word)
 524 {
 525         struct roffsu    su;
 526         const char       nbrsp[2] = { ASCII_NBRSP, 0 };
 527         const char      *seq, *cp;
 528         int              sz, uc;
 529         size_t           csz, lsz, ssz;
 530         enum mandoc_esc  esc;
 531
 532         if ((p->flags & TERMP_NOBUF) == 0) {
 533                 if ((p->flags & TERMP_NOSPACE) == 0) {
 534                         if ((p->flags & TERMP_KEEP) == 0) {
 535                                 bufferc(p, ' ');
 536                                 if (p->flags & TERMP_SENTENCE)
 537                                         bufferc(p, ' ');
 538                         } else
 539                                 bufferc(p, ASCII_NBRSP);
 540                 }
 541                 if (p->flags & TERMP_PREKEEP)
 542                         p->flags |= TERMP_KEEP;
 543                 if (p->flags & TERMP_NONOSPACE)
 544                         p->flags |= TERMP_NOSPACE;
 545                 else
 546                         p->flags &= ~TERMP_NOSPACE;
 547                 p->flags &= ~(TERMP_SENTENCE | TERMP_NONEWLINE);
 548                 p->skipvsp = 0;
 549         }
 550
 551         while ('\0' != *word) {
 552                 if ('\\' != *word) {
 553                         if (TERMP_NBRWORD & p->flags) {
 554                                 if (' ' == *word) {
 555                                         encode(p, nbrsp, 1);
 556                                         word++;
 557                                         continue;
 558                                 }
 559                                 ssz = strcspn(word, "\\ ");
 560                         } else
 561                                 ssz = strcspn(word, "\\");
 562                         encode(p, word, ssz);
 563                         word += (int)ssz;
 564                         continue;
 565                 }
 566
 567                 word++;
 568                 esc = mandoc_escape(&word, &seq, &sz);
 569                 switch (esc) {
 570                 case ESCAPE_UNICODE:
 571                         uc = mchars_num2uc(seq + 1, sz - 1);
 572                         break;
 573                 case ESCAPE_NUMBERED:
 574                         uc = mchars_num2char(seq, sz);
 575                         if (uc < 0)
 576                                 continue;
 577                         break;
 578                 case ESCAPE_SPECIAL:
 579                         if (p->enc == TERMENC_ASCII) {
 580                                 cp = mchars_spec2str(seq, sz, &ssz);
 581                                 if (cp != NULL)
 582                                         encode(p, cp, ssz);
 583                         } else {
 584                                 uc = mchars_spec2cp(seq, sz);
 585                                 if (uc > 0)
 586                                         encode1(p, uc);
 587                         }
 588                         continue;
 589                 case ESCAPE_UNDEF:
 590                         uc = *seq;
 591                         break;
 592                 case ESCAPE_FONTBOLD:
 593                 case ESCAPE_FONTCB:
 594                         term_fontrepl(p, TERMFONT_BOLD);
 595                         continue;
 596                 case ESCAPE_FONTITALIC:
 597                 case ESCAPE_FONTCI:
 598                         term_fontrepl(p, TERMFONT_UNDER);
 599                         continue;
 600                 case ESCAPE_FONTBI:
 601                         term_fontrepl(p, TERMFONT_BI);
 602                         continue;
 603                 case ESCAPE_FONT:
 604                 case ESCAPE_FONTCR:
 605                 case ESCAPE_FONTROMAN:
 606                         term_fontrepl(p, TERMFONT_NONE);
 607                         continue;
 608                 case ESCAPE_FONTPREV:
 609                         term_fontlast(p);
 610                         continue;
 611                 case ESCAPE_BREAK:
 612                         bufferc(p, '\n');
 613                         continue;
 614                 case ESCAPE_NOSPACE:
 615                         if (p->flags & TERMP_BACKAFTER)
 616                                 p->flags &= ~TERMP_BACKAFTER;
 617                         else if (*word == '\0')
 618                                 p->flags |= (TERMP_NOSPACE | TERMP_NONEWLINE);
 619                         continue;
 620                 case ESCAPE_DEVICE:
 621                         if (p->type == TERMTYPE_PDF)
 622                                 encode(p, "pdf", 3);
 623                         else if (p->type == TERMTYPE_PS)
 624                                 encode(p, "ps", 2);
 625                         else if (p->enc == TERMENC_ASCII)
 626                                 encode(p, "ascii", 5);
 627                         else
 628                                 encode(p, "utf8", 4);
 629                         continue;
 630                 case ESCAPE_HORIZ:
 631                         if (p->flags & TERMP_BACKAFTER) {
 632                                 p->flags &= ~TERMP_BACKAFTER;
 633                                 continue;
 634                         }
 635                         if (*seq == '|') {
 636                                 seq++;
 637                                 uc = -p->col;
 638                         } else
 639                                 uc = 0;
 640                         if (a2roffsu(seq, &su, SCALE_EM) == NULL)
 641                                 continue;
 642                         uc += term_hen(p, &su);
 643                         if (uc >= 0) {
 644                                 while (uc > 0) {
 645                                         uc -= term_len(p, 1);
 646                                         if (p->flags & TERMP_BACKBEFORE)
 647                                                 p->flags &= ~TERMP_BACKBEFORE;
 648                                         else
 649                                                 bufferc(p, ASCII_NBRSP);
 650                                 }
 651                                 continue;
 652                         }
 653                         if (p->flags & TERMP_BACKBEFORE) {
 654                                 p->flags &= ~TERMP_BACKBEFORE;
 655                                 assert(p->col > 0);
 656                                 p->col--;
 657                         }
 658                         if (p->col >= (size_t)(-uc)) {
 659                                 p->col += uc;
 660                         } else {
 661                                 uc += p->col;
 662                                 p->col = 0;
 663                                 if (p->tcol->offset > (size_t)(-uc)) {
 664                                         p->ti += uc;
 665                                         p->tcol->offset += uc;
 666                                 } else {
 667                                         p->ti -= p->tcol->offset;
 668                                         p->tcol->offset = 0;
 669                                 }
 670                         }
 671                         continue;
 672                 case ESCAPE_HLINE:
 673                         if ((cp = a2roffsu(seq, &su, SCALE_EM)) == NULL)
 674                                 continue;
 675                         uc = term_hen(p, &su);
 676                         if (uc <= 0) {
 677                                 if (p->tcol->rmargin <= p->tcol->offset)
 678                                         continue;
 679                                 lsz = p->tcol->rmargin - p->tcol->offset;
 680                         } else
 681                                 lsz = uc;
 682                         if (*cp == seq[-1])
 683                                 uc = -1;
 684                         else if (*cp == '\\') {
 685                                 seq = cp + 1;
 686                                 esc = mandoc_escape(&seq, &cp, &sz);
 687                                 switch (esc) {
 688                                 case ESCAPE_UNICODE:
 689                                         uc = mchars_num2uc(cp + 1, sz - 1);
 690                                         break;
 691                                 case ESCAPE_NUMBERED:
 692                                         uc = mchars_num2char(cp, sz);
 693                                         break;
 694                                 case ESCAPE_SPECIAL:
 695                                         uc = mchars_spec2cp(cp, sz);
 696                                         break;
 697                                 case ESCAPE_UNDEF:
 698                                         uc = *seq;
 699                                         break;
 700                                 default:
 701                                         uc = -1;
 702                                         break;
 703                                 }
 704                         } else
 705                                 uc = *cp;
 706                         if (uc < 0x20 || (uc > 0x7E && uc < 0xA0))
 707                                 uc = '_';
 708                         if (p->enc == TERMENC_ASCII) {
 709                                 cp = ascii_uc2str(uc);
 710                                 csz = term_strlen(p, cp);
 711                                 ssz = strlen(cp);
 712                         } else
 713                                 csz = (*p->width)(p, uc);
 714                         while (lsz >= csz) {
 715                                 if (p->enc == TERMENC_ASCII)
 716                                         encode(p, cp, ssz);
 717                                 else
 718                                         encode1(p, uc);
 719                                 lsz -= csz;
 720                         }
 721                         continue;
 722                 case ESCAPE_SKIPCHAR:
 723                         p->flags |= TERMP_BACKAFTER;
 724                         continue;
 725                 case ESCAPE_OVERSTRIKE:
 726                         cp = seq + sz;
 727                         while (seq < cp) {
 728                                 if (*seq == '\\') {
 729                                         mandoc_escape(&seq, NULL, NULL);
 730                                         continue;
 731                                 }
 732                                 encode1(p, *seq++);
 733                                 if (seq < cp) {
 734                                         if (p->flags & TERMP_BACKBEFORE)
 735                                                 p->flags |= TERMP_BACKAFTER;
 736                                         else
 737                                                 p->flags |= TERMP_BACKBEFORE;
 738                                 }
 739                         }
 740                         /* Trim trailing backspace/blank pair. */
 741                         if (p->tcol->lastcol > 2 &&
 742                             (p->tcol->buf[p->tcol->lastcol - 1] == ' ' ||
 743                              p->tcol->buf[p->tcol->lastcol - 1] == '\t'))
 744                                 p->tcol->lastcol -= 2;
 745                         if (p->col > p->tcol->lastcol)
 746                                 p->col = p->tcol->lastcol;
 747                         continue;
 748                 default:
 749                         continue;
 750                 }
 751
 752                 /*
 753                  * Common handling for Unicode and numbered
 754                  * character escape sequences.
 755                  */
 756
 757                 if (p->enc == TERMENC_ASCII) {
 758                         cp = ascii_uc2str(uc);
 759                         encode(p, cp, strlen(cp));
 760                 } else {
 761                         if ((uc < 0x20 && uc != 0x09) ||
 762                             (uc > 0x7E && uc < 0xA0))
 763                                 uc = 0xFFFD;
 764                         encode1(p, uc);
 765                 }
 766         }
 767         p->flags &= ~TERMP_NBRWORD;
 768 }
 769
 770 static void
 771 adjbuf(struct termp_col *c, size_t sz)
 772 {
 773         if (c->maxcols == 0)
 774                 c->maxcols = 1024;
 775         while (c->maxcols <= sz)
 776                 c->maxcols <<= 2;
 777         c->buf = mandoc_reallocarray(c->buf, c->maxcols, sizeof(*c->buf));
 778 }
 779
 780 static void
 781 bufferc(struct termp *p, char c)
 782 {
 783         if (p->flags & TERMP_NOBUF) {
 784                 (*p->letter)(p, c);
 785                 return;
 786         }
 787         if (p->col + 1 >= p->tcol->maxcols)
 788                 adjbuf(p->tcol, p->col + 1);
 789         if (p->tcol->lastcol <= p->col || (c != ' ' && c != ASCII_NBRSP))
 790                 p->tcol->buf[p->col] = c;
 791         if (p->tcol->lastcol < ++p->col)
 792                 p->tcol->lastcol = p->col;
 793 }
 794
 795 /*
 796  * See encode().
 797  * Do this for a single (probably unicode) value.
 798  * Does not check for non-decorated glyphs.
 799  */
 800 static void
 801 encode1(struct termp *p, int c)
 802 {
 803         enum termfont     f;
 804
 805         if (p->flags & TERMP_NOBUF) {
 806                 (*p->letter)(p, c);
 807                 return;
 808         }
 809
 810         if (p->col + 7 >= p->tcol->maxcols)
 811                 adjbuf(p->tcol, p->col + 7);
 812
 813         f = (c == ASCII_HYPH || c > 127 || isgraph(c)) ?
 814             p->fontq[p->fonti] : TERMFONT_NONE;
 815
 816         if (p->flags & TERMP_BACKBEFORE) {
 817                 if (p->tcol->buf[p->col - 1] == ' ' ||
 818                     p->tcol->buf[p->col - 1] == '\t')
 819                         p->col--;
 820                 else
 821                         p->tcol->buf[p->col++] = '\b';
 822                 p->flags &= ~TERMP_BACKBEFORE;
 823         }
 824         if (f == TERMFONT_UNDER || f == TERMFONT_BI) {
 825                 p->tcol->buf[p->col++] = '_';
 826                 p->tcol->buf[p->col++] = '\b';
 827         }
 828         if (f == TERMFONT_BOLD || f == TERMFONT_BI) {
 829                 if (c == ASCII_HYPH)
 830                         p->tcol->buf[p->col++] = '-';
 831                 else
 832                         p->tcol->buf[p->col++] = c;
 833                 p->tcol->buf[p->col++] = '\b';
 834         }
 835         if (p->tcol->lastcol <= p->col || (c != ' ' && c != ASCII_NBRSP))
 836                 p->tcol->buf[p->col] = c;
 837         if (p->tcol->lastcol < ++p->col)
 838                 p->tcol->lastcol = p->col;
 839         if (p->flags & TERMP_BACKAFTER) {
 840                 p->flags |= TERMP_BACKBEFORE;
 841                 p->flags &= ~TERMP_BACKAFTER;
 842         }
 843 }
 844
 845 static void
 846 encode(struct termp *p, const char *word, size_t sz)
 847 {
 848         size_t            i;
 849
 850         if (p->flags & TERMP_NOBUF) {
 851                 for (i = 0; i < sz; i++)
 852                         (*p->letter)(p, word[i]);
 853                 return;
 854         }
 855
 856         if (p->col + 2 + (sz * 5) >= p->tcol->maxcols)
 857                 adjbuf(p->tcol, p->col + 2 + (sz * 5));
 858
 859         for (i = 0; i < sz; i++) {
 860                 if (ASCII_HYPH == word[i] ||
 861                     isgraph((unsigned char)word[i]))
 862                         encode1(p, word[i]);
 863                 else {
 864                         if (p->tcol->lastcol <= p->col ||
 865                             (word[i] != ' ' && word[i] != ASCII_NBRSP))
 866                                 p->tcol->buf[p->col] = word[i];
 867                         p->col++;
 868
 869                         /*
 870                          * Postpone the effect of \z while handling
 871                          * an overstrike sequence from ascii_uc2str().
 872                          */
 873
 874                         if (word[i] == '\b' &&
 875                             (p->flags & TERMP_BACKBEFORE)) {
 876                                 p->flags &= ~TERMP_BACKBEFORE;
 877                                 p->flags |= TERMP_BACKAFTER;
 878                         }
 879                 }
 880         }
 881         if (p->tcol->lastcol < p->col)
 882                 p->tcol->lastcol = p->col;
 883 }
 884
 885 void
 886 term_setwidth(struct termp *p, const char *wstr)
 887 {
 888         struct roffsu    su;
 889         int              iop, width;
 890
 891         iop = 0;
 892         width = 0;
 893         if (NULL != wstr) {
 894                 switch (*wstr) {
 895                 case '+':
 896                         iop = 1;
 897                         wstr++;
 898                         break;
 899                 case '-':
 900                         iop = -1;
 901                         wstr++;
 902                         break;
 903                 default:
 904                         break;
 905                 }
 906                 if (a2roffsu(wstr, &su, SCALE_MAX) != NULL)
 907                         width = term_hspan(p, &su);
 908                 else
 909                         iop = 0;
 910         }
 911         (*p->setwidth)(p, iop, width);
 912 }
 913
 914 size_t
 915 term_len(const struct termp *p, size_t sz)
 916 {
 917
 918         return (*p->width)(p, ' ') * sz;
 919 }
 920
 921 static size_t
 922 cond_width(const struct termp *p, int c, int *skip)
 923 {
 924
 925         if (*skip) {
 926                 (*skip) = 0;
 927                 return 0;
 928         } else
 929                 return (*p->width)(p, c);
 930 }
 931
 932 size_t
 933 term_strlen(const struct termp *p, const char *cp)
 934 {
 935         size_t           sz, rsz, i;
 936         int              ssz, skip, uc;
 937         const char      *seq, *rhs;
 938         enum mandoc_esc  esc;
 939         static const char rej[] = { '\\', ASCII_NBRSP, ASCII_HYPH,
 940                         ASCII_BREAK, '\0' };
 941
 942         /*
 943          * Account for escaped sequences within string length
 944          * calculations.  This follows the logic in term_word() as we
 945          * must calculate the width of produced strings.
 946          */
 947
 948         sz = 0;
 949         skip = 0;
 950         while ('\0' != *cp) {
 951                 rsz = strcspn(cp, rej);
 952                 for (i = 0; i < rsz; i++)
 953                         sz += cond_width(p, *cp++, &skip);
 954
 955                 switch (*cp) {
 956                 case '\\':
 957                         cp++;
 958                         rhs = NULL;
 959                         esc = mandoc_escape(&cp, &seq, &ssz);
 960                         switch (esc) {
 961                         case ESCAPE_UNICODE:
 962                                 uc = mchars_num2uc(seq + 1, ssz - 1);
 963                                 break;
 964                         case ESCAPE_NUMBERED:
 965                                 uc = mchars_num2char(seq, ssz);
 966                                 if (uc < 0)
 967                                         continue;
 968                                 break;
 969                         case ESCAPE_SPECIAL:
 970                                 if (p->enc == TERMENC_ASCII) {
 971                                         rhs = mchars_spec2str(seq, ssz, &rsz);
 972                                         if (rhs != NULL)
 973                                                 break;
 974                                 } else {
 975                                         uc = mchars_spec2cp(seq, ssz);
 976                                         if (uc > 0)
 977                                                 sz += cond_width(p, uc, &skip);
 978                                 }
 979                                 continue;
 980                         case ESCAPE_UNDEF:
 981                                 uc = *seq;
 982                                 break;
 983                         case ESCAPE_DEVICE:
 984                                 if (p->type == TERMTYPE_PDF) {
 985                                         rhs = "pdf";
 986                                         rsz = 3;
 987                                 } else if (p->type == TERMTYPE_PS) {
 988                                         rhs = "ps";
 989                                         rsz = 2;
 990                                 } else if (p->enc == TERMENC_ASCII) {
 991                                         rhs = "ascii";
 992                                         rsz = 5;
 993                                 } else {
 994                                         rhs = "utf8";
 995                                         rsz = 4;
 996                                 }
 997                                 break;
 998                         case ESCAPE_SKIPCHAR:
 999                                 skip = 1;
1000                                 continue;
1001                         case ESCAPE_OVERSTRIKE:
1002                                 rsz = 0;
1003                                 rhs = seq + ssz;
1004                                 while (seq < rhs) {
1005                                         if (*seq == '\\') {
1006                                                 mandoc_escape(&seq, NULL, NULL);
1007                                                 continue;
1008                                         }
1009                                         i = (*p->width)(p, *seq++);
1010                                         if (rsz < i)
1011                                                 rsz = i;
1012                                 }
1013                                 sz += rsz;
1014                                 continue;
1015                         default:
1016                                 continue;
1017                         }
1018
1019                         /*
1020                          * Common handling for Unicode and numbered
1021                          * character escape sequences.
1022                          */
1023
1024                         if (rhs == NULL) {
1025                                 if (p->enc == TERMENC_ASCII) {
1026                                         rhs = ascii_uc2str(uc);
1027                                         rsz = strlen(rhs);
1028                                 } else {
1029                                         if ((uc < 0x20 && uc != 0x09) ||
1030                                             (uc > 0x7E && uc < 0xA0))
1031                                                 uc = 0xFFFD;
1032                                         sz += cond_width(p, uc, &skip);
1033                                         continue;
1034                                 }
1035                         }
1036
1037                         if (skip) {
1038                                 skip = 0;
1039                                 break;
1040                         }
1041
1042                         /*
1043                          * Common handling for all escape sequences
1044                          * printing more than one character.
1045                          */
1046
1047                         for (i = 0; i < rsz; i++)
1048                                 sz += (*p->width)(p, *rhs++);
1049                         break;
1050                 case ASCII_NBRSP:
1051                         sz += cond_width(p, ' ', &skip);
1052                         cp++;
1053                         break;
1054                 case ASCII_HYPH:
1055                         sz += cond_width(p, '-', &skip);
1056                         cp++;
1057                         break;
1058                 default:
1059                         break;
1060                 }
1061         }
1062
1063         return sz;
1064 }
1065
1066 int
1067 term_vspan(const struct termp *p, const struct roffsu *su)
1068 {
1069         double           r;
1070         int              ri;
1071
1072         switch (su->unit) {
1073         case SCALE_BU:
1074                 r = su->scale / 40.0;
1075                 break;
1076         case SCALE_CM:
1077                 r = su->scale * 6.0 / 2.54;
1078                 break;
1079         case SCALE_FS:
1080                 r = su->scale * 65536.0 / 40.0;
1081                 break;
1082         case SCALE_IN:
1083                 r = su->scale * 6.0;
1084                 break;
1085         case SCALE_MM:
1086                 r = su->scale * 0.006;
1087                 break;
1088         case SCALE_PC:
1089                 r = su->scale;
1090                 break;
1091         case SCALE_PT:
1092                 r = su->scale / 12.0;
1093                 break;
1094         case SCALE_EN:
1095         case SCALE_EM:
1096                 r = su->scale * 0.6;
1097                 break;
1098         case SCALE_VS:
1099                 r = su->scale;
1100                 break;
1101         default:
1102                 abort();
1103         }
1104         ri = r > 0.0 ? r + 0.4995 : r - 0.4995;
1105         return ri < 66 ? ri : 1;
1106 }
1107
1108 /*
1109  * Convert a scaling width to basic units, rounding towards 0.
1110  */
1111 int
1112 term_hspan(const struct termp *p, const struct roffsu *su)
1113 {
1114
1115         return (*p->hspan)(p, su);
1116 }
1117
1118 /*
1119  * Convert a scaling width to basic units, rounding to closest.
1120  */
1121 int
1122 term_hen(const struct termp *p, const struct roffsu *su)
1123 {
1124         int bu;
1125
1126         if ((bu = (*p->hspan)(p, su)) >= 0)
1127                 return (bu + 11) / 24;
1128         else
1129                 return -((-bu + 11) / 24);
1130 }