roff.c

   1 /*      $Id: roff.c,v 1.139 2011/05/24 14:00:39 kristaps Exp $ */
   2 /*
   3  * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
   4  * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
   5  *
   6  * Permission to use, copy, modify, and distribute this software for any
   7  * purpose with or without fee is hereby granted, provided that the above
   8  * copyright notice and this permission notice appear in all copies.
   9  *
  10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
  11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
  13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17  */
  18 #ifdef HAVE_CONFIG_H
  19 #include "config.h"
  20 #endif
  21
  22 #include <assert.h>
  23 #include <ctype.h>
  24 #include <stdlib.h>
  25 #include <string.h>
  26
  27 #include "mandoc.h"
  28 #include "libroff.h"
  29 #include "libmandoc.h"
  30
  31 #define RSTACK_MAX      128
  32
  33 enum    rofft {
  34         ROFF_ad,
  35         ROFF_am,
  36         ROFF_ami,
  37         ROFF_am1,
  38         ROFF_de,
  39         ROFF_dei,
  40         ROFF_de1,
  41         ROFF_ds,
  42         ROFF_el,
  43         ROFF_hy,
  44         ROFF_ie,
  45         ROFF_if,
  46         ROFF_ig,
  47         ROFF_it,
  48         ROFF_ne,
  49         ROFF_nh,
  50         ROFF_nr,
  51         ROFF_ns,
  52         ROFF_ps,
  53         ROFF_rm,
  54         ROFF_so,
  55         ROFF_ta,
  56         ROFF_tr,
  57         ROFF_TS,
  58         ROFF_TE,
  59         ROFF_T_,
  60         ROFF_EQ,
  61         ROFF_EN,
  62         ROFF_cblock,
  63         ROFF_ccond, /* FIXME: remove this. */
  64         ROFF_USERDEF,
  65         ROFF_MAX
  66 };
  67
  68 enum    roffrule {
  69         ROFFRULE_ALLOW,
  70         ROFFRULE_DENY
  71 };
  72
  73 struct  roffstr {
  74         char            *name; /* key of symbol */
  75         char            *string; /* current value */
  76         struct roffstr  *next; /* next in list */
  77 };
  78
  79 struct  roff {
  80         struct mparse   *parse; /* parse point */
  81         struct roffnode *last; /* leaf of stack */
  82         enum roffrule    rstack[RSTACK_MAX]; /* stack of !`ie' rules */
  83         int              rstackpos; /* position in rstack */
  84         struct regset   *regs; /* read/writable registers */
  85         struct roffstr  *first_string; /* user-defined strings & macros */
  86         const char      *current_string; /* value of last called user macro */
  87         struct tbl_node *first_tbl; /* first table parsed */
  88         struct tbl_node *last_tbl; /* last table parsed */
  89         struct tbl_node *tbl; /* current table being parsed */
  90         struct eqn_node *last_eqn; /* last equation parsed */
  91         struct eqn_node *first_eqn; /* first equation parsed */
  92         struct eqn_node *eqn; /* current equation being parsed */
  93 };
  94
  95 struct  roffnode {
  96         enum rofft       tok; /* type of node */
  97         struct roffnode *parent; /* up one in stack */
  98         int              line; /* parse line */
  99         int              col; /* parse col */
 100         char            *name; /* node name, e.g. macro name */
 101         char            *end; /* end-rules: custom token */
 102         int              endspan; /* end-rules: next-line or infty */
 103         enum roffrule    rule; /* current evaluation rule */
 104 };
 105
 106 #define ROFF_ARGS        struct roff *r, /* parse ctx */ \
 107                          enum rofft tok, /* tok of macro */ \
 108                          char **bufp, /* input buffer */ \
 109                          size_t *szp, /* size of input buffer */ \
 110                          int ln, /* parse line */ \
 111                          int ppos, /* original pos in buffer */ \
 112                          int pos, /* current pos in buffer */ \
 113                          int *offs /* reset offset of buffer data */
 114
 115 typedef enum rofferr (*roffproc)(ROFF_ARGS);
 116
 117 struct  roffmac {
 118         const char      *name; /* macro name */
 119         roffproc         proc; /* process new macro */
 120         roffproc         text; /* process as child text of macro */
 121         roffproc         sub; /* process as child of macro */
 122         int              flags;
 123 #define ROFFMAC_STRUCT  (1 << 0) /* always interpret */
 124         struct roffmac  *next;
 125 };
 126
 127 static  enum rofferr     roff_block(ROFF_ARGS);
 128 static  enum rofferr     roff_block_text(ROFF_ARGS);
 129 static  enum rofferr     roff_block_sub(ROFF_ARGS);
 130 static  enum rofferr     roff_cblock(ROFF_ARGS);
 131 static  enum rofferr     roff_ccond(ROFF_ARGS);
 132 static  enum rofferr     roff_cond(ROFF_ARGS);
 133 static  enum rofferr     roff_cond_text(ROFF_ARGS);
 134 static  enum rofferr     roff_cond_sub(ROFF_ARGS);
 135 static  enum rofferr     roff_ds(ROFF_ARGS);
 136 static  enum roffrule    roff_evalcond(const char *, int *);
 137 static  void             roff_freestr(struct roff *);
 138 static  char            *roff_getname(struct roff *, char **, int, int);
 139 static  const char      *roff_getstrn(const struct roff *,
 140                                 const char *, size_t);
 141 static  enum rofferr     roff_line_ignore(ROFF_ARGS);
 142 static  enum rofferr     roff_nr(ROFF_ARGS);
 143 static  int              roff_res(struct roff *,
 144                                 char **, size_t *, int);
 145 static  enum rofferr     roff_rm(ROFF_ARGS);
 146 static  void             roff_setstr(struct roff *,
 147                                 const char *, const char *, int);
 148 static  enum rofferr     roff_so(ROFF_ARGS);
 149 static  enum rofferr     roff_TE(ROFF_ARGS);
 150 static  enum rofferr     roff_TS(ROFF_ARGS);
 151 static  enum rofferr     roff_EQ(ROFF_ARGS);
 152 static  enum rofferr     roff_EN(ROFF_ARGS);
 153 static  enum rofferr     roff_T_(ROFF_ARGS);
 154 static  enum rofferr     roff_userdef(ROFF_ARGS);
 155
 156 /* See roff_hash_find() */
 157
 158 #define ASCII_HI         126
 159 #define ASCII_LO         33
 160 #define HASHWIDTH       (ASCII_HI - ASCII_LO + 1)
 161
 162 static  struct roffmac  *hash[HASHWIDTH];
 163
 164 static  struct roffmac   roffs[ROFF_MAX] = {
 165         { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
 166         { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
 167         { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
 168         { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
 169         { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
 170         { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
 171         { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
 172         { "ds", roff_ds, NULL, NULL, 0, NULL },
 173         { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
 174         { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
 175         { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
 176         { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
 177         { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
 178         { "it", roff_line_ignore, NULL, NULL, 0, NULL },
 179         { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
 180         { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
 181         { "nr", roff_nr, NULL, NULL, 0, NULL },
 182         { "ns", roff_line_ignore, NULL, NULL, 0, NULL },
 183         { "ps", roff_line_ignore, NULL, NULL, 0, NULL },
 184         { "rm", roff_rm, NULL, NULL, 0, NULL },
 185         { "so", roff_so, NULL, NULL, 0, NULL },
 186         { "ta", roff_line_ignore, NULL, NULL, 0, NULL },
 187         { "tr", roff_line_ignore, NULL, NULL, 0, NULL },
 188         { "TS", roff_TS, NULL, NULL, 0, NULL },
 189         { "TE", roff_TE, NULL, NULL, 0, NULL },
 190         { "T&", roff_T_, NULL, NULL, 0, NULL },
 191         { "EQ", roff_EQ, NULL, NULL, 0, NULL },
 192         { "EN", roff_EN, NULL, NULL, 0, NULL },
 193         { ".", roff_cblock, NULL, NULL, 0, NULL },
 194         { "\\}", roff_ccond, NULL, NULL, 0, NULL },
 195         { NULL, roff_userdef, NULL, NULL, 0, NULL },
 196 };
 197
 198 static  void             roff_free1(struct roff *);
 199 static  enum rofft       roff_hash_find(const char *, size_t);
 200 static  void             roff_hash_init(void);
 201 static  void             roffnode_cleanscope(struct roff *);
 202 static  void             roffnode_push(struct roff *, enum rofft,
 203                                 const char *, int, int);
 204 static  void             roffnode_pop(struct roff *);
 205 static  enum rofft       roff_parse(struct roff *, const char *, int *);
 206
 207 /* See roff_hash_find() */
 208 #define ROFF_HASH(p)    (p[0] - ASCII_LO)
 209
 210 static void
 211 roff_hash_init(void)
 212 {
 213         struct roffmac   *n;
 214         int               buc, i;
 215
 216         for (i = 0; i < (int)ROFF_USERDEF; i++) {
 217                 assert(roffs[i].name[0] >= ASCII_LO);
 218                 assert(roffs[i].name[0] <= ASCII_HI);
 219
 220                 buc = ROFF_HASH(roffs[i].name);
 221
 222                 if (NULL != (n = hash[buc])) {
 223                         for ( ; n->next; n = n->next)
 224                                 /* Do nothing. */ ;
 225                         n->next = &roffs[i];
 226                 } else
 227                         hash[buc] = &roffs[i];
 228         }
 229 }
 230
 231
 232 /*
 233  * Look up a roff token by its name.  Returns ROFF_MAX if no macro by
 234  * the nil-terminated string name could be found.
 235  */
 236 static enum rofft
 237 roff_hash_find(const char *p, size_t s)
 238 {
 239         int              buc;
 240         struct roffmac  *n;
 241
 242         /*
 243          * libroff has an extremely simple hashtable, for the time
 244          * being, which simply keys on the first character, which must
 245          * be printable, then walks a chain.  It works well enough until
 246          * optimised.
 247          */
 248
 249         if (p[0] < ASCII_LO || p[0] > ASCII_HI)
 250                 return(ROFF_MAX);
 251
 252         buc = ROFF_HASH(p);
 253
 254         if (NULL == (n = hash[buc]))
 255                 return(ROFF_MAX);
 256         for ( ; n; n = n->next)
 257                 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
 258                         return((enum rofft)(n - roffs));
 259
 260         return(ROFF_MAX);
 261 }
 262
 263
 264 /*
 265  * Pop the current node off of the stack of roff instructions currently
 266  * pending.
 267  */
 268 static void
 269 roffnode_pop(struct roff *r)
 270 {
 271         struct roffnode *p;
 272
 273         assert(r->last);
 274         p = r->last;
 275
 276         r->last = r->last->parent;
 277         free(p->name);
 278         free(p->end);
 279         free(p);
 280 }
 281
 282
 283 /*
 284  * Push a roff node onto the instruction stack.  This must later be
 285  * removed with roffnode_pop().
 286  */
 287 static void
 288 roffnode_push(struct roff *r, enum rofft tok, const char *name,
 289                 int line, int col)
 290 {
 291         struct roffnode *p;
 292
 293         p = mandoc_calloc(1, sizeof(struct roffnode));
 294         p->tok = tok;
 295         if (name)
 296                 p->name = mandoc_strdup(name);
 297         p->parent = r->last;
 298         p->line = line;
 299         p->col = col;
 300         p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
 301
 302         r->last = p;
 303 }
 304
 305
 306 static void
 307 roff_free1(struct roff *r)
 308 {
 309         struct tbl_node *t;
 310         struct eqn_node *e;
 311
 312         while (NULL != (t = r->first_tbl)) {
 313                 r->first_tbl = t->next;
 314                 tbl_free(t);
 315         }
 316
 317         r->first_tbl = r->last_tbl = r->tbl = NULL;
 318
 319         while (NULL != (e = r->first_eqn)) {
 320                 r->first_eqn = e->next;
 321                 eqn_free(e);
 322         }
 323
 324         r->first_eqn = r->last_eqn = r->eqn = NULL;
 325
 326         while (r->last)
 327                 roffnode_pop(r);
 328
 329         roff_freestr(r);
 330 }
 331
 332
 333 void
 334 roff_reset(struct roff *r)
 335 {
 336
 337         roff_free1(r);
 338 }
 339
 340
 341 void
 342 roff_free(struct roff *r)
 343 {
 344
 345         roff_free1(r);
 346         free(r);
 347 }
 348
 349
 350 struct roff *
 351 roff_alloc(struct regset *regs, struct mparse *parse)
 352 {
 353         struct roff     *r;
 354
 355         r = mandoc_calloc(1, sizeof(struct roff));
 356         r->regs = regs;
 357         r->parse = parse;
 358         r->rstackpos = -1;
 359
 360         roff_hash_init();
 361         return(r);
 362 }
 363
 364
 365 /*
 366  * Pre-filter each and every line for reserved words (one beginning with
 367  * `\*', e.g., `\*(ab').  These must be handled before the actual line
 368  * is processed.
 369  */
 370 static int
 371 roff_res(struct roff *r, char **bufp, size_t *szp, int pos)
 372 {
 373         const char      *stesc; /* start of an escape sequence ('\\') */
 374         const char      *stnam; /* start of the name, after "[(*" */
 375         const char      *cp;    /* end of the name, e.g. before ']' */
 376         const char      *res;   /* the string to be substituted */
 377         int              i, maxl;
 378         size_t           nsz;
 379         char            *n;
 380
 381         /* Search for a leading backslash and save a pointer to it. */
 382
 383         cp = *bufp + pos;
 384         while (NULL != (cp = strchr(cp, '\\'))) {
 385                 stesc = cp++;
 386
 387                 /*
 388                  * The second character must be an asterisk.
 389                  * If it isn't, skip it anyway:  It is escaped,
 390                  * so it can't start another escape sequence.
 391                  */
 392
 393                 if ('\0' == *cp)
 394                         return(1);
 395                 if ('*' != *cp++)
 396                         continue;
 397
 398                 /*
 399                  * The third character decides the length
 400                  * of the name of the string.
 401                  * Save a pointer to the name.
 402                  */
 403
 404                 switch (*cp) {
 405                 case ('\0'):
 406                         return(1);
 407                 case ('('):
 408                         cp++;
 409                         maxl = 2;
 410                         break;
 411                 case ('['):
 412                         cp++;
 413                         maxl = 0;
 414                         break;
 415                 default:
 416                         maxl = 1;
 417                         break;
 418                 }
 419                 stnam = cp;
 420
 421                 /* Advance to the end of the name. */
 422
 423                 for (i = 0; 0 == maxl || i < maxl; i++, cp++) {
 424                         if ('\0' == *cp)
 425                                 return(1); /* Error. */
 426                         if (0 == maxl && ']' == *cp)
 427                                 break;
 428                 }
 429
 430                 /*
 431                  * Retrieve the replacement string; if it is
 432                  * undefined, resume searching for escapes.
 433                  */
 434
 435                 res = roff_getstrn(r, stnam, (size_t)i);
 436
 437                 if (NULL == res) {
 438                         cp -= maxl ? 1 : 0;
 439                         continue;
 440                 }
 441
 442                 /* Replace the escape sequence by the string. */
 443
 444                 nsz = *szp + strlen(res) + 1;
 445                 n = mandoc_malloc(nsz);
 446
 447                 strlcpy(n, *bufp, (size_t)(stesc - *bufp + 1));
 448                 strlcat(n, res, nsz);
 449                 strlcat(n, cp + (maxl ? 0 : 1), nsz);
 450
 451                 free(*bufp);
 452
 453                 *bufp = n;
 454                 *szp = nsz;
 455                 return(0);
 456         }
 457
 458         return(1);
 459 }
 460
 461
 462 enum rofferr
 463 roff_parseln(struct roff *r, int ln, char **bufp,
 464                 size_t *szp, int pos, int *offs)
 465 {
 466         enum rofft       t;
 467         enum rofferr     e;
 468         int              ppos, ctl;
 469
 470         /*
 471          * Run the reserved-word filter only if we have some reserved
 472          * words to fill in.
 473          */
 474
 475         if (r->first_string && ! roff_res(r, bufp, szp, pos))
 476                 return(ROFF_REPARSE);
 477
 478         ppos = pos;
 479         ctl = mandoc_getcontrol(*bufp, &pos);
 480
 481         /*
 482          * First, if a scope is open and we're not a macro, pass the
 483          * text through the macro's filter.  If a scope isn't open and
 484          * we're not a macro, just let it through.
 485          * Finally, if there's an equation scope open, divert it into it
 486          * no matter our state.
 487          */
 488
 489         if (r->last && ! ctl) {
 490                 t = r->last->tok;
 491                 assert(roffs[t].text);
 492                 e = (*roffs[t].text)
 493                         (r, t, bufp, szp, ln, pos, pos, offs);
 494                 assert(ROFF_IGN == e || ROFF_CONT == e);
 495                 if (ROFF_CONT != e)
 496                         return(e);
 497                 if (r->eqn)
 498                         return(eqn_read(&r->eqn, ln, *bufp, pos));
 499                 if (r->tbl)
 500                         return(tbl_read(r->tbl, ln, *bufp, pos));
 501                 return(ROFF_CONT);
 502         } else if ( ! ctl) {
 503                 if (r->eqn)
 504                         return(eqn_read(&r->eqn, ln, *bufp, pos));
 505                 if (r->tbl)
 506                         return(tbl_read(r->tbl, ln, *bufp, pos));
 507                 return(ROFF_CONT);
 508         } else if (r->eqn)
 509                 return(eqn_read(&r->eqn, ln, *bufp, ppos));
 510
 511         /*
 512          * If a scope is open, go to the child handler for that macro,
 513          * as it may want to preprocess before doing anything with it.
 514          * Don't do so if an equation is open.
 515          */
 516
 517         if (r->last) {
 518                 t = r->last->tok;
 519                 assert(roffs[t].sub);
 520                 return((*roffs[t].sub)
 521                                 (r, t, bufp, szp,
 522                                  ln, ppos, pos, offs));
 523         }
 524
 525         /*
 526          * Lastly, as we've no scope open, try to look up and execute
 527          * the new macro.  If no macro is found, simply return and let
 528          * the compilers handle it.
 529          */
 530
 531         if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
 532                 return(ROFF_CONT);
 533
 534         assert(roffs[t].proc);
 535         return((*roffs[t].proc)
 536                         (r, t, bufp, szp,
 537                          ln, ppos, pos, offs));
 538 }
 539
 540
 541 void
 542 roff_endparse(struct roff *r)
 543 {
 544
 545         if (r->last)
 546                 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
 547                                 r->last->line, r->last->col, NULL);
 548
 549         if (r->eqn) {
 550                 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
 551                                 r->eqn->eqn.line, r->eqn->eqn.pos, NULL);
 552                 eqn_end(r->eqn);
 553                 r->eqn = NULL;
 554         }
 555
 556         if (r->tbl) {
 557                 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
 558                                 r->tbl->line, r->tbl->pos, NULL);
 559                 tbl_end(r->tbl);
 560                 r->tbl = NULL;
 561         }
 562 }
 563
 564 /*
 565  * Parse a roff node's type from the input buffer.  This must be in the
 566  * form of ".foo xxx" in the usual way.
 567  */
 568 static enum rofft
 569 roff_parse(struct roff *r, const char *buf, int *pos)
 570 {
 571         const char      *mac;
 572         size_t           maclen;
 573         enum rofft       t;
 574
 575         if ('\0' == buf[*pos] || '"' == buf[*pos])
 576                 return(ROFF_MAX);
 577
 578         mac = buf + *pos;
 579         maclen = strcspn(mac, " \\\t\0");
 580
 581         t = (r->current_string = roff_getstrn(r, mac, maclen))
 582             ? ROFF_USERDEF : roff_hash_find(mac, maclen);
 583
 584         *pos += (int)maclen;
 585
 586         while (buf[*pos] && ' ' == buf[*pos])
 587                 (*pos)++;
 588
 589         return(t);
 590 }
 591
 592 /* ARGSUSED */
 593 static enum rofferr
 594 roff_cblock(ROFF_ARGS)
 595 {
 596
 597         /*
 598          * A block-close `..' should only be invoked as a child of an
 599          * ignore macro, otherwise raise a warning and just ignore it.
 600          */
 601
 602         if (NULL == r->last) {
 603                 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
 604                 return(ROFF_IGN);
 605         }
 606
 607         switch (r->last->tok) {
 608         case (ROFF_am):
 609                 /* FALLTHROUGH */
 610         case (ROFF_ami):
 611                 /* FALLTHROUGH */
 612         case (ROFF_am1):
 613                 /* FALLTHROUGH */
 614         case (ROFF_de):
 615                 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
 616                 /* FALLTHROUGH */
 617         case (ROFF_dei):
 618                 /* FALLTHROUGH */
 619         case (ROFF_ig):
 620                 break;
 621         default:
 622                 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
 623                 return(ROFF_IGN);
 624         }
 625
 626         if ((*bufp)[pos])
 627                 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
 628
 629         roffnode_pop(r);
 630         roffnode_cleanscope(r);
 631         return(ROFF_IGN);
 632
 633 }
 634
 635
 636 static void
 637 roffnode_cleanscope(struct roff *r)
 638 {
 639
 640         while (r->last) {
 641                 if (--r->last->endspan < 0)
 642                         break;
 643                 roffnode_pop(r);
 644         }
 645 }
 646
 647
 648 /* ARGSUSED */
 649 static enum rofferr
 650 roff_ccond(ROFF_ARGS)
 651 {
 652
 653         if (NULL == r->last) {
 654                 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
 655                 return(ROFF_IGN);
 656         }
 657
 658         switch (r->last->tok) {
 659         case (ROFF_el):
 660                 /* FALLTHROUGH */
 661         case (ROFF_ie):
 662                 /* FALLTHROUGH */
 663         case (ROFF_if):
 664                 break;
 665         default:
 666                 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
 667                 return(ROFF_IGN);
 668         }
 669
 670         if (r->last->endspan > -1) {
 671                 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
 672                 return(ROFF_IGN);
 673         }
 674
 675         if ((*bufp)[pos])
 676                 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
 677
 678         roffnode_pop(r);
 679         roffnode_cleanscope(r);
 680         return(ROFF_IGN);
 681 }
 682
 683
 684 /* ARGSUSED */
 685 static enum rofferr
 686 roff_block(ROFF_ARGS)
 687 {
 688         int             sv;
 689         size_t          sz;
 690         char            *name;
 691
 692         name = NULL;
 693
 694         if (ROFF_ig != tok) {
 695                 if ('\0' == (*bufp)[pos]) {
 696                         mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
 697                         return(ROFF_IGN);
 698                 }
 699
 700                 /*
 701                  * Re-write `de1', since we don't really care about
 702                  * groff's strange compatibility mode, into `de'.
 703                  */
 704
 705                 if (ROFF_de1 == tok)
 706                         tok = ROFF_de;
 707                 if (ROFF_de == tok)
 708                         name = *bufp + pos;
 709                 else
 710                         mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
 711                             roffs[tok].name);
 712
 713                 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
 714                         pos++;
 715
 716                 while (isspace((unsigned char)(*bufp)[pos]))
 717                         (*bufp)[pos++] = '\0';
 718         }
 719
 720         roffnode_push(r, tok, name, ln, ppos);
 721
 722         /*
 723          * At the beginning of a `de' macro, clear the existing string
 724          * with the same name, if there is one.  New content will be
 725          * added from roff_block_text() in multiline mode.
 726          */
 727
 728         if (ROFF_de == tok)
 729                 roff_setstr(r, name, "", 0);
 730
 731         if ('\0' == (*bufp)[pos])
 732                 return(ROFF_IGN);
 733
 734         /* If present, process the custom end-of-line marker. */
 735
 736         sv = pos;
 737         while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
 738                 pos++;
 739
 740         /*
 741          * Note: groff does NOT like escape characters in the input.
 742          * Instead of detecting this, we're just going to let it fly and
 743          * to hell with it.
 744          */
 745
 746         assert(pos > sv);
 747         sz = (size_t)(pos - sv);
 748
 749         if (1 == sz && '.' == (*bufp)[sv])
 750                 return(ROFF_IGN);
 751
 752         r->last->end = mandoc_malloc(sz + 1);
 753
 754         memcpy(r->last->end, *bufp + sv, sz);
 755         r->last->end[(int)sz] = '\0';
 756
 757         if ((*bufp)[pos])
 758                 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
 759
 760         return(ROFF_IGN);
 761 }
 762
 763
 764 /* ARGSUSED */
 765 static enum rofferr
 766 roff_block_sub(ROFF_ARGS)
 767 {
 768         enum rofft      t;
 769         int             i, j;
 770
 771         /*
 772          * First check whether a custom macro exists at this level.  If
 773          * it does, then check against it.  This is some of groff's
 774          * stranger behaviours.  If we encountered a custom end-scope
 775          * tag and that tag also happens to be a "real" macro, then we
 776          * need to try interpreting it again as a real macro.  If it's
 777          * not, then return ignore.  Else continue.
 778          */
 779
 780         if (r->last->end) {
 781                 for (i = pos, j = 0; r->last->end[j]; j++, i++)
 782                         if ((*bufp)[i] != r->last->end[j])
 783                                 break;
 784
 785                 if ('\0' == r->last->end[j] &&
 786                                 ('\0' == (*bufp)[i] ||
 787                                  ' ' == (*bufp)[i] ||
 788                                  '\t' == (*bufp)[i])) {
 789                         roffnode_pop(r);
 790                         roffnode_cleanscope(r);
 791
 792                         while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
 793                                 i++;
 794
 795                         pos = i;
 796                         if (ROFF_MAX != roff_parse(r, *bufp, &pos))
 797                                 return(ROFF_RERUN);
 798                         return(ROFF_IGN);
 799                 }
 800         }
 801
 802         /*
 803          * If we have no custom end-query or lookup failed, then try
 804          * pulling it out of the hashtable.
 805          */
 806
 807         t = roff_parse(r, *bufp, &pos);
 808
 809         /*
 810          * Macros other than block-end are only significant
 811          * in `de' blocks; elsewhere, simply throw them away.
 812          */
 813         if (ROFF_cblock != t) {
 814                 if (ROFF_de == tok)
 815                         roff_setstr(r, r->last->name, *bufp + ppos, 1);
 816                 return(ROFF_IGN);
 817         }
 818
 819         assert(roffs[t].proc);
 820         return((*roffs[t].proc)(r, t, bufp, szp,
 821                                 ln, ppos, pos, offs));
 822 }
 823
 824
 825 /* ARGSUSED */
 826 static enum rofferr
 827 roff_block_text(ROFF_ARGS)
 828 {
 829
 830         if (ROFF_de == tok)
 831                 roff_setstr(r, r->last->name, *bufp + pos, 1);
 832
 833         return(ROFF_IGN);
 834 }
 835
 836
 837 /* ARGSUSED */
 838 static enum rofferr
 839 roff_cond_sub(ROFF_ARGS)
 840 {
 841         enum rofft       t;
 842         enum roffrule    rr;
 843         char            *ep;
 844
 845         rr = r->last->rule;
 846         roffnode_cleanscope(r);
 847
 848         /*
 849          * If the macro is unknown, first check if it contains a closing
 850          * delimiter `\}'.  If it does, close out our scope and return
 851          * the currently-scoped rule (ignore or continue).  Else, drop
 852          * into the currently-scoped rule.
 853          */
 854
 855         if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) {
 856                 /*
 857                  * Jump through hoops to detect a \}, because it could
 858                  * be (say) \\}, which is something completely
 859                  * different.
 860                  */
 861                 ep = &(*bufp)[pos];
 862                 for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {
 863                         ep++;
 864                         if ('}' != *ep)
 865                                 continue;
 866                         *--ep = '\0';
 867                         roff_ccond(r, ROFF_ccond, bufp, szp,
 868                                         ln, pos, pos + 2, offs);
 869                         break;
 870                 }
 871                 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
 872         }
 873
 874         /*
 875          * A denied conditional must evaluate its children if and only
 876          * if they're either structurally required (such as loops and
 877          * conditionals) or a closing macro.
 878          */
 879
 880         if (ROFFRULE_DENY == rr)
 881                 if ( ! (ROFFMAC_STRUCT & roffs[t].flags))
 882                         if (ROFF_ccond != t)
 883                                 return(ROFF_IGN);
 884
 885         assert(roffs[t].proc);
 886         return((*roffs[t].proc)(r, t, bufp, szp,
 887                                 ln, ppos, pos, offs));
 888 }
 889
 890
 891 /* ARGSUSED */
 892 static enum rofferr
 893 roff_cond_text(ROFF_ARGS)
 894 {
 895         char            *ep, *st;
 896         enum roffrule    rr;
 897
 898         rr = r->last->rule;
 899
 900         /*
 901          * We display the value of the text if out current evaluation
 902          * scope permits us to do so.
 903          */
 904
 905         /* FIXME: use roff_ccond? */
 906
 907         st = &(*bufp)[pos];
 908         if (NULL == (ep = strstr(st, "\\}"))) {
 909                 roffnode_cleanscope(r);
 910                 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
 911         }
 912
 913         if (ep == st || (ep > st && '\\' != *(ep - 1)))
 914                 roffnode_pop(r);
 915
 916         roffnode_cleanscope(r);
 917         return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
 918 }
 919
 920
 921 static enum roffrule
 922 roff_evalcond(const char *v, int *pos)
 923 {
 924
 925         switch (v[*pos]) {
 926         case ('n'):
 927                 (*pos)++;
 928                 return(ROFFRULE_ALLOW);
 929         case ('e'):
 930                 /* FALLTHROUGH */
 931         case ('o'):
 932                 /* FALLTHROUGH */
 933         case ('t'):
 934                 (*pos)++;
 935                 return(ROFFRULE_DENY);
 936         default:
 937                 break;
 938         }
 939
 940         while (v[*pos] && ' ' != v[*pos])
 941                 (*pos)++;
 942         return(ROFFRULE_DENY);
 943 }
 944
 945 /* ARGSUSED */
 946 static enum rofferr
 947 roff_line_ignore(ROFF_ARGS)
 948 {
 949
 950         if (ROFF_it == tok)
 951                 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos, "it");
 952
 953         return(ROFF_IGN);
 954 }
 955
 956 /* ARGSUSED */
 957 static enum rofferr
 958 roff_cond(ROFF_ARGS)
 959 {
 960         int              sv;
 961         enum roffrule    rule;
 962
 963         /*
 964          * An `.el' has no conditional body: it will consume the value
 965          * of the current rstack entry set in prior `ie' calls or
 966          * defaults to DENY.
 967          *
 968          * If we're not an `el', however, then evaluate the conditional.
 969          */
 970
 971         rule = ROFF_el == tok ?
 972                 (r->rstackpos < 0 ?
 973                  ROFFRULE_DENY : r->rstack[r->rstackpos--]) :
 974                 roff_evalcond(*bufp, &pos);
 975
 976         sv = pos;
 977         while (' ' == (*bufp)[pos])
 978                 pos++;
 979
 980         /*
 981          * Roff is weird.  If we have just white-space after the
 982          * conditional, it's considered the BODY and we exit without
 983          * really doing anything.  Warn about this.  It's probably
 984          * wrong.
 985          */
 986
 987         if ('\0' == (*bufp)[pos] && sv != pos) {
 988                 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
 989                 return(ROFF_IGN);
 990         }
 991
 992         roffnode_push(r, tok, NULL, ln, ppos);
 993
 994         r->last->rule = rule;
 995
 996         /*
 997          * An if-else will put the NEGATION of the current evaluated
 998          * conditional into the stack of rules.
 999          */
1000
1001         if (ROFF_ie == tok) {
1002                 if (r->rstackpos == RSTACK_MAX - 1) {
1003                         mandoc_msg(MANDOCERR_MEM,
1004                                 r->parse, ln, ppos, NULL);
1005                         return(ROFF_ERR);
1006                 }
1007                 r->rstack[++r->rstackpos] =
1008                         ROFFRULE_DENY == r->last->rule ?
1009                         ROFFRULE_ALLOW : ROFFRULE_DENY;
1010         }
1011
1012         /* If the parent has false as its rule, then so do we. */
1013
1014         if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
1015                 r->last->rule = ROFFRULE_DENY;
1016
1017         /*
1018          * Determine scope.  If we're invoked with "\{" trailing the
1019          * conditional, then we're in a multiline scope.  Else our scope
1020          * expires on the next line.
1021          */
1022
1023         r->last->endspan = 1;
1024
1025         if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1026                 r->last->endspan = -1;
1027                 pos += 2;
1028         }
1029
1030         /*
1031          * If there are no arguments on the line, the next-line scope is
1032          * assumed.
1033          */
1034
1035         if ('\0' == (*bufp)[pos])
1036                 return(ROFF_IGN);
1037
1038         /* Otherwise re-run the roff parser after recalculating. */
1039
1040         *offs = pos;
1041         return(ROFF_RERUN);
1042 }
1043
1044
1045 /* ARGSUSED */
1046 static enum rofferr
1047 roff_ds(ROFF_ARGS)
1048 {
1049         char            *name, *string;
1050
1051         /*
1052          * A symbol is named by the first word following the macro
1053          * invocation up to a space.  Its value is anything after the
1054          * name's trailing whitespace and optional double-quote.  Thus,
1055          *
1056          *  [.ds foo "bar  "     ]
1057          *
1058          * will have `bar  "     ' as its value.
1059          */
1060
1061         string = *bufp + pos;
1062         name = roff_getname(r, &string, ln, pos);
1063         if ('\0' == *name)
1064                 return(ROFF_IGN);
1065
1066         /* Read past initial double-quote. */
1067         if ('"' == *string)
1068                 string++;
1069
1070         /* The rest is the value. */
1071         roff_setstr(r, name, string, 0);
1072         return(ROFF_IGN);
1073 }
1074
1075
1076 /* ARGSUSED */
1077 static enum rofferr
1078 roff_nr(ROFF_ARGS)
1079 {
1080         const char      *key;
1081         char            *val;
1082         int              iv;
1083         struct reg      *rg;
1084
1085         val = *bufp + pos;
1086         key = roff_getname(r, &val, ln, pos);
1087         rg = r->regs->regs;
1088
1089         if (0 == strcmp(key, "nS")) {
1090                 rg[(int)REG_nS].set = 1;
1091                 if ((iv = mandoc_strntou(val, strlen(val), 10)) >= 0)
1092                         rg[REG_nS].v.u = (unsigned)iv;
1093                 else
1094                         rg[(int)REG_nS].v.u = 0u;
1095         }
1096
1097         return(ROFF_IGN);
1098 }
1099
1100 /* ARGSUSED */
1101 static enum rofferr
1102 roff_rm(ROFF_ARGS)
1103 {
1104         const char       *name;
1105         char             *cp;
1106
1107         cp = *bufp + pos;
1108         while ('\0' != *cp) {
1109                 name = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1110                 if ('\0' != *name)
1111                         roff_setstr(r, name, NULL, 0);
1112         }
1113         return(ROFF_IGN);
1114 }
1115
1116 /* ARGSUSED */
1117 static enum rofferr
1118 roff_TE(ROFF_ARGS)
1119 {
1120
1121         if (NULL == r->tbl)
1122                 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1123         else
1124                 tbl_end(r->tbl);
1125
1126         r->tbl = NULL;
1127         return(ROFF_IGN);
1128 }
1129
1130 /* ARGSUSED */
1131 static enum rofferr
1132 roff_T_(ROFF_ARGS)
1133 {
1134
1135         if (NULL == r->tbl)
1136                 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1137         else
1138                 tbl_restart(ppos, ln, r->tbl);
1139
1140         return(ROFF_IGN);
1141 }
1142
1143 /* ARGSUSED */
1144 static enum rofferr
1145 roff_EQ(ROFF_ARGS)
1146 {
1147         struct eqn_node *e;
1148
1149         assert(NULL == r->eqn);
1150         e = eqn_alloc(ppos, ln);
1151
1152         if (r->last_eqn)
1153                 r->last_eqn->next = e;
1154         else
1155                 r->first_eqn = r->last_eqn = e;
1156
1157         r->eqn = r->last_eqn = e;
1158         return(ROFF_IGN);
1159 }
1160
1161 /* ARGSUSED */
1162 static enum rofferr
1163 roff_EN(ROFF_ARGS)
1164 {
1165
1166         mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1167         return(ROFF_IGN);
1168 }
1169
1170 /* ARGSUSED */
1171 static enum rofferr
1172 roff_TS(ROFF_ARGS)
1173 {
1174         struct tbl_node *t;
1175
1176         if (r->tbl) {
1177                 mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
1178                 tbl_end(r->tbl);
1179         }
1180
1181         t = tbl_alloc(ppos, ln, r->parse);
1182
1183         if (r->last_tbl)
1184                 r->last_tbl->next = t;
1185         else
1186                 r->first_tbl = r->last_tbl = t;
1187
1188         r->tbl = r->last_tbl = t;
1189         return(ROFF_IGN);
1190 }
1191
1192 /* ARGSUSED */
1193 static enum rofferr
1194 roff_so(ROFF_ARGS)
1195 {
1196         char *name;
1197
1198         mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL);
1199
1200         /*
1201          * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
1202          * opening anything that's not in our cwd or anything beneath
1203          * it.  Thus, explicitly disallow traversing up the file-system
1204          * or using absolute paths.
1205          */
1206
1207         name = *bufp + pos;
1208         if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1209                 mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL);
1210                 return(ROFF_ERR);
1211         }
1212
1213         *offs = pos;
1214         return(ROFF_SO);
1215 }
1216
1217 /* ARGSUSED */
1218 static enum rofferr
1219 roff_userdef(ROFF_ARGS)
1220 {
1221         const char       *arg[9];
1222         char             *cp, *n1, *n2;
1223         int               i;
1224
1225         /*
1226          * Collect pointers to macro argument strings
1227          * and null-terminate them.
1228          */
1229         cp = *bufp + pos;
1230         for (i = 0; i < 9; i++)
1231                 arg[i] = '\0' == *cp ? "" :
1232                     mandoc_getarg(r->parse, &cp, ln, &pos);
1233
1234         /*
1235          * Expand macro arguments.
1236          */
1237         *szp = 0;
1238         n1 = cp = mandoc_strdup(r->current_string);
1239         while (NULL != (cp = strstr(cp, "\\$"))) {
1240                 i = cp[2] - '1';
1241                 if (0 > i || 8 < i) {
1242                         /* Not an argument invocation. */
1243                         cp += 2;
1244                         continue;
1245                 }
1246
1247                 *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1248                 n2 = mandoc_malloc(*szp);
1249
1250                 strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1251                 strlcat(n2, arg[i], *szp);
1252                 strlcat(n2, cp + 3, *szp);
1253
1254                 cp = n2 + (cp - n1);
1255                 free(n1);
1256                 n1 = n2;
1257         }
1258
1259         /*
1260          * Replace the macro invocation
1261          * by the expanded macro.
1262          */
1263         free(*bufp);
1264         *bufp = n1;
1265         if (0 == *szp)
1266                 *szp = strlen(*bufp) + 1;
1267
1268         return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1269            ROFF_REPARSE : ROFF_APPEND);
1270 }
1271
1272 static char *
1273 roff_getname(struct roff *r, char **cpp, int ln, int pos)
1274 {
1275         char     *name, *cp;
1276
1277         name = *cpp;
1278         if ('\0' == *name)
1279                 return(name);
1280
1281         /* Read until end of name. */
1282         for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
1283                 if ('\\' != *cp)
1284                         continue;
1285                 cp++;
1286                 if ('\\' == *cp)
1287                         continue;
1288                 mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
1289                 *cp = '\0';
1290                 name = cp;
1291         }
1292
1293         /* Nil-terminate name. */
1294         if ('\0' != *cp)
1295                 *(cp++) = '\0';
1296
1297         /* Read past spaces. */
1298         while (' ' == *cp)
1299                 cp++;
1300
1301         *cpp = cp;
1302         return(name);
1303 }
1304
1305 /*
1306  * Store *string into the user-defined string called *name.
1307  * In multiline mode, append to an existing entry and append '\n';
1308  * else replace the existing entry, if there is one.
1309  * To clear an existing entry, call with (*r, *name, NULL, 0).
1310  */
1311 static void
1312 roff_setstr(struct roff *r, const char *name, const char *string,
1313         int multiline)
1314 {
1315         struct roffstr   *n;
1316         char             *c;
1317         size_t            oldch, newch;
1318
1319         /* Search for an existing string with the same name. */
1320         n = r->first_string;
1321         while (n && strcmp(name, n->name))
1322                 n = n->next;
1323
1324         if (NULL == n) {
1325                 /* Create a new string table entry. */
1326                 n = mandoc_malloc(sizeof(struct roffstr));
1327                 n->name = mandoc_strdup(name);
1328                 n->string = NULL;
1329                 n->next = r->first_string;
1330                 r->first_string = n;
1331         } else if (0 == multiline) {
1332                 /* In multiline mode, append; else replace. */
1333                 free(n->string);
1334                 n->string = NULL;
1335         }
1336
1337         if (NULL == string)
1338                 return;
1339
1340         /*
1341          * One additional byte for the '\n' in multiline mode,
1342          * and one for the terminating '\0'.
1343          */
1344         newch = strlen(string) + (multiline ? 2u : 1u);
1345         if (NULL == n->string) {
1346                 n->string = mandoc_malloc(newch);
1347                 *n->string = '\0';
1348                 oldch = 0;
1349         } else {
1350                 oldch = strlen(n->string);
1351                 n->string = mandoc_realloc(n->string, oldch + newch);
1352         }
1353
1354         /* Skip existing content in the destination buffer. */
1355         c = n->string + (int)oldch;
1356
1357         /* Append new content to the destination buffer. */
1358         while (*string) {
1359                 /*
1360                  * Rudimentary roff copy mode:
1361                  * Handle escaped backslashes.
1362                  */
1363                 if ('\\' == *string && '\\' == *(string + 1))
1364                         string++;
1365                 *c++ = *string++;
1366         }
1367
1368         /* Append terminating bytes. */
1369         if (multiline)
1370                 *c++ = '\n';
1371         *c = '\0';
1372 }
1373
1374 static const char *
1375 roff_getstrn(const struct roff *r, const char *name, size_t len)
1376 {
1377         const struct roffstr *n;
1378
1379         n = r->first_string;
1380         while (n && (strncmp(name, n->name, len) || '\0' != n->name[(int)len]))
1381                 n = n->next;
1382
1383         return(n ? n->string : NULL);
1384 }
1385
1386 static void
1387 roff_freestr(struct roff *r)
1388 {
1389         struct roffstr   *n, *nn;
1390
1391         for (n = r->first_string; n; n = nn) {
1392                 free(n->name);
1393                 free(n->string);
1394                 nn = n->next;
1395                 free(n);
1396         }
1397
1398         r->first_string = NULL;
1399 }
1400
1401 const struct tbl_span *
1402 roff_span(const struct roff *r)
1403 {
1404
1405         return(r->tbl ? tbl_span(r->tbl) : NULL);
1406 }
1407
1408 const struct eqn *
1409 roff_eqn(const struct roff *r)
1410 {
1411
1412         return(r->last_eqn ? &r->last_eqn->eqn : NULL);
1413 }