roff.c

   1 /*      $Id: roff.c,v 1.129 2011/03/22 09:50:11 kristaps Exp $ */
   2 /*
   3  * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
   4  * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
   5  *
   6  * Permission to use, copy, modify, and distribute this software for any
   7  * purpose with or without fee is hereby granted, provided that the above
   8  * copyright notice and this permission notice appear in all copies.
   9  *
  10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
  11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
  13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17  */
  18 #ifdef HAVE_CONFIG_H
  19 #include "config.h"
  20 #endif
  21
  22 #include <assert.h>
  23 #include <errno.h>
  24 #include <ctype.h>
  25 #include <limits.h>
  26 #include <stdlib.h>
  27 #include <string.h>
  28 #include <stdio.h>
  29
  30 #include "mandoc.h"
  31 #include "libroff.h"
  32 #include "libmandoc.h"
  33
     /* Number of slots in the rstack[] array of struct roff. */
  34 #define RSTACK_MAX      128
  35
     /*
      * True if c is '.' or '\'', the two characters roff_parse() requires
      * at the start of a request line.
      */
  36 #define ROFF_CTL(c) \
  37         ('.' == (c) || '\'' == (c))
  38
     /*
      * Tokens for the requests known to this file.  The values are used
      * as indices into the roffs[] table, so the order here has to match
      * the order of the initialisers there.
      */
  39 enum    rofft {
  40         ROFF_ad,
  41         ROFF_am,
  42         ROFF_ami,
  43         ROFF_am1,
  44         ROFF_de,
  45         ROFF_dei,
  46         ROFF_de1,
  47         ROFF_ds,
  48         ROFF_el,
  49         ROFF_hy,
  50         ROFF_ie,
  51         ROFF_if,
  52         ROFF_ig,
  53         ROFF_it,
  54         ROFF_ne,
  55         ROFF_nh,
  56         ROFF_nr,
  57         ROFF_ns,
  58         ROFF_ps,
  59         ROFF_rm,
  60         ROFF_so,
  61         ROFF_ta,
  62         ROFF_tr,
  63         ROFF_TS,
  64         ROFF_TE,
  65         ROFF_T_,
  66         ROFF_EQ,
  67         ROFF_EN,
  68         ROFF_cblock, /* the "." entry of roffs[], handled by roff_cblock() */
  69         ROFF_ccond, /* FIXME: remove this. */
  70         ROFF_USERDEF, /* nameless roffs[] entry, handled by roff_userdef() */
  71         ROFF_MAX /* size of roffs[]; also the "not found" result of lookups */
  72 };
  73
     /*
      * Evaluation rule attached to a scope.  roff_cond_text() and
      * roff_cond_sub() return ROFF_IGN for lines inside a scope whose rule
      * is ROFFRULE_DENY and ROFF_CONT otherwise.
      */
  74 enum    roffrule {
  75         ROFFRULE_ALLOW,
  76         ROFFRULE_DENY
  77 };
  78
     /*
      * One named string in a singly-linked list; the head of the list is
      * the first_string member of struct roff.
      */
  79 struct  roffstr {
  80         char            *name; /* key of symbol */
  81         char            *string; /* current value */
  82         struct roffstr  *next; /* next in list */
  83 };
  84
     /*
      * State of one roff parser.  Allocated by roff_alloc(), emptied by
      * roff_reset() and released by roff_free().
      */
  85 struct  roff {
  86         struct mparse   *parse; /* parse point */
  87         struct roffnode *last; /* leaf of stack */
  88         enum roffrule    rstack[RSTACK_MAX]; /* stack of !`ie' rules */
  89         int              rstackpos; /* position in rstack; roff_alloc() starts it at -1 */
  90         struct regset   *regs; /* read/writable registers */
  91         struct roffstr  *first_string; /* user-defined strings & macros */
  92         const char      *current_string; /* value of last called user macro */
  93         struct tbl_node *first_tbl; /* first table parsed */
  94         struct tbl_node *last_tbl; /* last table parsed */
  95         struct tbl_node *tbl; /* current table being parsed */
  96         struct eqn_node *last_eqn; /* last equation parsed */
  97         struct eqn_node *first_eqn; /* first equation parsed */
  98         struct eqn_node *eqn; /* current equation being parsed */
  99 };
 100
     /*
      * One open scope.  Nodes form a stack linked through `parent'; the
      * top of the stack is the `last' member of struct roff.  Nodes are
      * created by roffnode_push() and destroyed by roffnode_pop(), which
      * frees `name' and `end'.
      */
 101 struct  roffnode {
 102         enum rofft       tok; /* type of node */
 103         struct roffnode *parent; /* up one in stack */
 104         int              line; /* parse line */
 105         int              col; /* parse col */
 106         char            *name; /* node name, e.g. macro name */
 107         char            *end; /* end-rules: custom token */
 108         int              endspan; /* end-rules: next-line or infty */
 109         enum roffrule    rule; /* current evaluation rule */
 110 };
 111
     /*
      * Parameter list shared by every request handler, so that all of
      * them can be called through a roffproc pointer.
      */
 112 #define ROFF_ARGS        struct roff *r, /* parse ctx */ \
 113                          enum rofft tok, /* tok of macro */ \
 114                          char **bufp, /* input buffer */ \
 115                          size_t *szp, /* size of input buffer */ \
 116                          int ln, /* parse line */ \
 117                          int ppos, /* original pos in buffer */ \
 118                          int pos, /* current pos in buffer */ \
 119                          int *offs /* reset offset of buffer data */
 120
     /* Pointer to a request handler taking the ROFF_ARGS parameters. */
 121 typedef enum rofferr (*roffproc)(ROFF_ARGS);
 122
     /*
      * Description of one request: its name and the handlers that
      * roff_parseln() dispatches to.  `proc' runs when the request itself
      * is seen with no scope open; while a scope of this type is open,
      * `text' receives lines that do not start with a control character
      * and `sub' receives those that do.
      */
 123 struct  roffmac {
 124         const char      *name; /* macro name */
 125         roffproc         proc; /* process new macro */
 126         roffproc         text; /* process as child text of macro */
 127         roffproc         sub; /* process as child of macro */
 128         int              flags;
 129 #define ROFFMAC_STRUCT  (1 << 0) /* always interpret */
 130         struct roffmac  *next; /* next entry in the same hash bucket */
 131 };
 132
     /*
      * Forward declarations of the request handlers referenced by the
      * roffs[] table and of the string and register helpers they use.
      */
 133 static  enum rofferr     roff_block(ROFF_ARGS);
 134 static  enum rofferr     roff_block_text(ROFF_ARGS);
 135 static  enum rofferr     roff_block_sub(ROFF_ARGS);
 136 static  enum rofferr     roff_cblock(ROFF_ARGS);
 137 static  enum rofferr     roff_ccond(ROFF_ARGS);
 138 static  enum rofferr     roff_cond(ROFF_ARGS);
 139 static  enum rofferr     roff_cond_text(ROFF_ARGS);
 140 static  enum rofferr     roff_cond_sub(ROFF_ARGS);
 141 static  enum rofferr     roff_ds(ROFF_ARGS);
 142 static  enum roffrule    roff_evalcond(const char *, int *);
 143 static  void             roff_freestr(struct roff *);
 144 static  char            *roff_getname(struct roff *, char **, int, int);
 145 static  const char      *roff_getstrn(const struct roff *,
 146                                 const char *, size_t);
 147 static  enum rofferr     roff_line_ignore(ROFF_ARGS);
 148 static  enum rofferr     roff_nr(ROFF_ARGS);
 149 static  int              roff_res(struct roff *,
 150                                 char **, size_t *, int);
 151 static  enum rofferr     roff_rm(ROFF_ARGS);
 152 static  void             roff_setstr(struct roff *,
 153                                 const char *, const char *, int);
 154 static  enum rofferr     roff_so(ROFF_ARGS);
 155 static  enum rofferr     roff_TE(ROFF_ARGS);
 156 static  enum rofferr     roff_TS(ROFF_ARGS);
 157 static  enum rofferr     roff_EQ(ROFF_ARGS);
 158 static  enum rofferr     roff_EN(ROFF_ARGS);
 159 static  enum rofferr     roff_T_(ROFF_ARGS);
 160 static  enum rofferr     roff_userdef(ROFF_ARGS);
 161
 162 /* See roff_hash_find() */
 163
     /*
      * ASCII_LO and ASCII_HI bound the first characters accepted as hash
      * keys; HASHWIDTH is the resulting number of buckets.
      */
 164 #define ASCII_HI         126
 165 #define ASCII_LO         33
 166 #define HASHWIDTH       (ASCII_HI - ASCII_LO + 1)
 167
     /* Bucket heads, filled in by roff_hash_init(). */
 168 static  struct roffmac  *hash[HASHWIDTH];
 169
     /*
      * Request table, indexed by enum rofft.  Each initialiser lists
      * { name, proc, text, sub, flags, next }; `next' starts out NULL and
      * is set by roff_hash_init().
      */
 170 static  struct roffmac   roffs[ROFF_MAX] = {
 171         { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
 172         { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
 173         { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
 174         { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
 175         { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
 176         { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
 177         { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
 178         { "ds", roff_ds, NULL, NULL, 0, NULL },
 179         { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
 180         { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
 181         { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
 182         { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
 183         { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
 184         { "it", roff_line_ignore, NULL, NULL, 0, NULL },
 185         { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
 186         { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
 187         { "nr", roff_nr, NULL, NULL, 0, NULL },
 188         { "ns", roff_line_ignore, NULL, NULL, 0, NULL },
 189         { "ps", roff_line_ignore, NULL, NULL, 0, NULL },
 190         { "rm", roff_rm, NULL, NULL, 0, NULL },
 191         { "so", roff_so, NULL, NULL, 0, NULL },
 192         { "ta", roff_line_ignore, NULL, NULL, 0, NULL },
 193         { "tr", roff_line_ignore, NULL, NULL, 0, NULL },
 194         { "TS", roff_TS, NULL, NULL, 0, NULL },
 195         { "TE", roff_TE, NULL, NULL, 0, NULL },
 196         { "T&", roff_T_, NULL, NULL, 0, NULL },
 197         { "EQ", roff_EQ, NULL, NULL, 0, NULL },
 198         { "EN", roff_EN, NULL, NULL, 0, NULL },
 199         { ".", roff_cblock, NULL, NULL, 0, NULL },
 200         { "\\}", roff_ccond, NULL, NULL, 0, NULL },
 201         { NULL, roff_userdef, NULL, NULL, 0, NULL },
 202 };
 203
     /*
      * Forward declarations of the scope-stack, hash-table and parsing
      * helpers defined further down in this file.
      */
 204 static  void             roff_free1(struct roff *);
 205 static  enum rofft       roff_hash_find(const char *, size_t);
 206 static  void             roff_hash_init(void);
 207 static  void             roffnode_cleanscope(struct roff *);
 208 static  void             roffnode_push(struct roff *, enum rofft,
 209                                 const char *, int, int);
 210 static  void             roffnode_pop(struct roff *);
 211 static  enum rofft       roff_parse(struct roff *, const char *, int *);
 212 static  int              roff_parse_nat(const char *, unsigned int *);
 213
 214 /* See roff_hash_find() */
 215 #define ROFF_HASH(p)    (p[0] - ASCII_LO)
 216
 217 static void
 218 roff_hash_init(void)
 219 {
 220         struct roffmac   *n;
 221         int               buc, i;
 222
 223         for (i = 0; i < (int)ROFF_USERDEF; i++) {
 224                 assert(roffs[i].name[0] >= ASCII_LO);
 225                 assert(roffs[i].name[0] <= ASCII_HI);
 226
 227                 buc = ROFF_HASH(roffs[i].name);
 228
 229                 if (NULL != (n = hash[buc])) {
 230                         for ( ; n->next; n = n->next)
 231                                 /* Do nothing. */ ;
 232                         n->next = &roffs[i];
 233                 } else
 234                         hash[buc] = &roffs[i];
 235         }
 236 }
 237
 238
 239 /*
 240  * Look up a roff token by its name.  Returns ROFF_MAX if no macro by
 241  * the nil-terminated string name could be found.
 242  */
 243 static enum rofft
 244 roff_hash_find(const char *p, size_t s)
 245 {
 246         int              buc;
 247         struct roffmac  *n;
 248
 249         /*
 250          * libroff has an extremely simple hashtable, for the time
 251          * being, which simply keys on the first character, which must
 252          * be printable, then walks a chain.  It works well enough until
 253          * optimised.
 254          */
 255
 256         if (p[0] < ASCII_LO || p[0] > ASCII_HI)
 257                 return(ROFF_MAX);
 258
 259         buc = ROFF_HASH(p);
 260
 261         if (NULL == (n = hash[buc]))
 262                 return(ROFF_MAX);
 263         for ( ; n; n = n->next)
 264                 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
 265                         return((enum rofft)(n - roffs));
 266
 267         return(ROFF_MAX);
 268 }
 269
 270
 271 /*
 272  * Pop the current node off of the stack of roff instructions currently
 273  * pending.
 274  */
 275 static void
 276 roffnode_pop(struct roff *r)
 277 {
 278         struct roffnode *p;
 279
 280         assert(r->last);
 281         p = r->last;
 282
 283         if (ROFF_el == p->tok)
 284                 if (r->rstackpos > -1)
 285                         r->rstackpos--;
 286
 287         r->last = r->last->parent;
 288         free(p->name);
 289         free(p->end);
 290         free(p);
 291 }
 292
 293
 294 /*
 295  * Push a roff node onto the instruction stack.  This must later be
 296  * removed with roffnode_pop().
 297  */
 298 static void
 299 roffnode_push(struct roff *r, enum rofft tok, const char *name,
 300                 int line, int col)
 301 {
 302         struct roffnode *p;
 303
 304         p = mandoc_calloc(1, sizeof(struct roffnode));
 305         p->tok = tok;
 306         if (name)
 307                 p->name = mandoc_strdup(name);
 308         p->parent = r->last;
 309         p->line = line;
 310         p->col = col;
 311         p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
 312
 313         r->last = p;
 314 }
 315
 316
 317 static void
 318 roff_free1(struct roff *r)
 319 {
 320         struct tbl_node *t;
 321         struct eqn_node *e;
 322
 323         while (NULL != (t = r->first_tbl)) {
 324                 r->first_tbl = t->next;
 325                 tbl_free(t);
 326         }
 327
 328         r->first_tbl = r->last_tbl = r->tbl = NULL;
 329
 330         while (NULL != (e = r->first_eqn)) {
 331                 r->first_eqn = e->next;
 332                 eqn_free(e);
 333         }
 334
 335         r->first_eqn = r->last_eqn = r->eqn = NULL;
 336
 337         while (r->last)
 338                 roffnode_pop(r);
 339
 340         roff_freestr(r);
 341 }
 342
 343
 344 void
 345 roff_reset(struct roff *r)
 346 {
 347
 348         roff_free1(r);
 349 }
 350
 351
 352 void
 353 roff_free(struct roff *r)
 354 {
 355
 356         roff_free1(r);
 357         free(r);
 358 }
 359
 360
 361 struct roff *
 362 roff_alloc(struct regset *regs, struct mparse *parse)
 363 {
 364         struct roff     *r;
 365
 366         r = mandoc_calloc(1, sizeof(struct roff));
 367         r->regs = regs;
 368         r->parse = parse;
 369         r->rstackpos = -1;
 370
 371         roff_hash_init();
 372         return(r);
 373 }
 374
 375
 376 /*
 377  * Pre-filter each and every line for reserved words (one beginning with
 378  * `\*', e.g., `\*(ab').  These must be handled before the actual line
 379  * is processed.
 380  */
 381 static int
 382 roff_res(struct roff *r, char **bufp, size_t *szp, int pos)
 383 {
 384         const char      *stesc; /* start of an escape sequence ('\\') */
 385         const char      *stnam; /* start of the name, after "[(*" */
 386         const char      *cp;    /* end of the name, e.g. before ']' */
 387         const char      *res;   /* the string to be substituted */
 388         int              i, maxl;
 389         size_t           nsz;
 390         char            *n;
 391
 392         /* Search for a leading backslash and save a pointer to it. */
 393
 394         cp = *bufp + pos;
 395         while (NULL != (cp = strchr(cp, '\\'))) {
 396                 stesc = cp++;
 397
 398                 /*
 399                  * The second character must be an asterisk.
 400                  * If it isn't, skip it anyway:  It is escaped,
 401                  * so it can't start another escape sequence.
 402                  */
 403
 404                 if ('\0' == *cp)
 405                         return(1);
 406                 if ('*' != *cp++)
 407                         continue;
 408
 409                 /*
 410                  * The third character decides the length
 411                  * of the name of the string.
 412                  * Save a pointer to the name.
 413                  */
 414
 415                 switch (*cp) {
 416                 case ('\0'):
 417                         return(1);
 418                 case ('('):
 419                         cp++;
 420                         maxl = 2;
 421                         break;
 422                 case ('['):
 423                         cp++;
 424                         maxl = 0;
 425                         break;
 426                 default:
 427                         maxl = 1;
 428                         break;
 429                 }
 430                 stnam = cp;
 431
 432                 /* Advance to the end of the name. */
 433
 434                 for (i = 0; 0 == maxl || i < maxl; i++, cp++) {
 435                         if ('\0' == *cp)
 436                                 return(1); /* Error. */
 437                         if (0 == maxl && ']' == *cp)
 438                                 break;
 439                 }
 440
 441                 /*
 442                  * Retrieve the replacement string; if it is
 443                  * undefined, resume searching for escapes.
 444                  */
 445
 446                 res = roff_getstrn(r, stnam, (size_t)i);
 447
 448                 if (NULL == res) {
 449                         cp -= maxl ? 1 : 0;
 450                         continue;
 451                 }
 452
 453                 /* Replace the escape sequence by the string. */
 454
 455                 nsz = *szp + strlen(res) + 1;
 456                 n = mandoc_malloc(nsz);
 457
 458                 strlcpy(n, *bufp, (size_t)(stesc - *bufp + 1));
 459                 strlcat(n, res, nsz);
 460                 strlcat(n, cp + (maxl ? 0 : 1), nsz);
 461
 462                 free(*bufp);
 463
 464                 *bufp = n;
 465                 *szp = nsz;
 466                 return(0);
 467         }
 468
 469         return(1);
 470 }
 471
 472
 473 enum rofferr
 474 roff_parseln(struct roff *r, int ln, char **bufp,
 475                 size_t *szp, int pos, int *offs)
 476 {
 477         enum rofft       t;
 478         enum rofferr     e;
 479         int              ppos;
 480
 481         /*
 482          * Run the reserved-word filter only if we have some reserved
 483          * words to fill in.
 484          */
 485
 486         if (r->first_string && ! roff_res(r, bufp, szp, pos))
 487                 return(ROFF_REPARSE);
 488
 489         /*
 490          * First, if a scope is open and we're not a macro, pass the
 491          * text through the macro's filter.  If a scope isn't open and
 492          * we're not a macro, just let it through.
 493          * Finally, if there's an equation scope open, divert it into it
 494          * no matter our state.
 495          */
 496
 497         if (r->last && ! ROFF_CTL((*bufp)[pos])) {
 498                 t = r->last->tok;
 499                 assert(roffs[t].text);
 500                 e = (*roffs[t].text)
 501                         (r, t, bufp, szp, ln, pos, pos, offs);
 502                 assert(ROFF_IGN == e || ROFF_CONT == e);
 503                 if (ROFF_CONT != e)
 504                         return(e);
 505                 if (r->eqn)
 506                         return(eqn_read(&r->eqn, ln, *bufp, *offs));
 507                 if (r->tbl)
 508                         return(tbl_read(r->tbl, ln, *bufp, *offs));
 509                 return(ROFF_CONT);
 510         } else if ( ! ROFF_CTL((*bufp)[pos])) {
 511                 if (r->eqn)
 512                         return(eqn_read(&r->eqn, ln, *bufp, *offs));
 513                 if (r->tbl)
 514                         return(tbl_read(r->tbl, ln, *bufp, *offs));
 515                 return(ROFF_CONT);
 516         } else if (r->eqn)
 517                 return(eqn_read(&r->eqn, ln, *bufp, *offs));
 518
 519         /*
 520          * If a scope is open, go to the child handler for that macro,
 521          * as it may want to preprocess before doing anything with it.
 522          * Don't do so if an equation is open.
 523          */
 524
 525         if (r->last) {
 526                 t = r->last->tok;
 527                 assert(roffs[t].sub);
 528                 return((*roffs[t].sub)
 529                                 (r, t, bufp, szp,
 530                                  ln, pos, pos, offs));
 531         }
 532
 533         /*
 534          * Lastly, as we've no scope open, try to look up and execute
 535          * the new macro.  If no macro is found, simply return and let
 536          * the compilers handle it.
 537          */
 538
 539         ppos = pos;
 540         if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
 541                 return(ROFF_CONT);
 542
 543         assert(roffs[t].proc);
 544         return((*roffs[t].proc)
 545                         (r, t, bufp, szp,
 546                          ln, ppos, pos, offs));
 547 }
 548
 549
 550 void
 551 roff_endparse(struct roff *r)
 552 {
 553
 554         if (r->last)
 555                 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
 556                                 r->last->line, r->last->col, NULL);
 557
 558         if (r->eqn) {
 559                 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
 560                                 r->eqn->eqn.line, r->eqn->eqn.pos, NULL);
 561                 eqn_end(r->eqn);
 562                 r->eqn = NULL;
 563         }
 564
 565         if (r->tbl) {
 566                 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
 567                                 r->tbl->line, r->tbl->pos, NULL);
 568                 tbl_end(r->tbl);
 569                 r->tbl = NULL;
 570         }
 571 }
 572
 573
 574 /*
 575  * Parse a roff node's type from the input buffer.  This must be in the
 576  * form of ".foo xxx" in the usual way.
 577  */
 578 static enum rofft
 579 roff_parse(struct roff *r, const char *buf, int *pos)
 580 {
 581         const char      *mac;
 582         size_t           maclen;
 583         enum rofft       t;
 584
 585         assert(ROFF_CTL(buf[*pos]));
 586         (*pos)++;
 587
 588         while (' ' == buf[*pos] || '\t' == buf[*pos])
 589                 (*pos)++;
 590
 591         if ('\0' == buf[*pos])
 592                 return(ROFF_MAX);
 593
 594         mac = buf + *pos;
 595         maclen = strcspn(mac, " \\\t\0");
 596
 597         t = (r->current_string = roff_getstrn(r, mac, maclen))
 598             ? ROFF_USERDEF : roff_hash_find(mac, maclen);
 599
 600         *pos += (int)maclen;
 601         while (buf[*pos] && ' ' == buf[*pos])
 602                 (*pos)++;
 603
 604         return(t);
 605 }
 606
 607
 608 static int
 609 roff_parse_nat(const char *buf, unsigned int *res)
 610 {
 611         char            *ep;
 612         long             lval;
 613
 614         errno = 0;
 615         lval = strtol(buf, &ep, 10);
 616         if (buf[0] == '\0' || *ep != '\0')
 617                 return(0);
 618         if ((errno == ERANGE &&
 619                         (lval == LONG_MAX || lval == LONG_MIN)) ||
 620                         (lval > INT_MAX || lval < 0))
 621                 return(0);
 622
 623         *res = (unsigned int)lval;
 624         return(1);
 625 }
 626
 627
 628 /* ARGSUSED */
 629 static enum rofferr
 630 roff_cblock(ROFF_ARGS)
 631 {
 632
 633         /*
 634          * A block-close `..' should only be invoked as a child of an
 635          * ignore macro, otherwise raise a warning and just ignore it.
 636          */
 637
 638         if (NULL == r->last) {
 639                 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
 640                 return(ROFF_IGN);
 641         }
 642
 643         switch (r->last->tok) {
 644         case (ROFF_am):
 645                 /* FALLTHROUGH */
 646         case (ROFF_ami):
 647                 /* FALLTHROUGH */
 648         case (ROFF_am1):
 649                 /* FALLTHROUGH */
 650         case (ROFF_de):
 651                 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
 652                 /* FALLTHROUGH */
 653         case (ROFF_dei):
 654                 /* FALLTHROUGH */
 655         case (ROFF_ig):
 656                 break;
 657         default:
 658                 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
 659                 return(ROFF_IGN);
 660         }
 661
 662         if ((*bufp)[pos])
 663                 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
 664
 665         roffnode_pop(r);
 666         roffnode_cleanscope(r);
 667         return(ROFF_IGN);
 668
 669 }
 670
 671
 672 static void
 673 roffnode_cleanscope(struct roff *r)
 674 {
 675
 676         while (r->last) {
 677                 if (--r->last->endspan < 0)
 678                         break;
 679                 roffnode_pop(r);
 680         }
 681 }
 682
 683
 684 /* ARGSUSED */
 685 static enum rofferr
 686 roff_ccond(ROFF_ARGS)
 687 {
 688
 689         if (NULL == r->last) {
 690                 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
 691                 return(ROFF_IGN);
 692         }
 693
 694         switch (r->last->tok) {
 695         case (ROFF_el):
 696                 /* FALLTHROUGH */
 697         case (ROFF_ie):
 698                 /* FALLTHROUGH */
 699         case (ROFF_if):
 700                 break;
 701         default:
 702                 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
 703                 return(ROFF_IGN);
 704         }
 705
 706         if (r->last->endspan > -1) {
 707                 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
 708                 return(ROFF_IGN);
 709         }
 710
 711         if ((*bufp)[pos])
 712                 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
 713
 714         roffnode_pop(r);
 715         roffnode_cleanscope(r);
 716         return(ROFF_IGN);
 717 }
 718
 719
 720 /* ARGSUSED */
 721 static enum rofferr
 722 roff_block(ROFF_ARGS)
 723 {
 724         int             sv;
 725         size_t          sz;
 726         char            *name;
 727
 728         name = NULL;
 729
 730         if (ROFF_ig != tok) {
 731                 if ('\0' == (*bufp)[pos]) {
 732                         mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
 733                         return(ROFF_IGN);
 734                 }
 735
 736                 /*
 737                  * Re-write `de1', since we don't really care about
 738                  * groff's strange compatibility mode, into `de'.
 739                  */
 740
 741                 if (ROFF_de1 == tok)
 742                         tok = ROFF_de;
 743                 if (ROFF_de == tok)
 744                         name = *bufp + pos;
 745                 else
 746                         mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
 747                             roffs[tok].name);
 748
 749                 while ((*bufp)[pos] && ' ' != (*bufp)[pos])
 750                         pos++;
 751
 752                 while (' ' == (*bufp)[pos])
 753                         (*bufp)[pos++] = '\0';
 754         }
 755
 756         roffnode_push(r, tok, name, ln, ppos);
 757
 758         /*
 759          * At the beginning of a `de' macro, clear the existing string
 760          * with the same name, if there is one.  New content will be
 761          * added from roff_block_text() in multiline mode.
 762          */
 763
 764         if (ROFF_de == tok)
 765                 roff_setstr(r, name, "", 0);
 766
 767         if ('\0' == (*bufp)[pos])
 768                 return(ROFF_IGN);
 769
 770         /* If present, process the custom end-of-line marker. */
 771
 772         sv = pos;
 773         while ((*bufp)[pos] &&
 774                         ' ' != (*bufp)[pos] &&
 775                         '\t' != (*bufp)[pos])
 776                 pos++;
 777
 778         /*
 779          * Note: groff does NOT like escape characters in the input.
 780          * Instead of detecting this, we're just going to let it fly and
 781          * to hell with it.
 782          */
 783
 784         assert(pos > sv);
 785         sz = (size_t)(pos - sv);
 786
 787         if (1 == sz && '.' == (*bufp)[sv])
 788                 return(ROFF_IGN);
 789
 790         r->last->end = mandoc_malloc(sz + 1);
 791
 792         memcpy(r->last->end, *bufp + sv, sz);
 793         r->last->end[(int)sz] = '\0';
 794
 795         if ((*bufp)[pos])
 796                 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
 797
 798         return(ROFF_IGN);
 799 }
 800
 801
 802 /* ARGSUSED */
 803 static enum rofferr
 804 roff_block_sub(ROFF_ARGS)
 805 {
 806         enum rofft      t;
 807         int             i, j;
 808
 809         /*
 810          * First check whether a custom macro exists at this level.  If
 811          * it does, then check against it.  This is some of groff's
 812          * stranger behaviours.  If we encountered a custom end-scope
 813          * tag and that tag also happens to be a "real" macro, then we
 814          * need to try interpreting it again as a real macro.  If it's
 815          * not, then return ignore.  Else continue.
 816          */
 817
 818         if (r->last->end) {
 819                 i = pos + 1;
 820                 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
 821                         i++;
 822
 823                 for (j = 0; r->last->end[j]; j++, i++)
 824                         if ((*bufp)[i] != r->last->end[j])
 825                                 break;
 826
 827                 if ('\0' == r->last->end[j] &&
 828                                 ('\0' == (*bufp)[i] ||
 829                                  ' ' == (*bufp)[i] ||
 830                                  '\t' == (*bufp)[i])) {
 831                         roffnode_pop(r);
 832                         roffnode_cleanscope(r);
 833
 834                         if (ROFF_MAX != roff_parse(r, *bufp, &pos))
 835                                 return(ROFF_RERUN);
 836                         return(ROFF_IGN);
 837                 }
 838         }
 839
 840         /*
 841          * If we have no custom end-query or lookup failed, then try
 842          * pulling it out of the hashtable.
 843          */
 844
 845         ppos = pos;
 846         t = roff_parse(r, *bufp, &pos);
 847
 848         /*
 849          * Macros other than block-end are only significant
 850          * in `de' blocks; elsewhere, simply throw them away.
 851          */
 852         if (ROFF_cblock != t) {
 853                 if (ROFF_de == tok)
 854                         roff_setstr(r, r->last->name, *bufp + ppos, 1);
 855                 return(ROFF_IGN);
 856         }
 857
 858         assert(roffs[t].proc);
 859         return((*roffs[t].proc)(r, t, bufp, szp,
 860                                 ln, ppos, pos, offs));
 861 }
 862
 863
 864 /* ARGSUSED */
 865 static enum rofferr
 866 roff_block_text(ROFF_ARGS)
 867 {
 868
 869         if (ROFF_de == tok)
 870                 roff_setstr(r, r->last->name, *bufp + pos, 1);
 871
 872         return(ROFF_IGN);
 873 }
 874
 875
 876 /* ARGSUSED */
 877 static enum rofferr
 878 roff_cond_sub(ROFF_ARGS)
 879 {
 880         enum rofft       t;
 881         enum roffrule    rr;
 882
 883         ppos = pos;
 884         rr = r->last->rule;
 885
 886         /*
 887          * Clean out scope.  If we've closed ourselves, then don't
 888          * continue.
 889          */
 890
 891         roffnode_cleanscope(r);
 892
 893         if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) {
 894                 if ('\\' == (*bufp)[pos] && '}' == (*bufp)[pos + 1])
 895                         return(roff_ccond
 896                                 (r, ROFF_ccond, bufp, szp,
 897                                  ln, pos, pos + 2, offs));
 898                 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
 899         }
 900
 901         /*
 902          * A denied conditional must evaluate its children if and only
 903          * if they're either structurally required (such as loops and
 904          * conditionals) or a closing macro.
 905          */
 906         if (ROFFRULE_DENY == rr)
 907                 if ( ! (ROFFMAC_STRUCT & roffs[t].flags))
 908                         if (ROFF_ccond != t)
 909                                 return(ROFF_IGN);
 910
 911         assert(roffs[t].proc);
 912         return((*roffs[t].proc)(r, t, bufp, szp,
 913                                 ln, ppos, pos, offs));
 914 }
 915
 916
 917 /* ARGSUSED */
 918 static enum rofferr
 919 roff_cond_text(ROFF_ARGS)
 920 {
 921         char            *ep, *st;
 922         enum roffrule    rr;
 923
 924         rr = r->last->rule;
 925
 926         /*
 927          * We display the value of the text if out current evaluation
 928          * scope permits us to do so.
 929          */
 930
 931         /* FIXME: use roff_ccond? */
 932
 933         st = &(*bufp)[pos];
 934         if (NULL == (ep = strstr(st, "\\}"))) {
 935                 roffnode_cleanscope(r);
 936                 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
 937         }
 938
 939         if (ep == st || (ep > st && '\\' != *(ep - 1)))
 940                 roffnode_pop(r);
 941
 942         roffnode_cleanscope(r);
 943         return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
 944 }
 945
 946
 947 static enum roffrule
 948 roff_evalcond(const char *v, int *pos)
 949 {
 950
 951         switch (v[*pos]) {
 952         case ('n'):
 953                 (*pos)++;
 954                 return(ROFFRULE_ALLOW);
 955         case ('e'):
 956                 /* FALLTHROUGH */
 957         case ('o'):
 958                 /* FALLTHROUGH */
 959         case ('t'):
 960                 (*pos)++;
 961                 return(ROFFRULE_DENY);
 962         default:
 963                 break;
 964         }
 965
 966         while (v[*pos] && ' ' != v[*pos])
 967                 (*pos)++;
 968         return(ROFFRULE_DENY);
 969 }
 970
 971 /* ARGSUSED */
 972 static enum rofferr
 973 roff_line_ignore(ROFF_ARGS)
 974 {
 975
 976         if (ROFF_it == tok)
 977                 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos, "it");
 978
 979         return(ROFF_IGN);
 980 }
 981
 982 /* ARGSUSED */
 983 static enum rofferr
 984 roff_cond(ROFF_ARGS)
 985 {
 986         int              sv;
 987         enum roffrule    rule;
 988
 989         /* Stack overflow! */
 990
 991         if (ROFF_ie == tok && r->rstackpos == RSTACK_MAX - 1) {
 992                 mandoc_msg(MANDOCERR_MEM, r->parse, ln, ppos, NULL);
 993                 return(ROFF_ERR);
 994         }
 995
 996         /* First, evaluate the conditional. */
 997
 998         if (ROFF_el == tok) {
 999                 /*
1000                  * An `.el' will get the value of the current rstack
1001                  * entry set in prior `ie' calls or defaults to DENY.
1002                  */
1003                 if (r->rstackpos < 0)
1004                         rule = ROFFRULE_DENY;
1005                 else
1006                         rule = r->rstack[r->rstackpos];
1007         } else
1008                 rule = roff_evalcond(*bufp, &pos);
1009
1010         sv = pos;
1011
1012         while (' ' == (*bufp)[pos])
1013                 pos++;
1014
1015         /*
1016          * Roff is weird.  If we have just white-space after the
1017          * conditional, it's considered the BODY and we exit without
1018          * really doing anything.  Warn about this.  It's probably
1019          * wrong.
1020          */
1021
1022         if ('\0' == (*bufp)[pos] && sv != pos) {
1023                 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
1024                 return(ROFF_IGN);
1025         }
1026
1027         roffnode_push(r, tok, NULL, ln, ppos);
1028
1029         r->last->rule = rule;
1030
1031         if (ROFF_ie == tok) {
1032                 /*
1033                  * An if-else will put the NEGATION of the current
1034                  * evaluated conditional into the stack.
1035                  */
1036                 r->rstackpos++;
1037                 if (ROFFRULE_DENY == r->last->rule)
1038                         r->rstack[r->rstackpos] = ROFFRULE_ALLOW;
1039                 else
1040                         r->rstack[r->rstackpos] = ROFFRULE_DENY;
1041         }
1042
1043         /* If the parent has false as its rule, then so do we. */
1044
1045         if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
1046                 r->last->rule = ROFFRULE_DENY;
1047
1048         /*
1049          * Determine scope.  If we're invoked with "\{" trailing the
1050          * conditional, then we're in a multiline scope.  Else our scope
1051          * expires on the next line.
1052          */
1053
1054         r->last->endspan = 1;
1055
1056         if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1057                 r->last->endspan = -1;
1058                 pos += 2;
1059         }
1060
1061         /*
1062          * If there are no arguments on the line, the next-line scope is
1063          * assumed.
1064          */
1065
1066         if ('\0' == (*bufp)[pos])
1067                 return(ROFF_IGN);
1068
1069         /* Otherwise re-run the roff parser after recalculating. */
1070
1071         *offs = pos;
1072         return(ROFF_RERUN);
1073 }
1074
1075
1076 /* ARGSUSED */
1077 static enum rofferr
1078 roff_ds(ROFF_ARGS)
1079 {
1080         char            *name, *string;
1081
1082         /*
1083          * A symbol is named by the first word following the macro
1084          * invocation up to a space.  Its value is anything after the
1085          * name's trailing whitespace and optional double-quote.  Thus,
1086          *
1087          *  [.ds foo "bar  "     ]
1088          *
1089          * will have `bar  "     ' as its value.
1090          */
1091
1092         string = *bufp + pos;
1093         name = roff_getname(r, &string, ln, pos);
1094         if ('\0' == *name)
1095                 return(ROFF_IGN);
1096
1097         /* Read past initial double-quote. */
1098         if ('"' == *string)
1099                 string++;
1100
1101         /* The rest is the value. */
1102         roff_setstr(r, name, string, 0);
1103         return(ROFF_IGN);
1104 }
1105
1106
1107 /* ARGSUSED */
1108 static enum rofferr
1109 roff_nr(ROFF_ARGS)
1110 {
1111         const char      *key;
1112         char            *val;
1113         struct reg      *rg;
1114
1115         val = *bufp + pos;
1116         key = roff_getname(r, &val, ln, pos);
1117         rg = r->regs->regs;
1118
1119         if (0 == strcmp(key, "nS")) {
1120                 rg[(int)REG_nS].set = 1;
1121                 if ( ! roff_parse_nat(val, &rg[(int)REG_nS].v.u))
1122                         rg[(int)REG_nS].v.u = 0;
1123         }
1124
1125         return(ROFF_IGN);
1126 }
1127
1128 /* ARGSUSED */
1129 static enum rofferr
1130 roff_rm(ROFF_ARGS)
1131 {
1132         const char       *name;
1133         char             *cp;
1134
1135         cp = *bufp + pos;
1136         while ('\0' != *cp) {
1137                 name = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1138                 if ('\0' != *name)
1139                         roff_setstr(r, name, NULL, 0);
1140         }
1141         return(ROFF_IGN);
1142 }
1143
1144 /* ARGSUSED */
1145 static enum rofferr
1146 roff_TE(ROFF_ARGS)
1147 {
1148
1149         if (NULL == r->tbl)
1150                 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1151         else
1152                 tbl_end(r->tbl);
1153
1154         r->tbl = NULL;
1155         return(ROFF_IGN);
1156 }
1157
1158 /* ARGSUSED */
1159 static enum rofferr
1160 roff_T_(ROFF_ARGS)
1161 {
1162
1163         if (NULL == r->tbl)
1164                 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1165         else
1166                 tbl_restart(ppos, ln, r->tbl);
1167
1168         return(ROFF_IGN);
1169 }
1170
1171 /* ARGSUSED */
1172 static enum rofferr
1173 roff_EQ(ROFF_ARGS)
1174 {
1175         struct eqn_node *e;
1176
1177         assert(NULL == r->eqn);
1178         e = eqn_alloc(ppos, ln);
1179
1180         if (r->last_eqn)
1181                 r->last_eqn->next = e;
1182         else
1183                 r->first_eqn = r->last_eqn = e;
1184
1185         r->eqn = r->last_eqn = e;
1186         return(ROFF_IGN);
1187 }
1188
1189 /* ARGSUSED */
1190 static enum rofferr
1191 roff_EN(ROFF_ARGS)
1192 {
1193
1194         mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1195         return(ROFF_IGN);
1196 }
1197
1198 /* ARGSUSED */
1199 static enum rofferr
1200 roff_TS(ROFF_ARGS)
1201 {
1202         struct tbl_node *t;
1203
1204         if (r->tbl) {
1205                 mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
1206                 tbl_end(r->tbl);
1207         }
1208
1209         t = tbl_alloc(ppos, ln, r->parse);
1210
1211         if (r->last_tbl)
1212                 r->last_tbl->next = t;
1213         else
1214                 r->first_tbl = r->last_tbl = t;
1215
1216         r->tbl = r->last_tbl = t;
1217         return(ROFF_IGN);
1218 }
1219
1220 /* ARGSUSED */
1221 static enum rofferr
1222 roff_so(ROFF_ARGS)
1223 {
1224         char *name;
1225
1226         mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL);
1227
1228         /*
1229          * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
1230          * opening anything that's not in our cwd or anything beneath
1231          * it.  Thus, explicitly disallow traversing up the file-system
1232          * or using absolute paths.
1233          */
1234
1235         name = *bufp + pos;
1236         if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1237                 mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL);
1238                 return(ROFF_ERR);
1239         }
1240
1241         *offs = pos;
1242         return(ROFF_SO);
1243 }
1244
1245 /* ARGSUSED */
1246 static enum rofferr
1247 roff_userdef(ROFF_ARGS)
1248 {
1249         const char       *arg[9];
1250         char             *cp, *n1, *n2;
1251         int               i;
1252
1253         /*
1254          * Collect pointers to macro argument strings
1255          * and null-terminate them.
1256          */
1257         cp = *bufp + pos;
1258         for (i = 0; i < 9; i++)
1259                 arg[i] = '\0' == *cp ? "" :
1260                     mandoc_getarg(r->parse, &cp, ln, &pos);
1261
1262         /*
1263          * Expand macro arguments.
1264          */
1265         *szp = 0;
1266         n1 = cp = mandoc_strdup(r->current_string);
1267         while (NULL != (cp = strstr(cp, "\\$"))) {
1268                 i = cp[2] - '1';
1269                 if (0 > i || 8 < i) {
1270                         /* Not an argument invocation. */
1271                         cp += 2;
1272                         continue;
1273                 }
1274
1275                 *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1276                 n2 = mandoc_malloc(*szp);
1277
1278                 strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1279                 strlcat(n2, arg[i], *szp);
1280                 strlcat(n2, cp + 3, *szp);
1281
1282                 cp = n2 + (cp - n1);
1283                 free(n1);
1284                 n1 = n2;
1285         }
1286
1287         /*
1288          * Replace the macro invocation
1289          * by the expanded macro.
1290          */
1291         free(*bufp);
1292         *bufp = n1;
1293         if (0 == *szp)
1294                 *szp = strlen(*bufp) + 1;
1295
1296         return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1297            ROFF_REPARSE : ROFF_APPEND);
1298 }
1299
1300 static char *
1301 roff_getname(struct roff *r, char **cpp, int ln, int pos)
1302 {
1303         char     *name, *cp;
1304
1305         name = *cpp;
1306         if ('\0' == *name)
1307                 return(name);
1308
1309         /* Read until end of name. */
1310         for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
1311                 if ('\\' != *cp)
1312                         continue;
1313                 cp++;
1314                 if ('\\' == *cp)
1315                         continue;
1316                 mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
1317                 *cp = '\0';
1318                 name = cp;
1319         }
1320
1321         /* Nil-terminate name. */
1322         if ('\0' != *cp)
1323                 *(cp++) = '\0';
1324
1325         /* Read past spaces. */
1326         while (' ' == *cp)
1327                 cp++;
1328
1329         *cpp = cp;
1330         return(name);
1331 }
1332
1333 /*
1334  * Store *string into the user-defined string called *name.
1335  * In multiline mode, append to an existing entry and append '\n';
1336  * else replace the existing entry, if there is one.
1337  * To clear an existing entry, call with (*r, *name, NULL, 0).
1338  */
1339 static void
1340 roff_setstr(struct roff *r, const char *name, const char *string,
1341         int multiline)
1342 {
1343         struct roffstr   *n;
1344         char             *c;
1345         size_t            oldch, newch;
1346
1347         /* Search for an existing string with the same name. */
1348         n = r->first_string;
1349         while (n && strcmp(name, n->name))
1350                 n = n->next;
1351
1352         if (NULL == n) {
1353                 /* Create a new string table entry. */
1354                 n = mandoc_malloc(sizeof(struct roffstr));
1355                 n->name = mandoc_strdup(name);
1356                 n->string = NULL;
1357                 n->next = r->first_string;
1358                 r->first_string = n;
1359         } else if (0 == multiline) {
1360                 /* In multiline mode, append; else replace. */
1361                 free(n->string);
1362                 n->string = NULL;
1363         }
1364
1365         if (NULL == string)
1366                 return;
1367
1368         /*
1369          * One additional byte for the '\n' in multiline mode,
1370          * and one for the terminating '\0'.
1371          */
1372         newch = strlen(string) + (multiline ? 2u : 1u);
1373         if (NULL == n->string) {
1374                 n->string = mandoc_malloc(newch);
1375                 *n->string = '\0';
1376                 oldch = 0;
1377         } else {
1378                 oldch = strlen(n->string);
1379                 n->string = mandoc_realloc(n->string, oldch + newch);
1380         }
1381
1382         /* Skip existing content in the destination buffer. */
1383         c = n->string + (int)oldch;
1384
1385         /* Append new content to the destination buffer. */
1386         while (*string) {
1387                 /*
1388                  * Rudimentary roff copy mode:
1389                  * Handle escaped backslashes.
1390                  */
1391                 if ('\\' == *string && '\\' == *(string + 1))
1392                         string++;
1393                 *c++ = *string++;
1394         }
1395
1396         /* Append terminating bytes. */
1397         if (multiline)
1398                 *c++ = '\n';
1399         *c = '\0';
1400 }
1401
1402 static const char *
1403 roff_getstrn(const struct roff *r, const char *name, size_t len)
1404 {
1405         const struct roffstr *n;
1406
1407         n = r->first_string;
1408         while (n && (strncmp(name, n->name, len) || '\0' != n->name[(int)len]))
1409                 n = n->next;
1410
1411         return(n ? n->string : NULL);
1412 }
1413
1414 static void
1415 roff_freestr(struct roff *r)
1416 {
1417         struct roffstr   *n, *nn;
1418
1419         for (n = r->first_string; n; n = nn) {
1420                 free(n->name);
1421                 free(n->string);
1422                 nn = n->next;
1423                 free(n);
1424         }
1425
1426         r->first_string = NULL;
1427 }
1428
1429 const struct tbl_span *
1430 roff_span(const struct roff *r)
1431 {
1432
1433         return(r->tbl ? tbl_span(r->tbl) : NULL);
1434 }
1435
1436 const struct eqn *
1437 roff_eqn(const struct roff *r)
1438 {
1439
1440         return(r->last_eqn ? &r->last_eqn->eqn : NULL);
1441 }