roff.c

   1 /*      $Id: roff.c,v 1.125 2011/02/06 20:36:36 kristaps Exp $ */
   2 /*
   3  * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
   4  * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
   5  *
   6  * Permission to use, copy, modify, and distribute this software for any
   7  * purpose with or without fee is hereby granted, provided that the above
   8  * copyright notice and this permission notice appear in all copies.
   9  *
  10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
  11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
  13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17  */
  18 #ifdef HAVE_CONFIG_H
  19 #include "config.h"
  20 #endif
  21
  22 #include <assert.h>
  23 #include <errno.h>
  24 #include <ctype.h>
  25 #include <limits.h>
  26 #include <stdlib.h>
  27 #include <string.h>
  28 #include <stdio.h>
  29
  30 #include "mandoc.h"
  31 #include "roff.h"
  32 #include "libroff.h"
  33 #include "libmandoc.h"
  34
  35 #define RSTACK_MAX      128
  36
  37 #define ROFF_CTL(c) \
  38         ('.' == (c) || '\'' == (c))
  39
  40 enum    rofft {
  41         ROFF_ad,
  42         ROFF_am,
  43         ROFF_ami,
  44         ROFF_am1,
  45         ROFF_de,
  46         ROFF_dei,
  47         ROFF_de1,
  48         ROFF_ds,
  49         ROFF_el,
  50         ROFF_hy,
  51         ROFF_ie,
  52         ROFF_if,
  53         ROFF_ig,
  54         ROFF_it,
  55         ROFF_ne,
  56         ROFF_nh,
  57         ROFF_nr,
  58         ROFF_ns,
  59         ROFF_ps,
  60         ROFF_rm,
  61         ROFF_so,
  62         ROFF_ta,
  63         ROFF_tr,
  64         ROFF_TS,
  65         ROFF_TE,
  66         ROFF_T_,
  67         ROFF_EQ,
  68         ROFF_EN,
  69         ROFF_cblock,
  70         ROFF_ccond, /* FIXME: remove this. */
  71         ROFF_USERDEF,
  72         ROFF_MAX
  73 };
  74
  75 enum    roffrule {
  76         ROFFRULE_ALLOW,
  77         ROFFRULE_DENY
  78 };
  79
  80 struct  roffstr {
  81         char            *name; /* key of symbol */
  82         char            *string; /* current value */
  83         struct roffstr  *next; /* next in list */
  84 };
  85
  86 struct  roff {
  87         struct roffnode *last; /* leaf of stack */
  88         mandocmsg        msg; /* err/warn/fatal messages */
  89         void            *data; /* privdata for messages */
  90         enum roffrule    rstack[RSTACK_MAX]; /* stack of !`ie' rules */
  91         int              rstackpos; /* position in rstack */
  92         struct regset   *regs; /* read/writable registers */
  93         struct roffstr  *first_string; /* user-defined strings & macros */
  94         const char      *current_string; /* value of last called user macro */
  95         struct tbl_node *first_tbl; /* first table parsed */
  96         struct tbl_node *last_tbl; /* last table parsed */
  97         struct tbl_node *tbl; /* current table being parsed */
  98         struct eqn_node *last_eqn; /* last equation parsed */
  99         struct eqn_node *first_eqn; /* first equation parsed */
 100         struct eqn_node *eqn; /* current equation being parsed */
 101 };
 102
 103 struct  roffnode {
 104         enum rofft       tok; /* type of node */
 105         struct roffnode *parent; /* up one in stack */
 106         int              line; /* parse line */
 107         int              col; /* parse col */
 108         char            *name; /* node name, e.g. macro name */
 109         char            *end; /* end-rules: custom token */
 110         int              endspan; /* end-rules: next-line or infty */
 111         enum roffrule    rule; /* current evaluation rule */
 112 };
 113
 114 #define ROFF_ARGS        struct roff *r, /* parse ctx */ \
 115                          enum rofft tok, /* tok of macro */ \
 116                          char **bufp, /* input buffer */ \
 117                          size_t *szp, /* size of input buffer */ \
 118                          int ln, /* parse line */ \
 119                          int ppos, /* original pos in buffer */ \
 120                          int pos, /* current pos in buffer */ \
 121                          int *offs /* reset offset of buffer data */
 122
 123 typedef enum rofferr (*roffproc)(ROFF_ARGS);
 124
 125 struct  roffmac {
 126         const char      *name; /* macro name */
 127         roffproc         proc; /* process new macro */
 128         roffproc         text; /* process as child text of macro */
 129         roffproc         sub; /* process as child of macro */
 130         int              flags;
 131 #define ROFFMAC_STRUCT  (1 << 0) /* always interpret */
 132         struct roffmac  *next;
 133 };
 134
 135 static  enum rofferr     roff_block(ROFF_ARGS);
 136 static  enum rofferr     roff_block_text(ROFF_ARGS);
 137 static  enum rofferr     roff_block_sub(ROFF_ARGS);
 138 static  enum rofferr     roff_cblock(ROFF_ARGS);
 139 static  enum rofferr     roff_ccond(ROFF_ARGS);
 140 static  enum rofferr     roff_cond(ROFF_ARGS);
 141 static  enum rofferr     roff_cond_text(ROFF_ARGS);
 142 static  enum rofferr     roff_cond_sub(ROFF_ARGS);
 143 static  enum rofferr     roff_ds(ROFF_ARGS);
 144 static  enum roffrule    roff_evalcond(const char *, int *);
 145 static  void             roff_freestr(struct roff *);
 146 static  char            *roff_getname(struct roff *, char **, int, int);
 147 static  const char      *roff_getstrn(const struct roff *,
 148                                 const char *, size_t);
 149 static  enum rofferr     roff_line_ignore(ROFF_ARGS);
 150 static  enum rofferr     roff_nr(ROFF_ARGS);
 151 static  int              roff_res(struct roff *,
 152                                 char **, size_t *, int);
 153 static  enum rofferr     roff_rm(ROFF_ARGS);
 154 static  void             roff_setstr(struct roff *,
 155                                 const char *, const char *, int);
 156 static  enum rofferr     roff_so(ROFF_ARGS);
 157 static  enum rofferr     roff_TE(ROFF_ARGS);
 158 static  enum rofferr     roff_TS(ROFF_ARGS);
 159 static  enum rofferr     roff_EQ(ROFF_ARGS);
 160 static  enum rofferr     roff_EN(ROFF_ARGS);
 161 static  enum rofferr     roff_T_(ROFF_ARGS);
 162 static  enum rofferr     roff_userdef(ROFF_ARGS);
 163
 164 /* See roff_hash_find() */
 165
 166 #define ASCII_HI         126
 167 #define ASCII_LO         33
 168 #define HASHWIDTH       (ASCII_HI - ASCII_LO + 1)
 169
 170 static  struct roffmac  *hash[HASHWIDTH];
 171
 172 static  struct roffmac   roffs[ROFF_MAX] = {
 173         { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
 174         { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
 175         { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
 176         { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
 177         { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
 178         { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
 179         { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
 180         { "ds", roff_ds, NULL, NULL, 0, NULL },
 181         { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
 182         { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
 183         { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
 184         { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
 185         { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
 186         { "it", roff_line_ignore, NULL, NULL, 0, NULL },
 187         { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
 188         { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
 189         { "nr", roff_nr, NULL, NULL, 0, NULL },
 190         { "ns", roff_line_ignore, NULL, NULL, 0, NULL },
 191         { "ps", roff_line_ignore, NULL, NULL, 0, NULL },
 192         { "rm", roff_rm, NULL, NULL, 0, NULL },
 193         { "so", roff_so, NULL, NULL, 0, NULL },
 194         { "ta", roff_line_ignore, NULL, NULL, 0, NULL },
 195         { "tr", roff_line_ignore, NULL, NULL, 0, NULL },
 196         { "TS", roff_TS, NULL, NULL, 0, NULL },
 197         { "TE", roff_TE, NULL, NULL, 0, NULL },
 198         { "T&", roff_T_, NULL, NULL, 0, NULL },
 199         { "EQ", roff_EQ, NULL, NULL, 0, NULL },
 200         { "EN", roff_EN, NULL, NULL, 0, NULL },
 201         { ".", roff_cblock, NULL, NULL, 0, NULL },
 202         { "\\}", roff_ccond, NULL, NULL, 0, NULL },
 203         { NULL, roff_userdef, NULL, NULL, 0, NULL },
 204 };
 205
 206 static  void             roff_free1(struct roff *);
 207 static  enum rofft       roff_hash_find(const char *, size_t);
 208 static  void             roff_hash_init(void);
 209 static  void             roffnode_cleanscope(struct roff *);
 210 static  void             roffnode_push(struct roff *, enum rofft,
 211                                 const char *, int, int);
 212 static  void             roffnode_pop(struct roff *);
 213 static  enum rofft       roff_parse(struct roff *, const char *, int *);
 214 static  int              roff_parse_nat(const char *, unsigned int *);
 215
 216 /* See roff_hash_find() */
 217 #define ROFF_HASH(p)    (p[0] - ASCII_LO)
 218
 219 static void
 220 roff_hash_init(void)
 221 {
 222         struct roffmac   *n;
 223         int               buc, i;
 224
 225         for (i = 0; i < (int)ROFF_USERDEF; i++) {
 226                 assert(roffs[i].name[0] >= ASCII_LO);
 227                 assert(roffs[i].name[0] <= ASCII_HI);
 228
 229                 buc = ROFF_HASH(roffs[i].name);
 230
 231                 if (NULL != (n = hash[buc])) {
 232                         for ( ; n->next; n = n->next)
 233                                 /* Do nothing. */ ;
 234                         n->next = &roffs[i];
 235                 } else
 236                         hash[buc] = &roffs[i];
 237         }
 238 }
 239
 240
 241 /*
 242  * Look up a roff token by its name.  Returns ROFF_MAX if no macro by
 243  * the nil-terminated string name could be found.
 244  */
 245 static enum rofft
 246 roff_hash_find(const char *p, size_t s)
 247 {
 248         int              buc;
 249         struct roffmac  *n;
 250
 251         /*
 252          * libroff has an extremely simple hashtable, for the time
 253          * being, which simply keys on the first character, which must
 254          * be printable, then walks a chain.  It works well enough until
 255          * optimised.
 256          */
 257
 258         if (p[0] < ASCII_LO || p[0] > ASCII_HI)
 259                 return(ROFF_MAX);
 260
 261         buc = ROFF_HASH(p);
 262
 263         if (NULL == (n = hash[buc]))
 264                 return(ROFF_MAX);
 265         for ( ; n; n = n->next)
 266                 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
 267                         return((enum rofft)(n - roffs));
 268
 269         return(ROFF_MAX);
 270 }
 271
 272
 273 /*
 274  * Pop the current node off of the stack of roff instructions currently
 275  * pending.
 276  */
 277 static void
 278 roffnode_pop(struct roff *r)
 279 {
 280         struct roffnode *p;
 281
 282         assert(r->last);
 283         p = r->last;
 284
 285         if (ROFF_el == p->tok)
 286                 if (r->rstackpos > -1)
 287                         r->rstackpos--;
 288
 289         r->last = r->last->parent;
 290         free(p->name);
 291         free(p->end);
 292         free(p);
 293 }
 294
 295
 296 /*
 297  * Push a roff node onto the instruction stack.  This must later be
 298  * removed with roffnode_pop().
 299  */
 300 static void
 301 roffnode_push(struct roff *r, enum rofft tok, const char *name,
 302                 int line, int col)
 303 {
 304         struct roffnode *p;
 305
 306         p = mandoc_calloc(1, sizeof(struct roffnode));
 307         p->tok = tok;
 308         if (name)
 309                 p->name = mandoc_strdup(name);
 310         p->parent = r->last;
 311         p->line = line;
 312         p->col = col;
 313         p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
 314
 315         r->last = p;
 316 }
 317
 318
 319 static void
 320 roff_free1(struct roff *r)
 321 {
 322         struct tbl_node *t;
 323         struct eqn_node *e;
 324
 325         while (NULL != (t = r->first_tbl)) {
 326                 r->first_tbl = t->next;
 327                 tbl_free(t);
 328         }
 329
 330         r->first_tbl = r->last_tbl = r->tbl = NULL;
 331
 332         while (NULL != (e = r->first_eqn)) {
 333                 r->first_eqn = e->next;
 334                 eqn_free(e);
 335         }
 336
 337         r->first_eqn = r->last_eqn = r->eqn = NULL;
 338
 339         while (r->last)
 340                 roffnode_pop(r);
 341
 342         roff_freestr(r);
 343 }
 344
 345
 346 void
 347 roff_reset(struct roff *r)
 348 {
 349
 350         roff_free1(r);
 351 }
 352
 353
 354 void
 355 roff_free(struct roff *r)
 356 {
 357
 358         roff_free1(r);
 359         free(r);
 360 }
 361
 362
 363 struct roff *
 364 roff_alloc(struct regset *regs, void *data, const mandocmsg msg)
 365 {
 366         struct roff     *r;
 367
 368         r = mandoc_calloc(1, sizeof(struct roff));
 369         r->regs = regs;
 370         r->msg = msg;
 371         r->data = data;
 372         r->rstackpos = -1;
 373
 374         roff_hash_init();
 375         return(r);
 376 }
 377
 378
 379 /*
 380  * Pre-filter each and every line for reserved words (one beginning with
 381  * `\*', e.g., `\*(ab').  These must be handled before the actual line
 382  * is processed.
 383  */
 384 static int
 385 roff_res(struct roff *r, char **bufp, size_t *szp, int pos)
 386 {
 387         const char      *stesc; /* start of an escape sequence ('\\') */
 388         const char      *stnam; /* start of the name, after "[(*" */
 389         const char      *cp;    /* end of the name, e.g. before ']' */
 390         const char      *res;   /* the string to be substituted */
 391         int              i, maxl;
 392         size_t           nsz;
 393         char            *n;
 394
 395         /* Search for a leading backslash and save a pointer to it. */
 396
 397         cp = *bufp + pos;
 398         while (NULL != (cp = strchr(cp, '\\'))) {
 399                 stesc = cp++;
 400
 401                 /*
 402                  * The second character must be an asterisk.
 403                  * If it isn't, skip it anyway:  It is escaped,
 404                  * so it can't start another escape sequence.
 405                  */
 406
 407                 if ('\0' == *cp)
 408                         return(1);
 409                 if ('*' != *cp++)
 410                         continue;
 411
 412                 /*
 413                  * The third character decides the length
 414                  * of the name of the string.
 415                  * Save a pointer to the name.
 416                  */
 417
 418                 switch (*cp) {
 419                 case ('\0'):
 420                         return(1);
 421                 case ('('):
 422                         cp++;
 423                         maxl = 2;
 424                         break;
 425                 case ('['):
 426                         cp++;
 427                         maxl = 0;
 428                         break;
 429                 default:
 430                         maxl = 1;
 431                         break;
 432                 }
 433                 stnam = cp;
 434
 435                 /* Advance to the end of the name. */
 436
 437                 for (i = 0; 0 == maxl || i < maxl; i++, cp++) {
 438                         if ('\0' == *cp)
 439                                 return(1); /* Error. */
 440                         if (0 == maxl && ']' == *cp)
 441                                 break;
 442                 }
 443
 444                 /*
 445                  * Retrieve the replacement string; if it is
 446                  * undefined, resume searching for escapes.
 447                  */
 448
 449                 res = roff_getstrn(r, stnam, (size_t)i);
 450
 451                 if (NULL == res) {
 452                         cp -= maxl ? 1 : 0;
 453                         continue;
 454                 }
 455
 456                 /* Replace the escape sequence by the string. */
 457
 458                 nsz = *szp + strlen(res) + 1;
 459                 n = mandoc_malloc(nsz);
 460
 461                 strlcpy(n, *bufp, (size_t)(stesc - *bufp + 1));
 462                 strlcat(n, res, nsz);
 463                 strlcat(n, cp + (maxl ? 0 : 1), nsz);
 464
 465                 free(*bufp);
 466
 467                 *bufp = n;
 468                 *szp = nsz;
 469                 return(0);
 470         }
 471
 472         return(1);
 473 }
 474
 475
 476 enum rofferr
 477 roff_parseln(struct roff *r, int ln, char **bufp,
 478                 size_t *szp, int pos, int *offs)
 479 {
 480         enum rofft       t;
 481         enum rofferr     e;
 482         int              ppos;
 483
 484         /*
 485          * Run the reserved-word filter only if we have some reserved
 486          * words to fill in.
 487          */
 488
 489         if (r->first_string && ! roff_res(r, bufp, szp, pos))
 490                 return(ROFF_REPARSE);
 491
 492         /*
 493          * First, if a scope is open and we're not a macro, pass the
 494          * text through the macro's filter.  If a scope isn't open and
 495          * we're not a macro, just let it through.
 496          * Finally, if there's an equation scope open, divert it into it
 497          * no matter our state.
 498          */
 499
 500         if (r->last && ! ROFF_CTL((*bufp)[pos])) {
 501                 t = r->last->tok;
 502                 assert(roffs[t].text);
 503                 e = (*roffs[t].text)
 504                         (r, t, bufp, szp, ln, pos, pos, offs);
 505                 assert(ROFF_IGN == e || ROFF_CONT == e);
 506                 if (ROFF_CONT != e)
 507                         return(e);
 508                 if (r->eqn)
 509                         return(eqn_read(&r->eqn, ln, *bufp, *offs));
 510                 if (r->tbl)
 511                         return(tbl_read(r->tbl, ln, *bufp, *offs));
 512                 return(ROFF_CONT);
 513         } else if ( ! ROFF_CTL((*bufp)[pos])) {
 514                 if (r->eqn)
 515                         return(eqn_read(&r->eqn, ln, *bufp, *offs));
 516                 if (r->tbl)
 517                         return(tbl_read(r->tbl, ln, *bufp, *offs));
 518                 return(ROFF_CONT);
 519         } else if (r->eqn)
 520                 return(eqn_read(&r->eqn, ln, *bufp, *offs));
 521
 522         /*
 523          * If a scope is open, go to the child handler for that macro,
 524          * as it may want to preprocess before doing anything with it.
 525          * Don't do so if an equation is open.
 526          */
 527
 528         if (r->last) {
 529                 t = r->last->tok;
 530                 assert(roffs[t].sub);
 531                 return((*roffs[t].sub)
 532                                 (r, t, bufp, szp,
 533                                  ln, pos, pos, offs));
 534         }
 535
 536         /*
 537          * Lastly, as we've no scope open, try to look up and execute
 538          * the new macro.  If no macro is found, simply return and let
 539          * the compilers handle it.
 540          */
 541
 542         ppos = pos;
 543         if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
 544                 return(ROFF_CONT);
 545
 546         assert(roffs[t].proc);
 547         return((*roffs[t].proc)
 548                         (r, t, bufp, szp,
 549                          ln, ppos, pos, offs));
 550 }
 551
 552
 553 void
 554 roff_endparse(struct roff *r)
 555 {
 556
 557         if (r->last)
 558                 (*r->msg)(MANDOCERR_SCOPEEXIT, r->data,
 559                                 r->last->line, r->last->col, NULL);
 560
 561         if (r->eqn) {
 562                 (*r->msg)(MANDOCERR_SCOPEEXIT, r->data,
 563                                 r->eqn->line, r->eqn->pos, NULL);
 564                 eqn_end(r->eqn);
 565                 r->eqn = NULL;
 566         }
 567
 568         if (r->tbl) {
 569                 (*r->msg)(MANDOCERR_SCOPEEXIT, r->data,
 570                                 r->tbl->line, r->tbl->pos, NULL);
 571                 tbl_end(r->tbl);
 572                 r->tbl = NULL;
 573         }
 574 }
 575
 576
 577 /*
 578  * Parse a roff node's type from the input buffer.  This must be in the
 579  * form of ".foo xxx" in the usual way.
 580  */
 581 static enum rofft
 582 roff_parse(struct roff *r, const char *buf, int *pos)
 583 {
 584         const char      *mac;
 585         size_t           maclen;
 586         enum rofft       t;
 587
 588         assert(ROFF_CTL(buf[*pos]));
 589         (*pos)++;
 590
 591         while (' ' == buf[*pos] || '\t' == buf[*pos])
 592                 (*pos)++;
 593
 594         if ('\0' == buf[*pos])
 595                 return(ROFF_MAX);
 596
 597         mac = buf + *pos;
 598         maclen = strcspn(mac, " \\\t\0");
 599
 600         t = (r->current_string = roff_getstrn(r, mac, maclen))
 601             ? ROFF_USERDEF : roff_hash_find(mac, maclen);
 602
 603         *pos += maclen;
 604         while (buf[*pos] && ' ' == buf[*pos])
 605                 (*pos)++;
 606
 607         return(t);
 608 }
 609
 610
 611 static int
 612 roff_parse_nat(const char *buf, unsigned int *res)
 613 {
 614         char            *ep;
 615         long             lval;
 616
 617         errno = 0;
 618         lval = strtol(buf, &ep, 10);
 619         if (buf[0] == '\0' || *ep != '\0')
 620                 return(0);
 621         if ((errno == ERANGE &&
 622                         (lval == LONG_MAX || lval == LONG_MIN)) ||
 623                         (lval > INT_MAX || lval < 0))
 624                 return(0);
 625
 626         *res = (unsigned int)lval;
 627         return(1);
 628 }
 629
 630
 631 /* ARGSUSED */
 632 static enum rofferr
 633 roff_cblock(ROFF_ARGS)
 634 {
 635
 636         /*
 637          * A block-close `..' should only be invoked as a child of an
 638          * ignore macro, otherwise raise a warning and just ignore it.
 639          */
 640
 641         if (NULL == r->last) {
 642                 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL);
 643                 return(ROFF_IGN);
 644         }
 645
 646         switch (r->last->tok) {
 647         case (ROFF_am):
 648                 /* FALLTHROUGH */
 649         case (ROFF_ami):
 650                 /* FALLTHROUGH */
 651         case (ROFF_am1):
 652                 /* FALLTHROUGH */
 653         case (ROFF_de):
 654                 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
 655                 /* FALLTHROUGH */
 656         case (ROFF_dei):
 657                 /* FALLTHROUGH */
 658         case (ROFF_ig):
 659                 break;
 660         default:
 661                 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL);
 662                 return(ROFF_IGN);
 663         }
 664
 665         if ((*bufp)[pos])
 666                 (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL);
 667
 668         roffnode_pop(r);
 669         roffnode_cleanscope(r);
 670         return(ROFF_IGN);
 671
 672 }
 673
 674
 675 static void
 676 roffnode_cleanscope(struct roff *r)
 677 {
 678
 679         while (r->last) {
 680                 if (--r->last->endspan < 0)
 681                         break;
 682                 roffnode_pop(r);
 683         }
 684 }
 685
 686
 687 /* ARGSUSED */
 688 static enum rofferr
 689 roff_ccond(ROFF_ARGS)
 690 {
 691
 692         if (NULL == r->last) {
 693                 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL);
 694                 return(ROFF_IGN);
 695         }
 696
 697         switch (r->last->tok) {
 698         case (ROFF_el):
 699                 /* FALLTHROUGH */
 700         case (ROFF_ie):
 701                 /* FALLTHROUGH */
 702         case (ROFF_if):
 703                 break;
 704         default:
 705                 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL);
 706                 return(ROFF_IGN);
 707         }
 708
 709         if (r->last->endspan > -1) {
 710                 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL);
 711                 return(ROFF_IGN);
 712         }
 713
 714         if ((*bufp)[pos])
 715                 (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL);
 716
 717         roffnode_pop(r);
 718         roffnode_cleanscope(r);
 719         return(ROFF_IGN);
 720 }
 721
 722
 723 /* ARGSUSED */
 724 static enum rofferr
 725 roff_block(ROFF_ARGS)
 726 {
 727         int             sv;
 728         size_t          sz;
 729         char            *name;
 730
 731         name = NULL;
 732
 733         if (ROFF_ig != tok) {
 734                 if ('\0' == (*bufp)[pos]) {
 735                         (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL);
 736                         return(ROFF_IGN);
 737                 }
 738
 739                 /*
 740                  * Re-write `de1', since we don't really care about
 741                  * groff's strange compatibility mode, into `de'.
 742                  */
 743
 744                 if (ROFF_de1 == tok)
 745                         tok = ROFF_de;
 746                 if (ROFF_de == tok)
 747                         name = *bufp + pos;
 748                 else
 749                         (*r->msg)(MANDOCERR_REQUEST, r->data, ln, ppos,
 750                             roffs[tok].name);
 751
 752                 while ((*bufp)[pos] && ' ' != (*bufp)[pos])
 753                         pos++;
 754
 755                 while (' ' == (*bufp)[pos])
 756                         (*bufp)[pos++] = '\0';
 757         }
 758
 759         roffnode_push(r, tok, name, ln, ppos);
 760
 761         /*
 762          * At the beginning of a `de' macro, clear the existing string
 763          * with the same name, if there is one.  New content will be
 764          * added from roff_block_text() in multiline mode.
 765          */
 766
 767         if (ROFF_de == tok)
 768                 roff_setstr(r, name, "", 0);
 769
 770         if ('\0' == (*bufp)[pos])
 771                 return(ROFF_IGN);
 772
 773         /* If present, process the custom end-of-line marker. */
 774
 775         sv = pos;
 776         while ((*bufp)[pos] &&
 777                         ' ' != (*bufp)[pos] &&
 778                         '\t' != (*bufp)[pos])
 779                 pos++;
 780
 781         /*
 782          * Note: groff does NOT like escape characters in the input.
 783          * Instead of detecting this, we're just going to let it fly and
 784          * to hell with it.
 785          */
 786
 787         assert(pos > sv);
 788         sz = (size_t)(pos - sv);
 789
 790         if (1 == sz && '.' == (*bufp)[sv])
 791                 return(ROFF_IGN);
 792
 793         r->last->end = mandoc_malloc(sz + 1);
 794
 795         memcpy(r->last->end, *bufp + sv, sz);
 796         r->last->end[(int)sz] = '\0';
 797
 798         if ((*bufp)[pos])
 799                 (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL);
 800
 801         return(ROFF_IGN);
 802 }
 803
 804
 805 /* ARGSUSED */
 806 static enum rofferr
 807 roff_block_sub(ROFF_ARGS)
 808 {
 809         enum rofft      t;
 810         int             i, j;
 811
 812         /*
 813          * First check whether a custom macro exists at this level.  If
 814          * it does, then check against it.  This is some of groff's
 815          * stranger behaviours.  If we encountered a custom end-scope
 816          * tag and that tag also happens to be a "real" macro, then we
 817          * need to try interpreting it again as a real macro.  If it's
 818          * not, then return ignore.  Else continue.
 819          */
 820
 821         if (r->last->end) {
 822                 i = pos + 1;
 823                 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
 824                         i++;
 825
 826                 for (j = 0; r->last->end[j]; j++, i++)
 827                         if ((*bufp)[i] != r->last->end[j])
 828                                 break;
 829
 830                 if ('\0' == r->last->end[j] &&
 831                                 ('\0' == (*bufp)[i] ||
 832                                  ' ' == (*bufp)[i] ||
 833                                  '\t' == (*bufp)[i])) {
 834                         roffnode_pop(r);
 835                         roffnode_cleanscope(r);
 836
 837                         if (ROFF_MAX != roff_parse(r, *bufp, &pos))
 838                                 return(ROFF_RERUN);
 839                         return(ROFF_IGN);
 840                 }
 841         }
 842
 843         /*
 844          * If we have no custom end-query or lookup failed, then try
 845          * pulling it out of the hashtable.
 846          */
 847
 848         ppos = pos;
 849         t = roff_parse(r, *bufp, &pos);
 850
 851         /*
 852          * Macros other than block-end are only significant
 853          * in `de' blocks; elsewhere, simply throw them away.
 854          */
 855         if (ROFF_cblock != t) {
 856                 if (ROFF_de == tok)
 857                         roff_setstr(r, r->last->name, *bufp + ppos, 1);
 858                 return(ROFF_IGN);
 859         }
 860
 861         assert(roffs[t].proc);
 862         return((*roffs[t].proc)(r, t, bufp, szp,
 863                                 ln, ppos, pos, offs));
 864 }
 865
 866
 867 /* ARGSUSED */
 868 static enum rofferr
 869 roff_block_text(ROFF_ARGS)
 870 {
 871
 872         if (ROFF_de == tok)
 873                 roff_setstr(r, r->last->name, *bufp + pos, 1);
 874
 875         return(ROFF_IGN);
 876 }
 877
 878
 879 /* ARGSUSED */
 880 static enum rofferr
 881 roff_cond_sub(ROFF_ARGS)
 882 {
 883         enum rofft       t;
 884         enum roffrule    rr;
 885
 886         ppos = pos;
 887         rr = r->last->rule;
 888
 889         /*
 890          * Clean out scope.  If we've closed ourselves, then don't
 891          * continue.
 892          */
 893
 894         roffnode_cleanscope(r);
 895
 896         if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) {
 897                 if ('\\' == (*bufp)[pos] && '}' == (*bufp)[pos + 1])
 898                         return(roff_ccond
 899                                 (r, ROFF_ccond, bufp, szp,
 900                                  ln, pos, pos + 2, offs));
 901                 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
 902         }
 903
 904         /*
 905          * A denied conditional must evaluate its children if and only
 906          * if they're either structurally required (such as loops and
 907          * conditionals) or a closing macro.
 908          */
 909         if (ROFFRULE_DENY == rr)
 910                 if ( ! (ROFFMAC_STRUCT & roffs[t].flags))
 911                         if (ROFF_ccond != t)
 912                                 return(ROFF_IGN);
 913
 914         assert(roffs[t].proc);
 915         return((*roffs[t].proc)(r, t, bufp, szp,
 916                                 ln, ppos, pos, offs));
 917 }
 918
 919
 920 /* ARGSUSED */
 921 static enum rofferr
 922 roff_cond_text(ROFF_ARGS)
 923 {
 924         char            *ep, *st;
 925         enum roffrule    rr;
 926
 927         rr = r->last->rule;
 928
 929         /*
 930          * We display the value of the text if out current evaluation
 931          * scope permits us to do so.
 932          */
 933
 934         /* FIXME: use roff_ccond? */
 935
 936         st = &(*bufp)[pos];
 937         if (NULL == (ep = strstr(st, "\\}"))) {
 938                 roffnode_cleanscope(r);
 939                 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
 940         }
 941
 942         if (ep == st || (ep > st && '\\' != *(ep - 1)))
 943                 roffnode_pop(r);
 944
 945         roffnode_cleanscope(r);
 946         return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
 947 }
 948
 949
 950 static enum roffrule
 951 roff_evalcond(const char *v, int *pos)
 952 {
 953
 954         switch (v[*pos]) {
 955         case ('n'):
 956                 (*pos)++;
 957                 return(ROFFRULE_ALLOW);
 958         case ('e'):
 959                 /* FALLTHROUGH */
 960         case ('o'):
 961                 /* FALLTHROUGH */
 962         case ('t'):
 963                 (*pos)++;
 964                 return(ROFFRULE_DENY);
 965         default:
 966                 break;
 967         }
 968
 969         while (v[*pos] && ' ' != v[*pos])
 970                 (*pos)++;
 971         return(ROFFRULE_DENY);
 972 }
 973
 974 /* ARGSUSED */
 975 static enum rofferr
 976 roff_line_ignore(ROFF_ARGS)
 977 {
 978
 979         if (ROFF_it == tok)
 980                 (*r->msg)(MANDOCERR_REQUEST, r->data, ln, ppos, "it");
 981
 982         return(ROFF_IGN);
 983 }
 984
 985 /* ARGSUSED */
 986 static enum rofferr
 987 roff_cond(ROFF_ARGS)
 988 {
 989         int              sv;
 990         enum roffrule    rule;
 991
 992         /* Stack overflow! */
 993
 994         if (ROFF_ie == tok && r->rstackpos == RSTACK_MAX - 1) {
 995                 (*r->msg)(MANDOCERR_MEM, r->data, ln, ppos, NULL);
 996                 return(ROFF_ERR);
 997         }
 998
 999         /* First, evaluate the conditional. */
1000
1001         if (ROFF_el == tok) {
1002                 /*
1003                  * An `.el' will get the value of the current rstack
1004                  * entry set in prior `ie' calls or defaults to DENY.
1005                  */
1006                 if (r->rstackpos < 0)
1007                         rule = ROFFRULE_DENY;
1008                 else
1009                         rule = r->rstack[r->rstackpos];
1010         } else
1011                 rule = roff_evalcond(*bufp, &pos);
1012
1013         sv = pos;
1014
1015         while (' ' == (*bufp)[pos])
1016                 pos++;
1017
1018         /*
1019          * Roff is weird.  If we have just white-space after the
1020          * conditional, it's considered the BODY and we exit without
1021          * really doing anything.  Warn about this.  It's probably
1022          * wrong.
1023          */
1024
1025         if ('\0' == (*bufp)[pos] && sv != pos) {
1026                 (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL);
1027                 return(ROFF_IGN);
1028         }
1029
1030         roffnode_push(r, tok, NULL, ln, ppos);
1031
1032         r->last->rule = rule;
1033
1034         if (ROFF_ie == tok) {
1035                 /*
1036                  * An if-else will put the NEGATION of the current
1037                  * evaluated conditional into the stack.
1038                  */
1039                 r->rstackpos++;
1040                 if (ROFFRULE_DENY == r->last->rule)
1041                         r->rstack[r->rstackpos] = ROFFRULE_ALLOW;
1042                 else
1043                         r->rstack[r->rstackpos] = ROFFRULE_DENY;
1044         }
1045
1046         /* If the parent has false as its rule, then so do we. */
1047
1048         if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
1049                 r->last->rule = ROFFRULE_DENY;
1050
1051         /*
1052          * Determine scope.  If we're invoked with "\{" trailing the
1053          * conditional, then we're in a multiline scope.  Else our scope
1054          * expires on the next line.
1055          */
1056
1057         r->last->endspan = 1;
1058
1059         if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1060                 r->last->endspan = -1;
1061                 pos += 2;
1062         }
1063
1064         /*
1065          * If there are no arguments on the line, the next-line scope is
1066          * assumed.
1067          */
1068
1069         if ('\0' == (*bufp)[pos])
1070                 return(ROFF_IGN);
1071
1072         /* Otherwise re-run the roff parser after recalculating. */
1073
1074         *offs = pos;
1075         return(ROFF_RERUN);
1076 }
1077
1078
1079 /* ARGSUSED */
1080 static enum rofferr
1081 roff_ds(ROFF_ARGS)
1082 {
1083         char            *name, *string;
1084
1085         /*
1086          * A symbol is named by the first word following the macro
1087          * invocation up to a space.  Its value is anything after the
1088          * name's trailing whitespace and optional double-quote.  Thus,
1089          *
1090          *  [.ds foo "bar  "     ]
1091          *
1092          * will have `bar  "     ' as its value.
1093          */
1094
1095         string = *bufp + pos;
1096         name = roff_getname(r, &string, ln, pos);
1097         if ('\0' == *name)
1098                 return(ROFF_IGN);
1099
1100         /* Read past initial double-quote. */
1101         if ('"' == *string)
1102                 string++;
1103
1104         /* The rest is the value. */
1105         roff_setstr(r, name, string, 0);
1106         return(ROFF_IGN);
1107 }
1108
1109
1110 /* ARGSUSED */
1111 static enum rofferr
1112 roff_nr(ROFF_ARGS)
1113 {
1114         const char      *key;
1115         char            *val;
1116         struct reg      *rg;
1117
1118         val = *bufp + pos;
1119         key = roff_getname(r, &val, ln, pos);
1120         rg = r->regs->regs;
1121
1122         if (0 == strcmp(key, "nS")) {
1123                 rg[(int)REG_nS].set = 1;
1124                 if ( ! roff_parse_nat(val, &rg[(int)REG_nS].v.u))
1125                         rg[(int)REG_nS].v.u = 0;
1126         }
1127
1128         return(ROFF_IGN);
1129 }
1130
1131 /* ARGSUSED */
1132 static enum rofferr
1133 roff_rm(ROFF_ARGS)
1134 {
1135         const char       *name;
1136         char             *cp;
1137
1138         cp = *bufp + pos;
1139         while ('\0' != *cp) {
1140                 name = roff_getname(r, &cp, ln, cp - *bufp);
1141                 if ('\0' != *name)
1142                         roff_setstr(r, name, NULL, 0);
1143         }
1144         return(ROFF_IGN);
1145 }
1146
1147 /* ARGSUSED */
1148 static enum rofferr
1149 roff_TE(ROFF_ARGS)
1150 {
1151
1152         if (NULL == r->tbl)
1153                 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL);
1154         else
1155                 tbl_end(r->tbl);
1156
1157         r->tbl = NULL;
1158         return(ROFF_IGN);
1159 }
1160
1161 /* ARGSUSED */
1162 static enum rofferr
1163 roff_T_(ROFF_ARGS)
1164 {
1165
1166         if (NULL == r->tbl)
1167                 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL);
1168         else
1169                 tbl_restart(ppos, ln, r->tbl);
1170
1171         return(ROFF_IGN);
1172 }
1173
1174 /* ARGSUSED */
1175 static enum rofferr
1176 roff_EQ(ROFF_ARGS)
1177 {
1178         struct eqn_node *e;
1179
1180         assert(NULL == r->eqn);
1181         e = eqn_alloc(ppos, ln);
1182
1183         if (r->last_eqn)
1184                 r->last_eqn->next = e;
1185         else
1186                 r->first_eqn = r->last_eqn = e;
1187
1188         r->eqn = r->last_eqn = e;
1189         return(ROFF_IGN);
1190 }
1191
1192 /* ARGSUSED */
1193 static enum rofferr
1194 roff_EN(ROFF_ARGS)
1195 {
1196
1197         (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL);
1198         return(ROFF_IGN);
1199 }
1200
1201 /* ARGSUSED */
1202 static enum rofferr
1203 roff_TS(ROFF_ARGS)
1204 {
1205         struct tbl_node *t;
1206
1207         if (r->tbl) {
1208                 (*r->msg)(MANDOCERR_SCOPEBROKEN, r->data, ln, ppos, NULL);
1209                 tbl_end(r->tbl);
1210         }
1211
1212         t = tbl_alloc(ppos, ln, r->data, r->msg);
1213
1214         if (r->last_tbl)
1215                 r->last_tbl->next = t;
1216         else
1217                 r->first_tbl = r->last_tbl = t;
1218
1219         r->tbl = r->last_tbl = t;
1220         return(ROFF_IGN);
1221 }
1222
1223 /* ARGSUSED */
1224 static enum rofferr
1225 roff_so(ROFF_ARGS)
1226 {
1227         char *name;
1228
1229         (*r->msg)(MANDOCERR_SO, r->data, ln, ppos, NULL);
1230
1231         /*
1232          * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
1233          * opening anything that's not in our cwd or anything beneath
1234          * it.  Thus, explicitly disallow traversing up the file-system
1235          * or using absolute paths.
1236          */
1237
1238         name = *bufp + pos;
1239         if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1240                 (*r->msg)(MANDOCERR_SOPATH, r->data, ln, pos, NULL);
1241                 return(ROFF_ERR);
1242         }
1243
1244         *offs = pos;
1245         return(ROFF_SO);
1246 }
1247
1248 /* ARGSUSED */
1249 static enum rofferr
1250 roff_userdef(ROFF_ARGS)
1251 {
1252         const char       *arg[9];
1253         char             *cp, *n1, *n2;
1254         int               i;
1255
1256         /*
1257          * Collect pointers to macro argument strings
1258          * and null-terminate them.
1259          */
1260         cp = *bufp + pos;
1261         for (i = 0; i < 9; i++)
1262                 arg[i] = '\0' == *cp ? "" :
1263                     mandoc_getarg(&cp, r->msg, r->data, ln, &pos);
1264
1265         /*
1266          * Expand macro arguments.
1267          */
1268         *szp = 0;
1269         n1 = cp = mandoc_strdup(r->current_string);
1270         while (NULL != (cp = strstr(cp, "\\$"))) {
1271                 i = cp[2] - '1';
1272                 if (0 > i || 8 < i) {
1273                         /* Not an argument invocation. */
1274                         cp += 2;
1275                         continue;
1276                 }
1277
1278                 *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1279                 n2 = mandoc_malloc(*szp);
1280
1281                 strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1282                 strlcat(n2, arg[i], *szp);
1283                 strlcat(n2, cp + 3, *szp);
1284
1285                 cp = n2 + (cp - n1);
1286                 free(n1);
1287                 n1 = n2;
1288         }
1289
1290         /*
1291          * Replace the macro invocation
1292          * by the expanded macro.
1293          */
1294         free(*bufp);
1295         *bufp = n1;
1296         if (0 == *szp)
1297                 *szp = strlen(*bufp) + 1;
1298
1299         return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1300            ROFF_REPARSE : ROFF_APPEND);
1301 }
1302
1303 static char *
1304 roff_getname(struct roff *r, char **cpp, int ln, int pos)
1305 {
1306         char     *name, *cp;
1307
1308         name = *cpp;
1309         if ('\0' == *name)
1310                 return(name);
1311
1312         /* Read until end of name. */
1313         for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
1314                 if ('\\' != *cp)
1315                         continue;
1316                 cp++;
1317                 if ('\\' == *cp)
1318                         continue;
1319                 (*r->msg)(MANDOCERR_NAMESC, r->data, ln, pos, NULL);
1320                 *cp = '\0';
1321                 name = cp;
1322         }
1323
1324         /* Nil-terminate name. */
1325         if ('\0' != *cp)
1326                 *(cp++) = '\0';
1327
1328         /* Read past spaces. */
1329         while (' ' == *cp)
1330                 cp++;
1331
1332         *cpp = cp;
1333         return(name);
1334 }
1335
1336 /*
1337  * Store *string into the user-defined string called *name.
1338  * In multiline mode, append to an existing entry and append '\n';
1339  * else replace the existing entry, if there is one.
1340  * To clear an existing entry, call with (*r, *name, NULL, 0).
1341  */
1342 static void
1343 roff_setstr(struct roff *r, const char *name, const char *string,
1344         int multiline)
1345 {
1346         struct roffstr   *n;
1347         char             *c;
1348         size_t            oldch, newch;
1349
1350         /* Search for an existing string with the same name. */
1351         n = r->first_string;
1352         while (n && strcmp(name, n->name))
1353                 n = n->next;
1354
1355         if (NULL == n) {
1356                 /* Create a new string table entry. */
1357                 n = mandoc_malloc(sizeof(struct roffstr));
1358                 n->name = mandoc_strdup(name);
1359                 n->string = NULL;
1360                 n->next = r->first_string;
1361                 r->first_string = n;
1362         } else if (0 == multiline) {
1363                 /* In multiline mode, append; else replace. */
1364                 free(n->string);
1365                 n->string = NULL;
1366         }
1367
1368         if (NULL == string)
1369                 return;
1370
1371         /*
1372          * One additional byte for the '\n' in multiline mode,
1373          * and one for the terminating '\0'.
1374          */
1375         newch = strlen(string) + (multiline ? 2 : 1);
1376         if (NULL == n->string) {
1377                 n->string = mandoc_malloc(newch);
1378                 *n->string = '\0';
1379                 oldch = 0;
1380         } else {
1381                 oldch = strlen(n->string);
1382                 n->string = mandoc_realloc(n->string, oldch + newch);
1383         }
1384
1385         /* Skip existing content in the destination buffer. */
1386         c = n->string + oldch;
1387
1388         /* Append new content to the destination buffer. */
1389         while (*string) {
1390                 /*
1391                  * Rudimentary roff copy mode:
1392                  * Handle escaped backslashes.
1393                  */
1394                 if ('\\' == *string && '\\' == *(string + 1))
1395                         string++;
1396                 *c++ = *string++;
1397         }
1398
1399         /* Append terminating bytes. */
1400         if (multiline)
1401                 *c++ = '\n';
1402         *c = '\0';
1403 }
1404
1405 static const char *
1406 roff_getstrn(const struct roff *r, const char *name, size_t len)
1407 {
1408         const struct roffstr *n;
1409
1410         n = r->first_string;
1411         while (n && (strncmp(name, n->name, len) || '\0' != n->name[(int)len]))
1412                 n = n->next;
1413
1414         return(n ? n->string : NULL);
1415 }
1416
1417 static void
1418 roff_freestr(struct roff *r)
1419 {
1420         struct roffstr   *n, *nn;
1421
1422         for (n = r->first_string; n; n = nn) {
1423                 free(n->name);
1424                 free(n->string);
1425                 nn = n->next;
1426                 free(n);
1427         }
1428
1429         r->first_string = NULL;
1430 }
1431
1432 const struct tbl_span *
1433 roff_span(const struct roff *r)
1434 {
1435
1436         return(r->tbl ? tbl_span(r->tbl) : NULL);
1437 }
1438
1439 const struct eqn *
1440 roff_eqn(const struct roff *r)
1441 {
1442
1443         return(r->last_eqn ? &r->last_eqn->eqn : NULL);
1444 }