mdocterm.c

   1 /* $Id: mdocterm.c,v 1.43 2009/03/15 07:08:53 kristaps Exp $ */
   2 /*
   3  * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
   4  *
   5  * Permission to use, copy, modify, and distribute this software for any
   6  * purpose with or without fee is hereby granted, provided that the
   7  * above copyright notice and this permission notice appear in all
   8  * copies.
   9  *
  10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
  11  * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
  12  * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
  13  * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
  14  * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
  15  * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
  16  * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
  17  * PERFORMANCE OF THIS SOFTWARE.
  18  */
  19 #include <sys/types.h>
  20
  21 #include <assert.h>
  22 #include <ctype.h>
  23 #include <err.h>
  24 #include <getopt.h>
  25 #include <stdio.h>
  26 #include <stdlib.h>
  27 #include <string.h>
  28 #include <unistd.h>
  29
  30 #include "mmain.h"
  31 #include "term.h"
  32
  33 struct  nroffopt {
     /*
      * Command-line state filled in by option().  The fl_ and arg_
      * members record the single-letter options -h, -i, -m, -n, -o, -r
      * and -T; main() zeroes the whole structure first, so an option
      * that was never given leaves its member zeroed.  -O is not stored
      * here: option() applies it to the terminal state through termp.
      */
  34         int               fl_h;  /* Set to 1 when -h is given. */
  35         int               fl_i;  /* Set to 1 when -i is given. */
  36         char             *arg_m; /* Argument of -m. */
  37         char             *arg_n; /* Argument of -n. */
  38         char             *arg_o; /* Argument of -o. */
  39         char             *arg_r; /* Argument of -r. */
  40         char             *arg_T; /* Argument of -T. */
  41         struct termp     *termp; /* Ephemeral; main() points it at its local termp. */
  42 };
  43
  44 struct  termseq {
     /* One escape-sequence name and the symbol it selects. */
  45         const char       *enc; /* Escape name; nescape() matches it with memcmp(). */
  46         int               sym; /* Symbol value nescape() hands to symbola(). */
  47 };
  48
  49 dead_pre void             punt(struct nroffopt *, char *) dead_post;
     /*
      * punt() above is what main() falls back to when mmain_process()
      * fails; its definition is not in the visible part of this file.
      *
      * Option parsing and document-level output follow.
      */
  50 static  int               option(void *, int, char *);
  51 static  int               optsopt(struct termp *, char *);
  52 static  void              body(struct termp *,
  53                                 struct termpair *,
  54                                 const struct mdoc_meta *,
  55                                 const struct mdoc_node *);
  56 static  void              header(struct termp *,
  57                                 const struct mdoc_meta *);
  58 static  void              footer(struct termp *,
  59                                 const struct mdoc_meta *);
  60
     /*
      * Word-level helpers (escape decoding and appends to the output
      * line buffer) and the node check that body() runs first.
      */
  61 static  void              pword(struct termp *, const char *, size_t);
  62 static  void              pescape(struct termp *, const char *,
  63                                 size_t *, size_t);
  64 static  void              nescape(struct termp *,
  65                                 const char *, size_t);
  66 static  void              chara(struct termp *, char);
  67 static  void              stringa(struct termp *,
  68                                 const char *, size_t);
  69 static  void              symbola(struct termp *, enum tsym);
  70 static  void              sanity(const struct mdoc_node *);
  71
  72 #ifdef __linux__
     /*
      * header() and footer() use strlcpy()/strlcat(); on Linux they are
      * declared here by hand.  NOTE(review): presumably a compat object
      * supplies the definitions at link time -- confirm in the build.
      */
  73 extern  size_t            strlcat(char *, const char *, size_t);
  74 extern  size_t            strlcpy(char *, const char *, size_t);
  75 #endif
  76
  77 static  struct termseq    termenc1[] = {
     /*
      * One-byte escape names, searched linearly by nescape() when the
      * escape is one byte long.  The { NULL, 0 } entry ends the search.
      * Note that TERMSYM_SLASH is rendered as a backslash by
      * termsym_ascii, which is why both "\\" and "e" map to it.
      */
  78         { "\\",           TERMSYM_SLASH },
  79         { "\'",           TERMSYM_RSQUOTE },
  80         { "`",            TERMSYM_LSQUOTE },
  81         { "-",            TERMSYM_HYPHEN },
  82         { " ",            TERMSYM_SPACE },
  83         { ".",            TERMSYM_PERIOD },
  84         { "&",            TERMSYM_BREAK },
  85         { "e",            TERMSYM_SLASH },
  86         { "q",            TERMSYM_DQUOTE },
  87         { "|",            TERMSYM_BREAK },
  88         { NULL,           0 }
  89 };
  90
  91 static  struct termseq    termenc2[] = {
     /*
      * Two-byte escape names, searched linearly by nescape() when the
      * escape is two bytes long.  Several names are aliases for the
      * same symbol.  The { NULL, 0 } entry ends the search.
      */
     /* Brackets and quotes. */
  92         { "rC",           TERMSYM_RBRACE },
  93         { "lC",           TERMSYM_LBRACE },
  94         { "rB",           TERMSYM_RBRACK },
  95         { "lB",           TERMSYM_LBRACK },
  96         { "ra",           TERMSYM_RANGLE },
  97         { "la",           TERMSYM_LANGLE },
  98         { "Lq",           TERMSYM_LDQUOTE },
  99         { "lq",           TERMSYM_LDQUOTE },
 100         { "Rq",           TERMSYM_RDQUOTE },
 101         { "rq",           TERMSYM_RDQUOTE },
 102         { "oq",           TERMSYM_LSQUOTE },
 103         { "aq",           TERMSYM_RSQUOTE },
 104
     /* Arrows. */
 105         { "<-",           TERMSYM_LARROW },
 106         { "->",           TERMSYM_RARROW },
 107         { "ua",           TERMSYM_UARROW },
 108         { "da",           TERMSYM_DARROW },
 109
     /* Miscellaneous marks. */
 110         { "bu",           TERMSYM_BULLET },
 111         { "Ba",           TERMSYM_BAR },
 112         { "ba",           TERMSYM_BAR },
 113         { "co",           TERMSYM_COPY },
 114         { "Am",           TERMSYM_AMP },
 115
     /* Relations and mathematical symbols. */
 116         { "Le",           TERMSYM_LE },
 117         { "<=",           TERMSYM_LE },
 118         { "Ge",           TERMSYM_GE },
 119         { ">=",           TERMSYM_GE },
 120         { "==",           TERMSYM_EQ },
 121         { "Ne",           TERMSYM_NEQ },
 122         { "!=",           TERMSYM_NEQ },
 123         { "Pm",           TERMSYM_PLUSMINUS },
 124         { "+-",           TERMSYM_PLUSMINUS },
 125         { "If",           TERMSYM_INF2 },
 126         { "if",           TERMSYM_INF },
 127         { "Na",           TERMSYM_NAN },
 128         { "na",           TERMSYM_NAN },
 129         { "**",           TERMSYM_ASTERISK },
 130         { "Gt",           TERMSYM_GT },
 131         { "Lt",           TERMSYM_LT },
 132
     /* Accents. */
 133         { "aa",           TERMSYM_ACUTE },
 134         { "ga",           TERMSYM_GRAVE },
 135
     /* Dashes. */
 136         { "en",           TERMSYM_EN },
 137         { "em",           TERMSYM_EM },
 138
 139         { "Pi",           TERMSYM_PI },
 140         { NULL,           0 }
 141 };
 142
 143 /* FIXME: abstract to dynamically-compiled table. */
 144 static  struct termsym    termsym_ascii[TERMSYM_MAX] = {
 145         { "]", 1 },             /* TERMSYM_RBRACK */
 146         { "[", 1 },             /* TERMSYM_LBRACK */
 147         { "<-", 2 },            /* TERMSYM_LARROW */
 148         { "->", 2 },            /* TERMSYM_RARROW */
 149         { "^", 1 },             /* TERMSYM_UARROW */
 150         { "v", 1 },             /* TERMSYM_DARROW */
 151         { "`", 1 },             /* TERMSYM_LSQUOTE */
 152         { "\'", 1 },            /* TERMSYM_RSQUOTE */
 153         { "\'", 1 },            /* TERMSYM_SQUOTE */
 154         { "``", 2 },            /* TERMSYM_LDQUOTE */
 155         { "\'\'", 2 },          /* TERMSYM_RDQUOTE */
 156         { "\"", 1 },            /* TERMSYM_DQUOTE */
 157         { "<", 1 },             /* TERMSYM_LT */
 158         { ">", 1 },             /* TERMSYM_GT */
 159         { "<=", 2 },            /* TERMSYM_LE */
 160         { ">=", 2 },            /* TERMSYM_GE */
 161         { "==", 2 },            /* TERMSYM_EQ */
 162         { "!=", 2 },            /* TERMSYM_NEQ */
 163         { "\'", 1 },            /* TERMSYM_ACUTE */
 164         { "`", 1 },             /* TERMSYM_GRAVE */
 165         { "pi", 2 },            /* TERMSYM_PI */
 166         { "+=", 2 },            /* TERMSYM_PLUSMINUS */
 167         { "oo", 2 },            /* TERMSYM_INF */
 168         { "infinity", 8 },      /* TERMSYM_INF2 */
 169         { "NaN", 3 },           /* TERMSYM_NAN */
 170         { "|", 1 },             /* TERMSYM_BAR */
 171         { "o", 1 },             /* TERMSYM_BULLET */
 172         { "&", 1 },             /* TERMSYM_AMP */
 173         { "--", 2 },            /* TERMSYM_EM */
 174         { "-", 1 },             /* TERMSYM_EN */
 175         { "(C)", 3 },           /* TERMSYM_COPY */
 176         { "*", 1 },             /* TERMSYM_ASTERISK */
 177         { "\\", 1 },            /* TERMSYM_SLASH */
 178         { "-", 1 },             /* TERMSYM_HYPHEN */
 179         { " ", 1 },             /* TERMSYM_SPACE */
 180         { ".", 1 },             /* TERMSYM_PERIOD */
 181         { "", 0 },              /* TERMSYM_BREAK */
 182         { "<", 1 },             /* TERMSYM_LANGLE */
 183         { ">", 1 },             /* TERMSYM_RANGLE */
 184         { "{", 1 },             /* TERMSYM_LBRACE */
 185         { "}", 1 },             /* TERMSYM_RBRACE */
 186 };
 187
 188 int
 189 main(int argc, char *argv[])
 190 {
 191         struct mmain      *p;
 192         const struct mdoc *mdoc;
 193         struct nroffopt    nroff;
 194         struct termp       termp;
 195         int                c;
 196         char              *in;
 197
 198         (void)memset(&termp, 0, sizeof(struct termp));
 199         (void)memset(&nroff, 0, sizeof(struct nroffopt));
 200
 201         termp.maxrmargin = termp.rmargin = 78; /* FIXME */
 202         termp.maxcols = 1024; /* FIXME */
 203         termp.offset = termp.col = 0;
 204         termp.flags = TERMP_NOSPACE;
 205         termp.symtab = termsym_ascii;
 206         termp.enc = TERMENC_NROFF;
 207
 208         nroff.termp = &termp;
 209
 210         p = mmain_alloc();
 211
 212         c = mmain_getopt(p, argc, argv, "[-Ooption...]",
 213                         "[infile]", "him:n:o:r:T:O:", &nroff, option);
 214
 215         /* FIXME: this needs to accept multiple outputs. */
 216         argv += c;
 217         if ((argc -= c) > 0)
 218                 in = *argv++;
 219         else
 220                 in = "-";
 221
 222         mmain_prepare(p, in);
 223
 224         if (NULL == (mdoc = mmain_process(p))) {
 225                 if (TERMP_NOPUNT & termp.iflags)
 226                         mmain_exit(p, 1);
 227                 mmain_free(p);
 228                 punt(&nroff, in);
 229                 /* NOTREACHED */
 230         }
 231
 232         if (NULL == (termp.buf = malloc(termp.maxcols)))
 233                 err(1, "malloc");
 234
 235         header(&termp, mdoc_meta(mdoc));
 236         body(&termp, NULL, mdoc_meta(mdoc), mdoc_node(mdoc));
 237         footer(&termp, mdoc_meta(mdoc));
 238
 239         free(termp.buf);
 240
 241         mmain_exit(p, 0);
 242         /* NOTREACHED */
 243 }
 244
 245
 246 static int
 247 optsopt(struct termp *p, char *arg)
 248 {
 249         char            *v;
 250         char            *toks[] = { "ansi", "nopunt", NULL };
 251
 252         while (*arg)
 253                 switch (getsubopt(&arg, toks, &v)) {
 254                 case (0):
 255                         p->enc = TERMENC_ANSI;
 256                         break;
 257                 case (2):
 258                         p->iflags |= TERMP_NOPUNT;
 259                         break;
 260                 default:
 261                         warnx("unknown -O argument");
 262                         return(0);
 263                 }
 264
 265         return(1);
 266 }
 267
 268
 269 static int
 270 option(void *ptr, int c, char *arg)
 271 {
 272         struct termp    *termp;
 273         struct nroffopt *nroff;
 274
 275         nroff = (struct nroffopt *)ptr;
 276         termp = nroff->termp;
 277
 278         switch (c) {
 279         case ('h'):
 280                 nroff->fl_h = 1;
 281                 break;
 282         case ('i'):
 283                 nroff->fl_i = 1;
 284                 break;
 285         case ('m'):
 286                 nroff->arg_m = arg;
 287                 break;
 288         case ('n'):
 289                 nroff->arg_n = arg;
 290                 break;
 291         case ('o'):
 292                 nroff->arg_o = arg;
 293                 break;
 294         case ('r'):
 295                 nroff->arg_r = arg;
 296                 break;
 297         case ('T'):
 298                 nroff->arg_T = arg;
 299                 break;
 300         case ('O'):
 301                 return(optsopt(termp, arg));
 302         default:
 303                 break;
 304         }
 305
 306         return(1);
 307 }
 308
 309
 310 /*
 311  * Flush a line of text.  A "line" is loosely defined as being something
 312  * that should be followed by a newline, regardless of whether it's
 313  * broken apart by newlines getting there.  A line can also be a
 314  * fragment of a columnar list.
 315  *
 316  * Specifically, a line is whatever's in p->buf of length p->col, which
 317  * is zeroed after this function returns.
 318  *
 319  * The variables TERMP_NOLPAD, TERMP_LITERAL and TERMP_NOBREAK are of
 320  * critical importance here.  Their behaviour follows:
 321  *
 322  *  - TERMP_NOLPAD: when beginning to write the line, don't left-pad the
 323  *    offset value.  This is useful when doing columnar lists where the
 324  *    prior column has right-padded.
 325  *
 326  *  - TERMP_NOBREAK: this is the most important and is used when making
 327  *    columns.  In short: don't print a newline and instead pad to the
 328  *    right margin.  Used in conjunction with TERMP_NOLPAD.
 329  *
 330  *  In-line line breaking:
 331  *
 332  *  If TERMP_NOBREAK is specified and the line overruns the right
 333  *  margin, it will break and pad-right to the right margin after
 334  *  writing.  If maxrmargin is violated, it will break and continue
 335  *  writing from the right-margin, which will lead to the above
 336  *  scenario upon exit.
 337  *
 338  *  Otherwise, the line will break at the right margin.  Extremely long
 339  *  lines will cause the system to emit a warning (TODO: hyphenate, if
 340  *  possible).
 341  */
 342 void
 343 flushln(struct termp *p)
 344 {
 345         size_t           i, j, vsz, vis, maxvis, mmax, bp;
 346
 347         /*
 348          * First, establish the maximum columns of "visible" content.
 349          * This is usually the difference between the right-margin and
 350          * an indentation, but can be, for tagged lists or columns, a
 351          * small set of values.
 352          */
 353
 354         assert(p->offset < p->rmargin);
 355         maxvis = p->rmargin - p->offset;
 356         mmax = p->maxrmargin - p->offset;
 357         bp = TERMP_NOBREAK & p->flags ? mmax : maxvis;
 358         vis = 0;
 359
 360         /*
 361          * If in the standard case (left-justified), then begin with our
 362          * indentation, otherwise (columns, etc.) just start spitting
 363          * out text.
 364          */
 365
 366         if ( ! (p->flags & TERMP_NOLPAD))
 367                 /* LINTED */
 368                 for (j = 0; j < p->offset; j++)
 369                         putchar(' ');
 370
 371         for (i = 0; i < p->col; i++) {
 372                 /*
 373                  * Count up visible word characters.  Control sequences
 374                  * (starting with the CSI) aren't counted.  A space
 375                  * generates a non-printing word, which is valid (the
 376                  * space is printed according to regular spacing rules).
 377                  */
 378
 379                 /* LINTED */
 380                 for (j = i, vsz = 0; j < p->col; j++) {
 381                         if (isspace((u_char)p->buf[j])) {
 382                                 break;
 383                         } else if (27 == p->buf[j]) {
 384                                 assert(TERMENC_ANSI == p->enc);
 385                                 assert(j + 5 <= p->col);
 386                                 j += 4;
 387                         } else if (8 == p->buf[j]) {
 388                                 assert(TERMENC_NROFF == p->enc);
 389                                 assert(j + 2 <= p->col);
 390                                 j += 1;
 391                         } else
 392                                 vsz++;
 393                 }
 394
 395                 /*
 396                  * Do line-breaking.  If we're greater than our
 397                  * break-point and already in-line, break to the next
 398                  * line and start writing.  If we're at the line start,
 399                  * then write out the word (TODO: hyphenate) and break
 400                  * in a subsequent loop invocation.
 401                  */
 402
 403                 if ( ! (TERMP_NOBREAK & p->flags)) {
 404                         if (vis && vis + vsz > bp) {
 405                                 putchar('\n');
 406                                 for (j = 0; j < p->offset; j++)
 407                                         putchar(' ');
 408                                 vis = 0;
 409                         } else if (vis + vsz > bp)
 410                                 warnx("word breaks right margin");
 411
 412                         /* TODO: hyphenate. */
 413
 414                 } else {
 415                         if (vis && vis + vsz > bp) {
 416                                 putchar('\n');
 417                                 for (j = 0; j < p->rmargin; j++)
 418                                         putchar(' ');
 419                                 vis = p->rmargin - p->offset;
 420                         } else if (vis + vsz > bp)
 421                                 warnx("word breaks right margin");
 422
 423                         /* TODO: hyphenate. */
 424                 }
 425
 426                 /*
 427                  * Write out the word and a trailing space.  Omit the
 428                  * space if we're the last word in the line or beyond
 429                  * our breakpoint.
 430                  */
 431
 432                 for ( ; i < p->col; i++) {
 433                         if (isspace((u_char)p->buf[i]))
 434                                 break;
 435                         putchar(p->buf[i]);
 436                 }
 437                 vis += vsz;
 438                 if (i < p->col && vis <= bp) {
 439                         putchar(' ');
 440                         vis++;
 441                 }
 442         }
 443
 444         /*
 445          * If we've overstepped our maximum visible no-break space, then
 446          * cause a newline and offset at the right margin.
 447          */
 448
 449         if ((TERMP_NOBREAK & p->flags) && vis >= maxvis) {
 450                 if ( ! (TERMP_NONOBREAK & p->flags)) {
 451                         putchar('\n');
 452                         for (i = 0; i < p->rmargin; i++)
 453                                 putchar(' ');
 454                 }
 455                 p->col = 0;
 456                 return;
 457         }
 458
 459         /*
 460          * If we're not to right-marginalise it (newline), then instead
 461          * pad to the right margin and stay off.
 462          */
 463
 464         if (p->flags & TERMP_NOBREAK) {
 465                 if ( ! (TERMP_NONOBREAK & p->flags))
 466                         for ( ; vis < maxvis; vis++)
 467                                 putchar(' ');
 468         } else
 469                 putchar('\n');
 470
 471         p->col = 0;
 472 }
 473
 474
 475 /*
 476  * A newline only breaks an existing line; it won't assert vertical
 477  * space.  All data in the output buffer is flushed prior to the newline
 478  * assertion.
 479  */
 480 void
 481 newln(struct termp *p)
 482 {
 483
 484         p->flags |= TERMP_NOSPACE;
 485         if (0 == p->col) {
 486                 p->flags &= ~TERMP_NOLPAD;
 487                 return;
 488         }
 489         flushln(p);
 490         p->flags &= ~TERMP_NOLPAD;
 491 }
 492
 493
 494 /*
 495  * Asserts a vertical space (a full, empty line-break between lines).
 496  * Note that if used twice, this will cause two blank lines and so on.
 497  * Any data in the output buffer is flushed first, by way of newln(),
 498  * and one newline is then written unconditionally.
 499  */
 500 void
 501 vspace(struct termp *p)
 502 {
 503
 504         newln(p);
 505         putchar('\n');
 506 }
 507
 508
 509 /*
 510  * Break apart a word into "pwords" (partial-words, usually from
 511  * breaking up a phrase into individual words) and, eventually, put them
 512  * into the output buffer.  If we're a literal word, then don't break up
 513  * the word and put it verbatim into the output buffer.
 514  */
 515 void
 516 word(struct termp *p, const char *word)
 517 {
 518         size_t           i, j, len;
 519
 520         if (p->flags & TERMP_LITERAL) {
 521                 pword(p, word, strlen(word));
 522                 return;
 523         }
 524
 525         if (0 == (len = strlen(word)))
 526                 errx(1, "blank line not in literal context");
 527
 528         if (mdoc_isdelim(word)) {
 529                 if ( ! (p->flags & TERMP_IGNDELIM))
 530                         p->flags |= TERMP_NOSPACE;
 531                 p->flags &= ~TERMP_IGNDELIM;
 532         }
 533
 534         /* LINTED */
 535         for (j = i = 0; i < len; i++) {
 536                 if ( ! isspace((u_char)word[i])) {
 537                         j++;
 538                         continue;
 539                 }
 540
 541                 /* Escaped spaces don't delimit... */
 542                 if (i > 0 && isspace((u_char)word[i]) &&
 543                                 '\\' == word[i - 1]) {
 544                         j++;
 545                         continue;
 546                 }
 547
 548                 if (0 == j)
 549                         continue;
 550                 assert(i >= j);
 551                 pword(p, &word[i - j], j);
 552                 j = 0;
 553         }
 554         if (j > 0) {
 555                 assert(i >= j);
 556                 pword(p, &word[i - j], j);
 557         }
 558 }
 559
 560
 561 /*
 562  * This is the main function for printing out nodes.  It's constituted
 563  * of PRE and POST functions, which correspond to prefix and infix
 564  * processing.  The termpair structure allows data to persist between
 565  * prefix and postfix invocations.
 566  */
 567 static void
 568 body(struct termp *p, struct termpair *ppair,
 569                 const struct mdoc_meta *meta,
 570                 const struct mdoc_node *node)
 571 {
 572         int              dochild;
 573         struct termpair  pair;
 574
 575         /* Some quick sanity-checking. */
 576
 577         sanity(node);
 578
 579         /* Pre-processing. */
 580
 581         dochild = 1;
 582         pair.ppair = ppair;
 583         pair.type = 0;
 584         pair.offset = pair.rmargin = 0;
 585         pair.flag = 0;
 586         pair.count = 0;
 587
 588         if (MDOC_TEXT != node->type) {
 589                 if (termacts[node->tok].pre)
 590                         if ( ! (*termacts[node->tok].pre)(p, &pair, meta, node))
 591                                 dochild = 0;
 592         } else /* MDOC_TEXT == node->type */
 593                 word(p, node->string);
 594
 595         /* Children. */
 596
 597         if (TERMPAIR_FLAG & pair.type)
 598                 p->flags |= pair.flag;
 599
 600         if (dochild && node->child)
 601                 body(p, &pair, meta, node->child);
 602
 603         if (TERMPAIR_FLAG & pair.type)
 604                 p->flags &= ~pair.flag;
 605
 606         /* Post-processing. */
 607
 608         if (MDOC_TEXT != node->type)
 609                 if (termacts[node->tok].post)
 610                         (*termacts[node->tok].post)(p, &pair, meta, node);
 611
 612         /* Siblings. */
 613
 614         if (node->next)
 615                 body(p, ppair, meta, node->next);
 616 }
 617
 618
 619 static void
 620 footer(struct termp *p, const struct mdoc_meta *meta)
 621 {
 622         struct tm       *tm;
 623         char            *buf, *os;
 624
 625         if (NULL == (buf = malloc(p->rmargin)))
 626                 err(1, "malloc");
 627         if (NULL == (os = malloc(p->rmargin)))
 628                 err(1, "malloc");
 629
 630         tm = localtime(&meta->date);
 631
 632 #ifdef __OpenBSD__
 633         if (NULL == strftime(buf, p->rmargin, "%B %d, %Y", tm))
 634 #else
 635         if (0 == strftime(buf, p->rmargin, "%B %d, %Y", tm))
 636 #endif
 637                 err(1, "strftime");
 638
 639         (void)strlcpy(os, meta->os, p->rmargin);
 640
 641         /*
 642          * This is /slightly/ different from regular groff output
 643          * because we don't have page numbers.  Print the following:
 644          *
 645          * OS                                            MDOCDATE
 646          */
 647
 648         vspace(p);
 649
 650         p->flags |= TERMP_NOSPACE | TERMP_NOBREAK;
 651         p->rmargin = p->maxrmargin - strlen(buf);
 652         p->offset = 0;
 653
 654         word(p, os);
 655         flushln(p);
 656
 657         p->flags |= TERMP_NOLPAD | TERMP_NOSPACE;
 658         p->offset = p->rmargin;
 659         p->rmargin = p->maxrmargin;
 660         p->flags &= ~TERMP_NOBREAK;
 661
 662         word(p, buf);
 663         flushln(p);
 664
 665         free(buf);
 666         free(os);
 667 }
 668
 669
 670 static void
 671 header(struct termp *p, const struct mdoc_meta *meta)
 672 {
 673         char            *buf, *title, *bufp;
 674
 675         p->rmargin = p->maxrmargin;
 676         p->offset = 0;
 677
 678         if (NULL == (buf = malloc(p->rmargin)))
 679                 err(1, "malloc");
 680         if (NULL == (title = malloc(p->rmargin)))
 681                 err(1, "malloc");
 682
 683         /*
 684          * The header is strange.  It has three components, which are
 685          * really two with the first duplicated.  It goes like this:
 686          *
 687          * IDENTIFIER              TITLE                   IDENTIFIER
 688          *
 689          * The IDENTIFIER is NAME(SECTION), which is the command-name
 690          * (if given, or "unknown" if not) followed by the manual page
 691          * section.  These are given in `Dt'.  The TITLE is a free-form
 692          * string depending on the manual volume.  If not specified, it
 693          * switches on the manual section.
 694          */
 695
 696         assert(meta->vol);
 697         (void)strlcpy(buf, meta->vol, p->rmargin);
 698
 699         if (meta->arch) {
 700                 (void)strlcat(buf, " (", p->rmargin);
 701                 (void)strlcat(buf, meta->arch, p->rmargin);
 702                 (void)strlcat(buf, ")", p->rmargin);
 703         }
 704
 705         (void)snprintf(title, p->rmargin, "%s(%d)",
 706                         meta->title, meta->msec);
 707
 708         for (bufp = title; *bufp; bufp++)
 709                 *bufp = toupper((u_char)*bufp);
 710
 711         p->offset = 0;
 712         p->rmargin = (p->maxrmargin - strlen(buf)) / 2;
 713         p->flags |= TERMP_NOBREAK | TERMP_NOSPACE;
 714
 715         word(p, title);
 716         flushln(p);
 717
 718         p->flags |= TERMP_NOLPAD | TERMP_NOSPACE;
 719         p->offset = p->rmargin;
 720         p->rmargin = p->maxrmargin - strlen(title);
 721
 722         word(p, buf);
 723         flushln(p);
 724
 725         p->offset = p->rmargin;
 726         p->rmargin = p->maxrmargin;
 727         p->flags &= ~TERMP_NOBREAK;
 728         p->flags |= TERMP_NOLPAD | TERMP_NOSPACE;
 729
 730         word(p, title);
 731         flushln(p);
 732
 733         p->rmargin = p->maxrmargin;
 734         p->offset = 0;
 735         p->flags &= ~TERMP_NOSPACE;
 736
 737         free(title);
 738         free(buf);
 739 }
 740
 741
 742 /*
 743  * Determine the symbol indicated by an escape sequences, that is, one
 744  * starting with a backslash.  Once done, we pass this value into the
 745  * output buffer by way of the symbol table.
 746  */
 747 static void
 748 nescape(struct termp *p, const char *word, size_t len)
 749 {
 750         struct termseq  *enc;
 751
 752         switch (len) {
 753         case (1):
 754                 enc = termenc1;
 755                 break;
 756         case (2):
 757                 enc = termenc2;
 758                 break;
 759         default:
 760                 warnx("unsupported %zu-byte escape sequence", len);
 761                 return;
 762         }
 763
 764         for ( ; enc->enc; enc++)
 765                 if (0 == memcmp(enc->enc, word, len)) {
 766                         symbola(p, enc->sym);
 767                         return;
 768                 }
 769
 770         warnx("unsupported %zu-byte escape sequence", len);
 771 }
 772
 773
 /*
  * Handle an escape sequence: determine the extent of its name and pass
  * the name to the escape-symbol lookup, nescape().
  *
  * On entry word[*i] is the backslash, and `len' is the number of bytes
  * of `word' that belong to the current pword.  On return *i is the
  * index of the last byte consumed, so that the caller's loop increment
  * moves on to the byte after the escape.  The recognised forms are:
  *
  *   \x  \(xx  \[name]  \*x  \*(xx  \*[name]
  *
  * A malformed or truncated sequence is not fatal: a warning is printed
  * and the sequence is skipped.
  *
  * The search for the closing bracket is bounded by `len', like every
  * other access in here.  It used to run up to the string's terminating
  * NUL, so a bracket opened in one pword could be closed by a later
  * one, which also left *i beyond `len'.
  */
 static void
 pescape(struct termp *p, const char *word, size_t *i, size_t len)
 {
         size_t           j;

         /* Step over the backslash; it must not be the last byte. */

         if (++(*i) >= len) {
                 warnx("ignoring bad escape sequence");
                 return;
         }

         if ('(' == word[*i]) {
                 /* Two-byte name; both bytes must be present. */
                 (*i)++;
                 if (*i + 1 >= len) {
                         warnx("ignoring bad escape sequence");
                         return;
                 }
                 nescape(p, &word[*i], 2);
                 (*i)++;
                 return;

         } else if ('*' == word[*i]) {
                 /* Predefined string: \*x, \*(xx or \*[name]. */
                 (*i)++;
                 if (*i >= len) {
                         warnx("ignoring bad escape sequence");
                         return;
                 }
                 switch (word[*i]) {
                 case ('('):
                         (*i)++;
                         if (*i + 1 >= len) {
                                 warnx("ignoring bad escape sequence");
                                 return;
                         }
                         nescape(p, &word[*i], 2);
                         (*i)++;
                         return;
                 case ('['):
                         /* Continue with the bracketed form below. */
                         break;
                 default:
                         nescape(p, &word[*i], 1);
                         return;
                 }

         } else if ('[' != word[*i]) {
                 /* One-byte name. */
                 nescape(p, &word[*i], 1);
                 return;
         }

         /*
          * Bracketed name: step over the '[' and count the bytes up to
          * the closing ']', without leaving the pword.
          */

         (*i)++;
         for (j = 0; *i < len && word[*i] && ']' != word[*i]; (*i)++, j++)
                 /* Loop... */ ;

         if (*i >= len || 0 == word[*i]) {
                 warnx("ignoring bad escape sequence");
                 return;
         }
         nescape(p, &word[*i - j], j);
 }
 837
 838
 839 /*
 840  * Handle pwords, partial words, which may be either a single word or a
 841  * phrase that cannot be broken down (such as a literal string).  This
 842  * handles word styling.
 843  */
 844 static void
 845 pword(struct termp *p, const char *word, size_t len)
 846 {
 847         size_t           i;
 848
 849         if ( ! (TERMP_NOSPACE & p->flags) &&
 850                         ! (TERMP_LITERAL & p->flags))
 851                 chara(p, ' ');
 852
 853         if ( ! (p->flags & TERMP_NONOSPACE))
 854                 p->flags &= ~TERMP_NOSPACE;
 855
 856         /*
 857          * If ANSI (word-length styling), then apply our style now,
 858          * before the word.
 859          */
 860
 861         if (TERMENC_ANSI == p->enc && TERMP_STYLE & p->flags) {
 862                 if (TERMP_BOLD & p->flags) {
 863                         chara(p, 27);
 864                         stringa(p, "[01m", 4);
 865                 }
 866                 if (TERMP_UNDER & p->flags) {
 867                         chara(p, 27);
 868                         stringa(p, "[04m", 4);
 869                 }
 870                 if (TERMP_RED & p->flags) {
 871                         chara(p, 27);
 872                         stringa(p, "[31m", 4);
 873                 }
 874                 if (TERMP_GREEN & p->flags) {
 875                         chara(p, 27);
 876                         stringa(p, "[32m", 4);
 877                 }
 878                 if (TERMP_YELLOW & p->flags) {
 879                         chara(p, 27);
 880                         stringa(p, "[33m", 4);
 881                 }
 882                 if (TERMP_BLUE & p->flags) {
 883                         chara(p, 27);
 884                         stringa(p, "[34m", 4);
 885                 }
 886                 if (TERMP_MAGENTA & p->flags) {
 887                         chara(p, 27);
 888                         stringa(p, "[35m", 4);
 889                 }
 890                 if (TERMP_CYAN & p->flags) {
 891                         chara(p, 27);
 892                         stringa(p, "[36m", 4);
 893                 }
 894         }
 895
 896         for (i = 0; i < len; i++) {
 897                 if ('\\' == word[i]) {
 898                         pescape(p, word, &i, len);
 899                         continue;
 900                 }
 901
 902                 if (TERMENC_NROFF == p->enc &&
 903                                 TERMP_STYLE & p->flags) {
 904                         if (TERMP_BOLD & p->flags) {
 905                                 chara(p, word[i]);
 906                                 chara(p, 8);
 907                         }
 908                         if (TERMP_UNDER & p->flags) {
 909                                 chara(p, '_');
 910                                 chara(p, 8);
 911                         }
 912                 }
 913
 914                 chara(p, word[i]);
 915         }
 916
 917         if (TERMENC_ANSI == p->enc && TERMP_STYLE & p->flags) {
 918                 chara(p, 27);
 919                 stringa(p, "[00m", 4);
 920         }
 921 }
 922
 923
 924 /*
 925  * Add a symbol to the output line buffer.
 926  */
 927 static void
 928 symbola(struct termp *p, enum tsym sym)
 929 {
 930
 931         assert(p->symtab[sym].sym);
 932         stringa(p, p->symtab[sym].sym, p->symtab[sym].sz);
 933 }
 934
 935
 936 /*
 937  * Like chara() but for arbitrary-length buffers.  Resize the buffer by
 938  * a factor of two (if the buffer is less than that) or the buffer's
 939  * size.
 940  */
 941 static void
 942 stringa(struct termp *p, const char *c, size_t sz)
 943 {
 944         size_t           s;
 945
 946         if (0 == sz)
 947                 return;
 948
 949         s = sz > p->maxcols * 2 ? sz : p->maxcols * 2;
 950
 951         assert(c);
 952         if (p->col + sz >= p->maxcols) {
 953                 p->buf = realloc(p->buf, s);
 954                 if (NULL == p->buf)
 955                         err(1, "realloc");
 956                 p->maxcols = s;
 957         }
 958
 959         (void)memcpy(&p->buf[p->col], c, sz);
 960         p->col += sz;
 961 }
 962
 963
 964 /*
 965  * Insert a single character into the line-buffer.  If the buffer's
 966  * space is exceeded, then allocate more space by doubling the buffer
 967  * size.
 968  */
 969 static void
 970 chara(struct termp *p, char c)
 971 {
 972
 973         if (p->col + 1 >= p->maxcols) {
 974                 p->buf = realloc(p->buf, p->maxcols * 2);
 975                 if (NULL == p->buf)
 976                         err(1, "malloc");
 977                 p->maxcols *= 2;
 978         }
 979         p->buf[(p->col)++] = c;
 980 }
 981
 982
 983 static void
 984 sanity(const struct mdoc_node *n)
 985 {
 986
 987         switch (n->type) {
 988         case (MDOC_TEXT):
 989                 if (n->child)
 990                         errx(1, "regular form violated (1)");
 991                 if (NULL == n->parent)
 992                         errx(1, "regular form violated (2)");
 993                 if (NULL == n->string)
 994                         errx(1, "regular form violated (3)");
 995                 switch (n->parent->type) {
 996                 case (MDOC_TEXT):
 997                         /* FALLTHROUGH */
 998                 case (MDOC_ROOT):
 999                         errx(1, "regular form violated (4)");
1000                         /* NOTREACHED */
1001                 default:
1002                         break;
1003                 }
1004                 break;
1005         case (MDOC_ELEM):
1006                 if (NULL == n->parent)
1007                         errx(1, "regular form violated (5)");
1008                 switch (n->parent->type) {
1009                 case (MDOC_TAIL):
1010                         /* FALLTHROUGH */
1011                 case (MDOC_BODY):
1012                         /* FALLTHROUGH */
1013                 case (MDOC_HEAD):
1014                         break;
1015                 default:
1016                         errx(1, "regular form violated (6)");
1017                         /* NOTREACHED */
1018                 }
1019                 if (n->child) switch (n->child->type) {
1020                 case (MDOC_TEXT):
1021                         break;
1022                 default:
1023                         errx(1, "regular form violated (7(");
1024                         /* NOTREACHED */
1025                 }
1026                 break;
1027         case (MDOC_HEAD):
1028                 /* FALLTHROUGH */
1029         case (MDOC_BODY):
1030                 /* FALLTHROUGH */
1031         case (MDOC_TAIL):
1032                 if (NULL == n->parent)
1033                         errx(1, "regular form violated (8)");
1034                 if (MDOC_BLOCK != n->parent->type)
1035                         errx(1, "regular form violated (9)");
1036                 if (n->child) switch (n->child->type) {
1037                 case (MDOC_BLOCK):
1038                         /* FALLTHROUGH */
1039                 case (MDOC_ELEM):
1040                         /* FALLTHROUGH */
1041                 case (MDOC_TEXT):
1042                         break;
1043                 default:
1044                         errx(1, "regular form violated (a)");
1045                         /* NOTREACHED */
1046                 }
1047                 break;
1048         case (MDOC_BLOCK):
1049                 if (NULL == n->parent)
1050                         errx(1, "regular form violated (b)");
1051                 if (NULL == n->child)
1052                         errx(1, "regular form violated (c)");
1053                 switch (n->parent->type) {
1054                 case (MDOC_ROOT):
1055                         /* FALLTHROUGH */
1056                 case (MDOC_HEAD):
1057                         /* FALLTHROUGH */
1058                 case (MDOC_BODY):
1059                         /* FALLTHROUGH */
1060                 case (MDOC_TAIL):
1061                         break;
1062                 default:
1063                         errx(1, "regular form violated (d)");
1064                         /* NOTREACHED */
1065                 }
1066                 switch (n->child->type) {
1067                 case (MDOC_ROOT):
1068                         /* FALLTHROUGH */
1069                 case (MDOC_ELEM):
1070                         errx(1, "regular form violated (e)");
1071                         /* NOTREACHED */
1072                 default:
1073                         break;
1074                 }
1075                 break;
1076         case (MDOC_ROOT):
1077                 if (n->parent)
1078                         errx(1, "regular form violated (f)");
1079                 if (NULL == n->child)
1080                         errx(1, "regular form violated (10)");
1081                 switch (n->child->type) {
1082                 case (MDOC_BLOCK):
1083                         break;
1084                 default:
1085                         errx(1, "regular form violated (11)");
1086                         /* NOTREACHED */
1087                 }
1088                 break;
1089         }
1090 }
1091
1092
1093 dead_pre void
1094 punt(struct nroffopt *nroff, char *in)
1095 {
1096         char            *args[32];
1097         char             arg0[32], argm[32];
1098         int              i;
1099
1100         warnx("punting to nroff!");
1101
1102         i = 0;
1103
1104         (void)strlcpy(arg0, "nroff", 32);
1105         args[i++] = arg0;
1106
1107         if (nroff->fl_h)
1108                 args[i++] = "-h";
1109         if (nroff->fl_i)
1110                 args[i++] = "-i";
1111
1112         if (nroff->arg_m) {
1113                 (void)strlcpy(argm, "-m", 32);
1114                 (void)strlcat(argm, nroff->arg_m, 32);
1115                 args[i++] = argm;
1116         } else
1117                 args[i++] = "-mandoc";
1118
1119         args[i++] = in;
1120         args[i++] = (char *)NULL;
1121
1122         (void)execvp("nroff", args);
1123         errx(1, "exec");
1124         /* NOTREACHED */
1125 }
1126