- memset(&curp->regs, 0, sizeof(struct regset));
-
- /* Reset the current-parse compilers. */
-
- if (curp->mdoc)
- mdoc_reset(curp->mdoc);
- if (curp->man)
- man_reset(curp->man);
-
- assert(curp->roff);
- roff_reset(curp->roff);
-
- if (curp->exit_status < curp->file_status)
- curp->exit_status = curp->file_status;
-
- return;
-}
-
-/*
- * Pull one opened input file completely into memory and hand the
- * resulting buffer to parsebuf().  Runs once per opened file, so it
- * may be entered several times during a single parse sequence when
- * files are nested (i.e., from `so').
- * If the file cannot be read, file_status is set to
- * MANDOCLEVEL_SYSERR and nothing is parsed.
- */
-static void
-pdesc(struct curparse *curp)
-{
-	struct buf	 input;
-	int		 mapped;
-
-	if (read_whole_file(curp, &input, &mapped)) {
-		/* Line numbers restart with every file. */
-		curp->line = 1;
-
-		parsebuf(curp, input, 1);
-
-		/*
-		 * Unmap or free the buffer, depending on the flag
-		 * filled in by read_whole_file().
-		 */
-		if (mapped)
-			munmap(input.buf, input.sz);
-		else
-			free(input.buf);
-		return;
-	}
-
-	curp->file_status = MANDOCLEVEL_SYSERR;
-}
-
-/*
- * Main parse routine for an opened file. This is called for each
- * opened file and simply loops around the full input file, possibly
- * nesting (i.e., with `so').
- *
- * curp: parse state; the line number, reparse counter, file status
- * and the active man/mdoc parser are read and updated here.
- * blk: the input buffer to parse, passed by value.
- * start: non-zero for a buffer read from a file (pdesc() passes 1),
- * zero when this function re-enters itself on a line that
- * roff_parseln() returned with ROFF_REPARSE. With start == 0 a NUL
- * byte ends the input, and curp->line and curp->reparse_count are
- * not reset at the start of each line.
- */
-static void
-parsebuf(struct curparse *curp, struct buf blk, int start)
-{
- const struct tbl_span *span;
- struct buf ln; /* the logical input line being assembled */
- enum rofferr rr;
- int i, of, rc; /* i: read position in blk; of: offset into ln */
- int pos; /* byte number in the ln buffer */
- int lnn; /* line number in the real file */
- unsigned char c;
-
- memset(&ln, 0, sizeof(struct buf));
-
- lnn = curp->line;
- pos = 0;
-
- for (i = 0; i < (int)blk.sz; ) {
- if (0 == pos && '\0' == blk.buf[i])
- break; /* a NUL at the start of a line ends the input */
-
- if (start) {
- curp->line = lnn;
- curp->reparse_count = 0;
- }
-
- while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) {
-
- /*
- * When finding an unescaped newline character,
- * leave the character loop to process the line.
- * Skip a preceding carriage return, if any.
- */
-
- if ('\r' == blk.buf[i] && i + 1 < (int)blk.sz &&
- '\n' == blk.buf[i + 1])
- ++i;
- if ('\n' == blk.buf[i]) {
- ++i;
- ++lnn;
- break;
- }
-
- /*
- * Warn about bogus characters. If you're using
- * non-ASCII encoding, you're screwing your
- * readers. Since I'd rather this not happen,
- * I'll be helpful and drop these characters so
- * we don't display gibberish. Note to manual
- * writers: use special characters.
- * Only ASCII bytes that are graphic or blank
- * are kept; anything else is reported with
- * MANDOCERR_BADCHAR and skipped.
- */
-
- c = (unsigned char) blk.buf[i];
-
- if ( ! (isascii(c) &&
- (isgraph(c) || isblank(c)))) {
- mmsg(MANDOCERR_BADCHAR, curp,
- curp->line, pos, "ignoring byte");
- i++;
- continue;
- }
-
- /* Trailing backslash = a plain char. */
-
- if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
- if (pos >= (int)ln.sz)
- resize_buf(&ln, 256);
- ln.buf[pos++] = blk.buf[i++];
- continue;
- }
-
- /*
- * Found escape and at least one other character.
- * When it's a newline character, skip it.
- * When there is a carriage return in between,
- * skip that one as well.
- * The real line counter still advances, but the
- * logical line in ln continues.
- */
-
- if ('\r' == blk.buf[i + 1] && i + 2 < (int)blk.sz &&
- '\n' == blk.buf[i + 2])
- ++i;
- if ('\n' == blk.buf[i + 1]) {
- i += 2;
- ++lnn;
- continue;
- }
-
- if ('"' == blk.buf[i + 1]) {
- i += 2;
- /* Comment, skip to end of line */
- for (; i < (int)blk.sz; ++i) {
- if ('\n' == blk.buf[i]) {
- ++i;
- ++lnn;
- break;
- }
- }
-
- /* Backout trailing whitespaces */
- for (; pos > 0; --pos) {
- if (ln.buf[pos - 1] != ' ')
- break;
- if (pos > 2 && ln.buf[pos - 2] == '\\')
- break; /* keep a space preceded by a backslash */
- }
- break;
- }
-
- /* Some other escape sequence, copy & cont. */
-
- if (pos + 1 >= (int)ln.sz)
- resize_buf(&ln, 256);
-
- ln.buf[pos++] = blk.buf[i++];
- ln.buf[pos++] = blk.buf[i++];
- }
-
- if (pos >= (int)ln.sz)
- resize_buf(&ln, 256);
-
- ln.buf[pos] = '\0';
-
- /*
- * A significant amount of complexity is contained by
- * the roff preprocessor. It's line-oriented but can be
- * expressed on one line, so we need at times to
- * readjust our starting point and re-run it. The roff
- * preprocessor can also readjust the buffers with new
- * data, so we pass them in wholesale.
- *
- * How the return values are handled below:
- * ROFF_REPARSE: feed ln back into parsebuf() with
- * start == 0, at most REPARSE_LIMIT times per line;
- * beyond that, report MANDOCERR_ROFFLOOP.
- * ROFF_APPEND: keep ln and continue reading input
- * at its end (pos is set to strlen(ln.buf)).
- * ROFF_RERUN: call roff_parseln() again with the
- * offset it returned in `of'.
- * ROFF_IGN: drop the line.
- * ROFF_SO: pass ln.buf + of to pfile() (presumably
- * the name of the file to include); a zero return
- * falls through to the error check below.
- * Anything else falls through to the parsers.
- */
-
- of = 0;
-
-rerun:
- rr = roff_parseln
- (curp->roff, curp->line,
- &ln.buf, &ln.sz, of, &of);
-
- switch (rr) {
- case (ROFF_REPARSE):
- if (REPARSE_LIMIT >= ++curp->reparse_count)
- parsebuf(curp, ln, 0);
- else
- mmsg(MANDOCERR_ROFFLOOP, curp,
- curp->line, pos, NULL);
- pos = 0;
- continue;
- case (ROFF_APPEND):
- pos = (int)strlen(ln.buf);
- continue;
- case (ROFF_RERUN):
- goto rerun;
- case (ROFF_IGN):
- pos = 0;
- continue;
- case (ROFF_ERR):
- assert(MANDOCLEVEL_FATAL <= curp->file_status);
- break;
- case (ROFF_SO):
- if (pfile(ln.buf + of, curp)) {
- pos = 0;
- continue;
- } else
- break;
- default:
- break;
- }
-
- /*
- * If we encounter errors in the recursive parsebuf()
- * call, make sure we don't continue parsing.
- */
-
- if (MANDOCLEVEL_FATAL <= curp->file_status)
- break;
-
- /*
- * If input parsers have not been allocated, do so now.
- * We keep these instanced between parsers, but set them
- * locally per parse routine since we can use different
- * parsers with each one.
- */
-
- if ( ! (curp->man || curp->mdoc))
- pset(ln.buf + of, pos - of, curp);
-
- /*
- * Lastly, push down into the parsers themselves. One
- * of these will have already been set in the pset()
- * routine.
- * If libroff returns ROFF_TBL, then add it to the
- * currently open parse. Since we only get here if
- * there does exist data (see tbl_data.c), we're
- * guaranteed that something's been allocated.
- * Do the same for ROFF_EQN.
- * NOTE(review): the ROFF_TBL and ROFF_EQN branches do
- * not check that either parser is non-NULL; they rely
- * on the guarantee stated above.
- */
-
- rc = -1; /* stays non-zero when no parser is called */
-
- if (ROFF_TBL == rr)
- while (NULL != (span = roff_span(curp->roff))) {
- rc = curp->man ?
- man_addspan(curp->man, span) :
- mdoc_addspan(curp->mdoc, span);
- if (0 == rc)
- break;
- }
- else if (ROFF_EQN == rr)
- rc = curp->mdoc ?
- mdoc_addeqn(curp->mdoc,
- roff_eqn(curp->roff)) :
- man_addeqn(curp->man,
- roff_eqn(curp->roff));
- else if (curp->man || curp->mdoc)
- rc = curp->man ?
- man_parseln(curp->man,
- curp->line, ln.buf, of) :
- mdoc_parseln(curp->mdoc,
- curp->line, ln.buf, of);
-
- if (0 == rc) { /* a parser returned zero: stop parsing */
- assert(MANDOCLEVEL_FATAL <= curp->file_status);
- break;
- }
-
- /* Temporary buffers typically are not full. */
-
- if (0 == start && '\0' == blk.buf[i])
- break;
-
- /* Start the next input line. */
-
- pos = 0;
- }
-
- free(ln.buf);
-}
-
-static void
-pset(const char *buf, int pos, struct curparse *curp)
-{
- int i;
-
- /*
- * Try to intuit which kind of manual parser should be used. If
- * passed in by command-line (-man, -mdoc), then use that
- * explicitly. If passed as -mandoc, then try to guess from the
- * line: either skip dot-lines, use -mdoc when finding `.Dt', or
- * default to -man, which is more lenient.
- *
- * Separate out pmdoc/pman from mdoc/man: the first persists
- * through all parsers, while the latter is used per-parse.
- */
-
- if ('.' == buf[0] || '\'' == buf[0]) {
- for (i = 1; buf[i]; i++)
- if (' ' != buf[i] && '\t' != buf[i])
- break;
- if ('\0' == buf[i])
- return;
- }
-
- switch (curp->inttype) {
- case (INTT_MDOC):
- if (NULL == curp->pmdoc)
- curp->pmdoc = mdoc_alloc
- (&curp->regs, curp, mmsg);
- assert(curp->pmdoc);
- curp->mdoc = curp->pmdoc;
- return;
- case (INTT_MAN):
- if (NULL == curp->pman)
- curp->pman = man_alloc
- (&curp->regs, curp, mmsg);
- assert(curp->pman);
- curp->man = curp->pman;
- return;
- default:
- break;
- }
-
- if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3)) {
- if (NULL == curp->pmdoc)
- curp->pmdoc = mdoc_alloc
- (&curp->regs, curp, mmsg);
- assert(curp->pmdoc);
- curp->mdoc = curp->pmdoc;
- return;
- }