-static void
-parsebuf(struct curparse *curp, struct buf blk, int start)
-{
- struct buf ln;
- int i, pos, lnn, of;
- unsigned char c;
- struct man *man;
- struct mdoc *mdoc;
- struct roff *roff;
-
- man = curp->man;
- mdoc = curp->mdoc;
- roff = curp->roff;
-
- memset(&ln, 0, sizeof(struct buf));
-
- lnn = curp->line; /* line number in the real file */
- pos = 0; /* byte number in the ln buffer */
-
- for (i = 0; i < (int)blk.sz;) {
- if (0 == pos && '\0' == blk.buf[i])
- break;
- if (start)
- curp->line = lnn;
-
- while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) {
- if ('\n' == blk.buf[i]) {
- ++i;
- ++lnn;
- break;
- }
-
- /*
- * Warn about bogus characters. If you're using
- * non-ASCII encoding, you're screwing your
- * readers. Since I'd rather this not happen,
- * I'll be helpful and drop these characters so
- * we don't display gibberish. Note to manual
- * writers: use special characters.
- */
-
- c = (unsigned char) blk.buf[i];
- if ( ! (isascii(c) && (isgraph(c) || isblank(c)))) {
- mmsg(MANDOCERR_BADCHAR, curp,
- curp->line, pos, "ignoring byte");
- i++;
- continue;
- }
-
- /* Trailing backslash is like a plain character. */
- if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
- if (pos >= (int)ln.sz)
- resize_buf(&ln, 256);
- ln.buf[pos++] = blk.buf[i++];
- continue;
- }
- /* Found an escape and at least one other character. */
- if ('\n' == blk.buf[i + 1]) {
- /* Escaped newlines are skipped over */
- i += 2;
- ++lnn;
- continue;
- }
- if ('"' == blk.buf[i + 1]) {
- i += 2;
- /* Comment, skip to end of line */
- for (; i < (int)blk.sz; ++i) {
- if ('\n' == blk.buf[i]) {
- ++i;
- ++lnn;
- break;
- }
- }
- /* Backout trailing whitespaces */
- for (; pos > 0; --pos) {
- if (ln.buf[pos - 1] != ' ')
- break;
- if (pos > 2 && ln.buf[pos - 2] == '\\')
- break;
- }
- break;
- }
- /* Some other escape sequence, copy and continue. */
- if (pos + 1 >= (int)ln.sz)
- resize_buf(&ln, 256);
-
- ln.buf[pos++] = blk.buf[i++];
- ln.buf[pos++] = blk.buf[i++];
- }
-
- if (pos >= (int)ln.sz)
- resize_buf(&ln, 256);
- ln.buf[pos] = '\0';
-
- /*
- * A significant amount of complexity is contained by
- * the roff preprocessor. It's line-oriented but can be
- * expressed on one line, so we need at times to
- * readjust our starting point and re-run it. The roff
- * preprocessor can also readjust the buffers with new
- * data, so we pass them in wholesale.
- */
-
- of = 0;
-rerun:
- switch (roff_parseln(roff, curp->line, &ln.buf, &ln.sz,
- of, &of)) {
- case (ROFF_REPARSE):
- parsebuf(curp, ln, 0);
- pos = 0;
- continue;
- case (ROFF_APPEND):
- pos = strlen(ln.buf);
- continue;
- case (ROFF_RERUN):
- goto rerun;
- case (ROFF_IGN):
- pos = 0;
- continue;
- case (ROFF_ERR):
- assert(MANDOCLEVEL_FATAL <= exit_status);
- break;
- case (ROFF_SO):
- if (pfile(ln.buf + of, curp)) {
- pos = 0;
- continue;
- } else
- break;
- case (ROFF_CONT):
- break;
- }
-
- /*
- * If input parsers have not been allocated, do so now.
- * We keep these instanced betwen parsers, but set them
- * locally per parse routine since we can use different
- * parsers with each one.
- */
-
- if ( ! (man || mdoc))
- pset(ln.buf + of, pos - of, curp, &man, &mdoc);
-
- /* Lastly, push down into the parsers themselves. */
-
- if (man && ! man_parseln(man, curp->line, ln.buf, of)) {
- assert(MANDOCLEVEL_FATAL <= exit_status);
- break;
- }
- if (mdoc && ! mdoc_parseln(mdoc, curp->line, ln.buf, of)) {
- assert(MANDOCLEVEL_FATAL <= exit_status);
- break;
- }
-
- /* Temporary buffers typically are not full. */
- if (0 == start && '\0' == blk.buf[i])
- break;
-
- /* Start the next input line. */
- pos = 0;
- }
-
- free(ln.buf);
-}
-
-
-static void
-pset(const char *buf, int pos, struct curparse *curp,
- struct man **man, struct mdoc **mdoc)
-{
- int i;
-
- /*
- * Try to intuit which kind of manual parser should be used. If
- * passed in by command-line (-man, -mdoc), then use that
- * explicitly. If passed as -mandoc, then try to guess from the
- * line: either skip dot-lines, use -mdoc when finding `.Dt', or
- * default to -man, which is more lenient.
- */
-
- if ('.' == buf[0] || '\'' == buf[0]) {
- for (i = 1; buf[i]; i++)
- if (' ' != buf[i] && '\t' != buf[i])
- break;
- if ('\0' == buf[i])
- return;
- }
-
- switch (curp->inttype) {
- case (INTT_MDOC):
- if (NULL == curp->mdoc)
- curp->mdoc = mdoc_alloc(&curp->regs, curp, mmsg);
- assert(curp->mdoc);
- *mdoc = curp->mdoc;
- return;
- case (INTT_MAN):
- if (NULL == curp->man)
- curp->man = man_alloc(&curp->regs, curp, mmsg);
- assert(curp->man);
- *man = curp->man;
- return;
- default:
- break;
- }
-
- if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3)) {
- if (NULL == curp->mdoc)
- curp->mdoc = mdoc_alloc(&curp->regs, curp, mmsg);
- assert(curp->mdoc);
- *mdoc = curp->mdoc;
- return;
- }
-
- if (NULL == curp->man)
- curp->man = man_alloc(&curp->regs, curp, mmsg);
- assert(curp->man);
- *man = curp->man;
-}
-
-