+ struct buf ln;
+ int i, of, rc;
+ int pos; /* byte number in the ln buffer */
+ int lnn; /* line number in the real file */
+ unsigned char c;
+
+ /*
+ * Main parse routine for an opened file. This is called for
+ * each opened file and simply loops around the full input file,
+ * possibly nesting (i.e., with `so').
+ */
+
+ memset(&ln, 0, sizeof(struct buf));
+
+ lnn = curp->line;
+ pos = 0;
+
+ for (i = 0; i < (int)blk.sz; ) {
+ if (0 == pos && '\0' == blk.buf[i])
+ break;
+
+ if (start)
+ curp->line = lnn;
+
+ while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) {
+ if ('\n' == blk.buf[i]) {
+ ++i;
+ ++lnn;
+ break;
+ }
+
+ /*
+ * Warn about bogus characters. If you're using
+ * non-ASCII encoding, you're screwing your
+ * readers. Since I'd rather this not happen,
+ * I'll be helpful and drop these characters so
+ * we don't display gibberish. Note to manual
+ * writers: use special characters.
+ */
+
+ c = (unsigned char) blk.buf[i];
+
+ if ( ! (isascii(c) &&
+ (isgraph(c) || isblank(c)))) {
+ mmsg(MANDOCERR_BADCHAR, curp,
+ curp->line, pos, "ignoring byte");
+ i++;
+ continue;
+ }
+
+ /* Trailing backslash = a plain char. */
+
+ if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
+ if (pos >= (int)ln.sz)
+ resize_buf(&ln, 256);
+ ln.buf[pos++] = blk.buf[i++];
+ continue;
+ }
+
+ /* Found escape & at least one other char. */
+
+ if ('\n' == blk.buf[i + 1]) {
+ i += 2;
+ /* Escaped newlines are skipped over */
+ ++lnn;
+ continue;
+ }
+
+ if ('"' == blk.buf[i + 1]) {
+ i += 2;
+ /* Comment, skip to end of line */
+ for (; i < (int)blk.sz; ++i) {
+ if ('\n' == blk.buf[i]) {
+ ++i;
+ ++lnn;
+ break;
+ }
+ }
+
+ /* Back out trailing whitespace. */
+ for (; pos > 0; --pos) {
+ if (ln.buf[pos - 1] != ' ')
+ break;
+ if (pos > 2 && ln.buf[pos - 2] == '\\')
+ break;
+ }
+ break;
+ }
+
+ /* Some other escape sequence, copy & cont. */
+
+ if (pos + 1 >= (int)ln.sz)
+ resize_buf(&ln, 256);
+
+ ln.buf[pos++] = blk.buf[i++];
+ ln.buf[pos++] = blk.buf[i++];
+ }
+
+ if (pos >= (int)ln.sz)
+ resize_buf(&ln, 256);
+
+ ln.buf[pos] = '\0';
+
+ /*
+ * A significant amount of complexity is contained by
+ * the roff preprocessor. It's line-oriented but can be
+ * expressed on one line, so we need at times to
+ * readjust our starting point and re-run it. The roff
+ * preprocessor can also readjust the buffers with new
+ * data, so we pass them in wholesale.
+ */
+
+ of = 0;
+
+rerun:
+ rc = roff_parseln
+ (curp->roff, curp->line,
+ &ln.buf, &ln.sz, of, &of);
+
+ switch (rc) {
+ case (ROFF_REPARSE):
+ parsebuf(curp, ln, 0);
+ pos = 0;
+ continue;
+ case (ROFF_APPEND):
+ pos = strlen(ln.buf);
+ continue;
+ case (ROFF_RERUN):
+ goto rerun;
+ case (ROFF_IGN):
+ pos = 0;
+ continue;
+ case (ROFF_ERR):
+ assert(MANDOCLEVEL_FATAL <= exit_status);
+ break;
+ case (ROFF_SO):
+ if (pfile(ln.buf + of, curp)) {
+ pos = 0;
+ continue;
+ } else
+ break;
+ case (ROFF_CONT):
+ break;
+ }
+
+ /*
+ * If input parsers have not been allocated, do so now.
+ * We keep these instanced between parsers, but set them
+ * locally per parse routine since we can use different
+ * parsers with each one.
+ */
+
+ if ( ! (curp->man || curp->mdoc))
+ pset(ln.buf + of, pos - of, curp);
+
+ /*
+ * Lastly, push down into the parsers themselves. One
+ * of these will have already been set in the pset()
+ * routine.
+ */
+
+ if (curp->man || curp->mdoc) {
+ rc = curp->man ?
+ man_parseln(curp->man,
+ curp->line, ln.buf, of) :
+ mdoc_parseln(curp->mdoc,
+ curp->line, ln.buf, of);
+
+ if ( ! rc) {
+ assert(MANDOCLEVEL_FATAL <= exit_status);
+ break;
+ }
+ }
+
+ /* Temporary buffers typically are not full. */
+
+ if (0 == start && '\0' == blk.buf[i])
+ break;
+
+ /* Start the next input line. */
+
+ pos = 0;
+ }
+
+ free(ln.buf);
+}
+
+static void
+pset(const char *buf, int pos, struct curparse *curp)
+{
+ int i;