- /*
- * Two buffers: ln and buf. buf is the input file and may be
- * memory mapped. ln is a line buffer and grows on-demand.
- */
-
- if ( ! read_whole_file(curp, &blk, &with_mmap)) {
- exit_status = MANDOCLEVEL_SYSERR;
- return;
- }
-
- if (NULL == curp->roff)
- curp->roff = roff_alloc(&curp->regs, curp, mmsg);
- assert(curp->roff);
- roff = curp->roff;
-
- for (i = 0, lnn = 1; i < (int)blk.sz;) {
- pos = 0;
- lnn_start = lnn;
- while (i < (int)blk.sz) {
- if ('\n' == blk.buf[i]) {
- ++i;
- ++lnn;
- break;
- }
-
- /*
- * Warn about bogus characters. If you're using
- * non-ASCII encoding, you're screwing your
- * readers. Since I'd rather this not happen,
- * I'll be helpful and drop these characters so
- * we don't display gibberish. Note to manual
- * writers: use special characters.
- */
-
- c = (unsigned char) blk.buf[i];
- if ( ! (isascii(c) && (isgraph(c) || isblank(c)))) {
- mmsg(MANDOCERR_BADCHAR, curp,
- lnn_start, pos, "ignoring byte");
- i++;
- continue;
- }
-
- /* Trailing backslash is like a plain character. */
- if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
- if (pos >= (int)ln.sz)
- resize_buf(&ln, 256);
- ln.buf[pos++] = blk.buf[i++];
- continue;
- }
- /* Found an escape and at least one other character. */
- if ('\n' == blk.buf[i + 1]) {
- /* Escaped newlines are skipped over */
- i += 2;
- ++lnn;
- continue;
- }
- if ('"' == blk.buf[i + 1]) {
- i += 2;
- /* Comment, skip to end of line */
- for (; i < (int)blk.sz; ++i) {
- if ('\n' == blk.buf[i]) {
- ++i;
- ++lnn;
- break;
- }
- }
- /* Back out trailing whitespace */
- for (; pos > 0; --pos) {
- if (ln.buf[pos - 1] != ' ')
- break;
- if (pos > 2 && ln.buf[pos - 2] == '\\')
- break;
- }
- break;
- }
- /* Some other escape sequence, copy and continue. */
- if (pos + 1 >= (int)ln.sz)
- resize_buf(&ln, 256);
-
- ln.buf[pos++] = blk.buf[i++];
- ln.buf[pos++] = blk.buf[i++];
- }
-
- if (pos >= (int)ln.sz)
- resize_buf(&ln, 256);
- ln.buf[pos] = '\0';
-
- /*
- * A significant amount of complexity is contained by
- * the roff preprocessor. It's line-oriented but can be
- * expressed on one line, so we need at times to
- * readjust our starting point and re-run it. The roff
- * preprocessor can also readjust the buffers with new
- * data, so we pass them in wholesale.
- */
-
- of = 0;
- do {
- re = roff_parseln(roff, lnn_start,
- &ln.buf, &ln.sz, of, &of);
- } while (ROFF_RERUN == re);
-
- if (ROFF_IGN == re) {
- continue;
- } else if (ROFF_ERR == re) {
- assert(MANDOCLEVEL_FATAL <= exit_status);
- goto cleanup;
- }
-
- /*
- * If input parsers have not been allocated, do so now.
- * We keep these instanced between parsers, but set them
- * locally per parse routine since we can use different
- * parsers with each one.
- */
-
- if ( ! (man || mdoc))
- pset(ln.buf + of, pos - of, curp, &man, &mdoc);
-
- /* Lastly, push down into the parsers themselves. */
-
- if (man && ! man_parseln(man, lnn_start, ln.buf, of)) {
- assert(MANDOCLEVEL_FATAL <= exit_status);
- goto cleanup;
- }
- if (mdoc && ! mdoc_parseln(mdoc, lnn_start, ln.buf, of)) {
- assert(MANDOCLEVEL_FATAL <= exit_status);
- goto cleanup;
- }
- }
-
- /* NOTE a parser may not have been assigned, yet. */
-
- if ( ! (man || mdoc)) {
- fprintf(stderr, "%s: Not a manual\n", curp->file);
- exit_status = MANDOCLEVEL_FATAL;