-/*
- * Grow the storage behind buf.  A buffer whose recorded size is zero
- * is given `initial' bytes; otherwise the recorded size is doubled.
- * Running out of memory is fatal: the error is reported with perror()
- * and the process exits with MANDOCLEVEL_SYSERR.
- */
-static void
-resize_buf(struct buf *buf, size_t initial)
-{
-
-	if (buf->sz)
-		buf->sz *= 2;
-	else
-		buf->sz = initial;
-
-	buf->buf = realloc(buf->buf, buf->sz);
-	if (NULL != buf->buf)
-		return;
-
-	/* No way to recover from a failed allocation here. */
-	perror(NULL);
-	exit(MANDOCLEVEL_SYSERR);
-}
-
-
-/*
- * Load everything readable from curp->fd into fb.
- *
- * A regular file is mapped read-only with mmap().  If the descriptor
- * is not a regular file, or the mapping cannot be established, the
- * data is instead read into a heap buffer grown by resize_buf().
- * Input of 2^31 bytes or more is refused.
- *
- * Returns 1 on success, with fb->buf/fb->sz describing the data and
- * *with_mmap telling the caller whether fb->buf is a mapping (1) or
- * heap memory (0).  Returns 0 after printing a diagnostic on failure;
- * a heap buffer, if any, has been freed by then.  On the two early
- * failure paths (fstat() error, oversized regular file) neither fb
- * nor *with_mmap is written.
- */
-static int
-read_whole_file(struct curparse *curp, struct buf *fb, int *with_mmap)
-{
-	struct stat	 sb;
-	size_t		 used;
-	ssize_t		 nr;
-
-	if (fstat(curp->fd, &sb) == -1) {
-		perror(curp->file);
-		return(0);
-	}
-
-	/* Fast path: map a regular file in one go. */
-
-	if (S_ISREG(sb.st_mode)) {
-		if (sb.st_size >= (1U << 31)) {
-			fprintf(stderr, "%s: input too large\n",
-			    curp->file);
-			return(0);
-		}
-		*with_mmap = 1;
-		fb->sz = (size_t)sb.st_size;
-		fb->buf = mmap(NULL, fb->sz, PROT_READ,
-		    MAP_FILE|MAP_SHARED, curp->fd, 0);
-		if (MAP_FAILED != fb->buf)
-			return(1);
-		/* Mapping failed: fall through to plain read(). */
-	}
-
-	/*
-	 * Slow path (stdin, pipes, failed mappings): accumulate the
-	 * input chunk by chunk in a buffer that doubles when full.
-	 */
-
-	*with_mmap = 0;
-	fb->buf = NULL;
-	fb->sz = 0;
-
-	for (used = 0; ; used += (size_t)nr) {
-		if (used == fb->sz) {
-			if ((1U << 31) == fb->sz) {
-				fprintf(stderr, "%s: input too large\n",
-				    curp->file);
-				goto fail;
-			}
-			resize_buf(fb, 65536);
-		}
-
-		/* used < fb->sz <= 2^31 here, so the cast cannot truncate. */
-		nr = read(curp->fd, fb->buf + (int)used, fb->sz - used);
-		if (-1 == nr) {
-			perror(curp->file);
-			goto fail;
-		}
-		if (0 == nr) {
-			/* End of input: record the number of bytes held. */
-			fb->sz = used;
-			return(1);
-		}
-	}
-
-fail:
-	free(fb->buf);
-	fb->buf = NULL;
-	return(0);
-}
-
-
-static void
-fdesc(struct curparse *curp)
-{
- struct buf ln, blk;
- int i, pos, lnn, lnn_start, with_mmap, of;
- enum rofferr re;
- unsigned char c;
- struct man *man;
- struct mdoc *mdoc;
- struct roff *roff;
-
- man = NULL;
- mdoc = NULL;
- roff = NULL;
-
- memset(&ln, 0, sizeof(struct buf));
-
- /*
- * Two buffers: ln and blk. blk is the input file and may be
- * memory mapped. ln is a line buffer and grows on-demand.
- */
-
- if ( ! read_whole_file(curp, &blk, &with_mmap)) {
- exit_status = MANDOCLEVEL_SYSERR;
- return;
- }
-
- if (NULL == curp->roff)
- curp->roff = roff_alloc(&curp->regs, curp, mmsg);
- assert(curp->roff);
- roff = curp->roff;
-
- for (i = 0, lnn = 1; i < (int)blk.sz;) {
- pos = 0;
- lnn_start = lnn;
- while (i < (int)blk.sz) {
- if ('\n' == blk.buf[i]) {
- ++i;
- ++lnn;
- break;
- }
-
- /*
- * Warn about bogus characters. If you're using
- * non-ASCII encoding, you're screwing your
- * readers. Since I'd rather this not happen,
- * I'll be helpful and drop these characters so
- * we don't display gibberish. Note to manual
- * writers: use special characters.
- */
-
- c = (unsigned char) blk.buf[i];
- if ( ! (isascii(c) && (isgraph(c) || isblank(c)))) {
- mmsg(MANDOCERR_BADCHAR, curp,
- lnn_start, pos, "ignoring byte");
- i++;
- continue;
- }
-
- /* Trailing backslash is like a plain character. */
- if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
- if (pos >= (int)ln.sz)
- resize_buf(&ln, 256);
- ln.buf[pos++] = blk.buf[i++];
- continue;
- }
- /* Found an escape and at least one other character. */
- if ('\n' == blk.buf[i + 1]) {
- /* Escaped newlines are skipped over */
- i += 2;
- ++lnn;
- continue;
- }
- if ('"' == blk.buf[i + 1]) {
- i += 2;
- /* Comment, skip to end of line */
- for (; i < (int)blk.sz; ++i) {
- if ('\n' == blk.buf[i]) {
- ++i;
- ++lnn;
- break;
- }
- }
- /* Backout trailing whitespaces */
- for (; pos > 0; --pos) {
- if (ln.buf[pos - 1] != ' ')
- break;
- if (pos > 2 && ln.buf[pos - 2] == '\\')
- break;
- }
- break;
- }
- /* Some other escape sequence, copy and continue. */
- if (pos + 1 >= (int)ln.sz)
- resize_buf(&ln, 256);
-
- ln.buf[pos++] = blk.buf[i++];
- ln.buf[pos++] = blk.buf[i++];
- }
-
- if (pos >= (int)ln.sz)
- resize_buf(&ln, 256);
- ln.buf[pos] = '\0';
-
- /*
- * A significant amount of complexity is contained by
- * the roff preprocessor. It's line-oriented but can be
- * expressed on one line, so we need at times to
- * readjust our starting point and re-run it. The roff
- * preprocessor can also readjust the buffers with new
- * data, so we pass them in wholesale.
- */
-
- of = 0;
- do {
- re = roff_parseln(roff, lnn_start,
- &ln.buf, &ln.sz, of, &of);
- } while (ROFF_RERUN == re);
-
- if (ROFF_IGN == re) {
- continue;
- } else if (ROFF_ERR == re) {
- assert(MANDOCLEVEL_FATAL <= exit_status);
- goto cleanup;
- }
-
- /*
- * If input parsers have not been allocated, do so now.
- * We keep these instanced between parsers, but set them
- * locally per parse routine since we can use different
- * parsers with each one.
- */
-
- if ( ! (man || mdoc))
- pset(ln.buf + of, pos - of, curp, &man, &mdoc);
-
- /* Lastly, push down into the parsers themselves. */
-
- if (man && ! man_parseln(man, lnn_start, ln.buf, of)) {
- assert(MANDOCLEVEL_FATAL <= exit_status);
- goto cleanup;
- }
- if (mdoc && ! mdoc_parseln(mdoc, lnn_start, ln.buf, of)) {
- assert(MANDOCLEVEL_FATAL <= exit_status);
- goto cleanup;
- }
- }