]> git.cameronkatri.com Git - mandoc.git/blob - read.c
always use the right buffer, and fix one evil typo
[mandoc.git] / read.c
1 /* $Id: read.c,v 1.85 2014/09/07 02:17:40 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include "config.h"
20
21 #include <sys/types.h>
22 #if HAVE_MMAP
23 #include <sys/mman.h>
24 #include <sys/stat.h>
25 #endif
26 #include <sys/wait.h>
27
28 #include <assert.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <stdarg.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38
39 #include "mandoc.h"
40 #include "mandoc_aux.h"
41 #include "libmandoc.h"
42 #include "mdoc.h"
43 #include "man.h"
44 #include "main.h"
45
/*
 * Upper bound on the per-line reparse counter: once more than this
 * many ROFF_REPARSE results have been seen since the counter was last
 * reset, the parser reports MANDOCERR_ROFFLOOP instead of recursing.
 */
#define	REPARSE_LIMIT	1000
47
/*
 * A block of input bytes together with its size.
 */
struct buf {
	char	*buf;	/* start of the binary input data */
	size_t	 sz;	/* size of the buffer in bytes */
};
52
53 struct mparse {
54 struct man *pman; /* persistent man parser */
55 struct mdoc *pmdoc; /* persistent mdoc parser */
56 struct man *man; /* man parser */
57 struct mdoc *mdoc; /* mdoc parser */
58 struct roff *roff; /* roff parser (!NULL) */
59 char *sodest; /* filename pointed to by .so */
60 const char *file; /* filename of current input file */
61 struct buf *primary; /* buffer currently being parsed */
62 struct buf *secondary; /* preprocessed copy of input */
63 const char *defos; /* default operating system */
64 mandocmsg mmsg; /* warning/error message handler */
65 enum mandoclevel file_status; /* status of current parse */
66 enum mandoclevel wlevel; /* ignore messages below this */
67 int options; /* parser options */
68 int reparse_count; /* finite interp. stack */
69 int line; /* line number in the file */
70 };
71
/* File-local helpers, listed in the order they are defined below. */
static	void	  resize_buf(struct buf *buf, size_t initial);
static	void	  choose_parser(struct mparse *curp);
static	void	  mparse_buf_r(struct mparse *curp, struct buf blk,
			int start);
static	int	  read_whole_file(struct mparse *curp, const char *file,
			int fd, struct buf *fb, int *with_mmap);
static	void	  mparse_end(struct mparse *curp);
static	void	  mparse_parse_buffer(struct mparse *curp, struct buf blk,
			const char *file);
80
81 static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = {
82 MANDOCERR_OK,
83 MANDOCERR_WARNING,
84 MANDOCERR_WARNING,
85 MANDOCERR_ERROR,
86 MANDOCERR_FATAL,
87 MANDOCERR_MAX,
88 MANDOCERR_MAX
89 };
90
91 static const char * const mandocerrs[MANDOCERR_MAX] = {
92 "ok",
93
94 "generic warning",
95
96 /* related to the prologue */
97 "missing manual title, using UNTITLED",
98 "missing manual title, using \"\"",
99 "lower case character in document title",
100 "missing manual section, using \"\"",
101 "unknown manual section",
102 "unknown manual volume or arch",
103 "missing date, using today's date",
104 "cannot parse date, using it verbatim",
105 "missing Os macro, using \"\"",
106 "duplicate prologue macro",
107 "late prologue macro",
108 "skipping late title macro",
109 "prologue macros out of order",
110
111 /* related to document structure */
112 ".so is fragile, better use ln(1)",
113 "no document body",
114 "content before first section header",
115 "first section is not \"NAME\"",
116 "bad NAME section contents",
117 "sections out of conventional order",
118 "duplicate section title",
119 "unexpected section",
120
121 /* related to macros and nesting */
122 "obsolete macro",
123 "skipping paragraph macro",
124 "moving paragraph macro out of list",
125 "skipping no-space macro",
126 "blocks badly nested",
127 "nested displays are not portable",
128 "moving content out of list",
129 ".Vt block has child macro",
130 "fill mode already enabled, skipping",
131 "fill mode already disabled, skipping",
132 "line scope broken",
133
134 /* related to missing macro arguments */
135 "skipping empty request",
136 "conditional request controls empty scope",
137 "skipping empty macro",
138 "empty argument, using 0n",
139 "argument count wrong",
140 "missing display type, using -ragged",
141 "list type is not the first argument",
142 "missing -width in -tag list, using 8n",
143 "missing utility name, using \"\"",
144 "empty head in list item",
145 "empty list item",
146 "missing font type, using \\fR",
147 "unknown font type, using \\fR",
148 "missing -std argument, adding it",
149
150 /* related to bad macro arguments */
151 "unterminated quoted argument",
152 "duplicate argument",
153 "skipping duplicate argument",
154 "skipping duplicate display type",
155 "skipping duplicate list type",
156 "skipping -width argument",
157 "unknown AT&T UNIX version",
158 "invalid content in Rs block",
159 "invalid Boolean argument",
160 "unknown font, skipping request",
161
162 /* related to plain text */
163 "blank line in fill mode, using .sp",
164 "tab in filled text",
165 "whitespace at end of input line",
166 "bad comment style",
167 "invalid escape sequence",
168 "undefined string, using \"\"",
169
170 "generic error",
171
172 /* related to equations */
173 "unexpected equation scope closure",
174 "equation scope open on exit",
175 "overlapping equation scopes",
176 "unexpected end of equation",
177 "equation syntax error",
178
179 /* related to tables */
180 "bad table syntax",
181 "bad table option",
182 "bad table layout",
183 "no table layout cells specified",
184 "no table data cells specified",
185 "ignore data in cell",
186 "data block still open",
187 "ignoring extra data cells",
188
189 /* related to document structure and macros */
190 "input stack limit exceeded, infinite loop?",
191 "skipping bad character",
192 "skipping unknown macro",
193 "skipping item outside list",
194 "skipping column outside column list",
195 "skipping end of block that is not open",
196 "inserting missing end of block",
197 "appending missing end of block",
198
199 /* related to request and macro arguments */
200 "escaped character not allowed in a name",
201 "argument count wrong",
202 "missing list type, using -item",
203 "missing manual name, using \"\"",
204 "uname(3) system call failed, using UNKNOWN",
205 "unknown standard specifier",
206 "skipping request without numeric argument",
207 "skipping all arguments",
208 "skipping excess arguments",
209
210 "generic fatal error",
211
212 "input too large",
213 "NOT IMPLEMENTED: Bd -file",
214 "NOT IMPLEMENTED: .so with absolute path or \"..\"",
215 ".so request failed",
216
217 /* system errors */
218 "cannot dup file descriptor",
219 "cannot exec",
220 "gunzip failed with code",
221 "cannot fork",
222 NULL,
223 "cannot open pipe",
224 "cannot read file",
225 "gunzip died from signal",
226 "cannot stat file",
227 "wait failed",
228 };
229
230 static const char * const mandoclevels[MANDOCLEVEL_MAX] = {
231 "SUCCESS",
232 "RESERVED",
233 "WARNING",
234 "ERROR",
235 "FATAL",
236 "BADARG",
237 "SYSERR"
238 };
239
240
241 static void
242 resize_buf(struct buf *buf, size_t initial)
243 {
244
245 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
246 buf->buf = mandoc_realloc(buf->buf, buf->sz);
247 }
248
249 static void
250 choose_parser(struct mparse *curp)
251 {
252 char *cp, *ep;
253 int format;
254
255 /*
256 * If neither command line arguments -mdoc or -man select
257 * a parser nor the roff parser found a .Dd or .TH macro
258 * yet, look ahead in the main input buffer.
259 */
260
261 if ((format = roff_getformat(curp->roff)) == 0) {
262 cp = curp->primary->buf;
263 ep = cp + curp->primary->sz;
264 while (cp < ep) {
265 if (*cp == '.' || *cp == '\'') {
266 cp++;
267 if (cp[0] == 'D' && cp[1] == 'd') {
268 format = MPARSE_MDOC;
269 break;
270 }
271 if (cp[0] == 'T' && cp[1] == 'H') {
272 format = MPARSE_MAN;
273 break;
274 }
275 }
276 cp = memchr(cp, '\n', ep - cp);
277 if (cp == NULL)
278 break;
279 cp++;
280 }
281 }
282
283 if (format == MPARSE_MDOC) {
284 if (NULL == curp->pmdoc)
285 curp->pmdoc = mdoc_alloc(
286 curp->roff, curp, curp->defos,
287 MPARSE_QUICK & curp->options ? 1 : 0);
288 assert(curp->pmdoc);
289 curp->mdoc = curp->pmdoc;
290 return;
291 }
292
293 /* Fall back to man(7) as a last resort. */
294
295 if (NULL == curp->pman)
296 curp->pman = man_alloc(curp->roff, curp,
297 MPARSE_QUICK & curp->options ? 1 : 0);
298 assert(curp->pman);
299 curp->man = curp->pman;
300 }
301
302 /*
303 * Main parse routine for an opened file. This is called for each
304 * opened file and simply loops around the full input file, possibly
305 * nesting (i.e., with `so').
 *
 * The bytes of blk are copied, one logical input line at a time, into
 * the local buffer ln: a CR-LF pair counts as a newline, a backslash
 * directly before a newline joins two lines, comments introduced by
 * \" or \# are dropped, and bytes that are not ASCII graphic or blank
 * characters are reported as MANDOCERR_BADCHAR and replaced by '?'.
 * Each completed line is handed to roff_parseln(); depending on its
 * result the line is re-parsed, extended, ignored, followed as a .so
 * request, or passed on to the man or mdoc parser.
 *
 * curp:  parser state.
 * blk:   the input to process (passed by value).
 * start: non-zero for a top-level buffer (this is what
 *        mparse_parse_buffer() passes); zero when re-parsing the
 *        expanded text of a single line, in which case blk is read
 *        as a NUL-terminated string and the line number is left alone.
306 */
307 static void
308 mparse_buf_r(struct mparse *curp, struct buf blk, int start)
309 {
310 const struct tbl_span *span;
311 struct buf ln;
312 enum rofferr rr;
313 int i, of, rc;
314 int pos; /* byte number in the ln buffer */
315 int lnn; /* line number in the real file */
316 unsigned char c;
317
318 memset(&ln, 0, sizeof(struct buf));
319
320 lnn = curp->line;
321 pos = 0;
322
323 for (i = 0; i < (int)blk.sz; ) {
/* A NUL byte at the very start of a line ends this buffer. */
324 if (0 == pos && '\0' == blk.buf[i])
325 break;
326
/*
 * Only a top-level buffer updates the reported line number
 * and resets the reparse counter.
 */
327 if (start) {
328 curp->line = lnn;
329 curp->reparse_count = 0;
330 }
331
332 while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) {
333
334 /*
335 * When finding an unescaped newline character,
336 * leave the character loop to process the line.
337 * Skip a preceding carriage return, if any.
338 */
339
340 if ('\r' == blk.buf[i] && i + 1 < (int)blk.sz &&
341 '\n' == blk.buf[i + 1])
342 ++i;
343 if ('\n' == blk.buf[i]) {
344 ++i;
345 ++lnn;
346 break;
347 }
348
349 /*
350 * Make sure we have space for at least
351 * one backslash and one other character
352 * and the trailing NUL byte.
353 */
354
355 if (pos + 2 >= (int)ln.sz)
356 resize_buf(&ln, 256);
357
358 /*
359 * Warn about bogus characters. If you're using
360 * non-ASCII encoding, you're screwing your
361 * readers. Since I'd rather this not happen,
362 * I'll be helpful and replace these characters
363 * with "?", so we don't display gibberish.
364 * Note to manual writers: use special characters.
365 */
366
367 c = (unsigned char) blk.buf[i];
368
369 if ( ! (isascii(c) &&
370 (isgraph(c) || isblank(c)))) {
371 mandoc_vmsg(MANDOCERR_BADCHAR, curp,
372 curp->line, pos, "0x%x", c);
373 i++;
374 ln.buf[pos++] = '?';
375 continue;
376 }
377
378 /* Trailing backslash = a plain char. */
379
380 if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
381 ln.buf[pos++] = blk.buf[i++];
382 continue;
383 }
384
385 /*
386 * Found escape and at least one other character.
387 * When it's a newline character, skip it.
388 * When there is a carriage return in between,
389 * skip that one as well.
390 */
391
392 if ('\r' == blk.buf[i + 1] && i + 2 < (int)blk.sz &&
393 '\n' == blk.buf[i + 2])
394 ++i;
395 if ('\n' == blk.buf[i + 1]) {
396 i += 2;
397 ++lnn;
398 continue;
399 }
400
401 if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
402 i += 2;
403 /* Comment, skip to end of line */
404 for (; i < (int)blk.sz; ++i) {
405 if ('\n' == blk.buf[i]) {
406 ++i;
407 ++lnn;
408 break;
409 }
410 }
411
412 /* Backout trailing whitespaces */
/* A blank directly preceded by a backslash is kept. */
413 for (; pos > 0; --pos) {
414 if (ln.buf[pos - 1] != ' ')
415 break;
416 if (pos > 2 && ln.buf[pos - 2] == '\\')
417 break;
418 }
419 break;
420 }
421
422 /* Catch escaped bogus characters. */
423
424 c = (unsigned char) blk.buf[i+1];
425
426 if ( ! (isascii(c) &&
427 (isgraph(c) || isblank(c)))) {
428 mandoc_vmsg(MANDOCERR_BADCHAR, curp,
429 curp->line, pos, "0x%x", c);
430 i += 2;
431 ln.buf[pos++] = '?';
432 continue;
433 }
434
435 /* Some other escape sequence, copy & cont. */
436
437 ln.buf[pos++] = blk.buf[i++];
438 ln.buf[pos++] = blk.buf[i++];
439 }
440
/* Guarantee room for the terminating NUL, even for an empty line. */
441 if (pos >= (int)ln.sz)
442 resize_buf(&ln, 256);
443
444 ln.buf[pos] = '\0';
445
446 /*
447 * A significant amount of complexity is contained by
448 * the roff preprocessor. It's line-oriented but can be
449 * expressed on one line, so we need at times to
450 * readjust our starting point and re-run it. The roff
451 * preprocessor can also readjust the buffers with new
452 * data, so we pass them in wholesale.
453 */
454
455 of = 0;
456
457 /*
458 * Maintain a lookaside buffer of all parsed lines. We
459 * only do this if mparse_keep() has been invoked (the
460 * buffer may be accessed with mparse_getkeep()).
461 */
462
463 if (curp->secondary) {
464 curp->secondary->buf = mandoc_realloc(
465 curp->secondary->buf,
466 curp->secondary->sz + pos + 2);
467 memcpy(curp->secondary->buf +
468 curp->secondary->sz,
469 ln.buf, pos);
470 curp->secondary->sz += pos;
471 curp->secondary->buf
472 [curp->secondary->sz] = '\n';
473 curp->secondary->sz++;
474 curp->secondary->buf
475 [curp->secondary->sz] = '\0';
476 }
477 rerun:
478 rr = roff_parseln(curp->roff, curp->line,
479 &ln.buf, &ln.sz, of, &of);
480
481 switch (rr) {
482 case ROFF_REPARSE:
/*
 * Parse the rewritten line as a nested, non-top-level buffer,
 * unless the reparse counter has exceeded REPARSE_LIMIT.
 */
483 if (REPARSE_LIMIT >= ++curp->reparse_count)
484 mparse_buf_r(curp, ln, 0);
485 else
486 mandoc_msg(MANDOCERR_ROFFLOOP, curp,
487 curp->line, pos, NULL);
488 pos = 0;
489 continue;
490 case ROFF_APPEND:
/* Keep the text in ln; the next input line is appended to it. */
491 pos = (int)strlen(ln.buf);
492 continue;
493 case ROFF_RERUN:
494 goto rerun;
495 case ROFF_IGN:
496 pos = 0;
497 continue;
498 case ROFF_ERR:
499 assert(MANDOCLEVEL_FATAL <= curp->file_status);
500 break;
501 case ROFF_SO:
/*
 * Without MPARSE_SO, a .so request that is the last thing in
 * the buffer is not followed: its target is only recorded in
 * curp->sodest and parsing of this buffer stops.
 */
502 if (0 == (MPARSE_SO & curp->options) &&
503 (i >= (int)blk.sz || '\0' == blk.buf[i])) {
504 curp->sodest = mandoc_strdup(ln.buf + of);
505 free(ln.buf);
506 return;
507 }
508 /*
509 * We remove `so' clauses from our lookaside
510 * buffer because we're going to descend into
511 * the file recursively.
512 */
513 if (curp->secondary)
514 curp->secondary->sz -= pos + 1;
515 mparse_readfd(curp, -1, ln.buf + of);
516 if (MANDOCLEVEL_FATAL <= curp->file_status) {
517 mandoc_vmsg(MANDOCERR_SO_FAIL,
518 curp, curp->line, pos,
519 ".so %s", ln.buf + of);
520 break;
521 }
522 pos = 0;
523 continue;
524 default:
525 break;
526 }
527
528 /*
529 * If we encounter errors in the recursive parse, make
530 * sure we don't continue parsing.
531 */
532
533 if (MANDOCLEVEL_FATAL <= curp->file_status)
534 break;
535
536 /*
537 * If input parsers have not been allocated, do so now.
538 * We keep these instanced between parsers, but set them
539 * locally per parse routine since we can use different
540 * parsers with each one.
541 */
542
543 if ( ! (curp->man || curp->mdoc))
544 choose_parser(curp);
545
546 /*
547 * Lastly, push down into the parsers themselves.
548 * If libroff returns ROFF_TBL, then add it to the
549 * currently open parse. Since we only get here if
550 * there does exist data (see tbl_data.c), we're
551 * guaranteed that something's been allocated.
552 * Do the same for ROFF_EQN.
553 */
554
/*
 * rc stays -1 when nothing is handed to a parser; below, 0 is
 * treated as a fatal failure and 2 as a request to stop.
 */
555 rc = -1;
556
557 if (ROFF_TBL == rr)
558 while (NULL != (span = roff_span(curp->roff))) {
559 rc = curp->man ?
560 man_addspan(curp->man, span) :
561 mdoc_addspan(curp->mdoc, span);
562 if (0 == rc)
563 break;
564 }
565 else if (ROFF_EQN == rr)
566 rc = curp->mdoc ?
567 mdoc_addeqn(curp->mdoc,
568 roff_eqn(curp->roff)) :
569 man_addeqn(curp->man,
570 roff_eqn(curp->roff));
571 else if (curp->man || curp->mdoc)
572 rc = curp->man ?
573 man_parseln(curp->man,
574 curp->line, ln.buf, of) :
575 mdoc_parseln(curp->mdoc,
576 curp->line, ln.buf, of);
577
578 if (0 == rc) {
579 assert(MANDOCLEVEL_FATAL <= curp->file_status);
580 break;
581 } else if (2 == rc)
582 break;
583
584 /* Temporary buffers typically are not full. */
585
586 if (0 == start && '\0' == blk.buf[i])
587 break;
588
589 /* Start the next input line. */
590
591 pos = 0;
592 }
593
594 free(ln.buf);
595 }
596
597 static int
598 read_whole_file(struct mparse *curp, const char *file, int fd,
599 struct buf *fb, int *with_mmap)
600 {
601 size_t off;
602 ssize_t ssz;
603
604 #if HAVE_MMAP
605 struct stat st;
606 if (-1 == fstat(fd, &st)) {
607 curp->file_status = MANDOCLEVEL_SYSERR;
608 if (curp->mmsg)
609 (*curp->mmsg)(MANDOCERR_SYSSTAT, curp->file_status,
610 file, 0, 0, strerror(errno));
611 return(0);
612 }
613
614 /*
615 * If we're a regular file, try just reading in the whole entry
616 * via mmap(). This is faster than reading it into blocks, and
617 * since each file is only a few bytes to begin with, I'm not
618 * concerned that this is going to tank any machines.
619 */
620
621 if (S_ISREG(st.st_mode)) {
622 if (st.st_size >= (1U << 31)) {
623 curp->file_status = MANDOCLEVEL_FATAL;
624 if (curp->mmsg)
625 (*curp->mmsg)(MANDOCERR_TOOLARGE,
626 curp->file_status, file, 0, 0, NULL);
627 return(0);
628 }
629 *with_mmap = 1;
630 fb->sz = (size_t)st.st_size;
631 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
632 if (fb->buf != MAP_FAILED)
633 return(1);
634 }
635 #endif
636
637 /*
638 * If this isn't a regular file (like, say, stdin), then we must
639 * go the old way and just read things in bit by bit.
640 */
641
642 *with_mmap = 0;
643 off = 0;
644 fb->sz = 0;
645 fb->buf = NULL;
646 for (;;) {
647 if (off == fb->sz) {
648 if (fb->sz == (1U << 31)) {
649 curp->file_status = MANDOCLEVEL_FATAL;
650 if (curp->mmsg)
651 (*curp->mmsg)(MANDOCERR_TOOLARGE,
652 curp->file_status,
653 file, 0, 0, NULL);
654 break;
655 }
656 resize_buf(fb, 65536);
657 }
658 ssz = read(fd, fb->buf + (int)off, fb->sz - off);
659 if (ssz == 0) {
660 fb->sz = off;
661 return(1);
662 }
663 if (ssz == -1) {
664 curp->file_status = MANDOCLEVEL_SYSERR;
665 if (curp->mmsg)
666 (*curp->mmsg)(MANDOCERR_SYSREAD,
667 curp->file_status, file, 0, 0,
668 strerror(errno));
669 break;
670 }
671 off += (size_t)ssz;
672 }
673
674 free(fb->buf);
675 fb->buf = NULL;
676 return(0);
677 }
678
679 static void
680 mparse_end(struct mparse *curp)
681 {
682
683 if (MANDOCLEVEL_FATAL <= curp->file_status)
684 return;
685
686 if (curp->mdoc == NULL &&
687 curp->man == NULL &&
688 curp->sodest == NULL) {
689 if (curp->options & MPARSE_MDOC)
690 curp->mdoc = curp->pmdoc;
691 else {
692 if (curp->pman == NULL)
693 curp->pman = man_alloc(curp->roff, curp,
694 curp->options & MPARSE_QUICK ? 1 : 0);
695 curp->man = curp->pman;
696 }
697 }
698
699 if (curp->mdoc && ! mdoc_endparse(curp->mdoc)) {
700 assert(MANDOCLEVEL_FATAL <= curp->file_status);
701 return;
702 }
703
704 if (curp->man && ! man_endparse(curp->man)) {
705 assert(MANDOCLEVEL_FATAL <= curp->file_status);
706 return;
707 }
708
709 roff_endparse(curp->roff);
710 }
711
712 static void
713 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
714 {
715 struct buf *svprimary;
716 const char *svfile;
717 static int recursion_depth;
718
719 if (64 < recursion_depth) {
720 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
721 return;
722 }
723
724 /* Line number is per-file. */
725 svfile = curp->file;
726 curp->file = file;
727 svprimary = curp->primary;
728 curp->primary = &blk;
729 curp->line = 1;
730 recursion_depth++;
731
732 mparse_buf_r(curp, blk, 1);
733
734 if (0 == --recursion_depth && MANDOCLEVEL_FATAL > curp->file_status)
735 mparse_end(curp);
736
737 curp->primary = svprimary;
738 curp->file = svfile;
739 }
740
741 enum mandoclevel
742 mparse_readmem(struct mparse *curp, const void *buf, size_t len,
743 const char *file)
744 {
745 struct buf blk;
746
747 blk.buf = UNCONST(buf);
748 blk.sz = len;
749
750 mparse_parse_buffer(curp, blk, file);
751 return(curp->file_status);
752 }
753
754 enum mandoclevel
755 mparse_readfd(struct mparse *curp, int fd, const char *file)
756 {
757 struct buf blk;
758 int with_mmap;
759
760 if (-1 == fd && -1 == (fd = open(file, O_RDONLY, 0))) {
761 curp->file_status = MANDOCLEVEL_SYSERR;
762 if (curp->mmsg)
763 (*curp->mmsg)(MANDOCERR_SYSOPEN,
764 curp->file_status,
765 file, 0, 0, strerror(errno));
766 goto out;
767 }
768
769 /*
770 * Run for each opened file; may be called more than once for
771 * each full parse sequence if the opened file is nested (i.e.,
772 * from `so'). Simply sucks in the whole file and moves into
773 * the parse phase for the file.
774 */
775
776 if ( ! read_whole_file(curp, file, fd, &blk, &with_mmap))
777 goto out;
778
779 mparse_parse_buffer(curp, blk, file);
780
781 #if HAVE_MMAP
782 if (with_mmap)
783 munmap(blk.buf, blk.sz);
784 else
785 #endif
786 free(blk.buf);
787
788 if (STDIN_FILENO != fd && -1 == close(fd))
789 perror(file);
790 out:
791 return(curp->file_status);
792 }
793
794 enum mandoclevel
795 mparse_open(struct mparse *curp, int *fd, const char *file,
796 pid_t *child_pid)
797 {
798 int pfd[2];
799 char *cp;
800 enum mandocerr err;
801
802 pfd[1] = -1;
803 curp->file = file;
804 if ((cp = strrchr(file, '.')) == NULL ||
805 strcmp(cp + 1, "gz")) {
806 *child_pid = 0;
807 if ((*fd = open(file, O_RDONLY)) == -1) {
808 err = MANDOCERR_SYSOPEN;
809 goto out;
810 }
811 return(MANDOCLEVEL_OK);
812 }
813
814 if (pipe(pfd) == -1) {
815 err = MANDOCERR_SYSPIPE;
816 goto out;
817 }
818
819 switch (*child_pid = fork()) {
820 case -1:
821 err = MANDOCERR_SYSFORK;
822 close(pfd[0]);
823 close(pfd[1]);
824 pfd[1] = -1;
825 break;
826 case 0:
827 close(pfd[0]);
828 if (dup2(pfd[1], STDOUT_FILENO) == -1) {
829 err = MANDOCERR_SYSDUP;
830 break;
831 }
832 execlp("gunzip", "gunzip", "-c", file, NULL);
833 err = MANDOCERR_SYSEXEC;
834 break;
835 default:
836 close(pfd[1]);
837 *fd = pfd[0];
838 return(MANDOCLEVEL_OK);
839 }
840
841 out:
842 *fd = -1;
843 *child_pid = 0;
844 curp->file_status = MANDOCLEVEL_SYSERR;
845 if (curp->mmsg)
846 (*curp->mmsg)(err, curp->file_status, file,
847 0, 0, strerror(errno));
848 if (pfd[1] != -1)
849 exit(1);
850 return(curp->file_status);
851 }
852
853 enum mandoclevel
854 mparse_wait(struct mparse *curp, pid_t child_pid)
855 {
856 int status;
857
858 if (waitpid(child_pid, &status, 0) == -1) {
859 mandoc_msg(MANDOCERR_SYSWAIT, curp, 0, 0,
860 strerror(errno));
861 curp->file_status = MANDOCLEVEL_SYSERR;
862 return(curp->file_status);
863 }
864 if (WIFSIGNALED(status)) {
865 mandoc_vmsg(MANDOCERR_SYSSIG, curp, 0, 0,
866 "%d", WTERMSIG(status));
867 curp->file_status = MANDOCLEVEL_SYSERR;
868 return(curp->file_status);
869 }
870 if (WEXITSTATUS(status)) {
871 mandoc_vmsg(MANDOCERR_SYSEXIT, curp, 0, 0,
872 "%d", WEXITSTATUS(status));
873 curp->file_status = MANDOCLEVEL_SYSERR;
874 return(curp->file_status);
875 }
876 return(MANDOCLEVEL_OK);
877 }
878
879 struct mparse *
880 mparse_alloc(int options, enum mandoclevel wlevel,
881 mandocmsg mmsg, const char *defos)
882 {
883 struct mparse *curp;
884
885 assert(wlevel <= MANDOCLEVEL_FATAL);
886
887 curp = mandoc_calloc(1, sizeof(struct mparse));
888
889 curp->options = options;
890 curp->wlevel = wlevel;
891 curp->mmsg = mmsg;
892 curp->defos = defos;
893
894 curp->roff = roff_alloc(curp, options);
895 if (curp->options & MPARSE_MDOC)
896 curp->pmdoc = mdoc_alloc(
897 curp->roff, curp, curp->defos,
898 curp->options & MPARSE_QUICK ? 1 : 0);
899 if (curp->options & MPARSE_MAN)
900 curp->pman = man_alloc(curp->roff, curp,
901 curp->options & MPARSE_QUICK ? 1 : 0);
902
903 return(curp);
904 }
905
906 void
907 mparse_reset(struct mparse *curp)
908 {
909
910 roff_reset(curp->roff);
911
912 if (curp->mdoc)
913 mdoc_reset(curp->mdoc);
914 if (curp->man)
915 man_reset(curp->man);
916 if (curp->secondary)
917 curp->secondary->sz = 0;
918
919 curp->file_status = MANDOCLEVEL_OK;
920 curp->mdoc = NULL;
921 curp->man = NULL;
922
923 free(curp->sodest);
924 curp->sodest = NULL;
925 }
926
927 void
928 mparse_free(struct mparse *curp)
929 {
930
931 if (curp->pmdoc)
932 mdoc_free(curp->pmdoc);
933 if (curp->pman)
934 man_free(curp->pman);
935 if (curp->roff)
936 roff_free(curp->roff);
937 if (curp->secondary)
938 free(curp->secondary->buf);
939
940 free(curp->secondary);
941 free(curp->sodest);
942 free(curp);
943 }
944
945 void
946 mparse_result(struct mparse *curp,
947 struct mdoc **mdoc, struct man **man, char **sodest)
948 {
949
950 if (sodest && NULL != (*sodest = curp->sodest)) {
951 *mdoc = NULL;
952 *man = NULL;
953 return;
954 }
955 if (mdoc)
956 *mdoc = curp->mdoc;
957 if (man)
958 *man = curp->man;
959 }
960
961 void
962 mandoc_vmsg(enum mandocerr t, struct mparse *m,
963 int ln, int pos, const char *fmt, ...)
964 {
965 char buf[256];
966 va_list ap;
967
968 va_start(ap, fmt);
969 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
970 va_end(ap);
971
972 mandoc_msg(t, m, ln, pos, buf);
973 }
974
975 void
976 mandoc_msg(enum mandocerr er, struct mparse *m,
977 int ln, int col, const char *msg)
978 {
979 enum mandoclevel level;
980
981 level = MANDOCLEVEL_FATAL;
982 while (er < mandoclimits[level])
983 level--;
984
985 if (level < m->wlevel)
986 return;
987
988 if (m->mmsg)
989 (*m->mmsg)(er, level, m->file, ln, col, msg);
990
991 if (m->file_status < level)
992 m->file_status = level;
993 }
994
995 const char *
996 mparse_strerror(enum mandocerr er)
997 {
998
999 return(mandocerrs[er]);
1000 }
1001
1002 const char *
1003 mparse_strlevel(enum mandoclevel lvl)
1004 {
1005 return(mandoclevels[lvl]);
1006 }
1007
1008 void
1009 mparse_keep(struct mparse *p)
1010 {
1011
1012 assert(NULL == p->secondary);
1013 p->secondary = mandoc_calloc(1, sizeof(struct buf));
1014 }
1015
1016 const char *
1017 mparse_getkeep(const struct mparse *p)
1018 {
1019
1020 assert(p->secondary);
1021 return(p->secondary->sz ? p->secondary->buf : NULL);
1022 }