git.cameronkatri.com Git - mandoc.git/blob - read.c
Protect the roff parser from dividing by zero. ok schwarze@
[mandoc.git] / read.c
1 /* $Id: read.c,v 1.92 2014/10/20 19:04:45 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include "config.h"
20
21 #include <sys/types.h>
22 #if HAVE_MMAP
23 #include <sys/mman.h>
24 #include <sys/stat.h>
25 #endif
26 #include <sys/wait.h>
27
28 #include <assert.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <stdarg.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38
39 #include "mandoc.h"
40 #include "mandoc_aux.h"
41 #include "libmandoc.h"
42 #include "mdoc.h"
43 #include "man.h"
44 #include "main.h"
45
/*
 * Upper bound on the number of ROFF_REPARSE rounds mparse_buf_r()
 * performs (counted in mparse.reparse_count) before it gives up and
 * reports MANDOCERR_ROFFLOOP.
 */
#define	REPARSE_LIMIT	1000
47
/*
 * A sized byte buffer.  Used both for whole input files and for the
 * line buffer assembled in mparse_buf_r().
 */
struct	buf {
	char		 *buf; /* binary input buffer */
	size_t		  sz; /* size of binary buffer */
};
52
/*
 * State of one parse session.  The "persistent" parsers (pman, pmdoc)
 * are allocated at most once and released in mparse_free(); the man
 * and mdoc members select the parser in use for the current file and
 * are cleared again by mparse_reset().
 */
struct	mparse {
	struct man	 *pman; /* persistent man parser */
	struct mdoc	 *pmdoc; /* persistent mdoc parser */
	struct man	 *man; /* man parser */
	struct mdoc	 *mdoc; /* mdoc parser */
	struct roff	 *roff; /* roff parser (!NULL) */
	char		 *sodest; /* filename pointed to by .so */
	const char	 *file; /* filename of current input file */
	struct buf	 *primary; /* buffer currently being parsed */
	struct buf	 *secondary; /* preprocessed copy of input */
	const char	 *defos; /* default operating system */
	mandocmsg	  mmsg; /* warning/error message handler */
	enum mandoclevel  file_status; /* status of current parse */
	enum mandoclevel  wlevel; /* ignore messages below this */
	int		  options; /* parser options */
	int		  reparse_count; /* finite interp. stack */
	int		  line; /* line number in the file */
};
71
/* Forward declarations of the helpers private to this file. */
static	void	  choose_parser(struct mparse *);
static	void	  resize_buf(struct buf *, size_t);
static	void	  mparse_buf_r(struct mparse *, struct buf, int);
static	int	  read_whole_file(struct mparse *, const char *, int,
				struct buf *, int *);
static	void	  mparse_end(struct mparse *);
static	void	  mparse_parse_buffer(struct mparse *, struct buf,
			const char *);
80
/*
 * For each message level, the lowest error code that belongs to it.
 * mandoc_msg() walks this table downwards from MANDOCLEVEL_FATAL to
 * find the level of a given error code.
 */
static	const enum mandocerr	mandoclimits[MANDOCLEVEL_MAX] = {
	MANDOCERR_OK,
	MANDOCERR_WARNING,
	MANDOCERR_WARNING,
	MANDOCERR_ERROR,
	MANDOCERR_FATAL,
	MANDOCERR_MAX,
	MANDOCERR_MAX
};
90
/*
 * Human-readable message text for each error code; returned by
 * mparse_strerror(), which indexes this table with an enum mandocerr.
 * NOTE(review): the entries presumably have to stay in the same order
 * as the enum mandocerr declaration, which is not part of this file --
 * confirm before adding or moving lines.
 */
static	const char * const	mandocerrs[MANDOCERR_MAX] = {
	"ok",

	"generic warning",

	/* related to the prologue */
	"missing manual title, using UNTITLED",
	"missing manual title, using \"\"",
	"lower case character in document title",
	"missing manual section, using \"\"",
	"unknown manual section",
	"unknown manual volume or arch",
	"missing date, using today's date",
	"cannot parse date, using it verbatim",
	"missing Os macro, using \"\"",
	"duplicate prologue macro",
	"late prologue macro",
	"skipping late title macro",
	"prologue macros out of order",

	/* related to document structure */
	".so is fragile, better use ln(1)",
	"no document body",
	"content before first section header",
	"first section is not \"NAME\"",
	"bad NAME section contents",
	"sections out of conventional order",
	"duplicate section title",
	"unexpected section",
	"unusual Xr order",
	"unusual Xr punctuation",
	"AUTHORS section without An macro",

	/* related to macros and nesting */
	"obsolete macro",
	"skipping paragraph macro",
	"moving paragraph macro out of list",
	"skipping no-space macro",
	"blocks badly nested",
	"nested displays are not portable",
	"moving content out of list",
	".Vt block has child macro",
	"fill mode already enabled, skipping",
	"fill mode already disabled, skipping",
	"line scope broken",

	/* related to missing macro arguments */
	"skipping empty request",
	"conditional request controls empty scope",
	"skipping empty macro",
	"empty argument, using 0n",
	"argument count wrong",
	"missing display type, using -ragged",
	"list type is not the first argument",
	"missing -width in -tag list, using 8n",
	"missing utility name, using \"\"",
	"empty head in list item",
	"empty list item",
	"missing font type, using \\fR",
	"unknown font type, using \\fR",
	"missing -std argument, adding it",
	"missing eqn box, using \"\"",

	/* related to bad macro arguments */
	"unterminated quoted argument",
	"duplicate argument",
	"skipping duplicate argument",
	"skipping duplicate display type",
	"skipping duplicate list type",
	"skipping -width argument",
	"unknown AT&T UNIX version",
	"comma in function argument",
	"parenthesis in function name",
	"invalid content in Rs block",
	"invalid Boolean argument",
	"unknown font, skipping request",

	/* related to plain text */
	"blank line in fill mode, using .sp",
	"tab in filled text",
	"whitespace at end of input line",
	"bad comment style",
	"invalid escape sequence",
	"undefined string, using \"\"",

	"generic error",

	/* related to equations */
	"unexpected equation scope closure",
	"equation scope open on exit",
	"overlapping equation scopes",
	"unexpected end of equation",

	/* related to tables */
	"bad table syntax",
	"bad table option",
	"bad table layout",
	"no table layout cells specified",
	"no table data cells specified",
	"ignore data in cell",
	"data block still open",
	"ignoring extra data cells",

	/* related to document structure and macros */
	"input stack limit exceeded, infinite loop?",
	"skipping bad character",
	"skipping unknown macro",
	"skipping item outside list",
	"skipping column outside column list",
	"skipping end of block that is not open",
	"inserting missing end of block",
	"appending missing end of block",

	/* related to request and macro arguments */
	"escaped character not allowed in a name",
	"argument count wrong",
	"missing list type, using -item",
	"missing manual name, using \"\"",
	"uname(3) system call failed, using UNKNOWN",
	"unknown standard specifier",
	"skipping request without numeric argument",
	"skipping all arguments",
	"skipping excess arguments",
	"divide by zero",

	"generic fatal error",

	"input too large",
	"NOT IMPLEMENTED: Bd -file",
	"NOT IMPLEMENTED: .so with absolute path or \"..\"",
	".so request failed",

	/* system errors */
	"cannot dup file descriptor",
	"cannot exec",
	"gunzip failed with code",
	"cannot fork",
	/*
	 * No fixed text for this slot.  NOTE(review): judging by its
	 * position this is presumably MANDOCERR_SYSOPEN, which the
	 * callers in this file report with strerror(errno) as the
	 * message -- confirm against the enum declaration.
	 */
	NULL,
	"cannot open pipe",
	"cannot read file",
	"gunzip died from signal",
	"cannot stat file",
	"wait failed",
};
235
/*
 * Printable name of each message level; returned by
 * mparse_strlevel(), which indexes this table with an
 * enum mandoclevel.
 */
static	const char * const	mandoclevels[MANDOCLEVEL_MAX] = {
	"SUCCESS",
	"RESERVED",
	"WARNING",
	"ERROR",
	"FATAL",
	"BADARG",
	"SYSERR"
};
245
246
247 static void
248 resize_buf(struct buf *buf, size_t initial)
249 {
250
251 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
252 buf->buf = mandoc_realloc(buf->buf, buf->sz);
253 }
254
255 static void
256 choose_parser(struct mparse *curp)
257 {
258 char *cp, *ep;
259 int format;
260
261 /*
262 * If neither command line arguments -mdoc or -man select
263 * a parser nor the roff parser found a .Dd or .TH macro
264 * yet, look ahead in the main input buffer.
265 */
266
267 if ((format = roff_getformat(curp->roff)) == 0) {
268 cp = curp->primary->buf;
269 ep = cp + curp->primary->sz;
270 while (cp < ep) {
271 if (*cp == '.' || *cp == '\'') {
272 cp++;
273 if (cp[0] == 'D' && cp[1] == 'd') {
274 format = MPARSE_MDOC;
275 break;
276 }
277 if (cp[0] == 'T' && cp[1] == 'H') {
278 format = MPARSE_MAN;
279 break;
280 }
281 }
282 cp = memchr(cp, '\n', ep - cp);
283 if (cp == NULL)
284 break;
285 cp++;
286 }
287 }
288
289 if (format == MPARSE_MDOC) {
290 if (NULL == curp->pmdoc)
291 curp->pmdoc = mdoc_alloc(
292 curp->roff, curp, curp->defos,
293 MPARSE_QUICK & curp->options ? 1 : 0);
294 assert(curp->pmdoc);
295 curp->mdoc = curp->pmdoc;
296 return;
297 }
298
299 /* Fall back to man(7) as a last resort. */
300
301 if (NULL == curp->pman)
302 curp->pman = man_alloc(curp->roff, curp,
303 MPARSE_QUICK & curp->options ? 1 : 0);
304 assert(curp->pman);
305 curp->man = curp->pman;
306 }
307
/*
 * Main parse routine for an opened file.  This is called for each
 * opened file and simply loops around the full input file, possibly
 * nesting (i.e., with `so').
 *
 * Each round of the outer loop assembles one logical input line in
 * the local buffer "ln" (joining escaped newlines, dropping comments,
 * replacing unprintable bytes by '?'), passes it through the roff
 * preprocessor and, depending on the result, hands it to the man or
 * mdoc parser.
 *
 * curp:  parse session; line, reparse_count, sodest, the secondary
 *        buffer and the parser pointers may be updated.
 * blk:   input to parse, passed by value.
 * start: non-zero for a file buffer (see mparse_parse_buffer()),
 *        zero when called recursively on a line the roff parser
 *        asked to have re-parsed.  With start == 0, a NUL byte in
 *        blk terminates the input.
 */
static void
mparse_buf_r(struct mparse *curp, struct buf blk, int start)
{
	const struct tbl_span	*span;
	struct buf	 ln;
	enum rofferr	 rr;
	int		 i, of, rc;
	int		 pos; /* byte number in the ln buffer */
	int		 lnn; /* line number in the real file */
	unsigned char	 c;

	memset(&ln, 0, sizeof(struct buf));

	lnn = curp->line;
	pos = 0;

	for (i = 0; i < (int)blk.sz; ) {
		/* A NUL byte at the start of a fresh line ends parsing. */
		if (0 == pos && '\0' == blk.buf[i])
			break;

		/*
		 * Only file-level parsing advances the reported line
		 * number and restarts the reparse counter.
		 */
		if (start) {
			curp->line = lnn;
			curp->reparse_count = 0;
		}

		while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) {

			/*
			 * When finding an unescaped newline character,
			 * leave the character loop to process the line.
			 * Skip a preceding carriage return, if any.
			 */

			if ('\r' == blk.buf[i] && i + 1 < (int)blk.sz &&
			    '\n' == blk.buf[i + 1])
				++i;
			if ('\n' == blk.buf[i]) {
				++i;
				++lnn;
				break;
			}

			/*
			 * Make sure we have space for at least
			 * one backslash and one other character
			 * and the trailing NUL byte.
			 */

			if (pos + 2 >= (int)ln.sz)
				resize_buf(&ln, 256);

			/*
			 * Warn about bogus characters.  If you're using
			 * non-ASCII encoding, you're screwing your
			 * readers.  Since I'd rather this not happen,
			 * I'll be helpful and replace these characters
			 * with "?", so we don't display gibberish.
			 * Note to manual writers: use special characters.
			 */

			c = (unsigned char) blk.buf[i];

			if ( ! (isascii(c) &&
			    (isgraph(c) || isblank(c)))) {
				mandoc_vmsg(MANDOCERR_BADCHAR, curp,
				    curp->line, pos, "0x%x", c);
				i++;
				ln.buf[pos++] = '?';
				continue;
			}

			/* Trailing backslash = a plain char. */

			if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
				ln.buf[pos++] = blk.buf[i++];
				continue;
			}

			/*
			 * Found escape and at least one other character.
			 * When it's a newline character, skip it.
			 * When there is a carriage return in between,
			 * skip that one as well.
			 */

			if ('\r' == blk.buf[i + 1] && i + 2 < (int)blk.sz &&
			    '\n' == blk.buf[i + 2])
				++i;
			if ('\n' == blk.buf[i + 1]) {
				i += 2;
				++lnn;
				continue;
			}

			if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
				i += 2;
				/* Comment, skip to end of line */
				for (; i < (int)blk.sz; ++i) {
					if ('\n' == blk.buf[i]) {
						++i;
						++lnn;
						break;
					}
				}

				/*
				 * Backout trailing whitespaces, but keep
				 * a blank that directly follows a backslash.
				 */
				for (; pos > 0; --pos) {
					if (ln.buf[pos - 1] != ' ')
						break;
					if (pos > 2 && ln.buf[pos - 2] == '\\')
						break;
				}
				break;
			}

			/* Catch escaped bogus characters. */

			c = (unsigned char) blk.buf[i+1];

			if ( ! (isascii(c) &&
			    (isgraph(c) || isblank(c)))) {
				mandoc_vmsg(MANDOCERR_BADCHAR, curp,
				    curp->line, pos, "0x%x", c);
				i += 2;
				ln.buf[pos++] = '?';
				continue;
			}

			/* Some other escape sequence, copy & cont. */

			ln.buf[pos++] = blk.buf[i++];
			ln.buf[pos++] = blk.buf[i++];
		}

		/* Make room for the terminating NUL (also if ln is empty). */
		if (pos >= (int)ln.sz)
			resize_buf(&ln, 256);

		ln.buf[pos] = '\0';

		/*
		 * A significant amount of complexity is contained by
		 * the roff preprocessor.  It's line-oriented but can be
		 * expressed on one line, so we need at times to
		 * readjust our starting point and re-run it.  The roff
		 * preprocessor can also readjust the buffers with new
		 * data, so we pass them in wholesale.
		 */

		of = 0;

		/*
		 * Maintain a lookaside buffer of all parsed lines.  We
		 * only do this if mparse_keep() has been invoked (the
		 * buffer may be accessed with mparse_getkeep()).
		 */

		if (curp->secondary) {
			/* Room for the line, a newline and a NUL byte. */
			curp->secondary->buf = mandoc_realloc(
			    curp->secondary->buf,
			    curp->secondary->sz + pos + 2);
			memcpy(curp->secondary->buf +
			    curp->secondary->sz,
			    ln.buf, pos);
			curp->secondary->sz += pos;
			curp->secondary->buf
				[curp->secondary->sz] = '\n';
			curp->secondary->sz++;
			curp->secondary->buf
				[curp->secondary->sz] = '\0';
		}
rerun:
		rr = roff_parseln(curp->roff, curp->line,
		    &ln.buf, &ln.sz, of, &of);

		switch (rr) {
		case ROFF_REPARSE:
			/* Parse the rewritten line as new input, bounded. */
			if (REPARSE_LIMIT >= ++curp->reparse_count)
				mparse_buf_r(curp, ln, 0);
			else
				mandoc_msg(MANDOCERR_ROFFLOOP, curp,
				    curp->line, pos, NULL);
			pos = 0;
			continue;
		case ROFF_APPEND:
			/* Keep the text; the next input line is appended. */
			pos = (int)strlen(ln.buf);
			continue;
		case ROFF_RERUN:
			goto rerun;
		case ROFF_IGN:
			/* Discard the line. */
			pos = 0;
			continue;
		case ROFF_ERR:
			assert(MANDOCLEVEL_FATAL <= curp->file_status);
			break;
		case ROFF_SO:
			/*
			 * Without MPARSE_SO, a `so' request that is the
			 * last thing in the input is not followed; only
			 * its target is recorded for the caller.
			 */
			if (0 == (MPARSE_SO & curp->options) &&
			    (i >= (int)blk.sz || '\0' == blk.buf[i])) {
				curp->sodest = mandoc_strdup(ln.buf + of);
				free(ln.buf);
				return;
			}
			/*
			 * We remove `so' clauses from our lookaside
			 * buffer because we're going to descend into
			 * the file recursively.
			 */
			if (curp->secondary)
				curp->secondary->sz -= pos + 1;
			mparse_readfd(curp, -1, ln.buf + of);
			if (MANDOCLEVEL_FATAL <= curp->file_status) {
				mandoc_vmsg(MANDOCERR_SO_FAIL,
				    curp, curp->line, pos,
				    ".so %s", ln.buf + of);
				break;
			}
			pos = 0;
			continue;
		default:
			break;
		}

		/*
		 * If we encounter errors in the recursive parse, make
		 * sure we don't continue parsing.
		 */

		if (MANDOCLEVEL_FATAL <= curp->file_status)
			break;

		/*
		 * If input parsers have not been allocated, do so now.
		 * We keep these instanced between parsers, but set them
		 * locally per parse routine since we can use different
		 * parsers with each one.
		 */

		if ( ! (curp->man || curp->mdoc))
			choose_parser(curp);

		/*
		 * Lastly, push down into the parsers themselves.
		 * If libroff returns ROFF_TBL, then add it to the
		 * currently open parse.  Since we only get here if
		 * there does exist data (see tbl_data.c), we're
		 * guaranteed that something's been allocated.
		 * Do the same for ROFF_EQN.
		 */

		rc = -1;

		if (ROFF_TBL == rr)
			while (NULL != (span = roff_span(curp->roff))) {
				rc = curp->man ?
				    man_addspan(curp->man, span) :
				    mdoc_addspan(curp->mdoc, span);
				if (0 == rc)
					break;
			}
		else if (ROFF_EQN == rr)
			rc = curp->mdoc ?
			    mdoc_addeqn(curp->mdoc,
				roff_eqn(curp->roff)) :
			    man_addeqn(curp->man,
				roff_eqn(curp->roff));
		else if (curp->man || curp->mdoc)
			rc = curp->man ?
			    man_parseln(curp->man,
				curp->line, ln.buf, of) :
			    mdoc_parseln(curp->mdoc,
				curp->line, ln.buf, of);

		/*
		 * A parser result of 0 is a failure that must already
		 * have raised the file status to fatal.
		 * NOTE(review): 2 presumably means the parser wants to
		 * stop reading early without an error -- confirm in the
		 * man/mdoc parsers.
		 */
		if (0 == rc) {
			assert(MANDOCLEVEL_FATAL <= curp->file_status);
			break;
		} else if (2 == rc)
			break;

		/* Temporary buffers typically are not full. */

		if (0 == start && '\0' == blk.buf[i])
			break;

		/* Start the next input line. */

		pos = 0;
	}

	free(ln.buf);
}
602
603 static int
604 read_whole_file(struct mparse *curp, const char *file, int fd,
605 struct buf *fb, int *with_mmap)
606 {
607 size_t off;
608 ssize_t ssz;
609
610 #if HAVE_MMAP
611 struct stat st;
612 if (-1 == fstat(fd, &st)) {
613 curp->file_status = MANDOCLEVEL_SYSERR;
614 if (curp->mmsg)
615 (*curp->mmsg)(MANDOCERR_SYSSTAT, curp->file_status,
616 file, 0, 0, strerror(errno));
617 return(0);
618 }
619
620 /*
621 * If we're a regular file, try just reading in the whole entry
622 * via mmap(). This is faster than reading it into blocks, and
623 * since each file is only a few bytes to begin with, I'm not
624 * concerned that this is going to tank any machines.
625 */
626
627 if (S_ISREG(st.st_mode)) {
628 if (st.st_size >= (1U << 31)) {
629 curp->file_status = MANDOCLEVEL_FATAL;
630 if (curp->mmsg)
631 (*curp->mmsg)(MANDOCERR_TOOLARGE,
632 curp->file_status, file, 0, 0, NULL);
633 return(0);
634 }
635 *with_mmap = 1;
636 fb->sz = (size_t)st.st_size;
637 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
638 if (fb->buf != MAP_FAILED)
639 return(1);
640 }
641 #endif
642
643 /*
644 * If this isn't a regular file (like, say, stdin), then we must
645 * go the old way and just read things in bit by bit.
646 */
647
648 *with_mmap = 0;
649 off = 0;
650 fb->sz = 0;
651 fb->buf = NULL;
652 for (;;) {
653 if (off == fb->sz) {
654 if (fb->sz == (1U << 31)) {
655 curp->file_status = MANDOCLEVEL_FATAL;
656 if (curp->mmsg)
657 (*curp->mmsg)(MANDOCERR_TOOLARGE,
658 curp->file_status,
659 file, 0, 0, NULL);
660 break;
661 }
662 resize_buf(fb, 65536);
663 }
664 ssz = read(fd, fb->buf + (int)off, fb->sz - off);
665 if (ssz == 0) {
666 fb->sz = off;
667 return(1);
668 }
669 if (ssz == -1) {
670 curp->file_status = MANDOCLEVEL_SYSERR;
671 if (curp->mmsg)
672 (*curp->mmsg)(MANDOCERR_SYSREAD,
673 curp->file_status, file, 0, 0,
674 strerror(errno));
675 break;
676 }
677 off += (size_t)ssz;
678 }
679
680 free(fb->buf);
681 fb->buf = NULL;
682 return(0);
683 }
684
685 static void
686 mparse_end(struct mparse *curp)
687 {
688
689 if (MANDOCLEVEL_FATAL <= curp->file_status)
690 return;
691
692 if (curp->mdoc == NULL &&
693 curp->man == NULL &&
694 curp->sodest == NULL) {
695 if (curp->options & MPARSE_MDOC)
696 curp->mdoc = curp->pmdoc;
697 else {
698 if (curp->pman == NULL)
699 curp->pman = man_alloc(curp->roff, curp,
700 curp->options & MPARSE_QUICK ? 1 : 0);
701 curp->man = curp->pman;
702 }
703 }
704
705 if (curp->mdoc && ! mdoc_endparse(curp->mdoc)) {
706 assert(MANDOCLEVEL_FATAL <= curp->file_status);
707 return;
708 }
709
710 if (curp->man && ! man_endparse(curp->man)) {
711 assert(MANDOCLEVEL_FATAL <= curp->file_status);
712 return;
713 }
714
715 roff_endparse(curp->roff);
716 }
717
718 static void
719 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
720 {
721 struct buf *svprimary;
722 const char *svfile;
723 static int recursion_depth;
724
725 if (64 < recursion_depth) {
726 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
727 return;
728 }
729
730 /* Line number is per-file. */
731 svfile = curp->file;
732 curp->file = file;
733 svprimary = curp->primary;
734 curp->primary = &blk;
735 curp->line = 1;
736 recursion_depth++;
737
738 mparse_buf_r(curp, blk, 1);
739
740 if (0 == --recursion_depth && MANDOCLEVEL_FATAL > curp->file_status)
741 mparse_end(curp);
742
743 curp->primary = svprimary;
744 curp->file = svfile;
745 }
746
747 enum mandoclevel
748 mparse_readmem(struct mparse *curp, const void *buf, size_t len,
749 const char *file)
750 {
751 struct buf blk;
752
753 blk.buf = UNCONST(buf);
754 blk.sz = len;
755
756 mparse_parse_buffer(curp, blk, file);
757 return(curp->file_status);
758 }
759
760 enum mandoclevel
761 mparse_readfd(struct mparse *curp, int fd, const char *file)
762 {
763 struct buf blk;
764 int with_mmap;
765
766 if (-1 == fd && -1 == (fd = open(file, O_RDONLY, 0))) {
767 curp->file_status = MANDOCLEVEL_SYSERR;
768 if (curp->mmsg)
769 (*curp->mmsg)(MANDOCERR_SYSOPEN,
770 curp->file_status,
771 file, 0, 0, strerror(errno));
772 return(curp->file_status);
773 }
774
775 /*
776 * Run for each opened file; may be called more than once for
777 * each full parse sequence if the opened file is nested (i.e.,
778 * from `so'). Simply sucks in the whole file and moves into
779 * the parse phase for the file.
780 */
781
782 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
783 mparse_parse_buffer(curp, blk, file);
784 #if HAVE_MMAP
785 if (with_mmap)
786 munmap(blk.buf, blk.sz);
787 else
788 #endif
789 free(blk.buf);
790 }
791
792 if (STDIN_FILENO != fd && -1 == close(fd))
793 perror(file);
794
795 return(curp->file_status);
796 }
797
798 enum mandoclevel
799 mparse_open(struct mparse *curp, int *fd, const char *file,
800 pid_t *child_pid)
801 {
802 int pfd[2];
803 char *cp;
804 enum mandocerr err;
805
806 pfd[1] = -1;
807 curp->file = file;
808 if ((cp = strrchr(file, '.')) == NULL ||
809 strcmp(cp + 1, "gz")) {
810 *child_pid = 0;
811 if ((*fd = open(file, O_RDONLY)) == -1) {
812 err = MANDOCERR_SYSOPEN;
813 goto out;
814 }
815 return(MANDOCLEVEL_OK);
816 }
817
818 if (pipe(pfd) == -1) {
819 err = MANDOCERR_SYSPIPE;
820 goto out;
821 }
822
823 switch (*child_pid = fork()) {
824 case -1:
825 err = MANDOCERR_SYSFORK;
826 close(pfd[0]);
827 close(pfd[1]);
828 pfd[1] = -1;
829 break;
830 case 0:
831 close(pfd[0]);
832 if (dup2(pfd[1], STDOUT_FILENO) == -1) {
833 err = MANDOCERR_SYSDUP;
834 break;
835 }
836 execlp("gunzip", "gunzip", "-c", file, NULL);
837 err = MANDOCERR_SYSEXEC;
838 break;
839 default:
840 close(pfd[1]);
841 *fd = pfd[0];
842 return(MANDOCLEVEL_OK);
843 }
844
845 out:
846 *fd = -1;
847 *child_pid = 0;
848 curp->file_status = MANDOCLEVEL_SYSERR;
849 if (curp->mmsg)
850 (*curp->mmsg)(err, curp->file_status, file,
851 0, 0, strerror(errno));
852 if (pfd[1] != -1)
853 exit(1);
854 return(curp->file_status);
855 }
856
857 enum mandoclevel
858 mparse_wait(struct mparse *curp, pid_t child_pid)
859 {
860 int status;
861
862 if (waitpid(child_pid, &status, 0) == -1) {
863 mandoc_msg(MANDOCERR_SYSWAIT, curp, 0, 0,
864 strerror(errno));
865 curp->file_status = MANDOCLEVEL_SYSERR;
866 return(curp->file_status);
867 }
868 if (WIFSIGNALED(status)) {
869 mandoc_vmsg(MANDOCERR_SYSSIG, curp, 0, 0,
870 "%d", WTERMSIG(status));
871 curp->file_status = MANDOCLEVEL_SYSERR;
872 return(curp->file_status);
873 }
874 if (WEXITSTATUS(status)) {
875 mandoc_vmsg(MANDOCERR_SYSEXIT, curp, 0, 0,
876 "%d", WEXITSTATUS(status));
877 curp->file_status = MANDOCLEVEL_SYSERR;
878 return(curp->file_status);
879 }
880 return(MANDOCLEVEL_OK);
881 }
882
883 struct mparse *
884 mparse_alloc(int options, enum mandoclevel wlevel,
885 mandocmsg mmsg, const char *defos)
886 {
887 struct mparse *curp;
888
889 assert(wlevel <= MANDOCLEVEL_FATAL);
890
891 curp = mandoc_calloc(1, sizeof(struct mparse));
892
893 curp->options = options;
894 curp->wlevel = wlevel;
895 curp->mmsg = mmsg;
896 curp->defos = defos;
897
898 curp->roff = roff_alloc(curp, options);
899 if (curp->options & MPARSE_MDOC)
900 curp->pmdoc = mdoc_alloc(
901 curp->roff, curp, curp->defos,
902 curp->options & MPARSE_QUICK ? 1 : 0);
903 if (curp->options & MPARSE_MAN)
904 curp->pman = man_alloc(curp->roff, curp,
905 curp->options & MPARSE_QUICK ? 1 : 0);
906
907 return(curp);
908 }
909
910 void
911 mparse_reset(struct mparse *curp)
912 {
913
914 roff_reset(curp->roff);
915
916 if (curp->mdoc)
917 mdoc_reset(curp->mdoc);
918 if (curp->man)
919 man_reset(curp->man);
920 if (curp->secondary)
921 curp->secondary->sz = 0;
922
923 curp->file_status = MANDOCLEVEL_OK;
924 curp->mdoc = NULL;
925 curp->man = NULL;
926
927 free(curp->sodest);
928 curp->sodest = NULL;
929 }
930
931 void
932 mparse_free(struct mparse *curp)
933 {
934
935 if (curp->pmdoc)
936 mdoc_free(curp->pmdoc);
937 if (curp->pman)
938 man_free(curp->pman);
939 if (curp->roff)
940 roff_free(curp->roff);
941 if (curp->secondary)
942 free(curp->secondary->buf);
943
944 free(curp->secondary);
945 free(curp->sodest);
946 free(curp);
947 }
948
949 void
950 mparse_result(struct mparse *curp,
951 struct mdoc **mdoc, struct man **man, char **sodest)
952 {
953
954 if (sodest && NULL != (*sodest = curp->sodest)) {
955 *mdoc = NULL;
956 *man = NULL;
957 return;
958 }
959 if (mdoc)
960 *mdoc = curp->mdoc;
961 if (man)
962 *man = curp->man;
963 }
964
965 void
966 mandoc_vmsg(enum mandocerr t, struct mparse *m,
967 int ln, int pos, const char *fmt, ...)
968 {
969 char buf[256];
970 va_list ap;
971
972 va_start(ap, fmt);
973 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
974 va_end(ap);
975
976 mandoc_msg(t, m, ln, pos, buf);
977 }
978
979 void
980 mandoc_msg(enum mandocerr er, struct mparse *m,
981 int ln, int col, const char *msg)
982 {
983 enum mandoclevel level;
984
985 level = MANDOCLEVEL_FATAL;
986 while (er < mandoclimits[level])
987 level--;
988
989 if (level < m->wlevel)
990 return;
991
992 if (m->mmsg)
993 (*m->mmsg)(er, level, m->file, ln, col, msg);
994
995 if (m->file_status < level)
996 m->file_status = level;
997 }
998
999 const char *
1000 mparse_strerror(enum mandocerr er)
1001 {
1002
1003 return(mandocerrs[er]);
1004 }
1005
1006 const char *
1007 mparse_strlevel(enum mandoclevel lvl)
1008 {
1009 return(mandoclevels[lvl]);
1010 }
1011
1012 void
1013 mparse_keep(struct mparse *p)
1014 {
1015
1016 assert(NULL == p->secondary);
1017 p->secondary = mandoc_calloc(1, sizeof(struct buf));
1018 }
1019
1020 const char *
1021 mparse_getkeep(const struct mparse *p)
1022 {
1023
1024 assert(p->secondary);
1025 return(p->secondary->sz ? p->secondary->buf : NULL);
1026 }