]> git.cameronkatri.com Git - mandoc.git/blob - read.c
Reduce memory and time consumption on certain malformed input files
[mandoc.git] / read.c
1 /* $Id: read.c,v 1.105 2014/12/16 23:44:41 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include "config.h"
20
21 #include <sys/types.h>
22 #if HAVE_MMAP
23 #include <sys/mman.h>
24 #include <sys/stat.h>
25 #endif
26 #include <sys/wait.h>
27
28 #include <assert.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <stdarg.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38
39 #include "mandoc.h"
40 #include "mandoc_aux.h"
41 #include "libmandoc.h"
42 #include "mdoc.h"
43 #include "man.h"
44
/*
 * Maximum value curp->reparse_count may reach while handling
 * ROFF_REPARSE results; beyond it, MANDOCERR_ROFFLOOP is reported
 * instead of recursing again.
 */
#define	REPARSE_LIMIT	1000
46
47 struct mparse {
48 struct man *pman; /* persistent man parser */
49 struct mdoc *pmdoc; /* persistent mdoc parser */
50 struct man *man; /* man parser */
51 struct mdoc *mdoc; /* mdoc parser */
52 struct roff *roff; /* roff parser (!NULL) */
53 const struct mchars *mchars; /* character table */
54 char *sodest; /* filename pointed to by .so */
55 const char *file; /* filename of current input file */
56 struct buf *primary; /* buffer currently being parsed */
57 struct buf *secondary; /* preprocessed copy of input */
58 const char *defos; /* default operating system */
59 mandocmsg mmsg; /* warning/error message handler */
60 enum mandoclevel file_status; /* status of current parse */
61 enum mandoclevel wlevel; /* ignore messages below this */
62 int options; /* parser options */
63 int filenc; /* encoding of the current file */
64 int reparse_count; /* finite interp. stack */
65 int line; /* line number in the file */
66 pid_t child; /* the gunzip(1) process */
67 };
68
/* Forward declarations of the file-local helpers. */
static	void	  choose_parser(struct mparse *curp);
static	void	  mparse_buf_r(struct mparse *curp, struct buf blk,
			size_t i, int start);
static	void	  mparse_end(struct mparse *curp);
static	void	  mparse_parse_buffer(struct mparse *curp,
			struct buf blk, const char *file);
static	int	  read_whole_file(struct mparse *curp, const char *file,
			int fd, struct buf *fb, int *with_mmap);
static	void	  resize_buf(struct buf *buf, size_t initial);
77
78 static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = {
79 MANDOCERR_OK,
80 MANDOCERR_WARNING,
81 MANDOCERR_WARNING,
82 MANDOCERR_ERROR,
83 MANDOCERR_FATAL,
84 MANDOCERR_MAX,
85 MANDOCERR_MAX
86 };
87
88 static const char * const mandocerrs[MANDOCERR_MAX] = {
89 "ok",
90
91 "generic warning",
92
93 /* related to the prologue */
94 "missing manual title, using UNTITLED",
95 "missing manual title, using \"\"",
96 "lower case character in document title",
97 "missing manual section, using \"\"",
98 "unknown manual section",
99 "missing date, using today's date",
100 "cannot parse date, using it verbatim",
101 "missing Os macro, using \"\"",
102 "duplicate prologue macro",
103 "late prologue macro",
104 "skipping late title macro",
105 "prologue macros out of order",
106
107 /* related to document structure */
108 ".so is fragile, better use ln(1)",
109 "no document body",
110 "content before first section header",
111 "first section is not \"NAME\"",
112 "bad NAME section contents",
113 "sections out of conventional order",
114 "duplicate section title",
115 "unexpected section",
116 "unusual Xr order",
117 "unusual Xr punctuation",
118 "AUTHORS section without An macro",
119
120 /* related to macros and nesting */
121 "obsolete macro",
122 "macro neither callable nor escaped",
123 "skipping paragraph macro",
124 "moving paragraph macro out of list",
125 "skipping no-space macro",
126 "blocks badly nested",
127 "nested displays are not portable",
128 "moving content out of list",
129 ".Vt block has child macro",
130 "fill mode already enabled, skipping",
131 "fill mode already disabled, skipping",
132 "line scope broken",
133
134 /* related to missing macro arguments */
135 "skipping empty request",
136 "conditional request controls empty scope",
137 "skipping empty macro",
138 "empty argument, using 0n",
139 "argument count wrong",
140 "missing display type, using -ragged",
141 "list type is not the first argument",
142 "missing -width in -tag list, using 8n",
143 "missing utility name, using \"\"",
144 "empty head in list item",
145 "empty list item",
146 "missing font type, using \\fR",
147 "unknown font type, using \\fR",
148 "nothing follows prefix",
149 "missing -std argument, adding it",
150 "missing eqn box, using \"\"",
151
152 /* related to bad macro arguments */
153 "unterminated quoted argument",
154 "duplicate argument",
155 "skipping duplicate argument",
156 "skipping duplicate display type",
157 "skipping duplicate list type",
158 "skipping -width argument",
159 "unknown AT&T UNIX version",
160 "comma in function argument",
161 "parenthesis in function name",
162 "invalid content in Rs block",
163 "invalid Boolean argument",
164 "unknown font, skipping request",
165
166 /* related to plain text */
167 "blank line in fill mode, using .sp",
168 "tab in filled text",
169 "whitespace at end of input line",
170 "bad comment style",
171 "invalid escape sequence",
172 "undefined string, using \"\"",
173
174 "generic error",
175
176 /* related to equations */
177 "unexpected equation scope closure",
178 "equation scope open on exit",
179 "overlapping equation scopes",
180 "unexpected end of equation",
181
182 /* related to tables */
183 "bad table syntax",
184 "bad table option",
185 "bad table layout",
186 "no table layout cells specified",
187 "no table data cells specified",
188 "ignore data in cell",
189 "data block still open",
190 "ignoring extra data cells",
191 "ignoring macro in table",
192
193 /* related to document structure and macros */
194 "input stack limit exceeded, infinite loop?",
195 "skipping bad character",
196 "skipping unknown macro",
197 "skipping item outside list",
198 "skipping column outside column list",
199 "skipping end of block that is not open",
200 "inserting missing end of block",
201 "appending missing end of block",
202
203 /* related to request and macro arguments */
204 "escaped character not allowed in a name",
205 "argument count wrong",
206 "NOT IMPLEMENTED: Bd -file",
207 "missing list type, using -item",
208 "missing manual name, using \"\"",
209 "uname(3) system call failed, using UNKNOWN",
210 "unknown standard specifier",
211 "skipping request without numeric argument",
212 "skipping all arguments",
213 "skipping excess arguments",
214 "divide by zero",
215
216 "generic fatal error",
217
218 "input too large",
219 "NOT IMPLEMENTED: .so with absolute path or \"..\"",
220 ".so request failed",
221
222 /* system errors */
223 "cannot dup file descriptor",
224 "cannot exec",
225 "gunzip failed with code",
226 "cannot fork",
227 NULL,
228 "cannot open pipe",
229 "cannot read file",
230 "gunzip died from signal",
231 "cannot stat file",
232 "wait failed",
233 };
234
235 static const char * const mandoclevels[MANDOCLEVEL_MAX] = {
236 "SUCCESS",
237 "RESERVED",
238 "WARNING",
239 "ERROR",
240 "FATAL",
241 "BADARG",
242 "SYSERR"
243 };
244
245
246 static void
247 resize_buf(struct buf *buf, size_t initial)
248 {
249
250 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
251 buf->buf = mandoc_realloc(buf->buf, buf->sz);
252 }
253
254 static void
255 choose_parser(struct mparse *curp)
256 {
257 char *cp, *ep;
258 int format;
259
260 /*
261 * If neither command line arguments -mdoc or -man select
262 * a parser nor the roff parser found a .Dd or .TH macro
263 * yet, look ahead in the main input buffer.
264 */
265
266 if ((format = roff_getformat(curp->roff)) == 0) {
267 cp = curp->primary->buf;
268 ep = cp + curp->primary->sz;
269 while (cp < ep) {
270 if (*cp == '.' || *cp == '\'') {
271 cp++;
272 if (cp[0] == 'D' && cp[1] == 'd') {
273 format = MPARSE_MDOC;
274 break;
275 }
276 if (cp[0] == 'T' && cp[1] == 'H') {
277 format = MPARSE_MAN;
278 break;
279 }
280 }
281 cp = memchr(cp, '\n', ep - cp);
282 if (cp == NULL)
283 break;
284 cp++;
285 }
286 }
287
288 if (format == MPARSE_MDOC) {
289 if (NULL == curp->pmdoc)
290 curp->pmdoc = mdoc_alloc(
291 curp->roff, curp, curp->defos,
292 MPARSE_QUICK & curp->options ? 1 : 0);
293 assert(curp->pmdoc);
294 curp->mdoc = curp->pmdoc;
295 return;
296 }
297
298 /* Fall back to man(7) as a last resort. */
299
300 if (NULL == curp->pman)
301 curp->pman = man_alloc(curp->roff, curp,
302 MPARSE_QUICK & curp->options ? 1 : 0);
303 assert(curp->pman);
304 curp->man = curp->pman;
305 }
306
307 /*
308 * Main parse routine for a buffer.
309 * It assumes encoding and line numbering are already set up.
310 * It can recurse directly (for invocations of user-defined
311 * macros, inline equations, and input line traps)
312 * and indirectly (for .so file inclusion).
313 */
314 static void
315 mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
316 {
317 const struct tbl_span *span;
318 struct buf ln;
319 size_t pos; /* byte number in the ln buffer */
320 enum rofferr rr;
321 int of;
322 int lnn; /* line number in the real file */
323 unsigned char c;
324
325 memset(&ln, 0, sizeof(ln));
326
327 lnn = curp->line;
328 pos = 0;
329
330 while (i < blk.sz) {
331 if (0 == pos && '\0' == blk.buf[i])
332 break;
333
334 if (start) {
335 curp->line = lnn;
336 curp->reparse_count = 0;
337
338 if (lnn < 3 &&
339 curp->filenc & MPARSE_UTF8 &&
340 curp->filenc & MPARSE_LATIN1)
341 curp->filenc = preconv_cue(&blk, i);
342 }
343
344 while (i < blk.sz && (start || blk.buf[i] != '\0')) {
345
346 /*
347 * When finding an unescaped newline character,
348 * leave the character loop to process the line.
349 * Skip a preceding carriage return, if any.
350 */
351
352 if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
353 '\n' == blk.buf[i + 1])
354 ++i;
355 if ('\n' == blk.buf[i]) {
356 ++i;
357 ++lnn;
358 break;
359 }
360
361 /*
362 * Make sure we have space for the worst
363 * case of 11 bytes: "\\[u10ffff]\0"
364 */
365
366 if (pos + 11 > ln.sz)
367 resize_buf(&ln, 256);
368
369 /*
370 * Encode 8-bit input.
371 */
372
373 c = blk.buf[i];
374 if (c & 0x80) {
375 if ( ! (curp->filenc && preconv_encode(
376 &blk, &i, &ln, &pos, &curp->filenc))) {
377 mandoc_vmsg(MANDOCERR_BADCHAR,
378 curp, curp->line, pos,
379 "0x%x", c);
380 ln.buf[pos++] = '?';
381 i++;
382 }
383 continue;
384 }
385
386 /*
387 * Exclude control characters.
388 */
389
390 if (c == 0x7f || (c < 0x20 && c != 0x09)) {
391 mandoc_vmsg(MANDOCERR_BADCHAR, curp,
392 curp->line, pos, "0x%x", c);
393 i++;
394 ln.buf[pos++] = '?';
395 continue;
396 }
397
398 /* Trailing backslash = a plain char. */
399
400 if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
401 ln.buf[pos++] = blk.buf[i++];
402 continue;
403 }
404
405 /*
406 * Found escape and at least one other character.
407 * When it's a newline character, skip it.
408 * When there is a carriage return in between,
409 * skip that one as well.
410 */
411
412 if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
413 '\n' == blk.buf[i + 2])
414 ++i;
415 if ('\n' == blk.buf[i + 1]) {
416 i += 2;
417 ++lnn;
418 continue;
419 }
420
421 if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
422 i += 2;
423 /* Comment, skip to end of line */
424 for (; i < blk.sz; ++i) {
425 if ('\n' == blk.buf[i]) {
426 ++i;
427 ++lnn;
428 break;
429 }
430 }
431
432 /* Backout trailing whitespaces */
433 for (; pos > 0; --pos) {
434 if (ln.buf[pos - 1] != ' ')
435 break;
436 if (pos > 2 && ln.buf[pos - 2] == '\\')
437 break;
438 }
439 break;
440 }
441
442 /* Catch escaped bogus characters. */
443
444 c = (unsigned char) blk.buf[i+1];
445
446 if ( ! (isascii(c) &&
447 (isgraph(c) || isblank(c)))) {
448 mandoc_vmsg(MANDOCERR_BADCHAR, curp,
449 curp->line, pos, "0x%x", c);
450 i += 2;
451 ln.buf[pos++] = '?';
452 continue;
453 }
454
455 /* Some other escape sequence, copy & cont. */
456
457 ln.buf[pos++] = blk.buf[i++];
458 ln.buf[pos++] = blk.buf[i++];
459 }
460
461 if (pos >= ln.sz)
462 resize_buf(&ln, 256);
463
464 ln.buf[pos] = '\0';
465
466 /*
467 * A significant amount of complexity is contained by
468 * the roff preprocessor. It's line-oriented but can be
469 * expressed on one line, so we need at times to
470 * readjust our starting point and re-run it. The roff
471 * preprocessor can also readjust the buffers with new
472 * data, so we pass them in wholesale.
473 */
474
475 of = 0;
476
477 /*
478 * Maintain a lookaside buffer of all parsed lines. We
479 * only do this if mparse_keep() has been invoked (the
480 * buffer may be accessed with mparse_getkeep()).
481 */
482
483 if (curp->secondary) {
484 curp->secondary->buf = mandoc_realloc(
485 curp->secondary->buf,
486 curp->secondary->sz + pos + 2);
487 memcpy(curp->secondary->buf +
488 curp->secondary->sz,
489 ln.buf, pos);
490 curp->secondary->sz += pos;
491 curp->secondary->buf
492 [curp->secondary->sz] = '\n';
493 curp->secondary->sz++;
494 curp->secondary->buf
495 [curp->secondary->sz] = '\0';
496 }
497 rerun:
498 rr = roff_parseln(curp->roff, curp->line, &ln, &of);
499
500 switch (rr) {
501 case ROFF_REPARSE:
502 if (REPARSE_LIMIT >= ++curp->reparse_count)
503 mparse_buf_r(curp, ln, of, 0);
504 else
505 mandoc_msg(MANDOCERR_ROFFLOOP, curp,
506 curp->line, pos, NULL);
507 pos = 0;
508 continue;
509 case ROFF_APPEND:
510 pos = strlen(ln.buf);
511 continue;
512 case ROFF_RERUN:
513 goto rerun;
514 case ROFF_IGN:
515 pos = 0;
516 continue;
517 case ROFF_ERR:
518 assert(MANDOCLEVEL_FATAL <= curp->file_status);
519 break;
520 case ROFF_SO:
521 if ( ! (curp->options & MPARSE_SO) &&
522 (i >= blk.sz || blk.buf[i] == '\0')) {
523 curp->sodest = mandoc_strdup(ln.buf + of);
524 free(ln.buf);
525 return;
526 }
527 /*
528 * We remove `so' clauses from our lookaside
529 * buffer because we're going to descend into
530 * the file recursively.
531 */
532 if (curp->secondary)
533 curp->secondary->sz -= pos + 1;
534 mparse_readfd(curp, -1, ln.buf + of);
535 if (MANDOCLEVEL_FATAL <= curp->file_status) {
536 mandoc_vmsg(MANDOCERR_SO_FAIL,
537 curp, curp->line, pos,
538 ".so %s", ln.buf + of);
539 break;
540 }
541 pos = 0;
542 continue;
543 default:
544 break;
545 }
546
547 /*
548 * If we encounter errors in the recursive parse, make
549 * sure we don't continue parsing.
550 */
551
552 if (MANDOCLEVEL_FATAL <= curp->file_status)
553 break;
554
555 /*
556 * If input parsers have not been allocated, do so now.
557 * We keep these instanced between parsers, but set them
558 * locally per parse routine since we can use different
559 * parsers with each one.
560 */
561
562 if ( ! (curp->man || curp->mdoc))
563 choose_parser(curp);
564
565 /*
566 * Lastly, push down into the parsers themselves.
567 * If libroff returns ROFF_TBL, then add it to the
568 * currently open parse. Since we only get here if
569 * there does exist data (see tbl_data.c), we're
570 * guaranteed that something's been allocated.
571 * Do the same for ROFF_EQN.
572 */
573
574 if (rr == ROFF_TBL) {
575 while ((span = roff_span(curp->roff)) != NULL)
576 if (curp->man == NULL)
577 mdoc_addspan(curp->mdoc, span);
578 else
579 man_addspan(curp->man, span);
580 } else if (rr == ROFF_EQN) {
581 if (curp->man == NULL)
582 mdoc_addeqn(curp->mdoc, roff_eqn(curp->roff));
583 else
584 man_addeqn(curp->man, roff_eqn(curp->roff));
585 } else if ((curp->man == NULL ?
586 mdoc_parseln(curp->mdoc, curp->line, ln.buf, of) :
587 man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
588 break;
589
590 /* Temporary buffers typically are not full. */
591
592 if (0 == start && '\0' == blk.buf[i])
593 break;
594
595 /* Start the next input line. */
596
597 pos = 0;
598 }
599
600 free(ln.buf);
601 }
602
603 static int
604 read_whole_file(struct mparse *curp, const char *file, int fd,
605 struct buf *fb, int *with_mmap)
606 {
607 size_t off;
608 ssize_t ssz;
609
610 #if HAVE_MMAP
611 struct stat st;
612 if (-1 == fstat(fd, &st)) {
613 curp->file_status = MANDOCLEVEL_SYSERR;
614 if (curp->mmsg)
615 (*curp->mmsg)(MANDOCERR_SYSSTAT, curp->file_status,
616 file, 0, 0, strerror(errno));
617 return(0);
618 }
619
620 /*
621 * If we're a regular file, try just reading in the whole entry
622 * via mmap(). This is faster than reading it into blocks, and
623 * since each file is only a few bytes to begin with, I'm not
624 * concerned that this is going to tank any machines.
625 */
626
627 if (S_ISREG(st.st_mode)) {
628 if (st.st_size >= (1U << 31)) {
629 curp->file_status = MANDOCLEVEL_FATAL;
630 if (curp->mmsg)
631 (*curp->mmsg)(MANDOCERR_TOOLARGE,
632 curp->file_status, file, 0, 0, NULL);
633 return(0);
634 }
635 *with_mmap = 1;
636 fb->sz = (size_t)st.st_size;
637 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
638 if (fb->buf != MAP_FAILED)
639 return(1);
640 }
641 #endif
642
643 /*
644 * If this isn't a regular file (like, say, stdin), then we must
645 * go the old way and just read things in bit by bit.
646 */
647
648 *with_mmap = 0;
649 off = 0;
650 fb->sz = 0;
651 fb->buf = NULL;
652 for (;;) {
653 if (off == fb->sz) {
654 if (fb->sz == (1U << 31)) {
655 curp->file_status = MANDOCLEVEL_FATAL;
656 if (curp->mmsg)
657 (*curp->mmsg)(MANDOCERR_TOOLARGE,
658 curp->file_status,
659 file, 0, 0, NULL);
660 break;
661 }
662 resize_buf(fb, 65536);
663 }
664 ssz = read(fd, fb->buf + (int)off, fb->sz - off);
665 if (ssz == 0) {
666 fb->sz = off;
667 return(1);
668 }
669 if (ssz == -1) {
670 curp->file_status = MANDOCLEVEL_SYSERR;
671 if (curp->mmsg)
672 (*curp->mmsg)(MANDOCERR_SYSREAD,
673 curp->file_status, file, 0, 0,
674 strerror(errno));
675 break;
676 }
677 off += (size_t)ssz;
678 }
679
680 free(fb->buf);
681 fb->buf = NULL;
682 return(0);
683 }
684
685 static void
686 mparse_end(struct mparse *curp)
687 {
688
689 if (MANDOCLEVEL_FATAL <= curp->file_status)
690 return;
691
692 if (curp->mdoc == NULL &&
693 curp->man == NULL &&
694 curp->sodest == NULL) {
695 if (curp->options & MPARSE_MDOC)
696 curp->mdoc = curp->pmdoc;
697 else {
698 if (curp->pman == NULL)
699 curp->pman = man_alloc(curp->roff, curp,
700 curp->options & MPARSE_QUICK ? 1 : 0);
701 curp->man = curp->pman;
702 }
703 }
704
705 if (curp->mdoc && ! mdoc_endparse(curp->mdoc)) {
706 assert(MANDOCLEVEL_FATAL <= curp->file_status);
707 return;
708 }
709
710 if (curp->man && ! man_endparse(curp->man)) {
711 assert(MANDOCLEVEL_FATAL <= curp->file_status);
712 return;
713 }
714
715 roff_endparse(curp->roff);
716 }
717
718 static void
719 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
720 {
721 struct buf *svprimary;
722 const char *svfile;
723 size_t offset;
724 static int recursion_depth;
725
726 if (64 < recursion_depth) {
727 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
728 return;
729 }
730
731 /* Line number is per-file. */
732 svfile = curp->file;
733 curp->file = file;
734 svprimary = curp->primary;
735 curp->primary = &blk;
736 curp->line = 1;
737 recursion_depth++;
738
739 /* Skip an UTF-8 byte order mark. */
740 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
741 (unsigned char)blk.buf[0] == 0xef &&
742 (unsigned char)blk.buf[1] == 0xbb &&
743 (unsigned char)blk.buf[2] == 0xbf) {
744 offset = 3;
745 curp->filenc &= ~MPARSE_LATIN1;
746 } else
747 offset = 0;
748
749 mparse_buf_r(curp, blk, offset, 1);
750
751 if (0 == --recursion_depth && MANDOCLEVEL_FATAL > curp->file_status)
752 mparse_end(curp);
753
754 curp->primary = svprimary;
755 curp->file = svfile;
756 }
757
758 enum mandoclevel
759 mparse_readmem(struct mparse *curp, void *buf, size_t len,
760 const char *file)
761 {
762 struct buf blk;
763
764 blk.buf = buf;
765 blk.sz = len;
766
767 mparse_parse_buffer(curp, blk, file);
768 return(curp->file_status);
769 }
770
771 /*
772 * If a file descriptor is given, use it and assume it points
773 * to the named file. Otherwise, open the named file.
774 * Read the whole file into memory and call the parsers.
775 * Called recursively when an .so request is encountered.
776 */
777 enum mandoclevel
778 mparse_readfd(struct mparse *curp, int fd, const char *file)
779 {
780 struct buf blk;
781 int with_mmap;
782 int save_filenc;
783 pid_t save_child;
784
785 save_child = curp->child;
786 if (fd != -1)
787 curp->child = 0;
788 else if (mparse_open(curp, &fd, file) >= MANDOCLEVEL_SYSERR)
789 goto out;
790
791 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
792 save_filenc = curp->filenc;
793 curp->filenc = curp->options &
794 (MPARSE_UTF8 | MPARSE_LATIN1);
795 mparse_parse_buffer(curp, blk, file);
796 curp->filenc = save_filenc;
797 #if HAVE_MMAP
798 if (with_mmap)
799 munmap(blk.buf, blk.sz);
800 else
801 #endif
802 free(blk.buf);
803 }
804
805 if (fd != STDIN_FILENO && close(fd) == -1)
806 perror(file);
807
808 mparse_wait(curp);
809 out:
810 curp->child = save_child;
811 return(curp->file_status);
812 }
813
814 enum mandoclevel
815 mparse_open(struct mparse *curp, int *fd, const char *file)
816 {
817 int pfd[2];
818 int save_errno;
819 char *cp;
820 enum mandocerr err;
821
822 pfd[1] = -1;
823 curp->file = file;
824
825 /* Unless zipped, try to just open the file. */
826
827 if ((cp = strrchr(file, '.')) == NULL ||
828 strcmp(cp + 1, "gz")) {
829 curp->child = 0;
830 if ((*fd = open(file, O_RDONLY)) != -1)
831 return(MANDOCLEVEL_OK);
832
833 /* Open failed; try to append ".gz". */
834
835 mandoc_asprintf(&cp, "%s.gz", file);
836 file = cp;
837 } else
838 cp = NULL;
839
840 /* Before forking, make sure the file can be read. */
841
842 save_errno = errno;
843 if (access(file, R_OK) == -1) {
844 if (cp != NULL)
845 errno = save_errno;
846 err = MANDOCERR_SYSOPEN;
847 goto out;
848 }
849
850 /* Run gunzip(1). */
851
852 if (pipe(pfd) == -1) {
853 err = MANDOCERR_SYSPIPE;
854 goto out;
855 }
856
857 switch (curp->child = fork()) {
858 case -1:
859 err = MANDOCERR_SYSFORK;
860 close(pfd[0]);
861 close(pfd[1]);
862 pfd[1] = -1;
863 break;
864 case 0:
865 close(pfd[0]);
866 if (dup2(pfd[1], STDOUT_FILENO) == -1) {
867 err = MANDOCERR_SYSDUP;
868 break;
869 }
870 execlp("gunzip", "gunzip", "-c", file, NULL);
871 err = MANDOCERR_SYSEXEC;
872 break;
873 default:
874 close(pfd[1]);
875 *fd = pfd[0];
876 return(MANDOCLEVEL_OK);
877 }
878
879 out:
880 free(cp);
881 *fd = -1;
882 curp->child = 0;
883 curp->file_status = MANDOCLEVEL_SYSERR;
884 if (curp->mmsg)
885 (*curp->mmsg)(err, curp->file_status, curp->file,
886 0, 0, strerror(errno));
887 if (pfd[1] != -1)
888 exit(1);
889 return(curp->file_status);
890 }
891
892 enum mandoclevel
893 mparse_wait(struct mparse *curp)
894 {
895 int status;
896
897 if (curp->child == 0)
898 return(MANDOCLEVEL_OK);
899
900 if (waitpid(curp->child, &status, 0) == -1) {
901 mandoc_msg(MANDOCERR_SYSWAIT, curp, 0, 0,
902 strerror(errno));
903 curp->file_status = MANDOCLEVEL_SYSERR;
904 return(curp->file_status);
905 }
906 if (WIFSIGNALED(status)) {
907 mandoc_vmsg(MANDOCERR_SYSSIG, curp, 0, 0,
908 "%d", WTERMSIG(status));
909 curp->file_status = MANDOCLEVEL_SYSERR;
910 return(curp->file_status);
911 }
912 if (WEXITSTATUS(status)) {
913 mandoc_vmsg(MANDOCERR_SYSEXIT, curp, 0, 0,
914 "%d", WEXITSTATUS(status));
915 curp->file_status = MANDOCLEVEL_SYSERR;
916 return(curp->file_status);
917 }
918 return(MANDOCLEVEL_OK);
919 }
920
921 struct mparse *
922 mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
923 const struct mchars *mchars, const char *defos)
924 {
925 struct mparse *curp;
926
927 assert(wlevel <= MANDOCLEVEL_FATAL);
928
929 curp = mandoc_calloc(1, sizeof(struct mparse));
930
931 curp->options = options;
932 curp->wlevel = wlevel;
933 curp->mmsg = mmsg;
934 curp->defos = defos;
935
936 curp->mchars = mchars;
937 curp->roff = roff_alloc(curp, curp->mchars, options);
938 if (curp->options & MPARSE_MDOC)
939 curp->pmdoc = mdoc_alloc(
940 curp->roff, curp, curp->defos,
941 curp->options & MPARSE_QUICK ? 1 : 0);
942 if (curp->options & MPARSE_MAN)
943 curp->pman = man_alloc(curp->roff, curp,
944 curp->options & MPARSE_QUICK ? 1 : 0);
945
946 return(curp);
947 }
948
949 void
950 mparse_reset(struct mparse *curp)
951 {
952
953 roff_reset(curp->roff);
954
955 if (curp->mdoc)
956 mdoc_reset(curp->mdoc);
957 if (curp->man)
958 man_reset(curp->man);
959 if (curp->secondary)
960 curp->secondary->sz = 0;
961
962 curp->file_status = MANDOCLEVEL_OK;
963 curp->mdoc = NULL;
964 curp->man = NULL;
965
966 free(curp->sodest);
967 curp->sodest = NULL;
968 }
969
970 void
971 mparse_free(struct mparse *curp)
972 {
973
974 if (curp->pmdoc)
975 mdoc_free(curp->pmdoc);
976 if (curp->pman)
977 man_free(curp->pman);
978 if (curp->roff)
979 roff_free(curp->roff);
980 if (curp->secondary)
981 free(curp->secondary->buf);
982
983 free(curp->secondary);
984 free(curp->sodest);
985 free(curp);
986 }
987
988 void
989 mparse_result(struct mparse *curp,
990 struct mdoc **mdoc, struct man **man, char **sodest)
991 {
992
993 if (sodest && NULL != (*sodest = curp->sodest)) {
994 *mdoc = NULL;
995 *man = NULL;
996 return;
997 }
998 if (mdoc)
999 *mdoc = curp->mdoc;
1000 if (man)
1001 *man = curp->man;
1002 }
1003
1004 void
1005 mandoc_vmsg(enum mandocerr t, struct mparse *m,
1006 int ln, int pos, const char *fmt, ...)
1007 {
1008 char buf[256];
1009 va_list ap;
1010
1011 va_start(ap, fmt);
1012 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
1013 va_end(ap);
1014
1015 mandoc_msg(t, m, ln, pos, buf);
1016 }
1017
1018 void
1019 mandoc_msg(enum mandocerr er, struct mparse *m,
1020 int ln, int col, const char *msg)
1021 {
1022 enum mandoclevel level;
1023
1024 level = MANDOCLEVEL_FATAL;
1025 while (er < mandoclimits[level])
1026 level--;
1027
1028 if (level < m->wlevel)
1029 return;
1030
1031 if (m->mmsg)
1032 (*m->mmsg)(er, level, m->file, ln, col, msg);
1033
1034 if (m->file_status < level)
1035 m->file_status = level;
1036 }
1037
1038 const char *
1039 mparse_strerror(enum mandocerr er)
1040 {
1041
1042 return(mandocerrs[er]);
1043 }
1044
1045 const char *
1046 mparse_strlevel(enum mandoclevel lvl)
1047 {
1048 return(mandoclevels[lvl]);
1049 }
1050
1051 void
1052 mparse_keep(struct mparse *p)
1053 {
1054
1055 assert(NULL == p->secondary);
1056 p->secondary = mandoc_calloc(1, sizeof(struct buf));
1057 }
1058
1059 const char *
1060 mparse_getkeep(const struct mparse *p)
1061 {
1062
1063 assert(p->secondary);
1064 return(p->secondary->sz ? p->secondary->buf : NULL);
1065 }