]> git.cameronkatri.com Git - mandoc.git/blob - read.c
downgrade .so failure from FATAL to ERROR
[mandoc.git] / read.c
1 /* $Id: read.c,v 1.110 2015/01/15 02:29:26 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include "config.h"
20
21 #include <sys/types.h>
22 #if HAVE_MMAP
23 #include <sys/mman.h>
24 #include <sys/stat.h>
25 #endif
26 #include <sys/wait.h>
27
28 #include <assert.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <stdarg.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38
39 #include "mandoc.h"
40 #include "mandoc_aux.h"
41 #include "libmandoc.h"
42 #include "mdoc.h"
43 #include "man.h"
44
45 #define REPARSE_LIMIT 1000
46
/*
 * State of one parse handle.  It bundles the roff preprocessor, the
 * mdoc and man parsers, the message callback, and the bookkeeping
 * (file name, line number, status level) for the file being parsed.
 * The "persistent" parsers are the allocated instances; the plain
 * man/mdoc pointers select which of them is active for the current
 * file and are cleared by mparse_reset().
 */
struct mparse {
	struct man	 *pman; /* persistent man parser */
	struct mdoc	 *pmdoc; /* persistent mdoc parser */
	struct man	 *man; /* man parser */
	struct mdoc	 *mdoc; /* mdoc parser */
	struct roff	 *roff; /* roff parser (!NULL) */
	const struct mchars *mchars; /* character table */
	char		 *sodest; /* filename pointed to by .so */
	const char	 *file; /* filename of current input file */
	struct buf	 *primary; /* buffer currently being parsed */
	struct buf	 *secondary; /* preprocessed copy of input */
	const char	 *defos; /* default operating system */
	mandocmsg	  mmsg; /* warning/error message handler */
	enum mandoclevel  file_status; /* status of current parse */
	enum mandoclevel  wlevel; /* ignore messages below this */
	int		  options; /* parser options */
	int		  filenc; /* encoding of the current file */
	int		  reparse_count; /* finite interp. stack */
	int		  line; /* line number in the file */
	pid_t		  child; /* the gunzip(1) process */
};
68
69 static void choose_parser(struct mparse *);
70 static void resize_buf(struct buf *, size_t);
71 static void mparse_buf_r(struct mparse *, struct buf, size_t, int);
72 static int read_whole_file(struct mparse *, const char *, int,
73 struct buf *, int *);
74 static void mparse_end(struct mparse *);
75 static void mparse_parse_buffer(struct mparse *, struct buf,
76 const char *);
77
/*
 * Indexed by enum mandoclevel.  mandoc_msg() starts at
 * MANDOCLEVEL_FATAL and steps down while the error code is smaller
 * than the entry for the level, so each entry is the lowest
 * mandocerr value that is reported at that level.
 */
static	const enum mandocerr	mandoclimits[MANDOCLEVEL_MAX] = {
	MANDOCERR_OK,
	MANDOCERR_WARNING,
	MANDOCERR_WARNING,
	MANDOCERR_ERROR,
	MANDOCERR_FATAL,
	MANDOCERR_MAX,
	MANDOCERR_MAX
};
87
/*
 * Human-readable text for each enum mandocerr value; this is the
 * table mparse_strerror() indexes.
 * NOTE(review): the entries presumably have to stay in the same
 * order as the enum mandocerr declaration - confirm against the
 * header before adding, removing, or moving a string.
 */
static	const char * const	mandocerrs[MANDOCERR_MAX] = {
	"ok",

	"generic warning",

	/* related to the prologue */
	"missing manual title, using UNTITLED",
	"missing manual title, using \"\"",
	"lower case character in document title",
	"missing manual section, using \"\"",
	"unknown manual section",
	"missing date, using today's date",
	"cannot parse date, using it verbatim",
	"missing Os macro, using \"\"",
	"duplicate prologue macro",
	"late prologue macro",
	"skipping late title macro",
	"prologue macros out of order",

	/* related to document structure */
	".so is fragile, better use ln(1)",
	"no document body",
	"content before first section header",
	"first section is not \"NAME\"",
	"bad NAME section contents",
	"sections out of conventional order",
	"duplicate section title",
	"unexpected section",
	"unusual Xr order",
	"unusual Xr punctuation",
	"AUTHORS section without An macro",

	/* related to macros and nesting */
	"obsolete macro",
	"macro neither callable nor escaped",
	"skipping paragraph macro",
	"moving paragraph macro out of list",
	"skipping no-space macro",
	"blocks badly nested",
	"nested displays are not portable",
	"moving content out of list",
	".Vt block has child macro",
	"fill mode already enabled, skipping",
	"fill mode already disabled, skipping",
	"line scope broken",

	/* related to missing macro arguments */
	"skipping empty request",
	"conditional request controls empty scope",
	"skipping empty macro",
	"empty argument, using 0n",
	"argument count wrong",
	"missing display type, using -ragged",
	"list type is not the first argument",
	"missing -width in -tag list, using 8n",
	"missing utility name, using \"\"",
	"empty head in list item",
	"empty list item",
	"missing font type, using \\fR",
	"unknown font type, using \\fR",
	"nothing follows prefix",
	"missing -std argument, adding it",
	"missing eqn box, using \"\"",

	/* related to bad macro arguments */
	"unterminated quoted argument",
	"duplicate argument",
	"skipping duplicate argument",
	"skipping duplicate display type",
	"skipping duplicate list type",
	"skipping -width argument",
	"unknown AT&T UNIX version",
	"comma in function argument",
	"parenthesis in function name",
	"invalid content in Rs block",
	"invalid Boolean argument",
	"unknown font, skipping request",

	/* related to plain text */
	"blank line in fill mode, using .sp",
	"tab in filled text",
	"whitespace at end of input line",
	"bad comment style",
	"invalid escape sequence",
	"undefined string, using \"\"",

	"generic error",

	/* related to equations */
	"unexpected equation scope closure",
	"equation scope open on exit",
	"overlapping equation scopes",
	"unexpected end of equation",

	/* related to tables */
	"bad table syntax",
	"bad table option",
	"bad table layout",
	"no table layout cells specified",
	"no table data cells specified",
	"ignore data in cell",
	"data block still open",
	"ignoring extra data cells",
	"ignoring macro in table",

	/* related to document structure and macros */
	NULL,
	"input stack limit exceeded, infinite loop?",
	"skipping bad character",
	"skipping unknown macro",
	"skipping item outside list",
	"skipping column outside column list",
	"skipping end of block that is not open",
	"inserting missing end of block",
	"appending missing end of block",

	/* related to request and macro arguments */
	"escaped character not allowed in a name",
	"argument count wrong",
	"NOT IMPLEMENTED: Bd -file",
	"missing list type, using -item",
	"missing manual name, using \"\"",
	"uname(3) system call failed, using UNKNOWN",
	"unknown standard specifier",
	"skipping request without numeric argument",
	"NOT IMPLEMENTED: .so with absolute path or \"..\"",
	".so request failed",
	"skipping all arguments",
	"skipping excess arguments",
	"divide by zero",

	"generic fatal error",

	"input too large",
};
223
/*
 * Printable name of each enum mandoclevel value; this is the table
 * mparse_strlevel() indexes.
 */
static	const char * const	mandoclevels[MANDOCLEVEL_MAX] = {
	"SUCCESS",
	"RESERVED",
	"WARNING",
	"ERROR",
	"FATAL",
	"BADARG",
	"SYSERR"
};
233
234
235 static void
236 resize_buf(struct buf *buf, size_t initial)
237 {
238
239 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
240 buf->buf = mandoc_realloc(buf->buf, buf->sz);
241 }
242
243 static void
244 choose_parser(struct mparse *curp)
245 {
246 char *cp, *ep;
247 int format;
248
249 /*
250 * If neither command line arguments -mdoc or -man select
251 * a parser nor the roff parser found a .Dd or .TH macro
252 * yet, look ahead in the main input buffer.
253 */
254
255 if ((format = roff_getformat(curp->roff)) == 0) {
256 cp = curp->primary->buf;
257 ep = cp + curp->primary->sz;
258 while (cp < ep) {
259 if (*cp == '.' || *cp == '\'') {
260 cp++;
261 if (cp[0] == 'D' && cp[1] == 'd') {
262 format = MPARSE_MDOC;
263 break;
264 }
265 if (cp[0] == 'T' && cp[1] == 'H') {
266 format = MPARSE_MAN;
267 break;
268 }
269 }
270 cp = memchr(cp, '\n', ep - cp);
271 if (cp == NULL)
272 break;
273 cp++;
274 }
275 }
276
277 if (format == MPARSE_MDOC) {
278 if (NULL == curp->pmdoc)
279 curp->pmdoc = mdoc_alloc(
280 curp->roff, curp, curp->defos,
281 MPARSE_QUICK & curp->options ? 1 : 0);
282 assert(curp->pmdoc);
283 curp->mdoc = curp->pmdoc;
284 return;
285 }
286
287 /* Fall back to man(7) as a last resort. */
288
289 if (NULL == curp->pman)
290 curp->pman = man_alloc(
291 curp->roff, curp, curp->defos,
292 MPARSE_QUICK & curp->options ? 1 : 0);
293 assert(curp->pman);
294 curp->man = curp->pman;
295 }
296
297 /*
298 * Main parse routine for a buffer.
299 * It assumes encoding and line numbering are already set up.
300 * It can recurse directly (for invocations of user-defined
301 * macros, inline equations, and input line traps)
302 * and indirectly (for .so file inclusion).
303 */
304 static void
305 mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
306 {
307 const struct tbl_span *span;
308 struct buf ln;
309 char *cp;
310 size_t pos; /* byte number in the ln buffer */
311 enum rofferr rr;
312 int of;
313 int lnn; /* line number in the real file */
314 int fd;
315 pid_t save_child;
316 unsigned char c;
317
318 memset(&ln, 0, sizeof(ln));
319
320 lnn = curp->line;
321 pos = 0;
322
323 while (i < blk.sz) {
324 if (0 == pos && '\0' == blk.buf[i])
325 break;
326
327 if (start) {
328 curp->line = lnn;
329 curp->reparse_count = 0;
330
331 if (lnn < 3 &&
332 curp->filenc & MPARSE_UTF8 &&
333 curp->filenc & MPARSE_LATIN1)
334 curp->filenc = preconv_cue(&blk, i);
335 }
336
337 while (i < blk.sz && (start || blk.buf[i] != '\0')) {
338
339 /*
340 * When finding an unescaped newline character,
341 * leave the character loop to process the line.
342 * Skip a preceding carriage return, if any.
343 */
344
345 if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
346 '\n' == blk.buf[i + 1])
347 ++i;
348 if ('\n' == blk.buf[i]) {
349 ++i;
350 ++lnn;
351 break;
352 }
353
354 /*
355 * Make sure we have space for the worst
356 * case of 11 bytes: "\\[u10ffff]\0"
357 */
358
359 if (pos + 11 > ln.sz)
360 resize_buf(&ln, 256);
361
362 /*
363 * Encode 8-bit input.
364 */
365
366 c = blk.buf[i];
367 if (c & 0x80) {
368 if ( ! (curp->filenc && preconv_encode(
369 &blk, &i, &ln, &pos, &curp->filenc))) {
370 mandoc_vmsg(MANDOCERR_BADCHAR,
371 curp, curp->line, pos,
372 "0x%x", c);
373 ln.buf[pos++] = '?';
374 i++;
375 }
376 continue;
377 }
378
379 /*
380 * Exclude control characters.
381 */
382
383 if (c == 0x7f || (c < 0x20 && c != 0x09)) {
384 mandoc_vmsg(MANDOCERR_BADCHAR, curp,
385 curp->line, pos, "0x%x", c);
386 i++;
387 ln.buf[pos++] = '?';
388 continue;
389 }
390
391 /* Trailing backslash = a plain char. */
392
393 if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
394 ln.buf[pos++] = blk.buf[i++];
395 continue;
396 }
397
398 /*
399 * Found escape and at least one other character.
400 * When it's a newline character, skip it.
401 * When there is a carriage return in between,
402 * skip that one as well.
403 */
404
405 if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
406 '\n' == blk.buf[i + 2])
407 ++i;
408 if ('\n' == blk.buf[i + 1]) {
409 i += 2;
410 ++lnn;
411 continue;
412 }
413
414 if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
415 i += 2;
416 /* Comment, skip to end of line */
417 for (; i < blk.sz; ++i) {
418 if ('\n' == blk.buf[i]) {
419 ++i;
420 ++lnn;
421 break;
422 }
423 }
424
425 /* Backout trailing whitespaces */
426 for (; pos > 0; --pos) {
427 if (ln.buf[pos - 1] != ' ')
428 break;
429 if (pos > 2 && ln.buf[pos - 2] == '\\')
430 break;
431 }
432 break;
433 }
434
435 /* Catch escaped bogus characters. */
436
437 c = (unsigned char) blk.buf[i+1];
438
439 if ( ! (isascii(c) &&
440 (isgraph(c) || isblank(c)))) {
441 mandoc_vmsg(MANDOCERR_BADCHAR, curp,
442 curp->line, pos, "0x%x", c);
443 i += 2;
444 ln.buf[pos++] = '?';
445 continue;
446 }
447
448 /* Some other escape sequence, copy & cont. */
449
450 ln.buf[pos++] = blk.buf[i++];
451 ln.buf[pos++] = blk.buf[i++];
452 }
453
454 if (pos >= ln.sz)
455 resize_buf(&ln, 256);
456
457 ln.buf[pos] = '\0';
458
459 /*
460 * A significant amount of complexity is contained by
461 * the roff preprocessor. It's line-oriented but can be
462 * expressed on one line, so we need at times to
463 * readjust our starting point and re-run it. The roff
464 * preprocessor can also readjust the buffers with new
465 * data, so we pass them in wholesale.
466 */
467
468 of = 0;
469
470 /*
471 * Maintain a lookaside buffer of all parsed lines. We
472 * only do this if mparse_keep() has been invoked (the
473 * buffer may be accessed with mparse_getkeep()).
474 */
475
476 if (curp->secondary) {
477 curp->secondary->buf = mandoc_realloc(
478 curp->secondary->buf,
479 curp->secondary->sz + pos + 2);
480 memcpy(curp->secondary->buf +
481 curp->secondary->sz,
482 ln.buf, pos);
483 curp->secondary->sz += pos;
484 curp->secondary->buf
485 [curp->secondary->sz] = '\n';
486 curp->secondary->sz++;
487 curp->secondary->buf
488 [curp->secondary->sz] = '\0';
489 }
490 rerun:
491 rr = roff_parseln(curp->roff, curp->line, &ln, &of);
492
493 switch (rr) {
494 case ROFF_REPARSE:
495 if (REPARSE_LIMIT >= ++curp->reparse_count)
496 mparse_buf_r(curp, ln, of, 0);
497 else
498 mandoc_msg(MANDOCERR_ROFFLOOP, curp,
499 curp->line, pos, NULL);
500 pos = 0;
501 continue;
502 case ROFF_APPEND:
503 pos = strlen(ln.buf);
504 continue;
505 case ROFF_RERUN:
506 goto rerun;
507 case ROFF_IGN:
508 pos = 0;
509 continue;
510 case ROFF_SO:
511 if ( ! (curp->options & MPARSE_SO) &&
512 (i >= blk.sz || blk.buf[i] == '\0')) {
513 curp->sodest = mandoc_strdup(ln.buf + of);
514 free(ln.buf);
515 return;
516 }
517 /*
518 * We remove `so' clauses from our lookaside
519 * buffer because we're going to descend into
520 * the file recursively.
521 */
522 if (curp->secondary)
523 curp->secondary->sz -= pos + 1;
524 save_child = curp->child;
525 if (mparse_open(curp, &fd, ln.buf + of) ==
526 MANDOCLEVEL_OK)
527 mparse_readfd(curp, fd, ln.buf + of);
528 else {
529 mandoc_vmsg(MANDOCERR_SO_FAIL,
530 curp, curp->line, pos,
531 ".so %s", ln.buf + of);
532 ln.sz = mandoc_asprintf(&cp,
533 ".sp\nSee the file %s.\n.sp",
534 ln.buf + of);
535 free(ln.buf);
536 ln.buf = cp;
537 of = 0;
538 mparse_buf_r(curp, ln, of, 0);
539 }
540 curp->child = save_child;
541 pos = 0;
542 continue;
543 default:
544 break;
545 }
546
547 /*
548 * If we encounter errors in the recursive parse, make
549 * sure we don't continue parsing.
550 */
551
552 if (MANDOCLEVEL_FATAL <= curp->file_status)
553 break;
554
555 /*
556 * If input parsers have not been allocated, do so now.
557 * We keep these instanced between parsers, but set them
558 * locally per parse routine since we can use different
559 * parsers with each one.
560 */
561
562 if ( ! (curp->man || curp->mdoc))
563 choose_parser(curp);
564
565 /*
566 * Lastly, push down into the parsers themselves.
567 * If libroff returns ROFF_TBL, then add it to the
568 * currently open parse. Since we only get here if
569 * there does exist data (see tbl_data.c), we're
570 * guaranteed that something's been allocated.
571 * Do the same for ROFF_EQN.
572 */
573
574 if (rr == ROFF_TBL) {
575 while ((span = roff_span(curp->roff)) != NULL)
576 if (curp->man == NULL)
577 mdoc_addspan(curp->mdoc, span);
578 else
579 man_addspan(curp->man, span);
580 } else if (rr == ROFF_EQN) {
581 if (curp->man == NULL)
582 mdoc_addeqn(curp->mdoc, roff_eqn(curp->roff));
583 else
584 man_addeqn(curp->man, roff_eqn(curp->roff));
585 } else if ((curp->man == NULL ?
586 mdoc_parseln(curp->mdoc, curp->line, ln.buf, of) :
587 man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
588 break;
589
590 /* Temporary buffers typically are not full. */
591
592 if (0 == start && '\0' == blk.buf[i])
593 break;
594
595 /* Start the next input line. */
596
597 pos = 0;
598 }
599
600 free(ln.buf);
601 }
602
603 static int
604 read_whole_file(struct mparse *curp, const char *file, int fd,
605 struct buf *fb, int *with_mmap)
606 {
607 size_t off;
608 ssize_t ssz;
609
610 #if HAVE_MMAP
611 struct stat st;
612 if (-1 == fstat(fd, &st)) {
613 perror(file);
614 exit((int)MANDOCLEVEL_SYSERR);
615 }
616
617 /*
618 * If we're a regular file, try just reading in the whole entry
619 * via mmap(). This is faster than reading it into blocks, and
620 * since each file is only a few bytes to begin with, I'm not
621 * concerned that this is going to tank any machines.
622 */
623
624 if (S_ISREG(st.st_mode)) {
625 if (st.st_size >= (1U << 31)) {
626 curp->file_status = MANDOCLEVEL_FATAL;
627 if (curp->mmsg)
628 (*curp->mmsg)(MANDOCERR_TOOLARGE,
629 curp->file_status, file, 0, 0, NULL);
630 return(0);
631 }
632 *with_mmap = 1;
633 fb->sz = (size_t)st.st_size;
634 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
635 if (fb->buf != MAP_FAILED)
636 return(1);
637 }
638 #endif
639
640 /*
641 * If this isn't a regular file (like, say, stdin), then we must
642 * go the old way and just read things in bit by bit.
643 */
644
645 *with_mmap = 0;
646 off = 0;
647 fb->sz = 0;
648 fb->buf = NULL;
649 for (;;) {
650 if (off == fb->sz) {
651 if (fb->sz == (1U << 31)) {
652 curp->file_status = MANDOCLEVEL_FATAL;
653 if (curp->mmsg)
654 (*curp->mmsg)(MANDOCERR_TOOLARGE,
655 curp->file_status,
656 file, 0, 0, NULL);
657 break;
658 }
659 resize_buf(fb, 65536);
660 }
661 ssz = read(fd, fb->buf + (int)off, fb->sz - off);
662 if (ssz == 0) {
663 fb->sz = off;
664 return(1);
665 }
666 if (ssz == -1) {
667 perror(file);
668 exit((int)MANDOCLEVEL_SYSERR);
669 }
670 off += (size_t)ssz;
671 }
672
673 free(fb->buf);
674 fb->buf = NULL;
675 return(0);
676 }
677
678 static void
679 mparse_end(struct mparse *curp)
680 {
681
682 if (MANDOCLEVEL_FATAL <= curp->file_status)
683 return;
684
685 if (curp->mdoc == NULL &&
686 curp->man == NULL &&
687 curp->sodest == NULL) {
688 if (curp->options & MPARSE_MDOC)
689 curp->mdoc = curp->pmdoc;
690 else {
691 if (curp->pman == NULL)
692 curp->pman = man_alloc(
693 curp->roff, curp, curp->defos,
694 curp->options & MPARSE_QUICK ? 1 : 0);
695 curp->man = curp->pman;
696 }
697 }
698
699 if (curp->mdoc && ! mdoc_endparse(curp->mdoc)) {
700 assert(MANDOCLEVEL_FATAL <= curp->file_status);
701 return;
702 }
703
704 if (curp->man && ! man_endparse(curp->man)) {
705 assert(MANDOCLEVEL_FATAL <= curp->file_status);
706 return;
707 }
708
709 roff_endparse(curp->roff);
710 }
711
712 static void
713 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
714 {
715 struct buf *svprimary;
716 const char *svfile;
717 size_t offset;
718 static int recursion_depth;
719
720 if (64 < recursion_depth) {
721 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
722 return;
723 }
724
725 /* Line number is per-file. */
726 svfile = curp->file;
727 curp->file = file;
728 svprimary = curp->primary;
729 curp->primary = &blk;
730 curp->line = 1;
731 recursion_depth++;
732
733 /* Skip an UTF-8 byte order mark. */
734 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
735 (unsigned char)blk.buf[0] == 0xef &&
736 (unsigned char)blk.buf[1] == 0xbb &&
737 (unsigned char)blk.buf[2] == 0xbf) {
738 offset = 3;
739 curp->filenc &= ~MPARSE_LATIN1;
740 } else
741 offset = 0;
742
743 mparse_buf_r(curp, blk, offset, 1);
744
745 if (0 == --recursion_depth && MANDOCLEVEL_FATAL > curp->file_status)
746 mparse_end(curp);
747
748 curp->primary = svprimary;
749 curp->file = svfile;
750 }
751
752 enum mandoclevel
753 mparse_readmem(struct mparse *curp, void *buf, size_t len,
754 const char *file)
755 {
756 struct buf blk;
757
758 blk.buf = buf;
759 blk.sz = len;
760
761 mparse_parse_buffer(curp, blk, file);
762 return(curp->file_status);
763 }
764
765 /*
766 * Read the whole file into memory and call the parsers.
767 * Called recursively when an .so request is encountered.
768 */
769 enum mandoclevel
770 mparse_readfd(struct mparse *curp, int fd, const char *file)
771 {
772 struct buf blk;
773 int with_mmap;
774 int save_filenc;
775
776 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
777 save_filenc = curp->filenc;
778 curp->filenc = curp->options &
779 (MPARSE_UTF8 | MPARSE_LATIN1);
780 mparse_parse_buffer(curp, blk, file);
781 curp->filenc = save_filenc;
782 #if HAVE_MMAP
783 if (with_mmap)
784 munmap(blk.buf, blk.sz);
785 else
786 #endif
787 free(blk.buf);
788 }
789
790 if (fd != STDIN_FILENO && close(fd) == -1)
791 perror(file);
792
793 mparse_wait(curp);
794 return(curp->file_status);
795 }
796
797 enum mandoclevel
798 mparse_open(struct mparse *curp, int *fd, const char *file)
799 {
800 int pfd[2];
801 int save_errno;
802 char *cp;
803
804 curp->file = file;
805
806 /* Unless zipped, try to just open the file. */
807
808 if ((cp = strrchr(file, '.')) == NULL ||
809 strcmp(cp + 1, "gz")) {
810 curp->child = 0;
811 if ((*fd = open(file, O_RDONLY)) != -1)
812 return(MANDOCLEVEL_OK);
813
814 /* Open failed; try to append ".gz". */
815
816 mandoc_asprintf(&cp, "%s.gz", file);
817 file = cp;
818 } else
819 cp = NULL;
820
821 /* Before forking, make sure the file can be read. */
822
823 save_errno = errno;
824 if (access(file, R_OK) == -1) {
825 if (cp != NULL)
826 errno = save_errno;
827 free(cp);
828 *fd = -1;
829 curp->child = 0;
830 mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
831 return(MANDOCLEVEL_ERROR);
832 }
833
834 /* Run gunzip(1). */
835
836 if (pipe(pfd) == -1) {
837 perror("pipe");
838 exit((int)MANDOCLEVEL_SYSERR);
839 }
840
841 switch (curp->child = fork()) {
842 case -1:
843 perror("fork");
844 exit((int)MANDOCLEVEL_SYSERR);
845 case 0:
846 close(pfd[0]);
847 if (dup2(pfd[1], STDOUT_FILENO) == -1) {
848 perror("dup");
849 exit((int)MANDOCLEVEL_SYSERR);
850 }
851 execlp("gunzip", "gunzip", "-c", file, NULL);
852 perror("exec");
853 exit((int)MANDOCLEVEL_SYSERR);
854 default:
855 close(pfd[1]);
856 *fd = pfd[0];
857 return(MANDOCLEVEL_OK);
858 }
859 }
860
861 enum mandoclevel
862 mparse_wait(struct mparse *curp)
863 {
864 int status;
865
866 if (curp->child == 0)
867 return(MANDOCLEVEL_OK);
868
869 if (waitpid(curp->child, &status, 0) == -1) {
870 perror("wait");
871 exit((int)MANDOCLEVEL_SYSERR);
872 }
873 if (WIFSIGNALED(status)) {
874 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
875 "gunzip died from signal %d", WTERMSIG(status));
876 return(MANDOCLEVEL_ERROR);
877 }
878 if (WEXITSTATUS(status)) {
879 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
880 "gunzip failed with code %d", WEXITSTATUS(status));
881 return(MANDOCLEVEL_ERROR);
882 }
883 return(MANDOCLEVEL_OK);
884 }
885
886 struct mparse *
887 mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
888 const struct mchars *mchars, const char *defos)
889 {
890 struct mparse *curp;
891
892 assert(wlevel <= MANDOCLEVEL_FATAL);
893
894 curp = mandoc_calloc(1, sizeof(struct mparse));
895
896 curp->options = options;
897 curp->wlevel = wlevel;
898 curp->mmsg = mmsg;
899 curp->defos = defos;
900
901 curp->mchars = mchars;
902 curp->roff = roff_alloc(curp, curp->mchars, options);
903 if (curp->options & MPARSE_MDOC)
904 curp->pmdoc = mdoc_alloc(
905 curp->roff, curp, curp->defos,
906 curp->options & MPARSE_QUICK ? 1 : 0);
907 if (curp->options & MPARSE_MAN)
908 curp->pman = man_alloc(
909 curp->roff, curp, curp->defos,
910 curp->options & MPARSE_QUICK ? 1 : 0);
911
912 return(curp);
913 }
914
915 void
916 mparse_reset(struct mparse *curp)
917 {
918
919 roff_reset(curp->roff);
920
921 if (curp->mdoc)
922 mdoc_reset(curp->mdoc);
923 if (curp->man)
924 man_reset(curp->man);
925 if (curp->secondary)
926 curp->secondary->sz = 0;
927
928 curp->file_status = MANDOCLEVEL_OK;
929 curp->mdoc = NULL;
930 curp->man = NULL;
931
932 free(curp->sodest);
933 curp->sodest = NULL;
934 }
935
936 void
937 mparse_free(struct mparse *curp)
938 {
939
940 if (curp->pmdoc)
941 mdoc_free(curp->pmdoc);
942 if (curp->pman)
943 man_free(curp->pman);
944 if (curp->roff)
945 roff_free(curp->roff);
946 if (curp->secondary)
947 free(curp->secondary->buf);
948
949 free(curp->secondary);
950 free(curp->sodest);
951 free(curp);
952 }
953
954 void
955 mparse_result(struct mparse *curp,
956 struct mdoc **mdoc, struct man **man, char **sodest)
957 {
958
959 if (sodest && NULL != (*sodest = curp->sodest)) {
960 *mdoc = NULL;
961 *man = NULL;
962 return;
963 }
964 if (mdoc)
965 *mdoc = curp->mdoc;
966 if (man)
967 *man = curp->man;
968 }
969
970 void
971 mandoc_vmsg(enum mandocerr t, struct mparse *m,
972 int ln, int pos, const char *fmt, ...)
973 {
974 char buf[256];
975 va_list ap;
976
977 va_start(ap, fmt);
978 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
979 va_end(ap);
980
981 mandoc_msg(t, m, ln, pos, buf);
982 }
983
984 void
985 mandoc_msg(enum mandocerr er, struct mparse *m,
986 int ln, int col, const char *msg)
987 {
988 enum mandoclevel level;
989
990 level = MANDOCLEVEL_FATAL;
991 while (er < mandoclimits[level])
992 level--;
993
994 if (level < m->wlevel && er != MANDOCERR_FILE)
995 return;
996
997 if (m->mmsg)
998 (*m->mmsg)(er, level, m->file, ln, col, msg);
999
1000 if (m->file_status < level)
1001 m->file_status = level;
1002 }
1003
1004 const char *
1005 mparse_strerror(enum mandocerr er)
1006 {
1007
1008 return(mandocerrs[er]);
1009 }
1010
1011 const char *
1012 mparse_strlevel(enum mandoclevel lvl)
1013 {
1014 return(mandoclevels[lvl]);
1015 }
1016
1017 void
1018 mparse_keep(struct mparse *p)
1019 {
1020
1021 assert(NULL == p->secondary);
1022 p->secondary = mandoc_calloc(1, sizeof(struct buf));
1023 }
1024
1025 const char *
1026 mparse_getkeep(const struct mparse *p)
1027 {
1028
1029 assert(p->secondary);
1030 return(p->secondary->sz ? p->secondary->buf : NULL);
1031 }