]> git.cameronkatri.com Git - mandoc.git/blob - read.c
Simplify handling of system errors: just exit(3).
[mandoc.git] / read.c
1 /* $Id: read.c,v 1.107 2015/01/14 17:49:15 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include "config.h"
20
21 #include <sys/types.h>
22 #if HAVE_MMAP
23 #include <sys/mman.h>
24 #include <sys/stat.h>
25 #endif
26 #include <sys/wait.h>
27
28 #include <assert.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <stdarg.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38
39 #include "mandoc.h"
40 #include "mandoc_aux.h"
41 #include "libmandoc.h"
42 #include "mdoc.h"
43 #include "man.h"
44
/*
 * Upper bound on how often one input line may be handed back for
 * reparsing before mparse_buf_r() gives up and reports
 * MANDOCERR_ROFFLOOP.  The counter is reset for every new line
 * of the input file.
 */
#define	REPARSE_LIMIT	1000
46
47 struct mparse {
48 struct man *pman; /* persistent man parser */
49 struct mdoc *pmdoc; /* persistent mdoc parser */
50 struct man *man; /* man parser */
51 struct mdoc *mdoc; /* mdoc parser */
52 struct roff *roff; /* roff parser (!NULL) */
53 const struct mchars *mchars; /* character table */
54 char *sodest; /* filename pointed to by .so */
55 const char *file; /* filename of current input file */
56 struct buf *primary; /* buffer currently being parsed */
57 struct buf *secondary; /* preprocessed copy of input */
58 const char *defos; /* default operating system */
59 mandocmsg mmsg; /* warning/error message handler */
60 enum mandoclevel file_status; /* status of current parse */
61 enum mandoclevel wlevel; /* ignore messages below this */
62 int options; /* parser options */
63 int filenc; /* encoding of the current file */
64 int reparse_count; /* finite interp. stack */
65 int line; /* line number in the file */
66 pid_t child; /* the gunzip(1) process */
67 };
68
/* File-local helpers, listed in the order they are defined below. */
static	void	  resize_buf(struct buf *buf, size_t initial);
static	void	  choose_parser(struct mparse *curp);
static	void	  mparse_buf_r(struct mparse *curp, struct buf blk,
			size_t i, int start);
static	int	  read_whole_file(struct mparse *curp, const char *file,
			int fd, struct buf *fb, int *with_mmap);
static	void	  mparse_end(struct mparse *curp);
static	void	  mparse_parse_buffer(struct mparse *curp, struct buf blk,
			const char *file);
77
78 static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = {
79 MANDOCERR_OK,
80 MANDOCERR_WARNING,
81 MANDOCERR_WARNING,
82 MANDOCERR_ERROR,
83 MANDOCERR_FATAL,
84 MANDOCERR_MAX,
85 MANDOCERR_MAX
86 };
87
88 static const char * const mandocerrs[MANDOCERR_MAX] = {
89 "ok",
90
91 "generic warning",
92
93 /* related to the prologue */
94 "missing manual title, using UNTITLED",
95 "missing manual title, using \"\"",
96 "lower case character in document title",
97 "missing manual section, using \"\"",
98 "unknown manual section",
99 "missing date, using today's date",
100 "cannot parse date, using it verbatim",
101 "missing Os macro, using \"\"",
102 "duplicate prologue macro",
103 "late prologue macro",
104 "skipping late title macro",
105 "prologue macros out of order",
106
107 /* related to document structure */
108 ".so is fragile, better use ln(1)",
109 "no document body",
110 "content before first section header",
111 "first section is not \"NAME\"",
112 "bad NAME section contents",
113 "sections out of conventional order",
114 "duplicate section title",
115 "unexpected section",
116 "unusual Xr order",
117 "unusual Xr punctuation",
118 "AUTHORS section without An macro",
119
120 /* related to macros and nesting */
121 "obsolete macro",
122 "macro neither callable nor escaped",
123 "skipping paragraph macro",
124 "moving paragraph macro out of list",
125 "skipping no-space macro",
126 "blocks badly nested",
127 "nested displays are not portable",
128 "moving content out of list",
129 ".Vt block has child macro",
130 "fill mode already enabled, skipping",
131 "fill mode already disabled, skipping",
132 "line scope broken",
133
134 /* related to missing macro arguments */
135 "skipping empty request",
136 "conditional request controls empty scope",
137 "skipping empty macro",
138 "empty argument, using 0n",
139 "argument count wrong",
140 "missing display type, using -ragged",
141 "list type is not the first argument",
142 "missing -width in -tag list, using 8n",
143 "missing utility name, using \"\"",
144 "empty head in list item",
145 "empty list item",
146 "missing font type, using \\fR",
147 "unknown font type, using \\fR",
148 "nothing follows prefix",
149 "missing -std argument, adding it",
150 "missing eqn box, using \"\"",
151
152 /* related to bad macro arguments */
153 "unterminated quoted argument",
154 "duplicate argument",
155 "skipping duplicate argument",
156 "skipping duplicate display type",
157 "skipping duplicate list type",
158 "skipping -width argument",
159 "unknown AT&T UNIX version",
160 "comma in function argument",
161 "parenthesis in function name",
162 "invalid content in Rs block",
163 "invalid Boolean argument",
164 "unknown font, skipping request",
165
166 /* related to plain text */
167 "blank line in fill mode, using .sp",
168 "tab in filled text",
169 "whitespace at end of input line",
170 "bad comment style",
171 "invalid escape sequence",
172 "undefined string, using \"\"",
173
174 "generic error",
175
176 /* related to equations */
177 "unexpected equation scope closure",
178 "equation scope open on exit",
179 "overlapping equation scopes",
180 "unexpected end of equation",
181
182 /* related to tables */
183 "bad table syntax",
184 "bad table option",
185 "bad table layout",
186 "no table layout cells specified",
187 "no table data cells specified",
188 "ignore data in cell",
189 "data block still open",
190 "ignoring extra data cells",
191 "ignoring macro in table",
192
193 /* related to document structure and macros */
194 "input stack limit exceeded, infinite loop?",
195 "skipping bad character",
196 "skipping unknown macro",
197 "skipping item outside list",
198 "skipping column outside column list",
199 "skipping end of block that is not open",
200 "inserting missing end of block",
201 "appending missing end of block",
202
203 /* related to request and macro arguments */
204 "escaped character not allowed in a name",
205 "argument count wrong",
206 "NOT IMPLEMENTED: Bd -file",
207 "missing list type, using -item",
208 "missing manual name, using \"\"",
209 "uname(3) system call failed, using UNKNOWN",
210 "unknown standard specifier",
211 "skipping request without numeric argument",
212 "skipping all arguments",
213 "skipping excess arguments",
214 "divide by zero",
215
216 "generic fatal error",
217
218 "input too large",
219 "NOT IMPLEMENTED: .so with absolute path or \"..\"",
220 ".so request failed",
221
222 /* system errors */
223 "gunzip failed with code",
224 NULL,
225 "gunzip died from signal",
226 };
227
228 static const char * const mandoclevels[MANDOCLEVEL_MAX] = {
229 "SUCCESS",
230 "RESERVED",
231 "WARNING",
232 "ERROR",
233 "FATAL",
234 "BADARG",
235 "SYSERR"
236 };
237
238
239 static void
240 resize_buf(struct buf *buf, size_t initial)
241 {
242
243 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
244 buf->buf = mandoc_realloc(buf->buf, buf->sz);
245 }
246
247 static void
248 choose_parser(struct mparse *curp)
249 {
250 char *cp, *ep;
251 int format;
252
253 /*
254 * If neither command line arguments -mdoc or -man select
255 * a parser nor the roff parser found a .Dd or .TH macro
256 * yet, look ahead in the main input buffer.
257 */
258
259 if ((format = roff_getformat(curp->roff)) == 0) {
260 cp = curp->primary->buf;
261 ep = cp + curp->primary->sz;
262 while (cp < ep) {
263 if (*cp == '.' || *cp == '\'') {
264 cp++;
265 if (cp[0] == 'D' && cp[1] == 'd') {
266 format = MPARSE_MDOC;
267 break;
268 }
269 if (cp[0] == 'T' && cp[1] == 'H') {
270 format = MPARSE_MAN;
271 break;
272 }
273 }
274 cp = memchr(cp, '\n', ep - cp);
275 if (cp == NULL)
276 break;
277 cp++;
278 }
279 }
280
281 if (format == MPARSE_MDOC) {
282 if (NULL == curp->pmdoc)
283 curp->pmdoc = mdoc_alloc(
284 curp->roff, curp, curp->defos,
285 MPARSE_QUICK & curp->options ? 1 : 0);
286 assert(curp->pmdoc);
287 curp->mdoc = curp->pmdoc;
288 return;
289 }
290
291 /* Fall back to man(7) as a last resort. */
292
293 if (NULL == curp->pman)
294 curp->pman = man_alloc(
295 curp->roff, curp, curp->defos,
296 MPARSE_QUICK & curp->options ? 1 : 0);
297 assert(curp->pman);
298 curp->man = curp->pman;
299 }
300
301 /*
302 * Main parse routine for a buffer.
303 * It assumes encoding and line numbering are already set up.
304 * It can recurse directly (for invocations of user-defined
305 * macros, inline equations, and input line traps)
306 * and indirectly (for .so file inclusion).
307 */
308 static void
309 mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
310 {
311 const struct tbl_span *span;
312 struct buf ln;
313 size_t pos; /* byte number in the ln buffer */
314 enum rofferr rr;
315 int of;
316 int lnn; /* line number in the real file */
317 unsigned char c;
318
319 memset(&ln, 0, sizeof(ln));
320
321 lnn = curp->line;
322 pos = 0;
323
324 while (i < blk.sz) {
325 if (0 == pos && '\0' == blk.buf[i])
326 break;
327
328 if (start) {
329 curp->line = lnn;
330 curp->reparse_count = 0;
331
332 if (lnn < 3 &&
333 curp->filenc & MPARSE_UTF8 &&
334 curp->filenc & MPARSE_LATIN1)
335 curp->filenc = preconv_cue(&blk, i);
336 }
337
338 while (i < blk.sz && (start || blk.buf[i] != '\0')) {
339
340 /*
341 * When finding an unescaped newline character,
342 * leave the character loop to process the line.
343 * Skip a preceding carriage return, if any.
344 */
345
346 if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
347 '\n' == blk.buf[i + 1])
348 ++i;
349 if ('\n' == blk.buf[i]) {
350 ++i;
351 ++lnn;
352 break;
353 }
354
355 /*
356 * Make sure we have space for the worst
357 * case of 11 bytes: "\\[u10ffff]\0"
358 */
359
360 if (pos + 11 > ln.sz)
361 resize_buf(&ln, 256);
362
363 /*
364 * Encode 8-bit input.
365 */
366
367 c = blk.buf[i];
368 if (c & 0x80) {
369 if ( ! (curp->filenc && preconv_encode(
370 &blk, &i, &ln, &pos, &curp->filenc))) {
371 mandoc_vmsg(MANDOCERR_BADCHAR,
372 curp, curp->line, pos,
373 "0x%x", c);
374 ln.buf[pos++] = '?';
375 i++;
376 }
377 continue;
378 }
379
380 /*
381 * Exclude control characters.
382 */
383
384 if (c == 0x7f || (c < 0x20 && c != 0x09)) {
385 mandoc_vmsg(MANDOCERR_BADCHAR, curp,
386 curp->line, pos, "0x%x", c);
387 i++;
388 ln.buf[pos++] = '?';
389 continue;
390 }
391
392 /* Trailing backslash = a plain char. */
393
394 if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
395 ln.buf[pos++] = blk.buf[i++];
396 continue;
397 }
398
399 /*
400 * Found escape and at least one other character.
401 * When it's a newline character, skip it.
402 * When there is a carriage return in between,
403 * skip that one as well.
404 */
405
406 if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
407 '\n' == blk.buf[i + 2])
408 ++i;
409 if ('\n' == blk.buf[i + 1]) {
410 i += 2;
411 ++lnn;
412 continue;
413 }
414
415 if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
416 i += 2;
417 /* Comment, skip to end of line */
418 for (; i < blk.sz; ++i) {
419 if ('\n' == blk.buf[i]) {
420 ++i;
421 ++lnn;
422 break;
423 }
424 }
425
426 /* Backout trailing whitespaces */
427 for (; pos > 0; --pos) {
428 if (ln.buf[pos - 1] != ' ')
429 break;
430 if (pos > 2 && ln.buf[pos - 2] == '\\')
431 break;
432 }
433 break;
434 }
435
436 /* Catch escaped bogus characters. */
437
438 c = (unsigned char) blk.buf[i+1];
439
440 if ( ! (isascii(c) &&
441 (isgraph(c) || isblank(c)))) {
442 mandoc_vmsg(MANDOCERR_BADCHAR, curp,
443 curp->line, pos, "0x%x", c);
444 i += 2;
445 ln.buf[pos++] = '?';
446 continue;
447 }
448
449 /* Some other escape sequence, copy & cont. */
450
451 ln.buf[pos++] = blk.buf[i++];
452 ln.buf[pos++] = blk.buf[i++];
453 }
454
455 if (pos >= ln.sz)
456 resize_buf(&ln, 256);
457
458 ln.buf[pos] = '\0';
459
460 /*
461 * A significant amount of complexity is contained by
462 * the roff preprocessor. It's line-oriented but can be
463 * expressed on one line, so we need at times to
464 * readjust our starting point and re-run it. The roff
465 * preprocessor can also readjust the buffers with new
466 * data, so we pass them in wholesale.
467 */
468
469 of = 0;
470
471 /*
472 * Maintain a lookaside buffer of all parsed lines. We
473 * only do this if mparse_keep() has been invoked (the
474 * buffer may be accessed with mparse_getkeep()).
475 */
476
477 if (curp->secondary) {
478 curp->secondary->buf = mandoc_realloc(
479 curp->secondary->buf,
480 curp->secondary->sz + pos + 2);
481 memcpy(curp->secondary->buf +
482 curp->secondary->sz,
483 ln.buf, pos);
484 curp->secondary->sz += pos;
485 curp->secondary->buf
486 [curp->secondary->sz] = '\n';
487 curp->secondary->sz++;
488 curp->secondary->buf
489 [curp->secondary->sz] = '\0';
490 }
491 rerun:
492 rr = roff_parseln(curp->roff, curp->line, &ln, &of);
493
494 switch (rr) {
495 case ROFF_REPARSE:
496 if (REPARSE_LIMIT >= ++curp->reparse_count)
497 mparse_buf_r(curp, ln, of, 0);
498 else
499 mandoc_msg(MANDOCERR_ROFFLOOP, curp,
500 curp->line, pos, NULL);
501 pos = 0;
502 continue;
503 case ROFF_APPEND:
504 pos = strlen(ln.buf);
505 continue;
506 case ROFF_RERUN:
507 goto rerun;
508 case ROFF_IGN:
509 pos = 0;
510 continue;
511 case ROFF_ERR:
512 assert(MANDOCLEVEL_FATAL <= curp->file_status);
513 break;
514 case ROFF_SO:
515 if ( ! (curp->options & MPARSE_SO) &&
516 (i >= blk.sz || blk.buf[i] == '\0')) {
517 curp->sodest = mandoc_strdup(ln.buf + of);
518 free(ln.buf);
519 return;
520 }
521 /*
522 * We remove `so' clauses from our lookaside
523 * buffer because we're going to descend into
524 * the file recursively.
525 */
526 if (curp->secondary)
527 curp->secondary->sz -= pos + 1;
528 mparse_readfd(curp, -1, ln.buf + of);
529 if (MANDOCLEVEL_FATAL <= curp->file_status) {
530 mandoc_vmsg(MANDOCERR_SO_FAIL,
531 curp, curp->line, pos,
532 ".so %s", ln.buf + of);
533 break;
534 }
535 pos = 0;
536 continue;
537 default:
538 break;
539 }
540
541 /*
542 * If we encounter errors in the recursive parse, make
543 * sure we don't continue parsing.
544 */
545
546 if (MANDOCLEVEL_FATAL <= curp->file_status)
547 break;
548
549 /*
550 * If input parsers have not been allocated, do so now.
551 * We keep these instanced between parsers, but set them
552 * locally per parse routine since we can use different
553 * parsers with each one.
554 */
555
556 if ( ! (curp->man || curp->mdoc))
557 choose_parser(curp);
558
559 /*
560 * Lastly, push down into the parsers themselves.
561 * If libroff returns ROFF_TBL, then add it to the
562 * currently open parse. Since we only get here if
563 * there does exist data (see tbl_data.c), we're
564 * guaranteed that something's been allocated.
565 * Do the same for ROFF_EQN.
566 */
567
568 if (rr == ROFF_TBL) {
569 while ((span = roff_span(curp->roff)) != NULL)
570 if (curp->man == NULL)
571 mdoc_addspan(curp->mdoc, span);
572 else
573 man_addspan(curp->man, span);
574 } else if (rr == ROFF_EQN) {
575 if (curp->man == NULL)
576 mdoc_addeqn(curp->mdoc, roff_eqn(curp->roff));
577 else
578 man_addeqn(curp->man, roff_eqn(curp->roff));
579 } else if ((curp->man == NULL ?
580 mdoc_parseln(curp->mdoc, curp->line, ln.buf, of) :
581 man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
582 break;
583
584 /* Temporary buffers typically are not full. */
585
586 if (0 == start && '\0' == blk.buf[i])
587 break;
588
589 /* Start the next input line. */
590
591 pos = 0;
592 }
593
594 free(ln.buf);
595 }
596
597 static int
598 read_whole_file(struct mparse *curp, const char *file, int fd,
599 struct buf *fb, int *with_mmap)
600 {
601 size_t off;
602 ssize_t ssz;
603
604 #if HAVE_MMAP
605 struct stat st;
606 if (-1 == fstat(fd, &st)) {
607 perror(file);
608 exit((int)MANDOCLEVEL_SYSERR);
609 }
610
611 /*
612 * If we're a regular file, try just reading in the whole entry
613 * via mmap(). This is faster than reading it into blocks, and
614 * since each file is only a few bytes to begin with, I'm not
615 * concerned that this is going to tank any machines.
616 */
617
618 if (S_ISREG(st.st_mode)) {
619 if (st.st_size >= (1U << 31)) {
620 curp->file_status = MANDOCLEVEL_FATAL;
621 if (curp->mmsg)
622 (*curp->mmsg)(MANDOCERR_TOOLARGE,
623 curp->file_status, file, 0, 0, NULL);
624 return(0);
625 }
626 *with_mmap = 1;
627 fb->sz = (size_t)st.st_size;
628 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
629 if (fb->buf != MAP_FAILED)
630 return(1);
631 }
632 #endif
633
634 /*
635 * If this isn't a regular file (like, say, stdin), then we must
636 * go the old way and just read things in bit by bit.
637 */
638
639 *with_mmap = 0;
640 off = 0;
641 fb->sz = 0;
642 fb->buf = NULL;
643 for (;;) {
644 if (off == fb->sz) {
645 if (fb->sz == (1U << 31)) {
646 curp->file_status = MANDOCLEVEL_FATAL;
647 if (curp->mmsg)
648 (*curp->mmsg)(MANDOCERR_TOOLARGE,
649 curp->file_status,
650 file, 0, 0, NULL);
651 break;
652 }
653 resize_buf(fb, 65536);
654 }
655 ssz = read(fd, fb->buf + (int)off, fb->sz - off);
656 if (ssz == 0) {
657 fb->sz = off;
658 return(1);
659 }
660 if (ssz == -1) {
661 perror(file);
662 exit((int)MANDOCLEVEL_SYSERR);
663 }
664 off += (size_t)ssz;
665 }
666
667 free(fb->buf);
668 fb->buf = NULL;
669 return(0);
670 }
671
672 static void
673 mparse_end(struct mparse *curp)
674 {
675
676 if (MANDOCLEVEL_FATAL <= curp->file_status)
677 return;
678
679 if (curp->mdoc == NULL &&
680 curp->man == NULL &&
681 curp->sodest == NULL) {
682 if (curp->options & MPARSE_MDOC)
683 curp->mdoc = curp->pmdoc;
684 else {
685 if (curp->pman == NULL)
686 curp->pman = man_alloc(
687 curp->roff, curp, curp->defos,
688 curp->options & MPARSE_QUICK ? 1 : 0);
689 curp->man = curp->pman;
690 }
691 }
692
693 if (curp->mdoc && ! mdoc_endparse(curp->mdoc)) {
694 assert(MANDOCLEVEL_FATAL <= curp->file_status);
695 return;
696 }
697
698 if (curp->man && ! man_endparse(curp->man)) {
699 assert(MANDOCLEVEL_FATAL <= curp->file_status);
700 return;
701 }
702
703 roff_endparse(curp->roff);
704 }
705
706 static void
707 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
708 {
709 struct buf *svprimary;
710 const char *svfile;
711 size_t offset;
712 static int recursion_depth;
713
714 if (64 < recursion_depth) {
715 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
716 return;
717 }
718
719 /* Line number is per-file. */
720 svfile = curp->file;
721 curp->file = file;
722 svprimary = curp->primary;
723 curp->primary = &blk;
724 curp->line = 1;
725 recursion_depth++;
726
727 /* Skip an UTF-8 byte order mark. */
728 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
729 (unsigned char)blk.buf[0] == 0xef &&
730 (unsigned char)blk.buf[1] == 0xbb &&
731 (unsigned char)blk.buf[2] == 0xbf) {
732 offset = 3;
733 curp->filenc &= ~MPARSE_LATIN1;
734 } else
735 offset = 0;
736
737 mparse_buf_r(curp, blk, offset, 1);
738
739 if (0 == --recursion_depth && MANDOCLEVEL_FATAL > curp->file_status)
740 mparse_end(curp);
741
742 curp->primary = svprimary;
743 curp->file = svfile;
744 }
745
746 enum mandoclevel
747 mparse_readmem(struct mparse *curp, void *buf, size_t len,
748 const char *file)
749 {
750 struct buf blk;
751
752 blk.buf = buf;
753 blk.sz = len;
754
755 mparse_parse_buffer(curp, blk, file);
756 return(curp->file_status);
757 }
758
759 /*
760 * If a file descriptor is given, use it and assume it points
761 * to the named file. Otherwise, open the named file.
762 * Read the whole file into memory and call the parsers.
763 * Called recursively when an .so request is encountered.
764 */
765 enum mandoclevel
766 mparse_readfd(struct mparse *curp, int fd, const char *file)
767 {
768 struct buf blk;
769 int with_mmap;
770 int save_filenc;
771 pid_t save_child;
772
773 save_child = curp->child;
774 if (fd != -1)
775 curp->child = 0;
776 else if (mparse_open(curp, &fd, file) >= MANDOCLEVEL_SYSERR)
777 goto out;
778
779 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
780 save_filenc = curp->filenc;
781 curp->filenc = curp->options &
782 (MPARSE_UTF8 | MPARSE_LATIN1);
783 mparse_parse_buffer(curp, blk, file);
784 curp->filenc = save_filenc;
785 #if HAVE_MMAP
786 if (with_mmap)
787 munmap(blk.buf, blk.sz);
788 else
789 #endif
790 free(blk.buf);
791 }
792
793 if (fd != STDIN_FILENO && close(fd) == -1)
794 perror(file);
795
796 mparse_wait(curp);
797 out:
798 curp->child = save_child;
799 return(curp->file_status);
800 }
801
802 enum mandoclevel
803 mparse_open(struct mparse *curp, int *fd, const char *file)
804 {
805 int pfd[2];
806 int save_errno;
807 char *cp;
808 enum mandocerr err;
809
810 pfd[1] = -1;
811 curp->file = file;
812
813 /* Unless zipped, try to just open the file. */
814
815 if ((cp = strrchr(file, '.')) == NULL ||
816 strcmp(cp + 1, "gz")) {
817 curp->child = 0;
818 if ((*fd = open(file, O_RDONLY)) != -1)
819 return(MANDOCLEVEL_OK);
820
821 /* Open failed; try to append ".gz". */
822
823 mandoc_asprintf(&cp, "%s.gz", file);
824 file = cp;
825 } else
826 cp = NULL;
827
828 /* Before forking, make sure the file can be read. */
829
830 save_errno = errno;
831 if (access(file, R_OK) == -1) {
832 if (cp != NULL)
833 errno = save_errno;
834 err = MANDOCERR_SYSOPEN;
835 goto out;
836 }
837
838 /* Run gunzip(1). */
839
840 if (pipe(pfd) == -1) {
841 perror("pipe");
842 exit((int)MANDOCLEVEL_SYSERR);
843 }
844
845 switch (curp->child = fork()) {
846 case -1:
847 perror("fork");
848 exit((int)MANDOCLEVEL_SYSERR);
849 case 0:
850 close(pfd[0]);
851 if (dup2(pfd[1], STDOUT_FILENO) == -1) {
852 perror("dup");
853 exit((int)MANDOCLEVEL_SYSERR);
854 }
855 execlp("gunzip", "gunzip", "-c", file, NULL);
856 perror("exec");
857 exit((int)MANDOCLEVEL_SYSERR);
858 default:
859 close(pfd[1]);
860 *fd = pfd[0];
861 return(MANDOCLEVEL_OK);
862 }
863
864 out:
865 free(cp);
866 *fd = -1;
867 curp->child = 0;
868 curp->file_status = MANDOCLEVEL_SYSERR;
869 if (curp->mmsg)
870 (*curp->mmsg)(err, curp->file_status, curp->file,
871 0, 0, strerror(errno));
872 if (pfd[1] != -1)
873 exit(1);
874 return(curp->file_status);
875 }
876
877 enum mandoclevel
878 mparse_wait(struct mparse *curp)
879 {
880 int status;
881
882 if (curp->child == 0)
883 return(MANDOCLEVEL_OK);
884
885 if (waitpid(curp->child, &status, 0) == -1) {
886 perror("wait");
887 exit((int)MANDOCLEVEL_SYSERR);
888 }
889 if (WIFSIGNALED(status)) {
890 mandoc_vmsg(MANDOCERR_SYSSIG, curp, 0, 0,
891 "%d", WTERMSIG(status));
892 curp->file_status = MANDOCLEVEL_SYSERR;
893 return(curp->file_status);
894 }
895 if (WEXITSTATUS(status)) {
896 mandoc_vmsg(MANDOCERR_SYSEXIT, curp, 0, 0,
897 "%d", WEXITSTATUS(status));
898 curp->file_status = MANDOCLEVEL_SYSERR;
899 return(curp->file_status);
900 }
901 return(MANDOCLEVEL_OK);
902 }
903
904 struct mparse *
905 mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
906 const struct mchars *mchars, const char *defos)
907 {
908 struct mparse *curp;
909
910 assert(wlevel <= MANDOCLEVEL_FATAL);
911
912 curp = mandoc_calloc(1, sizeof(struct mparse));
913
914 curp->options = options;
915 curp->wlevel = wlevel;
916 curp->mmsg = mmsg;
917 curp->defos = defos;
918
919 curp->mchars = mchars;
920 curp->roff = roff_alloc(curp, curp->mchars, options);
921 if (curp->options & MPARSE_MDOC)
922 curp->pmdoc = mdoc_alloc(
923 curp->roff, curp, curp->defos,
924 curp->options & MPARSE_QUICK ? 1 : 0);
925 if (curp->options & MPARSE_MAN)
926 curp->pman = man_alloc(
927 curp->roff, curp, curp->defos,
928 curp->options & MPARSE_QUICK ? 1 : 0);
929
930 return(curp);
931 }
932
933 void
934 mparse_reset(struct mparse *curp)
935 {
936
937 roff_reset(curp->roff);
938
939 if (curp->mdoc)
940 mdoc_reset(curp->mdoc);
941 if (curp->man)
942 man_reset(curp->man);
943 if (curp->secondary)
944 curp->secondary->sz = 0;
945
946 curp->file_status = MANDOCLEVEL_OK;
947 curp->mdoc = NULL;
948 curp->man = NULL;
949
950 free(curp->sodest);
951 curp->sodest = NULL;
952 }
953
954 void
955 mparse_free(struct mparse *curp)
956 {
957
958 if (curp->pmdoc)
959 mdoc_free(curp->pmdoc);
960 if (curp->pman)
961 man_free(curp->pman);
962 if (curp->roff)
963 roff_free(curp->roff);
964 if (curp->secondary)
965 free(curp->secondary->buf);
966
967 free(curp->secondary);
968 free(curp->sodest);
969 free(curp);
970 }
971
972 void
973 mparse_result(struct mparse *curp,
974 struct mdoc **mdoc, struct man **man, char **sodest)
975 {
976
977 if (sodest && NULL != (*sodest = curp->sodest)) {
978 *mdoc = NULL;
979 *man = NULL;
980 return;
981 }
982 if (mdoc)
983 *mdoc = curp->mdoc;
984 if (man)
985 *man = curp->man;
986 }
987
988 void
989 mandoc_vmsg(enum mandocerr t, struct mparse *m,
990 int ln, int pos, const char *fmt, ...)
991 {
992 char buf[256];
993 va_list ap;
994
995 va_start(ap, fmt);
996 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
997 va_end(ap);
998
999 mandoc_msg(t, m, ln, pos, buf);
1000 }
1001
1002 void
1003 mandoc_msg(enum mandocerr er, struct mparse *m,
1004 int ln, int col, const char *msg)
1005 {
1006 enum mandoclevel level;
1007
1008 level = MANDOCLEVEL_FATAL;
1009 while (er < mandoclimits[level])
1010 level--;
1011
1012 if (level < m->wlevel)
1013 return;
1014
1015 if (m->mmsg)
1016 (*m->mmsg)(er, level, m->file, ln, col, msg);
1017
1018 if (m->file_status < level)
1019 m->file_status = level;
1020 }
1021
1022 const char *
1023 mparse_strerror(enum mandocerr er)
1024 {
1025
1026 return(mandocerrs[er]);
1027 }
1028
1029 const char *
1030 mparse_strlevel(enum mandoclevel lvl)
1031 {
1032 return(mandoclevels[lvl]);
1033 }
1034
1035 void
1036 mparse_keep(struct mparse *p)
1037 {
1038
1039 assert(NULL == p->secondary);
1040 p->secondary = mandoc_calloc(1, sizeof(struct buf));
1041 }
1042
1043 const char *
1044 mparse_getkeep(const struct mparse *p)
1045 {
1046
1047 assert(p->secondary);
1048 return(p->secondary->sz ? p->secondary->buf : NULL);
1049 }