]> git.cameronkatri.com Git - mandoc.git/blob - read.c
Let mparse_readfd() use mparse_open() and mparse_wait()
[mandoc.git] / read.c
1 /* $Id: read.c,v 1.98 2014/11/26 23:42:14 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include "config.h"
20
21 #include <sys/types.h>
22 #if HAVE_MMAP
23 #include <sys/mman.h>
24 #include <sys/stat.h>
25 #endif
26 #include <sys/wait.h>
27
28 #include <assert.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <stdarg.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38
39 #include "mandoc.h"
40 #include "mandoc_aux.h"
41 #include "libmandoc.h"
42 #include "mdoc.h"
43 #include "man.h"
44 #include "main.h"
45
/*
 * Upper bound on the number of ROFF_REPARSE rounds accepted while
 * handling one input line; once exceeded, mparse_buf_r() reports
 * MANDOCERR_ROFFLOOP instead of recursing again.
 */
#define	REPARSE_LIMIT	1000
47
48 struct mparse {
49 struct man *pman; /* persistent man parser */
50 struct mdoc *pmdoc; /* persistent mdoc parser */
51 struct man *man; /* man parser */
52 struct mdoc *mdoc; /* mdoc parser */
53 struct roff *roff; /* roff parser (!NULL) */
54 const struct mchars *mchars; /* character table */
55 char *sodest; /* filename pointed to by .so */
56 const char *file; /* filename of current input file */
57 struct buf *primary; /* buffer currently being parsed */
58 struct buf *secondary; /* preprocessed copy of input */
59 const char *defos; /* default operating system */
60 mandocmsg mmsg; /* warning/error message handler */
61 enum mandoclevel file_status; /* status of current parse */
62 enum mandoclevel wlevel; /* ignore messages below this */
63 int options; /* parser options */
64 int filenc; /* encoding of the current file */
65 int reparse_count; /* finite interp. stack */
66 int line; /* line number in the file */
67 pid_t child; /* the gunzip(1) process */
68 };
69
/* Forward declarations of the helpers private to this file. */
static	void	  choose_parser(struct mparse *curp);
static	void	  resize_buf(struct buf *buf, size_t initial);
static	void	  mparse_buf_r(struct mparse *curp, struct buf blk,
			size_t i, int start);
static	int	  read_whole_file(struct mparse *curp, const char *file,
			int fd, struct buf *fb, int *with_mmap);
static	void	  mparse_end(struct mparse *curp);
static	void	  mparse_parse_buffer(struct mparse *curp, struct buf blk,
			const char *file);
78
79 static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = {
80 MANDOCERR_OK,
81 MANDOCERR_WARNING,
82 MANDOCERR_WARNING,
83 MANDOCERR_ERROR,
84 MANDOCERR_FATAL,
85 MANDOCERR_MAX,
86 MANDOCERR_MAX
87 };
88
89 static const char * const mandocerrs[MANDOCERR_MAX] = {
90 "ok",
91
92 "generic warning",
93
94 /* related to the prologue */
95 "missing manual title, using UNTITLED",
96 "missing manual title, using \"\"",
97 "lower case character in document title",
98 "missing manual section, using \"\"",
99 "unknown manual section",
100 "unknown manual volume or arch",
101 "missing date, using today's date",
102 "cannot parse date, using it verbatim",
103 "missing Os macro, using \"\"",
104 "duplicate prologue macro",
105 "late prologue macro",
106 "skipping late title macro",
107 "prologue macros out of order",
108
109 /* related to document structure */
110 ".so is fragile, better use ln(1)",
111 "no document body",
112 "content before first section header",
113 "first section is not \"NAME\"",
114 "bad NAME section contents",
115 "sections out of conventional order",
116 "duplicate section title",
117 "unexpected section",
118 "unusual Xr order",
119 "unusual Xr punctuation",
120 "AUTHORS section without An macro",
121
122 /* related to macros and nesting */
123 "obsolete macro",
124 "skipping paragraph macro",
125 "moving paragraph macro out of list",
126 "skipping no-space macro",
127 "blocks badly nested",
128 "nested displays are not portable",
129 "moving content out of list",
130 ".Vt block has child macro",
131 "fill mode already enabled, skipping",
132 "fill mode already disabled, skipping",
133 "line scope broken",
134
135 /* related to missing macro arguments */
136 "skipping empty request",
137 "conditional request controls empty scope",
138 "skipping empty macro",
139 "empty argument, using 0n",
140 "argument count wrong",
141 "missing display type, using -ragged",
142 "list type is not the first argument",
143 "missing -width in -tag list, using 8n",
144 "missing utility name, using \"\"",
145 "empty head in list item",
146 "empty list item",
147 "missing font type, using \\fR",
148 "unknown font type, using \\fR",
149 "missing -std argument, adding it",
150 "missing eqn box, using \"\"",
151
152 /* related to bad macro arguments */
153 "unterminated quoted argument",
154 "duplicate argument",
155 "skipping duplicate argument",
156 "skipping duplicate display type",
157 "skipping duplicate list type",
158 "skipping -width argument",
159 "unknown AT&T UNIX version",
160 "comma in function argument",
161 "parenthesis in function name",
162 "invalid content in Rs block",
163 "invalid Boolean argument",
164 "unknown font, skipping request",
165
166 /* related to plain text */
167 "blank line in fill mode, using .sp",
168 "tab in filled text",
169 "whitespace at end of input line",
170 "bad comment style",
171 "invalid escape sequence",
172 "undefined string, using \"\"",
173
174 "generic error",
175
176 /* related to equations */
177 "unexpected equation scope closure",
178 "equation scope open on exit",
179 "overlapping equation scopes",
180 "unexpected end of equation",
181
182 /* related to tables */
183 "bad table syntax",
184 "bad table option",
185 "bad table layout",
186 "no table layout cells specified",
187 "no table data cells specified",
188 "ignore data in cell",
189 "data block still open",
190 "ignoring extra data cells",
191
192 /* related to document structure and macros */
193 "input stack limit exceeded, infinite loop?",
194 "skipping bad character",
195 "skipping unknown macro",
196 "skipping item outside list",
197 "skipping column outside column list",
198 "skipping end of block that is not open",
199 "inserting missing end of block",
200 "appending missing end of block",
201
202 /* related to request and macro arguments */
203 "escaped character not allowed in a name",
204 "argument count wrong",
205 "missing list type, using -item",
206 "missing manual name, using \"\"",
207 "uname(3) system call failed, using UNKNOWN",
208 "unknown standard specifier",
209 "skipping request without numeric argument",
210 "skipping all arguments",
211 "skipping excess arguments",
212 "divide by zero",
213
214 "generic fatal error",
215
216 "input too large",
217 "NOT IMPLEMENTED: Bd -file",
218 "NOT IMPLEMENTED: .so with absolute path or \"..\"",
219 ".so request failed",
220
221 /* system errors */
222 "cannot dup file descriptor",
223 "cannot exec",
224 "gunzip failed with code",
225 "cannot fork",
226 NULL,
227 "cannot open pipe",
228 "cannot read file",
229 "gunzip died from signal",
230 "cannot stat file",
231 "wait failed",
232 };
233
234 static const char * const mandoclevels[MANDOCLEVEL_MAX] = {
235 "SUCCESS",
236 "RESERVED",
237 "WARNING",
238 "ERROR",
239 "FATAL",
240 "BADARG",
241 "SYSERR"
242 };
243
244
245 static void
246 resize_buf(struct buf *buf, size_t initial)
247 {
248
249 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
250 buf->buf = mandoc_realloc(buf->buf, buf->sz);
251 }
252
253 static void
254 choose_parser(struct mparse *curp)
255 {
256 char *cp, *ep;
257 int format;
258
259 /*
260 * If neither command line arguments -mdoc or -man select
261 * a parser nor the roff parser found a .Dd or .TH macro
262 * yet, look ahead in the main input buffer.
263 */
264
265 if ((format = roff_getformat(curp->roff)) == 0) {
266 cp = curp->primary->buf;
267 ep = cp + curp->primary->sz;
268 while (cp < ep) {
269 if (*cp == '.' || *cp == '\'') {
270 cp++;
271 if (cp[0] == 'D' && cp[1] == 'd') {
272 format = MPARSE_MDOC;
273 break;
274 }
275 if (cp[0] == 'T' && cp[1] == 'H') {
276 format = MPARSE_MAN;
277 break;
278 }
279 }
280 cp = memchr(cp, '\n', ep - cp);
281 if (cp == NULL)
282 break;
283 cp++;
284 }
285 }
286
287 if (format == MPARSE_MDOC) {
288 if (NULL == curp->pmdoc)
289 curp->pmdoc = mdoc_alloc(
290 curp->roff, curp, curp->defos,
291 MPARSE_QUICK & curp->options ? 1 : 0);
292 assert(curp->pmdoc);
293 curp->mdoc = curp->pmdoc;
294 return;
295 }
296
297 /* Fall back to man(7) as a last resort. */
298
299 if (NULL == curp->pman)
300 curp->pman = man_alloc(curp->roff, curp,
301 MPARSE_QUICK & curp->options ? 1 : 0);
302 assert(curp->pman);
303 curp->man = curp->pman;
304 }
305
306 /*
307 * Main parse routine for a buffer.
308 * It assumes encoding and line numbering are already set up.
309 * It can recurse directly (for invocations of user-defined
310 * macros, inline equations, and input line traps)
311 * and indirectly (for .so file inclusion).
312 */
313 static void
314 mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
315 {
316 const struct tbl_span *span;
317 struct buf ln;
318 size_t pos; /* byte number in the ln buffer */
319 enum rofferr rr;
320 int of, rc;
321 int lnn; /* line number in the real file */
322 unsigned char c;
323
324 memset(&ln, 0, sizeof(ln));
325
326 lnn = curp->line;
327 pos = 0;
328
329 while (i < blk.sz) {
330 if (0 == pos && '\0' == blk.buf[i])
331 break;
332
333 if (start) {
334 curp->line = lnn;
335 curp->reparse_count = 0;
336
337 if (lnn < 3 &&
338 curp->filenc & MPARSE_UTF8 &&
339 curp->filenc & MPARSE_LATIN1)
340 curp->filenc = preconv_cue(&blk, i);
341 }
342
343 while (i < blk.sz && (start || blk.buf[i] != '\0')) {
344
345 /*
346 * When finding an unescaped newline character,
347 * leave the character loop to process the line.
348 * Skip a preceding carriage return, if any.
349 */
350
351 if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
352 '\n' == blk.buf[i + 1])
353 ++i;
354 if ('\n' == blk.buf[i]) {
355 ++i;
356 ++lnn;
357 break;
358 }
359
360 /*
361 * Make sure we have space for the worst
362 * case of 11 bytes: "\\[u10ffff]\0"
363 */
364
365 if (pos + 11 > ln.sz)
366 resize_buf(&ln, 256);
367
368 /*
369 * Encode 8-bit input.
370 */
371
372 c = blk.buf[i];
373 if (c & 0x80) {
374 if ( ! (curp->filenc && preconv_encode(
375 &blk, &i, &ln, &pos, &curp->filenc))) {
376 mandoc_vmsg(MANDOCERR_BADCHAR,
377 curp, curp->line, pos,
378 "0x%x", c);
379 ln.buf[pos++] = '?';
380 i++;
381 }
382 continue;
383 }
384
385 /*
386 * Exclude control characters.
387 */
388
389 if (c == 0x7f || (c < 0x20 && c != 0x09)) {
390 mandoc_vmsg(MANDOCERR_BADCHAR, curp,
391 curp->line, pos, "0x%x", c);
392 i++;
393 ln.buf[pos++] = '?';
394 continue;
395 }
396
397 /* Trailing backslash = a plain char. */
398
399 if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
400 ln.buf[pos++] = blk.buf[i++];
401 continue;
402 }
403
404 /*
405 * Found escape and at least one other character.
406 * When it's a newline character, skip it.
407 * When there is a carriage return in between,
408 * skip that one as well.
409 */
410
411 if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
412 '\n' == blk.buf[i + 2])
413 ++i;
414 if ('\n' == blk.buf[i + 1]) {
415 i += 2;
416 ++lnn;
417 continue;
418 }
419
420 if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
421 i += 2;
422 /* Comment, skip to end of line */
423 for (; i < blk.sz; ++i) {
424 if ('\n' == blk.buf[i]) {
425 ++i;
426 ++lnn;
427 break;
428 }
429 }
430
431 /* Backout trailing whitespaces */
432 for (; pos > 0; --pos) {
433 if (ln.buf[pos - 1] != ' ')
434 break;
435 if (pos > 2 && ln.buf[pos - 2] == '\\')
436 break;
437 }
438 break;
439 }
440
441 /* Catch escaped bogus characters. */
442
443 c = (unsigned char) blk.buf[i+1];
444
445 if ( ! (isascii(c) &&
446 (isgraph(c) || isblank(c)))) {
447 mandoc_vmsg(MANDOCERR_BADCHAR, curp,
448 curp->line, pos, "0x%x", c);
449 i += 2;
450 ln.buf[pos++] = '?';
451 continue;
452 }
453
454 /* Some other escape sequence, copy & cont. */
455
456 ln.buf[pos++] = blk.buf[i++];
457 ln.buf[pos++] = blk.buf[i++];
458 }
459
460 if (pos >= ln.sz)
461 resize_buf(&ln, 256);
462
463 ln.buf[pos] = '\0';
464
465 /*
466 * A significant amount of complexity is contained by
467 * the roff preprocessor. It's line-oriented but can be
468 * expressed on one line, so we need at times to
469 * readjust our starting point and re-run it. The roff
470 * preprocessor can also readjust the buffers with new
471 * data, so we pass them in wholesale.
472 */
473
474 of = 0;
475
476 /*
477 * Maintain a lookaside buffer of all parsed lines. We
478 * only do this if mparse_keep() has been invoked (the
479 * buffer may be accessed with mparse_getkeep()).
480 */
481
482 if (curp->secondary) {
483 curp->secondary->buf = mandoc_realloc(
484 curp->secondary->buf,
485 curp->secondary->sz + pos + 2);
486 memcpy(curp->secondary->buf +
487 curp->secondary->sz,
488 ln.buf, pos);
489 curp->secondary->sz += pos;
490 curp->secondary->buf
491 [curp->secondary->sz] = '\n';
492 curp->secondary->sz++;
493 curp->secondary->buf
494 [curp->secondary->sz] = '\0';
495 }
496 rerun:
497 rr = roff_parseln(curp->roff, curp->line, &ln, &of);
498
499 switch (rr) {
500 case ROFF_REPARSE:
501 if (REPARSE_LIMIT >= ++curp->reparse_count)
502 mparse_buf_r(curp, ln, of, 0);
503 else
504 mandoc_msg(MANDOCERR_ROFFLOOP, curp,
505 curp->line, pos, NULL);
506 pos = 0;
507 continue;
508 case ROFF_APPEND:
509 pos = strlen(ln.buf);
510 continue;
511 case ROFF_RERUN:
512 goto rerun;
513 case ROFF_IGN:
514 pos = 0;
515 continue;
516 case ROFF_ERR:
517 assert(MANDOCLEVEL_FATAL <= curp->file_status);
518 break;
519 case ROFF_SO:
520 if ( ! (curp->options & MPARSE_SO) &&
521 (i >= blk.sz || blk.buf[i] == '\0')) {
522 curp->sodest = mandoc_strdup(ln.buf + of);
523 free(ln.buf);
524 return;
525 }
526 /*
527 * We remove `so' clauses from our lookaside
528 * buffer because we're going to descend into
529 * the file recursively.
530 */
531 if (curp->secondary)
532 curp->secondary->sz -= pos + 1;
533 mparse_readfd(curp, -1, ln.buf + of);
534 if (MANDOCLEVEL_FATAL <= curp->file_status) {
535 mandoc_vmsg(MANDOCERR_SO_FAIL,
536 curp, curp->line, pos,
537 ".so %s", ln.buf + of);
538 break;
539 }
540 pos = 0;
541 continue;
542 default:
543 break;
544 }
545
546 /*
547 * If we encounter errors in the recursive parse, make
548 * sure we don't continue parsing.
549 */
550
551 if (MANDOCLEVEL_FATAL <= curp->file_status)
552 break;
553
554 /*
555 * If input parsers have not been allocated, do so now.
556 * We keep these instanced between parsers, but set them
557 * locally per parse routine since we can use different
558 * parsers with each one.
559 */
560
561 if ( ! (curp->man || curp->mdoc))
562 choose_parser(curp);
563
564 /*
565 * Lastly, push down into the parsers themselves.
566 * If libroff returns ROFF_TBL, then add it to the
567 * currently open parse. Since we only get here if
568 * there does exist data (see tbl_data.c), we're
569 * guaranteed that something's been allocated.
570 * Do the same for ROFF_EQN.
571 */
572
573 rc = -1;
574
575 if (ROFF_TBL == rr)
576 while (NULL != (span = roff_span(curp->roff))) {
577 rc = curp->man ?
578 man_addspan(curp->man, span) :
579 mdoc_addspan(curp->mdoc, span);
580 if (0 == rc)
581 break;
582 }
583 else if (ROFF_EQN == rr)
584 rc = curp->mdoc ?
585 mdoc_addeqn(curp->mdoc,
586 roff_eqn(curp->roff)) :
587 man_addeqn(curp->man,
588 roff_eqn(curp->roff));
589 else if (curp->man || curp->mdoc)
590 rc = curp->man ?
591 man_parseln(curp->man,
592 curp->line, ln.buf, of) :
593 mdoc_parseln(curp->mdoc,
594 curp->line, ln.buf, of);
595
596 if (0 == rc) {
597 assert(MANDOCLEVEL_FATAL <= curp->file_status);
598 break;
599 } else if (2 == rc)
600 break;
601
602 /* Temporary buffers typically are not full. */
603
604 if (0 == start && '\0' == blk.buf[i])
605 break;
606
607 /* Start the next input line. */
608
609 pos = 0;
610 }
611
612 free(ln.buf);
613 }
614
615 static int
616 read_whole_file(struct mparse *curp, const char *file, int fd,
617 struct buf *fb, int *with_mmap)
618 {
619 size_t off;
620 ssize_t ssz;
621
622 #if HAVE_MMAP
623 struct stat st;
624 if (-1 == fstat(fd, &st)) {
625 curp->file_status = MANDOCLEVEL_SYSERR;
626 if (curp->mmsg)
627 (*curp->mmsg)(MANDOCERR_SYSSTAT, curp->file_status,
628 file, 0, 0, strerror(errno));
629 return(0);
630 }
631
632 /*
633 * If we're a regular file, try just reading in the whole entry
634 * via mmap(). This is faster than reading it into blocks, and
635 * since each file is only a few bytes to begin with, I'm not
636 * concerned that this is going to tank any machines.
637 */
638
639 if (S_ISREG(st.st_mode)) {
640 if (st.st_size >= (1U << 31)) {
641 curp->file_status = MANDOCLEVEL_FATAL;
642 if (curp->mmsg)
643 (*curp->mmsg)(MANDOCERR_TOOLARGE,
644 curp->file_status, file, 0, 0, NULL);
645 return(0);
646 }
647 *with_mmap = 1;
648 fb->sz = (size_t)st.st_size;
649 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
650 if (fb->buf != MAP_FAILED)
651 return(1);
652 }
653 #endif
654
655 /*
656 * If this isn't a regular file (like, say, stdin), then we must
657 * go the old way and just read things in bit by bit.
658 */
659
660 *with_mmap = 0;
661 off = 0;
662 fb->sz = 0;
663 fb->buf = NULL;
664 for (;;) {
665 if (off == fb->sz) {
666 if (fb->sz == (1U << 31)) {
667 curp->file_status = MANDOCLEVEL_FATAL;
668 if (curp->mmsg)
669 (*curp->mmsg)(MANDOCERR_TOOLARGE,
670 curp->file_status,
671 file, 0, 0, NULL);
672 break;
673 }
674 resize_buf(fb, 65536);
675 }
676 ssz = read(fd, fb->buf + (int)off, fb->sz - off);
677 if (ssz == 0) {
678 fb->sz = off;
679 return(1);
680 }
681 if (ssz == -1) {
682 curp->file_status = MANDOCLEVEL_SYSERR;
683 if (curp->mmsg)
684 (*curp->mmsg)(MANDOCERR_SYSREAD,
685 curp->file_status, file, 0, 0,
686 strerror(errno));
687 break;
688 }
689 off += (size_t)ssz;
690 }
691
692 free(fb->buf);
693 fb->buf = NULL;
694 return(0);
695 }
696
697 static void
698 mparse_end(struct mparse *curp)
699 {
700
701 if (MANDOCLEVEL_FATAL <= curp->file_status)
702 return;
703
704 if (curp->mdoc == NULL &&
705 curp->man == NULL &&
706 curp->sodest == NULL) {
707 if (curp->options & MPARSE_MDOC)
708 curp->mdoc = curp->pmdoc;
709 else {
710 if (curp->pman == NULL)
711 curp->pman = man_alloc(curp->roff, curp,
712 curp->options & MPARSE_QUICK ? 1 : 0);
713 curp->man = curp->pman;
714 }
715 }
716
717 if (curp->mdoc && ! mdoc_endparse(curp->mdoc)) {
718 assert(MANDOCLEVEL_FATAL <= curp->file_status);
719 return;
720 }
721
722 if (curp->man && ! man_endparse(curp->man)) {
723 assert(MANDOCLEVEL_FATAL <= curp->file_status);
724 return;
725 }
726
727 roff_endparse(curp->roff);
728 }
729
730 static void
731 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
732 {
733 struct buf *svprimary;
734 const char *svfile;
735 size_t offset;
736 static int recursion_depth;
737
738 if (64 < recursion_depth) {
739 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
740 return;
741 }
742
743 /* Line number is per-file. */
744 svfile = curp->file;
745 curp->file = file;
746 svprimary = curp->primary;
747 curp->primary = &blk;
748 curp->line = 1;
749 recursion_depth++;
750
751 /* Skip an UTF-8 byte order mark. */
752 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
753 (unsigned char)blk.buf[0] == 0xef &&
754 (unsigned char)blk.buf[1] == 0xbb &&
755 (unsigned char)blk.buf[2] == 0xbf) {
756 offset = 3;
757 curp->filenc &= ~MPARSE_LATIN1;
758 } else
759 offset = 0;
760
761 mparse_buf_r(curp, blk, offset, 1);
762
763 if (0 == --recursion_depth && MANDOCLEVEL_FATAL > curp->file_status)
764 mparse_end(curp);
765
766 curp->primary = svprimary;
767 curp->file = svfile;
768 }
769
770 enum mandoclevel
771 mparse_readmem(struct mparse *curp, const void *buf, size_t len,
772 const char *file)
773 {
774 struct buf blk;
775
776 blk.buf = UNCONST(buf);
777 blk.sz = len;
778
779 mparse_parse_buffer(curp, blk, file);
780 return(curp->file_status);
781 }
782
783 /*
784 * If a file descriptor is given, use it and assume it points
785 * to the named file. Otherwise, open the named file.
786 * Read the whole file into memory and call the parsers.
787 * Called recursively when an .so request is encountered.
788 */
789 enum mandoclevel
790 mparse_readfd(struct mparse *curp, int fd, const char *file)
791 {
792 struct buf blk;
793 int with_mmap;
794 int save_filenc;
795 pid_t save_child;
796
797 save_child = curp->child;
798 if (fd != -1)
799 curp->child = 0;
800 else if (mparse_open(curp, &fd, file) >= MANDOCLEVEL_SYSERR)
801 goto out;
802
803 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
804 save_filenc = curp->filenc;
805 curp->filenc = curp->options &
806 (MPARSE_UTF8 | MPARSE_LATIN1);
807 mparse_parse_buffer(curp, blk, file);
808 curp->filenc = save_filenc;
809 #if HAVE_MMAP
810 if (with_mmap)
811 munmap(blk.buf, blk.sz);
812 else
813 #endif
814 free(blk.buf);
815 }
816
817 if (fd != STDIN_FILENO && close(fd) == -1)
818 perror(file);
819
820 mparse_wait(curp);
821 out:
822 curp->child = save_child;
823 return(curp->file_status);
824 }
825
826 enum mandoclevel
827 mparse_open(struct mparse *curp, int *fd, const char *file)
828 {
829 int pfd[2];
830 int save_errno;
831 char *cp;
832 enum mandocerr err;
833
834 pfd[1] = -1;
835 curp->file = file;
836
837 /* Unless zipped, try to just open the file. */
838
839 if ((cp = strrchr(file, '.')) == NULL ||
840 strcmp(cp + 1, "gz")) {
841 curp->child = 0;
842 if ((*fd = open(file, O_RDONLY)) != -1)
843 return(MANDOCLEVEL_OK);
844
845 /* Open failed; try to append ".gz". */
846
847 mandoc_asprintf(&cp, "%s.gz", file);
848 file = cp;
849 } else
850 cp = NULL;
851
852 /* Before forking, make sure the file can be read. */
853
854 save_errno = errno;
855 if (access(file, R_OK) == -1) {
856 if (cp != NULL)
857 errno = save_errno;
858 err = MANDOCERR_SYSOPEN;
859 goto out;
860 }
861
862 /* Run gunzip(1). */
863
864 if (pipe(pfd) == -1) {
865 err = MANDOCERR_SYSPIPE;
866 goto out;
867 }
868
869 switch (curp->child = fork()) {
870 case -1:
871 err = MANDOCERR_SYSFORK;
872 close(pfd[0]);
873 close(pfd[1]);
874 pfd[1] = -1;
875 break;
876 case 0:
877 close(pfd[0]);
878 if (dup2(pfd[1], STDOUT_FILENO) == -1) {
879 err = MANDOCERR_SYSDUP;
880 break;
881 }
882 execlp("gunzip", "gunzip", "-c", file, NULL);
883 err = MANDOCERR_SYSEXEC;
884 break;
885 default:
886 close(pfd[1]);
887 *fd = pfd[0];
888 return(MANDOCLEVEL_OK);
889 }
890
891 out:
892 free(cp);
893 *fd = -1;
894 curp->child = 0;
895 curp->file_status = MANDOCLEVEL_SYSERR;
896 if (curp->mmsg)
897 (*curp->mmsg)(err, curp->file_status, curp->file,
898 0, 0, strerror(errno));
899 if (pfd[1] != -1)
900 exit(1);
901 return(curp->file_status);
902 }
903
904 enum mandoclevel
905 mparse_wait(struct mparse *curp)
906 {
907 int status;
908
909 if (curp->child == 0)
910 return(MANDOCLEVEL_OK);
911
912 if (waitpid(curp->child, &status, 0) == -1) {
913 mandoc_msg(MANDOCERR_SYSWAIT, curp, 0, 0,
914 strerror(errno));
915 curp->file_status = MANDOCLEVEL_SYSERR;
916 return(curp->file_status);
917 }
918 if (WIFSIGNALED(status)) {
919 mandoc_vmsg(MANDOCERR_SYSSIG, curp, 0, 0,
920 "%d", WTERMSIG(status));
921 curp->file_status = MANDOCLEVEL_SYSERR;
922 return(curp->file_status);
923 }
924 if (WEXITSTATUS(status)) {
925 mandoc_vmsg(MANDOCERR_SYSEXIT, curp, 0, 0,
926 "%d", WEXITSTATUS(status));
927 curp->file_status = MANDOCLEVEL_SYSERR;
928 return(curp->file_status);
929 }
930 return(MANDOCLEVEL_OK);
931 }
932
933 struct mparse *
934 mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
935 const struct mchars *mchars, const char *defos)
936 {
937 struct mparse *curp;
938
939 assert(wlevel <= MANDOCLEVEL_FATAL);
940
941 curp = mandoc_calloc(1, sizeof(struct mparse));
942
943 curp->options = options;
944 curp->wlevel = wlevel;
945 curp->mmsg = mmsg;
946 curp->defos = defos;
947
948 curp->mchars = mchars;
949 curp->roff = roff_alloc(curp, curp->mchars, options);
950 if (curp->options & MPARSE_MDOC)
951 curp->pmdoc = mdoc_alloc(
952 curp->roff, curp, curp->defos,
953 curp->options & MPARSE_QUICK ? 1 : 0);
954 if (curp->options & MPARSE_MAN)
955 curp->pman = man_alloc(curp->roff, curp,
956 curp->options & MPARSE_QUICK ? 1 : 0);
957
958 return(curp);
959 }
960
961 void
962 mparse_reset(struct mparse *curp)
963 {
964
965 roff_reset(curp->roff);
966
967 if (curp->mdoc)
968 mdoc_reset(curp->mdoc);
969 if (curp->man)
970 man_reset(curp->man);
971 if (curp->secondary)
972 curp->secondary->sz = 0;
973
974 curp->file_status = MANDOCLEVEL_OK;
975 curp->mdoc = NULL;
976 curp->man = NULL;
977
978 free(curp->sodest);
979 curp->sodest = NULL;
980 }
981
982 void
983 mparse_free(struct mparse *curp)
984 {
985
986 if (curp->pmdoc)
987 mdoc_free(curp->pmdoc);
988 if (curp->pman)
989 man_free(curp->pman);
990 if (curp->roff)
991 roff_free(curp->roff);
992 if (curp->secondary)
993 free(curp->secondary->buf);
994
995 free(curp->secondary);
996 free(curp->sodest);
997 free(curp);
998 }
999
1000 void
1001 mparse_result(struct mparse *curp,
1002 struct mdoc **mdoc, struct man **man, char **sodest)
1003 {
1004
1005 if (sodest && NULL != (*sodest = curp->sodest)) {
1006 *mdoc = NULL;
1007 *man = NULL;
1008 return;
1009 }
1010 if (mdoc)
1011 *mdoc = curp->mdoc;
1012 if (man)
1013 *man = curp->man;
1014 }
1015
1016 void
1017 mandoc_vmsg(enum mandocerr t, struct mparse *m,
1018 int ln, int pos, const char *fmt, ...)
1019 {
1020 char buf[256];
1021 va_list ap;
1022
1023 va_start(ap, fmt);
1024 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
1025 va_end(ap);
1026
1027 mandoc_msg(t, m, ln, pos, buf);
1028 }
1029
1030 void
1031 mandoc_msg(enum mandocerr er, struct mparse *m,
1032 int ln, int col, const char *msg)
1033 {
1034 enum mandoclevel level;
1035
1036 level = MANDOCLEVEL_FATAL;
1037 while (er < mandoclimits[level])
1038 level--;
1039
1040 if (level < m->wlevel)
1041 return;
1042
1043 if (m->mmsg)
1044 (*m->mmsg)(er, level, m->file, ln, col, msg);
1045
1046 if (m->file_status < level)
1047 m->file_status = level;
1048 }
1049
1050 const char *
1051 mparse_strerror(enum mandocerr er)
1052 {
1053
1054 return(mandocerrs[er]);
1055 }
1056
1057 const char *
1058 mparse_strlevel(enum mandoclevel lvl)
1059 {
1060 return(mandoclevels[lvl]);
1061 }
1062
1063 void
1064 mparse_keep(struct mparse *p)
1065 {
1066
1067 assert(NULL == p->secondary);
1068 p->secondary = mandoc_calloc(1, sizeof(struct buf));
1069 }
1070
1071 const char *
1072 mparse_getkeep(const struct mparse *p)
1073 {
1074
1075 assert(p->secondary);
1076 return(p->secondary->sz ? p->secondary->buf : NULL);
1077 }