/*
 * git.cameronkatri.com Git - mandoc.git/blob - read.c
 * Actually use the new man.conf(5) "output" directive.
 * [mandoc.git] / read.c
 */
1 /* $Id: read.c,v 1.132 2015/03/17 07:33:07 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include "config.h"
20
21 #include <sys/types.h>
22 #if HAVE_MMAP
23 #include <sys/mman.h>
24 #include <sys/stat.h>
25 #endif
26 #include <sys/wait.h>
27
28 #include <assert.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <signal.h>
33 #include <stdarg.h>
34 #include <stdint.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <unistd.h>
39
40 #include "mandoc.h"
41 #include "mandoc_aux.h"
42 #include "libmandoc.h"
43 #include "mdoc.h"
44 #include "man.h"
45
/*
 * Largest value the per-line reparse counter may reach before
 * mparse_buf_r() gives up and reports MANDOCERR_ROFFLOOP.
 */
enum { REPARSE_LIMIT = 1000 };
47
48 struct mparse {
49 struct man *pman; /* persistent man parser */
50 struct mdoc *pmdoc; /* persistent mdoc parser */
51 struct man *man; /* man parser */
52 struct mdoc *mdoc; /* mdoc parser */
53 struct roff *roff; /* roff parser (!NULL) */
54 const struct mchars *mchars; /* character table */
55 char *sodest; /* filename pointed to by .so */
56 const char *file; /* filename of current input file */
57 struct buf *primary; /* buffer currently being parsed */
58 struct buf *secondary; /* preprocessed copy of input */
59 const char *defos; /* default operating system */
60 mandocmsg mmsg; /* warning/error message handler */
61 enum mandoclevel file_status; /* status of current parse */
62 enum mandoclevel wlevel; /* ignore messages below this */
63 int options; /* parser options */
64 int filenc; /* encoding of the current file */
65 int reparse_count; /* finite interp. stack */
66 int line; /* line number in the file */
67 pid_t child; /* the gunzip(1) process */
68 };
69
/* Helpers private to this file, defined further down. */
static	void	  choose_parser(struct mparse *curp);
static	void	  resize_buf(struct buf *buf, size_t initial);
static	void	  mparse_buf_r(struct mparse *curp, struct buf blk,
			size_t i, int start);
static	int	  read_whole_file(struct mparse *curp, const char *file,
			int fd, struct buf *fb, int *with_mmap);
static	void	  mparse_end(struct mparse *curp);
static	void	  mparse_parse_buffer(struct mparse *curp, struct buf blk,
			const char *file);
78
79 static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = {
80 MANDOCERR_OK,
81 MANDOCERR_WARNING,
82 MANDOCERR_WARNING,
83 MANDOCERR_ERROR,
84 MANDOCERR_UNSUPP,
85 MANDOCERR_MAX,
86 MANDOCERR_MAX
87 };
88
89 static const char * const mandocerrs[MANDOCERR_MAX] = {
90 "ok",
91
92 "generic warning",
93
94 /* related to the prologue */
95 "missing manual title, using UNTITLED",
96 "missing manual title, using \"\"",
97 "lower case character in document title",
98 "missing manual section, using \"\"",
99 "unknown manual section",
100 "missing date, using today's date",
101 "cannot parse date, using it verbatim",
102 "missing Os macro, using \"\"",
103 "duplicate prologue macro",
104 "late prologue macro",
105 "skipping late title macro",
106 "prologue macros out of order",
107
108 /* related to document structure */
109 ".so is fragile, better use ln(1)",
110 "no document body",
111 "content before first section header",
112 "first section is not \"NAME\"",
113 "NAME section without name",
114 "NAME section without description",
115 "description not at the end of NAME",
116 "bad NAME section content",
117 "missing description line, using \"\"",
118 "sections out of conventional order",
119 "duplicate section title",
120 "unexpected section",
121 "unusual Xr order",
122 "unusual Xr punctuation",
123 "AUTHORS section without An macro",
124
125 /* related to macros and nesting */
126 "obsolete macro",
127 "macro neither callable nor escaped",
128 "skipping paragraph macro",
129 "moving paragraph macro out of list",
130 "skipping no-space macro",
131 "blocks badly nested",
132 "nested displays are not portable",
133 "moving content out of list",
134 ".Vt block has child macro",
135 "fill mode already enabled, skipping",
136 "fill mode already disabled, skipping",
137 "line scope broken",
138
139 /* related to missing macro arguments */
140 "skipping empty request",
141 "conditional request controls empty scope",
142 "skipping empty macro",
143 "empty block",
144 "empty argument, using 0n",
145 "missing display type, using -ragged",
146 "list type is not the first argument",
147 "missing -width in -tag list, using 8n",
148 "missing utility name, using \"\"",
149 "missing function name, using \"\"",
150 "empty head in list item",
151 "empty list item",
152 "missing font type, using \\fR",
153 "unknown font type, using \\fR",
154 "nothing follows prefix",
155 "empty reference block",
156 "missing -std argument, adding it",
157 "missing option string, using \"\"",
158 "missing resource identifier, using \"\"",
159 "missing eqn box, using \"\"",
160
161 /* related to bad macro arguments */
162 "unterminated quoted argument",
163 "duplicate argument",
164 "skipping duplicate argument",
165 "skipping duplicate display type",
166 "skipping duplicate list type",
167 "skipping -width argument",
168 "wrong number of cells",
169 "unknown AT&T UNIX version",
170 "comma in function argument",
171 "parenthesis in function name",
172 "invalid content in Rs block",
173 "invalid Boolean argument",
174 "unknown font, skipping request",
175 "odd number of characters in request",
176
177 /* related to plain text */
178 "blank line in fill mode, using .sp",
179 "tab in filled text",
180 "whitespace at end of input line",
181 "bad comment style",
182 "invalid escape sequence",
183 "undefined string, using \"\"",
184
185 /* related to tables */
186 "tbl line starts with span",
187 "tbl column starts with span",
188 "skipping vertical bar in tbl layout",
189
190 "generic error",
191
192 /* related to tables */
193 "non-alphabetic character in tbl options",
194 "skipping unknown tbl option",
195 "missing tbl option argument",
196 "wrong tbl option argument size",
197 "empty tbl layout",
198 "invalid character in tbl layout",
199 "unmatched parenthesis in tbl layout",
200 "tbl without any data cells",
201 "ignoring data in spanned tbl cell",
202 "ignoring extra tbl data cells",
203 "data block open at end of tbl",
204
205 /* related to document structure and macros */
206 NULL,
207 "input stack limit exceeded, infinite loop?",
208 "skipping bad character",
209 "skipping unknown macro",
210 "skipping insecure request",
211 "skipping item outside list",
212 "skipping column outside column list",
213 "skipping end of block that is not open",
214 "fewer RS blocks open, skipping",
215 "inserting missing end of block",
216 "appending missing end of block",
217
218 /* related to request and macro arguments */
219 "escaped character not allowed in a name",
220 "NOT IMPLEMENTED: Bd -file",
221 "missing list type, using -item",
222 "missing manual name, using \"\"",
223 "uname(3) system call failed, using UNKNOWN",
224 "unknown standard specifier",
225 "skipping request without numeric argument",
226 "NOT IMPLEMENTED: .so with absolute path or \"..\"",
227 ".so request failed",
228 "skipping all arguments",
229 "skipping excess arguments",
230 "divide by zero",
231
232 "unsupported feature",
233 "input too large",
234 "unsupported control character",
235 "unsupported roff request",
236 "eqn delim option in tbl",
237 "unsupported tbl layout modifier",
238 "ignoring macro in table",
239 };
240
241 static const char * const mandoclevels[MANDOCLEVEL_MAX] = {
242 "SUCCESS",
243 "RESERVED",
244 "WARNING",
245 "ERROR",
246 "UNSUPP",
247 "BADARG",
248 "SYSERR"
249 };
250
251
252 static void
253 resize_buf(struct buf *buf, size_t initial)
254 {
255
256 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
257 buf->buf = mandoc_realloc(buf->buf, buf->sz);
258 }
259
260 static void
261 choose_parser(struct mparse *curp)
262 {
263 char *cp, *ep;
264 int format;
265
266 /*
267 * If neither command line arguments -mdoc or -man select
268 * a parser nor the roff parser found a .Dd or .TH macro
269 * yet, look ahead in the main input buffer.
270 */
271
272 if ((format = roff_getformat(curp->roff)) == 0) {
273 cp = curp->primary->buf;
274 ep = cp + curp->primary->sz;
275 while (cp < ep) {
276 if (*cp == '.' || *cp == '\'') {
277 cp++;
278 if (cp[0] == 'D' && cp[1] == 'd') {
279 format = MPARSE_MDOC;
280 break;
281 }
282 if (cp[0] == 'T' && cp[1] == 'H') {
283 format = MPARSE_MAN;
284 break;
285 }
286 }
287 cp = memchr(cp, '\n', ep - cp);
288 if (cp == NULL)
289 break;
290 cp++;
291 }
292 }
293
294 if (format == MPARSE_MDOC) {
295 if (NULL == curp->pmdoc)
296 curp->pmdoc = mdoc_alloc(
297 curp->roff, curp, curp->defos,
298 MPARSE_QUICK & curp->options ? 1 : 0);
299 assert(curp->pmdoc);
300 curp->mdoc = curp->pmdoc;
301 return;
302 }
303
304 /* Fall back to man(7) as a last resort. */
305
306 if (NULL == curp->pman)
307 curp->pman = man_alloc(
308 curp->roff, curp, curp->defos,
309 MPARSE_QUICK & curp->options ? 1 : 0);
310 assert(curp->pman);
311 curp->man = curp->pman;
312 }
313
314 /*
315 * Main parse routine for a buffer.
316 * It assumes encoding and line numbering are already set up.
317 * It can recurse directly (for invocations of user-defined
318 * macros, inline equations, and input line traps)
319 * and indirectly (for .so file inclusion).
320 */
321 static void
322 mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
323 {
324 const struct tbl_span *span;
325 struct buf ln;
326 const char *save_file;
327 char *cp;
328 size_t pos; /* byte number in the ln buffer */
329 enum rofferr rr;
330 int of;
331 int lnn; /* line number in the real file */
332 int fd;
333 pid_t save_child;
334 unsigned char c;
335
336 memset(&ln, 0, sizeof(ln));
337
338 lnn = curp->line;
339 pos = 0;
340
341 while (i < blk.sz) {
342 if (0 == pos && '\0' == blk.buf[i])
343 break;
344
345 if (start) {
346 curp->line = lnn;
347 curp->reparse_count = 0;
348
349 if (lnn < 3 &&
350 curp->filenc & MPARSE_UTF8 &&
351 curp->filenc & MPARSE_LATIN1)
352 curp->filenc = preconv_cue(&blk, i);
353 }
354
355 while (i < blk.sz && (start || blk.buf[i] != '\0')) {
356
357 /*
358 * When finding an unescaped newline character,
359 * leave the character loop to process the line.
360 * Skip a preceding carriage return, if any.
361 */
362
363 if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
364 '\n' == blk.buf[i + 1])
365 ++i;
366 if ('\n' == blk.buf[i]) {
367 ++i;
368 ++lnn;
369 break;
370 }
371
372 /*
373 * Make sure we have space for the worst
374 * case of 11 bytes: "\\[u10ffff]\0"
375 */
376
377 if (pos + 11 > ln.sz)
378 resize_buf(&ln, 256);
379
380 /*
381 * Encode 8-bit input.
382 */
383
384 c = blk.buf[i];
385 if (c & 0x80) {
386 if ( ! (curp->filenc && preconv_encode(
387 &blk, &i, &ln, &pos, &curp->filenc))) {
388 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
389 curp->line, pos, "0x%x", c);
390 ln.buf[pos++] = '?';
391 i++;
392 }
393 continue;
394 }
395
396 /*
397 * Exclude control characters.
398 */
399
400 if (c == 0x7f || (c < 0x20 && c != 0x09)) {
401 mandoc_vmsg(c == 0x00 || c == 0x04 ||
402 c > 0x0a ? MANDOCERR_CHAR_BAD :
403 MANDOCERR_CHAR_UNSUPP,
404 curp, curp->line, pos, "0x%x", c);
405 i++;
406 if (c != '\r')
407 ln.buf[pos++] = '?';
408 continue;
409 }
410
411 /* Trailing backslash = a plain char. */
412
413 if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
414 ln.buf[pos++] = blk.buf[i++];
415 continue;
416 }
417
418 /*
419 * Found escape and at least one other character.
420 * When it's a newline character, skip it.
421 * When there is a carriage return in between,
422 * skip that one as well.
423 */
424
425 if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
426 '\n' == blk.buf[i + 2])
427 ++i;
428 if ('\n' == blk.buf[i + 1]) {
429 i += 2;
430 ++lnn;
431 continue;
432 }
433
434 if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
435 i += 2;
436 /* Comment, skip to end of line */
437 for (; i < blk.sz; ++i) {
438 if ('\n' == blk.buf[i]) {
439 ++i;
440 ++lnn;
441 break;
442 }
443 }
444
445 /* Backout trailing whitespaces */
446 for (; pos > 0; --pos) {
447 if (ln.buf[pos - 1] != ' ')
448 break;
449 if (pos > 2 && ln.buf[pos - 2] == '\\')
450 break;
451 }
452 break;
453 }
454
455 /* Catch escaped bogus characters. */
456
457 c = (unsigned char) blk.buf[i+1];
458
459 if ( ! (isascii(c) &&
460 (isgraph(c) || isblank(c)))) {
461 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
462 curp->line, pos, "0x%x", c);
463 i += 2;
464 ln.buf[pos++] = '?';
465 continue;
466 }
467
468 /* Some other escape sequence, copy & cont. */
469
470 ln.buf[pos++] = blk.buf[i++];
471 ln.buf[pos++] = blk.buf[i++];
472 }
473
474 if (pos >= ln.sz)
475 resize_buf(&ln, 256);
476
477 ln.buf[pos] = '\0';
478
479 /*
480 * A significant amount of complexity is contained by
481 * the roff preprocessor. It's line-oriented but can be
482 * expressed on one line, so we need at times to
483 * readjust our starting point and re-run it. The roff
484 * preprocessor can also readjust the buffers with new
485 * data, so we pass them in wholesale.
486 */
487
488 of = 0;
489
490 /*
491 * Maintain a lookaside buffer of all parsed lines. We
492 * only do this if mparse_keep() has been invoked (the
493 * buffer may be accessed with mparse_getkeep()).
494 */
495
496 if (curp->secondary) {
497 curp->secondary->buf = mandoc_realloc(
498 curp->secondary->buf,
499 curp->secondary->sz + pos + 2);
500 memcpy(curp->secondary->buf +
501 curp->secondary->sz,
502 ln.buf, pos);
503 curp->secondary->sz += pos;
504 curp->secondary->buf
505 [curp->secondary->sz] = '\n';
506 curp->secondary->sz++;
507 curp->secondary->buf
508 [curp->secondary->sz] = '\0';
509 }
510 rerun:
511 rr = roff_parseln(curp->roff, curp->line, &ln, &of);
512
513 switch (rr) {
514 case ROFF_REPARSE:
515 if (REPARSE_LIMIT >= ++curp->reparse_count)
516 mparse_buf_r(curp, ln, of, 0);
517 else
518 mandoc_msg(MANDOCERR_ROFFLOOP, curp,
519 curp->line, pos, NULL);
520 pos = 0;
521 continue;
522 case ROFF_APPEND:
523 pos = strlen(ln.buf);
524 continue;
525 case ROFF_RERUN:
526 goto rerun;
527 case ROFF_IGN:
528 pos = 0;
529 continue;
530 case ROFF_SO:
531 if ( ! (curp->options & MPARSE_SO) &&
532 (i >= blk.sz || blk.buf[i] == '\0')) {
533 curp->sodest = mandoc_strdup(ln.buf + of);
534 free(ln.buf);
535 return;
536 }
537 /*
538 * We remove `so' clauses from our lookaside
539 * buffer because we're going to descend into
540 * the file recursively.
541 */
542 if (curp->secondary)
543 curp->secondary->sz -= pos + 1;
544 save_file = curp->file;
545 save_child = curp->child;
546 if (mparse_open(curp, &fd, ln.buf + of) ==
547 MANDOCLEVEL_OK) {
548 mparse_readfd(curp, fd, ln.buf + of);
549 curp->file = save_file;
550 } else {
551 curp->file = save_file;
552 mandoc_vmsg(MANDOCERR_SO_FAIL,
553 curp, curp->line, pos,
554 ".so %s", ln.buf + of);
555 ln.sz = mandoc_asprintf(&cp,
556 ".sp\nSee the file %s.\n.sp",
557 ln.buf + of);
558 free(ln.buf);
559 ln.buf = cp;
560 of = 0;
561 mparse_buf_r(curp, ln, of, 0);
562 }
563 curp->child = save_child;
564 pos = 0;
565 continue;
566 default:
567 break;
568 }
569
570 /*
571 * If input parsers have not been allocated, do so now.
572 * We keep these instanced between parsers, but set them
573 * locally per parse routine since we can use different
574 * parsers with each one.
575 */
576
577 if ( ! (curp->man || curp->mdoc))
578 choose_parser(curp);
579
580 /*
581 * Lastly, push down into the parsers themselves.
582 * If libroff returns ROFF_TBL, then add it to the
583 * currently open parse. Since we only get here if
584 * there does exist data (see tbl_data.c), we're
585 * guaranteed that something's been allocated.
586 * Do the same for ROFF_EQN.
587 */
588
589 if (rr == ROFF_TBL) {
590 while ((span = roff_span(curp->roff)) != NULL)
591 if (curp->man == NULL)
592 mdoc_addspan(curp->mdoc, span);
593 else
594 man_addspan(curp->man, span);
595 } else if (rr == ROFF_EQN) {
596 if (curp->man == NULL)
597 mdoc_addeqn(curp->mdoc, roff_eqn(curp->roff));
598 else
599 man_addeqn(curp->man, roff_eqn(curp->roff));
600 } else if ((curp->man == NULL ?
601 mdoc_parseln(curp->mdoc, curp->line, ln.buf, of) :
602 man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
603 break;
604
605 /* Temporary buffers typically are not full. */
606
607 if (0 == start && '\0' == blk.buf[i])
608 break;
609
610 /* Start the next input line. */
611
612 pos = 0;
613 }
614
615 free(ln.buf);
616 }
617
618 static int
619 read_whole_file(struct mparse *curp, const char *file, int fd,
620 struct buf *fb, int *with_mmap)
621 {
622 size_t off;
623 ssize_t ssz;
624
625 #if HAVE_MMAP
626 struct stat st;
627 if (-1 == fstat(fd, &st)) {
628 perror(file);
629 exit((int)MANDOCLEVEL_SYSERR);
630 }
631
632 /*
633 * If we're a regular file, try just reading in the whole entry
634 * via mmap(). This is faster than reading it into blocks, and
635 * since each file is only a few bytes to begin with, I'm not
636 * concerned that this is going to tank any machines.
637 */
638
639 if (S_ISREG(st.st_mode)) {
640 if (st.st_size > 0x7fffffff) {
641 mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
642 return(0);
643 }
644 *with_mmap = 1;
645 fb->sz = (size_t)st.st_size;
646 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
647 if (fb->buf != MAP_FAILED)
648 return(1);
649 }
650 #endif
651
652 /*
653 * If this isn't a regular file (like, say, stdin), then we must
654 * go the old way and just read things in bit by bit.
655 */
656
657 *with_mmap = 0;
658 off = 0;
659 fb->sz = 0;
660 fb->buf = NULL;
661 for (;;) {
662 if (off == fb->sz) {
663 if (fb->sz == (1U << 31)) {
664 mandoc_msg(MANDOCERR_TOOLARGE, curp,
665 0, 0, NULL);
666 break;
667 }
668 resize_buf(fb, 65536);
669 }
670 ssz = read(fd, fb->buf + (int)off, fb->sz - off);
671 if (ssz == 0) {
672 fb->sz = off;
673 return(1);
674 }
675 if (ssz == -1) {
676 perror(file);
677 exit((int)MANDOCLEVEL_SYSERR);
678 }
679 off += (size_t)ssz;
680 }
681
682 free(fb->buf);
683 fb->buf = NULL;
684 return(0);
685 }
686
687 static void
688 mparse_end(struct mparse *curp)
689 {
690
691 if (curp->mdoc == NULL &&
692 curp->man == NULL &&
693 curp->sodest == NULL) {
694 if (curp->options & MPARSE_MDOC)
695 curp->mdoc = curp->pmdoc;
696 else {
697 if (curp->pman == NULL)
698 curp->pman = man_alloc(
699 curp->roff, curp, curp->defos,
700 curp->options & MPARSE_QUICK ? 1 : 0);
701 curp->man = curp->pman;
702 }
703 }
704 if (curp->mdoc)
705 mdoc_endparse(curp->mdoc);
706 if (curp->man)
707 man_endparse(curp->man);
708 roff_endparse(curp->roff);
709 }
710
711 static void
712 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
713 {
714 struct buf *svprimary;
715 const char *svfile;
716 size_t offset;
717 static int recursion_depth;
718
719 if (64 < recursion_depth) {
720 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
721 return;
722 }
723
724 /* Line number is per-file. */
725 svfile = curp->file;
726 curp->file = file;
727 svprimary = curp->primary;
728 curp->primary = &blk;
729 curp->line = 1;
730 recursion_depth++;
731
732 /* Skip an UTF-8 byte order mark. */
733 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
734 (unsigned char)blk.buf[0] == 0xef &&
735 (unsigned char)blk.buf[1] == 0xbb &&
736 (unsigned char)blk.buf[2] == 0xbf) {
737 offset = 3;
738 curp->filenc &= ~MPARSE_LATIN1;
739 } else
740 offset = 0;
741
742 mparse_buf_r(curp, blk, offset, 1);
743
744 if (--recursion_depth == 0)
745 mparse_end(curp);
746
747 curp->primary = svprimary;
748 curp->file = svfile;
749 }
750
751 enum mandoclevel
752 mparse_readmem(struct mparse *curp, void *buf, size_t len,
753 const char *file)
754 {
755 struct buf blk;
756
757 blk.buf = buf;
758 blk.sz = len;
759
760 mparse_parse_buffer(curp, blk, file);
761 return(curp->file_status);
762 }
763
764 /*
765 * Read the whole file into memory and call the parsers.
766 * Called recursively when an .so request is encountered.
767 */
768 enum mandoclevel
769 mparse_readfd(struct mparse *curp, int fd, const char *file)
770 {
771 struct buf blk;
772 int with_mmap;
773 int save_filenc;
774
775 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
776 save_filenc = curp->filenc;
777 curp->filenc = curp->options &
778 (MPARSE_UTF8 | MPARSE_LATIN1);
779 mparse_parse_buffer(curp, blk, file);
780 curp->filenc = save_filenc;
781 #if HAVE_MMAP
782 if (with_mmap)
783 munmap(blk.buf, blk.sz);
784 else
785 #endif
786 free(blk.buf);
787 }
788
789 if (fd != STDIN_FILENO && close(fd) == -1)
790 perror(file);
791
792 mparse_wait(curp);
793 return(curp->file_status);
794 }
795
796 enum mandoclevel
797 mparse_open(struct mparse *curp, int *fd, const char *file)
798 {
799 int pfd[2];
800 int save_errno;
801 char *cp;
802
803 curp->file = file;
804
805 /* Unless zipped, try to just open the file. */
806
807 if ((cp = strrchr(file, '.')) == NULL ||
808 strcmp(cp + 1, "gz")) {
809 curp->child = 0;
810 if ((*fd = open(file, O_RDONLY)) != -1)
811 return(MANDOCLEVEL_OK);
812
813 /* Open failed; try to append ".gz". */
814
815 mandoc_asprintf(&cp, "%s.gz", file);
816 file = cp;
817 } else
818 cp = NULL;
819
820 /* Before forking, make sure the file can be read. */
821
822 save_errno = errno;
823 if (access(file, R_OK) == -1) {
824 if (cp != NULL)
825 errno = save_errno;
826 free(cp);
827 *fd = -1;
828 curp->child = 0;
829 mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
830 return(MANDOCLEVEL_ERROR);
831 }
832
833 /* Run gunzip(1). */
834
835 if (pipe(pfd) == -1) {
836 perror("pipe");
837 exit((int)MANDOCLEVEL_SYSERR);
838 }
839
840 switch (curp->child = fork()) {
841 case -1:
842 perror("fork");
843 exit((int)MANDOCLEVEL_SYSERR);
844 case 0:
845 close(pfd[0]);
846 if (dup2(pfd[1], STDOUT_FILENO) == -1) {
847 perror("dup");
848 exit((int)MANDOCLEVEL_SYSERR);
849 }
850 signal(SIGPIPE, SIG_DFL);
851 execlp("gunzip", "gunzip", "-c", file, NULL);
852 perror("exec");
853 exit((int)MANDOCLEVEL_SYSERR);
854 default:
855 close(pfd[1]);
856 *fd = pfd[0];
857 return(MANDOCLEVEL_OK);
858 }
859 }
860
861 enum mandoclevel
862 mparse_wait(struct mparse *curp)
863 {
864 int status;
865
866 if (curp->child == 0)
867 return(MANDOCLEVEL_OK);
868
869 if (waitpid(curp->child, &status, 0) == -1) {
870 perror("wait");
871 exit((int)MANDOCLEVEL_SYSERR);
872 }
873 curp->child = 0;
874 if (WIFSIGNALED(status)) {
875 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
876 "gunzip died from signal %d", WTERMSIG(status));
877 return(MANDOCLEVEL_ERROR);
878 }
879 if (WEXITSTATUS(status)) {
880 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
881 "gunzip failed with code %d", WEXITSTATUS(status));
882 return(MANDOCLEVEL_ERROR);
883 }
884 return(MANDOCLEVEL_OK);
885 }
886
887 struct mparse *
888 mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
889 const struct mchars *mchars, const char *defos)
890 {
891 struct mparse *curp;
892
893 curp = mandoc_calloc(1, sizeof(struct mparse));
894
895 curp->options = options;
896 curp->wlevel = wlevel;
897 curp->mmsg = mmsg;
898 curp->defos = defos;
899
900 curp->mchars = mchars;
901 curp->roff = roff_alloc(curp, curp->mchars, options);
902 if (curp->options & MPARSE_MDOC)
903 curp->pmdoc = mdoc_alloc(
904 curp->roff, curp, curp->defos,
905 curp->options & MPARSE_QUICK ? 1 : 0);
906 if (curp->options & MPARSE_MAN)
907 curp->pman = man_alloc(
908 curp->roff, curp, curp->defos,
909 curp->options & MPARSE_QUICK ? 1 : 0);
910
911 return(curp);
912 }
913
914 void
915 mparse_reset(struct mparse *curp)
916 {
917
918 roff_reset(curp->roff);
919
920 if (curp->mdoc)
921 mdoc_reset(curp->mdoc);
922 if (curp->man)
923 man_reset(curp->man);
924 if (curp->secondary)
925 curp->secondary->sz = 0;
926
927 curp->file_status = MANDOCLEVEL_OK;
928 curp->mdoc = NULL;
929 curp->man = NULL;
930
931 free(curp->sodest);
932 curp->sodest = NULL;
933 }
934
935 void
936 mparse_free(struct mparse *curp)
937 {
938
939 if (curp->pmdoc)
940 mdoc_free(curp->pmdoc);
941 if (curp->pman)
942 man_free(curp->pman);
943 if (curp->roff)
944 roff_free(curp->roff);
945 if (curp->secondary)
946 free(curp->secondary->buf);
947
948 free(curp->secondary);
949 free(curp->sodest);
950 free(curp);
951 }
952
953 void
954 mparse_result(struct mparse *curp,
955 struct mdoc **mdoc, struct man **man, char **sodest)
956 {
957
958 if (sodest && NULL != (*sodest = curp->sodest)) {
959 *mdoc = NULL;
960 *man = NULL;
961 return;
962 }
963 if (mdoc)
964 *mdoc = curp->mdoc;
965 if (man)
966 *man = curp->man;
967 }
968
969 void
970 mandoc_vmsg(enum mandocerr t, struct mparse *m,
971 int ln, int pos, const char *fmt, ...)
972 {
973 char buf[256];
974 va_list ap;
975
976 va_start(ap, fmt);
977 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
978 va_end(ap);
979
980 mandoc_msg(t, m, ln, pos, buf);
981 }
982
983 void
984 mandoc_msg(enum mandocerr er, struct mparse *m,
985 int ln, int col, const char *msg)
986 {
987 enum mandoclevel level;
988
989 level = MANDOCLEVEL_UNSUPP;
990 while (er < mandoclimits[level])
991 level--;
992
993 if (level < m->wlevel && er != MANDOCERR_FILE)
994 return;
995
996 if (m->mmsg)
997 (*m->mmsg)(er, level, m->file, ln, col, msg);
998
999 if (m->file_status < level)
1000 m->file_status = level;
1001 }
1002
1003 const char *
1004 mparse_strerror(enum mandocerr er)
1005 {
1006
1007 return(mandocerrs[er]);
1008 }
1009
1010 const char *
1011 mparse_strlevel(enum mandoclevel lvl)
1012 {
1013 return(mandoclevels[lvl]);
1014 }
1015
1016 void
1017 mparse_keep(struct mparse *p)
1018 {
1019
1020 assert(NULL == p->secondary);
1021 p->secondary = mandoc_calloc(1, sizeof(struct buf));
1022 }
1023
1024 const char *
1025 mparse_getkeep(const struct mparse *p)
1026 {
1027
1028 assert(p->secondary);
1029 return(p->secondary->sz ? p->secondary->buf : NULL);
1030 }