]> git.cameronkatri.com Git - mandoc.git/blob - read.c
If a non-gz manual is read after a gzipped manual, refrain
[mandoc.git] / read.c
1 /* $Id: read.c,v 1.129 2015/03/02 14:50:17 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include "config.h"
20
21 #include <sys/types.h>
22 #if HAVE_MMAP
23 #include <sys/mman.h>
24 #include <sys/stat.h>
25 #endif
26 #include <sys/wait.h>
27
28 #include <assert.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <stdarg.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38
39 #include "mandoc.h"
40 #include "mandoc_aux.h"
41 #include "libmandoc.h"
42 #include "mdoc.h"
43 #include "man.h"
44
45 #define REPARSE_LIMIT 1000
46
47 struct mparse {
48 struct man *pman; /* persistent man parser */
49 struct mdoc *pmdoc; /* persistent mdoc parser */
50 struct man *man; /* man parser */
51 struct mdoc *mdoc; /* mdoc parser */
52 struct roff *roff; /* roff parser (!NULL) */
53 const struct mchars *mchars; /* character table */
54 char *sodest; /* filename pointed to by .so */
55 const char *file; /* filename of current input file */
56 struct buf *primary; /* buffer currently being parsed */
57 struct buf *secondary; /* preprocessed copy of input */
58 const char *defos; /* default operating system */
59 mandocmsg mmsg; /* warning/error message handler */
60 enum mandoclevel file_status; /* status of current parse */
61 enum mandoclevel wlevel; /* ignore messages below this */
62 int options; /* parser options */
63 int filenc; /* encoding of the current file */
64 int reparse_count; /* finite interp. stack */
65 int line; /* line number in the file */
66 pid_t child; /* the gunzip(1) process */
67 };
68
/* Helpers private to this file. */
static	void	  choose_parser(struct mparse *curp);
static	void	  resize_buf(struct buf *buf, size_t initial);
static	void	  mparse_buf_r(struct mparse *curp, struct buf blk,
			size_t i, int start);
static	int	  read_whole_file(struct mparse *curp, const char *file,
			int fd, struct buf *fb, int *with_mmap);
static	void	  mparse_end(struct mparse *curp);
static	void	  mparse_parse_buffer(struct mparse *curp, struct buf blk,
			const char *file);
77
78 static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = {
79 MANDOCERR_OK,
80 MANDOCERR_WARNING,
81 MANDOCERR_WARNING,
82 MANDOCERR_ERROR,
83 MANDOCERR_UNSUPP,
84 MANDOCERR_MAX,
85 MANDOCERR_MAX
86 };
87
88 static const char * const mandocerrs[MANDOCERR_MAX] = {
89 "ok",
90
91 "generic warning",
92
93 /* related to the prologue */
94 "missing manual title, using UNTITLED",
95 "missing manual title, using \"\"",
96 "lower case character in document title",
97 "missing manual section, using \"\"",
98 "unknown manual section",
99 "missing date, using today's date",
100 "cannot parse date, using it verbatim",
101 "missing Os macro, using \"\"",
102 "duplicate prologue macro",
103 "late prologue macro",
104 "skipping late title macro",
105 "prologue macros out of order",
106
107 /* related to document structure */
108 ".so is fragile, better use ln(1)",
109 "no document body",
110 "content before first section header",
111 "first section is not \"NAME\"",
112 "NAME section without name",
113 "NAME section without description",
114 "description not at the end of NAME",
115 "bad NAME section content",
116 "missing description line, using \"\"",
117 "sections out of conventional order",
118 "duplicate section title",
119 "unexpected section",
120 "unusual Xr order",
121 "unusual Xr punctuation",
122 "AUTHORS section without An macro",
123
124 /* related to macros and nesting */
125 "obsolete macro",
126 "macro neither callable nor escaped",
127 "skipping paragraph macro",
128 "moving paragraph macro out of list",
129 "skipping no-space macro",
130 "blocks badly nested",
131 "nested displays are not portable",
132 "moving content out of list",
133 ".Vt block has child macro",
134 "fill mode already enabled, skipping",
135 "fill mode already disabled, skipping",
136 "line scope broken",
137
138 /* related to missing macro arguments */
139 "skipping empty request",
140 "conditional request controls empty scope",
141 "skipping empty macro",
142 "empty block",
143 "empty argument, using 0n",
144 "missing display type, using -ragged",
145 "list type is not the first argument",
146 "missing -width in -tag list, using 8n",
147 "missing utility name, using \"\"",
148 "missing function name, using \"\"",
149 "empty head in list item",
150 "empty list item",
151 "missing font type, using \\fR",
152 "unknown font type, using \\fR",
153 "nothing follows prefix",
154 "empty reference block",
155 "missing -std argument, adding it",
156 "missing option string, using \"\"",
157 "missing resource identifier, using \"\"",
158 "missing eqn box, using \"\"",
159
160 /* related to bad macro arguments */
161 "unterminated quoted argument",
162 "duplicate argument",
163 "skipping duplicate argument",
164 "skipping duplicate display type",
165 "skipping duplicate list type",
166 "skipping -width argument",
167 "wrong number of cells",
168 "unknown AT&T UNIX version",
169 "comma in function argument",
170 "parenthesis in function name",
171 "invalid content in Rs block",
172 "invalid Boolean argument",
173 "unknown font, skipping request",
174 "odd number of characters in request",
175
176 /* related to plain text */
177 "blank line in fill mode, using .sp",
178 "tab in filled text",
179 "whitespace at end of input line",
180 "bad comment style",
181 "invalid escape sequence",
182 "undefined string, using \"\"",
183
184 /* related to tables */
185 "tbl line starts with span",
186 "tbl column starts with span",
187 "skipping vertical bar in tbl layout",
188
189 "generic error",
190
191 /* related to tables */
192 "non-alphabetic character in tbl options",
193 "skipping unknown tbl option",
194 "missing tbl option argument",
195 "wrong tbl option argument size",
196 "empty tbl layout",
197 "invalid character in tbl layout",
198 "unmatched parenthesis in tbl layout",
199 "tbl without any data cells",
200 "ignoring data in spanned tbl cell",
201 "ignoring extra tbl data cells",
202 "data block open at end of tbl",
203
204 /* related to document structure and macros */
205 NULL,
206 "input stack limit exceeded, infinite loop?",
207 "skipping bad character",
208 "skipping unknown macro",
209 "skipping insecure request",
210 "skipping item outside list",
211 "skipping column outside column list",
212 "skipping end of block that is not open",
213 "fewer RS blocks open, skipping",
214 "inserting missing end of block",
215 "appending missing end of block",
216
217 /* related to request and macro arguments */
218 "escaped character not allowed in a name",
219 "NOT IMPLEMENTED: Bd -file",
220 "missing list type, using -item",
221 "missing manual name, using \"\"",
222 "uname(3) system call failed, using UNKNOWN",
223 "unknown standard specifier",
224 "skipping request without numeric argument",
225 "NOT IMPLEMENTED: .so with absolute path or \"..\"",
226 ".so request failed",
227 "skipping all arguments",
228 "skipping excess arguments",
229 "divide by zero",
230
231 "unsupported feature",
232 "input too large",
233 "unsupported control character",
234 "unsupported roff request",
235 "eqn delim option in tbl",
236 "unsupported tbl layout modifier",
237 "ignoring macro in table",
238 };
239
240 static const char * const mandoclevels[MANDOCLEVEL_MAX] = {
241 "SUCCESS",
242 "RESERVED",
243 "WARNING",
244 "ERROR",
245 "UNSUPP",
246 "BADARG",
247 "SYSERR"
248 };
249
250
251 static void
252 resize_buf(struct buf *buf, size_t initial)
253 {
254
255 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
256 buf->buf = mandoc_realloc(buf->buf, buf->sz);
257 }
258
259 static void
260 choose_parser(struct mparse *curp)
261 {
262 char *cp, *ep;
263 int format;
264
265 /*
266 * If neither command line arguments -mdoc or -man select
267 * a parser nor the roff parser found a .Dd or .TH macro
268 * yet, look ahead in the main input buffer.
269 */
270
271 if ((format = roff_getformat(curp->roff)) == 0) {
272 cp = curp->primary->buf;
273 ep = cp + curp->primary->sz;
274 while (cp < ep) {
275 if (*cp == '.' || *cp == '\'') {
276 cp++;
277 if (cp[0] == 'D' && cp[1] == 'd') {
278 format = MPARSE_MDOC;
279 break;
280 }
281 if (cp[0] == 'T' && cp[1] == 'H') {
282 format = MPARSE_MAN;
283 break;
284 }
285 }
286 cp = memchr(cp, '\n', ep - cp);
287 if (cp == NULL)
288 break;
289 cp++;
290 }
291 }
292
293 if (format == MPARSE_MDOC) {
294 if (NULL == curp->pmdoc)
295 curp->pmdoc = mdoc_alloc(
296 curp->roff, curp, curp->defos,
297 MPARSE_QUICK & curp->options ? 1 : 0);
298 assert(curp->pmdoc);
299 curp->mdoc = curp->pmdoc;
300 return;
301 }
302
303 /* Fall back to man(7) as a last resort. */
304
305 if (NULL == curp->pman)
306 curp->pman = man_alloc(
307 curp->roff, curp, curp->defos,
308 MPARSE_QUICK & curp->options ? 1 : 0);
309 assert(curp->pman);
310 curp->man = curp->pman;
311 }
312
313 /*
314 * Main parse routine for a buffer.
315 * It assumes encoding and line numbering are already set up.
316 * It can recurse directly (for invocations of user-defined
317 * macros, inline equations, and input line traps)
318 * and indirectly (for .so file inclusion).
319 */
320 static void
321 mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
322 {
323 const struct tbl_span *span;
324 struct buf ln;
325 const char *save_file;
326 char *cp;
327 size_t pos; /* byte number in the ln buffer */
328 enum rofferr rr;
329 int of;
330 int lnn; /* line number in the real file */
331 int fd;
332 pid_t save_child;
333 unsigned char c;
334
335 memset(&ln, 0, sizeof(ln));
336
337 lnn = curp->line;
338 pos = 0;
339
340 while (i < blk.sz) {
341 if (0 == pos && '\0' == blk.buf[i])
342 break;
343
344 if (start) {
345 curp->line = lnn;
346 curp->reparse_count = 0;
347
348 if (lnn < 3 &&
349 curp->filenc & MPARSE_UTF8 &&
350 curp->filenc & MPARSE_LATIN1)
351 curp->filenc = preconv_cue(&blk, i);
352 }
353
354 while (i < blk.sz && (start || blk.buf[i] != '\0')) {
355
356 /*
357 * When finding an unescaped newline character,
358 * leave the character loop to process the line.
359 * Skip a preceding carriage return, if any.
360 */
361
362 if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
363 '\n' == blk.buf[i + 1])
364 ++i;
365 if ('\n' == blk.buf[i]) {
366 ++i;
367 ++lnn;
368 break;
369 }
370
371 /*
372 * Make sure we have space for the worst
373 * case of 11 bytes: "\\[u10ffff]\0"
374 */
375
376 if (pos + 11 > ln.sz)
377 resize_buf(&ln, 256);
378
379 /*
380 * Encode 8-bit input.
381 */
382
383 c = blk.buf[i];
384 if (c & 0x80) {
385 if ( ! (curp->filenc && preconv_encode(
386 &blk, &i, &ln, &pos, &curp->filenc))) {
387 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
388 curp->line, pos, "0x%x", c);
389 ln.buf[pos++] = '?';
390 i++;
391 }
392 continue;
393 }
394
395 /*
396 * Exclude control characters.
397 */
398
399 if (c == 0x7f || (c < 0x20 && c != 0x09)) {
400 mandoc_vmsg(c == 0x00 || c == 0x04 ||
401 c > 0x0a ? MANDOCERR_CHAR_BAD :
402 MANDOCERR_CHAR_UNSUPP,
403 curp, curp->line, pos, "0x%x", c);
404 i++;
405 if (c != '\r')
406 ln.buf[pos++] = '?';
407 continue;
408 }
409
410 /* Trailing backslash = a plain char. */
411
412 if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
413 ln.buf[pos++] = blk.buf[i++];
414 continue;
415 }
416
417 /*
418 * Found escape and at least one other character.
419 * When it's a newline character, skip it.
420 * When there is a carriage return in between,
421 * skip that one as well.
422 */
423
424 if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
425 '\n' == blk.buf[i + 2])
426 ++i;
427 if ('\n' == blk.buf[i + 1]) {
428 i += 2;
429 ++lnn;
430 continue;
431 }
432
433 if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
434 i += 2;
435 /* Comment, skip to end of line */
436 for (; i < blk.sz; ++i) {
437 if ('\n' == blk.buf[i]) {
438 ++i;
439 ++lnn;
440 break;
441 }
442 }
443
444 /* Backout trailing whitespaces */
445 for (; pos > 0; --pos) {
446 if (ln.buf[pos - 1] != ' ')
447 break;
448 if (pos > 2 && ln.buf[pos - 2] == '\\')
449 break;
450 }
451 break;
452 }
453
454 /* Catch escaped bogus characters. */
455
456 c = (unsigned char) blk.buf[i+1];
457
458 if ( ! (isascii(c) &&
459 (isgraph(c) || isblank(c)))) {
460 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
461 curp->line, pos, "0x%x", c);
462 i += 2;
463 ln.buf[pos++] = '?';
464 continue;
465 }
466
467 /* Some other escape sequence, copy & cont. */
468
469 ln.buf[pos++] = blk.buf[i++];
470 ln.buf[pos++] = blk.buf[i++];
471 }
472
473 if (pos >= ln.sz)
474 resize_buf(&ln, 256);
475
476 ln.buf[pos] = '\0';
477
478 /*
479 * A significant amount of complexity is contained by
480 * the roff preprocessor. It's line-oriented but can be
481 * expressed on one line, so we need at times to
482 * readjust our starting point and re-run it. The roff
483 * preprocessor can also readjust the buffers with new
484 * data, so we pass them in wholesale.
485 */
486
487 of = 0;
488
489 /*
490 * Maintain a lookaside buffer of all parsed lines. We
491 * only do this if mparse_keep() has been invoked (the
492 * buffer may be accessed with mparse_getkeep()).
493 */
494
495 if (curp->secondary) {
496 curp->secondary->buf = mandoc_realloc(
497 curp->secondary->buf,
498 curp->secondary->sz + pos + 2);
499 memcpy(curp->secondary->buf +
500 curp->secondary->sz,
501 ln.buf, pos);
502 curp->secondary->sz += pos;
503 curp->secondary->buf
504 [curp->secondary->sz] = '\n';
505 curp->secondary->sz++;
506 curp->secondary->buf
507 [curp->secondary->sz] = '\0';
508 }
509 rerun:
510 rr = roff_parseln(curp->roff, curp->line, &ln, &of);
511
512 switch (rr) {
513 case ROFF_REPARSE:
514 if (REPARSE_LIMIT >= ++curp->reparse_count)
515 mparse_buf_r(curp, ln, of, 0);
516 else
517 mandoc_msg(MANDOCERR_ROFFLOOP, curp,
518 curp->line, pos, NULL);
519 pos = 0;
520 continue;
521 case ROFF_APPEND:
522 pos = strlen(ln.buf);
523 continue;
524 case ROFF_RERUN:
525 goto rerun;
526 case ROFF_IGN:
527 pos = 0;
528 continue;
529 case ROFF_SO:
530 if ( ! (curp->options & MPARSE_SO) &&
531 (i >= blk.sz || blk.buf[i] == '\0')) {
532 curp->sodest = mandoc_strdup(ln.buf + of);
533 free(ln.buf);
534 return;
535 }
536 /*
537 * We remove `so' clauses from our lookaside
538 * buffer because we're going to descend into
539 * the file recursively.
540 */
541 if (curp->secondary)
542 curp->secondary->sz -= pos + 1;
543 save_file = curp->file;
544 save_child = curp->child;
545 if (mparse_open(curp, &fd, ln.buf + of) ==
546 MANDOCLEVEL_OK) {
547 mparse_readfd(curp, fd, ln.buf + of);
548 curp->file = save_file;
549 } else {
550 curp->file = save_file;
551 mandoc_vmsg(MANDOCERR_SO_FAIL,
552 curp, curp->line, pos,
553 ".so %s", ln.buf + of);
554 ln.sz = mandoc_asprintf(&cp,
555 ".sp\nSee the file %s.\n.sp",
556 ln.buf + of);
557 free(ln.buf);
558 ln.buf = cp;
559 of = 0;
560 mparse_buf_r(curp, ln, of, 0);
561 }
562 curp->child = save_child;
563 pos = 0;
564 continue;
565 default:
566 break;
567 }
568
569 /*
570 * If input parsers have not been allocated, do so now.
571 * We keep these instanced between parsers, but set them
572 * locally per parse routine since we can use different
573 * parsers with each one.
574 */
575
576 if ( ! (curp->man || curp->mdoc))
577 choose_parser(curp);
578
579 /*
580 * Lastly, push down into the parsers themselves.
581 * If libroff returns ROFF_TBL, then add it to the
582 * currently open parse. Since we only get here if
583 * there does exist data (see tbl_data.c), we're
584 * guaranteed that something's been allocated.
585 * Do the same for ROFF_EQN.
586 */
587
588 if (rr == ROFF_TBL) {
589 while ((span = roff_span(curp->roff)) != NULL)
590 if (curp->man == NULL)
591 mdoc_addspan(curp->mdoc, span);
592 else
593 man_addspan(curp->man, span);
594 } else if (rr == ROFF_EQN) {
595 if (curp->man == NULL)
596 mdoc_addeqn(curp->mdoc, roff_eqn(curp->roff));
597 else
598 man_addeqn(curp->man, roff_eqn(curp->roff));
599 } else if ((curp->man == NULL ?
600 mdoc_parseln(curp->mdoc, curp->line, ln.buf, of) :
601 man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
602 break;
603
604 /* Temporary buffers typically are not full. */
605
606 if (0 == start && '\0' == blk.buf[i])
607 break;
608
609 /* Start the next input line. */
610
611 pos = 0;
612 }
613
614 free(ln.buf);
615 }
616
617 static int
618 read_whole_file(struct mparse *curp, const char *file, int fd,
619 struct buf *fb, int *with_mmap)
620 {
621 size_t off;
622 ssize_t ssz;
623
624 #if HAVE_MMAP
625 struct stat st;
626 if (-1 == fstat(fd, &st)) {
627 perror(file);
628 exit((int)MANDOCLEVEL_SYSERR);
629 }
630
631 /*
632 * If we're a regular file, try just reading in the whole entry
633 * via mmap(). This is faster than reading it into blocks, and
634 * since each file is only a few bytes to begin with, I'm not
635 * concerned that this is going to tank any machines.
636 */
637
638 if (S_ISREG(st.st_mode)) {
639 if (st.st_size >= (1U << 31)) {
640 mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
641 return(0);
642 }
643 *with_mmap = 1;
644 fb->sz = (size_t)st.st_size;
645 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
646 if (fb->buf != MAP_FAILED)
647 return(1);
648 }
649 #endif
650
651 /*
652 * If this isn't a regular file (like, say, stdin), then we must
653 * go the old way and just read things in bit by bit.
654 */
655
656 *with_mmap = 0;
657 off = 0;
658 fb->sz = 0;
659 fb->buf = NULL;
660 for (;;) {
661 if (off == fb->sz) {
662 if (fb->sz == (1U << 31)) {
663 mandoc_msg(MANDOCERR_TOOLARGE, curp,
664 0, 0, NULL);
665 break;
666 }
667 resize_buf(fb, 65536);
668 }
669 ssz = read(fd, fb->buf + (int)off, fb->sz - off);
670 if (ssz == 0) {
671 fb->sz = off;
672 return(1);
673 }
674 if (ssz == -1) {
675 perror(file);
676 exit((int)MANDOCLEVEL_SYSERR);
677 }
678 off += (size_t)ssz;
679 }
680
681 free(fb->buf);
682 fb->buf = NULL;
683 return(0);
684 }
685
686 static void
687 mparse_end(struct mparse *curp)
688 {
689
690 if (curp->mdoc == NULL &&
691 curp->man == NULL &&
692 curp->sodest == NULL) {
693 if (curp->options & MPARSE_MDOC)
694 curp->mdoc = curp->pmdoc;
695 else {
696 if (curp->pman == NULL)
697 curp->pman = man_alloc(
698 curp->roff, curp, curp->defos,
699 curp->options & MPARSE_QUICK ? 1 : 0);
700 curp->man = curp->pman;
701 }
702 }
703 if (curp->mdoc)
704 mdoc_endparse(curp->mdoc);
705 if (curp->man)
706 man_endparse(curp->man);
707 roff_endparse(curp->roff);
708 }
709
710 static void
711 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
712 {
713 struct buf *svprimary;
714 const char *svfile;
715 size_t offset;
716 static int recursion_depth;
717
718 if (64 < recursion_depth) {
719 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
720 return;
721 }
722
723 /* Line number is per-file. */
724 svfile = curp->file;
725 curp->file = file;
726 svprimary = curp->primary;
727 curp->primary = &blk;
728 curp->line = 1;
729 recursion_depth++;
730
731 /* Skip an UTF-8 byte order mark. */
732 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
733 (unsigned char)blk.buf[0] == 0xef &&
734 (unsigned char)blk.buf[1] == 0xbb &&
735 (unsigned char)blk.buf[2] == 0xbf) {
736 offset = 3;
737 curp->filenc &= ~MPARSE_LATIN1;
738 } else
739 offset = 0;
740
741 mparse_buf_r(curp, blk, offset, 1);
742
743 if (--recursion_depth == 0)
744 mparse_end(curp);
745
746 curp->primary = svprimary;
747 curp->file = svfile;
748 }
749
750 enum mandoclevel
751 mparse_readmem(struct mparse *curp, void *buf, size_t len,
752 const char *file)
753 {
754 struct buf blk;
755
756 blk.buf = buf;
757 blk.sz = len;
758
759 mparse_parse_buffer(curp, blk, file);
760 return(curp->file_status);
761 }
762
763 /*
764 * Read the whole file into memory and call the parsers.
765 * Called recursively when an .so request is encountered.
766 */
767 enum mandoclevel
768 mparse_readfd(struct mparse *curp, int fd, const char *file)
769 {
770 struct buf blk;
771 int with_mmap;
772 int save_filenc;
773
774 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
775 save_filenc = curp->filenc;
776 curp->filenc = curp->options &
777 (MPARSE_UTF8 | MPARSE_LATIN1);
778 mparse_parse_buffer(curp, blk, file);
779 curp->filenc = save_filenc;
780 #if HAVE_MMAP
781 if (with_mmap)
782 munmap(blk.buf, blk.sz);
783 else
784 #endif
785 free(blk.buf);
786 }
787
788 if (fd != STDIN_FILENO && close(fd) == -1)
789 perror(file);
790
791 mparse_wait(curp);
792 return(curp->file_status);
793 }
794
795 enum mandoclevel
796 mparse_open(struct mparse *curp, int *fd, const char *file)
797 {
798 int pfd[2];
799 int save_errno;
800 char *cp;
801
802 curp->file = file;
803
804 /* Unless zipped, try to just open the file. */
805
806 if ((cp = strrchr(file, '.')) == NULL ||
807 strcmp(cp + 1, "gz")) {
808 curp->child = 0;
809 if ((*fd = open(file, O_RDONLY)) != -1)
810 return(MANDOCLEVEL_OK);
811
812 /* Open failed; try to append ".gz". */
813
814 mandoc_asprintf(&cp, "%s.gz", file);
815 file = cp;
816 } else
817 cp = NULL;
818
819 /* Before forking, make sure the file can be read. */
820
821 save_errno = errno;
822 if (access(file, R_OK) == -1) {
823 if (cp != NULL)
824 errno = save_errno;
825 free(cp);
826 *fd = -1;
827 curp->child = 0;
828 mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
829 return(MANDOCLEVEL_ERROR);
830 }
831
832 /* Run gunzip(1). */
833
834 if (pipe(pfd) == -1) {
835 perror("pipe");
836 exit((int)MANDOCLEVEL_SYSERR);
837 }
838
839 switch (curp->child = fork()) {
840 case -1:
841 perror("fork");
842 exit((int)MANDOCLEVEL_SYSERR);
843 case 0:
844 close(pfd[0]);
845 if (dup2(pfd[1], STDOUT_FILENO) == -1) {
846 perror("dup");
847 exit((int)MANDOCLEVEL_SYSERR);
848 }
849 execlp("gunzip", "gunzip", "-c", file, NULL);
850 perror("exec");
851 exit((int)MANDOCLEVEL_SYSERR);
852 default:
853 close(pfd[1]);
854 *fd = pfd[0];
855 return(MANDOCLEVEL_OK);
856 }
857 }
858
859 enum mandoclevel
860 mparse_wait(struct mparse *curp)
861 {
862 int status;
863
864 if (curp->child == 0)
865 return(MANDOCLEVEL_OK);
866
867 if (waitpid(curp->child, &status, 0) == -1) {
868 perror("wait");
869 exit((int)MANDOCLEVEL_SYSERR);
870 }
871 curp->child = 0;
872 if (WIFSIGNALED(status)) {
873 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
874 "gunzip died from signal %d", WTERMSIG(status));
875 return(MANDOCLEVEL_ERROR);
876 }
877 if (WEXITSTATUS(status)) {
878 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
879 "gunzip failed with code %d", WEXITSTATUS(status));
880 return(MANDOCLEVEL_ERROR);
881 }
882 return(MANDOCLEVEL_OK);
883 }
884
885 struct mparse *
886 mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
887 const struct mchars *mchars, const char *defos)
888 {
889 struct mparse *curp;
890
891 curp = mandoc_calloc(1, sizeof(struct mparse));
892
893 curp->options = options;
894 curp->wlevel = wlevel;
895 curp->mmsg = mmsg;
896 curp->defos = defos;
897
898 curp->mchars = mchars;
899 curp->roff = roff_alloc(curp, curp->mchars, options);
900 if (curp->options & MPARSE_MDOC)
901 curp->pmdoc = mdoc_alloc(
902 curp->roff, curp, curp->defos,
903 curp->options & MPARSE_QUICK ? 1 : 0);
904 if (curp->options & MPARSE_MAN)
905 curp->pman = man_alloc(
906 curp->roff, curp, curp->defos,
907 curp->options & MPARSE_QUICK ? 1 : 0);
908
909 return(curp);
910 }
911
912 void
913 mparse_reset(struct mparse *curp)
914 {
915
916 roff_reset(curp->roff);
917
918 if (curp->mdoc)
919 mdoc_reset(curp->mdoc);
920 if (curp->man)
921 man_reset(curp->man);
922 if (curp->secondary)
923 curp->secondary->sz = 0;
924
925 curp->file_status = MANDOCLEVEL_OK;
926 curp->mdoc = NULL;
927 curp->man = NULL;
928
929 free(curp->sodest);
930 curp->sodest = NULL;
931 }
932
933 void
934 mparse_free(struct mparse *curp)
935 {
936
937 if (curp->pmdoc)
938 mdoc_free(curp->pmdoc);
939 if (curp->pman)
940 man_free(curp->pman);
941 if (curp->roff)
942 roff_free(curp->roff);
943 if (curp->secondary)
944 free(curp->secondary->buf);
945
946 free(curp->secondary);
947 free(curp->sodest);
948 free(curp);
949 }
950
951 void
952 mparse_result(struct mparse *curp,
953 struct mdoc **mdoc, struct man **man, char **sodest)
954 {
955
956 if (sodest && NULL != (*sodest = curp->sodest)) {
957 *mdoc = NULL;
958 *man = NULL;
959 return;
960 }
961 if (mdoc)
962 *mdoc = curp->mdoc;
963 if (man)
964 *man = curp->man;
965 }
966
967 void
968 mandoc_vmsg(enum mandocerr t, struct mparse *m,
969 int ln, int pos, const char *fmt, ...)
970 {
971 char buf[256];
972 va_list ap;
973
974 va_start(ap, fmt);
975 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
976 va_end(ap);
977
978 mandoc_msg(t, m, ln, pos, buf);
979 }
980
981 void
982 mandoc_msg(enum mandocerr er, struct mparse *m,
983 int ln, int col, const char *msg)
984 {
985 enum mandoclevel level;
986
987 level = MANDOCLEVEL_UNSUPP;
988 while (er < mandoclimits[level])
989 level--;
990
991 if (level < m->wlevel && er != MANDOCERR_FILE)
992 return;
993
994 if (m->mmsg)
995 (*m->mmsg)(er, level, m->file, ln, col, msg);
996
997 if (m->file_status < level)
998 m->file_status = level;
999 }
1000
1001 const char *
1002 mparse_strerror(enum mandocerr er)
1003 {
1004
1005 return(mandocerrs[er]);
1006 }
1007
1008 const char *
1009 mparse_strlevel(enum mandoclevel lvl)
1010 {
1011 return(mandoclevels[lvl]);
1012 }
1013
1014 void
1015 mparse_keep(struct mparse *p)
1016 {
1017
1018 assert(NULL == p->secondary);
1019 p->secondary = mandoc_calloc(1, sizeof(struct buf));
1020 }
1021
1022 const char *
1023 mparse_getkeep(const struct mparse *p)
1024 {
1025
1026 assert(p->secondary);
1027 return(p->secondary->sz ? p->secondary->buf : NULL);
1028 }