]> git.cameronkatri.com Git - mandoc.git/blob - read.c
Add *.gz support to apropos(1) -a, man(1), and even mandoc(1).
[mandoc.git] / read.c
1 /* $Id: read.c,v 1.82 2014/09/03 23:21:47 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include "config.h"
20
21 #include <sys/types.h>
22 #if HAVE_MMAP
23 #include <sys/mman.h>
24 #include <sys/stat.h>
25 #endif
26 #include <sys/wait.h>
27
28 #include <assert.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <stdarg.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38
39 #include "mandoc.h"
40 #include "mandoc_aux.h"
41 #include "libmandoc.h"
42 #include "mdoc.h"
43 #include "man.h"
44 #include "main.h"
45
/*
 * Maximum number of ROFF_REPARSE round trips mparse_buf_r() accepts
 * before it reports MANDOCERR_ROFFLOOP instead of recursing again.
 * The counter is reset for every line read from a top-level buffer.
 */
#define	REPARSE_LIMIT	1000
47
/*
 * A block of characters together with its size in bytes.
 * Used both for whole input files and for single assembled lines.
 */
struct	buf {
	char	*buf;	/* binary input buffer */
	size_t	 sz;	/* size of binary buffer */
};
52
53 struct mparse {
54 enum mandoclevel file_status; /* status of current parse */
55 enum mandoclevel wlevel; /* ignore messages below this */
56 int line; /* line number in the file */
57 int options; /* parser options */
58 struct man *pman; /* persistent man parser */
59 struct mdoc *pmdoc; /* persistent mdoc parser */
60 struct man *man; /* man parser */
61 struct mdoc *mdoc; /* mdoc parser */
62 struct roff *roff; /* roff parser (!NULL) */
63 char *sodest; /* filename pointed to by .so */
64 int reparse_count; /* finite interp. stack */
65 mandocmsg mmsg; /* warning/error message handler */
66 const char *file;
67 struct buf *secondary;
68 const char *defos; /* default operating system */
69 };
70
/* File-local helpers, declared in the order they are defined below. */
static	void	  resize_buf(struct buf *buf, size_t initial);
static	void	  pset(const char *buf, int pos, struct mparse *curp);
static	void	  mparse_buf_r(struct mparse *curp, struct buf blk,
			int start);
static	int	  read_whole_file(struct mparse *curp, const char *file,
			int fd, struct buf *fb, int *with_mmap);
static	void	  mparse_end(struct mparse *curp);
static	void	  mparse_parse_buffer(struct mparse *curp, struct buf blk,
			const char *file);
79
80 static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = {
81 MANDOCERR_OK,
82 MANDOCERR_WARNING,
83 MANDOCERR_WARNING,
84 MANDOCERR_ERROR,
85 MANDOCERR_FATAL,
86 MANDOCERR_MAX,
87 MANDOCERR_MAX
88 };
89
90 static const char * const mandocerrs[MANDOCERR_MAX] = {
91 "ok",
92
93 "generic warning",
94
95 /* related to the prologue */
96 "missing manual title, using UNTITLED",
97 "missing manual title, using \"\"",
98 "lower case character in document title",
99 "missing manual section, using \"\"",
100 "unknown manual section",
101 "unknown manual volume or arch",
102 "missing date, using today's date",
103 "cannot parse date, using it verbatim",
104 "missing Os macro, using \"\"",
105 "duplicate prologue macro",
106 "late prologue macro",
107 "skipping late title macro",
108 "prologue macros out of order",
109
110 /* related to document structure */
111 ".so is fragile, better use ln(1)",
112 "no document body",
113 "content before first section header",
114 "first section is not \"NAME\"",
115 "bad NAME section contents",
116 "sections out of conventional order",
117 "duplicate section title",
118 "unexpected section",
119
120 /* related to macros and nesting */
121 "obsolete macro",
122 "skipping paragraph macro",
123 "moving paragraph macro out of list",
124 "skipping no-space macro",
125 "blocks badly nested",
126 "nested displays are not portable",
127 "moving content out of list",
128 ".Vt block has child macro",
129 "fill mode already enabled, skipping",
130 "fill mode already disabled, skipping",
131 "line scope broken",
132
133 /* related to missing macro arguments */
134 "skipping empty request",
135 "conditional request controls empty scope",
136 "skipping empty macro",
137 "empty argument, using 0n",
138 "argument count wrong",
139 "missing display type, using -ragged",
140 "list type is not the first argument",
141 "missing -width in -tag list, using 8n",
142 "missing utility name, using \"\"",
143 "empty head in list item",
144 "empty list item",
145 "missing font type, using \\fR",
146 "unknown font type, using \\fR",
147 "missing -std argument, adding it",
148
149 /* related to bad macro arguments */
150 "unterminated quoted argument",
151 "duplicate argument",
152 "skipping duplicate argument",
153 "skipping duplicate display type",
154 "skipping duplicate list type",
155 "skipping -width argument",
156 "unknown AT&T UNIX version",
157 "invalid content in Rs block",
158 "invalid Boolean argument",
159 "unknown font, skipping request",
160
161 /* related to plain text */
162 "blank line in fill mode, using .sp",
163 "tab in filled text",
164 "whitespace at end of input line",
165 "bad comment style",
166 "invalid escape sequence",
167 "undefined string, using \"\"",
168
169 "generic error",
170
171 /* related to equations */
172 "unexpected equation scope closure",
173 "equation scope open on exit",
174 "overlapping equation scopes",
175 "unexpected end of equation",
176 "equation syntax error",
177
178 /* related to tables */
179 "bad table syntax",
180 "bad table option",
181 "bad table layout",
182 "no table layout cells specified",
183 "no table data cells specified",
184 "ignore data in cell",
185 "data block still open",
186 "ignoring extra data cells",
187
188 /* related to document structure and macros */
189 "input stack limit exceeded, infinite loop?",
190 "skipping bad character",
191 "skipping unknown macro",
192 "skipping item outside list",
193 "skipping column outside column list",
194 "skipping end of block that is not open",
195 "inserting missing end of block",
196 "appending missing end of block",
197
198 /* related to request and macro arguments */
199 "escaped character not allowed in a name",
200 "argument count wrong",
201 "missing list type, using -item",
202 "missing manual name, using \"\"",
203 "uname(3) system call failed, using UNKNOWN",
204 "unknown standard specifier",
205 "skipping request without numeric argument",
206 "skipping all arguments",
207 "skipping excess arguments",
208
209 "generic fatal error",
210
211 "input too large",
212 "NOT IMPLEMENTED: Bd -file",
213 "NOT IMPLEMENTED: .so with absolute path or \"..\"",
214 ".so request failed",
215
216 /* system errors */
217 "cannot dup file descriptor",
218 "cannot exec",
219 "gunzip failed with code",
220 "cannot fork",
221 NULL,
222 "cannot open pipe",
223 "cannot read file",
224 "gunzip died from signal",
225 "cannot stat file",
226 "wait failed",
227 };
228
229 static const char * const mandoclevels[MANDOCLEVEL_MAX] = {
230 "SUCCESS",
231 "RESERVED",
232 "WARNING",
233 "ERROR",
234 "FATAL",
235 "BADARG",
236 "SYSERR"
237 };
238
239
240 static void
241 resize_buf(struct buf *buf, size_t initial)
242 {
243
244 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
245 buf->buf = mandoc_realloc(buf->buf, buf->sz);
246 }
247
248 static void
249 pset(const char *buf, int pos, struct mparse *curp)
250 {
251 int i;
252
253 /*
254 * Try to intuit which kind of manual parser should be used. If
255 * passed in by command-line (-man, -mdoc), then use that
256 * explicitly. If passed as -mandoc, then try to guess from the
257 * line: either skip dot-lines, use -mdoc when finding `.Dt', or
258 * default to -man, which is more lenient.
259 *
260 * Separate out pmdoc/pman from mdoc/man: the first persists
261 * through all parsers, while the latter is used per-parse.
262 */
263
264 if ('.' == buf[0] || '\'' == buf[0]) {
265 for (i = 1; buf[i]; i++)
266 if (' ' != buf[i] && '\t' != buf[i])
267 break;
268 if ('\0' == buf[i])
269 return;
270 }
271
272 if (MPARSE_MDOC & curp->options) {
273 curp->mdoc = curp->pmdoc;
274 return;
275 } else if (MPARSE_MAN & curp->options) {
276 curp->man = curp->pman;
277 return;
278 }
279
280 if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3)) {
281 if (NULL == curp->pmdoc)
282 curp->pmdoc = mdoc_alloc(
283 curp->roff, curp, curp->defos,
284 MPARSE_QUICK & curp->options ? 1 : 0);
285 assert(curp->pmdoc);
286 curp->mdoc = curp->pmdoc;
287 return;
288 }
289
290 if (NULL == curp->pman)
291 curp->pman = man_alloc(curp->roff, curp,
292 MPARSE_QUICK & curp->options ? 1 : 0);
293 assert(curp->pman);
294 curp->man = curp->pman;
295 }
296
297 /*
298 * Main parse routine for an opened file. This is called for each
299 * opened file and simply loops around the full input file, possibly
300 * nesting (i.e., with `so').
301 */
302 static void
303 mparse_buf_r(struct mparse *curp, struct buf blk, int start)
304 {
305 const struct tbl_span *span;
306 struct buf ln;
307 enum rofferr rr;
308 int i, of, rc;
309 int pos; /* byte number in the ln buffer */
310 int lnn; /* line number in the real file */
311 unsigned char c;
312
313 memset(&ln, 0, sizeof(struct buf));
314
315 lnn = curp->line;
316 pos = 0;
317
318 for (i = 0; i < (int)blk.sz; ) {
319 if (0 == pos && '\0' == blk.buf[i])
320 break;
321
322 if (start) {
323 curp->line = lnn;
324 curp->reparse_count = 0;
325 }
326
327 while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) {
328
329 /*
330 * When finding an unescaped newline character,
331 * leave the character loop to process the line.
332 * Skip a preceding carriage return, if any.
333 */
334
335 if ('\r' == blk.buf[i] && i + 1 < (int)blk.sz &&
336 '\n' == blk.buf[i + 1])
337 ++i;
338 if ('\n' == blk.buf[i]) {
339 ++i;
340 ++lnn;
341 break;
342 }
343
344 /*
345 * Make sure we have space for at least
346 * one backslash and one other character
347 * and the trailing NUL byte.
348 */
349
350 if (pos + 2 >= (int)ln.sz)
351 resize_buf(&ln, 256);
352
353 /*
354 * Warn about bogus characters. If you're using
355 * non-ASCII encoding, you're screwing your
356 * readers. Since I'd rather this not happen,
357 * I'll be helpful and replace these characters
358 * with "?", so we don't display gibberish.
359 * Note to manual writers: use special characters.
360 */
361
362 c = (unsigned char) blk.buf[i];
363
364 if ( ! (isascii(c) &&
365 (isgraph(c) || isblank(c)))) {
366 mandoc_vmsg(MANDOCERR_BADCHAR, curp,
367 curp->line, pos, "0x%x", c);
368 i++;
369 ln.buf[pos++] = '?';
370 continue;
371 }
372
373 /* Trailing backslash = a plain char. */
374
375 if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
376 ln.buf[pos++] = blk.buf[i++];
377 continue;
378 }
379
380 /*
381 * Found escape and at least one other character.
382 * When it's a newline character, skip it.
383 * When there is a carriage return in between,
384 * skip that one as well.
385 */
386
387 if ('\r' == blk.buf[i + 1] && i + 2 < (int)blk.sz &&
388 '\n' == blk.buf[i + 2])
389 ++i;
390 if ('\n' == blk.buf[i + 1]) {
391 i += 2;
392 ++lnn;
393 continue;
394 }
395
396 if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
397 i += 2;
398 /* Comment, skip to end of line */
399 for (; i < (int)blk.sz; ++i) {
400 if ('\n' == blk.buf[i]) {
401 ++i;
402 ++lnn;
403 break;
404 }
405 }
406
407 /* Backout trailing whitespaces */
408 for (; pos > 0; --pos) {
409 if (ln.buf[pos - 1] != ' ')
410 break;
411 if (pos > 2 && ln.buf[pos - 2] == '\\')
412 break;
413 }
414 break;
415 }
416
417 /* Catch escaped bogus characters. */
418
419 c = (unsigned char) blk.buf[i+1];
420
421 if ( ! (isascii(c) &&
422 (isgraph(c) || isblank(c)))) {
423 mandoc_vmsg(MANDOCERR_BADCHAR, curp,
424 curp->line, pos, "0x%x", c);
425 i += 2;
426 ln.buf[pos++] = '?';
427 continue;
428 }
429
430 /* Some other escape sequence, copy & cont. */
431
432 ln.buf[pos++] = blk.buf[i++];
433 ln.buf[pos++] = blk.buf[i++];
434 }
435
436 if (pos >= (int)ln.sz)
437 resize_buf(&ln, 256);
438
439 ln.buf[pos] = '\0';
440
441 /*
442 * A significant amount of complexity is contained by
443 * the roff preprocessor. It's line-oriented but can be
444 * expressed on one line, so we need at times to
445 * readjust our starting point and re-run it. The roff
446 * preprocessor can also readjust the buffers with new
447 * data, so we pass them in wholesale.
448 */
449
450 of = 0;
451
452 /*
453 * Maintain a lookaside buffer of all parsed lines. We
454 * only do this if mparse_keep() has been invoked (the
455 * buffer may be accessed with mparse_getkeep()).
456 */
457
458 if (curp->secondary) {
459 curp->secondary->buf = mandoc_realloc(
460 curp->secondary->buf,
461 curp->secondary->sz + pos + 2);
462 memcpy(curp->secondary->buf +
463 curp->secondary->sz,
464 ln.buf, pos);
465 curp->secondary->sz += pos;
466 curp->secondary->buf
467 [curp->secondary->sz] = '\n';
468 curp->secondary->sz++;
469 curp->secondary->buf
470 [curp->secondary->sz] = '\0';
471 }
472 rerun:
473 rr = roff_parseln(curp->roff, curp->line,
474 &ln.buf, &ln.sz, of, &of);
475
476 switch (rr) {
477 case ROFF_REPARSE:
478 if (REPARSE_LIMIT >= ++curp->reparse_count)
479 mparse_buf_r(curp, ln, 0);
480 else
481 mandoc_msg(MANDOCERR_ROFFLOOP, curp,
482 curp->line, pos, NULL);
483 pos = 0;
484 continue;
485 case ROFF_APPEND:
486 pos = (int)strlen(ln.buf);
487 continue;
488 case ROFF_RERUN:
489 goto rerun;
490 case ROFF_IGN:
491 pos = 0;
492 continue;
493 case ROFF_ERR:
494 assert(MANDOCLEVEL_FATAL <= curp->file_status);
495 break;
496 case ROFF_SO:
497 if (0 == (MPARSE_SO & curp->options) &&
498 (i >= (int)blk.sz || '\0' == blk.buf[i])) {
499 curp->sodest = mandoc_strdup(ln.buf + of);
500 free(ln.buf);
501 return;
502 }
503 /*
504 * We remove `so' clauses from our lookaside
505 * buffer because we're going to descend into
506 * the file recursively.
507 */
508 if (curp->secondary)
509 curp->secondary->sz -= pos + 1;
510 mparse_readfd(curp, -1, ln.buf + of);
511 if (MANDOCLEVEL_FATAL <= curp->file_status) {
512 mandoc_vmsg(MANDOCERR_SO_FAIL,
513 curp, curp->line, pos,
514 ".so %s", ln.buf + of);
515 break;
516 }
517 pos = 0;
518 continue;
519 default:
520 break;
521 }
522
523 /*
524 * If we encounter errors in the recursive parse, make
525 * sure we don't continue parsing.
526 */
527
528 if (MANDOCLEVEL_FATAL <= curp->file_status)
529 break;
530
531 /*
532 * If input parsers have not been allocated, do so now.
533 * We keep these instanced between parsers, but set them
534 * locally per parse routine since we can use different
535 * parsers with each one.
536 */
537
538 if ( ! (curp->man || curp->mdoc))
539 pset(ln.buf + of, pos - of, curp);
540
541 /*
542 * Lastly, push down into the parsers themselves. One
543 * of these will have already been set in the pset()
544 * routine.
545 * If libroff returns ROFF_TBL, then add it to the
546 * currently open parse. Since we only get here if
547 * there does exist data (see tbl_data.c), we're
548 * guaranteed that something's been allocated.
549 * Do the same for ROFF_EQN.
550 */
551
552 rc = -1;
553
554 if (ROFF_TBL == rr)
555 while (NULL != (span = roff_span(curp->roff))) {
556 rc = curp->man ?
557 man_addspan(curp->man, span) :
558 mdoc_addspan(curp->mdoc, span);
559 if (0 == rc)
560 break;
561 }
562 else if (ROFF_EQN == rr)
563 rc = curp->mdoc ?
564 mdoc_addeqn(curp->mdoc,
565 roff_eqn(curp->roff)) :
566 man_addeqn(curp->man,
567 roff_eqn(curp->roff));
568 else if (curp->man || curp->mdoc)
569 rc = curp->man ?
570 man_parseln(curp->man,
571 curp->line, ln.buf, of) :
572 mdoc_parseln(curp->mdoc,
573 curp->line, ln.buf, of);
574
575 if (0 == rc) {
576 assert(MANDOCLEVEL_FATAL <= curp->file_status);
577 break;
578 } else if (2 == rc)
579 break;
580
581 /* Temporary buffers typically are not full. */
582
583 if (0 == start && '\0' == blk.buf[i])
584 break;
585
586 /* Start the next input line. */
587
588 pos = 0;
589 }
590
591 free(ln.buf);
592 }
593
594 static int
595 read_whole_file(struct mparse *curp, const char *file, int fd,
596 struct buf *fb, int *with_mmap)
597 {
598 size_t off;
599 ssize_t ssz;
600
601 #if HAVE_MMAP
602 struct stat st;
603 if (-1 == fstat(fd, &st)) {
604 curp->file_status = MANDOCLEVEL_SYSERR;
605 if (curp->mmsg)
606 (*curp->mmsg)(MANDOCERR_SYSSTAT, curp->file_status,
607 file, 0, 0, strerror(errno));
608 return(0);
609 }
610
611 /*
612 * If we're a regular file, try just reading in the whole entry
613 * via mmap(). This is faster than reading it into blocks, and
614 * since each file is only a few bytes to begin with, I'm not
615 * concerned that this is going to tank any machines.
616 */
617
618 if (S_ISREG(st.st_mode)) {
619 if (st.st_size >= (1U << 31)) {
620 curp->file_status = MANDOCLEVEL_FATAL;
621 if (curp->mmsg)
622 (*curp->mmsg)(MANDOCERR_TOOLARGE,
623 curp->file_status, file, 0, 0, NULL);
624 return(0);
625 }
626 *with_mmap = 1;
627 fb->sz = (size_t)st.st_size;
628 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
629 if (fb->buf != MAP_FAILED)
630 return(1);
631 }
632 #endif
633
634 /*
635 * If this isn't a regular file (like, say, stdin), then we must
636 * go the old way and just read things in bit by bit.
637 */
638
639 *with_mmap = 0;
640 off = 0;
641 fb->sz = 0;
642 fb->buf = NULL;
643 for (;;) {
644 if (off == fb->sz) {
645 if (fb->sz == (1U << 31)) {
646 curp->file_status = MANDOCLEVEL_FATAL;
647 if (curp->mmsg)
648 (*curp->mmsg)(MANDOCERR_TOOLARGE,
649 curp->file_status,
650 file, 0, 0, NULL);
651 break;
652 }
653 resize_buf(fb, 65536);
654 }
655 ssz = read(fd, fb->buf + (int)off, fb->sz - off);
656 if (ssz == 0) {
657 fb->sz = off;
658 return(1);
659 }
660 if (ssz == -1) {
661 curp->file_status = MANDOCLEVEL_SYSERR;
662 if (curp->mmsg)
663 (*curp->mmsg)(MANDOCERR_SYSREAD,
664 curp->file_status, file, 0, 0,
665 strerror(errno));
666 break;
667 }
668 off += (size_t)ssz;
669 }
670
671 free(fb->buf);
672 fb->buf = NULL;
673 return(0);
674 }
675
676 static void
677 mparse_end(struct mparse *curp)
678 {
679
680 if (MANDOCLEVEL_FATAL <= curp->file_status)
681 return;
682
683 if (curp->mdoc == NULL &&
684 curp->man == NULL &&
685 curp->sodest == NULL) {
686 if (curp->options & MPARSE_MDOC)
687 curp->mdoc = curp->pmdoc;
688 else {
689 if (curp->pman == NULL)
690 curp->pman = man_alloc(curp->roff, curp,
691 curp->options & MPARSE_QUICK ? 1 : 0);
692 curp->man = curp->pman;
693 }
694 }
695
696 if (curp->mdoc && ! mdoc_endparse(curp->mdoc)) {
697 assert(MANDOCLEVEL_FATAL <= curp->file_status);
698 return;
699 }
700
701 if (curp->man && ! man_endparse(curp->man)) {
702 assert(MANDOCLEVEL_FATAL <= curp->file_status);
703 return;
704 }
705
706 roff_endparse(curp->roff);
707 }
708
709 static void
710 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
711 {
712 const char *svfile;
713 static int recursion_depth;
714
715 if (64 < recursion_depth) {
716 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
717 return;
718 }
719
720 /* Line number is per-file. */
721 svfile = curp->file;
722 curp->file = file;
723 curp->line = 1;
724 recursion_depth++;
725
726 mparse_buf_r(curp, blk, 1);
727
728 if (0 == --recursion_depth && MANDOCLEVEL_FATAL > curp->file_status)
729 mparse_end(curp);
730
731 curp->file = svfile;
732 }
733
734 enum mandoclevel
735 mparse_readmem(struct mparse *curp, const void *buf, size_t len,
736 const char *file)
737 {
738 struct buf blk;
739
740 blk.buf = UNCONST(buf);
741 blk.sz = len;
742
743 mparse_parse_buffer(curp, blk, file);
744 return(curp->file_status);
745 }
746
747 enum mandoclevel
748 mparse_readfd(struct mparse *curp, int fd, const char *file)
749 {
750 struct buf blk;
751 int with_mmap;
752
753 if (-1 == fd && -1 == (fd = open(file, O_RDONLY, 0))) {
754 curp->file_status = MANDOCLEVEL_SYSERR;
755 if (curp->mmsg)
756 (*curp->mmsg)(MANDOCERR_SYSOPEN,
757 curp->file_status,
758 file, 0, 0, strerror(errno));
759 goto out;
760 }
761
762 /*
763 * Run for each opened file; may be called more than once for
764 * each full parse sequence if the opened file is nested (i.e.,
765 * from `so'). Simply sucks in the whole file and moves into
766 * the parse phase for the file.
767 */
768
769 if ( ! read_whole_file(curp, file, fd, &blk, &with_mmap))
770 goto out;
771
772 mparse_parse_buffer(curp, blk, file);
773
774 #if HAVE_MMAP
775 if (with_mmap)
776 munmap(blk.buf, blk.sz);
777 else
778 #endif
779 free(blk.buf);
780
781 if (STDIN_FILENO != fd && -1 == close(fd))
782 perror(file);
783 out:
784 return(curp->file_status);
785 }
786
787 enum mandoclevel
788 mparse_open(struct mparse *curp, int *fd, const char *file,
789 pid_t *child_pid)
790 {
791 int pfd[2];
792 char *cp;
793 enum mandocerr err;
794
795 pfd[1] = -1;
796 curp->file = file;
797 if ((cp = strrchr(file, '.')) == NULL ||
798 strcmp(cp + 1, "gz")) {
799 *child_pid = 0;
800 if ((*fd = open(file, O_RDONLY)) == -1) {
801 err = MANDOCERR_SYSOPEN;
802 goto out;
803 }
804 return(MANDOCLEVEL_OK);
805 }
806
807 if (pipe(pfd) == -1) {
808 err = MANDOCERR_SYSPIPE;
809 goto out;
810 }
811
812 switch (*child_pid = fork()) {
813 case -1:
814 err = MANDOCERR_SYSFORK;
815 close(pfd[0]);
816 close(pfd[1]);
817 pfd[1] = -1;
818 break;
819 case 0:
820 close(pfd[0]);
821 if (dup2(pfd[1], STDOUT_FILENO) == -1) {
822 err = MANDOCERR_SYSDUP;
823 break;
824 }
825 execlp("gunzip", "gunzip", "-c", file, NULL);
826 err = MANDOCERR_SYSEXEC;
827 break;
828 default:
829 close(pfd[1]);
830 *fd = pfd[0];
831 return(MANDOCLEVEL_OK);
832 }
833
834 out:
835 *fd = -1;
836 *child_pid = 0;
837 curp->file_status = MANDOCLEVEL_SYSERR;
838 if (curp->mmsg)
839 (*curp->mmsg)(err, curp->file_status, file,
840 0, 0, strerror(errno));
841 if (pfd[1] != -1)
842 exit(1);
843 return(curp->file_status);
844 }
845
846 enum mandoclevel
847 mparse_wait(struct mparse *curp, pid_t child_pid)
848 {
849 int status;
850
851 if (waitpid(child_pid, &status, 0) == -1) {
852 mandoc_msg(MANDOCERR_SYSWAIT, curp, 0, 0,
853 strerror(errno));
854 curp->file_status = MANDOCLEVEL_SYSERR;
855 return(curp->file_status);
856 }
857 if (WIFSIGNALED(status)) {
858 mandoc_vmsg(MANDOCERR_SYSSIG, curp, 0, 0,
859 "%d", WTERMSIG(status));
860 curp->file_status = MANDOCLEVEL_SYSERR;
861 return(curp->file_status);
862 }
863 if (WEXITSTATUS(status)) {
864 mandoc_vmsg(MANDOCERR_SYSEXIT, curp, 0, 0,
865 "%d", WEXITSTATUS(status));
866 curp->file_status = MANDOCLEVEL_SYSERR;
867 return(curp->file_status);
868 }
869 return(MANDOCLEVEL_OK);
870 }
871
872 struct mparse *
873 mparse_alloc(int options, enum mandoclevel wlevel,
874 mandocmsg mmsg, const char *defos)
875 {
876 struct mparse *curp;
877
878 assert(wlevel <= MANDOCLEVEL_FATAL);
879
880 curp = mandoc_calloc(1, sizeof(struct mparse));
881
882 curp->options = options;
883 curp->wlevel = wlevel;
884 curp->mmsg = mmsg;
885 curp->defos = defos;
886
887 curp->roff = roff_alloc(curp, options);
888 if (curp->options & MPARSE_MDOC)
889 curp->pmdoc = mdoc_alloc(
890 curp->roff, curp, curp->defos,
891 curp->options & MPARSE_QUICK ? 1 : 0);
892 if (curp->options & MPARSE_MAN)
893 curp->pman = man_alloc(curp->roff, curp,
894 curp->options & MPARSE_QUICK ? 1 : 0);
895
896 return(curp);
897 }
898
899 void
900 mparse_reset(struct mparse *curp)
901 {
902
903 roff_reset(curp->roff);
904
905 if (curp->mdoc)
906 mdoc_reset(curp->mdoc);
907 if (curp->man)
908 man_reset(curp->man);
909 if (curp->secondary)
910 curp->secondary->sz = 0;
911
912 curp->file_status = MANDOCLEVEL_OK;
913 curp->mdoc = NULL;
914 curp->man = NULL;
915
916 free(curp->sodest);
917 curp->sodest = NULL;
918 }
919
920 void
921 mparse_free(struct mparse *curp)
922 {
923
924 if (curp->pmdoc)
925 mdoc_free(curp->pmdoc);
926 if (curp->pman)
927 man_free(curp->pman);
928 if (curp->roff)
929 roff_free(curp->roff);
930 if (curp->secondary)
931 free(curp->secondary->buf);
932
933 free(curp->secondary);
934 free(curp->sodest);
935 free(curp);
936 }
937
938 void
939 mparse_result(struct mparse *curp,
940 struct mdoc **mdoc, struct man **man, char **sodest)
941 {
942
943 if (sodest && NULL != (*sodest = curp->sodest)) {
944 *mdoc = NULL;
945 *man = NULL;
946 return;
947 }
948 if (mdoc)
949 *mdoc = curp->mdoc;
950 if (man)
951 *man = curp->man;
952 }
953
954 void
955 mandoc_vmsg(enum mandocerr t, struct mparse *m,
956 int ln, int pos, const char *fmt, ...)
957 {
958 char buf[256];
959 va_list ap;
960
961 va_start(ap, fmt);
962 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
963 va_end(ap);
964
965 mandoc_msg(t, m, ln, pos, buf);
966 }
967
968 void
969 mandoc_msg(enum mandocerr er, struct mparse *m,
970 int ln, int col, const char *msg)
971 {
972 enum mandoclevel level;
973
974 level = MANDOCLEVEL_FATAL;
975 while (er < mandoclimits[level])
976 level--;
977
978 if (level < m->wlevel)
979 return;
980
981 if (m->mmsg)
982 (*m->mmsg)(er, level, m->file, ln, col, msg);
983
984 if (m->file_status < level)
985 m->file_status = level;
986 }
987
988 const char *
989 mparse_strerror(enum mandocerr er)
990 {
991
992 return(mandocerrs[er]);
993 }
994
995 const char *
996 mparse_strlevel(enum mandoclevel lvl)
997 {
998 return(mandoclevels[lvl]);
999 }
1000
1001 void
1002 mparse_keep(struct mparse *p)
1003 {
1004
1005 assert(NULL == p->secondary);
1006 p->secondary = mandoc_calloc(1, sizeof(struct buf));
1007 }
1008
1009 const char *
1010 mparse_getkeep(const struct mparse *p)
1011 {
1012
1013 assert(p->secondary);
1014 return(p->secondary->sz ? p->secondary->buf : NULL);
1015 }