]> git.cameronkatri.com Git - mandoc.git/blob - read.c
Fatal errors no longer exist.
[mandoc.git] / read.c
1 /* $Id: read.c,v 1.111 2015/01/15 04:26:40 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include "config.h"
20
21 #include <sys/types.h>
22 #if HAVE_MMAP
23 #include <sys/mman.h>
24 #include <sys/stat.h>
25 #endif
26 #include <sys/wait.h>
27
28 #include <assert.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <stdarg.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38
39 #include "mandoc.h"
40 #include "mandoc_aux.h"
41 #include "libmandoc.h"
42 #include "mdoc.h"
43 #include "man.h"
44
45 #define REPARSE_LIMIT 1000
46
47 struct mparse {
48 struct man *pman; /* persistent man parser */
49 struct mdoc *pmdoc; /* persistent mdoc parser */
50 struct man *man; /* man parser */
51 struct mdoc *mdoc; /* mdoc parser */
52 struct roff *roff; /* roff parser (!NULL) */
53 const struct mchars *mchars; /* character table */
54 char *sodest; /* filename pointed to by .so */
55 const char *file; /* filename of current input file */
56 struct buf *primary; /* buffer currently being parsed */
57 struct buf *secondary; /* preprocessed copy of input */
58 const char *defos; /* default operating system */
59 mandocmsg mmsg; /* warning/error message handler */
60 enum mandoclevel file_status; /* status of current parse */
61 enum mandoclevel wlevel; /* ignore messages below this */
62 int options; /* parser options */
63 int filenc; /* encoding of the current file */
64 int reparse_count; /* finite interp. stack */
65 int line; /* line number in the file */
66 pid_t child; /* the gunzip(1) process */
67 };
68
/* Forward declarations of the file-local helpers. */
static	void	  choose_parser(struct mparse *curp);
static	void	  resize_buf(struct buf *buf, size_t initial);
static	void	  mparse_buf_r(struct mparse *curp, struct buf blk,
			size_t i, int start);
static	int	  read_whole_file(struct mparse *curp, const char *file,
			int fd, struct buf *fb, int *with_mmap);
static	void	  mparse_end(struct mparse *curp);
static	void	  mparse_parse_buffer(struct mparse *curp, struct buf blk,
			const char *file);
77
78 static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = {
79 MANDOCERR_OK,
80 MANDOCERR_WARNING,
81 MANDOCERR_WARNING,
82 MANDOCERR_ERROR,
83 MANDOCERR_MAX,
84 MANDOCERR_MAX,
85 MANDOCERR_MAX
86 };
87
88 static const char * const mandocerrs[MANDOCERR_MAX] = {
89 "ok",
90
91 "generic warning",
92
93 /* related to the prologue */
94 "missing manual title, using UNTITLED",
95 "missing manual title, using \"\"",
96 "lower case character in document title",
97 "missing manual section, using \"\"",
98 "unknown manual section",
99 "missing date, using today's date",
100 "cannot parse date, using it verbatim",
101 "missing Os macro, using \"\"",
102 "duplicate prologue macro",
103 "late prologue macro",
104 "skipping late title macro",
105 "prologue macros out of order",
106
107 /* related to document structure */
108 ".so is fragile, better use ln(1)",
109 "no document body",
110 "content before first section header",
111 "first section is not \"NAME\"",
112 "bad NAME section contents",
113 "sections out of conventional order",
114 "duplicate section title",
115 "unexpected section",
116 "unusual Xr order",
117 "unusual Xr punctuation",
118 "AUTHORS section without An macro",
119
120 /* related to macros and nesting */
121 "obsolete macro",
122 "macro neither callable nor escaped",
123 "skipping paragraph macro",
124 "moving paragraph macro out of list",
125 "skipping no-space macro",
126 "blocks badly nested",
127 "nested displays are not portable",
128 "moving content out of list",
129 ".Vt block has child macro",
130 "fill mode already enabled, skipping",
131 "fill mode already disabled, skipping",
132 "line scope broken",
133
134 /* related to missing macro arguments */
135 "skipping empty request",
136 "conditional request controls empty scope",
137 "skipping empty macro",
138 "empty argument, using 0n",
139 "argument count wrong",
140 "missing display type, using -ragged",
141 "list type is not the first argument",
142 "missing -width in -tag list, using 8n",
143 "missing utility name, using \"\"",
144 "empty head in list item",
145 "empty list item",
146 "missing font type, using \\fR",
147 "unknown font type, using \\fR",
148 "nothing follows prefix",
149 "missing -std argument, adding it",
150 "missing eqn box, using \"\"",
151
152 /* related to bad macro arguments */
153 "unterminated quoted argument",
154 "duplicate argument",
155 "skipping duplicate argument",
156 "skipping duplicate display type",
157 "skipping duplicate list type",
158 "skipping -width argument",
159 "unknown AT&T UNIX version",
160 "comma in function argument",
161 "parenthesis in function name",
162 "invalid content in Rs block",
163 "invalid Boolean argument",
164 "unknown font, skipping request",
165
166 /* related to plain text */
167 "blank line in fill mode, using .sp",
168 "tab in filled text",
169 "whitespace at end of input line",
170 "bad comment style",
171 "invalid escape sequence",
172 "undefined string, using \"\"",
173
174 "generic error",
175
176 /* related to equations */
177 "unexpected equation scope closure",
178 "equation scope open on exit",
179 "overlapping equation scopes",
180 "unexpected end of equation",
181
182 /* related to tables */
183 "bad table syntax",
184 "bad table option",
185 "bad table layout",
186 "no table layout cells specified",
187 "no table data cells specified",
188 "ignore data in cell",
189 "data block still open",
190 "ignoring extra data cells",
191 "ignoring macro in table",
192
193 /* related to document structure and macros */
194 NULL,
195 "input too large",
196 "input stack limit exceeded, infinite loop?",
197 "skipping bad character",
198 "skipping unknown macro",
199 "skipping item outside list",
200 "skipping column outside column list",
201 "skipping end of block that is not open",
202 "inserting missing end of block",
203 "appending missing end of block",
204
205 /* related to request and macro arguments */
206 "escaped character not allowed in a name",
207 "argument count wrong",
208 "NOT IMPLEMENTED: Bd -file",
209 "missing list type, using -item",
210 "missing manual name, using \"\"",
211 "uname(3) system call failed, using UNKNOWN",
212 "unknown standard specifier",
213 "skipping request without numeric argument",
214 "NOT IMPLEMENTED: .so with absolute path or \"..\"",
215 ".so request failed",
216 "skipping all arguments",
217 "skipping excess arguments",
218 "divide by zero",
219 };
220
221 static const char * const mandoclevels[MANDOCLEVEL_MAX] = {
222 "SUCCESS",
223 "RESERVED",
224 "WARNING",
225 "ERROR",
226 "FATAL",
227 "BADARG",
228 "SYSERR"
229 };
230
231
232 static void
233 resize_buf(struct buf *buf, size_t initial)
234 {
235
236 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
237 buf->buf = mandoc_realloc(buf->buf, buf->sz);
238 }
239
240 static void
241 choose_parser(struct mparse *curp)
242 {
243 char *cp, *ep;
244 int format;
245
246 /*
247 * If neither command line arguments -mdoc or -man select
248 * a parser nor the roff parser found a .Dd or .TH macro
249 * yet, look ahead in the main input buffer.
250 */
251
252 if ((format = roff_getformat(curp->roff)) == 0) {
253 cp = curp->primary->buf;
254 ep = cp + curp->primary->sz;
255 while (cp < ep) {
256 if (*cp == '.' || *cp == '\'') {
257 cp++;
258 if (cp[0] == 'D' && cp[1] == 'd') {
259 format = MPARSE_MDOC;
260 break;
261 }
262 if (cp[0] == 'T' && cp[1] == 'H') {
263 format = MPARSE_MAN;
264 break;
265 }
266 }
267 cp = memchr(cp, '\n', ep - cp);
268 if (cp == NULL)
269 break;
270 cp++;
271 }
272 }
273
274 if (format == MPARSE_MDOC) {
275 if (NULL == curp->pmdoc)
276 curp->pmdoc = mdoc_alloc(
277 curp->roff, curp, curp->defos,
278 MPARSE_QUICK & curp->options ? 1 : 0);
279 assert(curp->pmdoc);
280 curp->mdoc = curp->pmdoc;
281 return;
282 }
283
284 /* Fall back to man(7) as a last resort. */
285
286 if (NULL == curp->pman)
287 curp->pman = man_alloc(
288 curp->roff, curp, curp->defos,
289 MPARSE_QUICK & curp->options ? 1 : 0);
290 assert(curp->pman);
291 curp->man = curp->pman;
292 }
293
294 /*
295 * Main parse routine for a buffer.
296 * It assumes encoding and line numbering are already set up.
297 * It can recurse directly (for invocations of user-defined
298 * macros, inline equations, and input line traps)
299 * and indirectly (for .so file inclusion).
300 */
301 static void
302 mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
303 {
304 const struct tbl_span *span;
305 struct buf ln;
306 char *cp;
307 size_t pos; /* byte number in the ln buffer */
308 enum rofferr rr;
309 int of;
310 int lnn; /* line number in the real file */
311 int fd;
312 pid_t save_child;
313 unsigned char c;
314
315 memset(&ln, 0, sizeof(ln));
316
317 lnn = curp->line;
318 pos = 0;
319
320 while (i < blk.sz) {
321 if (0 == pos && '\0' == blk.buf[i])
322 break;
323
324 if (start) {
325 curp->line = lnn;
326 curp->reparse_count = 0;
327
328 if (lnn < 3 &&
329 curp->filenc & MPARSE_UTF8 &&
330 curp->filenc & MPARSE_LATIN1)
331 curp->filenc = preconv_cue(&blk, i);
332 }
333
334 while (i < blk.sz && (start || blk.buf[i] != '\0')) {
335
336 /*
337 * When finding an unescaped newline character,
338 * leave the character loop to process the line.
339 * Skip a preceding carriage return, if any.
340 */
341
342 if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
343 '\n' == blk.buf[i + 1])
344 ++i;
345 if ('\n' == blk.buf[i]) {
346 ++i;
347 ++lnn;
348 break;
349 }
350
351 /*
352 * Make sure we have space for the worst
353 * case of 11 bytes: "\\[u10ffff]\0"
354 */
355
356 if (pos + 11 > ln.sz)
357 resize_buf(&ln, 256);
358
359 /*
360 * Encode 8-bit input.
361 */
362
363 c = blk.buf[i];
364 if (c & 0x80) {
365 if ( ! (curp->filenc && preconv_encode(
366 &blk, &i, &ln, &pos, &curp->filenc))) {
367 mandoc_vmsg(MANDOCERR_BADCHAR,
368 curp, curp->line, pos,
369 "0x%x", c);
370 ln.buf[pos++] = '?';
371 i++;
372 }
373 continue;
374 }
375
376 /*
377 * Exclude control characters.
378 */
379
380 if (c == 0x7f || (c < 0x20 && c != 0x09)) {
381 mandoc_vmsg(MANDOCERR_BADCHAR, curp,
382 curp->line, pos, "0x%x", c);
383 i++;
384 ln.buf[pos++] = '?';
385 continue;
386 }
387
388 /* Trailing backslash = a plain char. */
389
390 if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
391 ln.buf[pos++] = blk.buf[i++];
392 continue;
393 }
394
395 /*
396 * Found escape and at least one other character.
397 * When it's a newline character, skip it.
398 * When there is a carriage return in between,
399 * skip that one as well.
400 */
401
402 if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
403 '\n' == blk.buf[i + 2])
404 ++i;
405 if ('\n' == blk.buf[i + 1]) {
406 i += 2;
407 ++lnn;
408 continue;
409 }
410
411 if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
412 i += 2;
413 /* Comment, skip to end of line */
414 for (; i < blk.sz; ++i) {
415 if ('\n' == blk.buf[i]) {
416 ++i;
417 ++lnn;
418 break;
419 }
420 }
421
422 /* Backout trailing whitespaces */
423 for (; pos > 0; --pos) {
424 if (ln.buf[pos - 1] != ' ')
425 break;
426 if (pos > 2 && ln.buf[pos - 2] == '\\')
427 break;
428 }
429 break;
430 }
431
432 /* Catch escaped bogus characters. */
433
434 c = (unsigned char) blk.buf[i+1];
435
436 if ( ! (isascii(c) &&
437 (isgraph(c) || isblank(c)))) {
438 mandoc_vmsg(MANDOCERR_BADCHAR, curp,
439 curp->line, pos, "0x%x", c);
440 i += 2;
441 ln.buf[pos++] = '?';
442 continue;
443 }
444
445 /* Some other escape sequence, copy & cont. */
446
447 ln.buf[pos++] = blk.buf[i++];
448 ln.buf[pos++] = blk.buf[i++];
449 }
450
451 if (pos >= ln.sz)
452 resize_buf(&ln, 256);
453
454 ln.buf[pos] = '\0';
455
456 /*
457 * A significant amount of complexity is contained by
458 * the roff preprocessor. It's line-oriented but can be
459 * expressed on one line, so we need at times to
460 * readjust our starting point and re-run it. The roff
461 * preprocessor can also readjust the buffers with new
462 * data, so we pass them in wholesale.
463 */
464
465 of = 0;
466
467 /*
468 * Maintain a lookaside buffer of all parsed lines. We
469 * only do this if mparse_keep() has been invoked (the
470 * buffer may be accessed with mparse_getkeep()).
471 */
472
473 if (curp->secondary) {
474 curp->secondary->buf = mandoc_realloc(
475 curp->secondary->buf,
476 curp->secondary->sz + pos + 2);
477 memcpy(curp->secondary->buf +
478 curp->secondary->sz,
479 ln.buf, pos);
480 curp->secondary->sz += pos;
481 curp->secondary->buf
482 [curp->secondary->sz] = '\n';
483 curp->secondary->sz++;
484 curp->secondary->buf
485 [curp->secondary->sz] = '\0';
486 }
487 rerun:
488 rr = roff_parseln(curp->roff, curp->line, &ln, &of);
489
490 switch (rr) {
491 case ROFF_REPARSE:
492 if (REPARSE_LIMIT >= ++curp->reparse_count)
493 mparse_buf_r(curp, ln, of, 0);
494 else
495 mandoc_msg(MANDOCERR_ROFFLOOP, curp,
496 curp->line, pos, NULL);
497 pos = 0;
498 continue;
499 case ROFF_APPEND:
500 pos = strlen(ln.buf);
501 continue;
502 case ROFF_RERUN:
503 goto rerun;
504 case ROFF_IGN:
505 pos = 0;
506 continue;
507 case ROFF_SO:
508 if ( ! (curp->options & MPARSE_SO) &&
509 (i >= blk.sz || blk.buf[i] == '\0')) {
510 curp->sodest = mandoc_strdup(ln.buf + of);
511 free(ln.buf);
512 return;
513 }
514 /*
515 * We remove `so' clauses from our lookaside
516 * buffer because we're going to descend into
517 * the file recursively.
518 */
519 if (curp->secondary)
520 curp->secondary->sz -= pos + 1;
521 save_child = curp->child;
522 if (mparse_open(curp, &fd, ln.buf + of) ==
523 MANDOCLEVEL_OK)
524 mparse_readfd(curp, fd, ln.buf + of);
525 else {
526 mandoc_vmsg(MANDOCERR_SO_FAIL,
527 curp, curp->line, pos,
528 ".so %s", ln.buf + of);
529 ln.sz = mandoc_asprintf(&cp,
530 ".sp\nSee the file %s.\n.sp",
531 ln.buf + of);
532 free(ln.buf);
533 ln.buf = cp;
534 of = 0;
535 mparse_buf_r(curp, ln, of, 0);
536 }
537 curp->child = save_child;
538 pos = 0;
539 continue;
540 default:
541 break;
542 }
543
544 /*
545 * If input parsers have not been allocated, do so now.
546 * We keep these instanced between parsers, but set them
547 * locally per parse routine since we can use different
548 * parsers with each one.
549 */
550
551 if ( ! (curp->man || curp->mdoc))
552 choose_parser(curp);
553
554 /*
555 * Lastly, push down into the parsers themselves.
556 * If libroff returns ROFF_TBL, then add it to the
557 * currently open parse. Since we only get here if
558 * there does exist data (see tbl_data.c), we're
559 * guaranteed that something's been allocated.
560 * Do the same for ROFF_EQN.
561 */
562
563 if (rr == ROFF_TBL) {
564 while ((span = roff_span(curp->roff)) != NULL)
565 if (curp->man == NULL)
566 mdoc_addspan(curp->mdoc, span);
567 else
568 man_addspan(curp->man, span);
569 } else if (rr == ROFF_EQN) {
570 if (curp->man == NULL)
571 mdoc_addeqn(curp->mdoc, roff_eqn(curp->roff));
572 else
573 man_addeqn(curp->man, roff_eqn(curp->roff));
574 } else if ((curp->man == NULL ?
575 mdoc_parseln(curp->mdoc, curp->line, ln.buf, of) :
576 man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
577 break;
578
579 /* Temporary buffers typically are not full. */
580
581 if (0 == start && '\0' == blk.buf[i])
582 break;
583
584 /* Start the next input line. */
585
586 pos = 0;
587 }
588
589 free(ln.buf);
590 }
591
592 static int
593 read_whole_file(struct mparse *curp, const char *file, int fd,
594 struct buf *fb, int *with_mmap)
595 {
596 size_t off;
597 ssize_t ssz;
598
599 #if HAVE_MMAP
600 struct stat st;
601 if (-1 == fstat(fd, &st)) {
602 perror(file);
603 exit((int)MANDOCLEVEL_SYSERR);
604 }
605
606 /*
607 * If we're a regular file, try just reading in the whole entry
608 * via mmap(). This is faster than reading it into blocks, and
609 * since each file is only a few bytes to begin with, I'm not
610 * concerned that this is going to tank any machines.
611 */
612
613 if (S_ISREG(st.st_mode)) {
614 if (st.st_size >= (1U << 31)) {
615 mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
616 return(0);
617 }
618 *with_mmap = 1;
619 fb->sz = (size_t)st.st_size;
620 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
621 if (fb->buf != MAP_FAILED)
622 return(1);
623 }
624 #endif
625
626 /*
627 * If this isn't a regular file (like, say, stdin), then we must
628 * go the old way and just read things in bit by bit.
629 */
630
631 *with_mmap = 0;
632 off = 0;
633 fb->sz = 0;
634 fb->buf = NULL;
635 for (;;) {
636 if (off == fb->sz) {
637 if (fb->sz == (1U << 31)) {
638 mandoc_msg(MANDOCERR_TOOLARGE, curp,
639 0, 0, NULL);
640 break;
641 }
642 resize_buf(fb, 65536);
643 }
644 ssz = read(fd, fb->buf + (int)off, fb->sz - off);
645 if (ssz == 0) {
646 fb->sz = off;
647 return(1);
648 }
649 if (ssz == -1) {
650 perror(file);
651 exit((int)MANDOCLEVEL_SYSERR);
652 }
653 off += (size_t)ssz;
654 }
655
656 free(fb->buf);
657 fb->buf = NULL;
658 return(0);
659 }
660
661 static void
662 mparse_end(struct mparse *curp)
663 {
664
665 if (curp->mdoc == NULL &&
666 curp->man == NULL &&
667 curp->sodest == NULL) {
668 if (curp->options & MPARSE_MDOC)
669 curp->mdoc = curp->pmdoc;
670 else {
671 if (curp->pman == NULL)
672 curp->pman = man_alloc(
673 curp->roff, curp, curp->defos,
674 curp->options & MPARSE_QUICK ? 1 : 0);
675 curp->man = curp->pman;
676 }
677 }
678 if (curp->mdoc)
679 mdoc_endparse(curp->mdoc);
680 if (curp->man)
681 man_endparse(curp->man);
682 roff_endparse(curp->roff);
683 }
684
685 static void
686 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
687 {
688 struct buf *svprimary;
689 const char *svfile;
690 size_t offset;
691 static int recursion_depth;
692
693 if (64 < recursion_depth) {
694 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
695 return;
696 }
697
698 /* Line number is per-file. */
699 svfile = curp->file;
700 curp->file = file;
701 svprimary = curp->primary;
702 curp->primary = &blk;
703 curp->line = 1;
704 recursion_depth++;
705
706 /* Skip an UTF-8 byte order mark. */
707 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
708 (unsigned char)blk.buf[0] == 0xef &&
709 (unsigned char)blk.buf[1] == 0xbb &&
710 (unsigned char)blk.buf[2] == 0xbf) {
711 offset = 3;
712 curp->filenc &= ~MPARSE_LATIN1;
713 } else
714 offset = 0;
715
716 mparse_buf_r(curp, blk, offset, 1);
717
718 if (--recursion_depth == 0)
719 mparse_end(curp);
720
721 curp->primary = svprimary;
722 curp->file = svfile;
723 }
724
725 enum mandoclevel
726 mparse_readmem(struct mparse *curp, void *buf, size_t len,
727 const char *file)
728 {
729 struct buf blk;
730
731 blk.buf = buf;
732 blk.sz = len;
733
734 mparse_parse_buffer(curp, blk, file);
735 return(curp->file_status);
736 }
737
738 /*
739 * Read the whole file into memory and call the parsers.
740 * Called recursively when an .so request is encountered.
741 */
742 enum mandoclevel
743 mparse_readfd(struct mparse *curp, int fd, const char *file)
744 {
745 struct buf blk;
746 int with_mmap;
747 int save_filenc;
748
749 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
750 save_filenc = curp->filenc;
751 curp->filenc = curp->options &
752 (MPARSE_UTF8 | MPARSE_LATIN1);
753 mparse_parse_buffer(curp, blk, file);
754 curp->filenc = save_filenc;
755 #if HAVE_MMAP
756 if (with_mmap)
757 munmap(blk.buf, blk.sz);
758 else
759 #endif
760 free(blk.buf);
761 }
762
763 if (fd != STDIN_FILENO && close(fd) == -1)
764 perror(file);
765
766 mparse_wait(curp);
767 return(curp->file_status);
768 }
769
770 enum mandoclevel
771 mparse_open(struct mparse *curp, int *fd, const char *file)
772 {
773 int pfd[2];
774 int save_errno;
775 char *cp;
776
777 curp->file = file;
778
779 /* Unless zipped, try to just open the file. */
780
781 if ((cp = strrchr(file, '.')) == NULL ||
782 strcmp(cp + 1, "gz")) {
783 curp->child = 0;
784 if ((*fd = open(file, O_RDONLY)) != -1)
785 return(MANDOCLEVEL_OK);
786
787 /* Open failed; try to append ".gz". */
788
789 mandoc_asprintf(&cp, "%s.gz", file);
790 file = cp;
791 } else
792 cp = NULL;
793
794 /* Before forking, make sure the file can be read. */
795
796 save_errno = errno;
797 if (access(file, R_OK) == -1) {
798 if (cp != NULL)
799 errno = save_errno;
800 free(cp);
801 *fd = -1;
802 curp->child = 0;
803 mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
804 return(MANDOCLEVEL_ERROR);
805 }
806
807 /* Run gunzip(1). */
808
809 if (pipe(pfd) == -1) {
810 perror("pipe");
811 exit((int)MANDOCLEVEL_SYSERR);
812 }
813
814 switch (curp->child = fork()) {
815 case -1:
816 perror("fork");
817 exit((int)MANDOCLEVEL_SYSERR);
818 case 0:
819 close(pfd[0]);
820 if (dup2(pfd[1], STDOUT_FILENO) == -1) {
821 perror("dup");
822 exit((int)MANDOCLEVEL_SYSERR);
823 }
824 execlp("gunzip", "gunzip", "-c", file, NULL);
825 perror("exec");
826 exit((int)MANDOCLEVEL_SYSERR);
827 default:
828 close(pfd[1]);
829 *fd = pfd[0];
830 return(MANDOCLEVEL_OK);
831 }
832 }
833
834 enum mandoclevel
835 mparse_wait(struct mparse *curp)
836 {
837 int status;
838
839 if (curp->child == 0)
840 return(MANDOCLEVEL_OK);
841
842 if (waitpid(curp->child, &status, 0) == -1) {
843 perror("wait");
844 exit((int)MANDOCLEVEL_SYSERR);
845 }
846 if (WIFSIGNALED(status)) {
847 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
848 "gunzip died from signal %d", WTERMSIG(status));
849 return(MANDOCLEVEL_ERROR);
850 }
851 if (WEXITSTATUS(status)) {
852 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
853 "gunzip failed with code %d", WEXITSTATUS(status));
854 return(MANDOCLEVEL_ERROR);
855 }
856 return(MANDOCLEVEL_OK);
857 }
858
859 struct mparse *
860 mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
861 const struct mchars *mchars, const char *defos)
862 {
863 struct mparse *curp;
864
865 curp = mandoc_calloc(1, sizeof(struct mparse));
866
867 curp->options = options;
868 curp->wlevel = wlevel;
869 curp->mmsg = mmsg;
870 curp->defos = defos;
871
872 curp->mchars = mchars;
873 curp->roff = roff_alloc(curp, curp->mchars, options);
874 if (curp->options & MPARSE_MDOC)
875 curp->pmdoc = mdoc_alloc(
876 curp->roff, curp, curp->defos,
877 curp->options & MPARSE_QUICK ? 1 : 0);
878 if (curp->options & MPARSE_MAN)
879 curp->pman = man_alloc(
880 curp->roff, curp, curp->defos,
881 curp->options & MPARSE_QUICK ? 1 : 0);
882
883 return(curp);
884 }
885
886 void
887 mparse_reset(struct mparse *curp)
888 {
889
890 roff_reset(curp->roff);
891
892 if (curp->mdoc)
893 mdoc_reset(curp->mdoc);
894 if (curp->man)
895 man_reset(curp->man);
896 if (curp->secondary)
897 curp->secondary->sz = 0;
898
899 curp->file_status = MANDOCLEVEL_OK;
900 curp->mdoc = NULL;
901 curp->man = NULL;
902
903 free(curp->sodest);
904 curp->sodest = NULL;
905 }
906
907 void
908 mparse_free(struct mparse *curp)
909 {
910
911 if (curp->pmdoc)
912 mdoc_free(curp->pmdoc);
913 if (curp->pman)
914 man_free(curp->pman);
915 if (curp->roff)
916 roff_free(curp->roff);
917 if (curp->secondary)
918 free(curp->secondary->buf);
919
920 free(curp->secondary);
921 free(curp->sodest);
922 free(curp);
923 }
924
925 void
926 mparse_result(struct mparse *curp,
927 struct mdoc **mdoc, struct man **man, char **sodest)
928 {
929
930 if (sodest && NULL != (*sodest = curp->sodest)) {
931 *mdoc = NULL;
932 *man = NULL;
933 return;
934 }
935 if (mdoc)
936 *mdoc = curp->mdoc;
937 if (man)
938 *man = curp->man;
939 }
940
941 void
942 mandoc_vmsg(enum mandocerr t, struct mparse *m,
943 int ln, int pos, const char *fmt, ...)
944 {
945 char buf[256];
946 va_list ap;
947
948 va_start(ap, fmt);
949 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
950 va_end(ap);
951
952 mandoc_msg(t, m, ln, pos, buf);
953 }
954
955 void
956 mandoc_msg(enum mandocerr er, struct mparse *m,
957 int ln, int col, const char *msg)
958 {
959 enum mandoclevel level;
960
961 level = MANDOCLEVEL_ERROR;
962 while (er < mandoclimits[level])
963 level--;
964
965 if (level < m->wlevel && er != MANDOCERR_FILE)
966 return;
967
968 if (m->mmsg)
969 (*m->mmsg)(er, level, m->file, ln, col, msg);
970
971 if (m->file_status < level)
972 m->file_status = level;
973 }
974
975 const char *
976 mparse_strerror(enum mandocerr er)
977 {
978
979 return(mandocerrs[er]);
980 }
981
982 const char *
983 mparse_strlevel(enum mandoclevel lvl)
984 {
985 return(mandoclevels[lvl]);
986 }
987
988 void
989 mparse_keep(struct mparse *p)
990 {
991
992 assert(NULL == p->secondary);
993 p->secondary = mandoc_calloc(1, sizeof(struct buf));
994 }
995
996 const char *
997 mparse_getkeep(const struct mparse *p)
998 {
999
1000 assert(p->secondary);
1001 return(p->secondary->sz ? p->secondary->buf : NULL);
1002 }