]> git.cameronkatri.com Git - mandoc.git/blob - read.c
Traditional roff(7) explicitly allows certain control characters
[mandoc.git] / read.c
1 /* $Id: read.c,v 1.114 2015/01/22 21:38:16 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include "config.h"
20
21 #include <sys/types.h>
22 #if HAVE_MMAP
23 #include <sys/mman.h>
24 #include <sys/stat.h>
25 #endif
26 #include <sys/wait.h>
27
28 #include <assert.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <stdarg.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38
39 #include "mandoc.h"
40 #include "mandoc_aux.h"
41 #include "libmandoc.h"
42 #include "mdoc.h"
43 #include "man.h"
44
/*
 * Upper bound for the per-line reparse counter kept in struct mparse:
 * once the roff parser has answered ROFF_REPARSE more often than this
 * for one input line, mparse_buf_r() stops recursing and reports
 * MANDOCERR_ROFFLOOP instead.
 */
#define	REPARSE_LIMIT	1000
46
/*
 * State of one parse handle: the roff preprocessor, the two macro
 * language parsers, the input currently being read, and the settings
 * that control message reporting.
 */
struct mparse {
	struct man	 *pman; /* persistent man parser, kept across resets */
	struct mdoc	 *pmdoc; /* persistent mdoc parser, kept across resets */
	struct man	 *man; /* man parser chosen for this parse, or NULL */
	struct mdoc	 *mdoc; /* mdoc parser chosen for this parse, or NULL */
	struct roff	 *roff; /* roff parser (!NULL) */
	const struct mchars *mchars; /* character table */
	char		 *sodest; /* filename pointed to by .so */
	const char	 *file; /* filename of current input file */
	struct buf	 *primary; /* buffer currently being parsed */
	struct buf	 *secondary; /* preprocessed copy of input */
	const char	 *defos; /* default operating system */
	mandocmsg	  mmsg; /* warning/error message handler */
	enum mandoclevel  file_status; /* highest message level seen so far */
	enum mandoclevel  wlevel; /* ignore messages below this */
	int		  options; /* parser options */
	int		  filenc; /* encoding of the current file */
	int		  reparse_count; /* finite interp. stack */
	int		  line; /* line number in the file */
	pid_t		  child; /* the gunzip(1) process, 0 if none */
};
68
/* Forward declarations of the helpers private to this file. */
static	void	  choose_parser(struct mparse *);
static	void	  resize_buf(struct buf *, size_t);
static	void	  mparse_buf_r(struct mparse *, struct buf, size_t, int);
static	int	  read_whole_file(struct mparse *, const char *, int,
				struct buf *, int *);
static	void	  mparse_end(struct mparse *);
static	void	  mparse_parse_buffer(struct mparse *, struct buf,
			const char *);
77
/*
 * Indexed by enum mandoclevel: the first mandocerr value that belongs
 * to each level.  mandoc_msg() starts at MANDOCLEVEL_UNSUPP and steps
 * down until the error code is no longer below the entry, which
 * yields the level of the message.
 */
static	const enum mandocerr	mandoclimits[MANDOCLEVEL_MAX] = {
	MANDOCERR_OK,
	MANDOCERR_WARNING,
	MANDOCERR_WARNING,
	MANDOCERR_ERROR,
	MANDOCERR_UNSUPP,
	MANDOCERR_MAX,
	MANDOCERR_MAX
};
87
/*
 * Message text for each error code, indexed by enum mandocerr and
 * returned by mparse_strerror().  The order of the entries has to
 * follow the declaration order of the enum.
 * NOTE(review): the NULL slot presumably belongs to MANDOCERR_FILE,
 * whose text is supplied by the caller -- confirm against mandoc.h.
 */
static	const char * const	mandocerrs[MANDOCERR_MAX] = {
	"ok",

	"generic warning",

	/* related to the prologue */
	"missing manual title, using UNTITLED",
	"missing manual title, using \"\"",
	"lower case character in document title",
	"missing manual section, using \"\"",
	"unknown manual section",
	"missing date, using today's date",
	"cannot parse date, using it verbatim",
	"missing Os macro, using \"\"",
	"duplicate prologue macro",
	"late prologue macro",
	"skipping late title macro",
	"prologue macros out of order",

	/* related to document structure */
	".so is fragile, better use ln(1)",
	"no document body",
	"content before first section header",
	"first section is not \"NAME\"",
	"bad NAME section contents",
	"sections out of conventional order",
	"duplicate section title",
	"unexpected section",
	"unusual Xr order",
	"unusual Xr punctuation",
	"AUTHORS section without An macro",

	/* related to macros and nesting */
	"obsolete macro",
	"macro neither callable nor escaped",
	"skipping paragraph macro",
	"moving paragraph macro out of list",
	"skipping no-space macro",
	"blocks badly nested",
	"nested displays are not portable",
	"moving content out of list",
	".Vt block has child macro",
	"fill mode already enabled, skipping",
	"fill mode already disabled, skipping",
	"line scope broken",

	/* related to missing macro arguments */
	"skipping empty request",
	"conditional request controls empty scope",
	"skipping empty macro",
	"empty argument, using 0n",
	"argument count wrong",
	"missing display type, using -ragged",
	"list type is not the first argument",
	"missing -width in -tag list, using 8n",
	"missing utility name, using \"\"",
	"empty head in list item",
	"empty list item",
	"missing font type, using \\fR",
	"unknown font type, using \\fR",
	"nothing follows prefix",
	"missing -std argument, adding it",
	"missing eqn box, using \"\"",

	/* related to bad macro arguments */
	"unterminated quoted argument",
	"duplicate argument",
	"skipping duplicate argument",
	"skipping duplicate display type",
	"skipping duplicate list type",
	"skipping -width argument",
	"unknown AT&T UNIX version",
	"comma in function argument",
	"parenthesis in function name",
	"invalid content in Rs block",
	"invalid Boolean argument",
	"unknown font, skipping request",

	/* related to plain text */
	"blank line in fill mode, using .sp",
	"tab in filled text",
	"whitespace at end of input line",
	"bad comment style",
	"invalid escape sequence",
	"undefined string, using \"\"",

	"generic error",

	/* related to equations */
	"unexpected equation scope closure",
	"equation scope open on exit",
	"overlapping equation scopes",
	"unexpected end of equation",

	/* related to tables */
	"no table layout cells specified",
	"no table data cells specified",
	"ignore data in cell",
	"data block still open",
	"ignoring extra data cells",

	/* related to document structure and macros */
	NULL,
	"input stack limit exceeded, infinite loop?",
	"skipping bad character",
	"skipping unknown macro",
	"skipping insecure request",
	"skipping item outside list",
	"skipping column outside column list",
	"skipping end of block that is not open",
	"inserting missing end of block",
	"appending missing end of block",

	/* related to request and macro arguments */
	"escaped character not allowed in a name",
	"argument count wrong",
	"NOT IMPLEMENTED: Bd -file",
	"missing list type, using -item",
	"missing manual name, using \"\"",
	"uname(3) system call failed, using UNKNOWN",
	"unknown standard specifier",
	"skipping request without numeric argument",
	"NOT IMPLEMENTED: .so with absolute path or \"..\"",
	".so request failed",
	"skipping all arguments",
	"skipping excess arguments",
	"divide by zero",

	"unsupported feature",
	"input too large",
	"unsupported control character",
	"unsupported roff request",
	"unsupported table syntax",
	"unsupported table option",
	"unsupported table layout",
	"ignoring macro in table",
};
225
/*
 * Printable name of each message level, indexed by enum mandoclevel
 * and returned by mparse_strlevel().
 */
static	const char * const	mandoclevels[MANDOCLEVEL_MAX] = {
	"SUCCESS",
	"RESERVED",
	"WARNING",
	"ERROR",
	"UNSUPP",
	"BADARG",
	"SYSERR"
};
235
236
237 static void
238 resize_buf(struct buf *buf, size_t initial)
239 {
240
241 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
242 buf->buf = mandoc_realloc(buf->buf, buf->sz);
243 }
244
245 static void
246 choose_parser(struct mparse *curp)
247 {
248 char *cp, *ep;
249 int format;
250
251 /*
252 * If neither command line arguments -mdoc or -man select
253 * a parser nor the roff parser found a .Dd or .TH macro
254 * yet, look ahead in the main input buffer.
255 */
256
257 if ((format = roff_getformat(curp->roff)) == 0) {
258 cp = curp->primary->buf;
259 ep = cp + curp->primary->sz;
260 while (cp < ep) {
261 if (*cp == '.' || *cp == '\'') {
262 cp++;
263 if (cp[0] == 'D' && cp[1] == 'd') {
264 format = MPARSE_MDOC;
265 break;
266 }
267 if (cp[0] == 'T' && cp[1] == 'H') {
268 format = MPARSE_MAN;
269 break;
270 }
271 }
272 cp = memchr(cp, '\n', ep - cp);
273 if (cp == NULL)
274 break;
275 cp++;
276 }
277 }
278
279 if (format == MPARSE_MDOC) {
280 if (NULL == curp->pmdoc)
281 curp->pmdoc = mdoc_alloc(
282 curp->roff, curp, curp->defos,
283 MPARSE_QUICK & curp->options ? 1 : 0);
284 assert(curp->pmdoc);
285 curp->mdoc = curp->pmdoc;
286 return;
287 }
288
289 /* Fall back to man(7) as a last resort. */
290
291 if (NULL == curp->pman)
292 curp->pman = man_alloc(
293 curp->roff, curp, curp->defos,
294 MPARSE_QUICK & curp->options ? 1 : 0);
295 assert(curp->pman);
296 curp->man = curp->pman;
297 }
298
299 /*
300 * Main parse routine for a buffer.
301 * It assumes encoding and line numbering are already set up.
302 * It can recurse directly (for invocations of user-defined
303 * macros, inline equations, and input line traps)
304 * and indirectly (for .so file inclusion).
305 */
306 static void
307 mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
308 {
309 const struct tbl_span *span;
310 struct buf ln;
311 const char *save_file;
312 char *cp;
313 size_t pos; /* byte number in the ln buffer */
314 enum rofferr rr;
315 int of;
316 int lnn; /* line number in the real file */
317 int fd;
318 pid_t save_child;
319 unsigned char c;
320
321 memset(&ln, 0, sizeof(ln));
322
323 lnn = curp->line;
324 pos = 0;
325
326 while (i < blk.sz) {
327 if (0 == pos && '\0' == blk.buf[i])
328 break;
329
330 if (start) {
331 curp->line = lnn;
332 curp->reparse_count = 0;
333
334 if (lnn < 3 &&
335 curp->filenc & MPARSE_UTF8 &&
336 curp->filenc & MPARSE_LATIN1)
337 curp->filenc = preconv_cue(&blk, i);
338 }
339
340 while (i < blk.sz && (start || blk.buf[i] != '\0')) {
341
342 /*
343 * When finding an unescaped newline character,
344 * leave the character loop to process the line.
345 * Skip a preceding carriage return, if any.
346 */
347
348 if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
349 '\n' == blk.buf[i + 1])
350 ++i;
351 if ('\n' == blk.buf[i]) {
352 ++i;
353 ++lnn;
354 break;
355 }
356
357 /*
358 * Make sure we have space for the worst
359 * case of 11 bytes: "\\[u10ffff]\0"
360 */
361
362 if (pos + 11 > ln.sz)
363 resize_buf(&ln, 256);
364
365 /*
366 * Encode 8-bit input.
367 */
368
369 c = blk.buf[i];
370 if (c & 0x80) {
371 if ( ! (curp->filenc && preconv_encode(
372 &blk, &i, &ln, &pos, &curp->filenc))) {
373 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
374 curp->line, pos, "0x%x", c);
375 ln.buf[pos++] = '?';
376 i++;
377 }
378 continue;
379 }
380
381 /*
382 * Exclude control characters.
383 */
384
385 if (c == 0x7f || (c < 0x20 && c != 0x09)) {
386 mandoc_vmsg(c == 0x00 || c == 0x04 ||
387 c > 0x0a ? MANDOCERR_CHAR_BAD :
388 MANDOCERR_CHAR_UNSUPP,
389 curp, curp->line, pos, "0x%x", c);
390 i++;
391 ln.buf[pos++] = '?';
392 continue;
393 }
394
395 /* Trailing backslash = a plain char. */
396
397 if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
398 ln.buf[pos++] = blk.buf[i++];
399 continue;
400 }
401
402 /*
403 * Found escape and at least one other character.
404 * When it's a newline character, skip it.
405 * When there is a carriage return in between,
406 * skip that one as well.
407 */
408
409 if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
410 '\n' == blk.buf[i + 2])
411 ++i;
412 if ('\n' == blk.buf[i + 1]) {
413 i += 2;
414 ++lnn;
415 continue;
416 }
417
418 if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
419 i += 2;
420 /* Comment, skip to end of line */
421 for (; i < blk.sz; ++i) {
422 if ('\n' == blk.buf[i]) {
423 ++i;
424 ++lnn;
425 break;
426 }
427 }
428
429 /* Backout trailing whitespaces */
430 for (; pos > 0; --pos) {
431 if (ln.buf[pos - 1] != ' ')
432 break;
433 if (pos > 2 && ln.buf[pos - 2] == '\\')
434 break;
435 }
436 break;
437 }
438
439 /* Catch escaped bogus characters. */
440
441 c = (unsigned char) blk.buf[i+1];
442
443 if ( ! (isascii(c) &&
444 (isgraph(c) || isblank(c)))) {
445 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
446 curp->line, pos, "0x%x", c);
447 i += 2;
448 ln.buf[pos++] = '?';
449 continue;
450 }
451
452 /* Some other escape sequence, copy & cont. */
453
454 ln.buf[pos++] = blk.buf[i++];
455 ln.buf[pos++] = blk.buf[i++];
456 }
457
458 if (pos >= ln.sz)
459 resize_buf(&ln, 256);
460
461 ln.buf[pos] = '\0';
462
463 /*
464 * A significant amount of complexity is contained by
465 * the roff preprocessor. It's line-oriented but can be
466 * expressed on one line, so we need at times to
467 * readjust our starting point and re-run it. The roff
468 * preprocessor can also readjust the buffers with new
469 * data, so we pass them in wholesale.
470 */
471
472 of = 0;
473
474 /*
475 * Maintain a lookaside buffer of all parsed lines. We
476 * only do this if mparse_keep() has been invoked (the
477 * buffer may be accessed with mparse_getkeep()).
478 */
479
480 if (curp->secondary) {
481 curp->secondary->buf = mandoc_realloc(
482 curp->secondary->buf,
483 curp->secondary->sz + pos + 2);
484 memcpy(curp->secondary->buf +
485 curp->secondary->sz,
486 ln.buf, pos);
487 curp->secondary->sz += pos;
488 curp->secondary->buf
489 [curp->secondary->sz] = '\n';
490 curp->secondary->sz++;
491 curp->secondary->buf
492 [curp->secondary->sz] = '\0';
493 }
494 rerun:
495 rr = roff_parseln(curp->roff, curp->line, &ln, &of);
496
497 switch (rr) {
498 case ROFF_REPARSE:
499 if (REPARSE_LIMIT >= ++curp->reparse_count)
500 mparse_buf_r(curp, ln, of, 0);
501 else
502 mandoc_msg(MANDOCERR_ROFFLOOP, curp,
503 curp->line, pos, NULL);
504 pos = 0;
505 continue;
506 case ROFF_APPEND:
507 pos = strlen(ln.buf);
508 continue;
509 case ROFF_RERUN:
510 goto rerun;
511 case ROFF_IGN:
512 pos = 0;
513 continue;
514 case ROFF_SO:
515 if ( ! (curp->options & MPARSE_SO) &&
516 (i >= blk.sz || blk.buf[i] == '\0')) {
517 curp->sodest = mandoc_strdup(ln.buf + of);
518 free(ln.buf);
519 return;
520 }
521 /*
522 * We remove `so' clauses from our lookaside
523 * buffer because we're going to descend into
524 * the file recursively.
525 */
526 if (curp->secondary)
527 curp->secondary->sz -= pos + 1;
528 save_file = curp->file;
529 save_child = curp->child;
530 if (mparse_open(curp, &fd, ln.buf + of) ==
531 MANDOCLEVEL_OK) {
532 mparse_readfd(curp, fd, ln.buf + of);
533 curp->file = save_file;
534 } else {
535 curp->file = save_file;
536 mandoc_vmsg(MANDOCERR_SO_FAIL,
537 curp, curp->line, pos,
538 ".so %s", ln.buf + of);
539 ln.sz = mandoc_asprintf(&cp,
540 ".sp\nSee the file %s.\n.sp",
541 ln.buf + of);
542 free(ln.buf);
543 ln.buf = cp;
544 of = 0;
545 mparse_buf_r(curp, ln, of, 0);
546 }
547 curp->child = save_child;
548 pos = 0;
549 continue;
550 default:
551 break;
552 }
553
554 /*
555 * If input parsers have not been allocated, do so now.
556 * We keep these instanced between parsers, but set them
557 * locally per parse routine since we can use different
558 * parsers with each one.
559 */
560
561 if ( ! (curp->man || curp->mdoc))
562 choose_parser(curp);
563
564 /*
565 * Lastly, push down into the parsers themselves.
566 * If libroff returns ROFF_TBL, then add it to the
567 * currently open parse. Since we only get here if
568 * there does exist data (see tbl_data.c), we're
569 * guaranteed that something's been allocated.
570 * Do the same for ROFF_EQN.
571 */
572
573 if (rr == ROFF_TBL) {
574 while ((span = roff_span(curp->roff)) != NULL)
575 if (curp->man == NULL)
576 mdoc_addspan(curp->mdoc, span);
577 else
578 man_addspan(curp->man, span);
579 } else if (rr == ROFF_EQN) {
580 if (curp->man == NULL)
581 mdoc_addeqn(curp->mdoc, roff_eqn(curp->roff));
582 else
583 man_addeqn(curp->man, roff_eqn(curp->roff));
584 } else if ((curp->man == NULL ?
585 mdoc_parseln(curp->mdoc, curp->line, ln.buf, of) :
586 man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
587 break;
588
589 /* Temporary buffers typically are not full. */
590
591 if (0 == start && '\0' == blk.buf[i])
592 break;
593
594 /* Start the next input line. */
595
596 pos = 0;
597 }
598
599 free(ln.buf);
600 }
601
602 static int
603 read_whole_file(struct mparse *curp, const char *file, int fd,
604 struct buf *fb, int *with_mmap)
605 {
606 size_t off;
607 ssize_t ssz;
608
609 #if HAVE_MMAP
610 struct stat st;
611 if (-1 == fstat(fd, &st)) {
612 perror(file);
613 exit((int)MANDOCLEVEL_SYSERR);
614 }
615
616 /*
617 * If we're a regular file, try just reading in the whole entry
618 * via mmap(). This is faster than reading it into blocks, and
619 * since each file is only a few bytes to begin with, I'm not
620 * concerned that this is going to tank any machines.
621 */
622
623 if (S_ISREG(st.st_mode)) {
624 if (st.st_size >= (1U << 31)) {
625 mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
626 return(0);
627 }
628 *with_mmap = 1;
629 fb->sz = (size_t)st.st_size;
630 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
631 if (fb->buf != MAP_FAILED)
632 return(1);
633 }
634 #endif
635
636 /*
637 * If this isn't a regular file (like, say, stdin), then we must
638 * go the old way and just read things in bit by bit.
639 */
640
641 *with_mmap = 0;
642 off = 0;
643 fb->sz = 0;
644 fb->buf = NULL;
645 for (;;) {
646 if (off == fb->sz) {
647 if (fb->sz == (1U << 31)) {
648 mandoc_msg(MANDOCERR_TOOLARGE, curp,
649 0, 0, NULL);
650 break;
651 }
652 resize_buf(fb, 65536);
653 }
654 ssz = read(fd, fb->buf + (int)off, fb->sz - off);
655 if (ssz == 0) {
656 fb->sz = off;
657 return(1);
658 }
659 if (ssz == -1) {
660 perror(file);
661 exit((int)MANDOCLEVEL_SYSERR);
662 }
663 off += (size_t)ssz;
664 }
665
666 free(fb->buf);
667 fb->buf = NULL;
668 return(0);
669 }
670
671 static void
672 mparse_end(struct mparse *curp)
673 {
674
675 if (curp->mdoc == NULL &&
676 curp->man == NULL &&
677 curp->sodest == NULL) {
678 if (curp->options & MPARSE_MDOC)
679 curp->mdoc = curp->pmdoc;
680 else {
681 if (curp->pman == NULL)
682 curp->pman = man_alloc(
683 curp->roff, curp, curp->defos,
684 curp->options & MPARSE_QUICK ? 1 : 0);
685 curp->man = curp->pman;
686 }
687 }
688 if (curp->mdoc)
689 mdoc_endparse(curp->mdoc);
690 if (curp->man)
691 man_endparse(curp->man);
692 roff_endparse(curp->roff);
693 }
694
695 static void
696 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
697 {
698 struct buf *svprimary;
699 const char *svfile;
700 size_t offset;
701 static int recursion_depth;
702
703 if (64 < recursion_depth) {
704 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
705 return;
706 }
707
708 /* Line number is per-file. */
709 svfile = curp->file;
710 curp->file = file;
711 svprimary = curp->primary;
712 curp->primary = &blk;
713 curp->line = 1;
714 recursion_depth++;
715
716 /* Skip an UTF-8 byte order mark. */
717 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
718 (unsigned char)blk.buf[0] == 0xef &&
719 (unsigned char)blk.buf[1] == 0xbb &&
720 (unsigned char)blk.buf[2] == 0xbf) {
721 offset = 3;
722 curp->filenc &= ~MPARSE_LATIN1;
723 } else
724 offset = 0;
725
726 mparse_buf_r(curp, blk, offset, 1);
727
728 if (--recursion_depth == 0)
729 mparse_end(curp);
730
731 curp->primary = svprimary;
732 curp->file = svfile;
733 }
734
735 enum mandoclevel
736 mparse_readmem(struct mparse *curp, void *buf, size_t len,
737 const char *file)
738 {
739 struct buf blk;
740
741 blk.buf = buf;
742 blk.sz = len;
743
744 mparse_parse_buffer(curp, blk, file);
745 return(curp->file_status);
746 }
747
748 /*
749 * Read the whole file into memory and call the parsers.
750 * Called recursively when an .so request is encountered.
751 */
752 enum mandoclevel
753 mparse_readfd(struct mparse *curp, int fd, const char *file)
754 {
755 struct buf blk;
756 int with_mmap;
757 int save_filenc;
758
759 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
760 save_filenc = curp->filenc;
761 curp->filenc = curp->options &
762 (MPARSE_UTF8 | MPARSE_LATIN1);
763 mparse_parse_buffer(curp, blk, file);
764 curp->filenc = save_filenc;
765 #if HAVE_MMAP
766 if (with_mmap)
767 munmap(blk.buf, blk.sz);
768 else
769 #endif
770 free(blk.buf);
771 }
772
773 if (fd != STDIN_FILENO && close(fd) == -1)
774 perror(file);
775
776 mparse_wait(curp);
777 return(curp->file_status);
778 }
779
780 enum mandoclevel
781 mparse_open(struct mparse *curp, int *fd, const char *file)
782 {
783 int pfd[2];
784 int save_errno;
785 char *cp;
786
787 curp->file = file;
788
789 /* Unless zipped, try to just open the file. */
790
791 if ((cp = strrchr(file, '.')) == NULL ||
792 strcmp(cp + 1, "gz")) {
793 curp->child = 0;
794 if ((*fd = open(file, O_RDONLY)) != -1)
795 return(MANDOCLEVEL_OK);
796
797 /* Open failed; try to append ".gz". */
798
799 mandoc_asprintf(&cp, "%s.gz", file);
800 file = cp;
801 } else
802 cp = NULL;
803
804 /* Before forking, make sure the file can be read. */
805
806 save_errno = errno;
807 if (access(file, R_OK) == -1) {
808 if (cp != NULL)
809 errno = save_errno;
810 free(cp);
811 *fd = -1;
812 curp->child = 0;
813 mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
814 return(MANDOCLEVEL_ERROR);
815 }
816
817 /* Run gunzip(1). */
818
819 if (pipe(pfd) == -1) {
820 perror("pipe");
821 exit((int)MANDOCLEVEL_SYSERR);
822 }
823
824 switch (curp->child = fork()) {
825 case -1:
826 perror("fork");
827 exit((int)MANDOCLEVEL_SYSERR);
828 case 0:
829 close(pfd[0]);
830 if (dup2(pfd[1], STDOUT_FILENO) == -1) {
831 perror("dup");
832 exit((int)MANDOCLEVEL_SYSERR);
833 }
834 execlp("gunzip", "gunzip", "-c", file, NULL);
835 perror("exec");
836 exit((int)MANDOCLEVEL_SYSERR);
837 default:
838 close(pfd[1]);
839 *fd = pfd[0];
840 return(MANDOCLEVEL_OK);
841 }
842 }
843
844 enum mandoclevel
845 mparse_wait(struct mparse *curp)
846 {
847 int status;
848
849 if (curp->child == 0)
850 return(MANDOCLEVEL_OK);
851
852 if (waitpid(curp->child, &status, 0) == -1) {
853 perror("wait");
854 exit((int)MANDOCLEVEL_SYSERR);
855 }
856 if (WIFSIGNALED(status)) {
857 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
858 "gunzip died from signal %d", WTERMSIG(status));
859 return(MANDOCLEVEL_ERROR);
860 }
861 if (WEXITSTATUS(status)) {
862 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
863 "gunzip failed with code %d", WEXITSTATUS(status));
864 return(MANDOCLEVEL_ERROR);
865 }
866 return(MANDOCLEVEL_OK);
867 }
868
869 struct mparse *
870 mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
871 const struct mchars *mchars, const char *defos)
872 {
873 struct mparse *curp;
874
875 curp = mandoc_calloc(1, sizeof(struct mparse));
876
877 curp->options = options;
878 curp->wlevel = wlevel;
879 curp->mmsg = mmsg;
880 curp->defos = defos;
881
882 curp->mchars = mchars;
883 curp->roff = roff_alloc(curp, curp->mchars, options);
884 if (curp->options & MPARSE_MDOC)
885 curp->pmdoc = mdoc_alloc(
886 curp->roff, curp, curp->defos,
887 curp->options & MPARSE_QUICK ? 1 : 0);
888 if (curp->options & MPARSE_MAN)
889 curp->pman = man_alloc(
890 curp->roff, curp, curp->defos,
891 curp->options & MPARSE_QUICK ? 1 : 0);
892
893 return(curp);
894 }
895
896 void
897 mparse_reset(struct mparse *curp)
898 {
899
900 roff_reset(curp->roff);
901
902 if (curp->mdoc)
903 mdoc_reset(curp->mdoc);
904 if (curp->man)
905 man_reset(curp->man);
906 if (curp->secondary)
907 curp->secondary->sz = 0;
908
909 curp->file_status = MANDOCLEVEL_OK;
910 curp->mdoc = NULL;
911 curp->man = NULL;
912
913 free(curp->sodest);
914 curp->sodest = NULL;
915 }
916
917 void
918 mparse_free(struct mparse *curp)
919 {
920
921 if (curp->pmdoc)
922 mdoc_free(curp->pmdoc);
923 if (curp->pman)
924 man_free(curp->pman);
925 if (curp->roff)
926 roff_free(curp->roff);
927 if (curp->secondary)
928 free(curp->secondary->buf);
929
930 free(curp->secondary);
931 free(curp->sodest);
932 free(curp);
933 }
934
935 void
936 mparse_result(struct mparse *curp,
937 struct mdoc **mdoc, struct man **man, char **sodest)
938 {
939
940 if (sodest && NULL != (*sodest = curp->sodest)) {
941 *mdoc = NULL;
942 *man = NULL;
943 return;
944 }
945 if (mdoc)
946 *mdoc = curp->mdoc;
947 if (man)
948 *man = curp->man;
949 }
950
951 void
952 mandoc_vmsg(enum mandocerr t, struct mparse *m,
953 int ln, int pos, const char *fmt, ...)
954 {
955 char buf[256];
956 va_list ap;
957
958 va_start(ap, fmt);
959 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
960 va_end(ap);
961
962 mandoc_msg(t, m, ln, pos, buf);
963 }
964
965 void
966 mandoc_msg(enum mandocerr er, struct mparse *m,
967 int ln, int col, const char *msg)
968 {
969 enum mandoclevel level;
970
971 level = MANDOCLEVEL_UNSUPP;
972 while (er < mandoclimits[level])
973 level--;
974
975 if (level < m->wlevel && er != MANDOCERR_FILE)
976 return;
977
978 if (m->mmsg)
979 (*m->mmsg)(er, level, m->file, ln, col, msg);
980
981 if (m->file_status < level)
982 m->file_status = level;
983 }
984
985 const char *
986 mparse_strerror(enum mandocerr er)
987 {
988
989 return(mandocerrs[er]);
990 }
991
992 const char *
993 mparse_strlevel(enum mandoclevel lvl)
994 {
995 return(mandoclevels[lvl]);
996 }
997
998 void
999 mparse_keep(struct mparse *p)
1000 {
1001
1002 assert(NULL == p->secondary);
1003 p->secondary = mandoc_calloc(1, sizeof(struct buf));
1004 }
1005
1006 const char *
1007 mparse_getkeep(const struct mparse *p)
1008 {
1009
1010 assert(p->secondary);
1011 return(p->secondary->sz ? p->secondary->buf : NULL);
1012 }