]> git.cameronkatri.com Git - mandoc.git/blob - read.c
Improve (or rather, rewrite) tbl(7) option parsing.
[mandoc.git] / read.c
1 /* $Id: read.c,v 1.116 2015/01/26 00:57:22 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include "config.h"
20
21 #include <sys/types.h>
22 #if HAVE_MMAP
23 #include <sys/mman.h>
24 #include <sys/stat.h>
25 #endif
26 #include <sys/wait.h>
27
28 #include <assert.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <stdarg.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38
39 #include "mandoc.h"
40 #include "mandoc_aux.h"
41 #include "libmandoc.h"
42 #include "mdoc.h"
43 #include "man.h"
44
/* Maximum number of ROFF_REPARSE rounds accepted for one input line. */
#define	REPARSE_LIMIT	1000
46
47 struct mparse {
48 struct man *pman; /* persistent man parser */
49 struct mdoc *pmdoc; /* persistent mdoc parser */
50 struct man *man; /* man parser */
51 struct mdoc *mdoc; /* mdoc parser */
52 struct roff *roff; /* roff parser (!NULL) */
53 const struct mchars *mchars; /* character table */
54 char *sodest; /* filename pointed to by .so */
55 const char *file; /* filename of current input file */
56 struct buf *primary; /* buffer currently being parsed */
57 struct buf *secondary; /* preprocessed copy of input */
58 const char *defos; /* default operating system */
59 mandocmsg mmsg; /* warning/error message handler */
60 enum mandoclevel file_status; /* status of current parse */
61 enum mandoclevel wlevel; /* ignore messages below this */
62 int options; /* parser options */
63 int filenc; /* encoding of the current file */
64 int reparse_count; /* finite interp. stack */
65 int line; /* line number in the file */
66 pid_t child; /* the gunzip(1) process */
67 };
68
/* Forward declarations of the file-local helpers. */
static	void	  choose_parser(struct mparse *curp);
static	void	  resize_buf(struct buf *buf, size_t initial);
static	void	  mparse_buf_r(struct mparse *curp, struct buf blk,
			size_t i, int start);
static	int	  read_whole_file(struct mparse *curp, const char *file,
			int fd, struct buf *fb, int *with_mmap);
static	void	  mparse_end(struct mparse *curp);
static	void	  mparse_parse_buffer(struct mparse *curp, struct buf blk,
			const char *file);
77
78 static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = {
79 MANDOCERR_OK,
80 MANDOCERR_WARNING,
81 MANDOCERR_WARNING,
82 MANDOCERR_ERROR,
83 MANDOCERR_UNSUPP,
84 MANDOCERR_MAX,
85 MANDOCERR_MAX
86 };
87
88 static const char * const mandocerrs[MANDOCERR_MAX] = {
89 "ok",
90
91 "generic warning",
92
93 /* related to the prologue */
94 "missing manual title, using UNTITLED",
95 "missing manual title, using \"\"",
96 "lower case character in document title",
97 "missing manual section, using \"\"",
98 "unknown manual section",
99 "missing date, using today's date",
100 "cannot parse date, using it verbatim",
101 "missing Os macro, using \"\"",
102 "duplicate prologue macro",
103 "late prologue macro",
104 "skipping late title macro",
105 "prologue macros out of order",
106
107 /* related to document structure */
108 ".so is fragile, better use ln(1)",
109 "no document body",
110 "content before first section header",
111 "first section is not \"NAME\"",
112 "bad NAME section contents",
113 "sections out of conventional order",
114 "duplicate section title",
115 "unexpected section",
116 "unusual Xr order",
117 "unusual Xr punctuation",
118 "AUTHORS section without An macro",
119
120 /* related to macros and nesting */
121 "obsolete macro",
122 "macro neither callable nor escaped",
123 "skipping paragraph macro",
124 "moving paragraph macro out of list",
125 "skipping no-space macro",
126 "blocks badly nested",
127 "nested displays are not portable",
128 "moving content out of list",
129 ".Vt block has child macro",
130 "fill mode already enabled, skipping",
131 "fill mode already disabled, skipping",
132 "line scope broken",
133
134 /* related to missing macro arguments */
135 "skipping empty request",
136 "conditional request controls empty scope",
137 "skipping empty macro",
138 "empty argument, using 0n",
139 "argument count wrong",
140 "missing display type, using -ragged",
141 "list type is not the first argument",
142 "missing -width in -tag list, using 8n",
143 "missing utility name, using \"\"",
144 "empty head in list item",
145 "empty list item",
146 "missing font type, using \\fR",
147 "unknown font type, using \\fR",
148 "nothing follows prefix",
149 "missing -std argument, adding it",
150 "missing eqn box, using \"\"",
151
152 /* related to bad macro arguments */
153 "unterminated quoted argument",
154 "duplicate argument",
155 "skipping duplicate argument",
156 "skipping duplicate display type",
157 "skipping duplicate list type",
158 "skipping -width argument",
159 "unknown AT&T UNIX version",
160 "comma in function argument",
161 "parenthesis in function name",
162 "invalid content in Rs block",
163 "invalid Boolean argument",
164 "unknown font, skipping request",
165
166 /* related to plain text */
167 "blank line in fill mode, using .sp",
168 "tab in filled text",
169 "whitespace at end of input line",
170 "bad comment style",
171 "invalid escape sequence",
172 "undefined string, using \"\"",
173
174 "generic error",
175
176 /* related to equations */
177 "unexpected equation scope closure",
178 "equation scope open on exit",
179 "overlapping equation scopes",
180 "unexpected end of equation",
181
182 /* related to tables */
183 "non-alphabetic character in tbl options",
184 "skipping unknown tbl option",
185 "missing tbl option argument",
186 "wrong tbl option argument size",
187 "no table layout cells specified",
188 "no table data cells specified",
189 "ignore data in cell",
190 "data block still open",
191 "ignoring extra data cells",
192
193 /* related to document structure and macros */
194 NULL,
195 "input stack limit exceeded, infinite loop?",
196 "skipping bad character",
197 "skipping unknown macro",
198 "skipping insecure request",
199 "skipping item outside list",
200 "skipping column outside column list",
201 "skipping end of block that is not open",
202 "fewer RS blocks open, skipping",
203 "inserting missing end of block",
204 "appending missing end of block",
205
206 /* related to request and macro arguments */
207 "escaped character not allowed in a name",
208 "argument count wrong",
209 "NOT IMPLEMENTED: Bd -file",
210 "missing list type, using -item",
211 "missing manual name, using \"\"",
212 "uname(3) system call failed, using UNKNOWN",
213 "unknown standard specifier",
214 "skipping request without numeric argument",
215 "NOT IMPLEMENTED: .so with absolute path or \"..\"",
216 ".so request failed",
217 "skipping all arguments",
218 "skipping excess arguments",
219 "divide by zero",
220
221 "unsupported feature",
222 "input too large",
223 "unsupported control character",
224 "unsupported roff request",
225 "unsupported table layout",
226 "ignoring macro in table",
227 };
228
229 static const char * const mandoclevels[MANDOCLEVEL_MAX] = {
230 "SUCCESS",
231 "RESERVED",
232 "WARNING",
233 "ERROR",
234 "UNSUPP",
235 "BADARG",
236 "SYSERR"
237 };
238
239
240 static void
241 resize_buf(struct buf *buf, size_t initial)
242 {
243
244 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
245 buf->buf = mandoc_realloc(buf->buf, buf->sz);
246 }
247
248 static void
249 choose_parser(struct mparse *curp)
250 {
251 char *cp, *ep;
252 int format;
253
254 /*
255 * If neither command line arguments -mdoc or -man select
256 * a parser nor the roff parser found a .Dd or .TH macro
257 * yet, look ahead in the main input buffer.
258 */
259
260 if ((format = roff_getformat(curp->roff)) == 0) {
261 cp = curp->primary->buf;
262 ep = cp + curp->primary->sz;
263 while (cp < ep) {
264 if (*cp == '.' || *cp == '\'') {
265 cp++;
266 if (cp[0] == 'D' && cp[1] == 'd') {
267 format = MPARSE_MDOC;
268 break;
269 }
270 if (cp[0] == 'T' && cp[1] == 'H') {
271 format = MPARSE_MAN;
272 break;
273 }
274 }
275 cp = memchr(cp, '\n', ep - cp);
276 if (cp == NULL)
277 break;
278 cp++;
279 }
280 }
281
282 if (format == MPARSE_MDOC) {
283 if (NULL == curp->pmdoc)
284 curp->pmdoc = mdoc_alloc(
285 curp->roff, curp, curp->defos,
286 MPARSE_QUICK & curp->options ? 1 : 0);
287 assert(curp->pmdoc);
288 curp->mdoc = curp->pmdoc;
289 return;
290 }
291
292 /* Fall back to man(7) as a last resort. */
293
294 if (NULL == curp->pman)
295 curp->pman = man_alloc(
296 curp->roff, curp, curp->defos,
297 MPARSE_QUICK & curp->options ? 1 : 0);
298 assert(curp->pman);
299 curp->man = curp->pman;
300 }
301
302 /*
303 * Main parse routine for a buffer.
304 * It assumes encoding and line numbering are already set up.
305 * It can recurse directly (for invocations of user-defined
306 * macros, inline equations, and input line traps)
307 * and indirectly (for .so file inclusion).
308 */
309 static void
310 mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
311 {
312 const struct tbl_span *span;
313 struct buf ln;
314 const char *save_file;
315 char *cp;
316 size_t pos; /* byte number in the ln buffer */
317 enum rofferr rr;
318 int of;
319 int lnn; /* line number in the real file */
320 int fd;
321 pid_t save_child;
322 unsigned char c;
323
324 memset(&ln, 0, sizeof(ln));
325
326 lnn = curp->line;
327 pos = 0;
328
329 while (i < blk.sz) {
330 if (0 == pos && '\0' == blk.buf[i])
331 break;
332
333 if (start) {
334 curp->line = lnn;
335 curp->reparse_count = 0;
336
337 if (lnn < 3 &&
338 curp->filenc & MPARSE_UTF8 &&
339 curp->filenc & MPARSE_LATIN1)
340 curp->filenc = preconv_cue(&blk, i);
341 }
342
343 while (i < blk.sz && (start || blk.buf[i] != '\0')) {
344
345 /*
346 * When finding an unescaped newline character,
347 * leave the character loop to process the line.
348 * Skip a preceding carriage return, if any.
349 */
350
351 if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
352 '\n' == blk.buf[i + 1])
353 ++i;
354 if ('\n' == blk.buf[i]) {
355 ++i;
356 ++lnn;
357 break;
358 }
359
360 /*
361 * Make sure we have space for the worst
362 * case of 11 bytes: "\\[u10ffff]\0"
363 */
364
365 if (pos + 11 > ln.sz)
366 resize_buf(&ln, 256);
367
368 /*
369 * Encode 8-bit input.
370 */
371
372 c = blk.buf[i];
373 if (c & 0x80) {
374 if ( ! (curp->filenc && preconv_encode(
375 &blk, &i, &ln, &pos, &curp->filenc))) {
376 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
377 curp->line, pos, "0x%x", c);
378 ln.buf[pos++] = '?';
379 i++;
380 }
381 continue;
382 }
383
384 /*
385 * Exclude control characters.
386 */
387
388 if (c == 0x7f || (c < 0x20 && c != 0x09)) {
389 mandoc_vmsg(c == 0x00 || c == 0x04 ||
390 c > 0x0a ? MANDOCERR_CHAR_BAD :
391 MANDOCERR_CHAR_UNSUPP,
392 curp, curp->line, pos, "0x%x", c);
393 i++;
394 ln.buf[pos++] = '?';
395 continue;
396 }
397
398 /* Trailing backslash = a plain char. */
399
400 if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
401 ln.buf[pos++] = blk.buf[i++];
402 continue;
403 }
404
405 /*
406 * Found escape and at least one other character.
407 * When it's a newline character, skip it.
408 * When there is a carriage return in between,
409 * skip that one as well.
410 */
411
412 if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
413 '\n' == blk.buf[i + 2])
414 ++i;
415 if ('\n' == blk.buf[i + 1]) {
416 i += 2;
417 ++lnn;
418 continue;
419 }
420
421 if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
422 i += 2;
423 /* Comment, skip to end of line */
424 for (; i < blk.sz; ++i) {
425 if ('\n' == blk.buf[i]) {
426 ++i;
427 ++lnn;
428 break;
429 }
430 }
431
432 /* Backout trailing whitespaces */
433 for (; pos > 0; --pos) {
434 if (ln.buf[pos - 1] != ' ')
435 break;
436 if (pos > 2 && ln.buf[pos - 2] == '\\')
437 break;
438 }
439 break;
440 }
441
442 /* Catch escaped bogus characters. */
443
444 c = (unsigned char) blk.buf[i+1];
445
446 if ( ! (isascii(c) &&
447 (isgraph(c) || isblank(c)))) {
448 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
449 curp->line, pos, "0x%x", c);
450 i += 2;
451 ln.buf[pos++] = '?';
452 continue;
453 }
454
455 /* Some other escape sequence, copy & cont. */
456
457 ln.buf[pos++] = blk.buf[i++];
458 ln.buf[pos++] = blk.buf[i++];
459 }
460
461 if (pos >= ln.sz)
462 resize_buf(&ln, 256);
463
464 ln.buf[pos] = '\0';
465
466 /*
467 * A significant amount of complexity is contained by
468 * the roff preprocessor. It's line-oriented but can be
469 * expressed on one line, so we need at times to
470 * readjust our starting point and re-run it. The roff
471 * preprocessor can also readjust the buffers with new
472 * data, so we pass them in wholesale.
473 */
474
475 of = 0;
476
477 /*
478 * Maintain a lookaside buffer of all parsed lines. We
479 * only do this if mparse_keep() has been invoked (the
480 * buffer may be accessed with mparse_getkeep()).
481 */
482
483 if (curp->secondary) {
484 curp->secondary->buf = mandoc_realloc(
485 curp->secondary->buf,
486 curp->secondary->sz + pos + 2);
487 memcpy(curp->secondary->buf +
488 curp->secondary->sz,
489 ln.buf, pos);
490 curp->secondary->sz += pos;
491 curp->secondary->buf
492 [curp->secondary->sz] = '\n';
493 curp->secondary->sz++;
494 curp->secondary->buf
495 [curp->secondary->sz] = '\0';
496 }
497 rerun:
498 rr = roff_parseln(curp->roff, curp->line, &ln, &of);
499
500 switch (rr) {
501 case ROFF_REPARSE:
502 if (REPARSE_LIMIT >= ++curp->reparse_count)
503 mparse_buf_r(curp, ln, of, 0);
504 else
505 mandoc_msg(MANDOCERR_ROFFLOOP, curp,
506 curp->line, pos, NULL);
507 pos = 0;
508 continue;
509 case ROFF_APPEND:
510 pos = strlen(ln.buf);
511 continue;
512 case ROFF_RERUN:
513 goto rerun;
514 case ROFF_IGN:
515 pos = 0;
516 continue;
517 case ROFF_SO:
518 if ( ! (curp->options & MPARSE_SO) &&
519 (i >= blk.sz || blk.buf[i] == '\0')) {
520 curp->sodest = mandoc_strdup(ln.buf + of);
521 free(ln.buf);
522 return;
523 }
524 /*
525 * We remove `so' clauses from our lookaside
526 * buffer because we're going to descend into
527 * the file recursively.
528 */
529 if (curp->secondary)
530 curp->secondary->sz -= pos + 1;
531 save_file = curp->file;
532 save_child = curp->child;
533 if (mparse_open(curp, &fd, ln.buf + of) ==
534 MANDOCLEVEL_OK) {
535 mparse_readfd(curp, fd, ln.buf + of);
536 curp->file = save_file;
537 } else {
538 curp->file = save_file;
539 mandoc_vmsg(MANDOCERR_SO_FAIL,
540 curp, curp->line, pos,
541 ".so %s", ln.buf + of);
542 ln.sz = mandoc_asprintf(&cp,
543 ".sp\nSee the file %s.\n.sp",
544 ln.buf + of);
545 free(ln.buf);
546 ln.buf = cp;
547 of = 0;
548 mparse_buf_r(curp, ln, of, 0);
549 }
550 curp->child = save_child;
551 pos = 0;
552 continue;
553 default:
554 break;
555 }
556
557 /*
558 * If input parsers have not been allocated, do so now.
559 * We keep these instanced between parsers, but set them
560 * locally per parse routine since we can use different
561 * parsers with each one.
562 */
563
564 if ( ! (curp->man || curp->mdoc))
565 choose_parser(curp);
566
567 /*
568 * Lastly, push down into the parsers themselves.
569 * If libroff returns ROFF_TBL, then add it to the
570 * currently open parse. Since we only get here if
571 * there does exist data (see tbl_data.c), we're
572 * guaranteed that something's been allocated.
573 * Do the same for ROFF_EQN.
574 */
575
576 if (rr == ROFF_TBL) {
577 while ((span = roff_span(curp->roff)) != NULL)
578 if (curp->man == NULL)
579 mdoc_addspan(curp->mdoc, span);
580 else
581 man_addspan(curp->man, span);
582 } else if (rr == ROFF_EQN) {
583 if (curp->man == NULL)
584 mdoc_addeqn(curp->mdoc, roff_eqn(curp->roff));
585 else
586 man_addeqn(curp->man, roff_eqn(curp->roff));
587 } else if ((curp->man == NULL ?
588 mdoc_parseln(curp->mdoc, curp->line, ln.buf, of) :
589 man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
590 break;
591
592 /* Temporary buffers typically are not full. */
593
594 if (0 == start && '\0' == blk.buf[i])
595 break;
596
597 /* Start the next input line. */
598
599 pos = 0;
600 }
601
602 free(ln.buf);
603 }
604
605 static int
606 read_whole_file(struct mparse *curp, const char *file, int fd,
607 struct buf *fb, int *with_mmap)
608 {
609 size_t off;
610 ssize_t ssz;
611
612 #if HAVE_MMAP
613 struct stat st;
614 if (-1 == fstat(fd, &st)) {
615 perror(file);
616 exit((int)MANDOCLEVEL_SYSERR);
617 }
618
619 /*
620 * If we're a regular file, try just reading in the whole entry
621 * via mmap(). This is faster than reading it into blocks, and
622 * since each file is only a few bytes to begin with, I'm not
623 * concerned that this is going to tank any machines.
624 */
625
626 if (S_ISREG(st.st_mode)) {
627 if (st.st_size >= (1U << 31)) {
628 mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
629 return(0);
630 }
631 *with_mmap = 1;
632 fb->sz = (size_t)st.st_size;
633 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
634 if (fb->buf != MAP_FAILED)
635 return(1);
636 }
637 #endif
638
639 /*
640 * If this isn't a regular file (like, say, stdin), then we must
641 * go the old way and just read things in bit by bit.
642 */
643
644 *with_mmap = 0;
645 off = 0;
646 fb->sz = 0;
647 fb->buf = NULL;
648 for (;;) {
649 if (off == fb->sz) {
650 if (fb->sz == (1U << 31)) {
651 mandoc_msg(MANDOCERR_TOOLARGE, curp,
652 0, 0, NULL);
653 break;
654 }
655 resize_buf(fb, 65536);
656 }
657 ssz = read(fd, fb->buf + (int)off, fb->sz - off);
658 if (ssz == 0) {
659 fb->sz = off;
660 return(1);
661 }
662 if (ssz == -1) {
663 perror(file);
664 exit((int)MANDOCLEVEL_SYSERR);
665 }
666 off += (size_t)ssz;
667 }
668
669 free(fb->buf);
670 fb->buf = NULL;
671 return(0);
672 }
673
674 static void
675 mparse_end(struct mparse *curp)
676 {
677
678 if (curp->mdoc == NULL &&
679 curp->man == NULL &&
680 curp->sodest == NULL) {
681 if (curp->options & MPARSE_MDOC)
682 curp->mdoc = curp->pmdoc;
683 else {
684 if (curp->pman == NULL)
685 curp->pman = man_alloc(
686 curp->roff, curp, curp->defos,
687 curp->options & MPARSE_QUICK ? 1 : 0);
688 curp->man = curp->pman;
689 }
690 }
691 if (curp->mdoc)
692 mdoc_endparse(curp->mdoc);
693 if (curp->man)
694 man_endparse(curp->man);
695 roff_endparse(curp->roff);
696 }
697
698 static void
699 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
700 {
701 struct buf *svprimary;
702 const char *svfile;
703 size_t offset;
704 static int recursion_depth;
705
706 if (64 < recursion_depth) {
707 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
708 return;
709 }
710
711 /* Line number is per-file. */
712 svfile = curp->file;
713 curp->file = file;
714 svprimary = curp->primary;
715 curp->primary = &blk;
716 curp->line = 1;
717 recursion_depth++;
718
719 /* Skip an UTF-8 byte order mark. */
720 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
721 (unsigned char)blk.buf[0] == 0xef &&
722 (unsigned char)blk.buf[1] == 0xbb &&
723 (unsigned char)blk.buf[2] == 0xbf) {
724 offset = 3;
725 curp->filenc &= ~MPARSE_LATIN1;
726 } else
727 offset = 0;
728
729 mparse_buf_r(curp, blk, offset, 1);
730
731 if (--recursion_depth == 0)
732 mparse_end(curp);
733
734 curp->primary = svprimary;
735 curp->file = svfile;
736 }
737
738 enum mandoclevel
739 mparse_readmem(struct mparse *curp, void *buf, size_t len,
740 const char *file)
741 {
742 struct buf blk;
743
744 blk.buf = buf;
745 blk.sz = len;
746
747 mparse_parse_buffer(curp, blk, file);
748 return(curp->file_status);
749 }
750
751 /*
752 * Read the whole file into memory and call the parsers.
753 * Called recursively when an .so request is encountered.
754 */
755 enum mandoclevel
756 mparse_readfd(struct mparse *curp, int fd, const char *file)
757 {
758 struct buf blk;
759 int with_mmap;
760 int save_filenc;
761
762 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
763 save_filenc = curp->filenc;
764 curp->filenc = curp->options &
765 (MPARSE_UTF8 | MPARSE_LATIN1);
766 mparse_parse_buffer(curp, blk, file);
767 curp->filenc = save_filenc;
768 #if HAVE_MMAP
769 if (with_mmap)
770 munmap(blk.buf, blk.sz);
771 else
772 #endif
773 free(blk.buf);
774 }
775
776 if (fd != STDIN_FILENO && close(fd) == -1)
777 perror(file);
778
779 mparse_wait(curp);
780 return(curp->file_status);
781 }
782
783 enum mandoclevel
784 mparse_open(struct mparse *curp, int *fd, const char *file)
785 {
786 int pfd[2];
787 int save_errno;
788 char *cp;
789
790 curp->file = file;
791
792 /* Unless zipped, try to just open the file. */
793
794 if ((cp = strrchr(file, '.')) == NULL ||
795 strcmp(cp + 1, "gz")) {
796 curp->child = 0;
797 if ((*fd = open(file, O_RDONLY)) != -1)
798 return(MANDOCLEVEL_OK);
799
800 /* Open failed; try to append ".gz". */
801
802 mandoc_asprintf(&cp, "%s.gz", file);
803 file = cp;
804 } else
805 cp = NULL;
806
807 /* Before forking, make sure the file can be read. */
808
809 save_errno = errno;
810 if (access(file, R_OK) == -1) {
811 if (cp != NULL)
812 errno = save_errno;
813 free(cp);
814 *fd = -1;
815 curp->child = 0;
816 mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
817 return(MANDOCLEVEL_ERROR);
818 }
819
820 /* Run gunzip(1). */
821
822 if (pipe(pfd) == -1) {
823 perror("pipe");
824 exit((int)MANDOCLEVEL_SYSERR);
825 }
826
827 switch (curp->child = fork()) {
828 case -1:
829 perror("fork");
830 exit((int)MANDOCLEVEL_SYSERR);
831 case 0:
832 close(pfd[0]);
833 if (dup2(pfd[1], STDOUT_FILENO) == -1) {
834 perror("dup");
835 exit((int)MANDOCLEVEL_SYSERR);
836 }
837 execlp("gunzip", "gunzip", "-c", file, NULL);
838 perror("exec");
839 exit((int)MANDOCLEVEL_SYSERR);
840 default:
841 close(pfd[1]);
842 *fd = pfd[0];
843 return(MANDOCLEVEL_OK);
844 }
845 }
846
847 enum mandoclevel
848 mparse_wait(struct mparse *curp)
849 {
850 int status;
851
852 if (curp->child == 0)
853 return(MANDOCLEVEL_OK);
854
855 if (waitpid(curp->child, &status, 0) == -1) {
856 perror("wait");
857 exit((int)MANDOCLEVEL_SYSERR);
858 }
859 if (WIFSIGNALED(status)) {
860 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
861 "gunzip died from signal %d", WTERMSIG(status));
862 return(MANDOCLEVEL_ERROR);
863 }
864 if (WEXITSTATUS(status)) {
865 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
866 "gunzip failed with code %d", WEXITSTATUS(status));
867 return(MANDOCLEVEL_ERROR);
868 }
869 return(MANDOCLEVEL_OK);
870 }
871
872 struct mparse *
873 mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
874 const struct mchars *mchars, const char *defos)
875 {
876 struct mparse *curp;
877
878 curp = mandoc_calloc(1, sizeof(struct mparse));
879
880 curp->options = options;
881 curp->wlevel = wlevel;
882 curp->mmsg = mmsg;
883 curp->defos = defos;
884
885 curp->mchars = mchars;
886 curp->roff = roff_alloc(curp, curp->mchars, options);
887 if (curp->options & MPARSE_MDOC)
888 curp->pmdoc = mdoc_alloc(
889 curp->roff, curp, curp->defos,
890 curp->options & MPARSE_QUICK ? 1 : 0);
891 if (curp->options & MPARSE_MAN)
892 curp->pman = man_alloc(
893 curp->roff, curp, curp->defos,
894 curp->options & MPARSE_QUICK ? 1 : 0);
895
896 return(curp);
897 }
898
899 void
900 mparse_reset(struct mparse *curp)
901 {
902
903 roff_reset(curp->roff);
904
905 if (curp->mdoc)
906 mdoc_reset(curp->mdoc);
907 if (curp->man)
908 man_reset(curp->man);
909 if (curp->secondary)
910 curp->secondary->sz = 0;
911
912 curp->file_status = MANDOCLEVEL_OK;
913 curp->mdoc = NULL;
914 curp->man = NULL;
915
916 free(curp->sodest);
917 curp->sodest = NULL;
918 }
919
920 void
921 mparse_free(struct mparse *curp)
922 {
923
924 if (curp->pmdoc)
925 mdoc_free(curp->pmdoc);
926 if (curp->pman)
927 man_free(curp->pman);
928 if (curp->roff)
929 roff_free(curp->roff);
930 if (curp->secondary)
931 free(curp->secondary->buf);
932
933 free(curp->secondary);
934 free(curp->sodest);
935 free(curp);
936 }
937
938 void
939 mparse_result(struct mparse *curp,
940 struct mdoc **mdoc, struct man **man, char **sodest)
941 {
942
943 if (sodest && NULL != (*sodest = curp->sodest)) {
944 *mdoc = NULL;
945 *man = NULL;
946 return;
947 }
948 if (mdoc)
949 *mdoc = curp->mdoc;
950 if (man)
951 *man = curp->man;
952 }
953
954 void
955 mandoc_vmsg(enum mandocerr t, struct mparse *m,
956 int ln, int pos, const char *fmt, ...)
957 {
958 char buf[256];
959 va_list ap;
960
961 va_start(ap, fmt);
962 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
963 va_end(ap);
964
965 mandoc_msg(t, m, ln, pos, buf);
966 }
967
968 void
969 mandoc_msg(enum mandocerr er, struct mparse *m,
970 int ln, int col, const char *msg)
971 {
972 enum mandoclevel level;
973
974 level = MANDOCLEVEL_UNSUPP;
975 while (er < mandoclimits[level])
976 level--;
977
978 if (level < m->wlevel && er != MANDOCERR_FILE)
979 return;
980
981 if (m->mmsg)
982 (*m->mmsg)(er, level, m->file, ln, col, msg);
983
984 if (m->file_status < level)
985 m->file_status = level;
986 }
987
988 const char *
989 mparse_strerror(enum mandocerr er)
990 {
991
992 return(mandocerrs[er]);
993 }
994
995 const char *
996 mparse_strlevel(enum mandoclevel lvl)
997 {
998 return(mandoclevels[lvl]);
999 }
1000
1001 void
1002 mparse_keep(struct mparse *p)
1003 {
1004
1005 assert(NULL == p->secondary);
1006 p->secondary = mandoc_calloc(1, sizeof(struct buf));
1007 }
1008
1009 const char *
1010 mparse_getkeep(const struct mparse *p)
1011 {
1012
1013 assert(p->secondary);
1014 return(p->secondary->sz ? p->secondary->buf : NULL);
1015 }