]> git.cameronkatri.com Git - mandoc.git/blob - read.c
More improvements regarding tbl(7) options.
[mandoc.git] / read.c
/* $Id: read.c,v 1.117 2015/01/26 13:03:48 schwarze Exp $ */
/*
 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
19 #include "config.h"
20
21 #include <sys/types.h>
22 #if HAVE_MMAP
23 #include <sys/mman.h>
24 #include <sys/stat.h>
25 #endif
26 #include <sys/wait.h>
27
28 #include <assert.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <stdarg.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38
39 #include "mandoc.h"
40 #include "mandoc_aux.h"
41 #include "libmandoc.h"
42 #include "mdoc.h"
43 #include "man.h"
44
/*
 * Upper bound on the number of ROFF_REPARSE rounds mparse_buf_r()
 * performs for one input line before reporting MANDOCERR_ROFFLOOP.
 */
#define	REPARSE_LIMIT	1000
46
47 struct mparse {
48 struct man *pman; /* persistent man parser */
49 struct mdoc *pmdoc; /* persistent mdoc parser */
50 struct man *man; /* man parser */
51 struct mdoc *mdoc; /* mdoc parser */
52 struct roff *roff; /* roff parser (!NULL) */
53 const struct mchars *mchars; /* character table */
54 char *sodest; /* filename pointed to by .so */
55 const char *file; /* filename of current input file */
56 struct buf *primary; /* buffer currently being parsed */
57 struct buf *secondary; /* preprocessed copy of input */
58 const char *defos; /* default operating system */
59 mandocmsg mmsg; /* warning/error message handler */
60 enum mandoclevel file_status; /* status of current parse */
61 enum mandoclevel wlevel; /* ignore messages below this */
62 int options; /* parser options */
63 int filenc; /* encoding of the current file */
64 int reparse_count; /* finite interp. stack */
65 int line; /* line number in the file */
66 pid_t child; /* the gunzip(1) process */
67 };
68
/* Forward declarations of the helpers private to this file. */
static	void	  choose_parser(struct mparse *curp);
static	void	  resize_buf(struct buf *buf, size_t initial);
static	void	  mparse_buf_r(struct mparse *curp, struct buf blk,
			size_t i, int start);
static	int	  read_whole_file(struct mparse *curp, const char *file,
			int fd, struct buf *fb, int *with_mmap);
static	void	  mparse_end(struct mparse *curp);
static	void	  mparse_parse_buffer(struct mparse *curp, struct buf blk,
			const char *file);
77
78 static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = {
79 MANDOCERR_OK,
80 MANDOCERR_WARNING,
81 MANDOCERR_WARNING,
82 MANDOCERR_ERROR,
83 MANDOCERR_UNSUPP,
84 MANDOCERR_MAX,
85 MANDOCERR_MAX
86 };
87
88 static const char * const mandocerrs[MANDOCERR_MAX] = {
89 "ok",
90
91 "generic warning",
92
93 /* related to the prologue */
94 "missing manual title, using UNTITLED",
95 "missing manual title, using \"\"",
96 "lower case character in document title",
97 "missing manual section, using \"\"",
98 "unknown manual section",
99 "missing date, using today's date",
100 "cannot parse date, using it verbatim",
101 "missing Os macro, using \"\"",
102 "duplicate prologue macro",
103 "late prologue macro",
104 "skipping late title macro",
105 "prologue macros out of order",
106
107 /* related to document structure */
108 ".so is fragile, better use ln(1)",
109 "no document body",
110 "content before first section header",
111 "first section is not \"NAME\"",
112 "bad NAME section contents",
113 "sections out of conventional order",
114 "duplicate section title",
115 "unexpected section",
116 "unusual Xr order",
117 "unusual Xr punctuation",
118 "AUTHORS section without An macro",
119
120 /* related to macros and nesting */
121 "obsolete macro",
122 "macro neither callable nor escaped",
123 "skipping paragraph macro",
124 "moving paragraph macro out of list",
125 "skipping no-space macro",
126 "blocks badly nested",
127 "nested displays are not portable",
128 "moving content out of list",
129 ".Vt block has child macro",
130 "fill mode already enabled, skipping",
131 "fill mode already disabled, skipping",
132 "line scope broken",
133
134 /* related to missing macro arguments */
135 "skipping empty request",
136 "conditional request controls empty scope",
137 "skipping empty macro",
138 "empty argument, using 0n",
139 "argument count wrong",
140 "missing display type, using -ragged",
141 "list type is not the first argument",
142 "missing -width in -tag list, using 8n",
143 "missing utility name, using \"\"",
144 "empty head in list item",
145 "empty list item",
146 "missing font type, using \\fR",
147 "unknown font type, using \\fR",
148 "nothing follows prefix",
149 "missing -std argument, adding it",
150 "missing eqn box, using \"\"",
151
152 /* related to bad macro arguments */
153 "unterminated quoted argument",
154 "duplicate argument",
155 "skipping duplicate argument",
156 "skipping duplicate display type",
157 "skipping duplicate list type",
158 "skipping -width argument",
159 "unknown AT&T UNIX version",
160 "comma in function argument",
161 "parenthesis in function name",
162 "invalid content in Rs block",
163 "invalid Boolean argument",
164 "unknown font, skipping request",
165
166 /* related to plain text */
167 "blank line in fill mode, using .sp",
168 "tab in filled text",
169 "whitespace at end of input line",
170 "bad comment style",
171 "invalid escape sequence",
172 "undefined string, using \"\"",
173
174 "generic error",
175
176 /* related to equations */
177 "unexpected equation scope closure",
178 "equation scope open on exit",
179 "overlapping equation scopes",
180 "unexpected end of equation",
181
182 /* related to tables */
183 "non-alphabetic character in tbl options",
184 "skipping unknown tbl option",
185 "missing tbl option argument",
186 "wrong tbl option argument size",
187 "no table layout cells specified",
188 "no table data cells specified",
189 "ignore data in cell",
190 "data block still open",
191 "ignoring extra data cells",
192
193 /* related to document structure and macros */
194 NULL,
195 "input stack limit exceeded, infinite loop?",
196 "skipping bad character",
197 "skipping unknown macro",
198 "skipping insecure request",
199 "skipping item outside list",
200 "skipping column outside column list",
201 "skipping end of block that is not open",
202 "fewer RS blocks open, skipping",
203 "inserting missing end of block",
204 "appending missing end of block",
205
206 /* related to request and macro arguments */
207 "escaped character not allowed in a name",
208 "argument count wrong",
209 "NOT IMPLEMENTED: Bd -file",
210 "missing list type, using -item",
211 "missing manual name, using \"\"",
212 "uname(3) system call failed, using UNKNOWN",
213 "unknown standard specifier",
214 "skipping request without numeric argument",
215 "NOT IMPLEMENTED: .so with absolute path or \"..\"",
216 ".so request failed",
217 "skipping all arguments",
218 "skipping excess arguments",
219 "divide by zero",
220
221 "unsupported feature",
222 "input too large",
223 "unsupported control character",
224 "unsupported roff request",
225 "unsupported table layout",
226 "ignoring macro in table",
227 "eqn in tbl",
228 };
229
230 static const char * const mandoclevels[MANDOCLEVEL_MAX] = {
231 "SUCCESS",
232 "RESERVED",
233 "WARNING",
234 "ERROR",
235 "UNSUPP",
236 "BADARG",
237 "SYSERR"
238 };
239
240
241 static void
242 resize_buf(struct buf *buf, size_t initial)
243 {
244
245 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
246 buf->buf = mandoc_realloc(buf->buf, buf->sz);
247 }
248
249 static void
250 choose_parser(struct mparse *curp)
251 {
252 char *cp, *ep;
253 int format;
254
255 /*
256 * If neither command line arguments -mdoc or -man select
257 * a parser nor the roff parser found a .Dd or .TH macro
258 * yet, look ahead in the main input buffer.
259 */
260
261 if ((format = roff_getformat(curp->roff)) == 0) {
262 cp = curp->primary->buf;
263 ep = cp + curp->primary->sz;
264 while (cp < ep) {
265 if (*cp == '.' || *cp == '\'') {
266 cp++;
267 if (cp[0] == 'D' && cp[1] == 'd') {
268 format = MPARSE_MDOC;
269 break;
270 }
271 if (cp[0] == 'T' && cp[1] == 'H') {
272 format = MPARSE_MAN;
273 break;
274 }
275 }
276 cp = memchr(cp, '\n', ep - cp);
277 if (cp == NULL)
278 break;
279 cp++;
280 }
281 }
282
283 if (format == MPARSE_MDOC) {
284 if (NULL == curp->pmdoc)
285 curp->pmdoc = mdoc_alloc(
286 curp->roff, curp, curp->defos,
287 MPARSE_QUICK & curp->options ? 1 : 0);
288 assert(curp->pmdoc);
289 curp->mdoc = curp->pmdoc;
290 return;
291 }
292
293 /* Fall back to man(7) as a last resort. */
294
295 if (NULL == curp->pman)
296 curp->pman = man_alloc(
297 curp->roff, curp, curp->defos,
298 MPARSE_QUICK & curp->options ? 1 : 0);
299 assert(curp->pman);
300 curp->man = curp->pman;
301 }
302
303 /*
304 * Main parse routine for a buffer.
305 * It assumes encoding and line numbering are already set up.
306 * It can recurse directly (for invocations of user-defined
307 * macros, inline equations, and input line traps)
308 * and indirectly (for .so file inclusion).
309 */
310 static void
311 mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
312 {
313 const struct tbl_span *span;
314 struct buf ln;
315 const char *save_file;
316 char *cp;
317 size_t pos; /* byte number in the ln buffer */
318 enum rofferr rr;
319 int of;
320 int lnn; /* line number in the real file */
321 int fd;
322 pid_t save_child;
323 unsigned char c;
324
325 memset(&ln, 0, sizeof(ln));
326
327 lnn = curp->line;
328 pos = 0;
329
330 while (i < blk.sz) {
331 if (0 == pos && '\0' == blk.buf[i])
332 break;
333
334 if (start) {
335 curp->line = lnn;
336 curp->reparse_count = 0;
337
338 if (lnn < 3 &&
339 curp->filenc & MPARSE_UTF8 &&
340 curp->filenc & MPARSE_LATIN1)
341 curp->filenc = preconv_cue(&blk, i);
342 }
343
344 while (i < blk.sz && (start || blk.buf[i] != '\0')) {
345
346 /*
347 * When finding an unescaped newline character,
348 * leave the character loop to process the line.
349 * Skip a preceding carriage return, if any.
350 */
351
352 if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
353 '\n' == blk.buf[i + 1])
354 ++i;
355 if ('\n' == blk.buf[i]) {
356 ++i;
357 ++lnn;
358 break;
359 }
360
361 /*
362 * Make sure we have space for the worst
363 * case of 11 bytes: "\\[u10ffff]\0"
364 */
365
366 if (pos + 11 > ln.sz)
367 resize_buf(&ln, 256);
368
369 /*
370 * Encode 8-bit input.
371 */
372
373 c = blk.buf[i];
374 if (c & 0x80) {
375 if ( ! (curp->filenc && preconv_encode(
376 &blk, &i, &ln, &pos, &curp->filenc))) {
377 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
378 curp->line, pos, "0x%x", c);
379 ln.buf[pos++] = '?';
380 i++;
381 }
382 continue;
383 }
384
385 /*
386 * Exclude control characters.
387 */
388
389 if (c == 0x7f || (c < 0x20 && c != 0x09)) {
390 mandoc_vmsg(c == 0x00 || c == 0x04 ||
391 c > 0x0a ? MANDOCERR_CHAR_BAD :
392 MANDOCERR_CHAR_UNSUPP,
393 curp, curp->line, pos, "0x%x", c);
394 i++;
395 ln.buf[pos++] = '?';
396 continue;
397 }
398
399 /* Trailing backslash = a plain char. */
400
401 if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
402 ln.buf[pos++] = blk.buf[i++];
403 continue;
404 }
405
406 /*
407 * Found escape and at least one other character.
408 * When it's a newline character, skip it.
409 * When there is a carriage return in between,
410 * skip that one as well.
411 */
412
413 if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
414 '\n' == blk.buf[i + 2])
415 ++i;
416 if ('\n' == blk.buf[i + 1]) {
417 i += 2;
418 ++lnn;
419 continue;
420 }
421
422 if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
423 i += 2;
424 /* Comment, skip to end of line */
425 for (; i < blk.sz; ++i) {
426 if ('\n' == blk.buf[i]) {
427 ++i;
428 ++lnn;
429 break;
430 }
431 }
432
433 /* Backout trailing whitespaces */
434 for (; pos > 0; --pos) {
435 if (ln.buf[pos - 1] != ' ')
436 break;
437 if (pos > 2 && ln.buf[pos - 2] == '\\')
438 break;
439 }
440 break;
441 }
442
443 /* Catch escaped bogus characters. */
444
445 c = (unsigned char) blk.buf[i+1];
446
447 if ( ! (isascii(c) &&
448 (isgraph(c) || isblank(c)))) {
449 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
450 curp->line, pos, "0x%x", c);
451 i += 2;
452 ln.buf[pos++] = '?';
453 continue;
454 }
455
456 /* Some other escape sequence, copy & cont. */
457
458 ln.buf[pos++] = blk.buf[i++];
459 ln.buf[pos++] = blk.buf[i++];
460 }
461
462 if (pos >= ln.sz)
463 resize_buf(&ln, 256);
464
465 ln.buf[pos] = '\0';
466
467 /*
468 * A significant amount of complexity is contained by
469 * the roff preprocessor. It's line-oriented but can be
470 * expressed on one line, so we need at times to
471 * readjust our starting point and re-run it. The roff
472 * preprocessor can also readjust the buffers with new
473 * data, so we pass them in wholesale.
474 */
475
476 of = 0;
477
478 /*
479 * Maintain a lookaside buffer of all parsed lines. We
480 * only do this if mparse_keep() has been invoked (the
481 * buffer may be accessed with mparse_getkeep()).
482 */
483
484 if (curp->secondary) {
485 curp->secondary->buf = mandoc_realloc(
486 curp->secondary->buf,
487 curp->secondary->sz + pos + 2);
488 memcpy(curp->secondary->buf +
489 curp->secondary->sz,
490 ln.buf, pos);
491 curp->secondary->sz += pos;
492 curp->secondary->buf
493 [curp->secondary->sz] = '\n';
494 curp->secondary->sz++;
495 curp->secondary->buf
496 [curp->secondary->sz] = '\0';
497 }
498 rerun:
499 rr = roff_parseln(curp->roff, curp->line, &ln, &of);
500
501 switch (rr) {
502 case ROFF_REPARSE:
503 if (REPARSE_LIMIT >= ++curp->reparse_count)
504 mparse_buf_r(curp, ln, of, 0);
505 else
506 mandoc_msg(MANDOCERR_ROFFLOOP, curp,
507 curp->line, pos, NULL);
508 pos = 0;
509 continue;
510 case ROFF_APPEND:
511 pos = strlen(ln.buf);
512 continue;
513 case ROFF_RERUN:
514 goto rerun;
515 case ROFF_IGN:
516 pos = 0;
517 continue;
518 case ROFF_SO:
519 if ( ! (curp->options & MPARSE_SO) &&
520 (i >= blk.sz || blk.buf[i] == '\0')) {
521 curp->sodest = mandoc_strdup(ln.buf + of);
522 free(ln.buf);
523 return;
524 }
525 /*
526 * We remove `so' clauses from our lookaside
527 * buffer because we're going to descend into
528 * the file recursively.
529 */
530 if (curp->secondary)
531 curp->secondary->sz -= pos + 1;
532 save_file = curp->file;
533 save_child = curp->child;
534 if (mparse_open(curp, &fd, ln.buf + of) ==
535 MANDOCLEVEL_OK) {
536 mparse_readfd(curp, fd, ln.buf + of);
537 curp->file = save_file;
538 } else {
539 curp->file = save_file;
540 mandoc_vmsg(MANDOCERR_SO_FAIL,
541 curp, curp->line, pos,
542 ".so %s", ln.buf + of);
543 ln.sz = mandoc_asprintf(&cp,
544 ".sp\nSee the file %s.\n.sp",
545 ln.buf + of);
546 free(ln.buf);
547 ln.buf = cp;
548 of = 0;
549 mparse_buf_r(curp, ln, of, 0);
550 }
551 curp->child = save_child;
552 pos = 0;
553 continue;
554 default:
555 break;
556 }
557
558 /*
559 * If input parsers have not been allocated, do so now.
560 * We keep these instanced between parsers, but set them
561 * locally per parse routine since we can use different
562 * parsers with each one.
563 */
564
565 if ( ! (curp->man || curp->mdoc))
566 choose_parser(curp);
567
568 /*
569 * Lastly, push down into the parsers themselves.
570 * If libroff returns ROFF_TBL, then add it to the
571 * currently open parse. Since we only get here if
572 * there does exist data (see tbl_data.c), we're
573 * guaranteed that something's been allocated.
574 * Do the same for ROFF_EQN.
575 */
576
577 if (rr == ROFF_TBL) {
578 while ((span = roff_span(curp->roff)) != NULL)
579 if (curp->man == NULL)
580 mdoc_addspan(curp->mdoc, span);
581 else
582 man_addspan(curp->man, span);
583 } else if (rr == ROFF_EQN) {
584 if (curp->man == NULL)
585 mdoc_addeqn(curp->mdoc, roff_eqn(curp->roff));
586 else
587 man_addeqn(curp->man, roff_eqn(curp->roff));
588 } else if ((curp->man == NULL ?
589 mdoc_parseln(curp->mdoc, curp->line, ln.buf, of) :
590 man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
591 break;
592
593 /* Temporary buffers typically are not full. */
594
595 if (0 == start && '\0' == blk.buf[i])
596 break;
597
598 /* Start the next input line. */
599
600 pos = 0;
601 }
602
603 free(ln.buf);
604 }
605
606 static int
607 read_whole_file(struct mparse *curp, const char *file, int fd,
608 struct buf *fb, int *with_mmap)
609 {
610 size_t off;
611 ssize_t ssz;
612
613 #if HAVE_MMAP
614 struct stat st;
615 if (-1 == fstat(fd, &st)) {
616 perror(file);
617 exit((int)MANDOCLEVEL_SYSERR);
618 }
619
620 /*
621 * If we're a regular file, try just reading in the whole entry
622 * via mmap(). This is faster than reading it into blocks, and
623 * since each file is only a few bytes to begin with, I'm not
624 * concerned that this is going to tank any machines.
625 */
626
627 if (S_ISREG(st.st_mode)) {
628 if (st.st_size >= (1U << 31)) {
629 mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
630 return(0);
631 }
632 *with_mmap = 1;
633 fb->sz = (size_t)st.st_size;
634 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
635 if (fb->buf != MAP_FAILED)
636 return(1);
637 }
638 #endif
639
640 /*
641 * If this isn't a regular file (like, say, stdin), then we must
642 * go the old way and just read things in bit by bit.
643 */
644
645 *with_mmap = 0;
646 off = 0;
647 fb->sz = 0;
648 fb->buf = NULL;
649 for (;;) {
650 if (off == fb->sz) {
651 if (fb->sz == (1U << 31)) {
652 mandoc_msg(MANDOCERR_TOOLARGE, curp,
653 0, 0, NULL);
654 break;
655 }
656 resize_buf(fb, 65536);
657 }
658 ssz = read(fd, fb->buf + (int)off, fb->sz - off);
659 if (ssz == 0) {
660 fb->sz = off;
661 return(1);
662 }
663 if (ssz == -1) {
664 perror(file);
665 exit((int)MANDOCLEVEL_SYSERR);
666 }
667 off += (size_t)ssz;
668 }
669
670 free(fb->buf);
671 fb->buf = NULL;
672 return(0);
673 }
674
675 static void
676 mparse_end(struct mparse *curp)
677 {
678
679 if (curp->mdoc == NULL &&
680 curp->man == NULL &&
681 curp->sodest == NULL) {
682 if (curp->options & MPARSE_MDOC)
683 curp->mdoc = curp->pmdoc;
684 else {
685 if (curp->pman == NULL)
686 curp->pman = man_alloc(
687 curp->roff, curp, curp->defos,
688 curp->options & MPARSE_QUICK ? 1 : 0);
689 curp->man = curp->pman;
690 }
691 }
692 if (curp->mdoc)
693 mdoc_endparse(curp->mdoc);
694 if (curp->man)
695 man_endparse(curp->man);
696 roff_endparse(curp->roff);
697 }
698
699 static void
700 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
701 {
702 struct buf *svprimary;
703 const char *svfile;
704 size_t offset;
705 static int recursion_depth;
706
707 if (64 < recursion_depth) {
708 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
709 return;
710 }
711
712 /* Line number is per-file. */
713 svfile = curp->file;
714 curp->file = file;
715 svprimary = curp->primary;
716 curp->primary = &blk;
717 curp->line = 1;
718 recursion_depth++;
719
720 /* Skip an UTF-8 byte order mark. */
721 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
722 (unsigned char)blk.buf[0] == 0xef &&
723 (unsigned char)blk.buf[1] == 0xbb &&
724 (unsigned char)blk.buf[2] == 0xbf) {
725 offset = 3;
726 curp->filenc &= ~MPARSE_LATIN1;
727 } else
728 offset = 0;
729
730 mparse_buf_r(curp, blk, offset, 1);
731
732 if (--recursion_depth == 0)
733 mparse_end(curp);
734
735 curp->primary = svprimary;
736 curp->file = svfile;
737 }
738
739 enum mandoclevel
740 mparse_readmem(struct mparse *curp, void *buf, size_t len,
741 const char *file)
742 {
743 struct buf blk;
744
745 blk.buf = buf;
746 blk.sz = len;
747
748 mparse_parse_buffer(curp, blk, file);
749 return(curp->file_status);
750 }
751
752 /*
753 * Read the whole file into memory and call the parsers.
754 * Called recursively when an .so request is encountered.
755 */
756 enum mandoclevel
757 mparse_readfd(struct mparse *curp, int fd, const char *file)
758 {
759 struct buf blk;
760 int with_mmap;
761 int save_filenc;
762
763 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
764 save_filenc = curp->filenc;
765 curp->filenc = curp->options &
766 (MPARSE_UTF8 | MPARSE_LATIN1);
767 mparse_parse_buffer(curp, blk, file);
768 curp->filenc = save_filenc;
769 #if HAVE_MMAP
770 if (with_mmap)
771 munmap(blk.buf, blk.sz);
772 else
773 #endif
774 free(blk.buf);
775 }
776
777 if (fd != STDIN_FILENO && close(fd) == -1)
778 perror(file);
779
780 mparse_wait(curp);
781 return(curp->file_status);
782 }
783
784 enum mandoclevel
785 mparse_open(struct mparse *curp, int *fd, const char *file)
786 {
787 int pfd[2];
788 int save_errno;
789 char *cp;
790
791 curp->file = file;
792
793 /* Unless zipped, try to just open the file. */
794
795 if ((cp = strrchr(file, '.')) == NULL ||
796 strcmp(cp + 1, "gz")) {
797 curp->child = 0;
798 if ((*fd = open(file, O_RDONLY)) != -1)
799 return(MANDOCLEVEL_OK);
800
801 /* Open failed; try to append ".gz". */
802
803 mandoc_asprintf(&cp, "%s.gz", file);
804 file = cp;
805 } else
806 cp = NULL;
807
808 /* Before forking, make sure the file can be read. */
809
810 save_errno = errno;
811 if (access(file, R_OK) == -1) {
812 if (cp != NULL)
813 errno = save_errno;
814 free(cp);
815 *fd = -1;
816 curp->child = 0;
817 mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
818 return(MANDOCLEVEL_ERROR);
819 }
820
821 /* Run gunzip(1). */
822
823 if (pipe(pfd) == -1) {
824 perror("pipe");
825 exit((int)MANDOCLEVEL_SYSERR);
826 }
827
828 switch (curp->child = fork()) {
829 case -1:
830 perror("fork");
831 exit((int)MANDOCLEVEL_SYSERR);
832 case 0:
833 close(pfd[0]);
834 if (dup2(pfd[1], STDOUT_FILENO) == -1) {
835 perror("dup");
836 exit((int)MANDOCLEVEL_SYSERR);
837 }
838 execlp("gunzip", "gunzip", "-c", file, NULL);
839 perror("exec");
840 exit((int)MANDOCLEVEL_SYSERR);
841 default:
842 close(pfd[1]);
843 *fd = pfd[0];
844 return(MANDOCLEVEL_OK);
845 }
846 }
847
848 enum mandoclevel
849 mparse_wait(struct mparse *curp)
850 {
851 int status;
852
853 if (curp->child == 0)
854 return(MANDOCLEVEL_OK);
855
856 if (waitpid(curp->child, &status, 0) == -1) {
857 perror("wait");
858 exit((int)MANDOCLEVEL_SYSERR);
859 }
860 if (WIFSIGNALED(status)) {
861 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
862 "gunzip died from signal %d", WTERMSIG(status));
863 return(MANDOCLEVEL_ERROR);
864 }
865 if (WEXITSTATUS(status)) {
866 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
867 "gunzip failed with code %d", WEXITSTATUS(status));
868 return(MANDOCLEVEL_ERROR);
869 }
870 return(MANDOCLEVEL_OK);
871 }
872
873 struct mparse *
874 mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
875 const struct mchars *mchars, const char *defos)
876 {
877 struct mparse *curp;
878
879 curp = mandoc_calloc(1, sizeof(struct mparse));
880
881 curp->options = options;
882 curp->wlevel = wlevel;
883 curp->mmsg = mmsg;
884 curp->defos = defos;
885
886 curp->mchars = mchars;
887 curp->roff = roff_alloc(curp, curp->mchars, options);
888 if (curp->options & MPARSE_MDOC)
889 curp->pmdoc = mdoc_alloc(
890 curp->roff, curp, curp->defos,
891 curp->options & MPARSE_QUICK ? 1 : 0);
892 if (curp->options & MPARSE_MAN)
893 curp->pman = man_alloc(
894 curp->roff, curp, curp->defos,
895 curp->options & MPARSE_QUICK ? 1 : 0);
896
897 return(curp);
898 }
899
900 void
901 mparse_reset(struct mparse *curp)
902 {
903
904 roff_reset(curp->roff);
905
906 if (curp->mdoc)
907 mdoc_reset(curp->mdoc);
908 if (curp->man)
909 man_reset(curp->man);
910 if (curp->secondary)
911 curp->secondary->sz = 0;
912
913 curp->file_status = MANDOCLEVEL_OK;
914 curp->mdoc = NULL;
915 curp->man = NULL;
916
917 free(curp->sodest);
918 curp->sodest = NULL;
919 }
920
921 void
922 mparse_free(struct mparse *curp)
923 {
924
925 if (curp->pmdoc)
926 mdoc_free(curp->pmdoc);
927 if (curp->pman)
928 man_free(curp->pman);
929 if (curp->roff)
930 roff_free(curp->roff);
931 if (curp->secondary)
932 free(curp->secondary->buf);
933
934 free(curp->secondary);
935 free(curp->sodest);
936 free(curp);
937 }
938
939 void
940 mparse_result(struct mparse *curp,
941 struct mdoc **mdoc, struct man **man, char **sodest)
942 {
943
944 if (sodest && NULL != (*sodest = curp->sodest)) {
945 *mdoc = NULL;
946 *man = NULL;
947 return;
948 }
949 if (mdoc)
950 *mdoc = curp->mdoc;
951 if (man)
952 *man = curp->man;
953 }
954
955 void
956 mandoc_vmsg(enum mandocerr t, struct mparse *m,
957 int ln, int pos, const char *fmt, ...)
958 {
959 char buf[256];
960 va_list ap;
961
962 va_start(ap, fmt);
963 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
964 va_end(ap);
965
966 mandoc_msg(t, m, ln, pos, buf);
967 }
968
969 void
970 mandoc_msg(enum mandocerr er, struct mparse *m,
971 int ln, int col, const char *msg)
972 {
973 enum mandoclevel level;
974
975 level = MANDOCLEVEL_UNSUPP;
976 while (er < mandoclimits[level])
977 level--;
978
979 if (level < m->wlevel && er != MANDOCERR_FILE)
980 return;
981
982 if (m->mmsg)
983 (*m->mmsg)(er, level, m->file, ln, col, msg);
984
985 if (m->file_status < level)
986 m->file_status = level;
987 }
988
989 const char *
990 mparse_strerror(enum mandocerr er)
991 {
992
993 return(mandocerrs[er]);
994 }
995
996 const char *
997 mparse_strlevel(enum mandoclevel lvl)
998 {
999 return(mandoclevels[lvl]);
1000 }
1001
1002 void
1003 mparse_keep(struct mparse *p)
1004 {
1005
1006 assert(NULL == p->secondary);
1007 p->secondary = mandoc_calloc(1, sizeof(struct buf));
1008 }
1009
1010 const char *
1011 mparse_getkeep(const struct mparse *p)
1012 {
1013
1014 assert(p->secondary);
1015 return(p->secondary->sz ? p->secondary->buf : NULL);
1016 }