]> git.cameronkatri.com Git - mandoc.git/blob - read.c
69d81f5a14d3f1a9029fbe597ef88202f383ab05
[mandoc.git] / read.c
1 /* $Id: read.c,v 1.127 2015/02/20 22:40:38 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include "config.h"
20
21 #include <sys/types.h>
22 #if HAVE_MMAP
23 #include <sys/mman.h>
24 #include <sys/stat.h>
25 #endif
26 #include <sys/wait.h>
27
28 #include <assert.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <stdarg.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38
39 #include "mandoc.h"
40 #include "mandoc_aux.h"
41 #include "libmandoc.h"
42 #include "mdoc.h"
43 #include "man.h"
44
45 #define REPARSE_LIMIT 1000
46
/*
 * Parser state shared by every routine in this file: handles to the
 * roff, mdoc and man parsers, the input buffers, message reporting
 * configuration, and bookkeeping for the file currently being read.
 * The "persistent" parser pointers keep allocated parsers around,
 * while man/mdoc name the parser selected for the current parse
 * (both are cleared again by mparse_reset()).
 */
47 struct mparse {
48 struct man *pman; /* persistent man parser */
49 struct mdoc *pmdoc; /* persistent mdoc parser */
50 struct man *man; /* man parser */
51 struct mdoc *mdoc; /* mdoc parser */
52 struct roff *roff; /* roff parser (!NULL) */
53 const struct mchars *mchars; /* character table */
54 char *sodest; /* filename pointed to by .so */
55 const char *file; /* filename of current input file */
56 struct buf *primary; /* buffer currently being parsed */
57 struct buf *secondary; /* preprocessed copy of input */
58 const char *defos; /* default operating system */
59 mandocmsg mmsg; /* warning/error message handler */
60 enum mandoclevel file_status; /* status of current parse */
61 enum mandoclevel wlevel; /* ignore messages below this */
62 int options; /* parser options */
63 int filenc; /* encoding of the current file */
64 int reparse_count; /* finite interp. stack */
65 int line; /* line number in the file */
66 pid_t child; /* the gunzip(1) process */
67 };
68
/* Forward declarations of the helpers that are private to this file. */
69 static void choose_parser(struct mparse *);
70 static void resize_buf(struct buf *, size_t);
71 static void mparse_buf_r(struct mparse *, struct buf, size_t, int);
72 static int read_whole_file(struct mparse *, const char *, int,
73 struct buf *, int *);
74 static void mparse_end(struct mparse *);
75 static void mparse_parse_buffer(struct mparse *, struct buf,
76 const char *);
77
/*
 * Indexed by enum mandoclevel.  mandoc_msg() starts at
 * MANDOCLEVEL_UNSUPP and steps down while the error code is smaller
 * than the entry for the current level, so each entry acts as the
 * lowest error code that is reported at that level.
 */
78 static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = {
79 MANDOCERR_OK,
80 MANDOCERR_WARNING,
81 MANDOCERR_WARNING,
82 MANDOCERR_ERROR,
83 MANDOCERR_UNSUPP,
84 MANDOCERR_MAX,
85 MANDOCERR_MAX
86 };
87
/*
 * Message text for each enum mandocerr value, indexed by the error
 * code and handed out by mparse_strerror().  The order of the entries
 * must therefore match the order of the enum.  The single NULL entry
 * has no fixed text; NOTE(review): it presumably belongs to
 * MANDOCERR_FILE, whose callers in this file pass their own message
 * text -- confirm against the enum declaration in mandoc.h.
 */
88 static const char * const mandocerrs[MANDOCERR_MAX] = {
89 "ok",
90
91 "generic warning",
92
93 /* related to the prologue */
94 "missing manual title, using UNTITLED",
95 "missing manual title, using \"\"",
96 "lower case character in document title",
97 "missing manual section, using \"\"",
98 "unknown manual section",
99 "missing date, using today's date",
100 "cannot parse date, using it verbatim",
101 "missing Os macro, using \"\"",
102 "duplicate prologue macro",
103 "late prologue macro",
104 "skipping late title macro",
105 "prologue macros out of order",
106
107 /* related to document structure */
108 ".so is fragile, better use ln(1)",
109 "no document body",
110 "content before first section header",
111 "first section is not \"NAME\"",
112 "bad NAME section contents",
113 "missing description line, using \"\"",
114 "sections out of conventional order",
115 "duplicate section title",
116 "unexpected section",
117 "unusual Xr order",
118 "unusual Xr punctuation",
119 "AUTHORS section without An macro",
120
121 /* related to macros and nesting */
122 "obsolete macro",
123 "macro neither callable nor escaped",
124 "skipping paragraph macro",
125 "moving paragraph macro out of list",
126 "skipping no-space macro",
127 "blocks badly nested",
128 "nested displays are not portable",
129 "moving content out of list",
130 ".Vt block has child macro",
131 "fill mode already enabled, skipping",
132 "fill mode already disabled, skipping",
133 "line scope broken",
134
135 /* related to missing macro arguments */
136 "skipping empty request",
137 "conditional request controls empty scope",
138 "skipping empty macro",
139 "empty block",
140 "empty argument, using 0n",
141 "missing display type, using -ragged",
142 "list type is not the first argument",
143 "missing -width in -tag list, using 8n",
144 "missing utility name, using \"\"",
145 "missing function name, using \"\"",
146 "empty head in list item",
147 "empty list item",
148 "missing font type, using \\fR",
149 "unknown font type, using \\fR",
150 "nothing follows prefix",
151 "empty reference block",
152 "missing -std argument, adding it",
153 "missing option string, using \"\"",
154 "missing resource identifier, using \"\"",
155 "missing eqn box, using \"\"",
156
157 /* related to bad macro arguments */
158 "unterminated quoted argument",
159 "duplicate argument",
160 "skipping duplicate argument",
161 "skipping duplicate display type",
162 "skipping duplicate list type",
163 "skipping -width argument",
164 "wrong number of cells",
165 "unknown AT&T UNIX version",
166 "comma in function argument",
167 "parenthesis in function name",
168 "invalid content in Rs block",
169 "invalid Boolean argument",
170 "unknown font, skipping request",
171 "odd number of characters in request",
172
173 /* related to plain text */
174 "blank line in fill mode, using .sp",
175 "tab in filled text",
176 "whitespace at end of input line",
177 "bad comment style",
178 "invalid escape sequence",
179 "undefined string, using \"\"",
180
181 /* related to tables */
182 "tbl line starts with span",
183 "tbl column starts with span",
184 "skipping vertical bar in tbl layout",
185
186 "generic error",
187
188 /* related to tables */
189 "non-alphabetic character in tbl options",
190 "skipping unknown tbl option",
191 "missing tbl option argument",
192 "wrong tbl option argument size",
193 "empty tbl layout",
194 "invalid character in tbl layout",
195 "unmatched parenthesis in tbl layout",
196 "tbl without any data cells",
197 "ignoring data in spanned tbl cell",
198 "ignoring extra tbl data cells",
199 "data block open at end of tbl",
200
201 /* related to document structure and macros */
202 NULL,
203 "input stack limit exceeded, infinite loop?",
204 "skipping bad character",
205 "skipping unknown macro",
206 "skipping insecure request",
207 "skipping item outside list",
208 "skipping column outside column list",
209 "skipping end of block that is not open",
210 "fewer RS blocks open, skipping",
211 "inserting missing end of block",
212 "appending missing end of block",
213
214 /* related to request and macro arguments */
215 "escaped character not allowed in a name",
216 "NOT IMPLEMENTED: Bd -file",
217 "missing list type, using -item",
218 "missing manual name, using \"\"",
219 "uname(3) system call failed, using UNKNOWN",
220 "unknown standard specifier",
221 "skipping request without numeric argument",
222 "NOT IMPLEMENTED: .so with absolute path or \"..\"",
223 ".so request failed",
224 "skipping all arguments",
225 "skipping excess arguments",
226 "divide by zero",
227
228 "unsupported feature",
229 "input too large",
230 "unsupported control character",
231 "unsupported roff request",
232 "eqn delim option in tbl",
233 "unsupported tbl layout modifier",
234 "ignoring macro in table",
235 };
236
/*
 * Printable name of each enum mandoclevel value, indexed by the
 * level and handed out by mparse_strlevel().
 */
237 static const char * const mandoclevels[MANDOCLEVEL_MAX] = {
238 "SUCCESS",
239 "RESERVED",
240 "WARNING",
241 "ERROR",
242 "UNSUPP",
243 "BADARG",
244 "SYSERR"
245 };
246
247
248 static void
249 resize_buf(struct buf *buf, size_t initial)
250 {
251
252 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
253 buf->buf = mandoc_realloc(buf->buf, buf->sz);
254 }
255
256 static void
257 choose_parser(struct mparse *curp)
258 {
259 char *cp, *ep;
260 int format;
261
262 /*
263 * If neither command line arguments -mdoc or -man select
264 * a parser nor the roff parser found a .Dd or .TH macro
265 * yet, look ahead in the main input buffer.
266 */
267
268 if ((format = roff_getformat(curp->roff)) == 0) {
269 cp = curp->primary->buf;
270 ep = cp + curp->primary->sz;
271 while (cp < ep) {
272 if (*cp == '.' || *cp == '\'') {
273 cp++;
274 if (cp[0] == 'D' && cp[1] == 'd') {
275 format = MPARSE_MDOC;
276 break;
277 }
278 if (cp[0] == 'T' && cp[1] == 'H') {
279 format = MPARSE_MAN;
280 break;
281 }
282 }
283 cp = memchr(cp, '\n', ep - cp);
284 if (cp == NULL)
285 break;
286 cp++;
287 }
288 }
289
290 if (format == MPARSE_MDOC) {
291 if (NULL == curp->pmdoc)
292 curp->pmdoc = mdoc_alloc(
293 curp->roff, curp, curp->defos,
294 MPARSE_QUICK & curp->options ? 1 : 0);
295 assert(curp->pmdoc);
296 curp->mdoc = curp->pmdoc;
297 return;
298 }
299
300 /* Fall back to man(7) as a last resort. */
301
302 if (NULL == curp->pman)
303 curp->pman = man_alloc(
304 curp->roff, curp, curp->defos,
305 MPARSE_QUICK & curp->options ? 1 : 0);
306 assert(curp->pman);
307 curp->man = curp->pman;
308 }
309
310 /*
311 * Main parse routine for a buffer.
312 * It assumes encoding and line numbering are already set up.
313 * It can recurse directly (for invocations of user-defined
314 * macros, inline equations, and input line traps)
315 * and indirectly (for .so file inclusion).
316 */
317 static void
318 mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
319 {
320 const struct tbl_span *span;
321 struct buf ln;
322 const char *save_file;
323 char *cp;
324 size_t pos; /* byte number in the ln buffer */
325 enum rofferr rr;
326 int of;
327 int lnn; /* line number in the real file */
328 int fd;
329 pid_t save_child;
330 unsigned char c;
331
332 memset(&ln, 0, sizeof(ln));
333
334 lnn = curp->line;
335 pos = 0;
336
337 while (i < blk.sz) {
338 if (0 == pos && '\0' == blk.buf[i])
339 break;
340
341 if (start) {
342 curp->line = lnn;
343 curp->reparse_count = 0;
344
345 if (lnn < 3 &&
346 curp->filenc & MPARSE_UTF8 &&
347 curp->filenc & MPARSE_LATIN1)
348 curp->filenc = preconv_cue(&blk, i);
349 }
350
351 while (i < blk.sz && (start || blk.buf[i] != '\0')) {
352
353 /*
354 * When finding an unescaped newline character,
355 * leave the character loop to process the line.
356 * Skip a preceding carriage return, if any.
357 */
358
359 if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
360 '\n' == blk.buf[i + 1])
361 ++i;
362 if ('\n' == blk.buf[i]) {
363 ++i;
364 ++lnn;
365 break;
366 }
367
368 /*
369 * Make sure we have space for the worst
370 * case of 11 bytes: "\\[u10ffff]\0"
371 */
372
373 if (pos + 11 > ln.sz)
374 resize_buf(&ln, 256);
375
376 /*
377 * Encode 8-bit input.
378 */
379
380 c = blk.buf[i];
381 if (c & 0x80) {
382 if ( ! (curp->filenc && preconv_encode(
383 &blk, &i, &ln, &pos, &curp->filenc))) {
384 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
385 curp->line, pos, "0x%x", c);
386 ln.buf[pos++] = '?';
387 i++;
388 }
389 continue;
390 }
391
392 /*
393 * Exclude control characters.
394 */
395
396 if (c == 0x7f || (c < 0x20 && c != 0x09)) {
397 mandoc_vmsg(c == 0x00 || c == 0x04 ||
398 c > 0x0a ? MANDOCERR_CHAR_BAD :
399 MANDOCERR_CHAR_UNSUPP,
400 curp, curp->line, pos, "0x%x", c);
401 i++;
402 if (c != '\r')
403 ln.buf[pos++] = '?';
404 continue;
405 }
406
407 /* Trailing backslash = a plain char. */
408
409 if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
410 ln.buf[pos++] = blk.buf[i++];
411 continue;
412 }
413
414 /*
415 * Found escape and at least one other character.
416 * When it's a newline character, skip it.
417 * When there is a carriage return in between,
418 * skip that one as well.
419 */
420
421 if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
422 '\n' == blk.buf[i + 2])
423 ++i;
424 if ('\n' == blk.buf[i + 1]) {
425 i += 2;
426 ++lnn;
427 continue;
428 }
429
430 if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
431 i += 2;
432 /* Comment, skip to end of line */
433 for (; i < blk.sz; ++i) {
434 if ('\n' == blk.buf[i]) {
435 ++i;
436 ++lnn;
437 break;
438 }
439 }
440
441 /* Backout trailing whitespaces */
442 for (; pos > 0; --pos) {
443 if (ln.buf[pos - 1] != ' ')
444 break;
445 if (pos > 2 && ln.buf[pos - 2] == '\\')
446 break;
447 }
448 break;
449 }
450
451 /* Catch escaped bogus characters. */
452
453 c = (unsigned char) blk.buf[i+1];
454
455 if ( ! (isascii(c) &&
456 (isgraph(c) || isblank(c)))) {
457 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
458 curp->line, pos, "0x%x", c);
459 i += 2;
460 ln.buf[pos++] = '?';
461 continue;
462 }
463
464 /* Some other escape sequence, copy & cont. */
465
466 ln.buf[pos++] = blk.buf[i++];
467 ln.buf[pos++] = blk.buf[i++];
468 }
469
470 if (pos >= ln.sz)
471 resize_buf(&ln, 256);
472
473 ln.buf[pos] = '\0';
474
475 /*
476 * A significant amount of complexity is contained by
477 * the roff preprocessor. It's line-oriented but can be
478 * expressed on one line, so we need at times to
479 * readjust our starting point and re-run it. The roff
480 * preprocessor can also readjust the buffers with new
481 * data, so we pass them in wholesale.
482 */
483
484 of = 0;
485
486 /*
487 * Maintain a lookaside buffer of all parsed lines. We
488 * only do this if mparse_keep() has been invoked (the
489 * buffer may be accessed with mparse_getkeep()).
490 */
491
492 if (curp->secondary) {
493 curp->secondary->buf = mandoc_realloc(
494 curp->secondary->buf,
495 curp->secondary->sz + pos + 2);
496 memcpy(curp->secondary->buf +
497 curp->secondary->sz,
498 ln.buf, pos);
499 curp->secondary->sz += pos;
500 curp->secondary->buf
501 [curp->secondary->sz] = '\n';
502 curp->secondary->sz++;
503 curp->secondary->buf
504 [curp->secondary->sz] = '\0';
505 }
506 rerun:
507 rr = roff_parseln(curp->roff, curp->line, &ln, &of);
508
509 switch (rr) {
510 case ROFF_REPARSE:
511 if (REPARSE_LIMIT >= ++curp->reparse_count)
512 mparse_buf_r(curp, ln, of, 0);
513 else
514 mandoc_msg(MANDOCERR_ROFFLOOP, curp,
515 curp->line, pos, NULL);
516 pos = 0;
517 continue;
518 case ROFF_APPEND:
519 pos = strlen(ln.buf);
520 continue;
521 case ROFF_RERUN:
522 goto rerun;
523 case ROFF_IGN:
524 pos = 0;
525 continue;
526 case ROFF_SO:
527 if ( ! (curp->options & MPARSE_SO) &&
528 (i >= blk.sz || blk.buf[i] == '\0')) {
529 curp->sodest = mandoc_strdup(ln.buf + of);
530 free(ln.buf);
531 return;
532 }
533 /*
534 * We remove `so' clauses from our lookaside
535 * buffer because we're going to descend into
536 * the file recursively.
537 */
538 if (curp->secondary)
539 curp->secondary->sz -= pos + 1;
540 save_file = curp->file;
541 save_child = curp->child;
542 if (mparse_open(curp, &fd, ln.buf + of) ==
543 MANDOCLEVEL_OK) {
544 mparse_readfd(curp, fd, ln.buf + of);
545 curp->file = save_file;
546 } else {
547 curp->file = save_file;
548 mandoc_vmsg(MANDOCERR_SO_FAIL,
549 curp, curp->line, pos,
550 ".so %s", ln.buf + of);
551 ln.sz = mandoc_asprintf(&cp,
552 ".sp\nSee the file %s.\n.sp",
553 ln.buf + of);
554 free(ln.buf);
555 ln.buf = cp;
556 of = 0;
557 mparse_buf_r(curp, ln, of, 0);
558 }
559 curp->child = save_child;
560 pos = 0;
561 continue;
562 default:
563 break;
564 }
565
566 /*
567 * If input parsers have not been allocated, do so now.
568 * We keep these instanced between parsers, but set them
569 * locally per parse routine since we can use different
570 * parsers with each one.
571 */
572
573 if ( ! (curp->man || curp->mdoc))
574 choose_parser(curp);
575
576 /*
577 * Lastly, push down into the parsers themselves.
578 * If libroff returns ROFF_TBL, then add it to the
579 * currently open parse. Since we only get here if
580 * there does exist data (see tbl_data.c), we're
581 * guaranteed that something's been allocated.
582 * Do the same for ROFF_EQN.
583 */
584
585 if (rr == ROFF_TBL) {
586 while ((span = roff_span(curp->roff)) != NULL)
587 if (curp->man == NULL)
588 mdoc_addspan(curp->mdoc, span);
589 else
590 man_addspan(curp->man, span);
591 } else if (rr == ROFF_EQN) {
592 if (curp->man == NULL)
593 mdoc_addeqn(curp->mdoc, roff_eqn(curp->roff));
594 else
595 man_addeqn(curp->man, roff_eqn(curp->roff));
596 } else if ((curp->man == NULL ?
597 mdoc_parseln(curp->mdoc, curp->line, ln.buf, of) :
598 man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
599 break;
600
601 /* Temporary buffers typically are not full. */
602
603 if (0 == start && '\0' == blk.buf[i])
604 break;
605
606 /* Start the next input line. */
607
608 pos = 0;
609 }
610
611 free(ln.buf);
612 }
613
614 static int
615 read_whole_file(struct mparse *curp, const char *file, int fd,
616 struct buf *fb, int *with_mmap)
617 {
618 size_t off;
619 ssize_t ssz;
620
621 #if HAVE_MMAP
622 struct stat st;
623 if (-1 == fstat(fd, &st)) {
624 perror(file);
625 exit((int)MANDOCLEVEL_SYSERR);
626 }
627
628 /*
629 * If we're a regular file, try just reading in the whole entry
630 * via mmap(). This is faster than reading it into blocks, and
631 * since each file is only a few bytes to begin with, I'm not
632 * concerned that this is going to tank any machines.
633 */
634
635 if (S_ISREG(st.st_mode)) {
636 if (st.st_size >= (1U << 31)) {
637 mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
638 return(0);
639 }
640 *with_mmap = 1;
641 fb->sz = (size_t)st.st_size;
642 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
643 if (fb->buf != MAP_FAILED)
644 return(1);
645 }
646 #endif
647
648 /*
649 * If this isn't a regular file (like, say, stdin), then we must
650 * go the old way and just read things in bit by bit.
651 */
652
653 *with_mmap = 0;
654 off = 0;
655 fb->sz = 0;
656 fb->buf = NULL;
657 for (;;) {
658 if (off == fb->sz) {
659 if (fb->sz == (1U << 31)) {
660 mandoc_msg(MANDOCERR_TOOLARGE, curp,
661 0, 0, NULL);
662 break;
663 }
664 resize_buf(fb, 65536);
665 }
666 ssz = read(fd, fb->buf + (int)off, fb->sz - off);
667 if (ssz == 0) {
668 fb->sz = off;
669 return(1);
670 }
671 if (ssz == -1) {
672 perror(file);
673 exit((int)MANDOCLEVEL_SYSERR);
674 }
675 off += (size_t)ssz;
676 }
677
678 free(fb->buf);
679 fb->buf = NULL;
680 return(0);
681 }
682
683 static void
684 mparse_end(struct mparse *curp)
685 {
686
687 if (curp->mdoc == NULL &&
688 curp->man == NULL &&
689 curp->sodest == NULL) {
690 if (curp->options & MPARSE_MDOC)
691 curp->mdoc = curp->pmdoc;
692 else {
693 if (curp->pman == NULL)
694 curp->pman = man_alloc(
695 curp->roff, curp, curp->defos,
696 curp->options & MPARSE_QUICK ? 1 : 0);
697 curp->man = curp->pman;
698 }
699 }
700 if (curp->mdoc)
701 mdoc_endparse(curp->mdoc);
702 if (curp->man)
703 man_endparse(curp->man);
704 roff_endparse(curp->roff);
705 }
706
707 static void
708 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
709 {
710 struct buf *svprimary;
711 const char *svfile;
712 size_t offset;
713 static int recursion_depth;
714
715 if (64 < recursion_depth) {
716 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
717 return;
718 }
719
720 /* Line number is per-file. */
721 svfile = curp->file;
722 curp->file = file;
723 svprimary = curp->primary;
724 curp->primary = &blk;
725 curp->line = 1;
726 recursion_depth++;
727
728 /* Skip an UTF-8 byte order mark. */
729 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
730 (unsigned char)blk.buf[0] == 0xef &&
731 (unsigned char)blk.buf[1] == 0xbb &&
732 (unsigned char)blk.buf[2] == 0xbf) {
733 offset = 3;
734 curp->filenc &= ~MPARSE_LATIN1;
735 } else
736 offset = 0;
737
738 mparse_buf_r(curp, blk, offset, 1);
739
740 if (--recursion_depth == 0)
741 mparse_end(curp);
742
743 curp->primary = svprimary;
744 curp->file = svfile;
745 }
746
747 enum mandoclevel
748 mparse_readmem(struct mparse *curp, void *buf, size_t len,
749 const char *file)
750 {
751 struct buf blk;
752
753 blk.buf = buf;
754 blk.sz = len;
755
756 mparse_parse_buffer(curp, blk, file);
757 return(curp->file_status);
758 }
759
760 /*
761 * Read the whole file into memory and call the parsers.
762 * Called recursively when an .so request is encountered.
763 */
764 enum mandoclevel
765 mparse_readfd(struct mparse *curp, int fd, const char *file)
766 {
767 struct buf blk;
768 int with_mmap;
769 int save_filenc;
770
771 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
772 save_filenc = curp->filenc;
773 curp->filenc = curp->options &
774 (MPARSE_UTF8 | MPARSE_LATIN1);
775 mparse_parse_buffer(curp, blk, file);
776 curp->filenc = save_filenc;
777 #if HAVE_MMAP
778 if (with_mmap)
779 munmap(blk.buf, blk.sz);
780 else
781 #endif
782 free(blk.buf);
783 }
784
785 if (fd != STDIN_FILENO && close(fd) == -1)
786 perror(file);
787
788 mparse_wait(curp);
789 return(curp->file_status);
790 }
791
792 enum mandoclevel
793 mparse_open(struct mparse *curp, int *fd, const char *file)
794 {
795 int pfd[2];
796 int save_errno;
797 char *cp;
798
799 curp->file = file;
800
801 /* Unless zipped, try to just open the file. */
802
803 if ((cp = strrchr(file, '.')) == NULL ||
804 strcmp(cp + 1, "gz")) {
805 curp->child = 0;
806 if ((*fd = open(file, O_RDONLY)) != -1)
807 return(MANDOCLEVEL_OK);
808
809 /* Open failed; try to append ".gz". */
810
811 mandoc_asprintf(&cp, "%s.gz", file);
812 file = cp;
813 } else
814 cp = NULL;
815
816 /* Before forking, make sure the file can be read. */
817
818 save_errno = errno;
819 if (access(file, R_OK) == -1) {
820 if (cp != NULL)
821 errno = save_errno;
822 free(cp);
823 *fd = -1;
824 curp->child = 0;
825 mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
826 return(MANDOCLEVEL_ERROR);
827 }
828
829 /* Run gunzip(1). */
830
831 if (pipe(pfd) == -1) {
832 perror("pipe");
833 exit((int)MANDOCLEVEL_SYSERR);
834 }
835
836 switch (curp->child = fork()) {
837 case -1:
838 perror("fork");
839 exit((int)MANDOCLEVEL_SYSERR);
840 case 0:
841 close(pfd[0]);
842 if (dup2(pfd[1], STDOUT_FILENO) == -1) {
843 perror("dup");
844 exit((int)MANDOCLEVEL_SYSERR);
845 }
846 execlp("gunzip", "gunzip", "-c", file, NULL);
847 perror("exec");
848 exit((int)MANDOCLEVEL_SYSERR);
849 default:
850 close(pfd[1]);
851 *fd = pfd[0];
852 return(MANDOCLEVEL_OK);
853 }
854 }
855
856 enum mandoclevel
857 mparse_wait(struct mparse *curp)
858 {
859 int status;
860
861 if (curp->child == 0)
862 return(MANDOCLEVEL_OK);
863
864 if (waitpid(curp->child, &status, 0) == -1) {
865 perror("wait");
866 exit((int)MANDOCLEVEL_SYSERR);
867 }
868 if (WIFSIGNALED(status)) {
869 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
870 "gunzip died from signal %d", WTERMSIG(status));
871 return(MANDOCLEVEL_ERROR);
872 }
873 if (WEXITSTATUS(status)) {
874 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
875 "gunzip failed with code %d", WEXITSTATUS(status));
876 return(MANDOCLEVEL_ERROR);
877 }
878 return(MANDOCLEVEL_OK);
879 }
880
881 struct mparse *
882 mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
883 const struct mchars *mchars, const char *defos)
884 {
885 struct mparse *curp;
886
887 curp = mandoc_calloc(1, sizeof(struct mparse));
888
889 curp->options = options;
890 curp->wlevel = wlevel;
891 curp->mmsg = mmsg;
892 curp->defos = defos;
893
894 curp->mchars = mchars;
895 curp->roff = roff_alloc(curp, curp->mchars, options);
896 if (curp->options & MPARSE_MDOC)
897 curp->pmdoc = mdoc_alloc(
898 curp->roff, curp, curp->defos,
899 curp->options & MPARSE_QUICK ? 1 : 0);
900 if (curp->options & MPARSE_MAN)
901 curp->pman = man_alloc(
902 curp->roff, curp, curp->defos,
903 curp->options & MPARSE_QUICK ? 1 : 0);
904
905 return(curp);
906 }
907
908 void
909 mparse_reset(struct mparse *curp)
910 {
911
912 roff_reset(curp->roff);
913
914 if (curp->mdoc)
915 mdoc_reset(curp->mdoc);
916 if (curp->man)
917 man_reset(curp->man);
918 if (curp->secondary)
919 curp->secondary->sz = 0;
920
921 curp->file_status = MANDOCLEVEL_OK;
922 curp->mdoc = NULL;
923 curp->man = NULL;
924
925 free(curp->sodest);
926 curp->sodest = NULL;
927 }
928
929 void
930 mparse_free(struct mparse *curp)
931 {
932
933 if (curp->pmdoc)
934 mdoc_free(curp->pmdoc);
935 if (curp->pman)
936 man_free(curp->pman);
937 if (curp->roff)
938 roff_free(curp->roff);
939 if (curp->secondary)
940 free(curp->secondary->buf);
941
942 free(curp->secondary);
943 free(curp->sodest);
944 free(curp);
945 }
946
947 void
948 mparse_result(struct mparse *curp,
949 struct mdoc **mdoc, struct man **man, char **sodest)
950 {
951
952 if (sodest && NULL != (*sodest = curp->sodest)) {
953 *mdoc = NULL;
954 *man = NULL;
955 return;
956 }
957 if (mdoc)
958 *mdoc = curp->mdoc;
959 if (man)
960 *man = curp->man;
961 }
962
963 void
964 mandoc_vmsg(enum mandocerr t, struct mparse *m,
965 int ln, int pos, const char *fmt, ...)
966 {
967 char buf[256];
968 va_list ap;
969
970 va_start(ap, fmt);
971 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
972 va_end(ap);
973
974 mandoc_msg(t, m, ln, pos, buf);
975 }
976
977 void
978 mandoc_msg(enum mandocerr er, struct mparse *m,
979 int ln, int col, const char *msg)
980 {
981 enum mandoclevel level;
982
983 level = MANDOCLEVEL_UNSUPP;
984 while (er < mandoclimits[level])
985 level--;
986
987 if (level < m->wlevel && er != MANDOCERR_FILE)
988 return;
989
990 if (m->mmsg)
991 (*m->mmsg)(er, level, m->file, ln, col, msg);
992
993 if (m->file_status < level)
994 m->file_status = level;
995 }
996
997 const char *
998 mparse_strerror(enum mandocerr er)
999 {
1000
1001 return(mandocerrs[er]);
1002 }
1003
1004 const char *
1005 mparse_strlevel(enum mandoclevel lvl)
1006 {
1007 return(mandoclevels[lvl]);
1008 }
1009
1010 void
1011 mparse_keep(struct mparse *p)
1012 {
1013
1014 assert(NULL == p->secondary);
1015 p->secondary = mandoc_calloc(1, sizeof(struct buf));
1016 }
1017
1018 const char *
1019 mparse_getkeep(const struct mparse *p)
1020 {
1021
1022 assert(p->secondary);
1023 return(p->secondary->sz ? p->secondary->buf : NULL);
1024 }