]> git.cameronkatri.com Git - mandoc.git/blob - read.c
Move mdoc_hash_init() and man_hash_init() to libmandoc.h
[mandoc.git] / read.c
1 /* $Id: read.c,v 1.136 2015/04/18 17:01:58 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include "config.h"
20
21 #include <sys/types.h>
22 #if HAVE_MMAP
23 #include <sys/mman.h>
24 #include <sys/stat.h>
25 #endif
26 #include <sys/wait.h>
27
28 #include <assert.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <signal.h>
33 #include <stdarg.h>
34 #include <stdint.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <unistd.h>
39
40 #include "mandoc_aux.h"
41 #include "mandoc.h"
42 #include "roff.h"
43 #include "mdoc.h"
44 #include "man.h"
45 #include "libmandoc.h"
46
47 #define REPARSE_LIMIT 1000
48
49 struct mparse {
50 struct roff_man *man; /* man parser */
51 struct roff *roff; /* roff parser (!NULL) */
52 const struct mchars *mchars; /* character table */
53 char *sodest; /* filename pointed to by .so */
54 const char *file; /* filename of current input file */
55 struct buf *primary; /* buffer currently being parsed */
56 struct buf *secondary; /* preprocessed copy of input */
57 const char *defos; /* default operating system */
58 mandocmsg mmsg; /* warning/error message handler */
59 enum mandoclevel file_status; /* status of current parse */
60 enum mandoclevel wlevel; /* ignore messages below this */
61 int options; /* parser options */
62 int filenc; /* encoding of the current file */
63 int reparse_count; /* finite interp. stack */
64 int line; /* line number in the file */
65 pid_t child; /* the gunzip(1) process */
66 };
67
/* Helpers private to this file. */
static	void	  choose_parser(struct mparse *curp);
static	void	  resize_buf(struct buf *buf, size_t initial);
static	void	  mparse_buf_r(struct mparse *curp, struct buf blk,
			size_t i, int start);
static	int	  read_whole_file(struct mparse *curp, const char *file,
			int fd, struct buf *fb, int *with_mmap);
static	void	  mparse_end(struct mparse *curp);
static	void	  mparse_parse_buffer(struct mparse *curp, struct buf blk,
			const char *file);
76
77 static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = {
78 MANDOCERR_OK,
79 MANDOCERR_WARNING,
80 MANDOCERR_WARNING,
81 MANDOCERR_ERROR,
82 MANDOCERR_UNSUPP,
83 MANDOCERR_MAX,
84 MANDOCERR_MAX
85 };
86
87 static const char * const mandocerrs[MANDOCERR_MAX] = {
88 "ok",
89
90 "generic warning",
91
92 /* related to the prologue */
93 "missing manual title, using UNTITLED",
94 "missing manual title, using \"\"",
95 "lower case character in document title",
96 "missing manual section, using \"\"",
97 "unknown manual section",
98 "missing date, using today's date",
99 "cannot parse date, using it verbatim",
100 "missing Os macro, using \"\"",
101 "duplicate prologue macro",
102 "late prologue macro",
103 "skipping late title macro",
104 "prologue macros out of order",
105
106 /* related to document structure */
107 ".so is fragile, better use ln(1)",
108 "no document body",
109 "content before first section header",
110 "first section is not \"NAME\"",
111 "NAME section without name",
112 "NAME section without description",
113 "description not at the end of NAME",
114 "bad NAME section content",
115 "missing description line, using \"\"",
116 "sections out of conventional order",
117 "duplicate section title",
118 "unexpected section",
119 "unusual Xr order",
120 "unusual Xr punctuation",
121 "AUTHORS section without An macro",
122
123 /* related to macros and nesting */
124 "obsolete macro",
125 "macro neither callable nor escaped",
126 "skipping paragraph macro",
127 "moving paragraph macro out of list",
128 "skipping no-space macro",
129 "blocks badly nested",
130 "nested displays are not portable",
131 "moving content out of list",
132 ".Vt block has child macro",
133 "fill mode already enabled, skipping",
134 "fill mode already disabled, skipping",
135 "line scope broken",
136
137 /* related to missing macro arguments */
138 "skipping empty request",
139 "conditional request controls empty scope",
140 "skipping empty macro",
141 "empty block",
142 "empty argument, using 0n",
143 "missing display type, using -ragged",
144 "list type is not the first argument",
145 "missing -width in -tag list, using 8n",
146 "missing utility name, using \"\"",
147 "missing function name, using \"\"",
148 "empty head in list item",
149 "empty list item",
150 "missing font type, using \\fR",
151 "unknown font type, using \\fR",
152 "nothing follows prefix",
153 "empty reference block",
154 "missing -std argument, adding it",
155 "missing option string, using \"\"",
156 "missing resource identifier, using \"\"",
157 "missing eqn box, using \"\"",
158
159 /* related to bad macro arguments */
160 "unterminated quoted argument",
161 "duplicate argument",
162 "skipping duplicate argument",
163 "skipping duplicate display type",
164 "skipping duplicate list type",
165 "skipping -width argument",
166 "wrong number of cells",
167 "unknown AT&T UNIX version",
168 "comma in function argument",
169 "parenthesis in function name",
170 "invalid content in Rs block",
171 "invalid Boolean argument",
172 "unknown font, skipping request",
173 "odd number of characters in request",
174
175 /* related to plain text */
176 "blank line in fill mode, using .sp",
177 "tab in filled text",
178 "whitespace at end of input line",
179 "bad comment style",
180 "invalid escape sequence",
181 "undefined string, using \"\"",
182
183 /* related to tables */
184 "tbl line starts with span",
185 "tbl column starts with span",
186 "skipping vertical bar in tbl layout",
187
188 "generic error",
189
190 /* related to tables */
191 "non-alphabetic character in tbl options",
192 "skipping unknown tbl option",
193 "missing tbl option argument",
194 "wrong tbl option argument size",
195 "empty tbl layout",
196 "invalid character in tbl layout",
197 "unmatched parenthesis in tbl layout",
198 "tbl without any data cells",
199 "ignoring data in spanned tbl cell",
200 "ignoring extra tbl data cells",
201 "data block open at end of tbl",
202
203 /* related to document structure and macros */
204 NULL,
205 "input stack limit exceeded, infinite loop?",
206 "skipping bad character",
207 "skipping unknown macro",
208 "skipping insecure request",
209 "skipping item outside list",
210 "skipping column outside column list",
211 "skipping end of block that is not open",
212 "fewer RS blocks open, skipping",
213 "inserting missing end of block",
214 "appending missing end of block",
215
216 /* related to request and macro arguments */
217 "escaped character not allowed in a name",
218 "NOT IMPLEMENTED: Bd -file",
219 "missing list type, using -item",
220 "missing manual name, using \"\"",
221 "uname(3) system call failed, using UNKNOWN",
222 "unknown standard specifier",
223 "skipping request without numeric argument",
224 "NOT IMPLEMENTED: .so with absolute path or \"..\"",
225 ".so request failed",
226 "skipping all arguments",
227 "skipping excess arguments",
228 "divide by zero",
229
230 "unsupported feature",
231 "input too large",
232 "unsupported control character",
233 "unsupported roff request",
234 "eqn delim option in tbl",
235 "unsupported tbl layout modifier",
236 "ignoring macro in table",
237 };
238
239 static const char * const mandoclevels[MANDOCLEVEL_MAX] = {
240 "SUCCESS",
241 "RESERVED",
242 "WARNING",
243 "ERROR",
244 "UNSUPP",
245 "BADARG",
246 "SYSERR"
247 };
248
249
250 static void
251 resize_buf(struct buf *buf, size_t initial)
252 {
253
254 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
255 buf->buf = mandoc_realloc(buf->buf, buf->sz);
256 }
257
258 static void
259 choose_parser(struct mparse *curp)
260 {
261 char *cp, *ep;
262 int format;
263
264 /*
265 * If neither command line arguments -mdoc or -man select
266 * a parser nor the roff parser found a .Dd or .TH macro
267 * yet, look ahead in the main input buffer.
268 */
269
270 if ((format = roff_getformat(curp->roff)) == 0) {
271 cp = curp->primary->buf;
272 ep = cp + curp->primary->sz;
273 while (cp < ep) {
274 if (*cp == '.' || *cp == '\'') {
275 cp++;
276 if (cp[0] == 'D' && cp[1] == 'd') {
277 format = MPARSE_MDOC;
278 break;
279 }
280 if (cp[0] == 'T' && cp[1] == 'H') {
281 format = MPARSE_MAN;
282 break;
283 }
284 }
285 cp = memchr(cp, '\n', ep - cp);
286 if (cp == NULL)
287 break;
288 cp++;
289 }
290 }
291
292 if (format == MPARSE_MDOC) {
293 if (curp->man == NULL)
294 curp->man = mdoc_alloc(
295 curp->roff, curp, curp->defos,
296 MPARSE_QUICK & curp->options ? 1 : 0);
297 else
298 curp->man->macroset = MACROSET_MDOC;
299 mdoc_hash_init();
300 return;
301 }
302
303 /* Fall back to man(7) as a last resort. */
304
305 if (curp->man == NULL)
306 curp->man = man_alloc(
307 curp->roff, curp, curp->defos,
308 MPARSE_QUICK & curp->options ? 1 : 0);
309 else
310 curp->man->macroset = MACROSET_MAN;
311 man_hash_init();
312 }
313
314 /*
315 * Main parse routine for a buffer.
316 * It assumes encoding and line numbering are already set up.
317 * It can recurse directly (for invocations of user-defined
318 * macros, inline equations, and input line traps)
319 * and indirectly (for .so file inclusion).
320 */
321 static void
322 mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
323 {
324 const struct tbl_span *span;
325 struct buf ln;
326 const char *save_file;
327 char *cp;
328 size_t pos; /* byte number in the ln buffer */
329 enum rofferr rr;
330 int of;
331 int lnn; /* line number in the real file */
332 int fd;
333 pid_t save_child;
334 unsigned char c;
335
336 memset(&ln, 0, sizeof(ln));
337
338 lnn = curp->line;
339 pos = 0;
340
341 while (i < blk.sz) {
342 if (0 == pos && '\0' == blk.buf[i])
343 break;
344
345 if (start) {
346 curp->line = lnn;
347 curp->reparse_count = 0;
348
349 if (lnn < 3 &&
350 curp->filenc & MPARSE_UTF8 &&
351 curp->filenc & MPARSE_LATIN1)
352 curp->filenc = preconv_cue(&blk, i);
353 }
354
355 while (i < blk.sz && (start || blk.buf[i] != '\0')) {
356
357 /*
358 * When finding an unescaped newline character,
359 * leave the character loop to process the line.
360 * Skip a preceding carriage return, if any.
361 */
362
363 if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
364 '\n' == blk.buf[i + 1])
365 ++i;
366 if ('\n' == blk.buf[i]) {
367 ++i;
368 ++lnn;
369 break;
370 }
371
372 /*
373 * Make sure we have space for the worst
374 * case of 11 bytes: "\\[u10ffff]\0"
375 */
376
377 if (pos + 11 > ln.sz)
378 resize_buf(&ln, 256);
379
380 /*
381 * Encode 8-bit input.
382 */
383
384 c = blk.buf[i];
385 if (c & 0x80) {
386 if ( ! (curp->filenc && preconv_encode(
387 &blk, &i, &ln, &pos, &curp->filenc))) {
388 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
389 curp->line, pos, "0x%x", c);
390 ln.buf[pos++] = '?';
391 i++;
392 }
393 continue;
394 }
395
396 /*
397 * Exclude control characters.
398 */
399
400 if (c == 0x7f || (c < 0x20 && c != 0x09)) {
401 mandoc_vmsg(c == 0x00 || c == 0x04 ||
402 c > 0x0a ? MANDOCERR_CHAR_BAD :
403 MANDOCERR_CHAR_UNSUPP,
404 curp, curp->line, pos, "0x%x", c);
405 i++;
406 if (c != '\r')
407 ln.buf[pos++] = '?';
408 continue;
409 }
410
411 /* Trailing backslash = a plain char. */
412
413 if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
414 ln.buf[pos++] = blk.buf[i++];
415 continue;
416 }
417
418 /*
419 * Found escape and at least one other character.
420 * When it's a newline character, skip it.
421 * When there is a carriage return in between,
422 * skip that one as well.
423 */
424
425 if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
426 '\n' == blk.buf[i + 2])
427 ++i;
428 if ('\n' == blk.buf[i + 1]) {
429 i += 2;
430 ++lnn;
431 continue;
432 }
433
434 if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
435 i += 2;
436 /* Comment, skip to end of line */
437 for (; i < blk.sz; ++i) {
438 if ('\n' == blk.buf[i]) {
439 ++i;
440 ++lnn;
441 break;
442 }
443 }
444
445 /* Backout trailing whitespaces */
446 for (; pos > 0; --pos) {
447 if (ln.buf[pos - 1] != ' ')
448 break;
449 if (pos > 2 && ln.buf[pos - 2] == '\\')
450 break;
451 }
452 break;
453 }
454
455 /* Catch escaped bogus characters. */
456
457 c = (unsigned char) blk.buf[i+1];
458
459 if ( ! (isascii(c) &&
460 (isgraph(c) || isblank(c)))) {
461 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
462 curp->line, pos, "0x%x", c);
463 i += 2;
464 ln.buf[pos++] = '?';
465 continue;
466 }
467
468 /* Some other escape sequence, copy & cont. */
469
470 ln.buf[pos++] = blk.buf[i++];
471 ln.buf[pos++] = blk.buf[i++];
472 }
473
474 if (pos >= ln.sz)
475 resize_buf(&ln, 256);
476
477 ln.buf[pos] = '\0';
478
479 /*
480 * A significant amount of complexity is contained by
481 * the roff preprocessor. It's line-oriented but can be
482 * expressed on one line, so we need at times to
483 * readjust our starting point and re-run it. The roff
484 * preprocessor can also readjust the buffers with new
485 * data, so we pass them in wholesale.
486 */
487
488 of = 0;
489
490 /*
491 * Maintain a lookaside buffer of all parsed lines. We
492 * only do this if mparse_keep() has been invoked (the
493 * buffer may be accessed with mparse_getkeep()).
494 */
495
496 if (curp->secondary) {
497 curp->secondary->buf = mandoc_realloc(
498 curp->secondary->buf,
499 curp->secondary->sz + pos + 2);
500 memcpy(curp->secondary->buf +
501 curp->secondary->sz,
502 ln.buf, pos);
503 curp->secondary->sz += pos;
504 curp->secondary->buf
505 [curp->secondary->sz] = '\n';
506 curp->secondary->sz++;
507 curp->secondary->buf
508 [curp->secondary->sz] = '\0';
509 }
510 rerun:
511 rr = roff_parseln(curp->roff, curp->line, &ln, &of);
512
513 switch (rr) {
514 case ROFF_REPARSE:
515 if (REPARSE_LIMIT >= ++curp->reparse_count)
516 mparse_buf_r(curp, ln, of, 0);
517 else
518 mandoc_msg(MANDOCERR_ROFFLOOP, curp,
519 curp->line, pos, NULL);
520 pos = 0;
521 continue;
522 case ROFF_APPEND:
523 pos = strlen(ln.buf);
524 continue;
525 case ROFF_RERUN:
526 goto rerun;
527 case ROFF_IGN:
528 pos = 0;
529 continue;
530 case ROFF_SO:
531 if ( ! (curp->options & MPARSE_SO) &&
532 (i >= blk.sz || blk.buf[i] == '\0')) {
533 curp->sodest = mandoc_strdup(ln.buf + of);
534 free(ln.buf);
535 return;
536 }
537 /*
538 * We remove `so' clauses from our lookaside
539 * buffer because we're going to descend into
540 * the file recursively.
541 */
542 if (curp->secondary)
543 curp->secondary->sz -= pos + 1;
544 save_file = curp->file;
545 save_child = curp->child;
546 if (mparse_open(curp, &fd, ln.buf + of) ==
547 MANDOCLEVEL_OK) {
548 mparse_readfd(curp, fd, ln.buf + of);
549 curp->file = save_file;
550 } else {
551 curp->file = save_file;
552 mandoc_vmsg(MANDOCERR_SO_FAIL,
553 curp, curp->line, pos,
554 ".so %s", ln.buf + of);
555 ln.sz = mandoc_asprintf(&cp,
556 ".sp\nSee the file %s.\n.sp",
557 ln.buf + of);
558 free(ln.buf);
559 ln.buf = cp;
560 of = 0;
561 mparse_buf_r(curp, ln, of, 0);
562 }
563 curp->child = save_child;
564 pos = 0;
565 continue;
566 default:
567 break;
568 }
569
570 /*
571 * If input parsers have not been allocated, do so now.
572 * We keep these instanced between parsers, but set them
573 * locally per parse routine since we can use different
574 * parsers with each one.
575 */
576
577 if (curp->man == NULL ||
578 curp->man->macroset == MACROSET_NONE)
579 choose_parser(curp);
580
581 /*
582 * Lastly, push down into the parsers themselves.
583 * If libroff returns ROFF_TBL, then add it to the
584 * currently open parse. Since we only get here if
585 * there does exist data (see tbl_data.c), we're
586 * guaranteed that something's been allocated.
587 * Do the same for ROFF_EQN.
588 */
589
590 if (rr == ROFF_TBL) {
591 while ((span = roff_span(curp->roff)) != NULL)
592 if (curp->man->macroset == MACROSET_MDOC)
593 mdoc_addspan(curp->man, span);
594 else
595 man_addspan(curp->man, span);
596 } else if (rr == ROFF_EQN) {
597 if (curp->man->macroset == MACROSET_MDOC)
598 mdoc_addeqn(curp->man, roff_eqn(curp->roff));
599 else
600 man_addeqn(curp->man, roff_eqn(curp->roff));
601 } else if ((curp->man->macroset == MACROSET_MDOC ?
602 mdoc_parseln(curp->man, curp->line, ln.buf, of) :
603 man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
604 break;
605
606 /* Temporary buffers typically are not full. */
607
608 if (0 == start && '\0' == blk.buf[i])
609 break;
610
611 /* Start the next input line. */
612
613 pos = 0;
614 }
615
616 free(ln.buf);
617 }
618
619 static int
620 read_whole_file(struct mparse *curp, const char *file, int fd,
621 struct buf *fb, int *with_mmap)
622 {
623 size_t off;
624 ssize_t ssz;
625
626 #if HAVE_MMAP
627 struct stat st;
628 if (-1 == fstat(fd, &st)) {
629 perror(file);
630 exit((int)MANDOCLEVEL_SYSERR);
631 }
632
633 /*
634 * If we're a regular file, try just reading in the whole entry
635 * via mmap(). This is faster than reading it into blocks, and
636 * since each file is only a few bytes to begin with, I'm not
637 * concerned that this is going to tank any machines.
638 */
639
640 if (S_ISREG(st.st_mode)) {
641 if (st.st_size > 0x7fffffff) {
642 mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
643 return(0);
644 }
645 *with_mmap = 1;
646 fb->sz = (size_t)st.st_size;
647 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
648 if (fb->buf != MAP_FAILED)
649 return(1);
650 }
651 #endif
652
653 /*
654 * If this isn't a regular file (like, say, stdin), then we must
655 * go the old way and just read things in bit by bit.
656 */
657
658 *with_mmap = 0;
659 off = 0;
660 fb->sz = 0;
661 fb->buf = NULL;
662 for (;;) {
663 if (off == fb->sz) {
664 if (fb->sz == (1U << 31)) {
665 mandoc_msg(MANDOCERR_TOOLARGE, curp,
666 0, 0, NULL);
667 break;
668 }
669 resize_buf(fb, 65536);
670 }
671 ssz = read(fd, fb->buf + (int)off, fb->sz - off);
672 if (ssz == 0) {
673 fb->sz = off;
674 return(1);
675 }
676 if (ssz == -1) {
677 perror(file);
678 exit((int)MANDOCLEVEL_SYSERR);
679 }
680 off += (size_t)ssz;
681 }
682
683 free(fb->buf);
684 fb->buf = NULL;
685 return(0);
686 }
687
688 static void
689 mparse_end(struct mparse *curp)
690 {
691
692 if (curp->man == NULL && curp->sodest == NULL)
693 curp->man = man_alloc(curp->roff, curp, curp->defos,
694 curp->options & MPARSE_QUICK ? 1 : 0);
695 if (curp->man->macroset == MACROSET_NONE)
696 curp->man->macroset = MACROSET_MAN;
697 if (curp->man->macroset == MACROSET_MDOC)
698 mdoc_endparse(curp->man);
699 else
700 man_endparse(curp->man);
701 roff_endparse(curp->roff);
702 }
703
704 static void
705 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
706 {
707 struct buf *svprimary;
708 const char *svfile;
709 size_t offset;
710 static int recursion_depth;
711
712 if (64 < recursion_depth) {
713 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
714 return;
715 }
716
717 /* Line number is per-file. */
718 svfile = curp->file;
719 curp->file = file;
720 svprimary = curp->primary;
721 curp->primary = &blk;
722 curp->line = 1;
723 recursion_depth++;
724
725 /* Skip an UTF-8 byte order mark. */
726 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
727 (unsigned char)blk.buf[0] == 0xef &&
728 (unsigned char)blk.buf[1] == 0xbb &&
729 (unsigned char)blk.buf[2] == 0xbf) {
730 offset = 3;
731 curp->filenc &= ~MPARSE_LATIN1;
732 } else
733 offset = 0;
734
735 mparse_buf_r(curp, blk, offset, 1);
736
737 if (--recursion_depth == 0)
738 mparse_end(curp);
739
740 curp->primary = svprimary;
741 curp->file = svfile;
742 }
743
744 enum mandoclevel
745 mparse_readmem(struct mparse *curp, void *buf, size_t len,
746 const char *file)
747 {
748 struct buf blk;
749
750 blk.buf = buf;
751 blk.sz = len;
752
753 mparse_parse_buffer(curp, blk, file);
754 return(curp->file_status);
755 }
756
757 /*
758 * Read the whole file into memory and call the parsers.
759 * Called recursively when an .so request is encountered.
760 */
761 enum mandoclevel
762 mparse_readfd(struct mparse *curp, int fd, const char *file)
763 {
764 struct buf blk;
765 int with_mmap;
766 int save_filenc;
767
768 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
769 save_filenc = curp->filenc;
770 curp->filenc = curp->options &
771 (MPARSE_UTF8 | MPARSE_LATIN1);
772 mparse_parse_buffer(curp, blk, file);
773 curp->filenc = save_filenc;
774 #if HAVE_MMAP
775 if (with_mmap)
776 munmap(blk.buf, blk.sz);
777 else
778 #endif
779 free(blk.buf);
780 }
781
782 if (fd != STDIN_FILENO && close(fd) == -1)
783 perror(file);
784
785 mparse_wait(curp);
786 return(curp->file_status);
787 }
788
789 enum mandoclevel
790 mparse_open(struct mparse *curp, int *fd, const char *file)
791 {
792 int pfd[2];
793 int save_errno;
794 char *cp;
795
796 curp->file = file;
797
798 /* Unless zipped, try to just open the file. */
799
800 if ((cp = strrchr(file, '.')) == NULL ||
801 strcmp(cp + 1, "gz")) {
802 curp->child = 0;
803 if ((*fd = open(file, O_RDONLY)) != -1)
804 return(MANDOCLEVEL_OK);
805
806 /* Open failed; try to append ".gz". */
807
808 mandoc_asprintf(&cp, "%s.gz", file);
809 file = cp;
810 } else
811 cp = NULL;
812
813 /* Before forking, make sure the file can be read. */
814
815 save_errno = errno;
816 if (access(file, R_OK) == -1) {
817 if (cp != NULL)
818 errno = save_errno;
819 free(cp);
820 *fd = -1;
821 curp->child = 0;
822 mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
823 return(MANDOCLEVEL_ERROR);
824 }
825
826 /* Run gunzip(1). */
827
828 if (pipe(pfd) == -1) {
829 perror("pipe");
830 exit((int)MANDOCLEVEL_SYSERR);
831 }
832
833 switch (curp->child = fork()) {
834 case -1:
835 perror("fork");
836 exit((int)MANDOCLEVEL_SYSERR);
837 case 0:
838 close(pfd[0]);
839 if (dup2(pfd[1], STDOUT_FILENO) == -1) {
840 perror("dup");
841 exit((int)MANDOCLEVEL_SYSERR);
842 }
843 signal(SIGPIPE, SIG_DFL);
844 execlp("gunzip", "gunzip", "-c", file, NULL);
845 perror("exec");
846 exit((int)MANDOCLEVEL_SYSERR);
847 default:
848 close(pfd[1]);
849 *fd = pfd[0];
850 return(MANDOCLEVEL_OK);
851 }
852 }
853
854 enum mandoclevel
855 mparse_wait(struct mparse *curp)
856 {
857 int status;
858
859 if (curp->child == 0)
860 return(MANDOCLEVEL_OK);
861
862 if (waitpid(curp->child, &status, 0) == -1) {
863 perror("wait");
864 exit((int)MANDOCLEVEL_SYSERR);
865 }
866 curp->child = 0;
867 if (WIFSIGNALED(status)) {
868 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
869 "gunzip died from signal %d", WTERMSIG(status));
870 return(MANDOCLEVEL_ERROR);
871 }
872 if (WEXITSTATUS(status)) {
873 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
874 "gunzip failed with code %d", WEXITSTATUS(status));
875 return(MANDOCLEVEL_ERROR);
876 }
877 return(MANDOCLEVEL_OK);
878 }
879
880 struct mparse *
881 mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
882 const struct mchars *mchars, const char *defos)
883 {
884 struct mparse *curp;
885
886 curp = mandoc_calloc(1, sizeof(struct mparse));
887
888 curp->options = options;
889 curp->wlevel = wlevel;
890 curp->mmsg = mmsg;
891 curp->defos = defos;
892
893 curp->mchars = mchars;
894 curp->roff = roff_alloc(curp, curp->mchars, options);
895 if (curp->options & MPARSE_MDOC) {
896 curp->man = mdoc_alloc(
897 curp->roff, curp, curp->defos,
898 curp->options & MPARSE_QUICK ? 1 : 0);
899 mdoc_hash_init();
900 }
901 if (curp->options & MPARSE_MAN) {
902 curp->man = man_alloc(
903 curp->roff, curp, curp->defos,
904 curp->options & MPARSE_QUICK ? 1 : 0);
905 man_hash_init();
906 }
907
908 return(curp);
909 }
910
911 void
912 mparse_reset(struct mparse *curp)
913 {
914
915 roff_reset(curp->roff);
916
917 if (curp->man != NULL) {
918 if (curp->man->macroset == MACROSET_MDOC)
919 mdoc_reset(curp->man);
920 else
921 man_reset(curp->man);
922 curp->man->macroset = MACROSET_NONE;
923 }
924 if (curp->secondary)
925 curp->secondary->sz = 0;
926
927 curp->file_status = MANDOCLEVEL_OK;
928
929 free(curp->sodest);
930 curp->sodest = NULL;
931 }
932
933 void
934 mparse_free(struct mparse *curp)
935 {
936
937 if (curp->man->macroset == MACROSET_MDOC)
938 mdoc_free(curp->man);
939 if (curp->man->macroset == MACROSET_MAN)
940 man_free(curp->man);
941 if (curp->roff)
942 roff_free(curp->roff);
943 if (curp->secondary)
944 free(curp->secondary->buf);
945
946 free(curp->secondary);
947 free(curp->sodest);
948 free(curp);
949 }
950
951 void
952 mparse_result(struct mparse *curp, struct roff_man **man,
953 char **sodest)
954 {
955
956 if (sodest && NULL != (*sodest = curp->sodest)) {
957 *man = NULL;
958 return;
959 }
960 if (man)
961 *man = curp->man;
962 }
963
964 void
965 mandoc_vmsg(enum mandocerr t, struct mparse *m,
966 int ln, int pos, const char *fmt, ...)
967 {
968 char buf[256];
969 va_list ap;
970
971 va_start(ap, fmt);
972 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
973 va_end(ap);
974
975 mandoc_msg(t, m, ln, pos, buf);
976 }
977
978 void
979 mandoc_msg(enum mandocerr er, struct mparse *m,
980 int ln, int col, const char *msg)
981 {
982 enum mandoclevel level;
983
984 level = MANDOCLEVEL_UNSUPP;
985 while (er < mandoclimits[level])
986 level--;
987
988 if (level < m->wlevel && er != MANDOCERR_FILE)
989 return;
990
991 if (m->mmsg)
992 (*m->mmsg)(er, level, m->file, ln, col, msg);
993
994 if (m->file_status < level)
995 m->file_status = level;
996 }
997
998 const char *
999 mparse_strerror(enum mandocerr er)
1000 {
1001
1002 return(mandocerrs[er]);
1003 }
1004
1005 const char *
1006 mparse_strlevel(enum mandoclevel lvl)
1007 {
1008 return(mandoclevels[lvl]);
1009 }
1010
1011 void
1012 mparse_keep(struct mparse *p)
1013 {
1014
1015 assert(NULL == p->secondary);
1016 p->secondary = mandoc_calloc(1, sizeof(struct buf));
1017 }
1018
1019 const char *
1020 mparse_getkeep(const struct mparse *p)
1021 {
1022
1023 assert(p->secondary);
1024 return(p->secondary->sz ? p->secondary->buf : NULL);
1025 }