]> git.cameronkatri.com Git - mandoc.git/blob - read.c
Make the character table available to libroff so it can check the
[mandoc.git] / read.c
1 /* $Id: read.c,v 1.94 2014/10/28 17:36:19 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include "config.h"
20
21 #include <sys/types.h>
22 #if HAVE_MMAP
23 #include <sys/mman.h>
24 #include <sys/stat.h>
25 #endif
26 #include <sys/wait.h>
27
28 #include <assert.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <stdarg.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38
39 #include "mandoc.h"
40 #include "mandoc_aux.h"
41 #include "libmandoc.h"
42 #include "mdoc.h"
43 #include "man.h"
44 #include "main.h"
45
/*
 * Upper bound for the per-line reparse counter: once the roff parser
 * has answered ROFF_REPARSE more than this many times for one input
 * line, the reader reports MANDOCERR_ROFFLOOP instead of recursing.
 */
#define	REPARSE_LIMIT	1000
47
48 struct mparse {
49 struct man *pman; /* persistent man parser */
50 struct mdoc *pmdoc; /* persistent mdoc parser */
51 struct man *man; /* man parser */
52 struct mdoc *mdoc; /* mdoc parser */
53 struct roff *roff; /* roff parser (!NULL) */
54 const struct mchars *mchars; /* character table */
55 char *sodest; /* filename pointed to by .so */
56 const char *file; /* filename of current input file */
57 struct buf *primary; /* buffer currently being parsed */
58 struct buf *secondary; /* preprocessed copy of input */
59 const char *defos; /* default operating system */
60 mandocmsg mmsg; /* warning/error message handler */
61 enum mandoclevel file_status; /* status of current parse */
62 enum mandoclevel wlevel; /* ignore messages below this */
63 int options; /* parser options */
64 int filenc; /* encoding of the current file */
65 int reparse_count; /* finite interp. stack */
66 int line; /* line number in the file */
67 };
68
/* Forward declarations of the file-local helpers defined below. */
static	void	  choose_parser(struct mparse *curp);
static	void	  resize_buf(struct buf *buf, size_t initial);
static	void	  mparse_buf_r(struct mparse *curp, struct buf blk,
			int start);
static	int	  read_whole_file(struct mparse *curp, const char *file,
			int fd, struct buf *fb, int *with_mmap);
static	void	  mparse_end(struct mparse *curp);
static	void	  mparse_parse_buffer(struct mparse *curp, struct buf blk,
			const char *file);
77
78 static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = {
79 MANDOCERR_OK,
80 MANDOCERR_WARNING,
81 MANDOCERR_WARNING,
82 MANDOCERR_ERROR,
83 MANDOCERR_FATAL,
84 MANDOCERR_MAX,
85 MANDOCERR_MAX
86 };
87
88 static const char * const mandocerrs[MANDOCERR_MAX] = {
89 "ok",
90
91 "generic warning",
92
93 /* related to the prologue */
94 "missing manual title, using UNTITLED",
95 "missing manual title, using \"\"",
96 "lower case character in document title",
97 "missing manual section, using \"\"",
98 "unknown manual section",
99 "unknown manual volume or arch",
100 "missing date, using today's date",
101 "cannot parse date, using it verbatim",
102 "missing Os macro, using \"\"",
103 "duplicate prologue macro",
104 "late prologue macro",
105 "skipping late title macro",
106 "prologue macros out of order",
107
108 /* related to document structure */
109 ".so is fragile, better use ln(1)",
110 "no document body",
111 "content before first section header",
112 "first section is not \"NAME\"",
113 "bad NAME section contents",
114 "sections out of conventional order",
115 "duplicate section title",
116 "unexpected section",
117 "unusual Xr order",
118 "unusual Xr punctuation",
119 "AUTHORS section without An macro",
120
121 /* related to macros and nesting */
122 "obsolete macro",
123 "skipping paragraph macro",
124 "moving paragraph macro out of list",
125 "skipping no-space macro",
126 "blocks badly nested",
127 "nested displays are not portable",
128 "moving content out of list",
129 ".Vt block has child macro",
130 "fill mode already enabled, skipping",
131 "fill mode already disabled, skipping",
132 "line scope broken",
133
134 /* related to missing macro arguments */
135 "skipping empty request",
136 "conditional request controls empty scope",
137 "skipping empty macro",
138 "empty argument, using 0n",
139 "argument count wrong",
140 "missing display type, using -ragged",
141 "list type is not the first argument",
142 "missing -width in -tag list, using 8n",
143 "missing utility name, using \"\"",
144 "empty head in list item",
145 "empty list item",
146 "missing font type, using \\fR",
147 "unknown font type, using \\fR",
148 "missing -std argument, adding it",
149 "missing eqn box, using \"\"",
150
151 /* related to bad macro arguments */
152 "unterminated quoted argument",
153 "duplicate argument",
154 "skipping duplicate argument",
155 "skipping duplicate display type",
156 "skipping duplicate list type",
157 "skipping -width argument",
158 "unknown AT&T UNIX version",
159 "comma in function argument",
160 "parenthesis in function name",
161 "invalid content in Rs block",
162 "invalid Boolean argument",
163 "unknown font, skipping request",
164
165 /* related to plain text */
166 "blank line in fill mode, using .sp",
167 "tab in filled text",
168 "whitespace at end of input line",
169 "bad comment style",
170 "invalid escape sequence",
171 "undefined string, using \"\"",
172
173 "generic error",
174
175 /* related to equations */
176 "unexpected equation scope closure",
177 "equation scope open on exit",
178 "overlapping equation scopes",
179 "unexpected end of equation",
180
181 /* related to tables */
182 "bad table syntax",
183 "bad table option",
184 "bad table layout",
185 "no table layout cells specified",
186 "no table data cells specified",
187 "ignore data in cell",
188 "data block still open",
189 "ignoring extra data cells",
190
191 /* related to document structure and macros */
192 "input stack limit exceeded, infinite loop?",
193 "skipping bad character",
194 "skipping unknown macro",
195 "skipping item outside list",
196 "skipping column outside column list",
197 "skipping end of block that is not open",
198 "inserting missing end of block",
199 "appending missing end of block",
200
201 /* related to request and macro arguments */
202 "escaped character not allowed in a name",
203 "argument count wrong",
204 "missing list type, using -item",
205 "missing manual name, using \"\"",
206 "uname(3) system call failed, using UNKNOWN",
207 "unknown standard specifier",
208 "skipping request without numeric argument",
209 "skipping all arguments",
210 "skipping excess arguments",
211 "divide by zero",
212
213 "generic fatal error",
214
215 "input too large",
216 "NOT IMPLEMENTED: Bd -file",
217 "NOT IMPLEMENTED: .so with absolute path or \"..\"",
218 ".so request failed",
219
220 /* system errors */
221 "cannot dup file descriptor",
222 "cannot exec",
223 "gunzip failed with code",
224 "cannot fork",
225 NULL,
226 "cannot open pipe",
227 "cannot read file",
228 "gunzip died from signal",
229 "cannot stat file",
230 "wait failed",
231 };
232
233 static const char * const mandoclevels[MANDOCLEVEL_MAX] = {
234 "SUCCESS",
235 "RESERVED",
236 "WARNING",
237 "ERROR",
238 "FATAL",
239 "BADARG",
240 "SYSERR"
241 };
242
243
244 static void
245 resize_buf(struct buf *buf, size_t initial)
246 {
247
248 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
249 buf->buf = mandoc_realloc(buf->buf, buf->sz);
250 }
251
252 static void
253 choose_parser(struct mparse *curp)
254 {
255 char *cp, *ep;
256 int format;
257
258 /*
259 * If neither command line arguments -mdoc or -man select
260 * a parser nor the roff parser found a .Dd or .TH macro
261 * yet, look ahead in the main input buffer.
262 */
263
264 if ((format = roff_getformat(curp->roff)) == 0) {
265 cp = curp->primary->buf;
266 ep = cp + curp->primary->sz;
267 while (cp < ep) {
268 if (*cp == '.' || *cp == '\'') {
269 cp++;
270 if (cp[0] == 'D' && cp[1] == 'd') {
271 format = MPARSE_MDOC;
272 break;
273 }
274 if (cp[0] == 'T' && cp[1] == 'H') {
275 format = MPARSE_MAN;
276 break;
277 }
278 }
279 cp = memchr(cp, '\n', ep - cp);
280 if (cp == NULL)
281 break;
282 cp++;
283 }
284 }
285
286 if (format == MPARSE_MDOC) {
287 if (NULL == curp->pmdoc)
288 curp->pmdoc = mdoc_alloc(
289 curp->roff, curp, curp->defos,
290 MPARSE_QUICK & curp->options ? 1 : 0);
291 assert(curp->pmdoc);
292 curp->mdoc = curp->pmdoc;
293 return;
294 }
295
296 /* Fall back to man(7) as a last resort. */
297
298 if (NULL == curp->pman)
299 curp->pman = man_alloc(curp->roff, curp,
300 MPARSE_QUICK & curp->options ? 1 : 0);
301 assert(curp->pman);
302 curp->man = curp->pman;
303 }
304
305 /*
306 * Main parse routine for an opened file. This is called for each
307 * opened file and simply loops around the full input file, possibly
308 * nesting (i.e., with `so').
309 */
310 static void
311 mparse_buf_r(struct mparse *curp, struct buf blk, int start)
312 {
313 const struct tbl_span *span;
314 struct buf ln;
315 enum rofferr rr;
316 int i, of, rc;
317 int pos; /* byte number in the ln buffer */
318 int lnn; /* line number in the real file */
319 unsigned char c;
320
321 memset(&ln, 0, sizeof(struct buf));
322
323 lnn = curp->line;
324 pos = 0;
325
326 for (i = blk.offs; i < (int)blk.sz; ) {
327 if (0 == pos && '\0' == blk.buf[i])
328 break;
329
330 if (start) {
331 curp->line = lnn;
332 curp->reparse_count = 0;
333
334 if (lnn < 3 &&
335 curp->filenc & MPARSE_UTF8 &&
336 curp->filenc & MPARSE_LATIN1) {
337 blk.offs = i;
338 curp->filenc = preconv_cue(&blk);
339 }
340 }
341
342 while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) {
343
344 /*
345 * When finding an unescaped newline character,
346 * leave the character loop to process the line.
347 * Skip a preceding carriage return, if any.
348 */
349
350 if ('\r' == blk.buf[i] && i + 1 < (int)blk.sz &&
351 '\n' == blk.buf[i + 1])
352 ++i;
353 if ('\n' == blk.buf[i]) {
354 ++i;
355 ++lnn;
356 break;
357 }
358
359 /*
360 * Make sure we have space for the worst
361 * case of 11 bytes: "\\[u10ffff]\0"
362 */
363
364 if (pos + 11 > (int)ln.sz)
365 resize_buf(&ln, 256);
366
367 /*
368 * Encode 8-bit input.
369 */
370
371 c = blk.buf[i];
372 if (c & 0x80) {
373 blk.offs = i;
374 ln.offs = pos;
375 if (curp->filenc && preconv_encode(
376 &blk, &ln, &curp->filenc)) {
377 pos = ln.offs;
378 i = blk.offs;
379 } else {
380 mandoc_vmsg(MANDOCERR_BADCHAR,
381 curp, curp->line, pos,
382 "0x%x", c);
383 ln.buf[pos++] = '?';
384 i++;
385 }
386 continue;
387 }
388
389 /*
390 * Exclude control characters.
391 */
392
393 if (c == 0x7f || (c < 0x20 && c != 0x09)) {
394 mandoc_vmsg(MANDOCERR_BADCHAR, curp,
395 curp->line, pos, "0x%x", c);
396 i++;
397 ln.buf[pos++] = '?';
398 continue;
399 }
400
401 /* Trailing backslash = a plain char. */
402
403 if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
404 ln.buf[pos++] = blk.buf[i++];
405 continue;
406 }
407
408 /*
409 * Found escape and at least one other character.
410 * When it's a newline character, skip it.
411 * When there is a carriage return in between,
412 * skip that one as well.
413 */
414
415 if ('\r' == blk.buf[i + 1] && i + 2 < (int)blk.sz &&
416 '\n' == blk.buf[i + 2])
417 ++i;
418 if ('\n' == blk.buf[i + 1]) {
419 i += 2;
420 ++lnn;
421 continue;
422 }
423
424 if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
425 i += 2;
426 /* Comment, skip to end of line */
427 for (; i < (int)blk.sz; ++i) {
428 if ('\n' == blk.buf[i]) {
429 ++i;
430 ++lnn;
431 break;
432 }
433 }
434
435 /* Backout trailing whitespaces */
436 for (; pos > 0; --pos) {
437 if (ln.buf[pos - 1] != ' ')
438 break;
439 if (pos > 2 && ln.buf[pos - 2] == '\\')
440 break;
441 }
442 break;
443 }
444
445 /* Catch escaped bogus characters. */
446
447 c = (unsigned char) blk.buf[i+1];
448
449 if ( ! (isascii(c) &&
450 (isgraph(c) || isblank(c)))) {
451 mandoc_vmsg(MANDOCERR_BADCHAR, curp,
452 curp->line, pos, "0x%x", c);
453 i += 2;
454 ln.buf[pos++] = '?';
455 continue;
456 }
457
458 /* Some other escape sequence, copy & cont. */
459
460 ln.buf[pos++] = blk.buf[i++];
461 ln.buf[pos++] = blk.buf[i++];
462 }
463
464 if (pos >= (int)ln.sz)
465 resize_buf(&ln, 256);
466
467 ln.buf[pos] = '\0';
468
469 /*
470 * A significant amount of complexity is contained by
471 * the roff preprocessor. It's line-oriented but can be
472 * expressed on one line, so we need at times to
473 * readjust our starting point and re-run it. The roff
474 * preprocessor can also readjust the buffers with new
475 * data, so we pass them in wholesale.
476 */
477
478 of = 0;
479
480 /*
481 * Maintain a lookaside buffer of all parsed lines. We
482 * only do this if mparse_keep() has been invoked (the
483 * buffer may be accessed with mparse_getkeep()).
484 */
485
486 if (curp->secondary) {
487 curp->secondary->buf = mandoc_realloc(
488 curp->secondary->buf,
489 curp->secondary->sz + pos + 2);
490 memcpy(curp->secondary->buf +
491 curp->secondary->sz,
492 ln.buf, pos);
493 curp->secondary->sz += pos;
494 curp->secondary->buf
495 [curp->secondary->sz] = '\n';
496 curp->secondary->sz++;
497 curp->secondary->buf
498 [curp->secondary->sz] = '\0';
499 }
500 rerun:
501 rr = roff_parseln(curp->roff, curp->line,
502 &ln.buf, &ln.sz, of, &of);
503
504 switch (rr) {
505 case ROFF_REPARSE:
506 if (REPARSE_LIMIT >= ++curp->reparse_count)
507 mparse_buf_r(curp, ln, 0);
508 else
509 mandoc_msg(MANDOCERR_ROFFLOOP, curp,
510 curp->line, pos, NULL);
511 pos = 0;
512 continue;
513 case ROFF_APPEND:
514 pos = (int)strlen(ln.buf);
515 continue;
516 case ROFF_RERUN:
517 goto rerun;
518 case ROFF_IGN:
519 pos = 0;
520 continue;
521 case ROFF_ERR:
522 assert(MANDOCLEVEL_FATAL <= curp->file_status);
523 break;
524 case ROFF_SO:
525 if (0 == (MPARSE_SO & curp->options) &&
526 (i >= (int)blk.sz || '\0' == blk.buf[i])) {
527 curp->sodest = mandoc_strdup(ln.buf + of);
528 free(ln.buf);
529 return;
530 }
531 /*
532 * We remove `so' clauses from our lookaside
533 * buffer because we're going to descend into
534 * the file recursively.
535 */
536 if (curp->secondary)
537 curp->secondary->sz -= pos + 1;
538 mparse_readfd(curp, -1, ln.buf + of);
539 if (MANDOCLEVEL_FATAL <= curp->file_status) {
540 mandoc_vmsg(MANDOCERR_SO_FAIL,
541 curp, curp->line, pos,
542 ".so %s", ln.buf + of);
543 break;
544 }
545 pos = 0;
546 continue;
547 default:
548 break;
549 }
550
551 /*
552 * If we encounter errors in the recursive parse, make
553 * sure we don't continue parsing.
554 */
555
556 if (MANDOCLEVEL_FATAL <= curp->file_status)
557 break;
558
559 /*
560 * If input parsers have not been allocated, do so now.
561 * We keep these instanced between parsers, but set them
562 * locally per parse routine since we can use different
563 * parsers with each one.
564 */
565
566 if ( ! (curp->man || curp->mdoc))
567 choose_parser(curp);
568
569 /*
570 * Lastly, push down into the parsers themselves.
571 * If libroff returns ROFF_TBL, then add it to the
572 * currently open parse. Since we only get here if
573 * there does exist data (see tbl_data.c), we're
574 * guaranteed that something's been allocated.
575 * Do the same for ROFF_EQN.
576 */
577
578 rc = -1;
579
580 if (ROFF_TBL == rr)
581 while (NULL != (span = roff_span(curp->roff))) {
582 rc = curp->man ?
583 man_addspan(curp->man, span) :
584 mdoc_addspan(curp->mdoc, span);
585 if (0 == rc)
586 break;
587 }
588 else if (ROFF_EQN == rr)
589 rc = curp->mdoc ?
590 mdoc_addeqn(curp->mdoc,
591 roff_eqn(curp->roff)) :
592 man_addeqn(curp->man,
593 roff_eqn(curp->roff));
594 else if (curp->man || curp->mdoc)
595 rc = curp->man ?
596 man_parseln(curp->man,
597 curp->line, ln.buf, of) :
598 mdoc_parseln(curp->mdoc,
599 curp->line, ln.buf, of);
600
601 if (0 == rc) {
602 assert(MANDOCLEVEL_FATAL <= curp->file_status);
603 break;
604 } else if (2 == rc)
605 break;
606
607 /* Temporary buffers typically are not full. */
608
609 if (0 == start && '\0' == blk.buf[i])
610 break;
611
612 /* Start the next input line. */
613
614 pos = 0;
615 }
616
617 free(ln.buf);
618 }
619
620 static int
621 read_whole_file(struct mparse *curp, const char *file, int fd,
622 struct buf *fb, int *with_mmap)
623 {
624 size_t off;
625 ssize_t ssz;
626
627 #if HAVE_MMAP
628 struct stat st;
629 if (-1 == fstat(fd, &st)) {
630 curp->file_status = MANDOCLEVEL_SYSERR;
631 if (curp->mmsg)
632 (*curp->mmsg)(MANDOCERR_SYSSTAT, curp->file_status,
633 file, 0, 0, strerror(errno));
634 return(0);
635 }
636
637 /*
638 * If we're a regular file, try just reading in the whole entry
639 * via mmap(). This is faster than reading it into blocks, and
640 * since each file is only a few bytes to begin with, I'm not
641 * concerned that this is going to tank any machines.
642 */
643
644 if (S_ISREG(st.st_mode)) {
645 if (st.st_size >= (1U << 31)) {
646 curp->file_status = MANDOCLEVEL_FATAL;
647 if (curp->mmsg)
648 (*curp->mmsg)(MANDOCERR_TOOLARGE,
649 curp->file_status, file, 0, 0, NULL);
650 return(0);
651 }
652 *with_mmap = 1;
653 fb->offs = 0;
654 fb->sz = (size_t)st.st_size;
655 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
656 if (fb->buf != MAP_FAILED)
657 return(1);
658 }
659 #endif
660
661 /*
662 * If this isn't a regular file (like, say, stdin), then we must
663 * go the old way and just read things in bit by bit.
664 */
665
666 *with_mmap = 0;
667 off = 0;
668 fb->sz = 0;
669 fb->buf = NULL;
670 for (;;) {
671 if (off == fb->sz) {
672 if (fb->sz == (1U << 31)) {
673 curp->file_status = MANDOCLEVEL_FATAL;
674 if (curp->mmsg)
675 (*curp->mmsg)(MANDOCERR_TOOLARGE,
676 curp->file_status,
677 file, 0, 0, NULL);
678 break;
679 }
680 resize_buf(fb, 65536);
681 }
682 ssz = read(fd, fb->buf + (int)off, fb->sz - off);
683 if (ssz == 0) {
684 fb->sz = off;
685 fb->offs = 0;
686 return(1);
687 }
688 if (ssz == -1) {
689 curp->file_status = MANDOCLEVEL_SYSERR;
690 if (curp->mmsg)
691 (*curp->mmsg)(MANDOCERR_SYSREAD,
692 curp->file_status, file, 0, 0,
693 strerror(errno));
694 break;
695 }
696 off += (size_t)ssz;
697 }
698
699 free(fb->buf);
700 fb->buf = NULL;
701 return(0);
702 }
703
704 static void
705 mparse_end(struct mparse *curp)
706 {
707
708 if (MANDOCLEVEL_FATAL <= curp->file_status)
709 return;
710
711 if (curp->mdoc == NULL &&
712 curp->man == NULL &&
713 curp->sodest == NULL) {
714 if (curp->options & MPARSE_MDOC)
715 curp->mdoc = curp->pmdoc;
716 else {
717 if (curp->pman == NULL)
718 curp->pman = man_alloc(curp->roff, curp,
719 curp->options & MPARSE_QUICK ? 1 : 0);
720 curp->man = curp->pman;
721 }
722 }
723
724 if (curp->mdoc && ! mdoc_endparse(curp->mdoc)) {
725 assert(MANDOCLEVEL_FATAL <= curp->file_status);
726 return;
727 }
728
729 if (curp->man && ! man_endparse(curp->man)) {
730 assert(MANDOCLEVEL_FATAL <= curp->file_status);
731 return;
732 }
733
734 roff_endparse(curp->roff);
735 }
736
737 static void
738 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
739 {
740 struct buf *svprimary;
741 const char *svfile;
742 static int recursion_depth;
743
744 if (64 < recursion_depth) {
745 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
746 return;
747 }
748
749 /* Line number is per-file. */
750 svfile = curp->file;
751 curp->file = file;
752 svprimary = curp->primary;
753 curp->primary = &blk;
754 curp->line = 1;
755 recursion_depth++;
756
757 /* Skip an UTF-8 byte order mark. */
758 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
759 (unsigned char)blk.buf[0] == 0xef &&
760 (unsigned char)blk.buf[1] == 0xbb &&
761 (unsigned char)blk.buf[2] == 0xbf) {
762 blk.offs = 3;
763 curp->filenc &= ~MPARSE_LATIN1;
764 }
765
766 mparse_buf_r(curp, blk, 1);
767
768 if (0 == --recursion_depth && MANDOCLEVEL_FATAL > curp->file_status)
769 mparse_end(curp);
770
771 curp->primary = svprimary;
772 curp->file = svfile;
773 }
774
775 enum mandoclevel
776 mparse_readmem(struct mparse *curp, const void *buf, size_t len,
777 const char *file)
778 {
779 struct buf blk;
780
781 blk.buf = UNCONST(buf);
782 blk.sz = len;
783 blk.offs = 0;
784
785 mparse_parse_buffer(curp, blk, file);
786 return(curp->file_status);
787 }
788
789 enum mandoclevel
790 mparse_readfd(struct mparse *curp, int fd, const char *file)
791 {
792 struct buf blk;
793 int with_mmap;
794 int save_filenc;
795
796 if (-1 == fd && -1 == (fd = open(file, O_RDONLY, 0))) {
797 curp->file_status = MANDOCLEVEL_SYSERR;
798 if (curp->mmsg)
799 (*curp->mmsg)(MANDOCERR_SYSOPEN,
800 curp->file_status,
801 file, 0, 0, strerror(errno));
802 return(curp->file_status);
803 }
804
805 /*
806 * Run for each opened file; may be called more than once for
807 * each full parse sequence if the opened file is nested (i.e.,
808 * from `so'). Simply sucks in the whole file and moves into
809 * the parse phase for the file.
810 */
811
812 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
813 save_filenc = curp->filenc;
814 curp->filenc = curp->options &
815 (MPARSE_UTF8 | MPARSE_LATIN1);
816 mparse_parse_buffer(curp, blk, file);
817 curp->filenc = save_filenc;
818 #if HAVE_MMAP
819 if (with_mmap)
820 munmap(blk.buf, blk.sz);
821 else
822 #endif
823 free(blk.buf);
824 }
825
826 if (STDIN_FILENO != fd && -1 == close(fd))
827 perror(file);
828
829 return(curp->file_status);
830 }
831
832 enum mandoclevel
833 mparse_open(struct mparse *curp, int *fd, const char *file,
834 pid_t *child_pid)
835 {
836 int pfd[2];
837 char *cp;
838 enum mandocerr err;
839
840 pfd[1] = -1;
841 curp->file = file;
842 if ((cp = strrchr(file, '.')) == NULL ||
843 strcmp(cp + 1, "gz")) {
844 *child_pid = 0;
845 if ((*fd = open(file, O_RDONLY)) == -1) {
846 err = MANDOCERR_SYSOPEN;
847 goto out;
848 }
849 return(MANDOCLEVEL_OK);
850 }
851
852 if (pipe(pfd) == -1) {
853 err = MANDOCERR_SYSPIPE;
854 goto out;
855 }
856
857 switch (*child_pid = fork()) {
858 case -1:
859 err = MANDOCERR_SYSFORK;
860 close(pfd[0]);
861 close(pfd[1]);
862 pfd[1] = -1;
863 break;
864 case 0:
865 close(pfd[0]);
866 if (dup2(pfd[1], STDOUT_FILENO) == -1) {
867 err = MANDOCERR_SYSDUP;
868 break;
869 }
870 execlp("gunzip", "gunzip", "-c", file, NULL);
871 err = MANDOCERR_SYSEXEC;
872 break;
873 default:
874 close(pfd[1]);
875 *fd = pfd[0];
876 return(MANDOCLEVEL_OK);
877 }
878
879 out:
880 *fd = -1;
881 *child_pid = 0;
882 curp->file_status = MANDOCLEVEL_SYSERR;
883 if (curp->mmsg)
884 (*curp->mmsg)(err, curp->file_status, file,
885 0, 0, strerror(errno));
886 if (pfd[1] != -1)
887 exit(1);
888 return(curp->file_status);
889 }
890
891 enum mandoclevel
892 mparse_wait(struct mparse *curp, pid_t child_pid)
893 {
894 int status;
895
896 if (waitpid(child_pid, &status, 0) == -1) {
897 mandoc_msg(MANDOCERR_SYSWAIT, curp, 0, 0,
898 strerror(errno));
899 curp->file_status = MANDOCLEVEL_SYSERR;
900 return(curp->file_status);
901 }
902 if (WIFSIGNALED(status)) {
903 mandoc_vmsg(MANDOCERR_SYSSIG, curp, 0, 0,
904 "%d", WTERMSIG(status));
905 curp->file_status = MANDOCLEVEL_SYSERR;
906 return(curp->file_status);
907 }
908 if (WEXITSTATUS(status)) {
909 mandoc_vmsg(MANDOCERR_SYSEXIT, curp, 0, 0,
910 "%d", WEXITSTATUS(status));
911 curp->file_status = MANDOCLEVEL_SYSERR;
912 return(curp->file_status);
913 }
914 return(MANDOCLEVEL_OK);
915 }
916
917 struct mparse *
918 mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
919 const struct mchars *mchars, const char *defos)
920 {
921 struct mparse *curp;
922
923 assert(wlevel <= MANDOCLEVEL_FATAL);
924
925 curp = mandoc_calloc(1, sizeof(struct mparse));
926
927 curp->options = options;
928 curp->wlevel = wlevel;
929 curp->mmsg = mmsg;
930 curp->defos = defos;
931
932 curp->mchars = mchars;
933 curp->roff = roff_alloc(curp, curp->mchars, options);
934 if (curp->options & MPARSE_MDOC)
935 curp->pmdoc = mdoc_alloc(
936 curp->roff, curp, curp->defos,
937 curp->options & MPARSE_QUICK ? 1 : 0);
938 if (curp->options & MPARSE_MAN)
939 curp->pman = man_alloc(curp->roff, curp,
940 curp->options & MPARSE_QUICK ? 1 : 0);
941
942 return(curp);
943 }
944
945 void
946 mparse_reset(struct mparse *curp)
947 {
948
949 roff_reset(curp->roff);
950
951 if (curp->mdoc)
952 mdoc_reset(curp->mdoc);
953 if (curp->man)
954 man_reset(curp->man);
955 if (curp->secondary)
956 curp->secondary->sz = 0;
957
958 curp->file_status = MANDOCLEVEL_OK;
959 curp->mdoc = NULL;
960 curp->man = NULL;
961
962 free(curp->sodest);
963 curp->sodest = NULL;
964 }
965
966 void
967 mparse_free(struct mparse *curp)
968 {
969
970 if (curp->pmdoc)
971 mdoc_free(curp->pmdoc);
972 if (curp->pman)
973 man_free(curp->pman);
974 if (curp->roff)
975 roff_free(curp->roff);
976 if (curp->secondary)
977 free(curp->secondary->buf);
978
979 free(curp->secondary);
980 free(curp->sodest);
981 free(curp);
982 }
983
984 void
985 mparse_result(struct mparse *curp,
986 struct mdoc **mdoc, struct man **man, char **sodest)
987 {
988
989 if (sodest && NULL != (*sodest = curp->sodest)) {
990 *mdoc = NULL;
991 *man = NULL;
992 return;
993 }
994 if (mdoc)
995 *mdoc = curp->mdoc;
996 if (man)
997 *man = curp->man;
998 }
999
1000 void
1001 mandoc_vmsg(enum mandocerr t, struct mparse *m,
1002 int ln, int pos, const char *fmt, ...)
1003 {
1004 char buf[256];
1005 va_list ap;
1006
1007 va_start(ap, fmt);
1008 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
1009 va_end(ap);
1010
1011 mandoc_msg(t, m, ln, pos, buf);
1012 }
1013
1014 void
1015 mandoc_msg(enum mandocerr er, struct mparse *m,
1016 int ln, int col, const char *msg)
1017 {
1018 enum mandoclevel level;
1019
1020 level = MANDOCLEVEL_FATAL;
1021 while (er < mandoclimits[level])
1022 level--;
1023
1024 if (level < m->wlevel)
1025 return;
1026
1027 if (m->mmsg)
1028 (*m->mmsg)(er, level, m->file, ln, col, msg);
1029
1030 if (m->file_status < level)
1031 m->file_status = level;
1032 }
1033
1034 const char *
1035 mparse_strerror(enum mandocerr er)
1036 {
1037
1038 return(mandocerrs[er]);
1039 }
1040
1041 const char *
1042 mparse_strlevel(enum mandoclevel lvl)
1043 {
1044 return(mandoclevels[lvl]);
1045 }
1046
1047 void
1048 mparse_keep(struct mparse *p)
1049 {
1050
1051 assert(NULL == p->secondary);
1052 p->secondary = mandoc_calloc(1, sizeof(struct buf));
1053 }
1054
1055 const char *
1056 mparse_getkeep(const struct mparse *p)
1057 {
1058
1059 assert(p->secondary);
1060 return(p->secondary->sz ? p->secondary->buf : NULL);
1061 }