]> git.cameronkatri.com Git - mandoc.git/blob - read.c
Simplify by handling empty request lines at the one logical place
[mandoc.git] / read.c
1 /* $Id: read.c,v 1.84 2014/09/06 23:24:32 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include "config.h"
20
21 #include <sys/types.h>
22 #if HAVE_MMAP
23 #include <sys/mman.h>
24 #include <sys/stat.h>
25 #endif
26 #include <sys/wait.h>
27
28 #include <assert.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <stdarg.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38
39 #include "mandoc.h"
40 #include "mandoc_aux.h"
41 #include "libmandoc.h"
42 #include "mdoc.h"
43 #include "man.h"
44 #include "main.h"
45
46 #define REPARSE_LIMIT 1000
47
/*
 * A byte buffer together with its size.
 * Used in this file both for whole input files (see read_whole_file())
 * and for the single line being assembled in mparse_buf_r().
 */
struct buf {
	char		 *buf; /* binary input buffer */
	size_t		  sz; /* size of binary buffer */
};
52
/*
 * Parser state shared by all routines in this file.
 * Allocated by mparse_alloc(), cleared by mparse_reset() and
 * released by mparse_free().  The "persistent" parsers (pman, pmdoc)
 * survive mparse_reset(); man and mdoc merely point at whichever
 * persistent parser has been selected for the current input and are
 * set back to NULL on reset.
 */
struct mparse {
	struct man	 *pman; /* persistent man parser */
	struct mdoc	 *pmdoc; /* persistent mdoc parser */
	struct man	 *man; /* man parser */
	struct mdoc	 *mdoc; /* mdoc parser */
	struct roff	 *roff; /* roff parser (!NULL) */
	char		 *sodest; /* filename pointed to by .so */
	const char	 *file; /* filename of current input file */
	struct buf	 *primary; /* buffer currently being parsed */
	struct buf	 *secondary; /* preprocessed copy of input */
	const char	 *defos; /* default operating system */
	mandocmsg	  mmsg; /* warning/error message handler */
	enum mandoclevel  file_status; /* status of current parse */
	enum mandoclevel  wlevel; /* ignore messages below this */
	int		  options; /* parser options */
	int		  reparse_count; /* finite interp. stack */
	int		  line; /* line number in the file */
};
71
72 static void choose_parser(struct mparse *);
73 static void resize_buf(struct buf *, size_t);
74 static void mparse_buf_r(struct mparse *, struct buf, int);
75 static int read_whole_file(struct mparse *, const char *, int,
76 struct buf *, int *);
77 static void mparse_end(struct mparse *);
78 static void mparse_parse_buffer(struct mparse *, struct buf,
79 const char *);
80
/*
 * For each message level, the smallest error code that belongs to it.
 * mandoc_msg() walks this table downwards from MANDOCLEVEL_FATAL to
 * find the level of a given error code.
 */
static	const enum mandocerr	mandoclimits[MANDOCLEVEL_MAX] = {
	MANDOCERR_OK,
	MANDOCERR_WARNING,
	MANDOCERR_WARNING,
	MANDOCERR_ERROR,
	MANDOCERR_FATAL,
	MANDOCERR_MAX,
	MANDOCERR_MAX
};
90
/*
 * Message text for each error code, looked up by mparse_strerror().
 * The table is indexed directly by the enum mandocerr value, so the
 * order of the entries presumably has to mirror the declaration of
 * that enum (not visible in this file) -- verify when adding entries.
 */
static	const char * const	mandocerrs[MANDOCERR_MAX] = {
	"ok",

	"generic warning",

	/* related to the prologue */
	"missing manual title, using UNTITLED",
	"missing manual title, using \"\"",
	"lower case character in document title",
	"missing manual section, using \"\"",
	"unknown manual section",
	"unknown manual volume or arch",
	"missing date, using today's date",
	"cannot parse date, using it verbatim",
	"missing Os macro, using \"\"",
	"duplicate prologue macro",
	"late prologue macro",
	"skipping late title macro",
	"prologue macros out of order",

	/* related to document structure */
	".so is fragile, better use ln(1)",
	"no document body",
	"content before first section header",
	"first section is not \"NAME\"",
	"bad NAME section contents",
	"sections out of conventional order",
	"duplicate section title",
	"unexpected section",

	/* related to macros and nesting */
	"obsolete macro",
	"skipping paragraph macro",
	"moving paragraph macro out of list",
	"skipping no-space macro",
	"blocks badly nested",
	"nested displays are not portable",
	"moving content out of list",
	".Vt block has child macro",
	"fill mode already enabled, skipping",
	"fill mode already disabled, skipping",
	"line scope broken",

	/* related to missing macro arguments */
	"skipping empty request",
	"conditional request controls empty scope",
	"skipping empty macro",
	"empty argument, using 0n",
	"argument count wrong",
	"missing display type, using -ragged",
	"list type is not the first argument",
	"missing -width in -tag list, using 8n",
	"missing utility name, using \"\"",
	"empty head in list item",
	"empty list item",
	"missing font type, using \\fR",
	"unknown font type, using \\fR",
	"missing -std argument, adding it",

	/* related to bad macro arguments */
	"unterminated quoted argument",
	"duplicate argument",
	"skipping duplicate argument",
	"skipping duplicate display type",
	"skipping duplicate list type",
	"skipping -width argument",
	"unknown AT&T UNIX version",
	"invalid content in Rs block",
	"invalid Boolean argument",
	"unknown font, skipping request",

	/* related to plain text */
	"blank line in fill mode, using .sp",
	"tab in filled text",
	"whitespace at end of input line",
	"bad comment style",
	"invalid escape sequence",
	"undefined string, using \"\"",

	"generic error",

	/* related to equations */
	"unexpected equation scope closure",
	"equation scope open on exit",
	"overlapping equation scopes",
	"unexpected end of equation",
	"equation syntax error",

	/* related to tables */
	"bad table syntax",
	"bad table option",
	"bad table layout",
	"no table layout cells specified",
	"no table data cells specified",
	"ignore data in cell",
	"data block still open",
	"ignoring extra data cells",

	/* related to document structure and macros */
	"input stack limit exceeded, infinite loop?",
	"skipping bad character",
	"skipping unknown macro",
	"skipping item outside list",
	"skipping column outside column list",
	"skipping end of block that is not open",
	"inserting missing end of block",
	"appending missing end of block",

	/* related to request and macro arguments */
	"escaped character not allowed in a name",
	"argument count wrong",
	"missing list type, using -item",
	"missing manual name, using \"\"",
	"uname(3) system call failed, using UNKNOWN",
	"unknown standard specifier",
	"skipping request without numeric argument",
	"skipping all arguments",
	"skipping excess arguments",

	"generic fatal error",

	"input too large",
	"NOT IMPLEMENTED: Bd -file",
	"NOT IMPLEMENTED: .so with absolute path or \"..\"",
	".so request failed",

	/* system errors */
	"cannot dup file descriptor",
	"cannot exec",
	"gunzip failed with code",
	"cannot fork",
	/*
	 * NOTE(review): no fixed text for this code.  Presumably this
	 * slot is MANDOCERR_SYSOPEN, which is always reported together
	 * with strerror(errno) -- confirm against the enum declaration.
	 */
	NULL,
	"cannot open pipe",
	"cannot read file",
	"gunzip died from signal",
	"cannot stat file",
	"wait failed",
};
229
/*
 * Printable name of each message level, indexed by the
 * enum mandoclevel value and looked up by mparse_strlevel().
 */
static	const char * const	mandoclevels[MANDOCLEVEL_MAX] = {
	"SUCCESS",
	"RESERVED",
	"WARNING",
	"ERROR",
	"FATAL",
	"BADARG",
	"SYSERR"
};
239
240
241 static void
242 resize_buf(struct buf *buf, size_t initial)
243 {
244
245 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
246 buf->buf = mandoc_realloc(buf->buf, buf->sz);
247 }
248
249 static void
250 choose_parser(struct mparse *curp)
251 {
252 char *cp, *ep;
253 int format;
254
255 /*
256 * If neither command line arguments -mdoc or -man select
257 * a parser nor the roff parser found a .Dd or .TH macro
258 * yet, look ahead in the main input buffer.
259 */
260
261 if ((format = roff_getformat(curp->roff)) == 0) {
262 cp = curp->primary->buf;
263 ep = cp + curp->primary->sz;
264 while (cp < ep) {
265 if (*cp == '.' || *cp != '\'') {
266 cp++;
267 if (cp[0] == 'D' && cp[1] == 'd') {
268 format = MPARSE_MDOC;
269 break;
270 }
271 if (cp[0] == 'T' && cp[1] == 'H') {
272 format = MPARSE_MAN;
273 break;
274 }
275 }
276 cp = memchr(cp, '\n', ep - cp);
277 if (cp == NULL)
278 break;
279 cp++;
280 }
281 }
282
283 if (format == MPARSE_MDOC) {
284 if (NULL == curp->pmdoc)
285 curp->pmdoc = mdoc_alloc(
286 curp->roff, curp, curp->defos,
287 MPARSE_QUICK & curp->options ? 1 : 0);
288 assert(curp->pmdoc);
289 curp->mdoc = curp->pmdoc;
290 return;
291 }
292
293 /* Fall back to man(7) as a last resort. */
294
295 if (NULL == curp->pman)
296 curp->pman = man_alloc(curp->roff, curp,
297 MPARSE_QUICK & curp->options ? 1 : 0);
298 assert(curp->pman);
299 curp->man = curp->pman;
300 }
301
/*
 * Main parse routine for an opened file.  This is called for each
 * opened file and simply loops around the full input file, possibly
 * nesting (i.e., with `so').
 *
 * blk is the buffer to parse.  start is non-zero when blk holds real
 * file content: line numbers are then tracked in curp->line and the
 * whole blk.sz bytes are consumed.  The routine calls itself with
 * start == 0 to re-parse a line buffer handed back by the roff
 * preprocessor; such a buffer is treated as NUL-terminated.
 */
static void
mparse_buf_r(struct mparse *curp, struct buf blk, int start)
{
	const struct tbl_span	*span;
	struct buf	 ln;
	enum rofferr	 rr;
	int		 i, of, rc;
	int		 pos; /* byte number in the ln buffer */
	int		 lnn; /* line number in the real file */
	unsigned char	 c;

	memset(&ln, 0, sizeof(struct buf));

	lnn = curp->line;
	pos = 0;

	/*
	 * Each iteration assembles one logical input line in ln
	 * (starting at pos, which is non-zero when text is being
	 * appended to a previous line) and then processes it.
	 */

	for (i = 0; i < (int)blk.sz; ) {
		if (0 == pos && '\0' == blk.buf[i])
			break;

		if (start) {
			curp->line = lnn;
			curp->reparse_count = 0;
		}

		while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) {

			/*
			 * When finding an unescaped newline character,
			 * leave the character loop to process the line.
			 * Skip a preceding carriage return, if any.
			 */

			if ('\r' == blk.buf[i] && i + 1 < (int)blk.sz &&
			    '\n' == blk.buf[i + 1])
				++i;
			if ('\n' == blk.buf[i]) {
				++i;
				++lnn;
				break;
			}

			/*
			 * Make sure we have space for at least
			 * one backslash and one other character
			 * and the trailing NUL byte.
			 */

			if (pos + 2 >= (int)ln.sz)
				resize_buf(&ln, 256);

			/*
			 * Warn about bogus characters.  If you're using
			 * non-ASCII encoding, you're screwing your
			 * readers.  Since I'd rather this not happen,
			 * I'll be helpful and replace these characters
			 * with "?", so we don't display gibberish.
			 * Note to manual writers: use special characters.
			 */

			c = (unsigned char) blk.buf[i];

			if ( ! (isascii(c) &&
			    (isgraph(c) || isblank(c)))) {
				mandoc_vmsg(MANDOCERR_BADCHAR, curp,
				    curp->line, pos, "0x%x", c);
				i++;
				ln.buf[pos++] = '?';
				continue;
			}

			/* Trailing backslash = a plain char. */

			if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
				ln.buf[pos++] = blk.buf[i++];
				continue;
			}

			/*
			 * Found escape and at least one other character.
			 * When it's a newline character, skip it.
			 * When there is a carriage return in between,
			 * skip that one as well.
			 */

			if ('\r' == blk.buf[i + 1] && i + 2 < (int)blk.sz &&
			    '\n' == blk.buf[i + 2])
				++i;
			if ('\n' == blk.buf[i + 1]) {
				i += 2;
				++lnn;
				continue;
			}

			if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
				i += 2;
				/* Comment, skip to end of line */
				for (; i < (int)blk.sz; ++i) {
					if ('\n' == blk.buf[i]) {
						++i;
						++lnn;
						break;
					}
				}

				/*
				 * Backout trailing whitespaces, but keep
				 * a blank that is preceded by a backslash.
				 */
				for (; pos > 0; --pos) {
					if (ln.buf[pos - 1] != ' ')
						break;
					if (pos > 2 && ln.buf[pos - 2] == '\\')
						break;
				}
				break;
			}

			/* Catch escaped bogus characters. */

			c = (unsigned char) blk.buf[i+1];

			if ( ! (isascii(c) &&
			    (isgraph(c) || isblank(c)))) {
				mandoc_vmsg(MANDOCERR_BADCHAR, curp,
				    curp->line, pos, "0x%x", c);
				i += 2;
				ln.buf[pos++] = '?';
				continue;
			}

			/* Some other escape sequence, copy & cont. */

			ln.buf[pos++] = blk.buf[i++];
			ln.buf[pos++] = blk.buf[i++];
		}

		/* Room for the terminating NUL, also for an empty line. */

		if (pos >= (int)ln.sz)
			resize_buf(&ln, 256);

		ln.buf[pos] = '\0';

		/*
		 * A significant amount of complexity is contained by
		 * the roff preprocessor.  It's line-oriented but can be
		 * expressed on one line, so we need at times to
		 * readjust our starting point and re-run it.  The roff
		 * preprocessor can also readjust the buffers with new
		 * data, so we pass them in wholesale.
		 */

		of = 0;

		/*
		 * Maintain a lookaside buffer of all parsed lines.  We
		 * only do this if mparse_keep() has been invoked (the
		 * buffer may be accessed with mparse_getkeep()).
		 * The line is stored followed by a newline and a NUL;
		 * the NUL is not counted in secondary->sz.
		 */

		if (curp->secondary) {
			curp->secondary->buf = mandoc_realloc(
			    curp->secondary->buf,
			    curp->secondary->sz + pos + 2);
			memcpy(curp->secondary->buf +
			    curp->secondary->sz,
			    ln.buf, pos);
			curp->secondary->sz += pos;
			curp->secondary->buf
				[curp->secondary->sz] = '\n';
			curp->secondary->sz++;
			curp->secondary->buf
				[curp->secondary->sz] = '\0';
		}
rerun:
		rr = roff_parseln(curp->roff, curp->line,
		    &ln.buf, &ln.sz, of, &of);

		switch (rr) {
		case ROFF_REPARSE:
			/*
			 * Parse the rewritten line as a temporary
			 * buffer, unless that happened too often
			 * for the same input line already.
			 */
			if (REPARSE_LIMIT >= ++curp->reparse_count)
				mparse_buf_r(curp, ln, 0);
			else
				mandoc_msg(MANDOCERR_ROFFLOOP, curp,
				    curp->line, pos, NULL);
			pos = 0;
			continue;
		case ROFF_APPEND:
			/* Keep ln; the next input line is appended to it. */
			pos = (int)strlen(ln.buf);
			continue;
		case ROFF_RERUN:
			/* Same buffer again, from the offset now in of. */
			goto rerun;
		case ROFF_IGN:
			/* Nothing to hand to the parsers; drop the line. */
			pos = 0;
			continue;
		case ROFF_ERR:
			assert(MANDOCLEVEL_FATAL <= curp->file_status);
			break;
		case ROFF_SO:
			/*
			 * Without MPARSE_SO, an .so request that is the
			 * last thing in the buffer is not followed;
			 * only its target is recorded in sodest.
			 */
			if (0 == (MPARSE_SO & curp->options) &&
			    (i >= (int)blk.sz || '\0' == blk.buf[i])) {
				curp->sodest = mandoc_strdup(ln.buf + of);
				free(ln.buf);
				return;
			}
			/*
			 * We remove `so' clauses from our lookaside
			 * buffer because we're going to descend into
			 * the file recursively.
			 */
			if (curp->secondary)
				curp->secondary->sz -= pos + 1;
			mparse_readfd(curp, -1, ln.buf + of);
			if (MANDOCLEVEL_FATAL <= curp->file_status) {
				mandoc_vmsg(MANDOCERR_SO_FAIL,
				    curp, curp->line, pos,
				    ".so %s", ln.buf + of);
				break;
			}
			pos = 0;
			continue;
		default:
			break;
		}

		/*
		 * If we encounter errors in the recursive parse, make
		 * sure we don't continue parsing.
		 */

		if (MANDOCLEVEL_FATAL <= curp->file_status)
			break;

		/*
		 * If input parsers have not been allocated, do so now.
		 * We keep these instanced between parsers, but set them
		 * locally per parse routine since we can use different
		 * parsers with each one.
		 */

		if ( ! (curp->man || curp->mdoc))
			choose_parser(curp);

		/*
		 * Lastly, push down into the parsers themselves.
		 * If libroff returns ROFF_TBL, then add it to the
		 * currently open parse.  Since we only get here if
		 * there does exist data (see tbl_data.c), we're
		 * guaranteed that something's been allocated.
		 * Do the same for ROFF_EQN.
		 */

		rc = -1;

		if (ROFF_TBL == rr)
			while (NULL != (span = roff_span(curp->roff))) {
				rc = curp->man ?
				    man_addspan(curp->man, span) :
				    mdoc_addspan(curp->mdoc, span);
				if (0 == rc)
					break;
			}
		else if (ROFF_EQN == rr)
			rc = curp->mdoc ?
			    mdoc_addeqn(curp->mdoc,
				roff_eqn(curp->roff)) :
			    man_addeqn(curp->man,
				roff_eqn(curp->roff));
		else if (curp->man || curp->mdoc)
			rc = curp->man ?
			    man_parseln(curp->man,
				curp->line, ln.buf, of) :
			    mdoc_parseln(curp->mdoc,
				curp->line, ln.buf, of);

		/*
		 * A return value of 0 means failure.
		 * NOTE(review): 2 presumably means that the parser
		 * wants no further input -- confirm in man_parseln()
		 * and mdoc_parseln().
		 */

		if (0 == rc) {
			assert(MANDOCLEVEL_FATAL <= curp->file_status);
			break;
		} else if (2 == rc)
			break;

		/* Temporary buffers typically are not full. */

		if (0 == start && '\0' == blk.buf[i])
			break;

		/* Start the next input line. */

		pos = 0;
	}

	free(ln.buf);
}
596
597 static int
598 read_whole_file(struct mparse *curp, const char *file, int fd,
599 struct buf *fb, int *with_mmap)
600 {
601 size_t off;
602 ssize_t ssz;
603
604 #if HAVE_MMAP
605 struct stat st;
606 if (-1 == fstat(fd, &st)) {
607 curp->file_status = MANDOCLEVEL_SYSERR;
608 if (curp->mmsg)
609 (*curp->mmsg)(MANDOCERR_SYSSTAT, curp->file_status,
610 file, 0, 0, strerror(errno));
611 return(0);
612 }
613
614 /*
615 * If we're a regular file, try just reading in the whole entry
616 * via mmap(). This is faster than reading it into blocks, and
617 * since each file is only a few bytes to begin with, I'm not
618 * concerned that this is going to tank any machines.
619 */
620
621 if (S_ISREG(st.st_mode)) {
622 if (st.st_size >= (1U << 31)) {
623 curp->file_status = MANDOCLEVEL_FATAL;
624 if (curp->mmsg)
625 (*curp->mmsg)(MANDOCERR_TOOLARGE,
626 curp->file_status, file, 0, 0, NULL);
627 return(0);
628 }
629 *with_mmap = 1;
630 fb->sz = (size_t)st.st_size;
631 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
632 if (fb->buf != MAP_FAILED)
633 return(1);
634 }
635 #endif
636
637 /*
638 * If this isn't a regular file (like, say, stdin), then we must
639 * go the old way and just read things in bit by bit.
640 */
641
642 *with_mmap = 0;
643 off = 0;
644 fb->sz = 0;
645 fb->buf = NULL;
646 for (;;) {
647 if (off == fb->sz) {
648 if (fb->sz == (1U << 31)) {
649 curp->file_status = MANDOCLEVEL_FATAL;
650 if (curp->mmsg)
651 (*curp->mmsg)(MANDOCERR_TOOLARGE,
652 curp->file_status,
653 file, 0, 0, NULL);
654 break;
655 }
656 resize_buf(fb, 65536);
657 }
658 ssz = read(fd, fb->buf + (int)off, fb->sz - off);
659 if (ssz == 0) {
660 fb->sz = off;
661 return(1);
662 }
663 if (ssz == -1) {
664 curp->file_status = MANDOCLEVEL_SYSERR;
665 if (curp->mmsg)
666 (*curp->mmsg)(MANDOCERR_SYSREAD,
667 curp->file_status, file, 0, 0,
668 strerror(errno));
669 break;
670 }
671 off += (size_t)ssz;
672 }
673
674 free(fb->buf);
675 fb->buf = NULL;
676 return(0);
677 }
678
679 static void
680 mparse_end(struct mparse *curp)
681 {
682
683 if (MANDOCLEVEL_FATAL <= curp->file_status)
684 return;
685
686 if (curp->mdoc == NULL &&
687 curp->man == NULL &&
688 curp->sodest == NULL) {
689 if (curp->options & MPARSE_MDOC)
690 curp->mdoc = curp->pmdoc;
691 else {
692 if (curp->pman == NULL)
693 curp->pman = man_alloc(curp->roff, curp,
694 curp->options & MPARSE_QUICK ? 1 : 0);
695 curp->man = curp->pman;
696 }
697 }
698
699 if (curp->mdoc && ! mdoc_endparse(curp->mdoc)) {
700 assert(MANDOCLEVEL_FATAL <= curp->file_status);
701 return;
702 }
703
704 if (curp->man && ! man_endparse(curp->man)) {
705 assert(MANDOCLEVEL_FATAL <= curp->file_status);
706 return;
707 }
708
709 roff_endparse(curp->roff);
710 }
711
712 static void
713 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
714 {
715 const char *svfile;
716 static int recursion_depth;
717
718 if (64 < recursion_depth) {
719 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
720 return;
721 }
722
723 /* Line number is per-file. */
724 svfile = curp->file;
725 curp->file = file;
726 curp->primary = &blk;
727 curp->line = 1;
728 recursion_depth++;
729
730 mparse_buf_r(curp, blk, 1);
731
732 if (0 == --recursion_depth && MANDOCLEVEL_FATAL > curp->file_status)
733 mparse_end(curp);
734
735 curp->file = svfile;
736 }
737
738 enum mandoclevel
739 mparse_readmem(struct mparse *curp, const void *buf, size_t len,
740 const char *file)
741 {
742 struct buf blk;
743
744 blk.buf = UNCONST(buf);
745 blk.sz = len;
746
747 mparse_parse_buffer(curp, blk, file);
748 return(curp->file_status);
749 }
750
751 enum mandoclevel
752 mparse_readfd(struct mparse *curp, int fd, const char *file)
753 {
754 struct buf blk;
755 int with_mmap;
756
757 if (-1 == fd && -1 == (fd = open(file, O_RDONLY, 0))) {
758 curp->file_status = MANDOCLEVEL_SYSERR;
759 if (curp->mmsg)
760 (*curp->mmsg)(MANDOCERR_SYSOPEN,
761 curp->file_status,
762 file, 0, 0, strerror(errno));
763 goto out;
764 }
765
766 /*
767 * Run for each opened file; may be called more than once for
768 * each full parse sequence if the opened file is nested (i.e.,
769 * from `so'). Simply sucks in the whole file and moves into
770 * the parse phase for the file.
771 */
772
773 if ( ! read_whole_file(curp, file, fd, &blk, &with_mmap))
774 goto out;
775
776 mparse_parse_buffer(curp, blk, file);
777
778 #if HAVE_MMAP
779 if (with_mmap)
780 munmap(blk.buf, blk.sz);
781 else
782 #endif
783 free(blk.buf);
784
785 if (STDIN_FILENO != fd && -1 == close(fd))
786 perror(file);
787 out:
788 return(curp->file_status);
789 }
790
/*
 * Open an input file for reading.
 *
 * A file whose name does not end in ".gz" is opened directly and
 * *child_pid is set to 0.  For a ".gz" name, a child process running
 * "gunzip -c file" is started with its standard output connected to
 * a pipe; *fd then receives the read end of that pipe and *child_pid
 * the process ID of the child (see mparse_wait()).
 *
 * Returns MANDOCLEVEL_OK on success.  On failure, *fd is set to -1,
 * *child_pid to 0, the problem is reported through curp->mmsg (if
 * set) and MANDOCLEVEL_SYSERR is stored in curp->file_status and
 * returned.
 */
enum mandoclevel
mparse_open(struct mparse *curp, int *fd, const char *file,
	pid_t *child_pid)
{
	int		  pfd[2];
	char		 *cp;
	enum mandocerr	  err;

	/* Marks "no pipe write end held"; checked at the out label. */
	pfd[1] = -1;
	curp->file = file;
	if ((cp = strrchr(file, '.')) == NULL ||
	    strcmp(cp + 1, "gz")) {
		/* Not compressed: plain open(2). */
		*child_pid = 0;
		if ((*fd = open(file, O_RDONLY)) == -1) {
			err = MANDOCERR_SYSOPEN;
			goto out;
		}
		return(MANDOCLEVEL_OK);
	}

	if (pipe(pfd) == -1) {
		err = MANDOCERR_SYSPIPE;
		goto out;
	}

	switch (*child_pid = fork()) {
	case -1:
		/* Still the only process: drop the pipe and report. */
		err = MANDOCERR_SYSFORK;
		close(pfd[0]);
		close(pfd[1]);
		pfd[1] = -1;
		break;
	case 0:
		/* Child: send gunzip's output into the pipe. */
		close(pfd[0]);
		if (dup2(pfd[1], STDOUT_FILENO) == -1) {
			err = MANDOCERR_SYSDUP;
			break;
		}
		execlp("gunzip", "gunzip", "-c", file, NULL);
		/* Only reached when the exec failed. */
		err = MANDOCERR_SYSEXEC;
		break;
	default:
		/* Parent: keep only the read end. */
		close(pfd[1]);
		*fd = pfd[0];
		return(MANDOCLEVEL_OK);
	}

out:
	*fd = -1;
	*child_pid = 0;
	curp->file_status = MANDOCLEVEL_SYSERR;
	if (curp->mmsg)
		(*curp->mmsg)(err, curp->file_status, file,
		    0, 0, strerror(errno));
	/*
	 * Still holding the write end of the pipe means that this is
	 * the forked child after a failed dup2() or exec: terminate
	 * rather than return into the caller's code a second time.
	 */
	if (pfd[1] != -1)
		exit(1);
	return(curp->file_status);
}
849
850 enum mandoclevel
851 mparse_wait(struct mparse *curp, pid_t child_pid)
852 {
853 int status;
854
855 if (waitpid(child_pid, &status, 0) == -1) {
856 mandoc_msg(MANDOCERR_SYSWAIT, curp, 0, 0,
857 strerror(errno));
858 curp->file_status = MANDOCLEVEL_SYSERR;
859 return(curp->file_status);
860 }
861 if (WIFSIGNALED(status)) {
862 mandoc_vmsg(MANDOCERR_SYSSIG, curp, 0, 0,
863 "%d", WTERMSIG(status));
864 curp->file_status = MANDOCLEVEL_SYSERR;
865 return(curp->file_status);
866 }
867 if (WEXITSTATUS(status)) {
868 mandoc_vmsg(MANDOCERR_SYSEXIT, curp, 0, 0,
869 "%d", WEXITSTATUS(status));
870 curp->file_status = MANDOCLEVEL_SYSERR;
871 return(curp->file_status);
872 }
873 return(MANDOCLEVEL_OK);
874 }
875
876 struct mparse *
877 mparse_alloc(int options, enum mandoclevel wlevel,
878 mandocmsg mmsg, const char *defos)
879 {
880 struct mparse *curp;
881
882 assert(wlevel <= MANDOCLEVEL_FATAL);
883
884 curp = mandoc_calloc(1, sizeof(struct mparse));
885
886 curp->options = options;
887 curp->wlevel = wlevel;
888 curp->mmsg = mmsg;
889 curp->defos = defos;
890
891 curp->roff = roff_alloc(curp, options);
892 if (curp->options & MPARSE_MDOC)
893 curp->pmdoc = mdoc_alloc(
894 curp->roff, curp, curp->defos,
895 curp->options & MPARSE_QUICK ? 1 : 0);
896 if (curp->options & MPARSE_MAN)
897 curp->pman = man_alloc(curp->roff, curp,
898 curp->options & MPARSE_QUICK ? 1 : 0);
899
900 return(curp);
901 }
902
903 void
904 mparse_reset(struct mparse *curp)
905 {
906
907 roff_reset(curp->roff);
908
909 if (curp->mdoc)
910 mdoc_reset(curp->mdoc);
911 if (curp->man)
912 man_reset(curp->man);
913 if (curp->secondary)
914 curp->secondary->sz = 0;
915
916 curp->file_status = MANDOCLEVEL_OK;
917 curp->mdoc = NULL;
918 curp->man = NULL;
919
920 free(curp->sodest);
921 curp->sodest = NULL;
922 }
923
924 void
925 mparse_free(struct mparse *curp)
926 {
927
928 if (curp->pmdoc)
929 mdoc_free(curp->pmdoc);
930 if (curp->pman)
931 man_free(curp->pman);
932 if (curp->roff)
933 roff_free(curp->roff);
934 if (curp->secondary)
935 free(curp->secondary->buf);
936
937 free(curp->secondary);
938 free(curp->sodest);
939 free(curp);
940 }
941
942 void
943 mparse_result(struct mparse *curp,
944 struct mdoc **mdoc, struct man **man, char **sodest)
945 {
946
947 if (sodest && NULL != (*sodest = curp->sodest)) {
948 *mdoc = NULL;
949 *man = NULL;
950 return;
951 }
952 if (mdoc)
953 *mdoc = curp->mdoc;
954 if (man)
955 *man = curp->man;
956 }
957
958 void
959 mandoc_vmsg(enum mandocerr t, struct mparse *m,
960 int ln, int pos, const char *fmt, ...)
961 {
962 char buf[256];
963 va_list ap;
964
965 va_start(ap, fmt);
966 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
967 va_end(ap);
968
969 mandoc_msg(t, m, ln, pos, buf);
970 }
971
972 void
973 mandoc_msg(enum mandocerr er, struct mparse *m,
974 int ln, int col, const char *msg)
975 {
976 enum mandoclevel level;
977
978 level = MANDOCLEVEL_FATAL;
979 while (er < mandoclimits[level])
980 level--;
981
982 if (level < m->wlevel)
983 return;
984
985 if (m->mmsg)
986 (*m->mmsg)(er, level, m->file, ln, col, msg);
987
988 if (m->file_status < level)
989 m->file_status = level;
990 }
991
992 const char *
993 mparse_strerror(enum mandocerr er)
994 {
995
996 return(mandocerrs[er]);
997 }
998
999 const char *
1000 mparse_strlevel(enum mandoclevel lvl)
1001 {
1002 return(mandoclevels[lvl]);
1003 }
1004
1005 void
1006 mparse_keep(struct mparse *p)
1007 {
1008
1009 assert(NULL == p->secondary);
1010 p->secondary = mandoc_calloc(1, sizeof(struct buf));
1011 }
1012
1013 const char *
1014 mparse_getkeep(const struct mparse *p)
1015 {
1016
1017 assert(p->secondary);
1018 return(p->secondary->sz ? p->secondary->buf : NULL);
1019 }