]> git.cameronkatri.com Git - mandoc.git/blob - read.c
Support .RE with an argument; needed for audio/pms(1).
[mandoc.git] / read.c
1 /* $Id: read.c,v 1.115 2015/01/24 01:58:33 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include "config.h"
20
21 #include <sys/types.h>
22 #if HAVE_MMAP
23 #include <sys/mman.h>
24 #include <sys/stat.h>
25 #endif
26 #include <sys/wait.h>
27
28 #include <assert.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <stdarg.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38
39 #include "mandoc.h"
40 #include "mandoc_aux.h"
41 #include "libmandoc.h"
42 #include "mdoc.h"
43 #include "man.h"
44
45 #define REPARSE_LIMIT 1000
46
47 struct mparse {
48 struct man *pman; /* persistent man parser */
49 struct mdoc *pmdoc; /* persistent mdoc parser */
50 struct man *man; /* man parser */
51 struct mdoc *mdoc; /* mdoc parser */
52 struct roff *roff; /* roff parser (!NULL) */
53 const struct mchars *mchars; /* character table */
54 char *sodest; /* filename pointed to by .so */
55 const char *file; /* filename of current input file */
56 struct buf *primary; /* buffer currently being parsed */
57 struct buf *secondary; /* preprocessed copy of input */
58 const char *defos; /* default operating system */
59 mandocmsg mmsg; /* warning/error message handler */
60 enum mandoclevel file_status; /* status of current parse */
61 enum mandoclevel wlevel; /* ignore messages below this */
62 int options; /* parser options */
63 int filenc; /* encoding of the current file */
64 int reparse_count; /* finite interp. stack */
65 int line; /* line number in the file */
66 pid_t child; /* the gunzip(1) process */
67 };
68
/* File-local helpers; all of them are defined later in this file. */
static	void	  choose_parser(struct mparse *curp);
static	void	  resize_buf(struct buf *buf, size_t initial);
static	void	  mparse_buf_r(struct mparse *curp, struct buf blk,
			size_t i, int start);
static	int	  read_whole_file(struct mparse *curp, const char *file,
			int fd, struct buf *fb, int *with_mmap);
static	void	  mparse_end(struct mparse *curp);
static	void	  mparse_parse_buffer(struct mparse *curp, struct buf blk,
			const char *file);
77
78 static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = {
79 MANDOCERR_OK,
80 MANDOCERR_WARNING,
81 MANDOCERR_WARNING,
82 MANDOCERR_ERROR,
83 MANDOCERR_UNSUPP,
84 MANDOCERR_MAX,
85 MANDOCERR_MAX
86 };
87
88 static const char * const mandocerrs[MANDOCERR_MAX] = {
89 "ok",
90
91 "generic warning",
92
93 /* related to the prologue */
94 "missing manual title, using UNTITLED",
95 "missing manual title, using \"\"",
96 "lower case character in document title",
97 "missing manual section, using \"\"",
98 "unknown manual section",
99 "missing date, using today's date",
100 "cannot parse date, using it verbatim",
101 "missing Os macro, using \"\"",
102 "duplicate prologue macro",
103 "late prologue macro",
104 "skipping late title macro",
105 "prologue macros out of order",
106
107 /* related to document structure */
108 ".so is fragile, better use ln(1)",
109 "no document body",
110 "content before first section header",
111 "first section is not \"NAME\"",
112 "bad NAME section contents",
113 "sections out of conventional order",
114 "duplicate section title",
115 "unexpected section",
116 "unusual Xr order",
117 "unusual Xr punctuation",
118 "AUTHORS section without An macro",
119
120 /* related to macros and nesting */
121 "obsolete macro",
122 "macro neither callable nor escaped",
123 "skipping paragraph macro",
124 "moving paragraph macro out of list",
125 "skipping no-space macro",
126 "blocks badly nested",
127 "nested displays are not portable",
128 "moving content out of list",
129 ".Vt block has child macro",
130 "fill mode already enabled, skipping",
131 "fill mode already disabled, skipping",
132 "line scope broken",
133
134 /* related to missing macro arguments */
135 "skipping empty request",
136 "conditional request controls empty scope",
137 "skipping empty macro",
138 "empty argument, using 0n",
139 "argument count wrong",
140 "missing display type, using -ragged",
141 "list type is not the first argument",
142 "missing -width in -tag list, using 8n",
143 "missing utility name, using \"\"",
144 "empty head in list item",
145 "empty list item",
146 "missing font type, using \\fR",
147 "unknown font type, using \\fR",
148 "nothing follows prefix",
149 "missing -std argument, adding it",
150 "missing eqn box, using \"\"",
151
152 /* related to bad macro arguments */
153 "unterminated quoted argument",
154 "duplicate argument",
155 "skipping duplicate argument",
156 "skipping duplicate display type",
157 "skipping duplicate list type",
158 "skipping -width argument",
159 "unknown AT&T UNIX version",
160 "comma in function argument",
161 "parenthesis in function name",
162 "invalid content in Rs block",
163 "invalid Boolean argument",
164 "unknown font, skipping request",
165
166 /* related to plain text */
167 "blank line in fill mode, using .sp",
168 "tab in filled text",
169 "whitespace at end of input line",
170 "bad comment style",
171 "invalid escape sequence",
172 "undefined string, using \"\"",
173
174 "generic error",
175
176 /* related to equations */
177 "unexpected equation scope closure",
178 "equation scope open on exit",
179 "overlapping equation scopes",
180 "unexpected end of equation",
181
182 /* related to tables */
183 "no table layout cells specified",
184 "no table data cells specified",
185 "ignore data in cell",
186 "data block still open",
187 "ignoring extra data cells",
188
189 /* related to document structure and macros */
190 NULL,
191 "input stack limit exceeded, infinite loop?",
192 "skipping bad character",
193 "skipping unknown macro",
194 "skipping insecure request",
195 "skipping item outside list",
196 "skipping column outside column list",
197 "skipping end of block that is not open",
198 "fewer RS blocks open, skipping",
199 "inserting missing end of block",
200 "appending missing end of block",
201
202 /* related to request and macro arguments */
203 "escaped character not allowed in a name",
204 "argument count wrong",
205 "NOT IMPLEMENTED: Bd -file",
206 "missing list type, using -item",
207 "missing manual name, using \"\"",
208 "uname(3) system call failed, using UNKNOWN",
209 "unknown standard specifier",
210 "skipping request without numeric argument",
211 "NOT IMPLEMENTED: .so with absolute path or \"..\"",
212 ".so request failed",
213 "skipping all arguments",
214 "skipping excess arguments",
215 "divide by zero",
216
217 "unsupported feature",
218 "input too large",
219 "unsupported control character",
220 "unsupported roff request",
221 "unsupported table syntax",
222 "unsupported table option",
223 "unsupported table layout",
224 "ignoring macro in table",
225 };
226
227 static const char * const mandoclevels[MANDOCLEVEL_MAX] = {
228 "SUCCESS",
229 "RESERVED",
230 "WARNING",
231 "ERROR",
232 "UNSUPP",
233 "BADARG",
234 "SYSERR"
235 };
236
237
238 static void
239 resize_buf(struct buf *buf, size_t initial)
240 {
241
242 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
243 buf->buf = mandoc_realloc(buf->buf, buf->sz);
244 }
245
246 static void
247 choose_parser(struct mparse *curp)
248 {
249 char *cp, *ep;
250 int format;
251
252 /*
253 * If neither command line arguments -mdoc or -man select
254 * a parser nor the roff parser found a .Dd or .TH macro
255 * yet, look ahead in the main input buffer.
256 */
257
258 if ((format = roff_getformat(curp->roff)) == 0) {
259 cp = curp->primary->buf;
260 ep = cp + curp->primary->sz;
261 while (cp < ep) {
262 if (*cp == '.' || *cp == '\'') {
263 cp++;
264 if (cp[0] == 'D' && cp[1] == 'd') {
265 format = MPARSE_MDOC;
266 break;
267 }
268 if (cp[0] == 'T' && cp[1] == 'H') {
269 format = MPARSE_MAN;
270 break;
271 }
272 }
273 cp = memchr(cp, '\n', ep - cp);
274 if (cp == NULL)
275 break;
276 cp++;
277 }
278 }
279
280 if (format == MPARSE_MDOC) {
281 if (NULL == curp->pmdoc)
282 curp->pmdoc = mdoc_alloc(
283 curp->roff, curp, curp->defos,
284 MPARSE_QUICK & curp->options ? 1 : 0);
285 assert(curp->pmdoc);
286 curp->mdoc = curp->pmdoc;
287 return;
288 }
289
290 /* Fall back to man(7) as a last resort. */
291
292 if (NULL == curp->pman)
293 curp->pman = man_alloc(
294 curp->roff, curp, curp->defos,
295 MPARSE_QUICK & curp->options ? 1 : 0);
296 assert(curp->pman);
297 curp->man = curp->pman;
298 }
299
300 /*
 * Main parse routine for a buffer.
 * It assumes encoding and line numbering are already set up.
 * It can recurse directly (for invocations of user-defined
 * macros, inline equations, and input line traps)
 * and indirectly (for .so file inclusion).
 *
 * curp:  the parse handle.
 * blk:   the buffer to parse, passed by value.
 * i:     byte offset in blk at which parsing starts.
 * start: non-zero when blk is the buffer of an input file; then
 *        curp->line and curp->reparse_count are updated for every
 *        line and an encoding cue is looked for in the first lines.
 *        Zero for a temporary buffer (reparsed line or replacement
 *        text), in which case a NUL byte ends the input.
 *
 * Each input line is first copied into the local buffer ln, with
 * escaped newlines joined, comments removed and bad bytes replaced
 * by '?', then given to roff_parseln() and, depending on its
 * result, to the mdoc or man line parser.
 */
307 static void
308 mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
309 {
310 const struct tbl_span *span;
311 struct buf ln;
312 const char *save_file;
313 char *cp;
314 size_t pos; /* byte number in the ln buffer */
315 enum rofferr rr;
316 int of;
317 int lnn; /* line number in the real file */
318 int fd;
319 pid_t save_child;
320 unsigned char c;
321
322 memset(&ln, 0, sizeof(ln));
323
324 lnn = curp->line;
325 pos = 0;
326
/* Outer loop: one pass per logical input line. */
327 while (i < blk.sz) {
/* A NUL byte at the start of a line ends the input. */
328 if (0 == pos && '\0' == blk.buf[i])
329 break;
330
/* File buffers only: publish the line number, reset the reparse counter. */
331 if (start) {
332 curp->line = lnn;
333 curp->reparse_count = 0;
334
/* Both encodings still possible: look for a cue in the first lines. */
335 if (lnn < 3 &&
336 curp->filenc & MPARSE_UTF8 &&
337 curp->filenc & MPARSE_LATIN1)
338 curp->filenc = preconv_cue(&blk, i);
339 }
340
/* Inner loop: copy one line, byte by byte, from blk into ln. */
341 while (i < blk.sz && (start || blk.buf[i] != '\0')) {
342
343 /*
344 * When finding an unescaped newline character,
345 * leave the character loop to process the line.
346 * Skip a preceding carriage return, if any.
347 */
348
349 if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
350 '\n' == blk.buf[i + 1])
351 ++i;
352 if ('\n' == blk.buf[i]) {
353 ++i;
354 ++lnn;
355 break;
356 }
357
358 /*
359 * Make sure we have space for the worst
360 * case of 11 bytes: "\\[u10ffff]\0"
361 */
362
363 if (pos + 11 > ln.sz)
364 resize_buf(&ln, 256);
365
366 /*
367 * Encode 8-bit input.
368 */
369
370 c = blk.buf[i];
371 if (c & 0x80) {
/* No encoding selected or conversion failed: report and substitute '?'. */
372 if ( ! (curp->filenc && preconv_encode(
373 &blk, &i, &ln, &pos, &curp->filenc))) {
374 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
375 curp->line, pos, "0x%x", c);
376 ln.buf[pos++] = '?';
377 i++;
378 }
379 continue;
380 }
381
382 /*
383 * Exclude control characters.
384 */
385
386 if (c == 0x7f || (c < 0x20 && c != 0x09)) {
387 mandoc_vmsg(c == 0x00 || c == 0x04 ||
388 c > 0x0a ? MANDOCERR_CHAR_BAD :
389 MANDOCERR_CHAR_UNSUPP,
390 curp, curp->line, pos, "0x%x", c);
391 i++;
392 ln.buf[pos++] = '?';
393 continue;
394 }
395
396 /* Trailing backslash = a plain char. */
397
398 if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
399 ln.buf[pos++] = blk.buf[i++];
400 continue;
401 }
402
403 /*
404 * Found escape and at least one other character.
405 * When it's a newline character, skip it.
406 * When there is a carriage return in between,
407 * skip that one as well.
408 */
409
410 if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
411 '\n' == blk.buf[i + 2])
412 ++i;
413 if ('\n' == blk.buf[i + 1]) {
414 i += 2;
415 ++lnn;
416 continue;
417 }
418
419 if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
420 i += 2;
421 /* Comment, skip to end of line */
422 for (; i < blk.sz; ++i) {
423 if ('\n' == blk.buf[i]) {
424 ++i;
425 ++lnn;
426 break;
427 }
428 }
429
430 /* Backout trailing whitespaces */
431 for (; pos > 0; --pos) {
432 if (ln.buf[pos - 1] != ' ')
433 break;
434 if (pos > 2 && ln.buf[pos - 2] == '\\')
435 break;
436 }
437 break;
438 }
439
440 /* Catch escaped bogus characters. */
441
442 c = (unsigned char) blk.buf[i+1];
443
444 if ( ! (isascii(c) &&
445 (isgraph(c) || isblank(c)))) {
446 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
447 curp->line, pos, "0x%x", c);
448 i += 2;
449 ln.buf[pos++] = '?';
450 continue;
451 }
452
453 /* Some other escape sequence, copy & cont. */
454
455 ln.buf[pos++] = blk.buf[i++];
456 ln.buf[pos++] = blk.buf[i++];
457 }
458
/* Make room for the terminating NUL (also covers a still empty ln). */
459 if (pos >= ln.sz)
460 resize_buf(&ln, 256);
461
462 ln.buf[pos] = '\0';
463
464 /*
465 * A significant amount of complexity is contained by
466 * the roff preprocessor. It's line-oriented but can be
467 * expressed on one line, so we need at times to
468 * readjust our starting point and re-run it. The roff
469 * preprocessor can also readjust the buffers with new
470 * data, so we pass them in wholesale.
471 */
472
473 of = 0;
474
475 /*
476 * Maintain a lookaside buffer of all parsed lines. We
477 * only do this if mparse_keep() has been invoked (the
478 * buffer may be accessed with mparse_getkeep()).
479 */
480
481 if (curp->secondary) {
482 curp->secondary->buf = mandoc_realloc(
483 curp->secondary->buf,
484 curp->secondary->sz + pos + 2);
485 memcpy(curp->secondary->buf +
486 curp->secondary->sz,
487 ln.buf, pos);
488 curp->secondary->sz += pos;
489 curp->secondary->buf
490 [curp->secondary->sz] = '\n';
491 curp->secondary->sz++;
492 curp->secondary->buf
493 [curp->secondary->sz] = '\0';
494 }
495 rerun:
496 rr = roff_parseln(curp->roff, curp->line, &ln, &of);
497
/* Act on what the roff preprocessor decided about this line. */
498 switch (rr) {
/* Parse the rewritten line as a nested buffer; the nesting is bounded by REPARSE_LIMIT. */
499 case ROFF_REPARSE:
500 if (REPARSE_LIMIT >= ++curp->reparse_count)
501 mparse_buf_r(curp, ln, of, 0);
502 else
503 mandoc_msg(MANDOCERR_ROFFLOOP, curp,
504 curp->line, pos, NULL);
505 pos = 0;
506 continue;
/* Keep the text in ln; the next input line is appended after it. */
507 case ROFF_APPEND:
508 pos = strlen(ln.buf);
509 continue;
/* Give the same line to roff_parseln() once more. */
510 case ROFF_RERUN:
511 goto rerun;
/* Nothing to pass on; start over with the next line. */
512 case ROFF_IGN:
513 pos = 0;
514 continue;
/* File inclusion request; the file name starts at ln.buf + of. */
515 case ROFF_SO:
/* Without MPARSE_SO, a .so that ends the input is only recorded in sodest. */
516 if ( ! (curp->options & MPARSE_SO) &&
517 (i >= blk.sz || blk.buf[i] == '\0')) {
518 curp->sodest = mandoc_strdup(ln.buf + of);
519 free(ln.buf);
520 return;
521 }
522 /*
523 * We remove `so' clauses from our lookaside
524 * buffer because we're going to descend into
525 * the file recursively.
526 */
527 if (curp->secondary)
528 curp->secondary->sz -= pos + 1;
/* mparse_open() overwrites curp->file and curp->child: save them. */
529 save_file = curp->file;
530 save_child = curp->child;
531 if (mparse_open(curp, &fd, ln.buf + of) ==
532 MANDOCLEVEL_OK) {
533 mparse_readfd(curp, fd, ln.buf + of);
534 curp->file = save_file;
535 } else {
/* The file cannot be opened: report it and parse a short replacement text. */
536 curp->file = save_file;
537 mandoc_vmsg(MANDOCERR_SO_FAIL,
538 curp, curp->line, pos,
539 ".so %s", ln.buf + of);
540 ln.sz = mandoc_asprintf(&cp,
541 ".sp\nSee the file %s.\n.sp",
542 ln.buf + of);
543 free(ln.buf);
544 ln.buf = cp;
545 of = 0;
546 mparse_buf_r(curp, ln, of, 0);
547 }
548 curp->child = save_child;
549 pos = 0;
550 continue;
551 default:
552 break;
553 }
554
555 /*
556 * If input parsers have not been allocated, do so now.
557 * We keep these instanced between parsers, but set them
558 * locally per parse routine since we can use different
559 * parsers with each one.
560 */
561
562 if ( ! (curp->man || curp->mdoc))
563 choose_parser(curp);
564
565 /*
566 * Lastly, push down into the parsers themselves.
567 * If libroff returns ROFF_TBL, then add it to the
568 * currently open parse. Since we only get here if
569 * there does exist data (see tbl_data.c), we're
570 * guaranteed that something's been allocated.
571 * Do the same for ROFF_EQN.
572 */
573
/* NOTE(review): a return value of 2 from the line parsers ends the loop; its meaning is defined in the mdoc/man parsers -- confirm there. */
574 if (rr == ROFF_TBL) {
575 while ((span = roff_span(curp->roff)) != NULL)
576 if (curp->man == NULL)
577 mdoc_addspan(curp->mdoc, span);
578 else
579 man_addspan(curp->man, span);
580 } else if (rr == ROFF_EQN) {
581 if (curp->man == NULL)
582 mdoc_addeqn(curp->mdoc, roff_eqn(curp->roff));
583 else
584 man_addeqn(curp->man, roff_eqn(curp->roff));
585 } else if ((curp->man == NULL ?
586 mdoc_parseln(curp->mdoc, curp->line, ln.buf, of) :
587 man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
588 break;
589
590 /* Temporary buffers typically are not full. */
591
592 if (0 == start && '\0' == blk.buf[i])
593 break;
594
595 /* Start the next input line. */
596
597 pos = 0;
598 }
599
600 free(ln.buf);
601 }
602
603 static int
604 read_whole_file(struct mparse *curp, const char *file, int fd,
605 struct buf *fb, int *with_mmap)
606 {
607 size_t off;
608 ssize_t ssz;
609
610 #if HAVE_MMAP
611 struct stat st;
612 if (-1 == fstat(fd, &st)) {
613 perror(file);
614 exit((int)MANDOCLEVEL_SYSERR);
615 }
616
617 /*
618 * If we're a regular file, try just reading in the whole entry
619 * via mmap(). This is faster than reading it into blocks, and
620 * since each file is only a few bytes to begin with, I'm not
621 * concerned that this is going to tank any machines.
622 */
623
624 if (S_ISREG(st.st_mode)) {
625 if (st.st_size >= (1U << 31)) {
626 mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
627 return(0);
628 }
629 *with_mmap = 1;
630 fb->sz = (size_t)st.st_size;
631 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
632 if (fb->buf != MAP_FAILED)
633 return(1);
634 }
635 #endif
636
637 /*
638 * If this isn't a regular file (like, say, stdin), then we must
639 * go the old way and just read things in bit by bit.
640 */
641
642 *with_mmap = 0;
643 off = 0;
644 fb->sz = 0;
645 fb->buf = NULL;
646 for (;;) {
647 if (off == fb->sz) {
648 if (fb->sz == (1U << 31)) {
649 mandoc_msg(MANDOCERR_TOOLARGE, curp,
650 0, 0, NULL);
651 break;
652 }
653 resize_buf(fb, 65536);
654 }
655 ssz = read(fd, fb->buf + (int)off, fb->sz - off);
656 if (ssz == 0) {
657 fb->sz = off;
658 return(1);
659 }
660 if (ssz == -1) {
661 perror(file);
662 exit((int)MANDOCLEVEL_SYSERR);
663 }
664 off += (size_t)ssz;
665 }
666
667 free(fb->buf);
668 fb->buf = NULL;
669 return(0);
670 }
671
672 static void
673 mparse_end(struct mparse *curp)
674 {
675
676 if (curp->mdoc == NULL &&
677 curp->man == NULL &&
678 curp->sodest == NULL) {
679 if (curp->options & MPARSE_MDOC)
680 curp->mdoc = curp->pmdoc;
681 else {
682 if (curp->pman == NULL)
683 curp->pman = man_alloc(
684 curp->roff, curp, curp->defos,
685 curp->options & MPARSE_QUICK ? 1 : 0);
686 curp->man = curp->pman;
687 }
688 }
689 if (curp->mdoc)
690 mdoc_endparse(curp->mdoc);
691 if (curp->man)
692 man_endparse(curp->man);
693 roff_endparse(curp->roff);
694 }
695
696 static void
697 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
698 {
699 struct buf *svprimary;
700 const char *svfile;
701 size_t offset;
702 static int recursion_depth;
703
704 if (64 < recursion_depth) {
705 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
706 return;
707 }
708
709 /* Line number is per-file. */
710 svfile = curp->file;
711 curp->file = file;
712 svprimary = curp->primary;
713 curp->primary = &blk;
714 curp->line = 1;
715 recursion_depth++;
716
717 /* Skip an UTF-8 byte order mark. */
718 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
719 (unsigned char)blk.buf[0] == 0xef &&
720 (unsigned char)blk.buf[1] == 0xbb &&
721 (unsigned char)blk.buf[2] == 0xbf) {
722 offset = 3;
723 curp->filenc &= ~MPARSE_LATIN1;
724 } else
725 offset = 0;
726
727 mparse_buf_r(curp, blk, offset, 1);
728
729 if (--recursion_depth == 0)
730 mparse_end(curp);
731
732 curp->primary = svprimary;
733 curp->file = svfile;
734 }
735
736 enum mandoclevel
737 mparse_readmem(struct mparse *curp, void *buf, size_t len,
738 const char *file)
739 {
740 struct buf blk;
741
742 blk.buf = buf;
743 blk.sz = len;
744
745 mparse_parse_buffer(curp, blk, file);
746 return(curp->file_status);
747 }
748
749 /*
750 * Read the whole file into memory and call the parsers.
751 * Called recursively when an .so request is encountered.
752 */
753 enum mandoclevel
754 mparse_readfd(struct mparse *curp, int fd, const char *file)
755 {
756 struct buf blk;
757 int with_mmap;
758 int save_filenc;
759
760 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
761 save_filenc = curp->filenc;
762 curp->filenc = curp->options &
763 (MPARSE_UTF8 | MPARSE_LATIN1);
764 mparse_parse_buffer(curp, blk, file);
765 curp->filenc = save_filenc;
766 #if HAVE_MMAP
767 if (with_mmap)
768 munmap(blk.buf, blk.sz);
769 else
770 #endif
771 free(blk.buf);
772 }
773
774 if (fd != STDIN_FILENO && close(fd) == -1)
775 perror(file);
776
777 mparse_wait(curp);
778 return(curp->file_status);
779 }
780
781 enum mandoclevel
782 mparse_open(struct mparse *curp, int *fd, const char *file)
783 {
784 int pfd[2];
785 int save_errno;
786 char *cp;
787
788 curp->file = file;
789
790 /* Unless zipped, try to just open the file. */
791
792 if ((cp = strrchr(file, '.')) == NULL ||
793 strcmp(cp + 1, "gz")) {
794 curp->child = 0;
795 if ((*fd = open(file, O_RDONLY)) != -1)
796 return(MANDOCLEVEL_OK);
797
798 /* Open failed; try to append ".gz". */
799
800 mandoc_asprintf(&cp, "%s.gz", file);
801 file = cp;
802 } else
803 cp = NULL;
804
805 /* Before forking, make sure the file can be read. */
806
807 save_errno = errno;
808 if (access(file, R_OK) == -1) {
809 if (cp != NULL)
810 errno = save_errno;
811 free(cp);
812 *fd = -1;
813 curp->child = 0;
814 mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
815 return(MANDOCLEVEL_ERROR);
816 }
817
818 /* Run gunzip(1). */
819
820 if (pipe(pfd) == -1) {
821 perror("pipe");
822 exit((int)MANDOCLEVEL_SYSERR);
823 }
824
825 switch (curp->child = fork()) {
826 case -1:
827 perror("fork");
828 exit((int)MANDOCLEVEL_SYSERR);
829 case 0:
830 close(pfd[0]);
831 if (dup2(pfd[1], STDOUT_FILENO) == -1) {
832 perror("dup");
833 exit((int)MANDOCLEVEL_SYSERR);
834 }
835 execlp("gunzip", "gunzip", "-c", file, NULL);
836 perror("exec");
837 exit((int)MANDOCLEVEL_SYSERR);
838 default:
839 close(pfd[1]);
840 *fd = pfd[0];
841 return(MANDOCLEVEL_OK);
842 }
843 }
844
845 enum mandoclevel
846 mparse_wait(struct mparse *curp)
847 {
848 int status;
849
850 if (curp->child == 0)
851 return(MANDOCLEVEL_OK);
852
853 if (waitpid(curp->child, &status, 0) == -1) {
854 perror("wait");
855 exit((int)MANDOCLEVEL_SYSERR);
856 }
857 if (WIFSIGNALED(status)) {
858 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
859 "gunzip died from signal %d", WTERMSIG(status));
860 return(MANDOCLEVEL_ERROR);
861 }
862 if (WEXITSTATUS(status)) {
863 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
864 "gunzip failed with code %d", WEXITSTATUS(status));
865 return(MANDOCLEVEL_ERROR);
866 }
867 return(MANDOCLEVEL_OK);
868 }
869
870 struct mparse *
871 mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
872 const struct mchars *mchars, const char *defos)
873 {
874 struct mparse *curp;
875
876 curp = mandoc_calloc(1, sizeof(struct mparse));
877
878 curp->options = options;
879 curp->wlevel = wlevel;
880 curp->mmsg = mmsg;
881 curp->defos = defos;
882
883 curp->mchars = mchars;
884 curp->roff = roff_alloc(curp, curp->mchars, options);
885 if (curp->options & MPARSE_MDOC)
886 curp->pmdoc = mdoc_alloc(
887 curp->roff, curp, curp->defos,
888 curp->options & MPARSE_QUICK ? 1 : 0);
889 if (curp->options & MPARSE_MAN)
890 curp->pman = man_alloc(
891 curp->roff, curp, curp->defos,
892 curp->options & MPARSE_QUICK ? 1 : 0);
893
894 return(curp);
895 }
896
897 void
898 mparse_reset(struct mparse *curp)
899 {
900
901 roff_reset(curp->roff);
902
903 if (curp->mdoc)
904 mdoc_reset(curp->mdoc);
905 if (curp->man)
906 man_reset(curp->man);
907 if (curp->secondary)
908 curp->secondary->sz = 0;
909
910 curp->file_status = MANDOCLEVEL_OK;
911 curp->mdoc = NULL;
912 curp->man = NULL;
913
914 free(curp->sodest);
915 curp->sodest = NULL;
916 }
917
918 void
919 mparse_free(struct mparse *curp)
920 {
921
922 if (curp->pmdoc)
923 mdoc_free(curp->pmdoc);
924 if (curp->pman)
925 man_free(curp->pman);
926 if (curp->roff)
927 roff_free(curp->roff);
928 if (curp->secondary)
929 free(curp->secondary->buf);
930
931 free(curp->secondary);
932 free(curp->sodest);
933 free(curp);
934 }
935
936 void
937 mparse_result(struct mparse *curp,
938 struct mdoc **mdoc, struct man **man, char **sodest)
939 {
940
941 if (sodest && NULL != (*sodest = curp->sodest)) {
942 *mdoc = NULL;
943 *man = NULL;
944 return;
945 }
946 if (mdoc)
947 *mdoc = curp->mdoc;
948 if (man)
949 *man = curp->man;
950 }
951
952 void
953 mandoc_vmsg(enum mandocerr t, struct mparse *m,
954 int ln, int pos, const char *fmt, ...)
955 {
956 char buf[256];
957 va_list ap;
958
959 va_start(ap, fmt);
960 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
961 va_end(ap);
962
963 mandoc_msg(t, m, ln, pos, buf);
964 }
965
966 void
967 mandoc_msg(enum mandocerr er, struct mparse *m,
968 int ln, int col, const char *msg)
969 {
970 enum mandoclevel level;
971
972 level = MANDOCLEVEL_UNSUPP;
973 while (er < mandoclimits[level])
974 level--;
975
976 if (level < m->wlevel && er != MANDOCERR_FILE)
977 return;
978
979 if (m->mmsg)
980 (*m->mmsg)(er, level, m->file, ln, col, msg);
981
982 if (m->file_status < level)
983 m->file_status = level;
984 }
985
986 const char *
987 mparse_strerror(enum mandocerr er)
988 {
989
990 return(mandocerrs[er]);
991 }
992
993 const char *
994 mparse_strlevel(enum mandoclevel lvl)
995 {
996 return(mandoclevels[lvl]);
997 }
998
999 void
1000 mparse_keep(struct mparse *p)
1001 {
1002
1003 assert(NULL == p->secondary);
1004 p->secondary = mandoc_calloc(1, sizeof(struct buf));
1005 }
1006
1007 const char *
1008 mparse_getkeep(const struct mparse *p)
1009 {
1010
1011 assert(p->secondary);
1012 return(p->secondary->sz ? p->secondary->buf : NULL);
1013 }