]> git.cameronkatri.com Git - mandoc.git/blob - read.c
warn about attempts to call non-callable macros;
[mandoc.git] / read.c
1 /* $Id: read.c,v 1.102 2014/11/30 02:36:38 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include "config.h"
20
21 #include <sys/types.h>
22 #if HAVE_MMAP
23 #include <sys/mman.h>
24 #include <sys/stat.h>
25 #endif
26 #include <sys/wait.h>
27
28 #include <assert.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <stdarg.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38
39 #include "mandoc.h"
40 #include "mandoc_aux.h"
41 #include "libmandoc.h"
42 #include "mdoc.h"
43 #include "man.h"
44 #include "main.h"
45
46 #define REPARSE_LIMIT 1000
47
48 struct mparse {
49 struct man *pman; /* persistent man parser */
50 struct mdoc *pmdoc; /* persistent mdoc parser */
51 struct man *man; /* man parser */
52 struct mdoc *mdoc; /* mdoc parser */
53 struct roff *roff; /* roff parser (!NULL) */
54 const struct mchars *mchars; /* character table */
55 char *sodest; /* filename pointed to by .so */
56 const char *file; /* filename of current input file */
57 struct buf *primary; /* buffer currently being parsed */
58 struct buf *secondary; /* preprocessed copy of input */
59 const char *defos; /* default operating system */
60 mandocmsg mmsg; /* warning/error message handler */
61 enum mandoclevel file_status; /* status of current parse */
62 enum mandoclevel wlevel; /* ignore messages below this */
63 int options; /* parser options */
64 int filenc; /* encoding of the current file */
65 int reparse_count; /* finite interp. stack */
66 int line; /* line number in the file */
67 pid_t child; /* the gunzip(1) process */
68 };
69
/* Helpers private to this file; all are defined further down. */
static	void	  choose_parser(struct mparse *curp);
static	void	  mparse_buf_r(struct mparse *curp, struct buf blk,
			size_t i, int start);
static	void	  mparse_end(struct mparse *curp);
static	void	  mparse_parse_buffer(struct mparse *curp,
			struct buf blk, const char *file);
static	int	  read_whole_file(struct mparse *curp,
			const char *file, int fd,
			struct buf *fb, int *with_mmap);
static	void	  resize_buf(struct buf *buf, size_t initial);
78
79 static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = {
80 MANDOCERR_OK,
81 MANDOCERR_WARNING,
82 MANDOCERR_WARNING,
83 MANDOCERR_ERROR,
84 MANDOCERR_FATAL,
85 MANDOCERR_MAX,
86 MANDOCERR_MAX
87 };
88
89 static const char * const mandocerrs[MANDOCERR_MAX] = {
90 "ok",
91
92 "generic warning",
93
94 /* related to the prologue */
95 "missing manual title, using UNTITLED",
96 "missing manual title, using \"\"",
97 "lower case character in document title",
98 "missing manual section, using \"\"",
99 "unknown manual section",
100 "missing date, using today's date",
101 "cannot parse date, using it verbatim",
102 "missing Os macro, using \"\"",
103 "duplicate prologue macro",
104 "late prologue macro",
105 "skipping late title macro",
106 "prologue macros out of order",
107
108 /* related to document structure */
109 ".so is fragile, better use ln(1)",
110 "no document body",
111 "content before first section header",
112 "first section is not \"NAME\"",
113 "bad NAME section contents",
114 "sections out of conventional order",
115 "duplicate section title",
116 "unexpected section",
117 "unusual Xr order",
118 "unusual Xr punctuation",
119 "AUTHORS section without An macro",
120
121 /* related to macros and nesting */
122 "obsolete macro",
123 "macro neither callable nor escaped",
124 "skipping paragraph macro",
125 "moving paragraph macro out of list",
126 "skipping no-space macro",
127 "blocks badly nested",
128 "nested displays are not portable",
129 "moving content out of list",
130 ".Vt block has child macro",
131 "fill mode already enabled, skipping",
132 "fill mode already disabled, skipping",
133 "line scope broken",
134
135 /* related to missing macro arguments */
136 "skipping empty request",
137 "conditional request controls empty scope",
138 "skipping empty macro",
139 "empty argument, using 0n",
140 "argument count wrong",
141 "missing display type, using -ragged",
142 "list type is not the first argument",
143 "missing -width in -tag list, using 8n",
144 "missing utility name, using \"\"",
145 "empty head in list item",
146 "empty list item",
147 "missing font type, using \\fR",
148 "unknown font type, using \\fR",
149 "missing -std argument, adding it",
150 "missing eqn box, using \"\"",
151
152 /* related to bad macro arguments */
153 "unterminated quoted argument",
154 "duplicate argument",
155 "skipping duplicate argument",
156 "skipping duplicate display type",
157 "skipping duplicate list type",
158 "skipping -width argument",
159 "unknown AT&T UNIX version",
160 "comma in function argument",
161 "parenthesis in function name",
162 "invalid content in Rs block",
163 "invalid Boolean argument",
164 "unknown font, skipping request",
165
166 /* related to plain text */
167 "blank line in fill mode, using .sp",
168 "tab in filled text",
169 "whitespace at end of input line",
170 "bad comment style",
171 "invalid escape sequence",
172 "undefined string, using \"\"",
173
174 "generic error",
175
176 /* related to equations */
177 "unexpected equation scope closure",
178 "equation scope open on exit",
179 "overlapping equation scopes",
180 "unexpected end of equation",
181
182 /* related to tables */
183 "bad table syntax",
184 "bad table option",
185 "bad table layout",
186 "no table layout cells specified",
187 "no table data cells specified",
188 "ignore data in cell",
189 "data block still open",
190 "ignoring extra data cells",
191
192 /* related to document structure and macros */
193 "input stack limit exceeded, infinite loop?",
194 "skipping bad character",
195 "skipping unknown macro",
196 "skipping item outside list",
197 "skipping column outside column list",
198 "skipping end of block that is not open",
199 "inserting missing end of block",
200 "appending missing end of block",
201
202 /* related to request and macro arguments */
203 "escaped character not allowed in a name",
204 "argument count wrong",
205 "NOT IMPLEMENTED: Bd -file",
206 "missing list type, using -item",
207 "missing manual name, using \"\"",
208 "uname(3) system call failed, using UNKNOWN",
209 "unknown standard specifier",
210 "skipping request without numeric argument",
211 "skipping all arguments",
212 "skipping excess arguments",
213 "divide by zero",
214
215 "generic fatal error",
216
217 "input too large",
218 "NOT IMPLEMENTED: .so with absolute path or \"..\"",
219 ".so request failed",
220
221 /* system errors */
222 "cannot dup file descriptor",
223 "cannot exec",
224 "gunzip failed with code",
225 "cannot fork",
226 NULL,
227 "cannot open pipe",
228 "cannot read file",
229 "gunzip died from signal",
230 "cannot stat file",
231 "wait failed",
232 };
233
234 static const char * const mandoclevels[MANDOCLEVEL_MAX] = {
235 "SUCCESS",
236 "RESERVED",
237 "WARNING",
238 "ERROR",
239 "FATAL",
240 "BADARG",
241 "SYSERR"
242 };
243
244
245 static void
246 resize_buf(struct buf *buf, size_t initial)
247 {
248
249 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
250 buf->buf = mandoc_realloc(buf->buf, buf->sz);
251 }
252
253 static void
254 choose_parser(struct mparse *curp)
255 {
256 char *cp, *ep;
257 int format;
258
259 /*
260 * If neither command line arguments -mdoc or -man select
261 * a parser nor the roff parser found a .Dd or .TH macro
262 * yet, look ahead in the main input buffer.
263 */
264
265 if ((format = roff_getformat(curp->roff)) == 0) {
266 cp = curp->primary->buf;
267 ep = cp + curp->primary->sz;
268 while (cp < ep) {
269 if (*cp == '.' || *cp == '\'') {
270 cp++;
271 if (cp[0] == 'D' && cp[1] == 'd') {
272 format = MPARSE_MDOC;
273 break;
274 }
275 if (cp[0] == 'T' && cp[1] == 'H') {
276 format = MPARSE_MAN;
277 break;
278 }
279 }
280 cp = memchr(cp, '\n', ep - cp);
281 if (cp == NULL)
282 break;
283 cp++;
284 }
285 }
286
287 if (format == MPARSE_MDOC) {
288 if (NULL == curp->pmdoc)
289 curp->pmdoc = mdoc_alloc(
290 curp->roff, curp, curp->defos,
291 MPARSE_QUICK & curp->options ? 1 : 0);
292 assert(curp->pmdoc);
293 curp->mdoc = curp->pmdoc;
294 return;
295 }
296
297 /* Fall back to man(7) as a last resort. */
298
299 if (NULL == curp->pman)
300 curp->pman = man_alloc(curp->roff, curp,
301 MPARSE_QUICK & curp->options ? 1 : 0);
302 assert(curp->pman);
303 curp->man = curp->pman;
304 }
305
306 /*
307 * Main parse routine for a buffer.
308 * It assumes encoding and line numbering are already set up.
309 * It can recurse directly (for invocations of user-defined
310 * macros, inline equations, and input line traps)
311 * and indirectly (for .so file inclusion).
312 */
313 static void
314 mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
315 {
316 const struct tbl_span *span;
317 struct buf ln;
318 size_t pos; /* byte number in the ln buffer */
319 enum rofferr rr;
320 int of;
321 int lnn; /* line number in the real file */
322 unsigned char c;
323
324 memset(&ln, 0, sizeof(ln));
325
326 lnn = curp->line;
327 pos = 0;
328
329 while (i < blk.sz) {
330 if (0 == pos && '\0' == blk.buf[i])
331 break;
332
333 if (start) {
334 curp->line = lnn;
335 curp->reparse_count = 0;
336
337 if (lnn < 3 &&
338 curp->filenc & MPARSE_UTF8 &&
339 curp->filenc & MPARSE_LATIN1)
340 curp->filenc = preconv_cue(&blk, i);
341 }
342
343 while (i < blk.sz && (start || blk.buf[i] != '\0')) {
344
345 /*
346 * When finding an unescaped newline character,
347 * leave the character loop to process the line.
348 * Skip a preceding carriage return, if any.
349 */
350
351 if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
352 '\n' == blk.buf[i + 1])
353 ++i;
354 if ('\n' == blk.buf[i]) {
355 ++i;
356 ++lnn;
357 break;
358 }
359
360 /*
361 * Make sure we have space for the worst
362 * case of 11 bytes: "\\[u10ffff]\0"
363 */
364
365 if (pos + 11 > ln.sz)
366 resize_buf(&ln, 256);
367
368 /*
369 * Encode 8-bit input.
370 */
371
372 c = blk.buf[i];
373 if (c & 0x80) {
374 if ( ! (curp->filenc && preconv_encode(
375 &blk, &i, &ln, &pos, &curp->filenc))) {
376 mandoc_vmsg(MANDOCERR_BADCHAR,
377 curp, curp->line, pos,
378 "0x%x", c);
379 ln.buf[pos++] = '?';
380 i++;
381 }
382 continue;
383 }
384
385 /*
386 * Exclude control characters.
387 */
388
389 if (c == 0x7f || (c < 0x20 && c != 0x09)) {
390 mandoc_vmsg(MANDOCERR_BADCHAR, curp,
391 curp->line, pos, "0x%x", c);
392 i++;
393 ln.buf[pos++] = '?';
394 continue;
395 }
396
397 /* Trailing backslash = a plain char. */
398
399 if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
400 ln.buf[pos++] = blk.buf[i++];
401 continue;
402 }
403
404 /*
405 * Found escape and at least one other character.
406 * When it's a newline character, skip it.
407 * When there is a carriage return in between,
408 * skip that one as well.
409 */
410
411 if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
412 '\n' == blk.buf[i + 2])
413 ++i;
414 if ('\n' == blk.buf[i + 1]) {
415 i += 2;
416 ++lnn;
417 continue;
418 }
419
420 if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
421 i += 2;
422 /* Comment, skip to end of line */
423 for (; i < blk.sz; ++i) {
424 if ('\n' == blk.buf[i]) {
425 ++i;
426 ++lnn;
427 break;
428 }
429 }
430
431 /* Backout trailing whitespaces */
432 for (; pos > 0; --pos) {
433 if (ln.buf[pos - 1] != ' ')
434 break;
435 if (pos > 2 && ln.buf[pos - 2] == '\\')
436 break;
437 }
438 break;
439 }
440
441 /* Catch escaped bogus characters. */
442
443 c = (unsigned char) blk.buf[i+1];
444
445 if ( ! (isascii(c) &&
446 (isgraph(c) || isblank(c)))) {
447 mandoc_vmsg(MANDOCERR_BADCHAR, curp,
448 curp->line, pos, "0x%x", c);
449 i += 2;
450 ln.buf[pos++] = '?';
451 continue;
452 }
453
454 /* Some other escape sequence, copy & cont. */
455
456 ln.buf[pos++] = blk.buf[i++];
457 ln.buf[pos++] = blk.buf[i++];
458 }
459
460 if (pos >= ln.sz)
461 resize_buf(&ln, 256);
462
463 ln.buf[pos] = '\0';
464
465 /*
466 * A significant amount of complexity is contained by
467 * the roff preprocessor. It's line-oriented but can be
468 * expressed on one line, so we need at times to
469 * readjust our starting point and re-run it. The roff
470 * preprocessor can also readjust the buffers with new
471 * data, so we pass them in wholesale.
472 */
473
474 of = 0;
475
476 /*
477 * Maintain a lookaside buffer of all parsed lines. We
478 * only do this if mparse_keep() has been invoked (the
479 * buffer may be accessed with mparse_getkeep()).
480 */
481
482 if (curp->secondary) {
483 curp->secondary->buf = mandoc_realloc(
484 curp->secondary->buf,
485 curp->secondary->sz + pos + 2);
486 memcpy(curp->secondary->buf +
487 curp->secondary->sz,
488 ln.buf, pos);
489 curp->secondary->sz += pos;
490 curp->secondary->buf
491 [curp->secondary->sz] = '\n';
492 curp->secondary->sz++;
493 curp->secondary->buf
494 [curp->secondary->sz] = '\0';
495 }
496 rerun:
497 rr = roff_parseln(curp->roff, curp->line, &ln, &of);
498
499 switch (rr) {
500 case ROFF_REPARSE:
501 if (REPARSE_LIMIT >= ++curp->reparse_count)
502 mparse_buf_r(curp, ln, of, 0);
503 else
504 mandoc_msg(MANDOCERR_ROFFLOOP, curp,
505 curp->line, pos, NULL);
506 pos = 0;
507 continue;
508 case ROFF_APPEND:
509 pos = strlen(ln.buf);
510 continue;
511 case ROFF_RERUN:
512 goto rerun;
513 case ROFF_IGN:
514 pos = 0;
515 continue;
516 case ROFF_ERR:
517 assert(MANDOCLEVEL_FATAL <= curp->file_status);
518 break;
519 case ROFF_SO:
520 if ( ! (curp->options & MPARSE_SO) &&
521 (i >= blk.sz || blk.buf[i] == '\0')) {
522 curp->sodest = mandoc_strdup(ln.buf + of);
523 free(ln.buf);
524 return;
525 }
526 /*
527 * We remove `so' clauses from our lookaside
528 * buffer because we're going to descend into
529 * the file recursively.
530 */
531 if (curp->secondary)
532 curp->secondary->sz -= pos + 1;
533 mparse_readfd(curp, -1, ln.buf + of);
534 if (MANDOCLEVEL_FATAL <= curp->file_status) {
535 mandoc_vmsg(MANDOCERR_SO_FAIL,
536 curp, curp->line, pos,
537 ".so %s", ln.buf + of);
538 break;
539 }
540 pos = 0;
541 continue;
542 default:
543 break;
544 }
545
546 /*
547 * If we encounter errors in the recursive parse, make
548 * sure we don't continue parsing.
549 */
550
551 if (MANDOCLEVEL_FATAL <= curp->file_status)
552 break;
553
554 /*
555 * If input parsers have not been allocated, do so now.
556 * We keep these instanced between parsers, but set them
557 * locally per parse routine since we can use different
558 * parsers with each one.
559 */
560
561 if ( ! (curp->man || curp->mdoc))
562 choose_parser(curp);
563
564 /*
565 * Lastly, push down into the parsers themselves.
566 * If libroff returns ROFF_TBL, then add it to the
567 * currently open parse. Since we only get here if
568 * there does exist data (see tbl_data.c), we're
569 * guaranteed that something's been allocated.
570 * Do the same for ROFF_EQN.
571 */
572
573 if (rr == ROFF_TBL) {
574 while ((span = roff_span(curp->roff)) != NULL)
575 if (curp->man == NULL)
576 mdoc_addspan(curp->mdoc, span);
577 else
578 man_addspan(curp->man, span);
579 } else if (rr == ROFF_EQN) {
580 if (curp->man == NULL)
581 mdoc_addeqn(curp->mdoc, roff_eqn(curp->roff));
582 else
583 man_addeqn(curp->man, roff_eqn(curp->roff));
584 } else if ((curp->man == NULL ?
585 mdoc_parseln(curp->mdoc, curp->line, ln.buf, of) :
586 man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
587 break;
588
589 /* Temporary buffers typically are not full. */
590
591 if (0 == start && '\0' == blk.buf[i])
592 break;
593
594 /* Start the next input line. */
595
596 pos = 0;
597 }
598
599 free(ln.buf);
600 }
601
602 static int
603 read_whole_file(struct mparse *curp, const char *file, int fd,
604 struct buf *fb, int *with_mmap)
605 {
606 size_t off;
607 ssize_t ssz;
608
609 #if HAVE_MMAP
610 struct stat st;
611 if (-1 == fstat(fd, &st)) {
612 curp->file_status = MANDOCLEVEL_SYSERR;
613 if (curp->mmsg)
614 (*curp->mmsg)(MANDOCERR_SYSSTAT, curp->file_status,
615 file, 0, 0, strerror(errno));
616 return(0);
617 }
618
619 /*
620 * If we're a regular file, try just reading in the whole entry
621 * via mmap(). This is faster than reading it into blocks, and
622 * since each file is only a few bytes to begin with, I'm not
623 * concerned that this is going to tank any machines.
624 */
625
626 if (S_ISREG(st.st_mode)) {
627 if (st.st_size >= (1U << 31)) {
628 curp->file_status = MANDOCLEVEL_FATAL;
629 if (curp->mmsg)
630 (*curp->mmsg)(MANDOCERR_TOOLARGE,
631 curp->file_status, file, 0, 0, NULL);
632 return(0);
633 }
634 *with_mmap = 1;
635 fb->sz = (size_t)st.st_size;
636 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
637 if (fb->buf != MAP_FAILED)
638 return(1);
639 }
640 #endif
641
642 /*
643 * If this isn't a regular file (like, say, stdin), then we must
644 * go the old way and just read things in bit by bit.
645 */
646
647 *with_mmap = 0;
648 off = 0;
649 fb->sz = 0;
650 fb->buf = NULL;
651 for (;;) {
652 if (off == fb->sz) {
653 if (fb->sz == (1U << 31)) {
654 curp->file_status = MANDOCLEVEL_FATAL;
655 if (curp->mmsg)
656 (*curp->mmsg)(MANDOCERR_TOOLARGE,
657 curp->file_status,
658 file, 0, 0, NULL);
659 break;
660 }
661 resize_buf(fb, 65536);
662 }
663 ssz = read(fd, fb->buf + (int)off, fb->sz - off);
664 if (ssz == 0) {
665 fb->sz = off;
666 return(1);
667 }
668 if (ssz == -1) {
669 curp->file_status = MANDOCLEVEL_SYSERR;
670 if (curp->mmsg)
671 (*curp->mmsg)(MANDOCERR_SYSREAD,
672 curp->file_status, file, 0, 0,
673 strerror(errno));
674 break;
675 }
676 off += (size_t)ssz;
677 }
678
679 free(fb->buf);
680 fb->buf = NULL;
681 return(0);
682 }
683
684 static void
685 mparse_end(struct mparse *curp)
686 {
687
688 if (MANDOCLEVEL_FATAL <= curp->file_status)
689 return;
690
691 if (curp->mdoc == NULL &&
692 curp->man == NULL &&
693 curp->sodest == NULL) {
694 if (curp->options & MPARSE_MDOC)
695 curp->mdoc = curp->pmdoc;
696 else {
697 if (curp->pman == NULL)
698 curp->pman = man_alloc(curp->roff, curp,
699 curp->options & MPARSE_QUICK ? 1 : 0);
700 curp->man = curp->pman;
701 }
702 }
703
704 if (curp->mdoc && ! mdoc_endparse(curp->mdoc)) {
705 assert(MANDOCLEVEL_FATAL <= curp->file_status);
706 return;
707 }
708
709 if (curp->man && ! man_endparse(curp->man)) {
710 assert(MANDOCLEVEL_FATAL <= curp->file_status);
711 return;
712 }
713
714 roff_endparse(curp->roff);
715 }
716
717 static void
718 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
719 {
720 struct buf *svprimary;
721 const char *svfile;
722 size_t offset;
723 static int recursion_depth;
724
725 if (64 < recursion_depth) {
726 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
727 return;
728 }
729
730 /* Line number is per-file. */
731 svfile = curp->file;
732 curp->file = file;
733 svprimary = curp->primary;
734 curp->primary = &blk;
735 curp->line = 1;
736 recursion_depth++;
737
738 /* Skip an UTF-8 byte order mark. */
739 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
740 (unsigned char)blk.buf[0] == 0xef &&
741 (unsigned char)blk.buf[1] == 0xbb &&
742 (unsigned char)blk.buf[2] == 0xbf) {
743 offset = 3;
744 curp->filenc &= ~MPARSE_LATIN1;
745 } else
746 offset = 0;
747
748 mparse_buf_r(curp, blk, offset, 1);
749
750 if (0 == --recursion_depth && MANDOCLEVEL_FATAL > curp->file_status)
751 mparse_end(curp);
752
753 curp->primary = svprimary;
754 curp->file = svfile;
755 }
756
757 enum mandoclevel
758 mparse_readmem(struct mparse *curp, const void *buf, size_t len,
759 const char *file)
760 {
761 struct buf blk;
762
763 blk.buf = UNCONST(buf);
764 blk.sz = len;
765
766 mparse_parse_buffer(curp, blk, file);
767 return(curp->file_status);
768 }
769
770 /*
771 * If a file descriptor is given, use it and assume it points
772 * to the named file. Otherwise, open the named file.
773 * Read the whole file into memory and call the parsers.
774 * Called recursively when an .so request is encountered.
775 */
776 enum mandoclevel
777 mparse_readfd(struct mparse *curp, int fd, const char *file)
778 {
779 struct buf blk;
780 int with_mmap;
781 int save_filenc;
782 pid_t save_child;
783
784 save_child = curp->child;
785 if (fd != -1)
786 curp->child = 0;
787 else if (mparse_open(curp, &fd, file) >= MANDOCLEVEL_SYSERR)
788 goto out;
789
790 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
791 save_filenc = curp->filenc;
792 curp->filenc = curp->options &
793 (MPARSE_UTF8 | MPARSE_LATIN1);
794 mparse_parse_buffer(curp, blk, file);
795 curp->filenc = save_filenc;
796 #if HAVE_MMAP
797 if (with_mmap)
798 munmap(blk.buf, blk.sz);
799 else
800 #endif
801 free(blk.buf);
802 }
803
804 if (fd != STDIN_FILENO && close(fd) == -1)
805 perror(file);
806
807 mparse_wait(curp);
808 out:
809 curp->child = save_child;
810 return(curp->file_status);
811 }
812
813 enum mandoclevel
814 mparse_open(struct mparse *curp, int *fd, const char *file)
815 {
816 int pfd[2];
817 int save_errno;
818 char *cp;
819 enum mandocerr err;
820
821 pfd[1] = -1;
822 curp->file = file;
823
824 /* Unless zipped, try to just open the file. */
825
826 if ((cp = strrchr(file, '.')) == NULL ||
827 strcmp(cp + 1, "gz")) {
828 curp->child = 0;
829 if ((*fd = open(file, O_RDONLY)) != -1)
830 return(MANDOCLEVEL_OK);
831
832 /* Open failed; try to append ".gz". */
833
834 mandoc_asprintf(&cp, "%s.gz", file);
835 file = cp;
836 } else
837 cp = NULL;
838
839 /* Before forking, make sure the file can be read. */
840
841 save_errno = errno;
842 if (access(file, R_OK) == -1) {
843 if (cp != NULL)
844 errno = save_errno;
845 err = MANDOCERR_SYSOPEN;
846 goto out;
847 }
848
849 /* Run gunzip(1). */
850
851 if (pipe(pfd) == -1) {
852 err = MANDOCERR_SYSPIPE;
853 goto out;
854 }
855
856 switch (curp->child = fork()) {
857 case -1:
858 err = MANDOCERR_SYSFORK;
859 close(pfd[0]);
860 close(pfd[1]);
861 pfd[1] = -1;
862 break;
863 case 0:
864 close(pfd[0]);
865 if (dup2(pfd[1], STDOUT_FILENO) == -1) {
866 err = MANDOCERR_SYSDUP;
867 break;
868 }
869 execlp("gunzip", "gunzip", "-c", file, NULL);
870 err = MANDOCERR_SYSEXEC;
871 break;
872 default:
873 close(pfd[1]);
874 *fd = pfd[0];
875 return(MANDOCLEVEL_OK);
876 }
877
878 out:
879 free(cp);
880 *fd = -1;
881 curp->child = 0;
882 curp->file_status = MANDOCLEVEL_SYSERR;
883 if (curp->mmsg)
884 (*curp->mmsg)(err, curp->file_status, curp->file,
885 0, 0, strerror(errno));
886 if (pfd[1] != -1)
887 exit(1);
888 return(curp->file_status);
889 }
890
891 enum mandoclevel
892 mparse_wait(struct mparse *curp)
893 {
894 int status;
895
896 if (curp->child == 0)
897 return(MANDOCLEVEL_OK);
898
899 if (waitpid(curp->child, &status, 0) == -1) {
900 mandoc_msg(MANDOCERR_SYSWAIT, curp, 0, 0,
901 strerror(errno));
902 curp->file_status = MANDOCLEVEL_SYSERR;
903 return(curp->file_status);
904 }
905 if (WIFSIGNALED(status)) {
906 mandoc_vmsg(MANDOCERR_SYSSIG, curp, 0, 0,
907 "%d", WTERMSIG(status));
908 curp->file_status = MANDOCLEVEL_SYSERR;
909 return(curp->file_status);
910 }
911 if (WEXITSTATUS(status)) {
912 mandoc_vmsg(MANDOCERR_SYSEXIT, curp, 0, 0,
913 "%d", WEXITSTATUS(status));
914 curp->file_status = MANDOCLEVEL_SYSERR;
915 return(curp->file_status);
916 }
917 return(MANDOCLEVEL_OK);
918 }
919
920 struct mparse *
921 mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
922 const struct mchars *mchars, const char *defos)
923 {
924 struct mparse *curp;
925
926 assert(wlevel <= MANDOCLEVEL_FATAL);
927
928 curp = mandoc_calloc(1, sizeof(struct mparse));
929
930 curp->options = options;
931 curp->wlevel = wlevel;
932 curp->mmsg = mmsg;
933 curp->defos = defos;
934
935 curp->mchars = mchars;
936 curp->roff = roff_alloc(curp, curp->mchars, options);
937 if (curp->options & MPARSE_MDOC)
938 curp->pmdoc = mdoc_alloc(
939 curp->roff, curp, curp->defos,
940 curp->options & MPARSE_QUICK ? 1 : 0);
941 if (curp->options & MPARSE_MAN)
942 curp->pman = man_alloc(curp->roff, curp,
943 curp->options & MPARSE_QUICK ? 1 : 0);
944
945 return(curp);
946 }
947
948 void
949 mparse_reset(struct mparse *curp)
950 {
951
952 roff_reset(curp->roff);
953
954 if (curp->mdoc)
955 mdoc_reset(curp->mdoc);
956 if (curp->man)
957 man_reset(curp->man);
958 if (curp->secondary)
959 curp->secondary->sz = 0;
960
961 curp->file_status = MANDOCLEVEL_OK;
962 curp->mdoc = NULL;
963 curp->man = NULL;
964
965 free(curp->sodest);
966 curp->sodest = NULL;
967 }
968
969 void
970 mparse_free(struct mparse *curp)
971 {
972
973 if (curp->pmdoc)
974 mdoc_free(curp->pmdoc);
975 if (curp->pman)
976 man_free(curp->pman);
977 if (curp->roff)
978 roff_free(curp->roff);
979 if (curp->secondary)
980 free(curp->secondary->buf);
981
982 free(curp->secondary);
983 free(curp->sodest);
984 free(curp);
985 }
986
987 void
988 mparse_result(struct mparse *curp,
989 struct mdoc **mdoc, struct man **man, char **sodest)
990 {
991
992 if (sodest && NULL != (*sodest = curp->sodest)) {
993 *mdoc = NULL;
994 *man = NULL;
995 return;
996 }
997 if (mdoc)
998 *mdoc = curp->mdoc;
999 if (man)
1000 *man = curp->man;
1001 }
1002
1003 void
1004 mandoc_vmsg(enum mandocerr t, struct mparse *m,
1005 int ln, int pos, const char *fmt, ...)
1006 {
1007 char buf[256];
1008 va_list ap;
1009
1010 va_start(ap, fmt);
1011 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
1012 va_end(ap);
1013
1014 mandoc_msg(t, m, ln, pos, buf);
1015 }
1016
1017 void
1018 mandoc_msg(enum mandocerr er, struct mparse *m,
1019 int ln, int col, const char *msg)
1020 {
1021 enum mandoclevel level;
1022
1023 level = MANDOCLEVEL_FATAL;
1024 while (er < mandoclimits[level])
1025 level--;
1026
1027 if (level < m->wlevel)
1028 return;
1029
1030 if (m->mmsg)
1031 (*m->mmsg)(er, level, m->file, ln, col, msg);
1032
1033 if (m->file_status < level)
1034 m->file_status = level;
1035 }
1036
1037 const char *
1038 mparse_strerror(enum mandocerr er)
1039 {
1040
1041 return(mandocerrs[er]);
1042 }
1043
1044 const char *
1045 mparse_strlevel(enum mandoclevel lvl)
1046 {
1047 return(mandoclevels[lvl]);
1048 }
1049
1050 void
1051 mparse_keep(struct mparse *p)
1052 {
1053
1054 assert(NULL == p->secondary);
1055 p->secondary = mandoc_calloc(1, sizeof(struct buf));
1056 }
1057
1058 const char *
1059 mparse_getkeep(const struct mparse *p)
1060 {
1061
1062 assert(p->secondary);
1063 return(p->secondary->sz ? p->secondary->buf : NULL);
1064 }