]> git.cameronkatri.com Git - mandoc.git/blob - read.c
Improve -Tascii output for Unicode escape sequences: For the first 512
[mandoc.git] / read.c
1 /* $Id: read.c,v 1.93 2014/10/25 01:03:52 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include "config.h"
20
21 #include <sys/types.h>
22 #if HAVE_MMAP
23 #include <sys/mman.h>
24 #include <sys/stat.h>
25 #endif
26 #include <sys/wait.h>
27
28 #include <assert.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <stdarg.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38
39 #include "mandoc.h"
40 #include "mandoc_aux.h"
41 #include "libmandoc.h"
42 #include "mdoc.h"
43 #include "man.h"
44 #include "main.h"
45
/*
 * Upper bound for the reparse_count of the parser: once more than this
 * many ROFF_REPARSE results were seen for one input line, mparse_buf_r()
 * reports MANDOCERR_ROFFLOOP instead of recursing again.
 */
#define REPARSE_LIMIT 1000
47
/*
 * Top-level parse state: the roff, mdoc and man parser handles
 * together with the bookkeeping for the file currently being read.
 * Allocated by mparse_alloc() and released by mparse_free().
 */
struct mparse {
	struct man	 *pman; /* persistent man parser */
	struct mdoc	 *pmdoc; /* persistent mdoc parser */
	struct man	 *man; /* man parser */
	struct mdoc	 *mdoc; /* mdoc parser */
	struct roff	 *roff; /* roff parser (!NULL) */
	char		 *sodest; /* filename pointed to by .so */
	const char	 *file; /* filename of current input file */
	struct buf	 *primary; /* buffer currently being parsed */
	struct buf	 *secondary; /* preprocessed copy of input */
	const char	 *defos; /* default operating system */
	mandocmsg	  mmsg; /* warning/error message handler */
	enum mandoclevel  file_status; /* status of current parse */
	enum mandoclevel  wlevel; /* ignore messages below this */
	int		  options; /* parser options */
	int		  filenc; /* encoding of the current file */
	int		  reparse_count; /* finite interp. stack */
	int		  line; /* line number in the file */
};
67
/* Forward declarations of the file-local helpers defined below. */
static	void	  choose_parser(struct mparse *);
static	void	  resize_buf(struct buf *, size_t);
static	void	  mparse_buf_r(struct mparse *, struct buf, int);
static	int	  read_whole_file(struct mparse *, const char *, int,
				struct buf *, int *);
static	void	  mparse_end(struct mparse *);
static	void	  mparse_parse_buffer(struct mparse *, struct buf,
			const char *);
76
/*
 * Threshold table used by mandoc_msg() to map an error code to a
 * message level: starting at MANDOCLEVEL_FATAL, the level is lowered
 * as long as the error code is smaller than the entry for that level.
 */
static	const enum mandocerr	mandoclimits[MANDOCLEVEL_MAX] = {
	MANDOCERR_OK,
	MANDOCERR_WARNING,
	MANDOCERR_WARNING,
	MANDOCERR_ERROR,
	MANDOCERR_FATAL,
	MANDOCERR_MAX,
	MANDOCERR_MAX
};
86
/*
 * Message text for each error code.  The table is indexed directly
 * with an enum mandocerr value by mparse_strerror().
 * NOTE(review): the order of the entries presumably has to mirror the
 * declaration order of enum mandocerr in mandoc.h -- confirm before
 * inserting or moving entries.
 */
static	const char * const	mandocerrs[MANDOCERR_MAX] = {
	"ok",

	"generic warning",

	/* related to the prologue */
	"missing manual title, using UNTITLED",
	"missing manual title, using \"\"",
	"lower case character in document title",
	"missing manual section, using \"\"",
	"unknown manual section",
	"unknown manual volume or arch",
	"missing date, using today's date",
	"cannot parse date, using it verbatim",
	"missing Os macro, using \"\"",
	"duplicate prologue macro",
	"late prologue macro",
	"skipping late title macro",
	"prologue macros out of order",

	/* related to document structure */
	".so is fragile, better use ln(1)",
	"no document body",
	"content before first section header",
	"first section is not \"NAME\"",
	"bad NAME section contents",
	"sections out of conventional order",
	"duplicate section title",
	"unexpected section",
	"unusual Xr order",
	"unusual Xr punctuation",
	"AUTHORS section without An macro",

	/* related to macros and nesting */
	"obsolete macro",
	"skipping paragraph macro",
	"moving paragraph macro out of list",
	"skipping no-space macro",
	"blocks badly nested",
	"nested displays are not portable",
	"moving content out of list",
	".Vt block has child macro",
	"fill mode already enabled, skipping",
	"fill mode already disabled, skipping",
	"line scope broken",

	/* related to missing macro arguments */
	"skipping empty request",
	"conditional request controls empty scope",
	"skipping empty macro",
	"empty argument, using 0n",
	"argument count wrong",
	"missing display type, using -ragged",
	"list type is not the first argument",
	"missing -width in -tag list, using 8n",
	"missing utility name, using \"\"",
	"empty head in list item",
	"empty list item",
	"missing font type, using \\fR",
	"unknown font type, using \\fR",
	"missing -std argument, adding it",
	"missing eqn box, using \"\"",

	/* related to bad macro arguments */
	"unterminated quoted argument",
	"duplicate argument",
	"skipping duplicate argument",
	"skipping duplicate display type",
	"skipping duplicate list type",
	"skipping -width argument",
	"unknown AT&T UNIX version",
	"comma in function argument",
	"parenthesis in function name",
	"invalid content in Rs block",
	"invalid Boolean argument",
	"unknown font, skipping request",

	/* related to plain text */
	"blank line in fill mode, using .sp",
	"tab in filled text",
	"whitespace at end of input line",
	"bad comment style",
	"invalid escape sequence",
	"undefined string, using \"\"",

	"generic error",

	/* related to equations */
	"unexpected equation scope closure",
	"equation scope open on exit",
	"overlapping equation scopes",
	"unexpected end of equation",

	/* related to tables */
	"bad table syntax",
	"bad table option",
	"bad table layout",
	"no table layout cells specified",
	"no table data cells specified",
	"ignore data in cell",
	"data block still open",
	"ignoring extra data cells",

	/* related to document structure and macros */
	"input stack limit exceeded, infinite loop?",
	"skipping bad character",
	"skipping unknown macro",
	"skipping item outside list",
	"skipping column outside column list",
	"skipping end of block that is not open",
	"inserting missing end of block",
	"appending missing end of block",

	/* related to request and macro arguments */
	"escaped character not allowed in a name",
	"argument count wrong",
	"missing list type, using -item",
	"missing manual name, using \"\"",
	"uname(3) system call failed, using UNKNOWN",
	"unknown standard specifier",
	"skipping request without numeric argument",
	"skipping all arguments",
	"skipping excess arguments",
	"divide by zero",

	"generic fatal error",

	"input too large",
	"NOT IMPLEMENTED: Bd -file",
	"NOT IMPLEMENTED: .so with absolute path or \"..\"",
	".so request failed",

	/* system errors */
	"cannot dup file descriptor",
	"cannot exec",
	"gunzip failed with code",
	"cannot fork",
	/*
	 * NOTE(review): this slot has no text of its own; presumably it
	 * belongs to MANDOCERR_SYSOPEN, which is reported together with
	 * a strerror(3) string -- confirm against mandoc.h.
	 */
	NULL,
	"cannot open pipe",
	"cannot read file",
	"gunzip died from signal",
	"cannot stat file",
	"wait failed",
};
231
/*
 * Name of each message level.  The table is indexed directly with an
 * enum mandoclevel value by mparse_strlevel().
 */
static	const char * const	mandoclevels[MANDOCLEVEL_MAX] = {
	"SUCCESS",
	"RESERVED",
	"WARNING",
	"ERROR",
	"FATAL",
	"BADARG",
	"SYSERR"
};
241
242
243 static void
244 resize_buf(struct buf *buf, size_t initial)
245 {
246
247 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
248 buf->buf = mandoc_realloc(buf->buf, buf->sz);
249 }
250
251 static void
252 choose_parser(struct mparse *curp)
253 {
254 char *cp, *ep;
255 int format;
256
257 /*
258 * If neither command line arguments -mdoc or -man select
259 * a parser nor the roff parser found a .Dd or .TH macro
260 * yet, look ahead in the main input buffer.
261 */
262
263 if ((format = roff_getformat(curp->roff)) == 0) {
264 cp = curp->primary->buf;
265 ep = cp + curp->primary->sz;
266 while (cp < ep) {
267 if (*cp == '.' || *cp == '\'') {
268 cp++;
269 if (cp[0] == 'D' && cp[1] == 'd') {
270 format = MPARSE_MDOC;
271 break;
272 }
273 if (cp[0] == 'T' && cp[1] == 'H') {
274 format = MPARSE_MAN;
275 break;
276 }
277 }
278 cp = memchr(cp, '\n', ep - cp);
279 if (cp == NULL)
280 break;
281 cp++;
282 }
283 }
284
285 if (format == MPARSE_MDOC) {
286 if (NULL == curp->pmdoc)
287 curp->pmdoc = mdoc_alloc(
288 curp->roff, curp, curp->defos,
289 MPARSE_QUICK & curp->options ? 1 : 0);
290 assert(curp->pmdoc);
291 curp->mdoc = curp->pmdoc;
292 return;
293 }
294
295 /* Fall back to man(7) as a last resort. */
296
297 if (NULL == curp->pman)
298 curp->pman = man_alloc(curp->roff, curp,
299 MPARSE_QUICK & curp->options ? 1 : 0);
300 assert(curp->pman);
301 curp->man = curp->pman;
302 }
303
304 /*
305 * Main parse routine for an opened file. This is called for each
306 * opened file and simply loops around the full input file, possibly
307 * nesting (i.e., with `so').
308 */
309 static void
310 mparse_buf_r(struct mparse *curp, struct buf blk, int start)
311 {
312 const struct tbl_span *span;
313 struct buf ln;
314 enum rofferr rr;
315 int i, of, rc;
316 int pos; /* byte number in the ln buffer */
317 int lnn; /* line number in the real file */
318 unsigned char c;
319
320 memset(&ln, 0, sizeof(struct buf));
321
322 lnn = curp->line;
323 pos = 0;
324
325 for (i = blk.offs; i < (int)blk.sz; ) {
326 if (0 == pos && '\0' == blk.buf[i])
327 break;
328
329 if (start) {
330 curp->line = lnn;
331 curp->reparse_count = 0;
332
333 if (lnn < 3 &&
334 curp->filenc & MPARSE_UTF8 &&
335 curp->filenc & MPARSE_LATIN1) {
336 blk.offs = i;
337 curp->filenc = preconv_cue(&blk);
338 }
339 }
340
341 while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) {
342
343 /*
344 * When finding an unescaped newline character,
345 * leave the character loop to process the line.
346 * Skip a preceding carriage return, if any.
347 */
348
349 if ('\r' == blk.buf[i] && i + 1 < (int)blk.sz &&
350 '\n' == blk.buf[i + 1])
351 ++i;
352 if ('\n' == blk.buf[i]) {
353 ++i;
354 ++lnn;
355 break;
356 }
357
358 /*
359 * Make sure we have space for the worst
360 * case of 11 bytes: "\\[u10ffff]\0"
361 */
362
363 if (pos + 11 > (int)ln.sz)
364 resize_buf(&ln, 256);
365
366 /*
367 * Encode 8-bit input.
368 */
369
370 c = blk.buf[i];
371 if (c & 0x80) {
372 blk.offs = i;
373 ln.offs = pos;
374 if (curp->filenc && preconv_encode(
375 &blk, &ln, &curp->filenc)) {
376 pos = ln.offs;
377 i = blk.offs;
378 } else {
379 mandoc_vmsg(MANDOCERR_BADCHAR,
380 curp, curp->line, pos,
381 "0x%x", c);
382 ln.buf[pos++] = '?';
383 i++;
384 }
385 continue;
386 }
387
388 /*
389 * Exclude control characters.
390 */
391
392 if (c == 0x7f || (c < 0x20 && c != 0x09)) {
393 mandoc_vmsg(MANDOCERR_BADCHAR, curp,
394 curp->line, pos, "0x%x", c);
395 i++;
396 ln.buf[pos++] = '?';
397 continue;
398 }
399
400 /* Trailing backslash = a plain char. */
401
402 if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
403 ln.buf[pos++] = blk.buf[i++];
404 continue;
405 }
406
407 /*
408 * Found escape and at least one other character.
409 * When it's a newline character, skip it.
410 * When there is a carriage return in between,
411 * skip that one as well.
412 */
413
414 if ('\r' == blk.buf[i + 1] && i + 2 < (int)blk.sz &&
415 '\n' == blk.buf[i + 2])
416 ++i;
417 if ('\n' == blk.buf[i + 1]) {
418 i += 2;
419 ++lnn;
420 continue;
421 }
422
423 if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
424 i += 2;
425 /* Comment, skip to end of line */
426 for (; i < (int)blk.sz; ++i) {
427 if ('\n' == blk.buf[i]) {
428 ++i;
429 ++lnn;
430 break;
431 }
432 }
433
434 /* Backout trailing whitespaces */
435 for (; pos > 0; --pos) {
436 if (ln.buf[pos - 1] != ' ')
437 break;
438 if (pos > 2 && ln.buf[pos - 2] == '\\')
439 break;
440 }
441 break;
442 }
443
444 /* Catch escaped bogus characters. */
445
446 c = (unsigned char) blk.buf[i+1];
447
448 if ( ! (isascii(c) &&
449 (isgraph(c) || isblank(c)))) {
450 mandoc_vmsg(MANDOCERR_BADCHAR, curp,
451 curp->line, pos, "0x%x", c);
452 i += 2;
453 ln.buf[pos++] = '?';
454 continue;
455 }
456
457 /* Some other escape sequence, copy & cont. */
458
459 ln.buf[pos++] = blk.buf[i++];
460 ln.buf[pos++] = blk.buf[i++];
461 }
462
463 if (pos >= (int)ln.sz)
464 resize_buf(&ln, 256);
465
466 ln.buf[pos] = '\0';
467
468 /*
469 * A significant amount of complexity is contained by
470 * the roff preprocessor. It's line-oriented but can be
471 * expressed on one line, so we need at times to
472 * readjust our starting point and re-run it. The roff
473 * preprocessor can also readjust the buffers with new
474 * data, so we pass them in wholesale.
475 */
476
477 of = 0;
478
479 /*
480 * Maintain a lookaside buffer of all parsed lines. We
481 * only do this if mparse_keep() has been invoked (the
482 * buffer may be accessed with mparse_getkeep()).
483 */
484
485 if (curp->secondary) {
486 curp->secondary->buf = mandoc_realloc(
487 curp->secondary->buf,
488 curp->secondary->sz + pos + 2);
489 memcpy(curp->secondary->buf +
490 curp->secondary->sz,
491 ln.buf, pos);
492 curp->secondary->sz += pos;
493 curp->secondary->buf
494 [curp->secondary->sz] = '\n';
495 curp->secondary->sz++;
496 curp->secondary->buf
497 [curp->secondary->sz] = '\0';
498 }
499 rerun:
500 rr = roff_parseln(curp->roff, curp->line,
501 &ln.buf, &ln.sz, of, &of);
502
503 switch (rr) {
504 case ROFF_REPARSE:
505 if (REPARSE_LIMIT >= ++curp->reparse_count)
506 mparse_buf_r(curp, ln, 0);
507 else
508 mandoc_msg(MANDOCERR_ROFFLOOP, curp,
509 curp->line, pos, NULL);
510 pos = 0;
511 continue;
512 case ROFF_APPEND:
513 pos = (int)strlen(ln.buf);
514 continue;
515 case ROFF_RERUN:
516 goto rerun;
517 case ROFF_IGN:
518 pos = 0;
519 continue;
520 case ROFF_ERR:
521 assert(MANDOCLEVEL_FATAL <= curp->file_status);
522 break;
523 case ROFF_SO:
524 if (0 == (MPARSE_SO & curp->options) &&
525 (i >= (int)blk.sz || '\0' == blk.buf[i])) {
526 curp->sodest = mandoc_strdup(ln.buf + of);
527 free(ln.buf);
528 return;
529 }
530 /*
531 * We remove `so' clauses from our lookaside
532 * buffer because we're going to descend into
533 * the file recursively.
534 */
535 if (curp->secondary)
536 curp->secondary->sz -= pos + 1;
537 mparse_readfd(curp, -1, ln.buf + of);
538 if (MANDOCLEVEL_FATAL <= curp->file_status) {
539 mandoc_vmsg(MANDOCERR_SO_FAIL,
540 curp, curp->line, pos,
541 ".so %s", ln.buf + of);
542 break;
543 }
544 pos = 0;
545 continue;
546 default:
547 break;
548 }
549
550 /*
551 * If we encounter errors in the recursive parse, make
552 * sure we don't continue parsing.
553 */
554
555 if (MANDOCLEVEL_FATAL <= curp->file_status)
556 break;
557
558 /*
559 * If input parsers have not been allocated, do so now.
560 * We keep these instanced between parsers, but set them
561 * locally per parse routine since we can use different
562 * parsers with each one.
563 */
564
565 if ( ! (curp->man || curp->mdoc))
566 choose_parser(curp);
567
568 /*
569 * Lastly, push down into the parsers themselves.
570 * If libroff returns ROFF_TBL, then add it to the
571 * currently open parse. Since we only get here if
572 * there does exist data (see tbl_data.c), we're
573 * guaranteed that something's been allocated.
574 * Do the same for ROFF_EQN.
575 */
576
577 rc = -1;
578
579 if (ROFF_TBL == rr)
580 while (NULL != (span = roff_span(curp->roff))) {
581 rc = curp->man ?
582 man_addspan(curp->man, span) :
583 mdoc_addspan(curp->mdoc, span);
584 if (0 == rc)
585 break;
586 }
587 else if (ROFF_EQN == rr)
588 rc = curp->mdoc ?
589 mdoc_addeqn(curp->mdoc,
590 roff_eqn(curp->roff)) :
591 man_addeqn(curp->man,
592 roff_eqn(curp->roff));
593 else if (curp->man || curp->mdoc)
594 rc = curp->man ?
595 man_parseln(curp->man,
596 curp->line, ln.buf, of) :
597 mdoc_parseln(curp->mdoc,
598 curp->line, ln.buf, of);
599
600 if (0 == rc) {
601 assert(MANDOCLEVEL_FATAL <= curp->file_status);
602 break;
603 } else if (2 == rc)
604 break;
605
606 /* Temporary buffers typically are not full. */
607
608 if (0 == start && '\0' == blk.buf[i])
609 break;
610
611 /* Start the next input line. */
612
613 pos = 0;
614 }
615
616 free(ln.buf);
617 }
618
619 static int
620 read_whole_file(struct mparse *curp, const char *file, int fd,
621 struct buf *fb, int *with_mmap)
622 {
623 size_t off;
624 ssize_t ssz;
625
626 #if HAVE_MMAP
627 struct stat st;
628 if (-1 == fstat(fd, &st)) {
629 curp->file_status = MANDOCLEVEL_SYSERR;
630 if (curp->mmsg)
631 (*curp->mmsg)(MANDOCERR_SYSSTAT, curp->file_status,
632 file, 0, 0, strerror(errno));
633 return(0);
634 }
635
636 /*
637 * If we're a regular file, try just reading in the whole entry
638 * via mmap(). This is faster than reading it into blocks, and
639 * since each file is only a few bytes to begin with, I'm not
640 * concerned that this is going to tank any machines.
641 */
642
643 if (S_ISREG(st.st_mode)) {
644 if (st.st_size >= (1U << 31)) {
645 curp->file_status = MANDOCLEVEL_FATAL;
646 if (curp->mmsg)
647 (*curp->mmsg)(MANDOCERR_TOOLARGE,
648 curp->file_status, file, 0, 0, NULL);
649 return(0);
650 }
651 *with_mmap = 1;
652 fb->offs = 0;
653 fb->sz = (size_t)st.st_size;
654 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
655 if (fb->buf != MAP_FAILED)
656 return(1);
657 }
658 #endif
659
660 /*
661 * If this isn't a regular file (like, say, stdin), then we must
662 * go the old way and just read things in bit by bit.
663 */
664
665 *with_mmap = 0;
666 off = 0;
667 fb->sz = 0;
668 fb->buf = NULL;
669 for (;;) {
670 if (off == fb->sz) {
671 if (fb->sz == (1U << 31)) {
672 curp->file_status = MANDOCLEVEL_FATAL;
673 if (curp->mmsg)
674 (*curp->mmsg)(MANDOCERR_TOOLARGE,
675 curp->file_status,
676 file, 0, 0, NULL);
677 break;
678 }
679 resize_buf(fb, 65536);
680 }
681 ssz = read(fd, fb->buf + (int)off, fb->sz - off);
682 if (ssz == 0) {
683 fb->sz = off;
684 fb->offs = 0;
685 return(1);
686 }
687 if (ssz == -1) {
688 curp->file_status = MANDOCLEVEL_SYSERR;
689 if (curp->mmsg)
690 (*curp->mmsg)(MANDOCERR_SYSREAD,
691 curp->file_status, file, 0, 0,
692 strerror(errno));
693 break;
694 }
695 off += (size_t)ssz;
696 }
697
698 free(fb->buf);
699 fb->buf = NULL;
700 return(0);
701 }
702
703 static void
704 mparse_end(struct mparse *curp)
705 {
706
707 if (MANDOCLEVEL_FATAL <= curp->file_status)
708 return;
709
710 if (curp->mdoc == NULL &&
711 curp->man == NULL &&
712 curp->sodest == NULL) {
713 if (curp->options & MPARSE_MDOC)
714 curp->mdoc = curp->pmdoc;
715 else {
716 if (curp->pman == NULL)
717 curp->pman = man_alloc(curp->roff, curp,
718 curp->options & MPARSE_QUICK ? 1 : 0);
719 curp->man = curp->pman;
720 }
721 }
722
723 if (curp->mdoc && ! mdoc_endparse(curp->mdoc)) {
724 assert(MANDOCLEVEL_FATAL <= curp->file_status);
725 return;
726 }
727
728 if (curp->man && ! man_endparse(curp->man)) {
729 assert(MANDOCLEVEL_FATAL <= curp->file_status);
730 return;
731 }
732
733 roff_endparse(curp->roff);
734 }
735
736 static void
737 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
738 {
739 struct buf *svprimary;
740 const char *svfile;
741 static int recursion_depth;
742
743 if (64 < recursion_depth) {
744 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
745 return;
746 }
747
748 /* Line number is per-file. */
749 svfile = curp->file;
750 curp->file = file;
751 svprimary = curp->primary;
752 curp->primary = &blk;
753 curp->line = 1;
754 recursion_depth++;
755
756 /* Skip an UTF-8 byte order mark. */
757 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
758 (unsigned char)blk.buf[0] == 0xef &&
759 (unsigned char)blk.buf[1] == 0xbb &&
760 (unsigned char)blk.buf[2] == 0xbf) {
761 blk.offs = 3;
762 curp->filenc &= ~MPARSE_LATIN1;
763 }
764
765 mparse_buf_r(curp, blk, 1);
766
767 if (0 == --recursion_depth && MANDOCLEVEL_FATAL > curp->file_status)
768 mparse_end(curp);
769
770 curp->primary = svprimary;
771 curp->file = svfile;
772 }
773
774 enum mandoclevel
775 mparse_readmem(struct mparse *curp, const void *buf, size_t len,
776 const char *file)
777 {
778 struct buf blk;
779
780 blk.buf = UNCONST(buf);
781 blk.sz = len;
782 blk.offs = 0;
783
784 mparse_parse_buffer(curp, blk, file);
785 return(curp->file_status);
786 }
787
788 enum mandoclevel
789 mparse_readfd(struct mparse *curp, int fd, const char *file)
790 {
791 struct buf blk;
792 int with_mmap;
793 int save_filenc;
794
795 if (-1 == fd && -1 == (fd = open(file, O_RDONLY, 0))) {
796 curp->file_status = MANDOCLEVEL_SYSERR;
797 if (curp->mmsg)
798 (*curp->mmsg)(MANDOCERR_SYSOPEN,
799 curp->file_status,
800 file, 0, 0, strerror(errno));
801 return(curp->file_status);
802 }
803
804 /*
805 * Run for each opened file; may be called more than once for
806 * each full parse sequence if the opened file is nested (i.e.,
807 * from `so'). Simply sucks in the whole file and moves into
808 * the parse phase for the file.
809 */
810
811 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
812 save_filenc = curp->filenc;
813 curp->filenc = curp->options &
814 (MPARSE_UTF8 | MPARSE_LATIN1);
815 mparse_parse_buffer(curp, blk, file);
816 curp->filenc = save_filenc;
817 #if HAVE_MMAP
818 if (with_mmap)
819 munmap(blk.buf, blk.sz);
820 else
821 #endif
822 free(blk.buf);
823 }
824
825 if (STDIN_FILENO != fd && -1 == close(fd))
826 perror(file);
827
828 return(curp->file_status);
829 }
830
831 enum mandoclevel
832 mparse_open(struct mparse *curp, int *fd, const char *file,
833 pid_t *child_pid)
834 {
835 int pfd[2];
836 char *cp;
837 enum mandocerr err;
838
839 pfd[1] = -1;
840 curp->file = file;
841 if ((cp = strrchr(file, '.')) == NULL ||
842 strcmp(cp + 1, "gz")) {
843 *child_pid = 0;
844 if ((*fd = open(file, O_RDONLY)) == -1) {
845 err = MANDOCERR_SYSOPEN;
846 goto out;
847 }
848 return(MANDOCLEVEL_OK);
849 }
850
851 if (pipe(pfd) == -1) {
852 err = MANDOCERR_SYSPIPE;
853 goto out;
854 }
855
856 switch (*child_pid = fork()) {
857 case -1:
858 err = MANDOCERR_SYSFORK;
859 close(pfd[0]);
860 close(pfd[1]);
861 pfd[1] = -1;
862 break;
863 case 0:
864 close(pfd[0]);
865 if (dup2(pfd[1], STDOUT_FILENO) == -1) {
866 err = MANDOCERR_SYSDUP;
867 break;
868 }
869 execlp("gunzip", "gunzip", "-c", file, NULL);
870 err = MANDOCERR_SYSEXEC;
871 break;
872 default:
873 close(pfd[1]);
874 *fd = pfd[0];
875 return(MANDOCLEVEL_OK);
876 }
877
878 out:
879 *fd = -1;
880 *child_pid = 0;
881 curp->file_status = MANDOCLEVEL_SYSERR;
882 if (curp->mmsg)
883 (*curp->mmsg)(err, curp->file_status, file,
884 0, 0, strerror(errno));
885 if (pfd[1] != -1)
886 exit(1);
887 return(curp->file_status);
888 }
889
890 enum mandoclevel
891 mparse_wait(struct mparse *curp, pid_t child_pid)
892 {
893 int status;
894
895 if (waitpid(child_pid, &status, 0) == -1) {
896 mandoc_msg(MANDOCERR_SYSWAIT, curp, 0, 0,
897 strerror(errno));
898 curp->file_status = MANDOCLEVEL_SYSERR;
899 return(curp->file_status);
900 }
901 if (WIFSIGNALED(status)) {
902 mandoc_vmsg(MANDOCERR_SYSSIG, curp, 0, 0,
903 "%d", WTERMSIG(status));
904 curp->file_status = MANDOCLEVEL_SYSERR;
905 return(curp->file_status);
906 }
907 if (WEXITSTATUS(status)) {
908 mandoc_vmsg(MANDOCERR_SYSEXIT, curp, 0, 0,
909 "%d", WEXITSTATUS(status));
910 curp->file_status = MANDOCLEVEL_SYSERR;
911 return(curp->file_status);
912 }
913 return(MANDOCLEVEL_OK);
914 }
915
916 struct mparse *
917 mparse_alloc(int options, enum mandoclevel wlevel,
918 mandocmsg mmsg, const char *defos)
919 {
920 struct mparse *curp;
921
922 assert(wlevel <= MANDOCLEVEL_FATAL);
923
924 curp = mandoc_calloc(1, sizeof(struct mparse));
925
926 curp->options = options;
927 curp->wlevel = wlevel;
928 curp->mmsg = mmsg;
929 curp->defos = defos;
930
931 curp->roff = roff_alloc(curp, options);
932 if (curp->options & MPARSE_MDOC)
933 curp->pmdoc = mdoc_alloc(
934 curp->roff, curp, curp->defos,
935 curp->options & MPARSE_QUICK ? 1 : 0);
936 if (curp->options & MPARSE_MAN)
937 curp->pman = man_alloc(curp->roff, curp,
938 curp->options & MPARSE_QUICK ? 1 : 0);
939
940 return(curp);
941 }
942
943 void
944 mparse_reset(struct mparse *curp)
945 {
946
947 roff_reset(curp->roff);
948
949 if (curp->mdoc)
950 mdoc_reset(curp->mdoc);
951 if (curp->man)
952 man_reset(curp->man);
953 if (curp->secondary)
954 curp->secondary->sz = 0;
955
956 curp->file_status = MANDOCLEVEL_OK;
957 curp->mdoc = NULL;
958 curp->man = NULL;
959
960 free(curp->sodest);
961 curp->sodest = NULL;
962 }
963
964 void
965 mparse_free(struct mparse *curp)
966 {
967
968 if (curp->pmdoc)
969 mdoc_free(curp->pmdoc);
970 if (curp->pman)
971 man_free(curp->pman);
972 if (curp->roff)
973 roff_free(curp->roff);
974 if (curp->secondary)
975 free(curp->secondary->buf);
976
977 free(curp->secondary);
978 free(curp->sodest);
979 free(curp);
980 }
981
982 void
983 mparse_result(struct mparse *curp,
984 struct mdoc **mdoc, struct man **man, char **sodest)
985 {
986
987 if (sodest && NULL != (*sodest = curp->sodest)) {
988 *mdoc = NULL;
989 *man = NULL;
990 return;
991 }
992 if (mdoc)
993 *mdoc = curp->mdoc;
994 if (man)
995 *man = curp->man;
996 }
997
998 void
999 mandoc_vmsg(enum mandocerr t, struct mparse *m,
1000 int ln, int pos, const char *fmt, ...)
1001 {
1002 char buf[256];
1003 va_list ap;
1004
1005 va_start(ap, fmt);
1006 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
1007 va_end(ap);
1008
1009 mandoc_msg(t, m, ln, pos, buf);
1010 }
1011
1012 void
1013 mandoc_msg(enum mandocerr er, struct mparse *m,
1014 int ln, int col, const char *msg)
1015 {
1016 enum mandoclevel level;
1017
1018 level = MANDOCLEVEL_FATAL;
1019 while (er < mandoclimits[level])
1020 level--;
1021
1022 if (level < m->wlevel)
1023 return;
1024
1025 if (m->mmsg)
1026 (*m->mmsg)(er, level, m->file, ln, col, msg);
1027
1028 if (m->file_status < level)
1029 m->file_status = level;
1030 }
1031
1032 const char *
1033 mparse_strerror(enum mandocerr er)
1034 {
1035
1036 return(mandocerrs[er]);
1037 }
1038
1039 const char *
1040 mparse_strlevel(enum mandoclevel lvl)
1041 {
1042 return(mandoclevels[lvl]);
1043 }
1044
1045 void
1046 mparse_keep(struct mparse *p)
1047 {
1048
1049 assert(NULL == p->secondary);
1050 p->secondary = mandoc_calloc(1, sizeof(struct buf));
1051 }
1052
1053 const char *
1054 mparse_getkeep(const struct mparse *p)
1055 {
1056
1057 assert(p->secondary);
1058 return(p->secondary->sz ? p->secondary->buf : NULL);
1059 }