]> git.cameronkatri.com Git - mandoc.git/blob - read.c
Rework tbl(7) layout parsing:
[mandoc.git] / read.c
1 /* $Id: read.c,v 1.118 2015/01/26 18:42:30 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include "config.h"
20
21 #include <sys/types.h>
22 #if HAVE_MMAP
23 #include <sys/mman.h>
24 #include <sys/stat.h>
25 #endif
26 #include <sys/wait.h>
27
28 #include <assert.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <stdarg.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38
39 #include "mandoc.h"
40 #include "mandoc_aux.h"
41 #include "libmandoc.h"
42 #include "mdoc.h"
43 #include "man.h"
44
/*
 * Upper bound on how often one input line may come back from the
 * roff parser as ROFF_REPARSE.  mparse_buf_r() counts the rounds in
 * curp->reparse_count and reports MANDOCERR_ROFFLOOP instead of
 * recursing again once the count exceeds this limit.
 */
45 #define REPARSE_LIMIT 1000
46
47 struct mparse {
48 struct man *pman; /* persistent man parser */
49 struct mdoc *pmdoc; /* persistent mdoc parser */
50 struct man *man; /* man parser */
51 struct mdoc *mdoc; /* mdoc parser */
52 struct roff *roff; /* roff parser (!NULL) */
53 const struct mchars *mchars; /* character table */
54 char *sodest; /* filename pointed to by .so */
55 const char *file; /* filename of current input file */
56 struct buf *primary; /* buffer currently being parsed */
57 struct buf *secondary; /* preprocessed copy of input */
58 const char *defos; /* default operating system */
59 mandocmsg mmsg; /* warning/error message handler */
60 enum mandoclevel file_status; /* status of current parse */
61 enum mandoclevel wlevel; /* ignore messages below this */
62 int options; /* parser options */
63 int filenc; /* encoding of the current file */
64 int reparse_count; /* finite interp. stack */
65 int line; /* line number in the file */
66 pid_t child; /* the gunzip(1) process */
67 };
68
/* Forward declarations of the helpers private to this file. */
static	void	  resize_buf(struct buf *buf, size_t initial);
static	void	  choose_parser(struct mparse *curp);
static	int	  read_whole_file(struct mparse *curp, const char *file,
			int fd, struct buf *fb, int *with_mmap);
static	void	  mparse_buf_r(struct mparse *curp, struct buf blk,
			size_t i, int start);
static	void	  mparse_parse_buffer(struct mparse *curp, struct buf blk,
			const char *file);
static	void	  mparse_end(struct mparse *curp);
77
/*
 * One entry per message level: the error code at which that level
 * begins.  mandoc_msg() starts at MANDOCLEVEL_UNSUPP and steps down
 * through this table until the error code is no longer smaller than
 * the entry, which yields the level of the message.  The scan never
 * moves upward, so the two trailing MANDOCERR_MAX entries are not
 * consulted by it.
 */
78 static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = {
79 MANDOCERR_OK,
80 MANDOCERR_WARNING,
81 MANDOCERR_WARNING,
82 MANDOCERR_ERROR,
83 MANDOCERR_UNSUPP,
84 MANDOCERR_MAX,
85 MANDOCERR_MAX
86 };
87
/*
 * Message text for every error code, indexed by the code itself;
 * mparse_strerror() returns entries of this table unchanged.
 * The order of the strings therefore has to follow the order of
 * enum mandocerr exactly.
 * NOTE(review): the entries "generic warning", "generic error" and
 * "unsupported feature" presumably sit at MANDOCERR_WARNING,
 * MANDOCERR_ERROR and MANDOCERR_UNSUPP - confirm against mandoc.h.
 */
88 static const char * const mandocerrs[MANDOCERR_MAX] = {
89 "ok",
90
91 "generic warning",
92
93 /* related to the prologue */
94 "missing manual title, using UNTITLED",
95 "missing manual title, using \"\"",
96 "lower case character in document title",
97 "missing manual section, using \"\"",
98 "unknown manual section",
99 "missing date, using today's date",
100 "cannot parse date, using it verbatim",
101 "missing Os macro, using \"\"",
102 "duplicate prologue macro",
103 "late prologue macro",
104 "skipping late title macro",
105 "prologue macros out of order",
106
107 /* related to document structure */
108 ".so is fragile, better use ln(1)",
109 "no document body",
110 "content before first section header",
111 "first section is not \"NAME\"",
112 "bad NAME section contents",
113 "sections out of conventional order",
114 "duplicate section title",
115 "unexpected section",
116 "unusual Xr order",
117 "unusual Xr punctuation",
118 "AUTHORS section without An macro",
119
120 /* related to macros and nesting */
121 "obsolete macro",
122 "macro neither callable nor escaped",
123 "skipping paragraph macro",
124 "moving paragraph macro out of list",
125 "skipping no-space macro",
126 "blocks badly nested",
127 "nested displays are not portable",
128 "moving content out of list",
129 ".Vt block has child macro",
130 "fill mode already enabled, skipping",
131 "fill mode already disabled, skipping",
132 "line scope broken",
133
134 /* related to missing macro arguments */
135 "skipping empty request",
136 "conditional request controls empty scope",
137 "skipping empty macro",
138 "empty argument, using 0n",
139 "argument count wrong",
140 "missing display type, using -ragged",
141 "list type is not the first argument",
142 "missing -width in -tag list, using 8n",
143 "missing utility name, using \"\"",
144 "empty head in list item",
145 "empty list item",
146 "missing font type, using \\fR",
147 "unknown font type, using \\fR",
148 "nothing follows prefix",
149 "missing -std argument, adding it",
150 "missing eqn box, using \"\"",
151
152 /* related to bad macro arguments */
153 "unterminated quoted argument",
154 "duplicate argument",
155 "skipping duplicate argument",
156 "skipping duplicate display type",
157 "skipping duplicate list type",
158 "skipping -width argument",
159 "unknown AT&T UNIX version",
160 "comma in function argument",
161 "parenthesis in function name",
162 "invalid content in Rs block",
163 "invalid Boolean argument",
164 "unknown font, skipping request",
165
166 /* related to plain text */
167 "blank line in fill mode, using .sp",
168 "tab in filled text",
169 "whitespace at end of input line",
170 "bad comment style",
171 "invalid escape sequence",
172 "undefined string, using \"\"",
173
174 /* related to tables */
175 "tbl line starts with span",
176 "tbl column starts with span",
177 "skipping vertical bar in tbl layout",
178
179 "generic error",
180
181 /* related to equations */
182 "unexpected equation scope closure",
183 "equation scope open on exit",
184 "overlapping equation scopes",
185 "unexpected end of equation",
186
187 /* related to tables */
188 "non-alphabetic character in tbl options",
189 "skipping unknown tbl option",
190 "missing tbl option argument",
191 "wrong tbl option argument size",
192 "empty tbl layout",
193 "invalid character in tbl layout",
194 "unmatched parenthesis in tbl layout",
195 "no table data cells specified",
196 "ignore data in cell",
197 "data block still open",
198 "ignoring extra data cells",
199
200 /* related to document structure and macros */
/*
 * The next slot has no fixed text.
 * NOTE(review): presumably this is MANDOCERR_FILE, for which callers
 * such as mparse_open() and mparse_wait() pass their own message
 * string - confirm against enum mandocerr.
 */
201 NULL,
202 "input stack limit exceeded, infinite loop?",
203 "skipping bad character",
204 "skipping unknown macro",
205 "skipping insecure request",
206 "skipping item outside list",
207 "skipping column outside column list",
208 "skipping end of block that is not open",
209 "fewer RS blocks open, skipping",
210 "inserting missing end of block",
211 "appending missing end of block",
212
213 /* related to request and macro arguments */
214 "escaped character not allowed in a name",
215 "argument count wrong",
216 "NOT IMPLEMENTED: Bd -file",
217 "missing list type, using -item",
218 "missing manual name, using \"\"",
219 "uname(3) system call failed, using UNKNOWN",
220 "unknown standard specifier",
221 "skipping request without numeric argument",
222 "NOT IMPLEMENTED: .so with absolute path or \"..\"",
223 ".so request failed",
224 "skipping all arguments",
225 "skipping excess arguments",
226 "divide by zero",
227
228 "unsupported feature",
229 "input too large",
230 "unsupported control character",
231 "unsupported roff request",
232 "unsupported tbl layout modifier",
233 "ignoring macro in table",
234 "eqn in tbl",
235 };
236
/*
 * Printable name of every message level, indexed by the level;
 * mparse_strlevel() returns entries of this table unchanged.
 */
237 static const char * const mandoclevels[MANDOCLEVEL_MAX] = {
238 "SUCCESS",
239 "RESERVED",
240 "WARNING",
241 "ERROR",
242 "UNSUPP",
243 "BADARG",
244 "SYSERR"
245 };
246
247
248 static void
249 resize_buf(struct buf *buf, size_t initial)
250 {
251
252 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
253 buf->buf = mandoc_realloc(buf->buf, buf->sz);
254 }
255
256 static void
257 choose_parser(struct mparse *curp)
258 {
259 char *cp, *ep;
260 int format;
261
262 /*
263 * If neither command line arguments -mdoc or -man select
264 * a parser nor the roff parser found a .Dd or .TH macro
265 * yet, look ahead in the main input buffer.
266 */
267
268 if ((format = roff_getformat(curp->roff)) == 0) {
269 cp = curp->primary->buf;
270 ep = cp + curp->primary->sz;
271 while (cp < ep) {
272 if (*cp == '.' || *cp == '\'') {
273 cp++;
274 if (cp[0] == 'D' && cp[1] == 'd') {
275 format = MPARSE_MDOC;
276 break;
277 }
278 if (cp[0] == 'T' && cp[1] == 'H') {
279 format = MPARSE_MAN;
280 break;
281 }
282 }
283 cp = memchr(cp, '\n', ep - cp);
284 if (cp == NULL)
285 break;
286 cp++;
287 }
288 }
289
290 if (format == MPARSE_MDOC) {
291 if (NULL == curp->pmdoc)
292 curp->pmdoc = mdoc_alloc(
293 curp->roff, curp, curp->defos,
294 MPARSE_QUICK & curp->options ? 1 : 0);
295 assert(curp->pmdoc);
296 curp->mdoc = curp->pmdoc;
297 return;
298 }
299
300 /* Fall back to man(7) as a last resort. */
301
302 if (NULL == curp->pman)
303 curp->pman = man_alloc(
304 curp->roff, curp, curp->defos,
305 MPARSE_QUICK & curp->options ? 1 : 0);
306 assert(curp->pman);
307 curp->man = curp->pman;
308 }
309
310 /*
311 * Main parse routine for a buffer.
312 * It assumes encoding and line numbering are already set up.
313 * It can recurse directly (for invocations of user-defined
314 * macros, inline equations, and input line traps)
315 * and indirectly (for .so file inclusion).
316 */
/*
 * Parameters:
 *   curp   parse state; the line and reparse counters are updated,
 *          filenc and sodest may be set, file and child are replaced
 *          temporarily while a .so file is read, and processed lines
 *          are appended to the secondary buffer when one exists.
 *   blk    buffer to read input from.
 *   i      offset in blk of the first byte to read.
 *   start  non-zero when called for the content of a file (see
 *          mparse_parse_buffer()); zero for the recursive calls made
 *          below on temporary line buffers.
 */
317 static void
318 mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
319 {
320 const struct tbl_span *span;
321 struct buf ln;
322 const char *save_file;
323 char *cp;
324 size_t pos; /* byte number in the ln buffer */
325 enum rofferr rr;
326 int of;
327 int lnn; /* line number in the real file */
328 int fd;
329 pid_t save_child;
330 unsigned char c;
331
332 memset(&ln, 0, sizeof(ln));
333
334 lnn = curp->line;
335 pos = 0;
336
/* Outer loop: each iteration reads one line from blk and processes it. */
337 while (i < blk.sz) {
/* A NUL byte where a new line would start ends the input. */
338 if (0 == pos && '\0' == blk.buf[i])
339 break;
340
/* Line bookkeeping is done for file content only. */
341 if (start) {
342 curp->line = lnn;
343 curp->reparse_count = 0;
344
/*
 * While both encodings are still candidates, let preconv_cue()
 * look at each of the first two lines and set the encoding.
 */
345 if (lnn < 3 &&
346 curp->filenc & MPARSE_UTF8 &&
347 curp->filenc & MPARSE_LATIN1)
348 curp->filenc = preconv_cue(&blk, i);
349 }
350
/*
 * Inner loop: copy the line into ln, handling one input character
 * or escape sequence per iteration.  In temporary buffers
 * (start == 0), a NUL byte ends the line as well.
 */
351 while (i < blk.sz && (start || blk.buf[i] != '\0')) {
352
353 /*
354 * When finding an unescaped newline character,
355 * leave the character loop to process the line.
356 * Skip a preceding carriage return, if any.
357 */
358
359 if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
360 '\n' == blk.buf[i + 1])
361 ++i;
362 if ('\n' == blk.buf[i]) {
363 ++i;
364 ++lnn;
365 break;
366 }
367
368 /*
369 * Make sure we have space for the worst
370 * case of 11 bytes: "\\[u10ffff]\0"
371 */
372
373 if (pos + 11 > ln.sz)
374 resize_buf(&ln, 256);
375
376 /*
377 * Encode 8-bit input.
378 */
379
380 c = blk.buf[i];
/*
 * If no encoding is selected, or preconv_encode() returns zero,
 * complain and put a question mark into the line instead.
 */
381 if (c & 0x80) {
382 if ( ! (curp->filenc && preconv_encode(
383 &blk, &i, &ln, &pos, &curp->filenc))) {
384 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
385 curp->line, pos, "0x%x", c);
386 ln.buf[pos++] = '?';
387 i++;
388 }
389 continue;
390 }
391
392 /*
393 * Exclude control characters.
394 */
395
/*
 * The tab character (0x09) is let through; every other control
 * character is reported and replaced by a question mark.
 */
396 if (c == 0x7f || (c < 0x20 && c != 0x09)) {
397 mandoc_vmsg(c == 0x00 || c == 0x04 ||
398 c > 0x0a ? MANDOCERR_CHAR_BAD :
399 MANDOCERR_CHAR_UNSUPP,
400 curp, curp->line, pos, "0x%x", c);
401 i++;
402 ln.buf[pos++] = '?';
403 continue;
404 }
405
406 /* Trailing backslash = a plain char. */
407
408 if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
409 ln.buf[pos++] = blk.buf[i++];
410 continue;
411 }
412
413 /*
414 * Found escape and at least one other character.
415 * When it's a newline character, skip it.
416 * When there is a carriage return in between,
417 * skip that one as well.
418 */
419
420 if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
421 '\n' == blk.buf[i + 2])
422 ++i;
423 if ('\n' == blk.buf[i + 1]) {
424 i += 2;
425 ++lnn;
426 continue;
427 }
428
/* The escapes \" and \# start a comment that runs to the end of the line. */
429 if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
430 i += 2;
431 /* Comment, skip to end of line */
432 for (; i < blk.sz; ++i) {
433 if ('\n' == blk.buf[i]) {
434 ++i;
435 ++lnn;
436 break;
437 }
438 }
439
440 /* Backout trailing whitespaces */
/* A blank that directly follows a backslash is kept. */
441 for (; pos > 0; --pos) {
442 if (ln.buf[pos - 1] != ' ')
443 break;
444 if (pos > 2 && ln.buf[pos - 2] == '\\')
445 break;
446 }
447 break;
448 }
449
450 /* Catch escaped bogus characters. */
451
452 c = (unsigned char) blk.buf[i+1];
453
454 if ( ! (isascii(c) &&
455 (isgraph(c) || isblank(c)))) {
456 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
457 curp->line, pos, "0x%x", c);
458 i += 2;
459 ln.buf[pos++] = '?';
460 continue;
461 }
462
463 /* Some other escape sequence, copy & cont. */
464
465 ln.buf[pos++] = blk.buf[i++];
466 ln.buf[pos++] = blk.buf[i++];
467 }
468
/* Make room for the terminating NUL byte, then terminate the line. */
469 if (pos >= ln.sz)
470 resize_buf(&ln, 256);
471
472 ln.buf[pos] = '\0';
473
474 /*
475 * A significant amount of complexity is contained by
476 * the roff preprocessor. It's line-oriented but can be
477 * expressed on one line, so we need at times to
478 * readjust our starting point and re-run it. The roff
479 * preprocessor can also readjust the buffers with new
480 * data, so we pass them in wholesale.
481 */
482
483 of = 0;
484
485 /*
486 * Maintain a lookaside buffer of all parsed lines. We
487 * only do this if mparse_keep() has been invoked (the
488 * buffer may be accessed with mparse_getkeep()).
489 */
490
/*
 * The copy is stored with a newline appended and is kept
 * NUL-terminated; the terminator is not counted in sz.
 */
491 if (curp->secondary) {
492 curp->secondary->buf = mandoc_realloc(
493 curp->secondary->buf,
494 curp->secondary->sz + pos + 2);
495 memcpy(curp->secondary->buf +
496 curp->secondary->sz,
497 ln.buf, pos);
498 curp->secondary->sz += pos;
499 curp->secondary->buf
500 [curp->secondary->sz] = '\n';
501 curp->secondary->sz++;
502 curp->secondary->buf
503 [curp->secondary->sz] = '\0';
504 }
/*
 * Run the line through the roff parser.  "of" is passed by address;
 * its value afterwards is used below as the offset into ln for
 * reparsing, for .so file names, and for the macro parsers.
 */
505 rerun:
506 rr = roff_parseln(curp->roff, curp->line, &ln, &of);
507
508 switch (rr) {
/*
 * ROFF_REPARSE: read the line buffer again as new input, unless
 * the per-line limit is exhausted; then start the next line.
 */
509 case ROFF_REPARSE:
510 if (REPARSE_LIMIT >= ++curp->reparse_count)
511 mparse_buf_r(curp, ln, of, 0);
512 else
513 mandoc_msg(MANDOCERR_ROFFLOOP, curp,
514 curp->line, pos, NULL);
515 pos = 0;
516 continue;
/* ROFF_APPEND: keep the content of ln and add the next line to it. */
517 case ROFF_APPEND:
518 pos = strlen(ln.buf);
519 continue;
/* ROFF_RERUN: pass the same buffer to the roff parser once more. */
520 case ROFF_RERUN:
521 goto rerun;
/* ROFF_IGN: nothing to hand on, start the next line. */
522 case ROFF_IGN:
523 pos = 0;
524 continue;
/*
 * ROFF_SO: without MPARSE_SO, a .so request at the very end of the
 * buffer is only recorded in sodest, and parsing of this buffer
 * stops.  In all other cases the named file is read right here.
 */
525 case ROFF_SO:
526 if ( ! (curp->options & MPARSE_SO) &&
527 (i >= blk.sz || blk.buf[i] == '\0')) {
528 curp->sodest = mandoc_strdup(ln.buf + of);
529 free(ln.buf);
530 return;
531 }
532 /*
533 * We remove `so' clauses from our lookaside
534 * buffer because we're going to descend into
535 * the file recursively.
536 */
537 if (curp->secondary)
538 curp->secondary->sz -= pos + 1;
/* mparse_open() overwrites file and child, so save both first. */
539 save_file = curp->file;
540 save_child = curp->child;
541 if (mparse_open(curp, &fd, ln.buf + of) ==
542 MANDOCLEVEL_OK) {
543 mparse_readfd(curp, fd, ln.buf + of);
544 curp->file = save_file;
545 } else {
/*
 * The file could not be opened: report that and parse a short
 * replacement text naming the file instead.
 */
546 curp->file = save_file;
547 mandoc_vmsg(MANDOCERR_SO_FAIL,
548 curp, curp->line, pos,
549 ".so %s", ln.buf + of);
550 ln.sz = mandoc_asprintf(&cp,
551 ".sp\nSee the file %s.\n.sp",
552 ln.buf + of);
553 free(ln.buf);
554 ln.buf = cp;
555 of = 0;
556 mparse_buf_r(curp, ln, of, 0);
557 }
558 curp->child = save_child;
559 pos = 0;
560 continue;
561 default:
562 break;
563 }
564
565 /*
566 * If input parsers have not been allocated, do so now.
567 * We keep these instanced between parsers, but set them
568 * locally per parse routine since we can use different
569 * parsers with each one.
570 */
571
572 if ( ! (curp->man || curp->mdoc))
573 choose_parser(curp);
574
575 /*
576 * Lastly, push down into the parsers themselves.
577 * If libroff returns ROFF_TBL, then add it to the
578 * currently open parse. Since we only get here if
579 * there does exist data (see tbl_data.c), we're
580 * guaranteed that something's been allocated.
581 * Do the same for ROFF_EQN.
582 */
583
/*
 * NOTE(review): a return value of 2 from mdoc_parseln() or
 * man_parseln() stops reading this buffer; what that value means
 * is defined by those parsers - confirm there.
 */
584 if (rr == ROFF_TBL) {
585 while ((span = roff_span(curp->roff)) != NULL)
586 if (curp->man == NULL)
587 mdoc_addspan(curp->mdoc, span);
588 else
589 man_addspan(curp->man, span);
590 } else if (rr == ROFF_EQN) {
591 if (curp->man == NULL)
592 mdoc_addeqn(curp->mdoc, roff_eqn(curp->roff));
593 else
594 man_addeqn(curp->man, roff_eqn(curp->roff));
595 } else if ((curp->man == NULL ?
596 mdoc_parseln(curp->mdoc, curp->line, ln.buf, of) :
597 man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
598 break;
599
600 /* Temporary buffers typically are not full. */
601
602 if (0 == start && '\0' == blk.buf[i])
603 break;
604
605 /* Start the next input line. */
606
607 pos = 0;
608 }
609
610 free(ln.buf);
611 }
612
613 static int
614 read_whole_file(struct mparse *curp, const char *file, int fd,
615 struct buf *fb, int *with_mmap)
616 {
617 size_t off;
618 ssize_t ssz;
619
620 #if HAVE_MMAP
621 struct stat st;
622 if (-1 == fstat(fd, &st)) {
623 perror(file);
624 exit((int)MANDOCLEVEL_SYSERR);
625 }
626
627 /*
628 * If we're a regular file, try just reading in the whole entry
629 * via mmap(). This is faster than reading it into blocks, and
630 * since each file is only a few bytes to begin with, I'm not
631 * concerned that this is going to tank any machines.
632 */
633
634 if (S_ISREG(st.st_mode)) {
635 if (st.st_size >= (1U << 31)) {
636 mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
637 return(0);
638 }
639 *with_mmap = 1;
640 fb->sz = (size_t)st.st_size;
641 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
642 if (fb->buf != MAP_FAILED)
643 return(1);
644 }
645 #endif
646
647 /*
648 * If this isn't a regular file (like, say, stdin), then we must
649 * go the old way and just read things in bit by bit.
650 */
651
652 *with_mmap = 0;
653 off = 0;
654 fb->sz = 0;
655 fb->buf = NULL;
656 for (;;) {
657 if (off == fb->sz) {
658 if (fb->sz == (1U << 31)) {
659 mandoc_msg(MANDOCERR_TOOLARGE, curp,
660 0, 0, NULL);
661 break;
662 }
663 resize_buf(fb, 65536);
664 }
665 ssz = read(fd, fb->buf + (int)off, fb->sz - off);
666 if (ssz == 0) {
667 fb->sz = off;
668 return(1);
669 }
670 if (ssz == -1) {
671 perror(file);
672 exit((int)MANDOCLEVEL_SYSERR);
673 }
674 off += (size_t)ssz;
675 }
676
677 free(fb->buf);
678 fb->buf = NULL;
679 return(0);
680 }
681
682 static void
683 mparse_end(struct mparse *curp)
684 {
685
686 if (curp->mdoc == NULL &&
687 curp->man == NULL &&
688 curp->sodest == NULL) {
689 if (curp->options & MPARSE_MDOC)
690 curp->mdoc = curp->pmdoc;
691 else {
692 if (curp->pman == NULL)
693 curp->pman = man_alloc(
694 curp->roff, curp, curp->defos,
695 curp->options & MPARSE_QUICK ? 1 : 0);
696 curp->man = curp->pman;
697 }
698 }
699 if (curp->mdoc)
700 mdoc_endparse(curp->mdoc);
701 if (curp->man)
702 man_endparse(curp->man);
703 roff_endparse(curp->roff);
704 }
705
706 static void
707 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
708 {
709 struct buf *svprimary;
710 const char *svfile;
711 size_t offset;
712 static int recursion_depth;
713
714 if (64 < recursion_depth) {
715 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
716 return;
717 }
718
719 /* Line number is per-file. */
720 svfile = curp->file;
721 curp->file = file;
722 svprimary = curp->primary;
723 curp->primary = &blk;
724 curp->line = 1;
725 recursion_depth++;
726
727 /* Skip an UTF-8 byte order mark. */
728 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
729 (unsigned char)blk.buf[0] == 0xef &&
730 (unsigned char)blk.buf[1] == 0xbb &&
731 (unsigned char)blk.buf[2] == 0xbf) {
732 offset = 3;
733 curp->filenc &= ~MPARSE_LATIN1;
734 } else
735 offset = 0;
736
737 mparse_buf_r(curp, blk, offset, 1);
738
739 if (--recursion_depth == 0)
740 mparse_end(curp);
741
742 curp->primary = svprimary;
743 curp->file = svfile;
744 }
745
746 enum mandoclevel
747 mparse_readmem(struct mparse *curp, void *buf, size_t len,
748 const char *file)
749 {
750 struct buf blk;
751
752 blk.buf = buf;
753 blk.sz = len;
754
755 mparse_parse_buffer(curp, blk, file);
756 return(curp->file_status);
757 }
758
759 /*
760 * Read the whole file into memory and call the parsers.
761 * Called recursively when an .so request is encountered.
762 */
763 enum mandoclevel
764 mparse_readfd(struct mparse *curp, int fd, const char *file)
765 {
766 struct buf blk;
767 int with_mmap;
768 int save_filenc;
769
770 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
771 save_filenc = curp->filenc;
772 curp->filenc = curp->options &
773 (MPARSE_UTF8 | MPARSE_LATIN1);
774 mparse_parse_buffer(curp, blk, file);
775 curp->filenc = save_filenc;
776 #if HAVE_MMAP
777 if (with_mmap)
778 munmap(blk.buf, blk.sz);
779 else
780 #endif
781 free(blk.buf);
782 }
783
784 if (fd != STDIN_FILENO && close(fd) == -1)
785 perror(file);
786
787 mparse_wait(curp);
788 return(curp->file_status);
789 }
790
791 enum mandoclevel
792 mparse_open(struct mparse *curp, int *fd, const char *file)
793 {
794 int pfd[2];
795 int save_errno;
796 char *cp;
797
798 curp->file = file;
799
800 /* Unless zipped, try to just open the file. */
801
802 if ((cp = strrchr(file, '.')) == NULL ||
803 strcmp(cp + 1, "gz")) {
804 curp->child = 0;
805 if ((*fd = open(file, O_RDONLY)) != -1)
806 return(MANDOCLEVEL_OK);
807
808 /* Open failed; try to append ".gz". */
809
810 mandoc_asprintf(&cp, "%s.gz", file);
811 file = cp;
812 } else
813 cp = NULL;
814
815 /* Before forking, make sure the file can be read. */
816
817 save_errno = errno;
818 if (access(file, R_OK) == -1) {
819 if (cp != NULL)
820 errno = save_errno;
821 free(cp);
822 *fd = -1;
823 curp->child = 0;
824 mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
825 return(MANDOCLEVEL_ERROR);
826 }
827
828 /* Run gunzip(1). */
829
830 if (pipe(pfd) == -1) {
831 perror("pipe");
832 exit((int)MANDOCLEVEL_SYSERR);
833 }
834
835 switch (curp->child = fork()) {
836 case -1:
837 perror("fork");
838 exit((int)MANDOCLEVEL_SYSERR);
839 case 0:
840 close(pfd[0]);
841 if (dup2(pfd[1], STDOUT_FILENO) == -1) {
842 perror("dup");
843 exit((int)MANDOCLEVEL_SYSERR);
844 }
845 execlp("gunzip", "gunzip", "-c", file, NULL);
846 perror("exec");
847 exit((int)MANDOCLEVEL_SYSERR);
848 default:
849 close(pfd[1]);
850 *fd = pfd[0];
851 return(MANDOCLEVEL_OK);
852 }
853 }
854
855 enum mandoclevel
856 mparse_wait(struct mparse *curp)
857 {
858 int status;
859
860 if (curp->child == 0)
861 return(MANDOCLEVEL_OK);
862
863 if (waitpid(curp->child, &status, 0) == -1) {
864 perror("wait");
865 exit((int)MANDOCLEVEL_SYSERR);
866 }
867 if (WIFSIGNALED(status)) {
868 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
869 "gunzip died from signal %d", WTERMSIG(status));
870 return(MANDOCLEVEL_ERROR);
871 }
872 if (WEXITSTATUS(status)) {
873 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
874 "gunzip failed with code %d", WEXITSTATUS(status));
875 return(MANDOCLEVEL_ERROR);
876 }
877 return(MANDOCLEVEL_OK);
878 }
879
880 struct mparse *
881 mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
882 const struct mchars *mchars, const char *defos)
883 {
884 struct mparse *curp;
885
886 curp = mandoc_calloc(1, sizeof(struct mparse));
887
888 curp->options = options;
889 curp->wlevel = wlevel;
890 curp->mmsg = mmsg;
891 curp->defos = defos;
892
893 curp->mchars = mchars;
894 curp->roff = roff_alloc(curp, curp->mchars, options);
895 if (curp->options & MPARSE_MDOC)
896 curp->pmdoc = mdoc_alloc(
897 curp->roff, curp, curp->defos,
898 curp->options & MPARSE_QUICK ? 1 : 0);
899 if (curp->options & MPARSE_MAN)
900 curp->pman = man_alloc(
901 curp->roff, curp, curp->defos,
902 curp->options & MPARSE_QUICK ? 1 : 0);
903
904 return(curp);
905 }
906
907 void
908 mparse_reset(struct mparse *curp)
909 {
910
911 roff_reset(curp->roff);
912
913 if (curp->mdoc)
914 mdoc_reset(curp->mdoc);
915 if (curp->man)
916 man_reset(curp->man);
917 if (curp->secondary)
918 curp->secondary->sz = 0;
919
920 curp->file_status = MANDOCLEVEL_OK;
921 curp->mdoc = NULL;
922 curp->man = NULL;
923
924 free(curp->sodest);
925 curp->sodest = NULL;
926 }
927
928 void
929 mparse_free(struct mparse *curp)
930 {
931
932 if (curp->pmdoc)
933 mdoc_free(curp->pmdoc);
934 if (curp->pman)
935 man_free(curp->pman);
936 if (curp->roff)
937 roff_free(curp->roff);
938 if (curp->secondary)
939 free(curp->secondary->buf);
940
941 free(curp->secondary);
942 free(curp->sodest);
943 free(curp);
944 }
945
946 void
947 mparse_result(struct mparse *curp,
948 struct mdoc **mdoc, struct man **man, char **sodest)
949 {
950
951 if (sodest && NULL != (*sodest = curp->sodest)) {
952 *mdoc = NULL;
953 *man = NULL;
954 return;
955 }
956 if (mdoc)
957 *mdoc = curp->mdoc;
958 if (man)
959 *man = curp->man;
960 }
961
962 void
963 mandoc_vmsg(enum mandocerr t, struct mparse *m,
964 int ln, int pos, const char *fmt, ...)
965 {
966 char buf[256];
967 va_list ap;
968
969 va_start(ap, fmt);
970 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
971 va_end(ap);
972
973 mandoc_msg(t, m, ln, pos, buf);
974 }
975
976 void
977 mandoc_msg(enum mandocerr er, struct mparse *m,
978 int ln, int col, const char *msg)
979 {
980 enum mandoclevel level;
981
982 level = MANDOCLEVEL_UNSUPP;
983 while (er < mandoclimits[level])
984 level--;
985
986 if (level < m->wlevel && er != MANDOCERR_FILE)
987 return;
988
989 if (m->mmsg)
990 (*m->mmsg)(er, level, m->file, ln, col, msg);
991
992 if (m->file_status < level)
993 m->file_status = level;
994 }
995
996 const char *
997 mparse_strerror(enum mandocerr er)
998 {
999
1000 return(mandocerrs[er]);
1001 }
1002
1003 const char *
1004 mparse_strlevel(enum mandoclevel lvl)
1005 {
1006 return(mandoclevels[lvl]);
1007 }
1008
1009 void
1010 mparse_keep(struct mparse *p)
1011 {
1012
1013 assert(NULL == p->secondary);
1014 p->secondary = mandoc_calloc(1, sizeof(struct buf));
1015 }
1016
1017 const char *
1018 mparse_getkeep(const struct mparse *p)
1019 {
1020
1021 assert(p->secondary);
1022 return(p->secondary->sz ? p->secondary->buf : NULL);
1023 }