]> git.cameronkatri.com Git - mandoc.git/blob - read.c
Rounding rules for horizontal scaling widths are more complicated.
[mandoc.git] / read.c
1 /* $Id: read.c,v 1.133 2015/04/02 21:36:50 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include "config.h"
20
21 #include <sys/types.h>
22 #if HAVE_MMAP
23 #include <sys/mman.h>
24 #include <sys/stat.h>
25 #endif
26 #include <sys/wait.h>
27
28 #include <assert.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <signal.h>
33 #include <stdarg.h>
34 #include <stdint.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <unistd.h>
39
40 #include "mandoc_aux.h"
41 #include "mandoc.h"
42 #include "roff.h"
43 #include "mdoc.h"
44 #include "man.h"
45 #include "libmandoc.h"
46
/*
 * Maximum number of ROFF_REPARSE rounds mparse_buf_r() accepts for
 * one input line before reporting MANDOCERR_ROFFLOOP.
 */
#define	REPARSE_LIMIT	1000
48
49 struct mparse {
50 struct man *pman; /* persistent man parser */
51 struct mdoc *pmdoc; /* persistent mdoc parser */
52 struct man *man; /* man parser */
53 struct mdoc *mdoc; /* mdoc parser */
54 struct roff *roff; /* roff parser (!NULL) */
55 const struct mchars *mchars; /* character table */
56 char *sodest; /* filename pointed to by .so */
57 const char *file; /* filename of current input file */
58 struct buf *primary; /* buffer currently being parsed */
59 struct buf *secondary; /* preprocessed copy of input */
60 const char *defos; /* default operating system */
61 mandocmsg mmsg; /* warning/error message handler */
62 enum mandoclevel file_status; /* status of current parse */
63 enum mandoclevel wlevel; /* ignore messages below this */
64 int options; /* parser options */
65 int filenc; /* encoding of the current file */
66 int reparse_count; /* finite interp. stack */
67 int line; /* line number in the file */
68 pid_t child; /* the gunzip(1) process */
69 };
70
/* File-local helpers, defined below. */
static	void	  choose_parser(struct mparse *curp);
static	void	  resize_buf(struct buf *buf, size_t initial);
static	void	  mparse_buf_r(struct mparse *curp, struct buf blk,
			size_t i, int start);
static	int	  read_whole_file(struct mparse *curp, const char *file,
			int fd, struct buf *fb, int *with_mmap);
static	void	  mparse_end(struct mparse *curp);
static	void	  mparse_parse_buffer(struct mparse *curp, struct buf blk,
			const char *file);
79
80 static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = {
81 MANDOCERR_OK,
82 MANDOCERR_WARNING,
83 MANDOCERR_WARNING,
84 MANDOCERR_ERROR,
85 MANDOCERR_UNSUPP,
86 MANDOCERR_MAX,
87 MANDOCERR_MAX
88 };
89
90 static const char * const mandocerrs[MANDOCERR_MAX] = {
91 "ok",
92
93 "generic warning",
94
95 /* related to the prologue */
96 "missing manual title, using UNTITLED",
97 "missing manual title, using \"\"",
98 "lower case character in document title",
99 "missing manual section, using \"\"",
100 "unknown manual section",
101 "missing date, using today's date",
102 "cannot parse date, using it verbatim",
103 "missing Os macro, using \"\"",
104 "duplicate prologue macro",
105 "late prologue macro",
106 "skipping late title macro",
107 "prologue macros out of order",
108
109 /* related to document structure */
110 ".so is fragile, better use ln(1)",
111 "no document body",
112 "content before first section header",
113 "first section is not \"NAME\"",
114 "NAME section without name",
115 "NAME section without description",
116 "description not at the end of NAME",
117 "bad NAME section content",
118 "missing description line, using \"\"",
119 "sections out of conventional order",
120 "duplicate section title",
121 "unexpected section",
122 "unusual Xr order",
123 "unusual Xr punctuation",
124 "AUTHORS section without An macro",
125
126 /* related to macros and nesting */
127 "obsolete macro",
128 "macro neither callable nor escaped",
129 "skipping paragraph macro",
130 "moving paragraph macro out of list",
131 "skipping no-space macro",
132 "blocks badly nested",
133 "nested displays are not portable",
134 "moving content out of list",
135 ".Vt block has child macro",
136 "fill mode already enabled, skipping",
137 "fill mode already disabled, skipping",
138 "line scope broken",
139
140 /* related to missing macro arguments */
141 "skipping empty request",
142 "conditional request controls empty scope",
143 "skipping empty macro",
144 "empty block",
145 "empty argument, using 0n",
146 "missing display type, using -ragged",
147 "list type is not the first argument",
148 "missing -width in -tag list, using 8n",
149 "missing utility name, using \"\"",
150 "missing function name, using \"\"",
151 "empty head in list item",
152 "empty list item",
153 "missing font type, using \\fR",
154 "unknown font type, using \\fR",
155 "nothing follows prefix",
156 "empty reference block",
157 "missing -std argument, adding it",
158 "missing option string, using \"\"",
159 "missing resource identifier, using \"\"",
160 "missing eqn box, using \"\"",
161
162 /* related to bad macro arguments */
163 "unterminated quoted argument",
164 "duplicate argument",
165 "skipping duplicate argument",
166 "skipping duplicate display type",
167 "skipping duplicate list type",
168 "skipping -width argument",
169 "wrong number of cells",
170 "unknown AT&T UNIX version",
171 "comma in function argument",
172 "parenthesis in function name",
173 "invalid content in Rs block",
174 "invalid Boolean argument",
175 "unknown font, skipping request",
176 "odd number of characters in request",
177
178 /* related to plain text */
179 "blank line in fill mode, using .sp",
180 "tab in filled text",
181 "whitespace at end of input line",
182 "bad comment style",
183 "invalid escape sequence",
184 "undefined string, using \"\"",
185
186 /* related to tables */
187 "tbl line starts with span",
188 "tbl column starts with span",
189 "skipping vertical bar in tbl layout",
190
191 "generic error",
192
193 /* related to tables */
194 "non-alphabetic character in tbl options",
195 "skipping unknown tbl option",
196 "missing tbl option argument",
197 "wrong tbl option argument size",
198 "empty tbl layout",
199 "invalid character in tbl layout",
200 "unmatched parenthesis in tbl layout",
201 "tbl without any data cells",
202 "ignoring data in spanned tbl cell",
203 "ignoring extra tbl data cells",
204 "data block open at end of tbl",
205
206 /* related to document structure and macros */
207 NULL,
208 "input stack limit exceeded, infinite loop?",
209 "skipping bad character",
210 "skipping unknown macro",
211 "skipping insecure request",
212 "skipping item outside list",
213 "skipping column outside column list",
214 "skipping end of block that is not open",
215 "fewer RS blocks open, skipping",
216 "inserting missing end of block",
217 "appending missing end of block",
218
219 /* related to request and macro arguments */
220 "escaped character not allowed in a name",
221 "NOT IMPLEMENTED: Bd -file",
222 "missing list type, using -item",
223 "missing manual name, using \"\"",
224 "uname(3) system call failed, using UNKNOWN",
225 "unknown standard specifier",
226 "skipping request without numeric argument",
227 "NOT IMPLEMENTED: .so with absolute path or \"..\"",
228 ".so request failed",
229 "skipping all arguments",
230 "skipping excess arguments",
231 "divide by zero",
232
233 "unsupported feature",
234 "input too large",
235 "unsupported control character",
236 "unsupported roff request",
237 "eqn delim option in tbl",
238 "unsupported tbl layout modifier",
239 "ignoring macro in table",
240 };
241
242 static const char * const mandoclevels[MANDOCLEVEL_MAX] = {
243 "SUCCESS",
244 "RESERVED",
245 "WARNING",
246 "ERROR",
247 "UNSUPP",
248 "BADARG",
249 "SYSERR"
250 };
251
252
253 static void
254 resize_buf(struct buf *buf, size_t initial)
255 {
256
257 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
258 buf->buf = mandoc_realloc(buf->buf, buf->sz);
259 }
260
261 static void
262 choose_parser(struct mparse *curp)
263 {
264 char *cp, *ep;
265 int format;
266
267 /*
268 * If neither command line arguments -mdoc or -man select
269 * a parser nor the roff parser found a .Dd or .TH macro
270 * yet, look ahead in the main input buffer.
271 */
272
273 if ((format = roff_getformat(curp->roff)) == 0) {
274 cp = curp->primary->buf;
275 ep = cp + curp->primary->sz;
276 while (cp < ep) {
277 if (*cp == '.' || *cp == '\'') {
278 cp++;
279 if (cp[0] == 'D' && cp[1] == 'd') {
280 format = MPARSE_MDOC;
281 break;
282 }
283 if (cp[0] == 'T' && cp[1] == 'H') {
284 format = MPARSE_MAN;
285 break;
286 }
287 }
288 cp = memchr(cp, '\n', ep - cp);
289 if (cp == NULL)
290 break;
291 cp++;
292 }
293 }
294
295 if (format == MPARSE_MDOC) {
296 if (NULL == curp->pmdoc)
297 curp->pmdoc = mdoc_alloc(
298 curp->roff, curp, curp->defos,
299 MPARSE_QUICK & curp->options ? 1 : 0);
300 assert(curp->pmdoc);
301 curp->mdoc = curp->pmdoc;
302 return;
303 }
304
305 /* Fall back to man(7) as a last resort. */
306
307 if (NULL == curp->pman)
308 curp->pman = man_alloc(
309 curp->roff, curp, curp->defos,
310 MPARSE_QUICK & curp->options ? 1 : 0);
311 assert(curp->pman);
312 curp->man = curp->pman;
313 }
314
315 /*
316 * Main parse routine for a buffer.
317 * It assumes encoding and line numbering are already set up.
318 * It can recurse directly (for invocations of user-defined
319 * macros, inline equations, and input line traps)
320 * and indirectly (for .so file inclusion).
321 */
322 static void
323 mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
324 {
325 const struct tbl_span *span;
326 struct buf ln;
327 const char *save_file;
328 char *cp;
329 size_t pos; /* byte number in the ln buffer */
330 enum rofferr rr;
331 int of;
332 int lnn; /* line number in the real file */
333 int fd;
334 pid_t save_child;
335 unsigned char c;
336
337 memset(&ln, 0, sizeof(ln));
338
339 lnn = curp->line;
340 pos = 0;
341
342 while (i < blk.sz) {
343 if (0 == pos && '\0' == blk.buf[i])
344 break;
345
346 if (start) {
347 curp->line = lnn;
348 curp->reparse_count = 0;
349
350 if (lnn < 3 &&
351 curp->filenc & MPARSE_UTF8 &&
352 curp->filenc & MPARSE_LATIN1)
353 curp->filenc = preconv_cue(&blk, i);
354 }
355
356 while (i < blk.sz && (start || blk.buf[i] != '\0')) {
357
358 /*
359 * When finding an unescaped newline character,
360 * leave the character loop to process the line.
361 * Skip a preceding carriage return, if any.
362 */
363
364 if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
365 '\n' == blk.buf[i + 1])
366 ++i;
367 if ('\n' == blk.buf[i]) {
368 ++i;
369 ++lnn;
370 break;
371 }
372
373 /*
374 * Make sure we have space for the worst
375 * case of 11 bytes: "\\[u10ffff]\0"
376 */
377
378 if (pos + 11 > ln.sz)
379 resize_buf(&ln, 256);
380
381 /*
382 * Encode 8-bit input.
383 */
384
385 c = blk.buf[i];
386 if (c & 0x80) {
387 if ( ! (curp->filenc && preconv_encode(
388 &blk, &i, &ln, &pos, &curp->filenc))) {
389 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
390 curp->line, pos, "0x%x", c);
391 ln.buf[pos++] = '?';
392 i++;
393 }
394 continue;
395 }
396
397 /*
398 * Exclude control characters.
399 */
400
401 if (c == 0x7f || (c < 0x20 && c != 0x09)) {
402 mandoc_vmsg(c == 0x00 || c == 0x04 ||
403 c > 0x0a ? MANDOCERR_CHAR_BAD :
404 MANDOCERR_CHAR_UNSUPP,
405 curp, curp->line, pos, "0x%x", c);
406 i++;
407 if (c != '\r')
408 ln.buf[pos++] = '?';
409 continue;
410 }
411
412 /* Trailing backslash = a plain char. */
413
414 if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
415 ln.buf[pos++] = blk.buf[i++];
416 continue;
417 }
418
419 /*
420 * Found escape and at least one other character.
421 * When it's a newline character, skip it.
422 * When there is a carriage return in between,
423 * skip that one as well.
424 */
425
426 if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
427 '\n' == blk.buf[i + 2])
428 ++i;
429 if ('\n' == blk.buf[i + 1]) {
430 i += 2;
431 ++lnn;
432 continue;
433 }
434
435 if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
436 i += 2;
437 /* Comment, skip to end of line */
438 for (; i < blk.sz; ++i) {
439 if ('\n' == blk.buf[i]) {
440 ++i;
441 ++lnn;
442 break;
443 }
444 }
445
446 /* Backout trailing whitespaces */
447 for (; pos > 0; --pos) {
448 if (ln.buf[pos - 1] != ' ')
449 break;
450 if (pos > 2 && ln.buf[pos - 2] == '\\')
451 break;
452 }
453 break;
454 }
455
456 /* Catch escaped bogus characters. */
457
458 c = (unsigned char) blk.buf[i+1];
459
460 if ( ! (isascii(c) &&
461 (isgraph(c) || isblank(c)))) {
462 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
463 curp->line, pos, "0x%x", c);
464 i += 2;
465 ln.buf[pos++] = '?';
466 continue;
467 }
468
469 /* Some other escape sequence, copy & cont. */
470
471 ln.buf[pos++] = blk.buf[i++];
472 ln.buf[pos++] = blk.buf[i++];
473 }
474
475 if (pos >= ln.sz)
476 resize_buf(&ln, 256);
477
478 ln.buf[pos] = '\0';
479
480 /*
481 * A significant amount of complexity is contained by
482 * the roff preprocessor. It's line-oriented but can be
483 * expressed on one line, so we need at times to
484 * readjust our starting point and re-run it. The roff
485 * preprocessor can also readjust the buffers with new
486 * data, so we pass them in wholesale.
487 */
488
489 of = 0;
490
491 /*
492 * Maintain a lookaside buffer of all parsed lines. We
493 * only do this if mparse_keep() has been invoked (the
494 * buffer may be accessed with mparse_getkeep()).
495 */
496
497 if (curp->secondary) {
498 curp->secondary->buf = mandoc_realloc(
499 curp->secondary->buf,
500 curp->secondary->sz + pos + 2);
501 memcpy(curp->secondary->buf +
502 curp->secondary->sz,
503 ln.buf, pos);
504 curp->secondary->sz += pos;
505 curp->secondary->buf
506 [curp->secondary->sz] = '\n';
507 curp->secondary->sz++;
508 curp->secondary->buf
509 [curp->secondary->sz] = '\0';
510 }
511 rerun:
512 rr = roff_parseln(curp->roff, curp->line, &ln, &of);
513
514 switch (rr) {
515 case ROFF_REPARSE:
516 if (REPARSE_LIMIT >= ++curp->reparse_count)
517 mparse_buf_r(curp, ln, of, 0);
518 else
519 mandoc_msg(MANDOCERR_ROFFLOOP, curp,
520 curp->line, pos, NULL);
521 pos = 0;
522 continue;
523 case ROFF_APPEND:
524 pos = strlen(ln.buf);
525 continue;
526 case ROFF_RERUN:
527 goto rerun;
528 case ROFF_IGN:
529 pos = 0;
530 continue;
531 case ROFF_SO:
532 if ( ! (curp->options & MPARSE_SO) &&
533 (i >= blk.sz || blk.buf[i] == '\0')) {
534 curp->sodest = mandoc_strdup(ln.buf + of);
535 free(ln.buf);
536 return;
537 }
538 /*
539 * We remove `so' clauses from our lookaside
540 * buffer because we're going to descend into
541 * the file recursively.
542 */
543 if (curp->secondary)
544 curp->secondary->sz -= pos + 1;
545 save_file = curp->file;
546 save_child = curp->child;
547 if (mparse_open(curp, &fd, ln.buf + of) ==
548 MANDOCLEVEL_OK) {
549 mparse_readfd(curp, fd, ln.buf + of);
550 curp->file = save_file;
551 } else {
552 curp->file = save_file;
553 mandoc_vmsg(MANDOCERR_SO_FAIL,
554 curp, curp->line, pos,
555 ".so %s", ln.buf + of);
556 ln.sz = mandoc_asprintf(&cp,
557 ".sp\nSee the file %s.\n.sp",
558 ln.buf + of);
559 free(ln.buf);
560 ln.buf = cp;
561 of = 0;
562 mparse_buf_r(curp, ln, of, 0);
563 }
564 curp->child = save_child;
565 pos = 0;
566 continue;
567 default:
568 break;
569 }
570
571 /*
572 * If input parsers have not been allocated, do so now.
573 * We keep these instanced between parsers, but set them
574 * locally per parse routine since we can use different
575 * parsers with each one.
576 */
577
578 if ( ! (curp->man || curp->mdoc))
579 choose_parser(curp);
580
581 /*
582 * Lastly, push down into the parsers themselves.
583 * If libroff returns ROFF_TBL, then add it to the
584 * currently open parse. Since we only get here if
585 * there does exist data (see tbl_data.c), we're
586 * guaranteed that something's been allocated.
587 * Do the same for ROFF_EQN.
588 */
589
590 if (rr == ROFF_TBL) {
591 while ((span = roff_span(curp->roff)) != NULL)
592 if (curp->man == NULL)
593 mdoc_addspan(curp->mdoc, span);
594 else
595 man_addspan(curp->man, span);
596 } else if (rr == ROFF_EQN) {
597 if (curp->man == NULL)
598 mdoc_addeqn(curp->mdoc, roff_eqn(curp->roff));
599 else
600 man_addeqn(curp->man, roff_eqn(curp->roff));
601 } else if ((curp->man == NULL ?
602 mdoc_parseln(curp->mdoc, curp->line, ln.buf, of) :
603 man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
604 break;
605
606 /* Temporary buffers typically are not full. */
607
608 if (0 == start && '\0' == blk.buf[i])
609 break;
610
611 /* Start the next input line. */
612
613 pos = 0;
614 }
615
616 free(ln.buf);
617 }
618
619 static int
620 read_whole_file(struct mparse *curp, const char *file, int fd,
621 struct buf *fb, int *with_mmap)
622 {
623 size_t off;
624 ssize_t ssz;
625
626 #if HAVE_MMAP
627 struct stat st;
628 if (-1 == fstat(fd, &st)) {
629 perror(file);
630 exit((int)MANDOCLEVEL_SYSERR);
631 }
632
633 /*
634 * If we're a regular file, try just reading in the whole entry
635 * via mmap(). This is faster than reading it into blocks, and
636 * since each file is only a few bytes to begin with, I'm not
637 * concerned that this is going to tank any machines.
638 */
639
640 if (S_ISREG(st.st_mode)) {
641 if (st.st_size > 0x7fffffff) {
642 mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
643 return(0);
644 }
645 *with_mmap = 1;
646 fb->sz = (size_t)st.st_size;
647 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
648 if (fb->buf != MAP_FAILED)
649 return(1);
650 }
651 #endif
652
653 /*
654 * If this isn't a regular file (like, say, stdin), then we must
655 * go the old way and just read things in bit by bit.
656 */
657
658 *with_mmap = 0;
659 off = 0;
660 fb->sz = 0;
661 fb->buf = NULL;
662 for (;;) {
663 if (off == fb->sz) {
664 if (fb->sz == (1U << 31)) {
665 mandoc_msg(MANDOCERR_TOOLARGE, curp,
666 0, 0, NULL);
667 break;
668 }
669 resize_buf(fb, 65536);
670 }
671 ssz = read(fd, fb->buf + (int)off, fb->sz - off);
672 if (ssz == 0) {
673 fb->sz = off;
674 return(1);
675 }
676 if (ssz == -1) {
677 perror(file);
678 exit((int)MANDOCLEVEL_SYSERR);
679 }
680 off += (size_t)ssz;
681 }
682
683 free(fb->buf);
684 fb->buf = NULL;
685 return(0);
686 }
687
688 static void
689 mparse_end(struct mparse *curp)
690 {
691
692 if (curp->mdoc == NULL &&
693 curp->man == NULL &&
694 curp->sodest == NULL) {
695 if (curp->options & MPARSE_MDOC)
696 curp->mdoc = curp->pmdoc;
697 else {
698 if (curp->pman == NULL)
699 curp->pman = man_alloc(
700 curp->roff, curp, curp->defos,
701 curp->options & MPARSE_QUICK ? 1 : 0);
702 curp->man = curp->pman;
703 }
704 }
705 if (curp->mdoc)
706 mdoc_endparse(curp->mdoc);
707 if (curp->man)
708 man_endparse(curp->man);
709 roff_endparse(curp->roff);
710 }
711
712 static void
713 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
714 {
715 struct buf *svprimary;
716 const char *svfile;
717 size_t offset;
718 static int recursion_depth;
719
720 if (64 < recursion_depth) {
721 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
722 return;
723 }
724
725 /* Line number is per-file. */
726 svfile = curp->file;
727 curp->file = file;
728 svprimary = curp->primary;
729 curp->primary = &blk;
730 curp->line = 1;
731 recursion_depth++;
732
733 /* Skip an UTF-8 byte order mark. */
734 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
735 (unsigned char)blk.buf[0] == 0xef &&
736 (unsigned char)blk.buf[1] == 0xbb &&
737 (unsigned char)blk.buf[2] == 0xbf) {
738 offset = 3;
739 curp->filenc &= ~MPARSE_LATIN1;
740 } else
741 offset = 0;
742
743 mparse_buf_r(curp, blk, offset, 1);
744
745 if (--recursion_depth == 0)
746 mparse_end(curp);
747
748 curp->primary = svprimary;
749 curp->file = svfile;
750 }
751
752 enum mandoclevel
753 mparse_readmem(struct mparse *curp, void *buf, size_t len,
754 const char *file)
755 {
756 struct buf blk;
757
758 blk.buf = buf;
759 blk.sz = len;
760
761 mparse_parse_buffer(curp, blk, file);
762 return(curp->file_status);
763 }
764
765 /*
766 * Read the whole file into memory and call the parsers.
767 * Called recursively when an .so request is encountered.
768 */
769 enum mandoclevel
770 mparse_readfd(struct mparse *curp, int fd, const char *file)
771 {
772 struct buf blk;
773 int with_mmap;
774 int save_filenc;
775
776 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
777 save_filenc = curp->filenc;
778 curp->filenc = curp->options &
779 (MPARSE_UTF8 | MPARSE_LATIN1);
780 mparse_parse_buffer(curp, blk, file);
781 curp->filenc = save_filenc;
782 #if HAVE_MMAP
783 if (with_mmap)
784 munmap(blk.buf, blk.sz);
785 else
786 #endif
787 free(blk.buf);
788 }
789
790 if (fd != STDIN_FILENO && close(fd) == -1)
791 perror(file);
792
793 mparse_wait(curp);
794 return(curp->file_status);
795 }
796
797 enum mandoclevel
798 mparse_open(struct mparse *curp, int *fd, const char *file)
799 {
800 int pfd[2];
801 int save_errno;
802 char *cp;
803
804 curp->file = file;
805
806 /* Unless zipped, try to just open the file. */
807
808 if ((cp = strrchr(file, '.')) == NULL ||
809 strcmp(cp + 1, "gz")) {
810 curp->child = 0;
811 if ((*fd = open(file, O_RDONLY)) != -1)
812 return(MANDOCLEVEL_OK);
813
814 /* Open failed; try to append ".gz". */
815
816 mandoc_asprintf(&cp, "%s.gz", file);
817 file = cp;
818 } else
819 cp = NULL;
820
821 /* Before forking, make sure the file can be read. */
822
823 save_errno = errno;
824 if (access(file, R_OK) == -1) {
825 if (cp != NULL)
826 errno = save_errno;
827 free(cp);
828 *fd = -1;
829 curp->child = 0;
830 mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
831 return(MANDOCLEVEL_ERROR);
832 }
833
834 /* Run gunzip(1). */
835
836 if (pipe(pfd) == -1) {
837 perror("pipe");
838 exit((int)MANDOCLEVEL_SYSERR);
839 }
840
841 switch (curp->child = fork()) {
842 case -1:
843 perror("fork");
844 exit((int)MANDOCLEVEL_SYSERR);
845 case 0:
846 close(pfd[0]);
847 if (dup2(pfd[1], STDOUT_FILENO) == -1) {
848 perror("dup");
849 exit((int)MANDOCLEVEL_SYSERR);
850 }
851 signal(SIGPIPE, SIG_DFL);
852 execlp("gunzip", "gunzip", "-c", file, NULL);
853 perror("exec");
854 exit((int)MANDOCLEVEL_SYSERR);
855 default:
856 close(pfd[1]);
857 *fd = pfd[0];
858 return(MANDOCLEVEL_OK);
859 }
860 }
861
862 enum mandoclevel
863 mparse_wait(struct mparse *curp)
864 {
865 int status;
866
867 if (curp->child == 0)
868 return(MANDOCLEVEL_OK);
869
870 if (waitpid(curp->child, &status, 0) == -1) {
871 perror("wait");
872 exit((int)MANDOCLEVEL_SYSERR);
873 }
874 curp->child = 0;
875 if (WIFSIGNALED(status)) {
876 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
877 "gunzip died from signal %d", WTERMSIG(status));
878 return(MANDOCLEVEL_ERROR);
879 }
880 if (WEXITSTATUS(status)) {
881 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
882 "gunzip failed with code %d", WEXITSTATUS(status));
883 return(MANDOCLEVEL_ERROR);
884 }
885 return(MANDOCLEVEL_OK);
886 }
887
888 struct mparse *
889 mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
890 const struct mchars *mchars, const char *defos)
891 {
892 struct mparse *curp;
893
894 curp = mandoc_calloc(1, sizeof(struct mparse));
895
896 curp->options = options;
897 curp->wlevel = wlevel;
898 curp->mmsg = mmsg;
899 curp->defos = defos;
900
901 curp->mchars = mchars;
902 curp->roff = roff_alloc(curp, curp->mchars, options);
903 if (curp->options & MPARSE_MDOC)
904 curp->pmdoc = mdoc_alloc(
905 curp->roff, curp, curp->defos,
906 curp->options & MPARSE_QUICK ? 1 : 0);
907 if (curp->options & MPARSE_MAN)
908 curp->pman = man_alloc(
909 curp->roff, curp, curp->defos,
910 curp->options & MPARSE_QUICK ? 1 : 0);
911
912 return(curp);
913 }
914
915 void
916 mparse_reset(struct mparse *curp)
917 {
918
919 roff_reset(curp->roff);
920
921 if (curp->mdoc)
922 mdoc_reset(curp->mdoc);
923 if (curp->man)
924 man_reset(curp->man);
925 if (curp->secondary)
926 curp->secondary->sz = 0;
927
928 curp->file_status = MANDOCLEVEL_OK;
929 curp->mdoc = NULL;
930 curp->man = NULL;
931
932 free(curp->sodest);
933 curp->sodest = NULL;
934 }
935
936 void
937 mparse_free(struct mparse *curp)
938 {
939
940 if (curp->pmdoc)
941 mdoc_free(curp->pmdoc);
942 if (curp->pman)
943 man_free(curp->pman);
944 if (curp->roff)
945 roff_free(curp->roff);
946 if (curp->secondary)
947 free(curp->secondary->buf);
948
949 free(curp->secondary);
950 free(curp->sodest);
951 free(curp);
952 }
953
954 void
955 mparse_result(struct mparse *curp,
956 struct mdoc **mdoc, struct man **man, char **sodest)
957 {
958
959 if (sodest && NULL != (*sodest = curp->sodest)) {
960 *mdoc = NULL;
961 *man = NULL;
962 return;
963 }
964 if (mdoc)
965 *mdoc = curp->mdoc;
966 if (man)
967 *man = curp->man;
968 }
969
970 void
971 mandoc_vmsg(enum mandocerr t, struct mparse *m,
972 int ln, int pos, const char *fmt, ...)
973 {
974 char buf[256];
975 va_list ap;
976
977 va_start(ap, fmt);
978 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
979 va_end(ap);
980
981 mandoc_msg(t, m, ln, pos, buf);
982 }
983
984 void
985 mandoc_msg(enum mandocerr er, struct mparse *m,
986 int ln, int col, const char *msg)
987 {
988 enum mandoclevel level;
989
990 level = MANDOCLEVEL_UNSUPP;
991 while (er < mandoclimits[level])
992 level--;
993
994 if (level < m->wlevel && er != MANDOCERR_FILE)
995 return;
996
997 if (m->mmsg)
998 (*m->mmsg)(er, level, m->file, ln, col, msg);
999
1000 if (m->file_status < level)
1001 m->file_status = level;
1002 }
1003
1004 const char *
1005 mparse_strerror(enum mandocerr er)
1006 {
1007
1008 return(mandocerrs[er]);
1009 }
1010
1011 const char *
1012 mparse_strlevel(enum mandoclevel lvl)
1013 {
1014 return(mandoclevels[lvl]);
1015 }
1016
1017 void
1018 mparse_keep(struct mparse *p)
1019 {
1020
1021 assert(NULL == p->secondary);
1022 p->secondary = mandoc_calloc(1, sizeof(struct buf));
1023 }
1024
1025 const char *
1026 mparse_getkeep(const struct mparse *p)
1027 {
1028
1029 assert(p->secondary);
1030 return(p->secondary->sz ? p->secondary->buf : NULL);
1031 }