]> git.cameronkatri.com Git - mandoc.git/blob - read.c
If an application parses multiple files with mparse_readfd(3) but
[mandoc.git] / read.c
1 /* $Id: read.c,v 1.159 2017/02/03 17:56:59 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include "config.h"
20
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #include <sys/stat.h>
24
25 #include <assert.h>
26 #include <ctype.h>
27 #if HAVE_ERR
28 #include <err.h>
29 #endif
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <stdarg.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 #include <zlib.h>
39
40 #include "mandoc_aux.h"
41 #include "mandoc.h"
42 #include "roff.h"
43 #include "mdoc.h"
44 #include "man.h"
45 #include "libmandoc.h"
46 #include "roff_int.h"
47
/*
 * Maximum number of ROFF_REPARSE rounds accepted for one input line
 * before mparse_buf_r() reports MANDOCERR_ROFFLOOP instead of
 * recursing again.
 */
#define	REPARSE_LIMIT	1000
49
/*
 * State of one parser instance.
 * Allocated by mparse_alloc(), reset between files by mparse_reset(),
 * and released by mparse_free().
 */
struct	mparse {
	struct roff_man	 *man; /* man parser */
	struct roff	 *roff; /* roff parser (!NULL) */
	char		 *sodest; /* filename pointed to by .so */
	const char	 *file; /* filename of current input file */
	struct buf	 *primary; /* buffer currently being parsed */
	struct buf	 *secondary; /* preprocessed copy of input; */
				     /* only allocated by mparse_keep() */
	const char	 *defos; /* default operating system */
	mandocmsg	  mmsg; /* warning/error message handler */
	enum mandoclevel  file_status; /* status of current parse */
	enum mandoclevel  wlevel; /* ignore messages below this */
	int		  options; /* parser options */
	int		  gzip; /* current input file is gzipped */
	int		  filenc; /* encoding of the current file */
	int		  reparse_count; /* finite interp. stack */
	int		  line; /* line number in the file */
};
67
/* Forward declarations of the file-local helpers defined below. */
static	void	  choose_parser(struct mparse *);
static	void	  resize_buf(struct buf *, size_t);
static	void	  mparse_buf_r(struct mparse *, struct buf, size_t, int);
static	int	  read_whole_file(struct mparse *, const char *, int,
				struct buf *, int *);
static	void	  mparse_end(struct mparse *);
static	void	  mparse_parse_buffer(struct mparse *, struct buf,
			const char *);
76
/*
 * For each message level (index), the smallest error code that
 * belongs to that level.  mandoc_msg() starts at MANDOCLEVEL_UNSUPP
 * and steps down until the error code is no longer below the entry.
 */
static	const enum mandocerr	mandoclimits[MANDOCLEVEL_MAX] = {
	MANDOCERR_OK,
	MANDOCERR_WARNING,
	MANDOCERR_WARNING,
	MANDOCERR_ERROR,
	MANDOCERR_UNSUPP,
	MANDOCERR_MAX,
	MANDOCERR_MAX
};
86
/*
 * Message text for each error code, indexed by enum mandocerr and
 * returned verbatim by mparse_strerror().
 * NOTE(review): the entry order presumably has to mirror the
 * declaration order of enum mandocerr in mandoc.h - confirm before
 * inserting or moving entries.
 */
static	const char * const	mandocerrs[MANDOCERR_MAX] = {
	"ok",

	"generic warning",

	/* related to the prologue */
	"missing manual title, using UNTITLED",
	"missing manual title, using \"\"",
	"lower case character in document title",
	"missing manual section, using \"\"",
	"unknown manual section",
	"missing date, using today's date",
	"cannot parse date, using it verbatim",
	"missing Os macro, using \"\"",
	"duplicate prologue macro",
	"late prologue macro",
	"skipping late title macro",
	"prologue macros out of order",

	/* related to document structure */
	".so is fragile, better use ln(1)",
	"no document body",
	"content before first section header",
	"first section is not \"NAME\"",
	"NAME section without Nm before Nd",
	"NAME section without description",
	"description not at the end of NAME",
	"bad NAME section content",
	"missing comma before name",
	"missing description line, using \"\"",
	"sections out of conventional order",
	"duplicate section title",
	"unexpected section",
	"unusual Xr order",
	"unusual Xr punctuation",
	"AUTHORS section without An macro",

	/* related to macros and nesting */
	"obsolete macro",
	"macro neither callable nor escaped",
	"skipping paragraph macro",
	"moving paragraph macro out of list",
	"skipping no-space macro",
	"blocks badly nested",
	"nested displays are not portable",
	"moving content out of list",
	"fill mode already enabled, skipping",
	"fill mode already disabled, skipping",
	"line scope broken",

	/* related to missing macro arguments */
	"skipping empty request",
	"conditional request controls empty scope",
	"skipping empty macro",
	"empty block",
	"empty argument, using 0n",
	"missing display type, using -ragged",
	"list type is not the first argument",
	"missing -width in -tag list, using 6n",
	"missing utility name, using \"\"",
	"missing function name, using \"\"",
	"empty head in list item",
	"empty list item",
	"missing font type, using \\fR",
	"unknown font type, using \\fR",
	"nothing follows prefix",
	"empty reference block",
	"missing section argument",
	"missing -std argument, adding it",
	"missing option string, using \"\"",
	"missing resource identifier, using \"\"",
	"missing eqn box, using \"\"",

	/* related to bad macro arguments */
	"unterminated quoted argument",
	"duplicate argument",
	"skipping duplicate argument",
	"skipping duplicate display type",
	"skipping duplicate list type",
	"skipping -width argument",
	"wrong number of cells",
	"unknown AT&T UNIX version",
	"comma in function argument",
	"parenthesis in function name",
	"invalid content in Rs block",
	"invalid Boolean argument",
	"unknown font, skipping request",
	"odd number of characters in request",

	/* related to plain text */
	"blank line in fill mode, using .sp",
	"tab in filled text",
	"whitespace at end of input line",
	"new sentence, new line",
	"bad comment style",
	"invalid escape sequence",
	"undefined string, using \"\"",

	/* related to tables */
	"tbl line starts with span",
	"tbl column starts with span",
	"skipping vertical bar in tbl layout",

	"generic error",

	/* related to tables */
	"non-alphabetic character in tbl options",
	"skipping unknown tbl option",
	"missing tbl option argument",
	"wrong tbl option argument size",
	"empty tbl layout",
	"invalid character in tbl layout",
	"unmatched parenthesis in tbl layout",
	"tbl without any data cells",
	"ignoring data in spanned tbl cell",
	"ignoring extra tbl data cells",
	"data block open at end of tbl",

	/* related to document structure and macros */
	/*
	 * NOTE(review): this slot has no fixed text; presumably it is
	 * MANDOCERR_FILE, which mparse_open() reports with a
	 * strerror(3) message instead - confirm against mandoc.h.
	 */
	NULL,
	"input stack limit exceeded, infinite loop?",
	"skipping bad character",
	"skipping unknown macro",
	"skipping insecure request",
	"skipping item outside list",
	"skipping column outside column list",
	"skipping end of block that is not open",
	"fewer RS blocks open, skipping",
	"inserting missing end of block",
	"appending missing end of block",

	/* related to request and macro arguments */
	"escaped character not allowed in a name",
	"NOT IMPLEMENTED: Bd -file",
	"skipping display without arguments",
	"missing list type, using -item",
	"missing manual name, using \"\"",
	"uname(3) system call failed, using UNKNOWN",
	"unknown standard specifier",
	"skipping request without numeric argument",
	"NOT IMPLEMENTED: .so with absolute path or \"..\"",
	".so request failed",
	"skipping all arguments",
	"skipping excess arguments",
	"divide by zero",

	"unsupported feature",
	"input too large",
	"unsupported control character",
	"unsupported roff request",
	"eqn delim option in tbl",
	"unsupported tbl layout modifier",
	"ignoring macro in table",
};
241
/*
 * Display name of each message level, indexed by enum mandoclevel
 * and returned verbatim by mparse_strlevel().
 */
static	const char * const	mandoclevels[MANDOCLEVEL_MAX] = {
	"SUCCESS",
	"RESERVED",
	"WARNING",
	"ERROR",
	"UNSUPP",
	"BADARG",
	"SYSERR"
};
251
252
253 static void
254 resize_buf(struct buf *buf, size_t initial)
255 {
256
257 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
258 buf->buf = mandoc_realloc(buf->buf, buf->sz);
259 }
260
261 static void
262 choose_parser(struct mparse *curp)
263 {
264 char *cp, *ep;
265 int format;
266
267 /*
268 * If neither command line arguments -mdoc or -man select
269 * a parser nor the roff parser found a .Dd or .TH macro
270 * yet, look ahead in the main input buffer.
271 */
272
273 if ((format = roff_getformat(curp->roff)) == 0) {
274 cp = curp->primary->buf;
275 ep = cp + curp->primary->sz;
276 while (cp < ep) {
277 if (*cp == '.' || *cp == '\'') {
278 cp++;
279 if (cp[0] == 'D' && cp[1] == 'd') {
280 format = MPARSE_MDOC;
281 break;
282 }
283 if (cp[0] == 'T' && cp[1] == 'H') {
284 format = MPARSE_MAN;
285 break;
286 }
287 }
288 cp = memchr(cp, '\n', ep - cp);
289 if (cp == NULL)
290 break;
291 cp++;
292 }
293 }
294
295 if (format == MPARSE_MDOC) {
296 mdoc_hash_init();
297 curp->man->macroset = MACROSET_MDOC;
298 curp->man->first->tok = TOKEN_NONE;
299 } else {
300 man_hash_init();
301 curp->man->macroset = MACROSET_MAN;
302 curp->man->first->tok = TOKEN_NONE;
303 }
304 }
305
/*
 * Main parse routine for a buffer.
 * It assumes encoding and line numbering are already set up.
 * It can recurse directly (for invocations of user-defined
 * macros, inline equations, and input line traps)
 * and indirectly (for .so file inclusion).
 *
 * curp:  parser state; curp->line provides the first line number.
 * blk:   the buffer to parse, passed by value.
 * i:     byte offset in blk where parsing starts.
 * start: non-zero for file content handed in by
 *        mparse_parse_buffer(): line numbers and the reparse counter
 *        are maintained and a NUL byte inside a line is reported as a
 *        bad character.  Zero for temporary, NUL-terminated buffers
 *        that are being re-parsed: parsing stops at the first NUL.
 */
static void
mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
{
	const struct tbl_span	*span;
	struct buf	 ln;
	const char	*save_file;
	char		*cp;
	size_t		 pos; /* byte number in the ln buffer */
	size_t		 j; /* auxiliary byte number in the blk buffer */
	enum rofferr	 rr;
	int		 of;
	int		 lnn; /* line number in the real file */
	int		 fd;
	unsigned char	 c;

	memset(&ln, 0, sizeof(ln));

	lnn = curp->line;
	pos = 0;

	/* Outer loop: one iteration per logical input line. */

	while (i < blk.sz) {
		if (0 == pos && '\0' == blk.buf[i])
			break;

		if (start) {
			curp->line = lnn;
			curp->reparse_count = 0;

			/*
			 * While both encodings are still possible,
			 * let preconv_cue() inspect the first two lines.
			 */

			if (lnn < 3 &&
			    curp->filenc & MPARSE_UTF8 &&
			    curp->filenc & MPARSE_LATIN1)
				curp->filenc = preconv_cue(&blk, i);
		}

		/* Inner loop: copy one line from blk into ln. */

		while (i < blk.sz && (start || blk.buf[i] != '\0')) {

			/*
			 * When finding an unescaped newline character,
			 * leave the character loop to process the line.
			 * Skip a preceding carriage return, if any.
			 */

			if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
			    '\n' == blk.buf[i + 1])
				++i;
			if ('\n' == blk.buf[i]) {
				++i;
				++lnn;
				break;
			}

			/*
			 * Make sure we have space for the worst
			 * case of 11 bytes: "\\[u10ffff]\0"
			 */

			if (pos + 11 > ln.sz)
				resize_buf(&ln, 256);

			/*
			 * Encode 8-bit input.
			 * If no encoding is selected or the conversion
			 * fails, report the byte and substitute '?'.
			 */

			c = blk.buf[i];
			if (c & 0x80) {
				if ( ! (curp->filenc && preconv_encode(
				    &blk, &i, &ln, &pos, &curp->filenc))) {
					mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
					    curp->line, pos, "0x%x", c);
					ln.buf[pos++] = '?';
					i++;
				}
				continue;
			}

			/*
			 * Exclude control characters.
			 * A carriage return is dropped from the line;
			 * any other excluded byte is replaced by '?'.
			 */

			if (c == 0x7f || (c < 0x20 && c != 0x09)) {
				mandoc_vmsg(c == 0x00 || c == 0x04 ||
				    c > 0x0a ? MANDOCERR_CHAR_BAD :
				    MANDOCERR_CHAR_UNSUPP,
				    curp, curp->line, pos, "0x%x", c);
				i++;
				if (c != '\r')
					ln.buf[pos++] = '?';
				continue;
			}

			/* Trailing backslash = a plain char. */

			if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
				ln.buf[pos++] = blk.buf[i++];
				continue;
			}

			/*
			 * Found escape and at least one other character.
			 * When it's a newline character, skip it.
			 * When there is a carriage return in between,
			 * skip that one as well.
			 */

			if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
			    '\n' == blk.buf[i + 2])
				++i;
			if ('\n' == blk.buf[i + 1]) {
				i += 2;
				++lnn;
				continue;
			}

			if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
				/* Remember where the comment begins. */
				j = i;
				i += 2;
				/* Comment, skip to end of line */
				for (; i < blk.sz; ++i) {
					if (blk.buf[i] != '\n')
						continue;
					if (blk.buf[i - 1] == ' ' ||
					    blk.buf[i - 1] == '\t')
						mandoc_msg(
						    MANDOCERR_SPACE_EOL,
						    curp, curp->line,
						    pos + i-1 - j, NULL);
					++i;
					++lnn;
					break;
				}

				/*
				 * Backout trailing whitespaces,
				 * but keep a blank preceded by a backslash.
				 */
				for (; pos > 0; --pos) {
					if (ln.buf[pos - 1] != ' ')
						break;
					if (pos > 2 && ln.buf[pos - 2] == '\\')
						break;
				}
				break;
			}

			/* Catch escaped bogus characters. */

			c = (unsigned char) blk.buf[i+1];

			if ( ! (isascii(c) &&
			    (isgraph(c) || isblank(c)))) {
				mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
				    curp->line, pos, "0x%x", c);
				i += 2;
				ln.buf[pos++] = '?';
				continue;
			}

			/* Some other escape sequence, copy & cont. */

			ln.buf[pos++] = blk.buf[i++];
			ln.buf[pos++] = blk.buf[i++];
		}

		/* Make room for the terminating NUL, then add it. */

		if (pos >= ln.sz)
			resize_buf(&ln, 256);

		ln.buf[pos] = '\0';

		/*
		 * A significant amount of complexity is contained by
		 * the roff preprocessor.  It's line-oriented but can be
		 * expressed on one line, so we need at times to
		 * readjust our starting point and re-run it.  The roff
		 * preprocessor can also readjust the buffers with new
		 * data, so we pass them in wholesale.
		 */

		of = 0;

		/*
		 * Maintain a lookaside buffer of all parsed lines.  We
		 * only do this if mparse_keep() has been invoked (the
		 * buffer may be accessed with mparse_getkeep()).
		 * The two extra bytes hold the newline and the NUL.
		 */

		if (curp->secondary) {
			curp->secondary->buf = mandoc_realloc(
			    curp->secondary->buf,
			    curp->secondary->sz + pos + 2);
			memcpy(curp->secondary->buf +
			    curp->secondary->sz,
			    ln.buf, pos);
			curp->secondary->sz += pos;
			curp->secondary->buf
				[curp->secondary->sz] = '\n';
			curp->secondary->sz++;
			curp->secondary->buf
				[curp->secondary->sz] = '\0';
		}
rerun:
		rr = roff_parseln(curp->roff, curp->line, &ln, &of);

		switch (rr) {
		case ROFF_REPARSE:
			/* Parse the rewritten line, unless looping. */
			if (REPARSE_LIMIT >= ++curp->reparse_count)
				mparse_buf_r(curp, ln, of, 0);
			else
				mandoc_msg(MANDOCERR_ROFFLOOP, curp,
				    curp->line, pos, NULL);
			pos = 0;
			continue;
		case ROFF_APPEND:
			/* Keep ln and append the next input line to it. */
			pos = strlen(ln.buf);
			continue;
		case ROFF_RERUN:
			goto rerun;
		case ROFF_IGN:
			pos = 0;
			continue;
		case ROFF_SO:
			/*
			 * Without MPARSE_SO, an .so request that is the
			 * last thing in the buffer is not followed;
			 * its target is recorded in curp->sodest instead.
			 */
			if ( ! (curp->options & MPARSE_SO) &&
			    (i >= blk.sz || blk.buf[i] == '\0')) {
				curp->sodest = mandoc_strdup(ln.buf + of);
				free(ln.buf);
				return;
			}
			/*
			 * We remove `so' clauses from our lookaside
			 * buffer because we're going to descend into
			 * the file recursively.
			 */
			if (curp->secondary)
				curp->secondary->sz -= pos + 1;
			/* mparse_open() overwrites curp->file. */
			save_file = curp->file;
			if ((fd = mparse_open(curp, ln.buf + of)) != -1) {
				mparse_readfd(curp, fd, ln.buf + of);
				close(fd);
				curp->file = save_file;
			} else {
				/*
				 * The file cannot be opened: report that
				 * and parse a short replacement text
				 * naming the file.
				 */
				curp->file = save_file;
				mandoc_vmsg(MANDOCERR_SO_FAIL,
				    curp, curp->line, pos,
				    ".so %s", ln.buf + of);
				ln.sz = mandoc_asprintf(&cp,
				    ".sp\nSee the file %s.\n.sp",
				    ln.buf + of);
				free(ln.buf);
				ln.buf = cp;
				of = 0;
				mparse_buf_r(curp, ln, of, 0);
			}
			pos = 0;
			continue;
		default:
			break;
		}

		if (curp->man->macroset == MACROSET_NONE)
			choose_parser(curp);

		/*
		 * Lastly, push down into the parsers themselves.
		 * If libroff returns ROFF_TBL, then add it to the
		 * currently open parse.  Since we only get here if
		 * there does exist data (see tbl_data.c), we're
		 * guaranteed that something's been allocated.
		 * Do the same for ROFF_EQN.
		 * A return value of 2 from the macro parser
		 * ends parsing of this buffer.
		 */

		if (rr == ROFF_TBL)
			while ((span = roff_span(curp->roff)) != NULL)
				roff_addtbl(curp->man, span);
		else if (rr == ROFF_EQN)
			roff_addeqn(curp->man, roff_eqn(curp->roff));
		else if ((curp->man->macroset == MACROSET_MDOC ?
		    mdoc_parseln(curp->man, curp->line, ln.buf, of) :
		    man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
				break;

		/* Temporary buffers typically are not full. */

		if (0 == start && '\0' == blk.buf[i])
			break;

		/* Start the next input line. */

		pos = 0;
	}

	free(ln.buf);
}
601
602 static int
603 read_whole_file(struct mparse *curp, const char *file, int fd,
604 struct buf *fb, int *with_mmap)
605 {
606 gzFile gz;
607 size_t off;
608 ssize_t ssz;
609
610 struct stat st;
611
612 if (fstat(fd, &st) == -1)
613 err((int)MANDOCLEVEL_SYSERR, "%s", file);
614
615 /*
616 * If we're a regular file, try just reading in the whole entry
617 * via mmap(). This is faster than reading it into blocks, and
618 * since each file is only a few bytes to begin with, I'm not
619 * concerned that this is going to tank any machines.
620 */
621
622 if (curp->gzip == 0 && S_ISREG(st.st_mode)) {
623 if (st.st_size > 0x7fffffff) {
624 mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
625 return 0;
626 }
627 *with_mmap = 1;
628 fb->sz = (size_t)st.st_size;
629 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
630 if (fb->buf != MAP_FAILED)
631 return 1;
632 }
633
634 if (curp->gzip) {
635 if ((gz = gzdopen(fd, "rb")) == NULL)
636 err((int)MANDOCLEVEL_SYSERR, "%s", file);
637 } else
638 gz = NULL;
639
640 /*
641 * If this isn't a regular file (like, say, stdin), then we must
642 * go the old way and just read things in bit by bit.
643 */
644
645 *with_mmap = 0;
646 off = 0;
647 fb->sz = 0;
648 fb->buf = NULL;
649 for (;;) {
650 if (off == fb->sz) {
651 if (fb->sz == (1U << 31)) {
652 mandoc_msg(MANDOCERR_TOOLARGE, curp,
653 0, 0, NULL);
654 break;
655 }
656 resize_buf(fb, 65536);
657 }
658 ssz = curp->gzip ?
659 gzread(gz, fb->buf + (int)off, fb->sz - off) :
660 read(fd, fb->buf + (int)off, fb->sz - off);
661 if (ssz == 0) {
662 fb->sz = off;
663 return 1;
664 }
665 if (ssz == -1)
666 err((int)MANDOCLEVEL_SYSERR, "%s", file);
667 off += (size_t)ssz;
668 }
669
670 free(fb->buf);
671 fb->buf = NULL;
672 return 0;
673 }
674
675 static void
676 mparse_end(struct mparse *curp)
677 {
678 if (curp->man->macroset == MACROSET_NONE)
679 curp->man->macroset = MACROSET_MAN;
680 if (curp->man->macroset == MACROSET_MDOC)
681 mdoc_endparse(curp->man);
682 else
683 man_endparse(curp->man);
684 roff_endparse(curp->roff);
685 }
686
687 static void
688 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
689 {
690 struct buf *svprimary;
691 const char *svfile;
692 size_t offset;
693 static int recursion_depth;
694
695 if (64 < recursion_depth) {
696 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
697 return;
698 }
699
700 /* Line number is per-file. */
701 svfile = curp->file;
702 curp->file = file;
703 svprimary = curp->primary;
704 curp->primary = &blk;
705 curp->line = 1;
706 recursion_depth++;
707
708 /* Skip an UTF-8 byte order mark. */
709 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
710 (unsigned char)blk.buf[0] == 0xef &&
711 (unsigned char)blk.buf[1] == 0xbb &&
712 (unsigned char)blk.buf[2] == 0xbf) {
713 offset = 3;
714 curp->filenc &= ~MPARSE_LATIN1;
715 } else
716 offset = 0;
717
718 mparse_buf_r(curp, blk, offset, 1);
719
720 if (--recursion_depth == 0)
721 mparse_end(curp);
722
723 curp->primary = svprimary;
724 curp->file = svfile;
725 }
726
727 enum mandoclevel
728 mparse_readmem(struct mparse *curp, void *buf, size_t len,
729 const char *file)
730 {
731 struct buf blk;
732
733 blk.buf = buf;
734 blk.sz = len;
735
736 mparse_parse_buffer(curp, blk, file);
737 return curp->file_status;
738 }
739
740 /*
741 * Read the whole file into memory and call the parsers.
742 * Called recursively when an .so request is encountered.
743 */
744 enum mandoclevel
745 mparse_readfd(struct mparse *curp, int fd, const char *file)
746 {
747 struct buf blk;
748 int with_mmap;
749 int save_filenc;
750
751 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
752 save_filenc = curp->filenc;
753 curp->filenc = curp->options &
754 (MPARSE_UTF8 | MPARSE_LATIN1);
755 mparse_parse_buffer(curp, blk, file);
756 curp->filenc = save_filenc;
757 if (with_mmap)
758 munmap(blk.buf, blk.sz);
759 else
760 free(blk.buf);
761 }
762 return curp->file_status;
763 }
764
765 int
766 mparse_open(struct mparse *curp, const char *file)
767 {
768 char *cp;
769 int fd;
770
771 curp->file = file;
772 cp = strrchr(file, '.');
773 curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz"));
774
775 /* First try to use the filename as it is. */
776
777 if ((fd = open(file, O_RDONLY)) != -1)
778 return fd;
779
780 /*
781 * If that doesn't work and the filename doesn't
782 * already end in .gz, try appending .gz.
783 */
784
785 if ( ! curp->gzip) {
786 mandoc_asprintf(&cp, "%s.gz", file);
787 fd = open(cp, O_RDONLY);
788 free(cp);
789 if (fd != -1) {
790 curp->gzip = 1;
791 return fd;
792 }
793 }
794
795 /* Neither worked, give up. */
796
797 mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
798 return -1;
799 }
800
801 struct mparse *
802 mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
803 const char *defos)
804 {
805 struct mparse *curp;
806
807 curp = mandoc_calloc(1, sizeof(struct mparse));
808
809 curp->options = options;
810 curp->wlevel = wlevel;
811 curp->mmsg = mmsg;
812 curp->defos = defos;
813
814 curp->roff = roff_alloc(curp, options);
815 curp->man = roff_man_alloc( curp->roff, curp, curp->defos,
816 curp->options & MPARSE_QUICK ? 1 : 0);
817 if (curp->options & MPARSE_MDOC) {
818 mdoc_hash_init();
819 curp->man->macroset = MACROSET_MDOC;
820 } else if (curp->options & MPARSE_MAN) {
821 man_hash_init();
822 curp->man->macroset = MACROSET_MAN;
823 }
824 curp->man->first->tok = TOKEN_NONE;
825 return curp;
826 }
827
828 void
829 mparse_reset(struct mparse *curp)
830 {
831 roff_reset(curp->roff);
832 roff_man_reset(curp->man);
833 if (curp->secondary)
834 curp->secondary->sz = 0;
835
836 curp->file_status = MANDOCLEVEL_OK;
837
838 free(curp->sodest);
839 curp->sodest = NULL;
840 curp->gzip = 0;
841 }
842
843 void
844 mparse_free(struct mparse *curp)
845 {
846
847 roff_man_free(curp->man);
848 if (curp->roff)
849 roff_free(curp->roff);
850 if (curp->secondary)
851 free(curp->secondary->buf);
852
853 free(curp->secondary);
854 free(curp->sodest);
855 free(curp);
856 }
857
858 void
859 mparse_result(struct mparse *curp, struct roff_man **man,
860 char **sodest)
861 {
862
863 if (sodest && NULL != (*sodest = curp->sodest)) {
864 *man = NULL;
865 return;
866 }
867 if (man)
868 *man = curp->man;
869 }
870
871 void
872 mparse_updaterc(struct mparse *curp, enum mandoclevel *rc)
873 {
874 if (curp->file_status > *rc)
875 *rc = curp->file_status;
876 }
877
878 void
879 mandoc_vmsg(enum mandocerr t, struct mparse *m,
880 int ln, int pos, const char *fmt, ...)
881 {
882 char buf[256];
883 va_list ap;
884
885 va_start(ap, fmt);
886 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
887 va_end(ap);
888
889 mandoc_msg(t, m, ln, pos, buf);
890 }
891
892 void
893 mandoc_msg(enum mandocerr er, struct mparse *m,
894 int ln, int col, const char *msg)
895 {
896 enum mandoclevel level;
897
898 level = MANDOCLEVEL_UNSUPP;
899 while (er < mandoclimits[level])
900 level--;
901
902 if (level < m->wlevel && er != MANDOCERR_FILE)
903 return;
904
905 if (m->mmsg)
906 (*m->mmsg)(er, level, m->file, ln, col, msg);
907
908 if (m->file_status < level)
909 m->file_status = level;
910 }
911
912 const char *
913 mparse_strerror(enum mandocerr er)
914 {
915
916 return mandocerrs[er];
917 }
918
919 const char *
920 mparse_strlevel(enum mandoclevel lvl)
921 {
922 return mandoclevels[lvl];
923 }
924
925 void
926 mparse_keep(struct mparse *p)
927 {
928
929 assert(NULL == p->secondary);
930 p->secondary = mandoc_calloc(1, sizeof(struct buf));
931 }
932
933 const char *
934 mparse_getkeep(const struct mparse *p)
935 {
936
937 assert(p->secondary);
938 return p->secondary->sz ? p->secondary->buf : NULL;
939 }