]> git.cameronkatri.com Git - mandoc.git/blob - read.c
link to http://mdocml.bsd.lv/mdoc/ below SEE ALSO;
[mandoc.git] / read.c
1 /* $Id: read.c,v 1.152 2016/10/09 18:16:56 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2016 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include "config.h"
20
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #include <sys/stat.h>
24
25 #include <assert.h>
26 #include <ctype.h>
27 #if HAVE_ERR
28 #include <err.h>
29 #endif
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <stdarg.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 #include <zlib.h>
39
40 #include "mandoc_aux.h"
41 #include "mandoc.h"
42 #include "roff.h"
43 #include "mdoc.h"
44 #include "man.h"
45 #include "libmandoc.h"
46 #include "roff_int.h"
47
48 #define REPARSE_LIMIT 1000
49
50 struct mparse {
51 struct roff_man *man; /* man parser */
52 struct roff *roff; /* roff parser (!NULL) */
53 char *sodest; /* filename pointed to by .so */
54 const char *file; /* filename of current input file */
55 struct buf *primary; /* buffer currently being parsed */
56 struct buf *secondary; /* preprocessed copy of input */
57 const char *defos; /* default operating system */
58 mandocmsg mmsg; /* warning/error message handler */
59 enum mandoclevel file_status; /* status of current parse */
60 enum mandoclevel wlevel; /* ignore messages below this */
61 int options; /* parser options */
62 int gzip; /* current input file is gzipped */
63 int filenc; /* encoding of the current file */
64 int reparse_count; /* finite interp. stack */
65 int line; /* line number in the file */
66 };
67
/* File-local helpers, defined below. */
static void	 choose_parser(struct mparse *curp);
static void	 resize_buf(struct buf *buf, size_t initial);
static void	 mparse_buf_r(struct mparse *curp, struct buf blk,
			size_t i, int start);
static int	 read_whole_file(struct mparse *curp, const char *file,
			int fd, struct buf *fb, int *with_mmap);
static void	 mparse_end(struct mparse *curp);
static void	 mparse_parse_buffer(struct mparse *curp, struct buf blk,
			const char *file);
76
77 static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = {
78 MANDOCERR_OK,
79 MANDOCERR_WARNING,
80 MANDOCERR_WARNING,
81 MANDOCERR_ERROR,
82 MANDOCERR_UNSUPP,
83 MANDOCERR_MAX,
84 MANDOCERR_MAX
85 };
86
87 static const char * const mandocerrs[MANDOCERR_MAX] = {
88 "ok",
89
90 "generic warning",
91
92 /* related to the prologue */
93 "missing manual title, using UNTITLED",
94 "missing manual title, using \"\"",
95 "lower case character in document title",
96 "missing manual section, using \"\"",
97 "unknown manual section",
98 "missing date, using today's date",
99 "cannot parse date, using it verbatim",
100 "missing Os macro, using \"\"",
101 "duplicate prologue macro",
102 "late prologue macro",
103 "skipping late title macro",
104 "prologue macros out of order",
105
106 /* related to document structure */
107 ".so is fragile, better use ln(1)",
108 "no document body",
109 "content before first section header",
110 "first section is not \"NAME\"",
111 "NAME section without name",
112 "NAME section without description",
113 "description not at the end of NAME",
114 "bad NAME section content",
115 "missing description line, using \"\"",
116 "sections out of conventional order",
117 "duplicate section title",
118 "unexpected section",
119 "unusual Xr order",
120 "unusual Xr punctuation",
121 "AUTHORS section without An macro",
122
123 /* related to macros and nesting */
124 "obsolete macro",
125 "macro neither callable nor escaped",
126 "skipping paragraph macro",
127 "moving paragraph macro out of list",
128 "skipping no-space macro",
129 "blocks badly nested",
130 "nested displays are not portable",
131 "moving content out of list",
132 "fill mode already enabled, skipping",
133 "fill mode already disabled, skipping",
134 "line scope broken",
135
136 /* related to missing macro arguments */
137 "skipping empty request",
138 "conditional request controls empty scope",
139 "skipping empty macro",
140 "empty block",
141 "empty argument, using 0n",
142 "missing display type, using -ragged",
143 "list type is not the first argument",
144 "missing -width in -tag list, using 6n",
145 "missing utility name, using \"\"",
146 "missing function name, using \"\"",
147 "empty head in list item",
148 "empty list item",
149 "missing font type, using \\fR",
150 "unknown font type, using \\fR",
151 "nothing follows prefix",
152 "empty reference block",
153 "missing -std argument, adding it",
154 "missing option string, using \"\"",
155 "missing resource identifier, using \"\"",
156 "missing eqn box, using \"\"",
157
158 /* related to bad macro arguments */
159 "unterminated quoted argument",
160 "duplicate argument",
161 "skipping duplicate argument",
162 "skipping duplicate display type",
163 "skipping duplicate list type",
164 "skipping -width argument",
165 "wrong number of cells",
166 "unknown AT&T UNIX version",
167 "comma in function argument",
168 "parenthesis in function name",
169 "invalid content in Rs block",
170 "invalid Boolean argument",
171 "unknown font, skipping request",
172 "odd number of characters in request",
173
174 /* related to plain text */
175 "blank line in fill mode, using .sp",
176 "tab in filled text",
177 "whitespace at end of input line",
178 "bad comment style",
179 "invalid escape sequence",
180 "undefined string, using \"\"",
181
182 /* related to tables */
183 "tbl line starts with span",
184 "tbl column starts with span",
185 "skipping vertical bar in tbl layout",
186
187 "generic error",
188
189 /* related to tables */
190 "non-alphabetic character in tbl options",
191 "skipping unknown tbl option",
192 "missing tbl option argument",
193 "wrong tbl option argument size",
194 "empty tbl layout",
195 "invalid character in tbl layout",
196 "unmatched parenthesis in tbl layout",
197 "tbl without any data cells",
198 "ignoring data in spanned tbl cell",
199 "ignoring extra tbl data cells",
200 "data block open at end of tbl",
201
202 /* related to document structure and macros */
203 NULL,
204 "input stack limit exceeded, infinite loop?",
205 "skipping bad character",
206 "skipping unknown macro",
207 "skipping insecure request",
208 "skipping item outside list",
209 "skipping column outside column list",
210 "skipping end of block that is not open",
211 "fewer RS blocks open, skipping",
212 "inserting missing end of block",
213 "appending missing end of block",
214
215 /* related to request and macro arguments */
216 "escaped character not allowed in a name",
217 "NOT IMPLEMENTED: Bd -file",
218 "skipping display without arguments",
219 "missing list type, using -item",
220 "missing manual name, using \"\"",
221 "uname(3) system call failed, using UNKNOWN",
222 "unknown standard specifier",
223 "skipping request without numeric argument",
224 "NOT IMPLEMENTED: .so with absolute path or \"..\"",
225 ".so request failed",
226 "skipping all arguments",
227 "skipping excess arguments",
228 "divide by zero",
229
230 "unsupported feature",
231 "input too large",
232 "unsupported control character",
233 "unsupported roff request",
234 "eqn delim option in tbl",
235 "unsupported tbl layout modifier",
236 "ignoring macro in table",
237 };
238
239 static const char * const mandoclevels[MANDOCLEVEL_MAX] = {
240 "SUCCESS",
241 "RESERVED",
242 "WARNING",
243 "ERROR",
244 "UNSUPP",
245 "BADARG",
246 "SYSERR"
247 };
248
249
250 static void
251 resize_buf(struct buf *buf, size_t initial)
252 {
253
254 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
255 buf->buf = mandoc_realloc(buf->buf, buf->sz);
256 }
257
258 static void
259 choose_parser(struct mparse *curp)
260 {
261 char *cp, *ep;
262 int format;
263
264 /*
265 * If neither command line arguments -mdoc or -man select
266 * a parser nor the roff parser found a .Dd or .TH macro
267 * yet, look ahead in the main input buffer.
268 */
269
270 if ((format = roff_getformat(curp->roff)) == 0) {
271 cp = curp->primary->buf;
272 ep = cp + curp->primary->sz;
273 while (cp < ep) {
274 if (*cp == '.' || *cp == '\'') {
275 cp++;
276 if (cp[0] == 'D' && cp[1] == 'd') {
277 format = MPARSE_MDOC;
278 break;
279 }
280 if (cp[0] == 'T' && cp[1] == 'H') {
281 format = MPARSE_MAN;
282 break;
283 }
284 }
285 cp = memchr(cp, '\n', ep - cp);
286 if (cp == NULL)
287 break;
288 cp++;
289 }
290 }
291
292 if (format == MPARSE_MDOC) {
293 mdoc_hash_init();
294 curp->man->macroset = MACROSET_MDOC;
295 curp->man->first->tok = TOKEN_NONE;
296 } else {
297 man_hash_init();
298 curp->man->macroset = MACROSET_MAN;
299 curp->man->first->tok = TOKEN_NONE;
300 }
301 }
302
303 /*
304 * Main parse routine for a buffer.
305 * It assumes encoding and line numbering are already set up.
306 * It can recurse directly (for invocations of user-defined
307 * macros, inline equations, and input line traps)
308 * and indirectly (for .so file inclusion).
309 */
310 static void
311 mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
312 {
313 const struct tbl_span *span;
314 struct buf ln;
315 const char *save_file;
316 char *cp;
317 size_t pos; /* byte number in the ln buffer */
318 enum rofferr rr;
319 int of;
320 int lnn; /* line number in the real file */
321 int fd;
322 unsigned char c;
323
324 memset(&ln, 0, sizeof(ln));
325
326 lnn = curp->line;
327 pos = 0;
328
329 while (i < blk.sz) {
330 if (0 == pos && '\0' == blk.buf[i])
331 break;
332
333 if (start) {
334 curp->line = lnn;
335 curp->reparse_count = 0;
336
337 if (lnn < 3 &&
338 curp->filenc & MPARSE_UTF8 &&
339 curp->filenc & MPARSE_LATIN1)
340 curp->filenc = preconv_cue(&blk, i);
341 }
342
343 while (i < blk.sz && (start || blk.buf[i] != '\0')) {
344
345 /*
346 * When finding an unescaped newline character,
347 * leave the character loop to process the line.
348 * Skip a preceding carriage return, if any.
349 */
350
351 if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
352 '\n' == blk.buf[i + 1])
353 ++i;
354 if ('\n' == blk.buf[i]) {
355 ++i;
356 ++lnn;
357 break;
358 }
359
360 /*
361 * Make sure we have space for the worst
362 * case of 11 bytes: "\\[u10ffff]\0"
363 */
364
365 if (pos + 11 > ln.sz)
366 resize_buf(&ln, 256);
367
368 /*
369 * Encode 8-bit input.
370 */
371
372 c = blk.buf[i];
373 if (c & 0x80) {
374 if ( ! (curp->filenc && preconv_encode(
375 &blk, &i, &ln, &pos, &curp->filenc))) {
376 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
377 curp->line, pos, "0x%x", c);
378 ln.buf[pos++] = '?';
379 i++;
380 }
381 continue;
382 }
383
384 /*
385 * Exclude control characters.
386 */
387
388 if (c == 0x7f || (c < 0x20 && c != 0x09)) {
389 mandoc_vmsg(c == 0x00 || c == 0x04 ||
390 c > 0x0a ? MANDOCERR_CHAR_BAD :
391 MANDOCERR_CHAR_UNSUPP,
392 curp, curp->line, pos, "0x%x", c);
393 i++;
394 if (c != '\r')
395 ln.buf[pos++] = '?';
396 continue;
397 }
398
399 /* Trailing backslash = a plain char. */
400
401 if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
402 ln.buf[pos++] = blk.buf[i++];
403 continue;
404 }
405
406 /*
407 * Found escape and at least one other character.
408 * When it's a newline character, skip it.
409 * When there is a carriage return in between,
410 * skip that one as well.
411 */
412
413 if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
414 '\n' == blk.buf[i + 2])
415 ++i;
416 if ('\n' == blk.buf[i + 1]) {
417 i += 2;
418 ++lnn;
419 continue;
420 }
421
422 if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
423 i += 2;
424 /* Comment, skip to end of line */
425 for (; i < blk.sz; ++i) {
426 if ('\n' == blk.buf[i]) {
427 ++i;
428 ++lnn;
429 break;
430 }
431 }
432
433 /* Backout trailing whitespaces */
434 for (; pos > 0; --pos) {
435 if (ln.buf[pos - 1] != ' ')
436 break;
437 if (pos > 2 && ln.buf[pos - 2] == '\\')
438 break;
439 }
440 break;
441 }
442
443 /* Catch escaped bogus characters. */
444
445 c = (unsigned char) blk.buf[i+1];
446
447 if ( ! (isascii(c) &&
448 (isgraph(c) || isblank(c)))) {
449 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
450 curp->line, pos, "0x%x", c);
451 i += 2;
452 ln.buf[pos++] = '?';
453 continue;
454 }
455
456 /* Some other escape sequence, copy & cont. */
457
458 ln.buf[pos++] = blk.buf[i++];
459 ln.buf[pos++] = blk.buf[i++];
460 }
461
462 if (pos >= ln.sz)
463 resize_buf(&ln, 256);
464
465 ln.buf[pos] = '\0';
466
467 /*
468 * A significant amount of complexity is contained by
469 * the roff preprocessor. It's line-oriented but can be
470 * expressed on one line, so we need at times to
471 * readjust our starting point and re-run it. The roff
472 * preprocessor can also readjust the buffers with new
473 * data, so we pass them in wholesale.
474 */
475
476 of = 0;
477
478 /*
479 * Maintain a lookaside buffer of all parsed lines. We
480 * only do this if mparse_keep() has been invoked (the
481 * buffer may be accessed with mparse_getkeep()).
482 */
483
484 if (curp->secondary) {
485 curp->secondary->buf = mandoc_realloc(
486 curp->secondary->buf,
487 curp->secondary->sz + pos + 2);
488 memcpy(curp->secondary->buf +
489 curp->secondary->sz,
490 ln.buf, pos);
491 curp->secondary->sz += pos;
492 curp->secondary->buf
493 [curp->secondary->sz] = '\n';
494 curp->secondary->sz++;
495 curp->secondary->buf
496 [curp->secondary->sz] = '\0';
497 }
498 rerun:
499 rr = roff_parseln(curp->roff, curp->line, &ln, &of);
500
501 switch (rr) {
502 case ROFF_REPARSE:
503 if (REPARSE_LIMIT >= ++curp->reparse_count)
504 mparse_buf_r(curp, ln, of, 0);
505 else
506 mandoc_msg(MANDOCERR_ROFFLOOP, curp,
507 curp->line, pos, NULL);
508 pos = 0;
509 continue;
510 case ROFF_APPEND:
511 pos = strlen(ln.buf);
512 continue;
513 case ROFF_RERUN:
514 goto rerun;
515 case ROFF_IGN:
516 pos = 0;
517 continue;
518 case ROFF_SO:
519 if ( ! (curp->options & MPARSE_SO) &&
520 (i >= blk.sz || blk.buf[i] == '\0')) {
521 curp->sodest = mandoc_strdup(ln.buf + of);
522 free(ln.buf);
523 return;
524 }
525 /*
526 * We remove `so' clauses from our lookaside
527 * buffer because we're going to descend into
528 * the file recursively.
529 */
530 if (curp->secondary)
531 curp->secondary->sz -= pos + 1;
532 save_file = curp->file;
533 if ((fd = mparse_open(curp, ln.buf + of)) != -1) {
534 mparse_readfd(curp, fd, ln.buf + of);
535 close(fd);
536 curp->file = save_file;
537 } else {
538 curp->file = save_file;
539 mandoc_vmsg(MANDOCERR_SO_FAIL,
540 curp, curp->line, pos,
541 ".so %s", ln.buf + of);
542 ln.sz = mandoc_asprintf(&cp,
543 ".sp\nSee the file %s.\n.sp",
544 ln.buf + of);
545 free(ln.buf);
546 ln.buf = cp;
547 of = 0;
548 mparse_buf_r(curp, ln, of, 0);
549 }
550 pos = 0;
551 continue;
552 default:
553 break;
554 }
555
556 if (curp->man->macroset == MACROSET_NONE)
557 choose_parser(curp);
558
559 /*
560 * Lastly, push down into the parsers themselves.
561 * If libroff returns ROFF_TBL, then add it to the
562 * currently open parse. Since we only get here if
563 * there does exist data (see tbl_data.c), we're
564 * guaranteed that something's been allocated.
565 * Do the same for ROFF_EQN.
566 */
567
568 if (rr == ROFF_TBL)
569 while ((span = roff_span(curp->roff)) != NULL)
570 roff_addtbl(curp->man, span);
571 else if (rr == ROFF_EQN)
572 roff_addeqn(curp->man, roff_eqn(curp->roff));
573 else if ((curp->man->macroset == MACROSET_MDOC ?
574 mdoc_parseln(curp->man, curp->line, ln.buf, of) :
575 man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
576 break;
577
578 /* Temporary buffers typically are not full. */
579
580 if (0 == start && '\0' == blk.buf[i])
581 break;
582
583 /* Start the next input line. */
584
585 pos = 0;
586 }
587
588 free(ln.buf);
589 }
590
591 static int
592 read_whole_file(struct mparse *curp, const char *file, int fd,
593 struct buf *fb, int *with_mmap)
594 {
595 gzFile gz;
596 size_t off;
597 ssize_t ssz;
598
599 struct stat st;
600
601 if (fstat(fd, &st) == -1)
602 err((int)MANDOCLEVEL_SYSERR, "%s", file);
603
604 /*
605 * If we're a regular file, try just reading in the whole entry
606 * via mmap(). This is faster than reading it into blocks, and
607 * since each file is only a few bytes to begin with, I'm not
608 * concerned that this is going to tank any machines.
609 */
610
611 if (curp->gzip == 0 && S_ISREG(st.st_mode)) {
612 if (st.st_size > 0x7fffffff) {
613 mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
614 return 0;
615 }
616 *with_mmap = 1;
617 fb->sz = (size_t)st.st_size;
618 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
619 if (fb->buf != MAP_FAILED)
620 return 1;
621 }
622
623 if (curp->gzip) {
624 if ((gz = gzdopen(fd, "rb")) == NULL)
625 err((int)MANDOCLEVEL_SYSERR, "%s", file);
626 } else
627 gz = NULL;
628
629 /*
630 * If this isn't a regular file (like, say, stdin), then we must
631 * go the old way and just read things in bit by bit.
632 */
633
634 *with_mmap = 0;
635 off = 0;
636 fb->sz = 0;
637 fb->buf = NULL;
638 for (;;) {
639 if (off == fb->sz) {
640 if (fb->sz == (1U << 31)) {
641 mandoc_msg(MANDOCERR_TOOLARGE, curp,
642 0, 0, NULL);
643 break;
644 }
645 resize_buf(fb, 65536);
646 }
647 ssz = curp->gzip ?
648 gzread(gz, fb->buf + (int)off, fb->sz - off) :
649 read(fd, fb->buf + (int)off, fb->sz - off);
650 if (ssz == 0) {
651 fb->sz = off;
652 return 1;
653 }
654 if (ssz == -1)
655 err((int)MANDOCLEVEL_SYSERR, "%s", file);
656 off += (size_t)ssz;
657 }
658
659 free(fb->buf);
660 fb->buf = NULL;
661 return 0;
662 }
663
664 static void
665 mparse_end(struct mparse *curp)
666 {
667 if (curp->man->macroset == MACROSET_NONE)
668 curp->man->macroset = MACROSET_MAN;
669 if (curp->man->macroset == MACROSET_MDOC)
670 mdoc_endparse(curp->man);
671 else
672 man_endparse(curp->man);
673 roff_endparse(curp->roff);
674 }
675
676 static void
677 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
678 {
679 struct buf *svprimary;
680 const char *svfile;
681 size_t offset;
682 static int recursion_depth;
683
684 if (64 < recursion_depth) {
685 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
686 return;
687 }
688
689 /* Line number is per-file. */
690 svfile = curp->file;
691 curp->file = file;
692 svprimary = curp->primary;
693 curp->primary = &blk;
694 curp->line = 1;
695 recursion_depth++;
696
697 /* Skip an UTF-8 byte order mark. */
698 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
699 (unsigned char)blk.buf[0] == 0xef &&
700 (unsigned char)blk.buf[1] == 0xbb &&
701 (unsigned char)blk.buf[2] == 0xbf) {
702 offset = 3;
703 curp->filenc &= ~MPARSE_LATIN1;
704 } else
705 offset = 0;
706
707 mparse_buf_r(curp, blk, offset, 1);
708
709 if (--recursion_depth == 0)
710 mparse_end(curp);
711
712 curp->primary = svprimary;
713 curp->file = svfile;
714 }
715
716 enum mandoclevel
717 mparse_readmem(struct mparse *curp, void *buf, size_t len,
718 const char *file)
719 {
720 struct buf blk;
721
722 blk.buf = buf;
723 blk.sz = len;
724
725 mparse_parse_buffer(curp, blk, file);
726 return curp->file_status;
727 }
728
729 /*
730 * Read the whole file into memory and call the parsers.
731 * Called recursively when an .so request is encountered.
732 */
733 enum mandoclevel
734 mparse_readfd(struct mparse *curp, int fd, const char *file)
735 {
736 struct buf blk;
737 int with_mmap;
738 int save_filenc;
739
740 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
741 save_filenc = curp->filenc;
742 curp->filenc = curp->options &
743 (MPARSE_UTF8 | MPARSE_LATIN1);
744 mparse_parse_buffer(curp, blk, file);
745 curp->filenc = save_filenc;
746 if (with_mmap)
747 munmap(blk.buf, blk.sz);
748 else
749 free(blk.buf);
750 }
751 return curp->file_status;
752 }
753
754 int
755 mparse_open(struct mparse *curp, const char *file)
756 {
757 char *cp;
758 int fd;
759
760 curp->file = file;
761 cp = strrchr(file, '.');
762 curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz"));
763
764 /* First try to use the filename as it is. */
765
766 if ((fd = open(file, O_RDONLY)) != -1)
767 return fd;
768
769 /*
770 * If that doesn't work and the filename doesn't
771 * already end in .gz, try appending .gz.
772 */
773
774 if ( ! curp->gzip) {
775 mandoc_asprintf(&cp, "%s.gz", file);
776 fd = open(cp, O_RDONLY);
777 free(cp);
778 if (fd != -1) {
779 curp->gzip = 1;
780 return fd;
781 }
782 }
783
784 /* Neither worked, give up. */
785
786 mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
787 return -1;
788 }
789
790 struct mparse *
791 mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
792 const char *defos)
793 {
794 struct mparse *curp;
795
796 curp = mandoc_calloc(1, sizeof(struct mparse));
797
798 curp->options = options;
799 curp->wlevel = wlevel;
800 curp->mmsg = mmsg;
801 curp->defos = defos;
802
803 curp->roff = roff_alloc(curp, options);
804 curp->man = roff_man_alloc( curp->roff, curp, curp->defos,
805 curp->options & MPARSE_QUICK ? 1 : 0);
806 if (curp->options & MPARSE_MDOC) {
807 mdoc_hash_init();
808 curp->man->macroset = MACROSET_MDOC;
809 } else if (curp->options & MPARSE_MAN) {
810 man_hash_init();
811 curp->man->macroset = MACROSET_MAN;
812 }
813 curp->man->first->tok = TOKEN_NONE;
814 return curp;
815 }
816
817 void
818 mparse_reset(struct mparse *curp)
819 {
820 roff_reset(curp->roff);
821 roff_man_reset(curp->man);
822 if (curp->secondary)
823 curp->secondary->sz = 0;
824
825 curp->file_status = MANDOCLEVEL_OK;
826
827 free(curp->sodest);
828 curp->sodest = NULL;
829 }
830
831 void
832 mparse_free(struct mparse *curp)
833 {
834
835 roff_man_free(curp->man);
836 if (curp->roff)
837 roff_free(curp->roff);
838 if (curp->secondary)
839 free(curp->secondary->buf);
840
841 free(curp->secondary);
842 free(curp->sodest);
843 free(curp);
844 }
845
846 void
847 mparse_result(struct mparse *curp, struct roff_man **man,
848 char **sodest)
849 {
850
851 if (sodest && NULL != (*sodest = curp->sodest)) {
852 *man = NULL;
853 return;
854 }
855 if (man)
856 *man = curp->man;
857 }
858
859 void
860 mandoc_vmsg(enum mandocerr t, struct mparse *m,
861 int ln, int pos, const char *fmt, ...)
862 {
863 char buf[256];
864 va_list ap;
865
866 va_start(ap, fmt);
867 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
868 va_end(ap);
869
870 mandoc_msg(t, m, ln, pos, buf);
871 }
872
873 void
874 mandoc_msg(enum mandocerr er, struct mparse *m,
875 int ln, int col, const char *msg)
876 {
877 enum mandoclevel level;
878
879 level = MANDOCLEVEL_UNSUPP;
880 while (er < mandoclimits[level])
881 level--;
882
883 if (level < m->wlevel && er != MANDOCERR_FILE)
884 return;
885
886 if (m->mmsg)
887 (*m->mmsg)(er, level, m->file, ln, col, msg);
888
889 if (m->file_status < level)
890 m->file_status = level;
891 }
892
893 const char *
894 mparse_strerror(enum mandocerr er)
895 {
896
897 return mandocerrs[er];
898 }
899
900 const char *
901 mparse_strlevel(enum mandoclevel lvl)
902 {
903 return mandoclevels[lvl];
904 }
905
906 void
907 mparse_keep(struct mparse *p)
908 {
909
910 assert(NULL == p->secondary);
911 p->secondary = mandoc_calloc(1, sizeof(struct buf));
912 }
913
914 const char *
915 mparse_getkeep(const struct mparse *p)
916 {
917
918 assert(p->secondary);
919 return p->secondary->sz ? p->secondary->buf : NULL;
920 }