]> git.cameronkatri.com Git - mandoc.git/blob - read.c
STYLE message about full stop at the end of .Nd; inspired by mdoclint(1)
[mandoc.git] / read.c
1 /* $Id: read.c,v 1.168 2017/06/01 15:25:39 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include "config.h"
20
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #include <sys/stat.h>
24
25 #include <assert.h>
26 #include <ctype.h>
27 #if HAVE_ERR
28 #include <err.h>
29 #endif
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <stdarg.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <unistd.h>
37 #include <zlib.h>
38
39 #include "mandoc_aux.h"
40 #include "mandoc.h"
41 #include "roff.h"
42 #include "mdoc.h"
43 #include "man.h"
44 #include "libmandoc.h"
45 #include "roff_int.h"
46
/*
 * Maximum value of the reparse_count counter in struct mparse.
 * mparse_buf_r() increments the counter on every ROFF_REPARSE result
 * and reports MANDOCERR_ROFFLOOP once this limit is exceeded.
 */
#define	REPARSE_LIMIT	1000
48
/*
 * State of one parser instance: the roff preprocessor, the
 * mdoc/man syntax tree parser, and bookkeeping about the input
 * that is currently being read.
 */
struct	mparse {
	struct roff	 *roff; /* roff parser (!NULL) */
	struct roff_man	 *man; /* man parser */
	char		 *sodest; /* filename pointed to by .so */
	const char	 *file; /* filename of current input file */
	struct buf	 *primary; /* buffer currently being parsed */
	struct buf	 *secondary; /* preprocessed copy of input */
	const char	 *defos; /* default operating system */
	mandocmsg	  mmsg; /* warning/error message handler */
	enum mandoclevel  file_status; /* status of current parse */
	enum mandoclevel  wlevel; /* ignore messages below this */
	int		  options; /* parser options */
	int		  gzip; /* current input file is gzipped */
	int		  filenc; /* encoding of the current file */
	int		  reparse_count; /* finite interp. stack */
	int		  line; /* line number in the file */
};
66
/* Forward declarations of the helpers private to this file. */
static	void	  choose_parser(struct mparse *);
static	void	  resize_buf(struct buf *, size_t);
static	int	  mparse_buf_r(struct mparse *, struct buf, size_t, int);
static	int	  read_whole_file(struct mparse *, const char *, int,
				struct buf *, int *);
static	void	  mparse_end(struct mparse *);
static	void	  mparse_parse_buffer(struct mparse *, struct buf,
			const char *);
75
/*
 * For each message level, the smallest error code belonging to it.
 * mandoc_msg() walks this table downwards, starting at
 * MANDOCLEVEL_UNSUPP, until it finds an entry that is not greater
 * than the error code at hand; that index is the message level.
 */
static	const enum mandocerr	mandoclimits[MANDOCLEVEL_MAX] = {
	MANDOCERR_OK,
	MANDOCERR_STYLE,
	MANDOCERR_WARNING,
	MANDOCERR_ERROR,
	MANDOCERR_UNSUPP,
	MANDOCERR_MAX,
	MANDOCERR_MAX
};
85
/*
 * Message text for each error code, indexed by enum mandocerr.
 * mparse_strerror() returns entries of this table unchanged,
 * including the single NULL entry.
 * NOTE(review): the order must match the enum declaration, which
 * is not visible in this file; the NULL slot presumably belongs to
 * MANDOCERR_FILE, whose text is supplied by the caller - confirm.
 */
static	const char * const	mandocerrs[MANDOCERR_MAX] = {
	"ok",

	"generic style suggestion",

	"useless macro",
	"consider using OS macro",
	"description line ends with a full stop",

	"generic warning",

	/* related to the prologue */
	"missing manual title, using UNTITLED",
	"missing manual title, using \"\"",
	"lower case character in document title",
	"missing manual section, using \"\"",
	"unknown manual section",
	"missing date, using today's date",
	"cannot parse date, using it verbatim",
	"missing Os macro, using \"\"",
	"duplicate prologue macro",
	"late prologue macro",
	"skipping late title macro",
	"prologue macros out of order",

	/* related to document structure */
	".so is fragile, better use ln(1)",
	"no document body",
	"content before first section header",
	"first section is not \"NAME\"",
	"NAME section without Nm before Nd",
	"NAME section without description",
	"description not at the end of NAME",
	"bad NAME section content",
	"missing comma before name",
	"missing description line, using \"\"",
	"description line outside NAME section",
	"sections out of conventional order",
	"duplicate section title",
	"unexpected section",
	"unusual Xr order",
	"unusual Xr punctuation",
	"AUTHORS section without An macro",

	/* related to macros and nesting */
	"obsolete macro",
	"macro neither callable nor escaped",
	"skipping paragraph macro",
	"moving paragraph macro out of list",
	"skipping no-space macro",
	"blocks badly nested",
	"nested displays are not portable",
	"moving content out of list",
	"fill mode already enabled, skipping",
	"fill mode already disabled, skipping",
	"line scope broken",

	/* related to missing macro arguments */
	"skipping empty request",
	"conditional request controls empty scope",
	"skipping empty macro",
	"empty block",
	"empty argument, using 0n",
	"missing display type, using -ragged",
	"list type is not the first argument",
	"missing -width in -tag list, using 6n",
	"missing utility name, using \"\"",
	"missing function name, using \"\"",
	"empty head in list item",
	"empty list item",
	"missing font type, using \\fR",
	"unknown font type, using \\fR",
	"nothing follows prefix",
	"empty reference block",
	"missing section argument",
	"missing -std argument, adding it",
	"missing option string, using \"\"",
	"missing resource identifier, using \"\"",
	"missing eqn box, using \"\"",

	/* related to bad macro arguments */
	"unterminated quoted argument",
	"duplicate argument",
	"skipping duplicate argument",
	"skipping duplicate display type",
	"skipping duplicate list type",
	"skipping -width argument",
	"wrong number of cells",
	"unknown AT&T UNIX version",
	"comma in function argument",
	"parenthesis in function name",
	"invalid content in Rs block",
	"invalid Boolean argument",
	"unknown font, skipping request",
	"odd number of characters in request",

	/* related to plain text */
	"blank line in fill mode, using .sp",
	"tab in filled text",
	"whitespace at end of input line",
	"new sentence, new line",
	"bad comment style",
	"invalid escape sequence",
	"undefined string, using \"\"",

	/* related to tables */
	"tbl line starts with span",
	"tbl column starts with span",
	"skipping vertical bar in tbl layout",

	"generic error",

	/* related to tables */
	"non-alphabetic character in tbl options",
	"skipping unknown tbl option",
	"missing tbl option argument",
	"wrong tbl option argument size",
	"empty tbl layout",
	"invalid character in tbl layout",
	"unmatched parenthesis in tbl layout",
	"tbl without any data cells",
	"ignoring data in spanned tbl cell",
	"ignoring extra tbl data cells",
	"data block open at end of tbl",

	/* related to document structure and macros */
	NULL,
	"input stack limit exceeded, infinite loop?",
	"skipping bad character",
	"skipping unknown macro",
	"skipping insecure request",
	"skipping item outside list",
	"skipping column outside column list",
	"skipping end of block that is not open",
	"fewer RS blocks open, skipping",
	"inserting missing end of block",
	"appending missing end of block",

	/* related to request and macro arguments */
	"escaped character not allowed in a name",
	"NOT IMPLEMENTED: Bd -file",
	"skipping display without arguments",
	"missing list type, using -item",
	"missing manual name, using \"\"",
	"uname(3) system call failed, using UNKNOWN",
	"unknown standard specifier",
	"skipping request without numeric argument",
	"NOT IMPLEMENTED: .so with absolute path or \"..\"",
	".so request failed",
	"skipping all arguments",
	"skipping excess arguments",
	"divide by zero",

	"unsupported feature",
	"input too large",
	"unsupported control character",
	"unsupported roff request",
	"eqn delim option in tbl",
	"unsupported tbl layout modifier",
	"ignoring macro in table",
};
247
/*
 * Printable name of each message level, indexed by
 * enum mandoclevel; returned by mparse_strlevel().
 */
static	const char * const	mandoclevels[MANDOCLEVEL_MAX] = {
	"SUCCESS",
	"STYLE",
	"WARNING",
	"ERROR",
	"UNSUPP",
	"BADARG",
	"SYSERR"
};
257
258
259 static void
260 resize_buf(struct buf *buf, size_t initial)
261 {
262
263 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
264 buf->buf = mandoc_realloc(buf->buf, buf->sz);
265 }
266
267 static void
268 choose_parser(struct mparse *curp)
269 {
270 char *cp, *ep;
271 int format;
272
273 /*
274 * If neither command line arguments -mdoc or -man select
275 * a parser nor the roff parser found a .Dd or .TH macro
276 * yet, look ahead in the main input buffer.
277 */
278
279 if ((format = roff_getformat(curp->roff)) == 0) {
280 cp = curp->primary->buf;
281 ep = cp + curp->primary->sz;
282 while (cp < ep) {
283 if (*cp == '.' || *cp == '\'') {
284 cp++;
285 if (cp[0] == 'D' && cp[1] == 'd') {
286 format = MPARSE_MDOC;
287 break;
288 }
289 if (cp[0] == 'T' && cp[1] == 'H') {
290 format = MPARSE_MAN;
291 break;
292 }
293 }
294 cp = memchr(cp, '\n', ep - cp);
295 if (cp == NULL)
296 break;
297 cp++;
298 }
299 }
300
301 if (format == MPARSE_MDOC) {
302 curp->man->macroset = MACROSET_MDOC;
303 if (curp->man->mdocmac == NULL)
304 curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
305 } else {
306 curp->man->macroset = MACROSET_MAN;
307 if (curp->man->manmac == NULL)
308 curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
309 }
310 curp->man->first->tok = TOKEN_NONE;
311 }
312
/*
 * Main parse routine for a buffer.
 * It assumes encoding and line numbering are already set up.
 * It can recurse directly (for invocations of user-defined
 * macros, inline equations, and input line traps)
 * and indirectly (for .so file inclusion).
 *
 * curp:  parser state; curp->line provides the first line number
 * blk:   the input buffer, passed by value
 * i:     byte offset in blk where parsing starts
 * start: non-zero when called by mparse_parse_buffer() on a whole
 *        input buffer, zero when recursing on a temporary line
 *        buffer.  When zero, a NUL byte ends the input anywhere;
 *        when non-zero, only while no partial line is pending.
 *        Line counting, resetting of the reparse counter, and
 *        encoding detection only happen when start is non-zero.
 *
 * Returns 0 if reparsing was abandoned, that is, if REPARSE_LIMIT
 * was exceeded or if a nested reparse did not return 1 while
 * start is not 1.  Returns 1 in all other cases.
 */
static int
mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
{
	const struct tbl_span	*span;
	struct buf	 ln;
	const char	*save_file;
	char		*cp;
	size_t		 pos; /* byte number in the ln buffer */
	size_t		 j; /* auxiliary byte number in the blk buffer */
	enum rofferr	 rr;
	int		 of;
	int		 lnn; /* line number in the real file */
	int		 fd;
	unsigned char	 c;

	/* The line buffer starts out empty and grows on demand. */
	memset(&ln, 0, sizeof(ln));

	lnn = curp->line;
	pos = 0;

	/* Outer loop: one iteration per assembled input line. */
	while (i < blk.sz) {
		/* A NUL byte ends the input unless a line is pending. */
		if (0 == pos && '\0' == blk.buf[i])
			break;

		if (start) {
			curp->line = lnn;
			curp->reparse_count = 0;

			/*
			 * While both encodings are still enabled,
			 * set filenc from preconv_cue() on lines
			 * numbered below 3.
			 */
			if (lnn < 3 &&
			    curp->filenc & MPARSE_UTF8 &&
			    curp->filenc & MPARSE_LATIN1)
				curp->filenc = preconv_cue(&blk, i);
		}

		/* Inner loop: copy one line from blk into ln. */
		while (i < blk.sz && (start || blk.buf[i] != '\0')) {

			/*
			 * When finding an unescaped newline character,
			 * leave the character loop to process the line.
			 * Skip a preceding carriage return, if any.
			 */

			if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
			    '\n' == blk.buf[i + 1])
				++i;
			if ('\n' == blk.buf[i]) {
				++i;
				++lnn;
				break;
			}

			/*
			 * Make sure we have space for the worst
			 * case of 11 bytes: "\\[u10ffff]\0"
			 */

			if (pos + 11 > ln.sz)
				resize_buf(&ln, 256);

			/*
			 * Encode 8-bit input.
			 * If no input encoding is enabled or
			 * preconv_encode() returns zero, report the
			 * byte and replace it with a question mark.
			 */

			c = blk.buf[i];
			if (c & 0x80) {
				if ( ! (curp->filenc && preconv_encode(
				    &blk, &i, &ln, &pos, &curp->filenc))) {
					mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
					    curp->line, pos, "0x%x", c);
					ln.buf[pos++] = '?';
					i++;
				}
				continue;
			}

			/*
			 * Exclude control characters.
			 * Each one is reported; a carriage return is
			 * dropped, all others become a question mark.
			 */

			if (c == 0x7f || (c < 0x20 && c != 0x09)) {
				mandoc_vmsg(c == 0x00 || c == 0x04 ||
				    c > 0x0a ? MANDOCERR_CHAR_BAD :
				    MANDOCERR_CHAR_UNSUPP,
				    curp, curp->line, pos, "0x%x", c);
				i++;
				if (c != '\r')
					ln.buf[pos++] = '?';
				continue;
			}

			/* Trailing backslash = a plain char. */

			if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
				ln.buf[pos++] = blk.buf[i++];
				continue;
			}

			/*
			 * Found escape and at least one other character.
			 * When it's a newline character, skip it.
			 * When there is a carriage return in between,
			 * skip that one as well.
			 */

			if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
			    '\n' == blk.buf[i + 2])
				++i;
			if ('\n' == blk.buf[i + 1]) {
				i += 2;
				++lnn;
				continue;
			}

			if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
				/* Remember where the comment begins. */
				j = i;
				i += 2;
				/* Comment, skip to end of line */
				for (; i < blk.sz; ++i) {
					if (blk.buf[i] != '\n')
						continue;
					/* Whitespace ending the comment. */
					if (blk.buf[i - 1] == ' ' ||
					    blk.buf[i - 1] == '\t')
						mandoc_msg(
						    MANDOCERR_SPACE_EOL,
						    curp, curp->line,
						    pos + i-1 - j, NULL);
					++i;
					++lnn;
					break;
				}

				/*
				 * Backout trailing whitespaces
				 * that precede the comment in the line
				 * buffer, stopping at a space that
				 * follows a backslash.
				 */
				for (; pos > 0; --pos) {
					if (ln.buf[pos - 1] != ' ')
						break;
					if (pos > 2 && ln.buf[pos - 2] == '\\')
						break;
				}
				/* The comment ends the input line. */
				break;
			}

			/* Catch escaped bogus characters. */

			c = (unsigned char) blk.buf[i+1];

			if ( ! (isascii(c) &&
			    (isgraph(c) || isblank(c)))) {
				mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
				    curp->line, pos, "0x%x", c);
				i += 2;
				ln.buf[pos++] = '?';
				continue;
			}

			/* Some other escape sequence, copy & cont. */

			ln.buf[pos++] = blk.buf[i++];
			ln.buf[pos++] = blk.buf[i++];
		}

		/* Make room for the terminating NUL byte. */
		if (pos >= ln.sz)
			resize_buf(&ln, 256);

		ln.buf[pos] = '\0';

		/*
		 * A significant amount of complexity is contained by
		 * the roff preprocessor.  It's line-oriented but can be
		 * expressed on one line, so we need at times to
		 * readjust our starting point and re-run it.  The roff
		 * preprocessor can also readjust the buffers with new
		 * data, so we pass them in wholesale.
		 */

		of = 0;

		/*
		 * Maintain a lookaside buffer of all parsed lines.  We
		 * only do this if mparse_keep() has been invoked (the
		 * buffer may be accessed with mparse_getkeep()).
		 * Each line is appended followed by a newline and
		 * a NUL byte; the NUL byte is not counted in sz.
		 */

		if (curp->secondary) {
			curp->secondary->buf = mandoc_realloc(
			    curp->secondary->buf,
			    curp->secondary->sz + pos + 2);
			memcpy(curp->secondary->buf +
			    curp->secondary->sz,
			    ln.buf, pos);
			curp->secondary->sz += pos;
			curp->secondary->buf
				[curp->secondary->sz] = '\n';
			curp->secondary->sz++;
			curp->secondary->buf
				[curp->secondary->sz] = '\0';
		}
rerun:
		rr = roff_parseln(curp->roff, curp->line, &ln, &of);

		switch (rr) {
		case ROFF_REPARSE:
			/*
			 * Parse the rewritten line buffer recursively,
			 * unless the reparse limit was exceeded.
			 * Go on with the next input line if that
			 * succeeded or if this is a start == 1 call;
			 * otherwise, give up and return 0.
			 */
			if (++curp->reparse_count > REPARSE_LIMIT)
				mandoc_msg(MANDOCERR_ROFFLOOP, curp,
				    curp->line, pos, NULL);
			else if (mparse_buf_r(curp, ln, of, 0) == 1 ||
			    start == 1) {
				pos = 0;
				continue;
			}
			free(ln.buf);
			return 0;
		case ROFF_APPEND:
			/* Keep the line; append the next one to it. */
			pos = strlen(ln.buf);
			continue;
		case ROFF_RERUN:
			/* Pass the same line buffer to roff again. */
			goto rerun;
		case ROFF_IGN:
			/* Discard the line. */
			pos = 0;
			continue;
		case ROFF_SO:
			/*
			 * If the MPARSE_SO option is not set and
			 * no input follows the .so request, only
			 * record the file name and stop parsing.
			 */
			if ( ! (curp->options & MPARSE_SO) &&
			    (i >= blk.sz || blk.buf[i] == '\0')) {
				curp->sodest = mandoc_strdup(ln.buf + of);
				free(ln.buf);
				return 1;
			}
			/*
			 * We remove `so' clauses from our lookaside
			 * buffer because we're going to descend into
			 * the file recursively.
			 */
			if (curp->secondary)
				curp->secondary->sz -= pos + 1;
			/* mparse_open() overwrites curp->file. */
			save_file = curp->file;
			if ((fd = mparse_open(curp, ln.buf + of)) != -1) {
				mparse_readfd(curp, fd, ln.buf + of);
				close(fd);
				curp->file = save_file;
			} else {
				/*
				 * The file cannot be opened: report
				 * that and parse a replacement text
				 * naming the file instead.
				 */
				curp->file = save_file;
				mandoc_vmsg(MANDOCERR_SO_FAIL,
				    curp, curp->line, pos,
				    ".so %s", ln.buf + of);
				ln.sz = mandoc_asprintf(&cp,
				    ".sp\nSee the file %s.\n.sp",
				    ln.buf + of);
				free(ln.buf);
				ln.buf = cp;
				of = 0;
				mparse_buf_r(curp, ln, of, 0);
			}
			pos = 0;
			continue;
		default:
			break;
		}

		/* Select the macro set before the first parsed line. */
		if (curp->man->macroset == MACROSET_NONE)
			choose_parser(curp);

		/*
		 * Lastly, push down into the parsers themselves.
		 * If libroff returns ROFF_TBL, then add it to the
		 * currently open parse.  Since we only get here if
		 * there does exist data (see tbl_data.c), we're
		 * guaranteed that something's been allocated.
		 * Do the same for ROFF_EQN.
		 * A return value of 2 from the macro parser ends
		 * the processing of this buffer.
		 */

		if (rr == ROFF_TBL)
			while ((span = roff_span(curp->roff)) != NULL)
				roff_addtbl(curp->man, span);
		else if (rr == ROFF_EQN)
			roff_addeqn(curp->man, roff_eqn(curp->roff));
		else if ((curp->man->macroset == MACROSET_MDOC ?
		    mdoc_parseln(curp->man, curp->line, ln.buf, of) :
		    man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
				break;

		/* Temporary buffers typically are not full. */

		if (0 == start && '\0' == blk.buf[i])
			break;

		/* Start the next input line. */

		pos = 0;
	}

	free(ln.buf);
	return 1;
}
612
613 static int
614 read_whole_file(struct mparse *curp, const char *file, int fd,
615 struct buf *fb, int *with_mmap)
616 {
617 struct stat st;
618 gzFile gz;
619 size_t off;
620 ssize_t ssz;
621
622 if (fstat(fd, &st) == -1)
623 err((int)MANDOCLEVEL_SYSERR, "%s", file);
624
625 /*
626 * If we're a regular file, try just reading in the whole entry
627 * via mmap(). This is faster than reading it into blocks, and
628 * since each file is only a few bytes to begin with, I'm not
629 * concerned that this is going to tank any machines.
630 */
631
632 if (curp->gzip == 0 && S_ISREG(st.st_mode)) {
633 if (st.st_size > 0x7fffffff) {
634 mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
635 return 0;
636 }
637 *with_mmap = 1;
638 fb->sz = (size_t)st.st_size;
639 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
640 if (fb->buf != MAP_FAILED)
641 return 1;
642 }
643
644 if (curp->gzip) {
645 if ((gz = gzdopen(fd, "rb")) == NULL)
646 err((int)MANDOCLEVEL_SYSERR, "%s", file);
647 } else
648 gz = NULL;
649
650 /*
651 * If this isn't a regular file (like, say, stdin), then we must
652 * go the old way and just read things in bit by bit.
653 */
654
655 *with_mmap = 0;
656 off = 0;
657 fb->sz = 0;
658 fb->buf = NULL;
659 for (;;) {
660 if (off == fb->sz) {
661 if (fb->sz == (1U << 31)) {
662 mandoc_msg(MANDOCERR_TOOLARGE, curp,
663 0, 0, NULL);
664 break;
665 }
666 resize_buf(fb, 65536);
667 }
668 ssz = curp->gzip ?
669 gzread(gz, fb->buf + (int)off, fb->sz - off) :
670 read(fd, fb->buf + (int)off, fb->sz - off);
671 if (ssz == 0) {
672 fb->sz = off;
673 return 1;
674 }
675 if (ssz == -1)
676 err((int)MANDOCLEVEL_SYSERR, "%s", file);
677 off += (size_t)ssz;
678 }
679
680 free(fb->buf);
681 fb->buf = NULL;
682 return 0;
683 }
684
685 static void
686 mparse_end(struct mparse *curp)
687 {
688 if (curp->man->macroset == MACROSET_NONE)
689 curp->man->macroset = MACROSET_MAN;
690 if (curp->man->macroset == MACROSET_MDOC)
691 mdoc_endparse(curp->man);
692 else
693 man_endparse(curp->man);
694 roff_endparse(curp->roff);
695 }
696
697 static void
698 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
699 {
700 struct buf *svprimary;
701 const char *svfile;
702 size_t offset;
703 static int recursion_depth;
704
705 if (64 < recursion_depth) {
706 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
707 return;
708 }
709
710 /* Line number is per-file. */
711 svfile = curp->file;
712 curp->file = file;
713 svprimary = curp->primary;
714 curp->primary = &blk;
715 curp->line = 1;
716 recursion_depth++;
717
718 /* Skip an UTF-8 byte order mark. */
719 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
720 (unsigned char)blk.buf[0] == 0xef &&
721 (unsigned char)blk.buf[1] == 0xbb &&
722 (unsigned char)blk.buf[2] == 0xbf) {
723 offset = 3;
724 curp->filenc &= ~MPARSE_LATIN1;
725 } else
726 offset = 0;
727
728 mparse_buf_r(curp, blk, offset, 1);
729
730 if (--recursion_depth == 0)
731 mparse_end(curp);
732
733 curp->primary = svprimary;
734 curp->file = svfile;
735 }
736
737 enum mandoclevel
738 mparse_readmem(struct mparse *curp, void *buf, size_t len,
739 const char *file)
740 {
741 struct buf blk;
742
743 blk.buf = buf;
744 blk.sz = len;
745
746 mparse_parse_buffer(curp, blk, file);
747 return curp->file_status;
748 }
749
750 /*
751 * Read the whole file into memory and call the parsers.
752 * Called recursively when an .so request is encountered.
753 */
754 enum mandoclevel
755 mparse_readfd(struct mparse *curp, int fd, const char *file)
756 {
757 struct buf blk;
758 int with_mmap;
759 int save_filenc;
760
761 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
762 save_filenc = curp->filenc;
763 curp->filenc = curp->options &
764 (MPARSE_UTF8 | MPARSE_LATIN1);
765 mparse_parse_buffer(curp, blk, file);
766 curp->filenc = save_filenc;
767 if (with_mmap)
768 munmap(blk.buf, blk.sz);
769 else
770 free(blk.buf);
771 }
772 return curp->file_status;
773 }
774
775 int
776 mparse_open(struct mparse *curp, const char *file)
777 {
778 char *cp;
779 int fd;
780
781 curp->file = file;
782 cp = strrchr(file, '.');
783 curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz"));
784
785 /* First try to use the filename as it is. */
786
787 if ((fd = open(file, O_RDONLY)) != -1)
788 return fd;
789
790 /*
791 * If that doesn't work and the filename doesn't
792 * already end in .gz, try appending .gz.
793 */
794
795 if ( ! curp->gzip) {
796 mandoc_asprintf(&cp, "%s.gz", file);
797 fd = open(cp, O_RDONLY);
798 free(cp);
799 if (fd != -1) {
800 curp->gzip = 1;
801 return fd;
802 }
803 }
804
805 /* Neither worked, give up. */
806
807 mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
808 return -1;
809 }
810
811 struct mparse *
812 mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
813 const char *defos)
814 {
815 struct mparse *curp;
816
817 curp = mandoc_calloc(1, sizeof(struct mparse));
818
819 curp->options = options;
820 curp->wlevel = wlevel;
821 curp->mmsg = mmsg;
822 curp->defos = defos;
823
824 curp->roff = roff_alloc(curp, options);
825 curp->man = roff_man_alloc( curp->roff, curp, curp->defos,
826 curp->options & MPARSE_QUICK ? 1 : 0);
827 if (curp->options & MPARSE_MDOC) {
828 curp->man->macroset = MACROSET_MDOC;
829 if (curp->man->mdocmac == NULL)
830 curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
831 } else if (curp->options & MPARSE_MAN) {
832 curp->man->macroset = MACROSET_MAN;
833 if (curp->man->manmac == NULL)
834 curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
835 }
836 curp->man->first->tok = TOKEN_NONE;
837 return curp;
838 }
839
840 void
841 mparse_reset(struct mparse *curp)
842 {
843 roff_reset(curp->roff);
844 roff_man_reset(curp->man);
845
846 free(curp->sodest);
847 curp->sodest = NULL;
848
849 if (curp->secondary)
850 curp->secondary->sz = 0;
851
852 curp->file_status = MANDOCLEVEL_OK;
853 curp->gzip = 0;
854 }
855
856 void
857 mparse_free(struct mparse *curp)
858 {
859
860 roffhash_free(curp->man->mdocmac);
861 roffhash_free(curp->man->manmac);
862 roff_man_free(curp->man);
863 roff_free(curp->roff);
864 if (curp->secondary)
865 free(curp->secondary->buf);
866
867 free(curp->secondary);
868 free(curp->sodest);
869 free(curp);
870 }
871
872 void
873 mparse_result(struct mparse *curp, struct roff_man **man,
874 char **sodest)
875 {
876
877 if (sodest && NULL != (*sodest = curp->sodest)) {
878 *man = NULL;
879 return;
880 }
881 if (man)
882 *man = curp->man;
883 }
884
885 void
886 mparse_updaterc(struct mparse *curp, enum mandoclevel *rc)
887 {
888 if (curp->file_status > *rc)
889 *rc = curp->file_status;
890 }
891
892 void
893 mandoc_vmsg(enum mandocerr t, struct mparse *m,
894 int ln, int pos, const char *fmt, ...)
895 {
896 char buf[256];
897 va_list ap;
898
899 va_start(ap, fmt);
900 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
901 va_end(ap);
902
903 mandoc_msg(t, m, ln, pos, buf);
904 }
905
906 void
907 mandoc_msg(enum mandocerr er, struct mparse *m,
908 int ln, int col, const char *msg)
909 {
910 enum mandoclevel level;
911
912 level = MANDOCLEVEL_UNSUPP;
913 while (er < mandoclimits[level])
914 level--;
915
916 if (level < m->wlevel && er != MANDOCERR_FILE)
917 return;
918
919 if (m->mmsg)
920 (*m->mmsg)(er, level, m->file, ln, col, msg);
921
922 if (m->file_status < level)
923 m->file_status = level;
924 }
925
926 const char *
927 mparse_strerror(enum mandocerr er)
928 {
929
930 return mandocerrs[er];
931 }
932
933 const char *
934 mparse_strlevel(enum mandoclevel lvl)
935 {
936 return mandoclevels[lvl];
937 }
938
939 void
940 mparse_keep(struct mparse *p)
941 {
942
943 assert(NULL == p->secondary);
944 p->secondary = mandoc_calloc(1, sizeof(struct buf));
945 }
946
947 const char *
948 mparse_getkeep(const struct mparse *p)
949 {
950
951 assert(p->secondary);
952 return p->secondary->sz ? p->secondary->buf : NULL;
953 }