]> git.cameronkatri.com Git - mandoc.git/blob - read.c
Cleanup, no functional change:
[mandoc.git] / read.c
1 /* $Id: read.c,v 1.202 2018/12/13 11:55:47 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2018 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include "config.h"
20
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #include <sys/stat.h>
24
25 #include <assert.h>
26 #include <ctype.h>
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <stdarg.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <unistd.h>
34 #include <zlib.h>
35
36 #include "mandoc_aux.h"
37 #include "mandoc.h"
38 #include "roff.h"
39 #include "mdoc.h"
40 #include "man.h"
41 #include "mandoc_parse.h"
42 #include "libmandoc.h"
43 #include "roff_int.h"
44
/*
 * Upper bound on the number of ROFF_REPARSE results accepted while
 * processing one input line.  mparse_buf_r() resets the counter for
 * every line it reads with "start" set and gives up with
 * MANDOCERR_ROFFLOOP once the bound is exceeded.
 */
#define REPARSE_LIMIT 1000
46
/*
 * State of one parser instance.  Allocated by mparse_alloc(),
 * recycled by mparse_reset() and released by mparse_free().
 */
struct mparse {
	struct roff	 *roff; /* roff parser (!NULL) */
	struct roff_man	 *man; /* man parser */
	char		 *sodest; /* filename pointed to by .so */
	const char	 *file; /* filename of current input file */
	struct buf	 *primary; /* buffer currently being parsed */
	struct buf	 *secondary; /* copy of top level input, one node per line */
	struct buf	 *loop; /* open .while request line */
	const char	 *os_s; /* default operating system */
	mandocmsg	  mmsg; /* warning/error message handler, may be NULL */
	enum mandoclevel  file_status; /* highest message level seen in current parse */
	enum mandocerr	  mmin; /* ignore messages below this */
	int		  options; /* parser options (MPARSE_* bits) */
	int		  gzip; /* current input file is gzipped */
	int		  filenc; /* encoding of the current file (MPARSE_UTF8/LATIN1 bits) */
	int		  reparse_count; /* reparse depth for one line, see REPARSE_LIMIT */
	int		  line; /* line number in the file */
};
65
/* Forward declarations of the helpers private to this file. */
static	void	  choose_parser(struct mparse *);
static	void	  free_buf_list(struct buf *);
static	void	  resize_buf(struct buf *, size_t);
static	int	  mparse_buf_r(struct mparse *, struct buf, size_t, int);
static	int	  read_whole_file(struct mparse *, const char *, int,
				struct buf *, int *);
static	void	  mparse_end(struct mparse *);
static	void	  mparse_parse_buffer(struct mparse *, struct buf,
			const char *);
75
/*
 * Indexed by enum mandoclevel.  Each entry is the threshold error
 * code for that level: mandoc_msg() starts at MANDOCLEVEL_UNSUPP and
 * steps the level down while the error code is below the entry.
 * NOTE(review): the entries presumably follow the same level order
 * as mandoclevels[] -- confirm against the enum in mandoc.h.
 */
static	const enum mandocerr	mandoclimits[MANDOCLEVEL_MAX] = {
	MANDOCERR_OK,
	MANDOCERR_OK,
	MANDOCERR_WARNING,
	MANDOCERR_ERROR,
	MANDOCERR_UNSUPP,
	MANDOCERR_MAX,
	MANDOCERR_MAX
};
85
/*
 * Message texts indexed by enum mandocerr; mparse_strerror() returns
 * entries of this table unchanged.  The order of the strings must
 * therefore match the order of the enum constants.  One slot is
 * deliberately NULL, see below.
 */
static	const char * const	mandocerrs[MANDOCERR_MAX] = {
	"ok",

	"base system convention",

	"Mdocdate found",
	"Mdocdate missing",
	"unknown architecture",
	"operating system explicitly specified",
	"RCS id missing",
	"referenced manual not found",

	"generic style suggestion",

	"legacy man(7) date format",
	"normalizing date format to",
	"lower case character in document title",
	"duplicate RCS id",
	"possible typo in section name",
	"unterminated quoted argument",
	"useless macro",
	"consider using OS macro",
	"errnos out of order",
	"duplicate errno",
	"trailing delimiter",
	"no blank before trailing delimiter",
	"fill mode already enabled, skipping",
	"fill mode already disabled, skipping",
	"verbatim \"--\", maybe consider using \\(em",
	"function name without markup",
	"whitespace at end of input line",
	"bad comment style",

	"generic warning",

	/* related to the prologue */
	"missing manual title, using UNTITLED",
	"missing manual title, using \"\"",
	"missing manual section, using \"\"",
	"unknown manual section",
	"missing date, using today's date",
	"cannot parse date, using it verbatim",
	"date in the future, using it anyway",
	"missing Os macro, using \"\"",
	"late prologue macro",
	"prologue macros out of order",

	/* related to document structure */
	".so is fragile, better use ln(1)",
	"no document body",
	"content before first section header",
	"first section is not \"NAME\"",
	"NAME section without Nm before Nd",
	"NAME section without description",
	"description not at the end of NAME",
	"bad NAME section content",
	"missing comma before name",
	"missing description line, using \"\"",
	"description line outside NAME section",
	"sections out of conventional order",
	"duplicate section title",
	"unexpected section",
	"cross reference to self",
	"unusual Xr order",
	"unusual Xr punctuation",
	"AUTHORS section without An macro",

	/* related to macros and nesting */
	"obsolete macro",
	"macro neither callable nor escaped",
	"skipping paragraph macro",
	"moving paragraph macro out of list",
	"skipping no-space macro",
	"blocks badly nested",
	"nested displays are not portable",
	"moving content out of list",
	"first macro on line",
	"line scope broken",
	"skipping blank line in line scope",

	/* related to missing macro arguments */
	"skipping empty request",
	"conditional request controls empty scope",
	"skipping empty macro",
	"empty block",
	"empty argument, using 0n",
	"missing display type, using -ragged",
	"list type is not the first argument",
	"missing -width in -tag list, using 6n",
	"missing utility name, using \"\"",
	"missing function name, using \"\"",
	"empty head in list item",
	"empty list item",
	"missing argument, using next line",
	"missing font type, using \\fR",
	"unknown font type, using \\fR",
	"nothing follows prefix",
	"empty reference block",
	"missing section argument",
	"missing -std argument, adding it",
	"missing option string, using \"\"",
	"missing resource identifier, using \"\"",
	"missing eqn box, using \"\"",

	/* related to bad macro arguments */
	"duplicate argument",
	"skipping duplicate argument",
	"skipping duplicate display type",
	"skipping duplicate list type",
	"skipping -width argument",
	"wrong number of cells",
	"unknown AT&T UNIX version",
	"comma in function argument",
	"parenthesis in function name",
	"unknown library name",
	"invalid content in Rs block",
	"invalid Boolean argument",
	"argument contains two font escapes",
	"unknown font, skipping request",
	"odd number of characters in request",

	/* related to plain text */
	"blank line in fill mode, using .sp",
	"tab in filled text",
	"new sentence, new line",
	"invalid escape sequence",
	"undefined string, using \"\"",

	/* related to tables */
	"tbl line starts with span",
	"tbl column starts with span",
	"skipping vertical bar in tbl layout",

	"generic error",

	/* related to tables */
	"non-alphabetic character in tbl options",
	"skipping unknown tbl option",
	"missing tbl option argument",
	"wrong tbl option argument size",
	"empty tbl layout",
	"invalid character in tbl layout",
	"unmatched parenthesis in tbl layout",
	"tbl without any data cells",
	"ignoring data in spanned tbl cell",
	"ignoring extra tbl data cells",
	"data block open at end of tbl",

	/* related to document structure and macros */
	/*
	 * NOTE(review): this NULL is presumably the MANDOCERR_FILE slot;
	 * callers in this file pass their own text (strerror etc.) with
	 * that code -- confirm the position against mandoc.h.
	 */
	NULL,
	"duplicate prologue macro",
	"skipping late title macro",
	"input stack limit exceeded, infinite loop?",
	"skipping bad character",
	"skipping unknown macro",
	"ignoring request outside macro",
	"skipping insecure request",
	"skipping item outside list",
	"skipping column outside column list",
	"skipping end of block that is not open",
	"fewer RS blocks open, skipping",
	"inserting missing end of block",
	"appending missing end of block",

	/* related to request and macro arguments */
	"escaped character not allowed in a name",
	"using macro argument outside macro",
	"argument number is not numeric",
	"NOT IMPLEMENTED: Bd -file",
	"skipping display without arguments",
	"missing list type, using -item",
	"argument is not numeric, using 1",
	"argument is not a character",
	"missing manual name, using \"\"",
	"uname(3) system call failed, using UNKNOWN",
	"unknown standard specifier",
	"skipping request without numeric argument",
	"excessive shift",
	"NOT IMPLEMENTED: .so with absolute path or \"..\"",
	".so request failed",
	"skipping all arguments",
	"skipping excess arguments",
	"divide by zero",

	"unsupported feature",
	"input too large",
	"unsupported control character",
	"unsupported roff request",
	"nested .while loops",
	"end of scope with open .while loop",
	"end of .while loop in inner scope",
	"cannot continue this .while loop",
	"eqn delim option in tbl",
	"unsupported tbl layout modifier",
	"ignoring macro in table",
};
282
/*
 * Names of the message levels, indexed by enum mandoclevel;
 * mparse_strlevel() returns entries of this table unchanged.
 */
static	const char * const	mandoclevels[MANDOCLEVEL_MAX] = {
	"SUCCESS",
	"STYLE",
	"WARNING",
	"ERROR",
	"UNSUPP",
	"BADARG",
	"SYSERR"
};
292
293
294 static void
295 resize_buf(struct buf *buf, size_t initial)
296 {
297
298 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
299 buf->buf = mandoc_realloc(buf->buf, buf->sz);
300 }
301
302 static void
303 free_buf_list(struct buf *buf)
304 {
305 struct buf *tmp;
306
307 while (buf != NULL) {
308 tmp = buf;
309 buf = tmp->next;
310 free(tmp->buf);
311 free(tmp);
312 }
313 }
314
315 static void
316 choose_parser(struct mparse *curp)
317 {
318 char *cp, *ep;
319 int format;
320
321 /*
322 * If neither command line arguments -mdoc or -man select
323 * a parser nor the roff parser found a .Dd or .TH macro
324 * yet, look ahead in the main input buffer.
325 */
326
327 if ((format = roff_getformat(curp->roff)) == 0) {
328 cp = curp->primary->buf;
329 ep = cp + curp->primary->sz;
330 while (cp < ep) {
331 if (*cp == '.' || *cp == '\'') {
332 cp++;
333 if (cp[0] == 'D' && cp[1] == 'd') {
334 format = MPARSE_MDOC;
335 break;
336 }
337 if (cp[0] == 'T' && cp[1] == 'H') {
338 format = MPARSE_MAN;
339 break;
340 }
341 }
342 cp = memchr(cp, '\n', ep - cp);
343 if (cp == NULL)
344 break;
345 cp++;
346 }
347 }
348
349 if (format == MPARSE_MDOC) {
350 curp->man->macroset = MACROSET_MDOC;
351 if (curp->man->mdocmac == NULL)
352 curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
353 } else {
354 curp->man->macroset = MACROSET_MAN;
355 if (curp->man->manmac == NULL)
356 curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
357 }
358 curp->man->first->tok = TOKEN_NONE;
359 }
360
/*
 * Main parse routine for a buffer.
 * It assumes encoding and line numbering are already set up.
 * It can recurse directly (for invocations of user-defined
 * macros, inline equations, and input line traps)
 * and indirectly (for .so file inclusion).
 *
 * Arguments:
 *   curp   parser state
 *   blk    input to parse, passed by value
 *   i      byte offset in blk.buf at which to start
 *   start  non-zero when blk is real file input: line numbers are
 *          counted, the reparse counter is reset per line, and the
 *          first lines are checked for an encoding cue; zero when
 *          called recursively on text produced by the roff parser
 *
 * Returns ROFF_CONT by default, ROFF_USERRET when a return from a
 * user-defined macro was requested at a nested level, ROFF_IGN when
 * REPARSE_LIMIT was exceeded, or the result of a nested reparse.
 */
static int
mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
{
	struct buf	 ln;
	struct buf	*firstln, *lastln, *thisln, *loop;
	const char	*save_file;
	char		*cp;
	size_t		 pos; /* byte number in the ln buffer */
	int		 line_result, result;
	int		 of;
	int		 lnn; /* line number in the real file */
	int		 fd;
	int		 inloop; /* Saw .while on this level. */
	unsigned char	 c;

	ln.sz = 256;
	ln.buf = mandoc_malloc(ln.sz);
	ln.next = NULL;
	firstln = loop = NULL;
	lnn = curp->line;
	pos = 0;
	inloop = 0;
	result = ROFF_CONT;

	/*
	 * One iteration per input line.  A non-zero pos means that the
	 * previous line asked for the next one to be appended to it.
	 */
	while (i < blk.sz && (blk.buf[i] != '\0' || pos != 0)) {
		if (start) {
			curp->line = lnn;
			curp->reparse_count = 0;

			/*
			 * While both encodings are still candidates,
			 * let preconv_cue() inspect the first two lines.
			 */
			if (lnn < 3 &&
			    curp->filenc & MPARSE_UTF8 &&
			    curp->filenc & MPARSE_LATIN1)
				curp->filenc = preconv_cue(&blk, i);
		}

		/* Copy one input line into ln, sanitizing bytes. */
		while (i < blk.sz && (start || blk.buf[i] != '\0')) {

			/*
			 * When finding an unescaped newline character,
			 * leave the character loop to process the line.
			 * Skip a preceding carriage return, if any.
			 */

			if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
			    '\n' == blk.buf[i + 1])
				++i;
			if ('\n' == blk.buf[i]) {
				++i;
				++lnn;
				break;
			}

			/*
			 * Make sure we have space for the worst
			 * case of 12 bytes: "\\[u10ffff]\n\0"
			 */

			if (pos + 12 > ln.sz)
				resize_buf(&ln, 256);

			/*
			 * Encode 8-bit input.
			 * If no encoding is selected or the conversion
			 * fails, report the byte and substitute '?'.
			 */

			c = blk.buf[i];
			if (c & 0x80) {
				if ( ! (curp->filenc && preconv_encode(
				    &blk, &i, &ln, &pos, &curp->filenc))) {
					mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
					    curp->line, pos, "0x%x", c);
					ln.buf[pos++] = '?';
					i++;
				}
				continue;
			}

			/*
			 * Exclude control characters.
			 * Each one is reported and replaced by '?',
			 * except that a carriage return is dropped.
			 */

			if (c == 0x7f || (c < 0x20 && c != 0x09)) {
				mandoc_vmsg(c == 0x00 || c == 0x04 ||
				    c > 0x0a ? MANDOCERR_CHAR_BAD :
				    MANDOCERR_CHAR_UNSUPP,
				    curp, curp->line, pos, "0x%x", c);
				i++;
				if (c != '\r')
					ln.buf[pos++] = '?';
				continue;
			}

			ln.buf[pos++] = blk.buf[i++];
		}
		ln.buf[pos] = '\0';

		/*
		 * Maintain a lookaside buffer of all lines
		 * parsed from this input source.
		 * The first list created becomes curp->secondary;
		 * the list is also what .while loops replay from.
		 */

		thisln = mandoc_malloc(sizeof(*thisln));
		thisln->buf = mandoc_strdup(ln.buf);
		thisln->sz = strlen(ln.buf) + 1;
		thisln->next = NULL;
		if (firstln == NULL) {
			firstln = lastln = thisln;
			if (curp->secondary == NULL)
				curp->secondary = firstln;
		} else {
			lastln->next = thisln;
			lastln = thisln;
		}

		/* XXX Ugly hack to mark the end of the input. */

		if (i == blk.sz || blk.buf[i] == '\0') {
			ln.buf[pos++] = '\n';
			ln.buf[pos] = '\0';
		}

		/*
		 * A significant amount of complexity is contained by
		 * the roff preprocessor.  It's line-oriented but can be
		 * expressed on one line, so we need at times to
		 * readjust our starting point and re-run it.  The roff
		 * preprocessor can also readjust the buffers with new
		 * data, so we pass them in wholesale.
		 */

		of = 0;
rerun:
		line_result = roff_parseln(curp->roff, curp->line, &ln, &of);

		/* Process options. */

		if (line_result & ROFF_APPEND)
			assert(line_result == (ROFF_IGN | ROFF_APPEND));

		if (line_result & ROFF_USERCALL)
			assert((line_result & ROFF_MASK) == ROFF_REPARSE);

		if (line_result & ROFF_USERRET) {
			assert(line_result == (ROFF_IGN | ROFF_USERRET));
			if (start == 0) {
				/* Return from the current macro. */
				result = ROFF_USERRET;
				goto out;
			}
		}

		/*
		 * Track .while loops.  curp->loop points to the stored
		 * line that opened the loop, the local "loop" to the
		 * stored line being replayed, if any.
		 */
		switch (line_result & ROFF_LOOPMASK) {
		case ROFF_IGN:
			break;
		case ROFF_WHILE:
			if (curp->loop != NULL) {
				/* Re-evaluating the loop being replayed. */
				if (loop == curp->loop)
					break;
				mandoc_msg(MANDOCERR_WHILE_NEST,
				    curp, curp->line, pos, NULL);
			}
			curp->loop = thisln;
			loop = NULL;
			inloop = 1;
			break;
		case ROFF_LOOPCONT:
		case ROFF_LOOPEXIT:
			if (curp->loop == NULL) {
				mandoc_msg(MANDOCERR_WHILE_FAIL,
				    curp, curp->line, pos, NULL);
				break;
			}
			if (inloop == 0) {
				/* The loop was opened on another level. */
				mandoc_msg(MANDOCERR_WHILE_INTO,
				    curp, curp->line, pos, NULL);
				curp->loop = loop = NULL;
				break;
			}
			if (line_result & ROFF_LOOPCONT)
				loop = curp->loop;
			else {
				curp->loop = loop = NULL;
				inloop = 0;
			}
			break;
		default:
			abort();
		}

		/* Process the main instruction from the roff parser. */

		switch (line_result & ROFF_MASK) {
		case ROFF_IGN:
			break;
		case ROFF_CONT:
			if (curp->man->macroset == MACROSET_NONE)
				choose_parser(curp);
			/* A return value of 2 ends parsing of this buffer. */
			if ((curp->man->macroset == MACROSET_MDOC ?
			     mdoc_parseln(curp->man, curp->line, ln.buf, of) :
			     man_parseln(curp->man, curp->line, ln.buf, of)
			    ) == 2)
				goto out;
			break;
		case ROFF_RERUN:
			goto rerun;
		case ROFF_REPARSE:
			if (++curp->reparse_count > REPARSE_LIMIT) {
				/* Abort and return to the top level. */
				result = ROFF_IGN;
				mandoc_msg(MANDOCERR_ROFFLOOP, curp,
				    curp->line, pos, NULL);
				goto out;
			}
			result = mparse_buf_r(curp, ln, of, 0);
			if (line_result & ROFF_USERCALL) {
				roff_userret(curp->roff);
				/* Continue normally. */
				if (result & ROFF_USERRET)
					result = ROFF_CONT;
			}
			/* Nested levels pass any other result upwards. */
			if (start == 0 && result != ROFF_CONT)
				goto out;
			break;
		case ROFF_SO:
			/*
			 * Without MPARSE_SO, a .so request on the last
			 * input line is only recorded in curp->sodest.
			 */
			if ( ! (curp->options & MPARSE_SO) &&
			    (i >= blk.sz || blk.buf[i] == '\0')) {
				curp->sodest = mandoc_strdup(ln.buf + of);
				goto out;
			}
			/* mparse_open() overwrites curp->file. */
			save_file = curp->file;
			if ((fd = mparse_open(curp, ln.buf + of)) != -1) {
				mparse_readfd(curp, fd, ln.buf + of);
				close(fd);
				curp->file = save_file;
			} else {
				curp->file = save_file;
				mandoc_vmsg(MANDOCERR_SO_FAIL,
				    curp, curp->line, pos,
				    ".so %s", ln.buf + of);
				/*
				 * Replace the line by a short notice
				 * naming the file and parse that instead.
				 */
				ln.sz = mandoc_asprintf(&cp,
				    ".sp\nSee the file %s.\n.sp",
				    ln.buf + of);
				free(ln.buf);
				ln.buf = cp;
				of = 0;
				mparse_buf_r(curp, ln, of, 0);
			}
			break;
		default:
			abort();
		}

		/* Start the next input line. */

		if (loop != NULL &&
		    (line_result & ROFF_LOOPMASK) == ROFF_IGN)
			loop = loop->next;

		/*
		 * While replaying a loop, take the next line from the
		 * lookaside list rather than from the input buffer.
		 */
		if (loop != NULL) {
			if ((line_result & ROFF_APPEND) == 0)
				*ln.buf = '\0';
			if (ln.sz < loop->sz)
				resize_buf(&ln, loop->sz);
			(void)strlcat(ln.buf, loop->buf, ln.sz);
			of = 0;
			goto rerun;
		}

		pos = (line_result & ROFF_APPEND) ? strlen(ln.buf) : 0;
	}
out:
	/* A loop opened on this level must not outlive it. */
	if (inloop) {
		if (result != ROFF_USERRET)
			mandoc_msg(MANDOCERR_WHILE_OUTOF, curp,
			    curp->line, pos, NULL);
		curp->loop = NULL;
	}
	free(ln.buf);
	/* The list stored in curp->secondary is freed elsewhere. */
	if (firstln != curp->secondary)
		free_buf_list(firstln);
	return result;
}
649
650 static int
651 read_whole_file(struct mparse *curp, const char *file, int fd,
652 struct buf *fb, int *with_mmap)
653 {
654 struct stat st;
655 gzFile gz;
656 size_t off;
657 ssize_t ssz;
658 int gzerrnum, retval;
659
660 if (fstat(fd, &st) == -1) {
661 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
662 "fstat: %s", strerror(errno));
663 return 0;
664 }
665
666 /*
667 * If we're a regular file, try just reading in the whole entry
668 * via mmap(). This is faster than reading it into blocks, and
669 * since each file is only a few bytes to begin with, I'm not
670 * concerned that this is going to tank any machines.
671 */
672
673 if (curp->gzip == 0 && S_ISREG(st.st_mode)) {
674 if (st.st_size > 0x7fffffff) {
675 mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
676 return 0;
677 }
678 *with_mmap = 1;
679 fb->sz = (size_t)st.st_size;
680 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
681 if (fb->buf != MAP_FAILED)
682 return 1;
683 }
684
685 if (curp->gzip) {
686 /*
687 * Duplicating the file descriptor is required
688 * because we will have to call gzclose(3)
689 * to free memory used internally by zlib,
690 * but that will also close the file descriptor,
691 * which this function must not do.
692 */
693 if ((fd = dup(fd)) == -1) {
694 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
695 "dup: %s", strerror(errno));
696 return 0;
697 }
698 if ((gz = gzdopen(fd, "rb")) == NULL) {
699 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
700 "gzdopen: %s", strerror(errno));
701 close(fd);
702 return 0;
703 }
704 } else
705 gz = NULL;
706
707 /*
708 * If this isn't a regular file (like, say, stdin), then we must
709 * go the old way and just read things in bit by bit.
710 */
711
712 *with_mmap = 0;
713 off = 0;
714 retval = 0;
715 fb->sz = 0;
716 fb->buf = NULL;
717 for (;;) {
718 if (off == fb->sz) {
719 if (fb->sz == (1U << 31)) {
720 mandoc_msg(MANDOCERR_TOOLARGE, curp,
721 0, 0, NULL);
722 break;
723 }
724 resize_buf(fb, 65536);
725 }
726 ssz = curp->gzip ?
727 gzread(gz, fb->buf + (int)off, fb->sz - off) :
728 read(fd, fb->buf + (int)off, fb->sz - off);
729 if (ssz == 0) {
730 fb->sz = off;
731 retval = 1;
732 break;
733 }
734 if (ssz == -1) {
735 if (curp->gzip)
736 (void)gzerror(gz, &gzerrnum);
737 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, "read: %s",
738 curp->gzip && gzerrnum != Z_ERRNO ?
739 zError(gzerrnum) : strerror(errno));
740 break;
741 }
742 off += (size_t)ssz;
743 }
744
745 if (curp->gzip && (gzerrnum = gzclose(gz)) != Z_OK)
746 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, "gzclose: %s",
747 gzerrnum == Z_ERRNO ? strerror(errno) :
748 zError(gzerrnum));
749 if (retval == 0) {
750 free(fb->buf);
751 fb->buf = NULL;
752 }
753 return retval;
754 }
755
756 static void
757 mparse_end(struct mparse *curp)
758 {
759 if (curp->man->macroset == MACROSET_NONE)
760 curp->man->macroset = MACROSET_MAN;
761 if (curp->man->macroset == MACROSET_MDOC)
762 mdoc_endparse(curp->man);
763 else
764 man_endparse(curp->man);
765 roff_endparse(curp->roff);
766 }
767
768 static void
769 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
770 {
771 struct buf *svprimary;
772 const char *svfile;
773 size_t offset;
774 static int recursion_depth;
775
776 if (64 < recursion_depth) {
777 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
778 return;
779 }
780
781 /* Line number is per-file. */
782 svfile = curp->file;
783 curp->file = file;
784 svprimary = curp->primary;
785 curp->primary = &blk;
786 curp->line = 1;
787 recursion_depth++;
788
789 /* Skip an UTF-8 byte order mark. */
790 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
791 (unsigned char)blk.buf[0] == 0xef &&
792 (unsigned char)blk.buf[1] == 0xbb &&
793 (unsigned char)blk.buf[2] == 0xbf) {
794 offset = 3;
795 curp->filenc &= ~MPARSE_LATIN1;
796 } else
797 offset = 0;
798
799 mparse_buf_r(curp, blk, offset, 1);
800
801 if (--recursion_depth == 0)
802 mparse_end(curp);
803
804 curp->primary = svprimary;
805 curp->file = svfile;
806 }
807
808 enum mandoclevel
809 mparse_readmem(struct mparse *curp, void *buf, size_t len,
810 const char *file)
811 {
812 struct buf blk;
813
814 blk.buf = buf;
815 blk.sz = len;
816
817 mparse_parse_buffer(curp, blk, file);
818 return curp->file_status;
819 }
820
821 /*
822 * Read the whole file into memory and call the parsers.
823 * Called recursively when an .so request is encountered.
824 */
825 enum mandoclevel
826 mparse_readfd(struct mparse *curp, int fd, const char *file)
827 {
828 struct buf blk;
829 int with_mmap;
830 int save_filenc;
831
832 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
833 save_filenc = curp->filenc;
834 curp->filenc = curp->options &
835 (MPARSE_UTF8 | MPARSE_LATIN1);
836 mparse_parse_buffer(curp, blk, file);
837 curp->filenc = save_filenc;
838 if (with_mmap)
839 munmap(blk.buf, blk.sz);
840 else
841 free(blk.buf);
842 }
843 return curp->file_status;
844 }
845
846 int
847 mparse_open(struct mparse *curp, const char *file)
848 {
849 char *cp;
850 int fd;
851
852 curp->file = file;
853 cp = strrchr(file, '.');
854 curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz"));
855
856 /* First try to use the filename as it is. */
857
858 if ((fd = open(file, O_RDONLY)) != -1)
859 return fd;
860
861 /*
862 * If that doesn't work and the filename doesn't
863 * already end in .gz, try appending .gz.
864 */
865
866 if ( ! curp->gzip) {
867 mandoc_asprintf(&cp, "%s.gz", file);
868 fd = open(cp, O_RDONLY);
869 free(cp);
870 if (fd != -1) {
871 curp->gzip = 1;
872 return fd;
873 }
874 }
875
876 /* Neither worked, give up. */
877
878 mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
879 return -1;
880 }
881
882 struct mparse *
883 mparse_alloc(int options, enum mandocerr mmin, mandocmsg mmsg,
884 enum mandoc_os os_e, const char *os_s)
885 {
886 struct mparse *curp;
887
888 curp = mandoc_calloc(1, sizeof(struct mparse));
889
890 curp->options = options;
891 curp->mmin = mmin;
892 curp->mmsg = mmsg;
893 curp->os_s = os_s;
894
895 curp->roff = roff_alloc(curp, options);
896 curp->man = roff_man_alloc(curp->roff, curp, curp->os_s,
897 curp->options & MPARSE_QUICK ? 1 : 0);
898 if (curp->options & MPARSE_MDOC) {
899 curp->man->macroset = MACROSET_MDOC;
900 if (curp->man->mdocmac == NULL)
901 curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
902 } else if (curp->options & MPARSE_MAN) {
903 curp->man->macroset = MACROSET_MAN;
904 if (curp->man->manmac == NULL)
905 curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
906 }
907 curp->man->first->tok = TOKEN_NONE;
908 curp->man->meta.os_e = os_e;
909 return curp;
910 }
911
912 void
913 mparse_reset(struct mparse *curp)
914 {
915 roff_reset(curp->roff);
916 roff_man_reset(curp->man);
917 free_buf_list(curp->secondary);
918 curp->secondary = NULL;
919
920 free(curp->sodest);
921 curp->sodest = NULL;
922
923 curp->file_status = MANDOCLEVEL_OK;
924 curp->gzip = 0;
925 }
926
927 void
928 mparse_free(struct mparse *curp)
929 {
930 roffhash_free(curp->man->mdocmac);
931 roffhash_free(curp->man->manmac);
932 roff_man_free(curp->man);
933 roff_free(curp->roff);
934 free_buf_list(curp->secondary);
935 free(curp->sodest);
936 free(curp);
937 }
938
939 void
940 mparse_result(struct mparse *curp, struct roff_man **man,
941 char **sodest)
942 {
943
944 if (sodest && NULL != (*sodest = curp->sodest)) {
945 *man = NULL;
946 return;
947 }
948 if (man)
949 *man = curp->man;
950 }
951
952 void
953 mparse_updaterc(struct mparse *curp, enum mandoclevel *rc)
954 {
955 if (curp->file_status > *rc)
956 *rc = curp->file_status;
957 }
958
959 void
960 mandoc_vmsg(enum mandocerr t, struct mparse *m,
961 int ln, int pos, const char *fmt, ...)
962 {
963 char buf[256];
964 va_list ap;
965
966 va_start(ap, fmt);
967 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
968 va_end(ap);
969
970 mandoc_msg(t, m, ln, pos, buf);
971 }
972
973 void
974 mandoc_msg(enum mandocerr er, struct mparse *m,
975 int ln, int col, const char *msg)
976 {
977 enum mandoclevel level;
978
979 if (er < m->mmin && er != MANDOCERR_FILE)
980 return;
981
982 level = MANDOCLEVEL_UNSUPP;
983 while (er < mandoclimits[level])
984 level--;
985
986 if (m->mmsg)
987 (*m->mmsg)(er, level, m->file, ln, col, msg);
988
989 if (m->file_status < level)
990 m->file_status = level;
991 }
992
993 const char *
994 mparse_strerror(enum mandocerr er)
995 {
996
997 return mandocerrs[er];
998 }
999
1000 const char *
1001 mparse_strlevel(enum mandoclevel lvl)
1002 {
1003 return mandoclevels[lvl];
1004 }
1005
1006 void
1007 mparse_copy(const struct mparse *p)
1008 {
1009 struct buf *buf;
1010
1011 for (buf = p->secondary; buf != NULL; buf = buf->next)
1012 puts(buf->buf);
1013 }