]> git.cameronkatri.com Git - mandoc.git/blob - read.c
Cleanup, no functional change:
[mandoc.git] / read.c
1 /* $Id: read.c,v 1.200 2018/08/25 16:53:39 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2018 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include "config.h"
20
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #include <sys/stat.h>
24
25 #include <assert.h>
26 #include <ctype.h>
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <stdarg.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <unistd.h>
34 #include <zlib.h>
35
36 #include "mandoc_aux.h"
37 #include "mandoc.h"
38 #include "roff.h"
39 #include "mdoc.h"
40 #include "man.h"
41 #include "libmandoc.h"
42
43 #define REPARSE_LIMIT 1000
44
45 struct mparse {
46 struct roff *roff; /* roff parser (!NULL) */
47 struct roff_man *man; /* man parser */
48 char *sodest; /* filename pointed to by .so */
49 const char *file; /* filename of current input file */
50 struct buf *primary; /* buffer currently being parsed */
51 struct buf *secondary; /* copy of top level input */
52 struct buf *loop; /* open .while request line */
53 const char *os_s; /* default operating system */
54 mandocmsg mmsg; /* warning/error message handler */
55 enum mandoclevel file_status; /* status of current parse */
56 enum mandocerr mmin; /* ignore messages below this */
57 int options; /* parser options */
58 int gzip; /* current input file is gzipped */
59 int filenc; /* encoding of the current file */
60 int reparse_count; /* finite interp. stack */
61 int line; /* line number in the file */
62 };
63
/* Forward declarations of the file-local helpers. */
static	void	  choose_parser(struct mparse *curp);
static	void	  free_buf_list(struct buf *buf);
static	void	  resize_buf(struct buf *buf, size_t initial);
static	int	  mparse_buf_r(struct mparse *curp, struct buf blk,
			size_t i, int start);
static	int	  read_whole_file(struct mparse *curp, const char *file,
			int fd, struct buf *fb, int *with_mmap);
static	void	  mparse_end(struct mparse *curp);
static	void	  mparse_parse_buffer(struct mparse *curp, struct buf blk,
			const char *file);
73
74 static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = {
75 MANDOCERR_OK,
76 MANDOCERR_OK,
77 MANDOCERR_WARNING,
78 MANDOCERR_ERROR,
79 MANDOCERR_UNSUPP,
80 MANDOCERR_MAX,
81 MANDOCERR_MAX
82 };
83
84 static const char * const mandocerrs[MANDOCERR_MAX] = {
85 "ok",
86
87 "base system convention",
88
89 "Mdocdate found",
90 "Mdocdate missing",
91 "unknown architecture",
92 "operating system explicitly specified",
93 "RCS id missing",
94 "referenced manual not found",
95
96 "generic style suggestion",
97
98 "legacy man(7) date format",
99 "normalizing date format to",
100 "lower case character in document title",
101 "duplicate RCS id",
102 "possible typo in section name",
103 "unterminated quoted argument",
104 "useless macro",
105 "consider using OS macro",
106 "errnos out of order",
107 "duplicate errno",
108 "trailing delimiter",
109 "no blank before trailing delimiter",
110 "fill mode already enabled, skipping",
111 "fill mode already disabled, skipping",
112 "verbatim \"--\", maybe consider using \\(em",
113 "function name without markup",
114 "whitespace at end of input line",
115 "bad comment style",
116
117 "generic warning",
118
119 /* related to the prologue */
120 "missing manual title, using UNTITLED",
121 "missing manual title, using \"\"",
122 "missing manual section, using \"\"",
123 "unknown manual section",
124 "missing date, using today's date",
125 "cannot parse date, using it verbatim",
126 "date in the future, using it anyway",
127 "missing Os macro, using \"\"",
128 "late prologue macro",
129 "prologue macros out of order",
130
131 /* related to document structure */
132 ".so is fragile, better use ln(1)",
133 "no document body",
134 "content before first section header",
135 "first section is not \"NAME\"",
136 "NAME section without Nm before Nd",
137 "NAME section without description",
138 "description not at the end of NAME",
139 "bad NAME section content",
140 "missing comma before name",
141 "missing description line, using \"\"",
142 "description line outside NAME section",
143 "sections out of conventional order",
144 "duplicate section title",
145 "unexpected section",
146 "cross reference to self",
147 "unusual Xr order",
148 "unusual Xr punctuation",
149 "AUTHORS section without An macro",
150
151 /* related to macros and nesting */
152 "obsolete macro",
153 "macro neither callable nor escaped",
154 "skipping paragraph macro",
155 "moving paragraph macro out of list",
156 "skipping no-space macro",
157 "blocks badly nested",
158 "nested displays are not portable",
159 "moving content out of list",
160 "first macro on line",
161 "line scope broken",
162 "skipping blank line in line scope",
163
164 /* related to missing macro arguments */
165 "skipping empty request",
166 "conditional request controls empty scope",
167 "skipping empty macro",
168 "empty block",
169 "empty argument, using 0n",
170 "missing display type, using -ragged",
171 "list type is not the first argument",
172 "missing -width in -tag list, using 6n",
173 "missing utility name, using \"\"",
174 "missing function name, using \"\"",
175 "empty head in list item",
176 "empty list item",
177 "missing argument, using next line",
178 "missing font type, using \\fR",
179 "unknown font type, using \\fR",
180 "nothing follows prefix",
181 "empty reference block",
182 "missing section argument",
183 "missing -std argument, adding it",
184 "missing option string, using \"\"",
185 "missing resource identifier, using \"\"",
186 "missing eqn box, using \"\"",
187
188 /* related to bad macro arguments */
189 "duplicate argument",
190 "skipping duplicate argument",
191 "skipping duplicate display type",
192 "skipping duplicate list type",
193 "skipping -width argument",
194 "wrong number of cells",
195 "unknown AT&T UNIX version",
196 "comma in function argument",
197 "parenthesis in function name",
198 "unknown library name",
199 "invalid content in Rs block",
200 "invalid Boolean argument",
201 "argument contains two font escapes",
202 "unknown font, skipping request",
203 "odd number of characters in request",
204
205 /* related to plain text */
206 "blank line in fill mode, using .sp",
207 "tab in filled text",
208 "new sentence, new line",
209 "invalid escape sequence",
210 "undefined string, using \"\"",
211
212 /* related to tables */
213 "tbl line starts with span",
214 "tbl column starts with span",
215 "skipping vertical bar in tbl layout",
216
217 "generic error",
218
219 /* related to tables */
220 "non-alphabetic character in tbl options",
221 "skipping unknown tbl option",
222 "missing tbl option argument",
223 "wrong tbl option argument size",
224 "empty tbl layout",
225 "invalid character in tbl layout",
226 "unmatched parenthesis in tbl layout",
227 "tbl without any data cells",
228 "ignoring data in spanned tbl cell",
229 "ignoring extra tbl data cells",
230 "data block open at end of tbl",
231
232 /* related to document structure and macros */
233 NULL,
234 "duplicate prologue macro",
235 "skipping late title macro",
236 "input stack limit exceeded, infinite loop?",
237 "skipping bad character",
238 "skipping unknown macro",
239 "ignoring request outside macro",
240 "skipping insecure request",
241 "skipping item outside list",
242 "skipping column outside column list",
243 "skipping end of block that is not open",
244 "fewer RS blocks open, skipping",
245 "inserting missing end of block",
246 "appending missing end of block",
247
248 /* related to request and macro arguments */
249 "escaped character not allowed in a name",
250 "using macro argument outside macro",
251 "argument number is not numeric",
252 "NOT IMPLEMENTED: Bd -file",
253 "skipping display without arguments",
254 "missing list type, using -item",
255 "argument is not numeric, using 1",
256 "argument is not a character",
257 "missing manual name, using \"\"",
258 "uname(3) system call failed, using UNKNOWN",
259 "unknown standard specifier",
260 "skipping request without numeric argument",
261 "excessive shift",
262 "NOT IMPLEMENTED: .so with absolute path or \"..\"",
263 ".so request failed",
264 "skipping all arguments",
265 "skipping excess arguments",
266 "divide by zero",
267
268 "unsupported feature",
269 "input too large",
270 "unsupported control character",
271 "unsupported roff request",
272 "nested .while loops",
273 "end of scope with open .while loop",
274 "end of .while loop in inner scope",
275 "cannot continue this .while loop",
276 "eqn delim option in tbl",
277 "unsupported tbl layout modifier",
278 "ignoring macro in table",
279 };
280
281 static const char * const mandoclevels[MANDOCLEVEL_MAX] = {
282 "SUCCESS",
283 "STYLE",
284 "WARNING",
285 "ERROR",
286 "UNSUPP",
287 "BADARG",
288 "SYSERR"
289 };
290
291
292 static void
293 resize_buf(struct buf *buf, size_t initial)
294 {
295
296 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
297 buf->buf = mandoc_realloc(buf->buf, buf->sz);
298 }
299
300 static void
301 free_buf_list(struct buf *buf)
302 {
303 struct buf *tmp;
304
305 while (buf != NULL) {
306 tmp = buf;
307 buf = tmp->next;
308 free(tmp->buf);
309 free(tmp);
310 }
311 }
312
313 static void
314 choose_parser(struct mparse *curp)
315 {
316 char *cp, *ep;
317 int format;
318
319 /*
320 * If neither command line arguments -mdoc or -man select
321 * a parser nor the roff parser found a .Dd or .TH macro
322 * yet, look ahead in the main input buffer.
323 */
324
325 if ((format = roff_getformat(curp->roff)) == 0) {
326 cp = curp->primary->buf;
327 ep = cp + curp->primary->sz;
328 while (cp < ep) {
329 if (*cp == '.' || *cp == '\'') {
330 cp++;
331 if (cp[0] == 'D' && cp[1] == 'd') {
332 format = MPARSE_MDOC;
333 break;
334 }
335 if (cp[0] == 'T' && cp[1] == 'H') {
336 format = MPARSE_MAN;
337 break;
338 }
339 }
340 cp = memchr(cp, '\n', ep - cp);
341 if (cp == NULL)
342 break;
343 cp++;
344 }
345 }
346
347 if (format == MPARSE_MDOC) {
348 curp->man->macroset = MACROSET_MDOC;
349 if (curp->man->mdocmac == NULL)
350 curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
351 } else {
352 curp->man->macroset = MACROSET_MAN;
353 if (curp->man->manmac == NULL)
354 curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
355 }
356 curp->man->first->tok = TOKEN_NONE;
357 }
358
359 /*
360 * Main parse routine for a buffer.
361 * It assumes encoding and line numbering are already set up.
362 * It can recurse directly (for invocations of user-defined
363 * macros, inline equations, and input line traps)
364 * and indirectly (for .so file inclusion).
365 */
366 static int
367 mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
368 {
369 struct buf ln;
370 struct buf *firstln, *lastln, *thisln, *loop;
371 const char *save_file;
372 char *cp;
373 size_t pos; /* byte number in the ln buffer */
374 int line_result, result;
375 int of;
376 int lnn; /* line number in the real file */
377 int fd;
378 int inloop; /* Saw .while on this level. */
379 unsigned char c;
380
381 ln.sz = 256;
382 ln.buf = mandoc_malloc(ln.sz);
383 ln.next = NULL;
384 firstln = loop = NULL;
385 lnn = curp->line;
386 pos = 0;
387 inloop = 0;
388 result = ROFF_CONT;
389
390 while (i < blk.sz && (blk.buf[i] != '\0' || pos != 0)) {
391 if (start) {
392 curp->line = lnn;
393 curp->reparse_count = 0;
394
395 if (lnn < 3 &&
396 curp->filenc & MPARSE_UTF8 &&
397 curp->filenc & MPARSE_LATIN1)
398 curp->filenc = preconv_cue(&blk, i);
399 }
400
401 while (i < blk.sz && (start || blk.buf[i] != '\0')) {
402
403 /*
404 * When finding an unescaped newline character,
405 * leave the character loop to process the line.
406 * Skip a preceding carriage return, if any.
407 */
408
409 if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
410 '\n' == blk.buf[i + 1])
411 ++i;
412 if ('\n' == blk.buf[i]) {
413 ++i;
414 ++lnn;
415 break;
416 }
417
418 /*
419 * Make sure we have space for the worst
420 * case of 12 bytes: "\\[u10ffff]\n\0"
421 */
422
423 if (pos + 12 > ln.sz)
424 resize_buf(&ln, 256);
425
426 /*
427 * Encode 8-bit input.
428 */
429
430 c = blk.buf[i];
431 if (c & 0x80) {
432 if ( ! (curp->filenc && preconv_encode(
433 &blk, &i, &ln, &pos, &curp->filenc))) {
434 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
435 curp->line, pos, "0x%x", c);
436 ln.buf[pos++] = '?';
437 i++;
438 }
439 continue;
440 }
441
442 /*
443 * Exclude control characters.
444 */
445
446 if (c == 0x7f || (c < 0x20 && c != 0x09)) {
447 mandoc_vmsg(c == 0x00 || c == 0x04 ||
448 c > 0x0a ? MANDOCERR_CHAR_BAD :
449 MANDOCERR_CHAR_UNSUPP,
450 curp, curp->line, pos, "0x%x", c);
451 i++;
452 if (c != '\r')
453 ln.buf[pos++] = '?';
454 continue;
455 }
456
457 ln.buf[pos++] = blk.buf[i++];
458 }
459 ln.buf[pos] = '\0';
460
461 /*
462 * Maintain a lookaside buffer of all lines.
463 * parsed from this input source.
464 */
465
466 thisln = mandoc_malloc(sizeof(*thisln));
467 thisln->buf = mandoc_strdup(ln.buf);
468 thisln->sz = strlen(ln.buf) + 1;
469 thisln->next = NULL;
470 if (firstln == NULL) {
471 firstln = lastln = thisln;
472 if (curp->secondary == NULL)
473 curp->secondary = firstln;
474 } else {
475 lastln->next = thisln;
476 lastln = thisln;
477 }
478
479 /* XXX Ugly hack to mark the end of the input. */
480
481 if (i == blk.sz || blk.buf[i] == '\0') {
482 ln.buf[pos++] = '\n';
483 ln.buf[pos] = '\0';
484 }
485
486 /*
487 * A significant amount of complexity is contained by
488 * the roff preprocessor. It's line-oriented but can be
489 * expressed on one line, so we need at times to
490 * readjust our starting point and re-run it. The roff
491 * preprocessor can also readjust the buffers with new
492 * data, so we pass them in wholesale.
493 */
494
495 of = 0;
496 rerun:
497 line_result = roff_parseln(curp->roff, curp->line, &ln, &of);
498
499 /* Process options. */
500
501 if (line_result & ROFF_APPEND)
502 assert(line_result == (ROFF_IGN | ROFF_APPEND));
503
504 if (line_result & ROFF_USERCALL)
505 assert((line_result & ROFF_MASK) == ROFF_REPARSE);
506
507 if (line_result & ROFF_USERRET) {
508 assert(line_result == (ROFF_IGN | ROFF_USERRET));
509 if (start == 0) {
510 /* Return from the current macro. */
511 result = ROFF_USERRET;
512 goto out;
513 }
514 }
515
516 switch (line_result & ROFF_LOOPMASK) {
517 case ROFF_IGN:
518 break;
519 case ROFF_WHILE:
520 if (curp->loop != NULL) {
521 if (loop == curp->loop)
522 break;
523 mandoc_msg(MANDOCERR_WHILE_NEST,
524 curp, curp->line, pos, NULL);
525 }
526 curp->loop = thisln;
527 loop = NULL;
528 inloop = 1;
529 break;
530 case ROFF_LOOPCONT:
531 case ROFF_LOOPEXIT:
532 if (curp->loop == NULL) {
533 mandoc_msg(MANDOCERR_WHILE_FAIL,
534 curp, curp->line, pos, NULL);
535 break;
536 }
537 if (inloop == 0) {
538 mandoc_msg(MANDOCERR_WHILE_INTO,
539 curp, curp->line, pos, NULL);
540 curp->loop = loop = NULL;
541 break;
542 }
543 if (line_result & ROFF_LOOPCONT)
544 loop = curp->loop;
545 else {
546 curp->loop = loop = NULL;
547 inloop = 0;
548 }
549 break;
550 default:
551 abort();
552 }
553
554 /* Process the main instruction from the roff parser. */
555
556 switch (line_result & ROFF_MASK) {
557 case ROFF_IGN:
558 break;
559 case ROFF_CONT:
560 if (curp->man->macroset == MACROSET_NONE)
561 choose_parser(curp);
562 if ((curp->man->macroset == MACROSET_MDOC ?
563 mdoc_parseln(curp->man, curp->line, ln.buf, of) :
564 man_parseln(curp->man, curp->line, ln.buf, of)
565 ) == 2)
566 goto out;
567 break;
568 case ROFF_RERUN:
569 goto rerun;
570 case ROFF_REPARSE:
571 if (++curp->reparse_count > REPARSE_LIMIT) {
572 /* Abort and return to the top level. */
573 result = ROFF_IGN;
574 mandoc_msg(MANDOCERR_ROFFLOOP, curp,
575 curp->line, pos, NULL);
576 goto out;
577 }
578 result = mparse_buf_r(curp, ln, of, 0);
579 if (line_result & ROFF_USERCALL) {
580 roff_userret(curp->roff);
581 /* Continue normally. */
582 if (result & ROFF_USERRET)
583 result = ROFF_CONT;
584 }
585 if (start == 0 && result != ROFF_CONT)
586 goto out;
587 break;
588 case ROFF_SO:
589 if ( ! (curp->options & MPARSE_SO) &&
590 (i >= blk.sz || blk.buf[i] == '\0')) {
591 curp->sodest = mandoc_strdup(ln.buf + of);
592 goto out;
593 }
594 save_file = curp->file;
595 if ((fd = mparse_open(curp, ln.buf + of)) != -1) {
596 mparse_readfd(curp, fd, ln.buf + of);
597 close(fd);
598 curp->file = save_file;
599 } else {
600 curp->file = save_file;
601 mandoc_vmsg(MANDOCERR_SO_FAIL,
602 curp, curp->line, pos,
603 ".so %s", ln.buf + of);
604 ln.sz = mandoc_asprintf(&cp,
605 ".sp\nSee the file %s.\n.sp",
606 ln.buf + of);
607 free(ln.buf);
608 ln.buf = cp;
609 of = 0;
610 mparse_buf_r(curp, ln, of, 0);
611 }
612 break;
613 default:
614 abort();
615 }
616
617 /* Start the next input line. */
618
619 if (loop != NULL &&
620 (line_result & ROFF_LOOPMASK) == ROFF_IGN)
621 loop = loop->next;
622
623 if (loop != NULL) {
624 if ((line_result & ROFF_APPEND) == 0)
625 *ln.buf = '\0';
626 if (ln.sz < loop->sz)
627 resize_buf(&ln, loop->sz);
628 (void)strlcat(ln.buf, loop->buf, ln.sz);
629 of = 0;
630 goto rerun;
631 }
632
633 pos = (line_result & ROFF_APPEND) ? strlen(ln.buf) : 0;
634 }
635 out:
636 if (inloop) {
637 if (result != ROFF_USERRET)
638 mandoc_msg(MANDOCERR_WHILE_OUTOF, curp,
639 curp->line, pos, NULL);
640 curp->loop = NULL;
641 }
642 free(ln.buf);
643 if (firstln != curp->secondary)
644 free_buf_list(firstln);
645 return result;
646 }
647
648 static int
649 read_whole_file(struct mparse *curp, const char *file, int fd,
650 struct buf *fb, int *with_mmap)
651 {
652 struct stat st;
653 gzFile gz;
654 size_t off;
655 ssize_t ssz;
656 int gzerrnum, retval;
657
658 if (fstat(fd, &st) == -1) {
659 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
660 "fstat: %s", strerror(errno));
661 return 0;
662 }
663
664 /*
665 * If we're a regular file, try just reading in the whole entry
666 * via mmap(). This is faster than reading it into blocks, and
667 * since each file is only a few bytes to begin with, I'm not
668 * concerned that this is going to tank any machines.
669 */
670
671 if (curp->gzip == 0 && S_ISREG(st.st_mode)) {
672 if (st.st_size > 0x7fffffff) {
673 mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
674 return 0;
675 }
676 *with_mmap = 1;
677 fb->sz = (size_t)st.st_size;
678 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
679 if (fb->buf != MAP_FAILED)
680 return 1;
681 }
682
683 if (curp->gzip) {
684 /*
685 * Duplicating the file descriptor is required
686 * because we will have to call gzclose(3)
687 * to free memory used internally by zlib,
688 * but that will also close the file descriptor,
689 * which this function must not do.
690 */
691 if ((fd = dup(fd)) == -1) {
692 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
693 "dup: %s", strerror(errno));
694 return 0;
695 }
696 if ((gz = gzdopen(fd, "rb")) == NULL) {
697 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
698 "gzdopen: %s", strerror(errno));
699 close(fd);
700 return 0;
701 }
702 } else
703 gz = NULL;
704
705 /*
706 * If this isn't a regular file (like, say, stdin), then we must
707 * go the old way and just read things in bit by bit.
708 */
709
710 *with_mmap = 0;
711 off = 0;
712 retval = 0;
713 fb->sz = 0;
714 fb->buf = NULL;
715 for (;;) {
716 if (off == fb->sz) {
717 if (fb->sz == (1U << 31)) {
718 mandoc_msg(MANDOCERR_TOOLARGE, curp,
719 0, 0, NULL);
720 break;
721 }
722 resize_buf(fb, 65536);
723 }
724 ssz = curp->gzip ?
725 gzread(gz, fb->buf + (int)off, fb->sz - off) :
726 read(fd, fb->buf + (int)off, fb->sz - off);
727 if (ssz == 0) {
728 fb->sz = off;
729 retval = 1;
730 break;
731 }
732 if (ssz == -1) {
733 if (curp->gzip)
734 (void)gzerror(gz, &gzerrnum);
735 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, "read: %s",
736 curp->gzip && gzerrnum != Z_ERRNO ?
737 zError(gzerrnum) : strerror(errno));
738 break;
739 }
740 off += (size_t)ssz;
741 }
742
743 if (curp->gzip && (gzerrnum = gzclose(gz)) != Z_OK)
744 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, "gzclose: %s",
745 gzerrnum == Z_ERRNO ? strerror(errno) :
746 zError(gzerrnum));
747 if (retval == 0) {
748 free(fb->buf);
749 fb->buf = NULL;
750 }
751 return retval;
752 }
753
754 static void
755 mparse_end(struct mparse *curp)
756 {
757 if (curp->man->macroset == MACROSET_NONE)
758 curp->man->macroset = MACROSET_MAN;
759 if (curp->man->macroset == MACROSET_MDOC)
760 mdoc_endparse(curp->man);
761 else
762 man_endparse(curp->man);
763 roff_endparse(curp->roff);
764 }
765
766 static void
767 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
768 {
769 struct buf *svprimary;
770 const char *svfile;
771 size_t offset;
772 static int recursion_depth;
773
774 if (64 < recursion_depth) {
775 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
776 return;
777 }
778
779 /* Line number is per-file. */
780 svfile = curp->file;
781 curp->file = file;
782 svprimary = curp->primary;
783 curp->primary = &blk;
784 curp->line = 1;
785 recursion_depth++;
786
787 /* Skip an UTF-8 byte order mark. */
788 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
789 (unsigned char)blk.buf[0] == 0xef &&
790 (unsigned char)blk.buf[1] == 0xbb &&
791 (unsigned char)blk.buf[2] == 0xbf) {
792 offset = 3;
793 curp->filenc &= ~MPARSE_LATIN1;
794 } else
795 offset = 0;
796
797 mparse_buf_r(curp, blk, offset, 1);
798
799 if (--recursion_depth == 0)
800 mparse_end(curp);
801
802 curp->primary = svprimary;
803 curp->file = svfile;
804 }
805
806 enum mandoclevel
807 mparse_readmem(struct mparse *curp, void *buf, size_t len,
808 const char *file)
809 {
810 struct buf blk;
811
812 blk.buf = buf;
813 blk.sz = len;
814
815 mparse_parse_buffer(curp, blk, file);
816 return curp->file_status;
817 }
818
819 /*
820 * Read the whole file into memory and call the parsers.
821 * Called recursively when an .so request is encountered.
822 */
823 enum mandoclevel
824 mparse_readfd(struct mparse *curp, int fd, const char *file)
825 {
826 struct buf blk;
827 int with_mmap;
828 int save_filenc;
829
830 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
831 save_filenc = curp->filenc;
832 curp->filenc = curp->options &
833 (MPARSE_UTF8 | MPARSE_LATIN1);
834 mparse_parse_buffer(curp, blk, file);
835 curp->filenc = save_filenc;
836 if (with_mmap)
837 munmap(blk.buf, blk.sz);
838 else
839 free(blk.buf);
840 }
841 return curp->file_status;
842 }
843
844 int
845 mparse_open(struct mparse *curp, const char *file)
846 {
847 char *cp;
848 int fd;
849
850 curp->file = file;
851 cp = strrchr(file, '.');
852 curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz"));
853
854 /* First try to use the filename as it is. */
855
856 if ((fd = open(file, O_RDONLY)) != -1)
857 return fd;
858
859 /*
860 * If that doesn't work and the filename doesn't
861 * already end in .gz, try appending .gz.
862 */
863
864 if ( ! curp->gzip) {
865 mandoc_asprintf(&cp, "%s.gz", file);
866 fd = open(cp, O_RDONLY);
867 free(cp);
868 if (fd != -1) {
869 curp->gzip = 1;
870 return fd;
871 }
872 }
873
874 /* Neither worked, give up. */
875
876 mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
877 return -1;
878 }
879
880 struct mparse *
881 mparse_alloc(int options, enum mandocerr mmin, mandocmsg mmsg,
882 enum mandoc_os os_e, const char *os_s)
883 {
884 struct mparse *curp;
885
886 curp = mandoc_calloc(1, sizeof(struct mparse));
887
888 curp->options = options;
889 curp->mmin = mmin;
890 curp->mmsg = mmsg;
891 curp->os_s = os_s;
892
893 curp->roff = roff_alloc(curp, options);
894 curp->man = roff_man_alloc(curp->roff, curp, curp->os_s,
895 curp->options & MPARSE_QUICK ? 1 : 0);
896 if (curp->options & MPARSE_MDOC) {
897 curp->man->macroset = MACROSET_MDOC;
898 if (curp->man->mdocmac == NULL)
899 curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
900 } else if (curp->options & MPARSE_MAN) {
901 curp->man->macroset = MACROSET_MAN;
902 if (curp->man->manmac == NULL)
903 curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
904 }
905 curp->man->first->tok = TOKEN_NONE;
906 curp->man->meta.os_e = os_e;
907 return curp;
908 }
909
910 void
911 mparse_reset(struct mparse *curp)
912 {
913 roff_reset(curp->roff);
914 roff_man_reset(curp->man);
915 free_buf_list(curp->secondary);
916 curp->secondary = NULL;
917
918 free(curp->sodest);
919 curp->sodest = NULL;
920
921 curp->file_status = MANDOCLEVEL_OK;
922 curp->gzip = 0;
923 }
924
925 void
926 mparse_free(struct mparse *curp)
927 {
928 roffhash_free(curp->man->mdocmac);
929 roffhash_free(curp->man->manmac);
930 roff_man_free(curp->man);
931 roff_free(curp->roff);
932 free_buf_list(curp->secondary);
933 free(curp->sodest);
934 free(curp);
935 }
936
937 void
938 mparse_result(struct mparse *curp, struct roff_man **man,
939 char **sodest)
940 {
941
942 if (sodest && NULL != (*sodest = curp->sodest)) {
943 *man = NULL;
944 return;
945 }
946 if (man)
947 *man = curp->man;
948 }
949
950 void
951 mparse_updaterc(struct mparse *curp, enum mandoclevel *rc)
952 {
953 if (curp->file_status > *rc)
954 *rc = curp->file_status;
955 }
956
957 void
958 mandoc_vmsg(enum mandocerr t, struct mparse *m,
959 int ln, int pos, const char *fmt, ...)
960 {
961 char buf[256];
962 va_list ap;
963
964 va_start(ap, fmt);
965 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
966 va_end(ap);
967
968 mandoc_msg(t, m, ln, pos, buf);
969 }
970
971 void
972 mandoc_msg(enum mandocerr er, struct mparse *m,
973 int ln, int col, const char *msg)
974 {
975 enum mandoclevel level;
976
977 if (er < m->mmin && er != MANDOCERR_FILE)
978 return;
979
980 level = MANDOCLEVEL_UNSUPP;
981 while (er < mandoclimits[level])
982 level--;
983
984 if (m->mmsg)
985 (*m->mmsg)(er, level, m->file, ln, col, msg);
986
987 if (m->file_status < level)
988 m->file_status = level;
989 }
990
991 const char *
992 mparse_strerror(enum mandocerr er)
993 {
994
995 return mandocerrs[er];
996 }
997
998 const char *
999 mparse_strlevel(enum mandoclevel lvl)
1000 {
1001 return mandoclevels[lvl];
1002 }
1003
1004 void
1005 mparse_copy(const struct mparse *p)
1006 {
1007 struct buf *buf;
1008
1009 for (buf = p->secondary; buf != NULL; buf = buf->next)
1010 puts(buf->buf);
1011 }