]> git.cameronkatri.com Git - mandoc.git/blob - read.c
Cleanup, no functional change:
[mandoc.git] / read.c
1 /* $Id: read.c,v 1.201 2018/12/13 06:18:20 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2018 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include "config.h"
20
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #include <sys/stat.h>
24
25 #include <assert.h>
26 #include <ctype.h>
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <stdarg.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <unistd.h>
34 #include <zlib.h>
35
36 #include "mandoc_aux.h"
37 #include "mandoc.h"
38 #include "roff.h"
39 #include "mdoc.h"
40 #include "man.h"
41 #include "libmandoc.h"
42 #include "roff_int.h"
43
44 #define REPARSE_LIMIT 1000
45
/*
 * State of one parser instance.  Allocated by mparse_alloc(),
 * cleared between input files by mparse_reset(), and released
 * by mparse_free().
 */
struct	mparse {
	struct roff	 *roff; /* roff parser (!NULL) */
	struct roff_man	 *man; /* man parser */
	char		 *sodest; /* filename pointed to by .so */
	const char	 *file; /* filename of current input file */
	struct buf	 *primary; /* buffer currently being parsed */
	struct buf	 *secondary; /* copy of top level input;
				      * list of lines, see mparse_copy() */
	struct buf	 *loop; /* open .while request line */
	const char	 *os_s; /* default operating system */
	mandocmsg	  mmsg; /* warning/error message handler;
				 * may be NULL, see mandoc_msg() */
	enum mandoclevel  file_status; /* status of current parse;
					* highest level seen by mandoc_msg() */
	enum mandocerr	  mmin; /* ignore messages below this */
	int		  options; /* parser options */
	int		  gzip; /* current input file is gzipped;
				 * set by mparse_open() */
	int		  filenc; /* encoding of the current file */
	int		  reparse_count; /* finite interp. stack;
					  * compared against REPARSE_LIMIT */
	int		  line; /* line number in the file */
};
64
/* Prototypes of the helper functions that are private to this file. */
static	void	  choose_parser(struct mparse *);
static	void	  free_buf_list(struct buf *);
static	void	  resize_buf(struct buf *, size_t);
static	int	  mparse_buf_r(struct mparse *, struct buf, size_t, int);
static	int	  read_whole_file(struct mparse *, const char *, int,
				struct buf *, int *);
static	void	  mparse_end(struct mparse *);
static	void	  mparse_parse_buffer(struct mparse *, struct buf,
			const char *);
74
/*
 * Indexed by message level.  mandoc_msg() starts at
 * MANDOCLEVEL_UNSUPP and steps down while the error code is
 * smaller than the entry, so each entry acts as the lowest error
 * code that is still classified at that level.
 */
static	const enum mandocerr	mandoclimits[MANDOCLEVEL_MAX] = {
	MANDOCERR_OK,
	MANDOCERR_OK,
	MANDOCERR_WARNING,
	MANDOCERR_ERROR,
	MANDOCERR_UNSUPP,
	MANDOCERR_MAX,
	MANDOCERR_MAX
};
84
/*
 * Message text for each error code; mparse_strerror() returns
 * mandocerrs[er].  The entries are positional, so their order
 * presumably has to mirror the declaration of enum mandocerr
 * elsewhere in the project -- confirm before inserting or
 * reordering entries.
 */
static	const char * const	mandocerrs[MANDOCERR_MAX] = {
	"ok",

	"base system convention",

	"Mdocdate found",
	"Mdocdate missing",
	"unknown architecture",
	"operating system explicitly specified",
	"RCS id missing",
	"referenced manual not found",

	"generic style suggestion",

	"legacy man(7) date format",
	"normalizing date format to",
	"lower case character in document title",
	"duplicate RCS id",
	"possible typo in section name",
	"unterminated quoted argument",
	"useless macro",
	"consider using OS macro",
	"errnos out of order",
	"duplicate errno",
	"trailing delimiter",
	"no blank before trailing delimiter",
	"fill mode already enabled, skipping",
	"fill mode already disabled, skipping",
	"verbatim \"--\", maybe consider using \\(em",
	"function name without markup",
	"whitespace at end of input line",
	"bad comment style",

	"generic warning",

	/* related to the prologue */
	"missing manual title, using UNTITLED",
	"missing manual title, using \"\"",
	"missing manual section, using \"\"",
	"unknown manual section",
	"missing date, using today's date",
	"cannot parse date, using it verbatim",
	"date in the future, using it anyway",
	"missing Os macro, using \"\"",
	"late prologue macro",
	"prologue macros out of order",

	/* related to document structure */
	".so is fragile, better use ln(1)",
	"no document body",
	"content before first section header",
	"first section is not \"NAME\"",
	"NAME section without Nm before Nd",
	"NAME section without description",
	"description not at the end of NAME",
	"bad NAME section content",
	"missing comma before name",
	"missing description line, using \"\"",
	"description line outside NAME section",
	"sections out of conventional order",
	"duplicate section title",
	"unexpected section",
	"cross reference to self",
	"unusual Xr order",
	"unusual Xr punctuation",
	"AUTHORS section without An macro",

	/* related to macros and nesting */
	"obsolete macro",
	"macro neither callable nor escaped",
	"skipping paragraph macro",
	"moving paragraph macro out of list",
	"skipping no-space macro",
	"blocks badly nested",
	"nested displays are not portable",
	"moving content out of list",
	"first macro on line",
	"line scope broken",
	"skipping blank line in line scope",

	/* related to missing macro arguments */
	"skipping empty request",
	"conditional request controls empty scope",
	"skipping empty macro",
	"empty block",
	"empty argument, using 0n",
	"missing display type, using -ragged",
	"list type is not the first argument",
	"missing -width in -tag list, using 6n",
	"missing utility name, using \"\"",
	"missing function name, using \"\"",
	"empty head in list item",
	"empty list item",
	"missing argument, using next line",
	"missing font type, using \\fR",
	"unknown font type, using \\fR",
	"nothing follows prefix",
	"empty reference block",
	"missing section argument",
	"missing -std argument, adding it",
	"missing option string, using \"\"",
	"missing resource identifier, using \"\"",
	"missing eqn box, using \"\"",

	/* related to bad macro arguments */
	"duplicate argument",
	"skipping duplicate argument",
	"skipping duplicate display type",
	"skipping duplicate list type",
	"skipping -width argument",
	"wrong number of cells",
	"unknown AT&T UNIX version",
	"comma in function argument",
	"parenthesis in function name",
	"unknown library name",
	"invalid content in Rs block",
	"invalid Boolean argument",
	"argument contains two font escapes",
	"unknown font, skipping request",
	"odd number of characters in request",

	/* related to plain text */
	"blank line in fill mode, using .sp",
	"tab in filled text",
	"new sentence, new line",
	"invalid escape sequence",
	"undefined string, using \"\"",

	/* related to tables */
	"tbl line starts with span",
	"tbl column starts with span",
	"skipping vertical bar in tbl layout",

	"generic error",

	/* related to tables */
	"non-alphabetic character in tbl options",
	"skipping unknown tbl option",
	"missing tbl option argument",
	"wrong tbl option argument size",
	"empty tbl layout",
	"invalid character in tbl layout",
	"unmatched parenthesis in tbl layout",
	"tbl without any data cells",
	"ignoring data in spanned tbl cell",
	"ignoring extra tbl data cells",
	"data block open at end of tbl",

	/* related to document structure and macros */
	/*
	 * NOTE(review): the NULL slot presumably belongs to
	 * MANDOCERR_FILE, for which callers in this file pass the
	 * complete message text themselves -- confirm against the
	 * enum declaration.
	 */
	NULL,
	"duplicate prologue macro",
	"skipping late title macro",
	"input stack limit exceeded, infinite loop?",
	"skipping bad character",
	"skipping unknown macro",
	"ignoring request outside macro",
	"skipping insecure request",
	"skipping item outside list",
	"skipping column outside column list",
	"skipping end of block that is not open",
	"fewer RS blocks open, skipping",
	"inserting missing end of block",
	"appending missing end of block",

	/* related to request and macro arguments */
	"escaped character not allowed in a name",
	"using macro argument outside macro",
	"argument number is not numeric",
	"NOT IMPLEMENTED: Bd -file",
	"skipping display without arguments",
	"missing list type, using -item",
	"argument is not numeric, using 1",
	"argument is not a character",
	"missing manual name, using \"\"",
	"uname(3) system call failed, using UNKNOWN",
	"unknown standard specifier",
	"skipping request without numeric argument",
	"excessive shift",
	"NOT IMPLEMENTED: .so with absolute path or \"..\"",
	".so request failed",
	"skipping all arguments",
	"skipping excess arguments",
	"divide by zero",

	"unsupported feature",
	"input too large",
	"unsupported control character",
	"unsupported roff request",
	"nested .while loops",
	"end of scope with open .while loop",
	"end of .while loop in inner scope",
	"cannot continue this .while loop",
	"eqn delim option in tbl",
	"unsupported tbl layout modifier",
	"ignoring macro in table",
};
281
/*
 * Name of each message level; mparse_strlevel() returns
 * mandoclevels[lvl].
 */
static	const char * const	mandoclevels[MANDOCLEVEL_MAX] = {
	"SUCCESS",
	"STYLE",
	"WARNING",
	"ERROR",
	"UNSUPP",
	"BADARG",
	"SYSERR"
};
291
292
293 static void
294 resize_buf(struct buf *buf, size_t initial)
295 {
296
297 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
298 buf->buf = mandoc_realloc(buf->buf, buf->sz);
299 }
300
301 static void
302 free_buf_list(struct buf *buf)
303 {
304 struct buf *tmp;
305
306 while (buf != NULL) {
307 tmp = buf;
308 buf = tmp->next;
309 free(tmp->buf);
310 free(tmp);
311 }
312 }
313
314 static void
315 choose_parser(struct mparse *curp)
316 {
317 char *cp, *ep;
318 int format;
319
320 /*
321 * If neither command line arguments -mdoc or -man select
322 * a parser nor the roff parser found a .Dd or .TH macro
323 * yet, look ahead in the main input buffer.
324 */
325
326 if ((format = roff_getformat(curp->roff)) == 0) {
327 cp = curp->primary->buf;
328 ep = cp + curp->primary->sz;
329 while (cp < ep) {
330 if (*cp == '.' || *cp == '\'') {
331 cp++;
332 if (cp[0] == 'D' && cp[1] == 'd') {
333 format = MPARSE_MDOC;
334 break;
335 }
336 if (cp[0] == 'T' && cp[1] == 'H') {
337 format = MPARSE_MAN;
338 break;
339 }
340 }
341 cp = memchr(cp, '\n', ep - cp);
342 if (cp == NULL)
343 break;
344 cp++;
345 }
346 }
347
348 if (format == MPARSE_MDOC) {
349 curp->man->macroset = MACROSET_MDOC;
350 if (curp->man->mdocmac == NULL)
351 curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
352 } else {
353 curp->man->macroset = MACROSET_MAN;
354 if (curp->man->manmac == NULL)
355 curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
356 }
357 curp->man->first->tok = TOKEN_NONE;
358 }
359
/*
 * Main parse routine for a buffer.
 * It assumes encoding and line numbering are already set up.
 * It can recurse directly (for invocations of user-defined
 * macros, inline equations, and input line traps)
 * and indirectly (for .so file inclusion).
 *
 * Parsing of blk starts at byte offset i.  A non-zero "start"
 * marks the buffer of an input file as passed by
 * mparse_parse_buffer(); the recursive calls in this function
 * pass 0.  Only with "start" set are the line number, the reparse
 * counter and the encoding cue handled per line, and NUL bytes
 * do not end the scan of a line.
 *
 * Returns ROFF_CONT unless parsing was cut short: ROFF_USERRET
 * when the roff parser asks to return from the current macro,
 * ROFF_IGN when the reparse limit was exceeded, or whatever a
 * nested call returned.
 */
static int
mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
{
	struct buf	 ln;
	struct buf	*firstln, *lastln, *thisln, *loop;
	const char	*save_file;
	char		*cp;
	size_t		 pos; /* byte number in the ln buffer */
	int		 line_result, result;
	int		 of;
	int		 lnn; /* line number in the real file */
	int		 fd;
	int		 inloop; /* Saw .while on this level. */
	unsigned char	 c;

	ln.sz = 256;
	ln.buf = mandoc_malloc(ln.sz);
	ln.next = NULL;
	firstln = loop = NULL;
	lnn = curp->line;
	pos = 0;
	inloop = 0;
	result = ROFF_CONT;

	/* One iteration per logical input line. */

	while (i < blk.sz && (blk.buf[i] != '\0' || pos != 0)) {
		if (start) {
			curp->line = lnn;
			curp->reparse_count = 0;

			/*
			 * While both encodings are still possible,
			 * look for an encoding cue on the first
			 * two lines.
			 */

			if (lnn < 3 &&
			    curp->filenc & MPARSE_UTF8 &&
			    curp->filenc & MPARSE_LATIN1)
				curp->filenc = preconv_cue(&blk, i);
		}

		/* Copy one line from blk into ln, byte by byte. */

		while (i < blk.sz && (start || blk.buf[i] != '\0')) {

			/*
			 * When finding an unescaped newline character,
			 * leave the character loop to process the line.
			 * Skip a preceding carriage return, if any.
			 */

			if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
			    '\n' == blk.buf[i + 1])
				++i;
			if ('\n' == blk.buf[i]) {
				++i;
				++lnn;
				break;
			}

			/*
			 * Make sure we have space for the worst
			 * case of 12 bytes: "\\[u10ffff]\n\0"
			 */

			if (pos + 12 > ln.sz)
				resize_buf(&ln, 256);

			/*
			 * Encode 8-bit input.
			 * If no encoding is selected or the conversion
			 * fails, report the byte and substitute '?'.
			 */

			c = blk.buf[i];
			if (c & 0x80) {
				if ( ! (curp->filenc && preconv_encode(
				    &blk, &i, &ln, &pos, &curp->filenc))) {
					mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
					    curp->line, pos, "0x%x", c);
					ln.buf[pos++] = '?';
					i++;
				}
				continue;
			}

			/*
			 * Exclude control characters.
			 * Each one is reported and replaced by '?',
			 * except that a carriage return is dropped.
			 */

			if (c == 0x7f || (c < 0x20 && c != 0x09)) {
				mandoc_vmsg(c == 0x00 || c == 0x04 ||
				    c > 0x0a ? MANDOCERR_CHAR_BAD :
				    MANDOCERR_CHAR_UNSUPP,
				    curp, curp->line, pos, "0x%x", c);
				i++;
				if (c != '\r')
					ln.buf[pos++] = '?';
				continue;
			}

			ln.buf[pos++] = blk.buf[i++];
		}
		ln.buf[pos] = '\0';

		/*
		 * Maintain a lookaside buffer of all lines
		 * parsed from this input source.
		 * The first list built while curp->secondary is
		 * still unset becomes curp->secondary.
		 */

		thisln = mandoc_malloc(sizeof(*thisln));
		thisln->buf = mandoc_strdup(ln.buf);
		thisln->sz = strlen(ln.buf) + 1;
		thisln->next = NULL;
		if (firstln == NULL) {
			firstln = lastln = thisln;
			if (curp->secondary == NULL)
				curp->secondary = firstln;
		} else {
			lastln->next = thisln;
			lastln = thisln;
		}

		/* XXX Ugly hack to mark the end of the input. */

		if (i == blk.sz || blk.buf[i] == '\0') {
			ln.buf[pos++] = '\n';
			ln.buf[pos] = '\0';
		}

		/*
		 * A significant amount of complexity is contained by
		 * the roff preprocessor.  It's line-oriented but can be
		 * expressed on one line, so we need at times to
		 * readjust our starting point and re-run it.  The roff
		 * preprocessor can also readjust the buffers with new
		 * data, so we pass them in wholesale.
		 */

		of = 0;
rerun:
		line_result = roff_parseln(curp->roff, curp->line, &ln, &of);

		/* Process options. */

		if (line_result & ROFF_APPEND)
			assert(line_result == (ROFF_IGN | ROFF_APPEND));

		if (line_result & ROFF_USERCALL)
			assert((line_result & ROFF_MASK) == ROFF_REPARSE);

		if (line_result & ROFF_USERRET) {
			assert(line_result == (ROFF_IGN | ROFF_USERRET));
			if (start == 0) {
				/* Return from the current macro. */
				result = ROFF_USERRET;
				goto out;
			}
		}

		/*
		 * Keep track of .while loops.  curp->loop points to
		 * the saved line of the open .while request, and
		 * "loop" is the saved line to be replayed next,
		 * if any.
		 */

		switch (line_result & ROFF_LOOPMASK) {
		case ROFF_IGN:
			break;
		case ROFF_WHILE:
			if (curp->loop != NULL) {
				/* Re-evaluation while replaying. */
				if (loop == curp->loop)
					break;
				mandoc_msg(MANDOCERR_WHILE_NEST,
				    curp, curp->line, pos, NULL);
			}
			curp->loop = thisln;
			loop = NULL;
			inloop = 1;
			break;
		case ROFF_LOOPCONT:
		case ROFF_LOOPEXIT:
			if (curp->loop == NULL) {
				mandoc_msg(MANDOCERR_WHILE_FAIL,
				    curp, curp->line, pos, NULL);
				break;
			}
			if (inloop == 0) {
				mandoc_msg(MANDOCERR_WHILE_INTO,
				    curp, curp->line, pos, NULL);
				curp->loop = loop = NULL;
				break;
			}
			if (line_result & ROFF_LOOPCONT)
				loop = curp->loop;
			else {
				curp->loop = loop = NULL;
				inloop = 0;
			}
			break;
		default:
			abort();
		}

		/* Process the main instruction from the roff parser. */

		switch (line_result & ROFF_MASK) {
		case ROFF_IGN:
			break;
		case ROFF_CONT:
			if (curp->man->macroset == MACROSET_NONE)
				choose_parser(curp);
			if ((curp->man->macroset == MACROSET_MDOC ?
			     mdoc_parseln(curp->man, curp->line, ln.buf, of) :
			     man_parseln(curp->man, curp->line, ln.buf, of)
			    ) == 2)
				goto out;
			break;
		case ROFF_RERUN:
			goto rerun;
		case ROFF_REPARSE:
			if (++curp->reparse_count > REPARSE_LIMIT) {
				/* Abort and return to the top level. */
				result = ROFF_IGN;
				mandoc_msg(MANDOCERR_ROFFLOOP, curp,
				    curp->line, pos, NULL);
				goto out;
			}
			result = mparse_buf_r(curp, ln, of, 0);
			if (line_result & ROFF_USERCALL) {
				roff_userret(curp->roff);
				/* Continue normally. */
				if (result & ROFF_USERRET)
					result = ROFF_CONT;
			}
			if (start == 0 && result != ROFF_CONT)
				goto out;
			break;
		case ROFF_SO:
			/*
			 * Unless the MPARSE_SO option is set, a .so
			 * request that ends the input is not followed;
			 * only its target is recorded.
			 */
			if ( ! (curp->options & MPARSE_SO) &&
			    (i >= blk.sz || blk.buf[i] == '\0')) {
				curp->sodest = mandoc_strdup(ln.buf + of);
				goto out;
			}
			save_file = curp->file;
			if ((fd = mparse_open(curp, ln.buf + of)) != -1) {
				mparse_readfd(curp, fd, ln.buf + of);
				close(fd);
				curp->file = save_file;
			} else {
				/*
				 * The file cannot be opened: parse
				 * a short replacement text instead.
				 */
				curp->file = save_file;
				mandoc_vmsg(MANDOCERR_SO_FAIL,
				    curp, curp->line, pos,
				    ".so %s", ln.buf + of);
				ln.sz = mandoc_asprintf(&cp,
				    ".sp\nSee the file %s.\n.sp",
				    ln.buf + of);
				free(ln.buf);
				ln.buf = cp;
				of = 0;
				mparse_buf_r(curp, ln, of, 0);
			}
			break;
		default:
			abort();
		}

		/* Start the next input line. */

		if (loop != NULL &&
		    (line_result & ROFF_LOOPMASK) == ROFF_IGN)
			loop = loop->next;

		/*
		 * While replaying a loop, the next line comes from
		 * the saved line list rather than from blk.
		 */

		if (loop != NULL) {
			if ((line_result & ROFF_APPEND) == 0)
				*ln.buf = '\0';
			if (ln.sz < loop->sz)
				resize_buf(&ln, loop->sz);
			(void)strlcat(ln.buf, loop->buf, ln.sz);
			of = 0;
			goto rerun;
		}

		/* With ROFF_APPEND, the next line extends this one. */

		pos = (line_result & ROFF_APPEND) ? strlen(ln.buf) : 0;
	}
out:
	if (inloop) {
		if (result != ROFF_USERRET)
			mandoc_msg(MANDOCERR_WHILE_OUTOF, curp,
			    curp->line, pos, NULL);
		curp->loop = NULL;
	}
	free(ln.buf);
	/* The list handed over to curp->secondary is kept. */
	if (firstln != curp->secondary)
		free_buf_list(firstln);
	return result;
}
648
/*
 * Load the complete content of the open file descriptor fd into fb.
 * Regular files that are not gzipped are mapped with mmap(2);
 * everything else, including files where mmap(2) fails, is read
 * into a heap buffer that grows as needed.
 *
 * On success, returns 1 with fb->buf and fb->sz set, and
 * *with_mmap tells the caller whether the buffer is a mapping (1)
 * or heap memory (0).  On failure, reports the problem through
 * mandoc_msg()/mandoc_vmsg() and returns 0.
 * The descriptor fd itself is never closed here.
 * The "file" argument is not used in the body.
 */
static int
read_whole_file(struct mparse *curp, const char *file, int fd,
		struct buf *fb, int *with_mmap)
{
	struct stat	 st;
	gzFile		 gz;
	size_t		 off;
	ssize_t		 ssz;
	int		 gzerrnum, retval;

	if (fstat(fd, &st) == -1) {
		mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
		    "fstat: %s", strerror(errno));
		return 0;
	}

	/*
	 * If we're a regular file, try just reading in the whole entry
	 * via mmap().  This is faster than reading it into blocks, and
	 * since each file is only a few bytes to begin with, I'm not
	 * concerned that this is going to tank any machines.
	 */

	if (curp->gzip == 0 && S_ISREG(st.st_mode)) {
		if (st.st_size > 0x7fffffff) {
			mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
			return 0;
		}
		*with_mmap = 1;
		fb->sz = (size_t)st.st_size;
		fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
		if (fb->buf != MAP_FAILED)
			return 1;
		/* Mapping failed: fall through to the read loop. */
	}

	if (curp->gzip) {
		/*
		 * Duplicating the file descriptor is required
		 * because we will have to call gzclose(3)
		 * to free memory used internally by zlib,
		 * but that will also close the file descriptor,
		 * which this function must not do.
		 */
		if ((fd = dup(fd)) == -1) {
			mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
			    "dup: %s", strerror(errno));
			return 0;
		}
		if ((gz = gzdopen(fd, "rb")) == NULL) {
			mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
			    "gzdopen: %s", strerror(errno));
			close(fd);
			return 0;
		}
	} else
		gz = NULL;

	/*
	 * If this isn't a regular file (like, say, stdin), then we must
	 * go the old way and just read things in bit by bit.
	 */

	*with_mmap = 0;
	off = 0;
	retval = 0;
	fb->sz = 0;
	fb->buf = NULL;
	for (;;) {
		/* Buffer full (or not yet allocated): enlarge it. */
		if (off == fb->sz) {
			if (fb->sz == (1U << 31)) {
				mandoc_msg(MANDOCERR_TOOLARGE, curp,
				    0, 0, NULL);
				break;
			}
			resize_buf(fb, 65536);
		}
		ssz = curp->gzip ?
		    gzread(gz, fb->buf + (int)off, fb->sz - off) :
		    read(fd, fb->buf + (int)off, fb->sz - off);
		/* End of input: record how many bytes were read. */
		if (ssz == 0) {
			fb->sz = off;
			retval = 1;
			break;
		}
		if (ssz == -1) {
			if (curp->gzip)
				(void)gzerror(gz, &gzerrnum);
			mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, "read: %s",
			    curp->gzip && gzerrnum != Z_ERRNO ?
			    zError(gzerrnum) : strerror(errno));
			break;
		}
		off += (size_t)ssz;
	}

	if (curp->gzip && (gzerrnum = gzclose(gz)) != Z_OK)
		mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, "gzclose: %s",
		    gzerrnum == Z_ERRNO ? strerror(errno) :
		    zError(gzerrnum));
	/* On failure, do not hand a partially filled buffer back. */
	if (retval == 0) {
		free(fb->buf);
		fb->buf = NULL;
	}
	return retval;
}
754
755 static void
756 mparse_end(struct mparse *curp)
757 {
758 if (curp->man->macroset == MACROSET_NONE)
759 curp->man->macroset = MACROSET_MAN;
760 if (curp->man->macroset == MACROSET_MDOC)
761 mdoc_endparse(curp->man);
762 else
763 man_endparse(curp->man);
764 roff_endparse(curp->roff);
765 }
766
767 static void
768 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
769 {
770 struct buf *svprimary;
771 const char *svfile;
772 size_t offset;
773 static int recursion_depth;
774
775 if (64 < recursion_depth) {
776 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
777 return;
778 }
779
780 /* Line number is per-file. */
781 svfile = curp->file;
782 curp->file = file;
783 svprimary = curp->primary;
784 curp->primary = &blk;
785 curp->line = 1;
786 recursion_depth++;
787
788 /* Skip an UTF-8 byte order mark. */
789 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
790 (unsigned char)blk.buf[0] == 0xef &&
791 (unsigned char)blk.buf[1] == 0xbb &&
792 (unsigned char)blk.buf[2] == 0xbf) {
793 offset = 3;
794 curp->filenc &= ~MPARSE_LATIN1;
795 } else
796 offset = 0;
797
798 mparse_buf_r(curp, blk, offset, 1);
799
800 if (--recursion_depth == 0)
801 mparse_end(curp);
802
803 curp->primary = svprimary;
804 curp->file = svfile;
805 }
806
807 enum mandoclevel
808 mparse_readmem(struct mparse *curp, void *buf, size_t len,
809 const char *file)
810 {
811 struct buf blk;
812
813 blk.buf = buf;
814 blk.sz = len;
815
816 mparse_parse_buffer(curp, blk, file);
817 return curp->file_status;
818 }
819
820 /*
821 * Read the whole file into memory and call the parsers.
822 * Called recursively when an .so request is encountered.
823 */
824 enum mandoclevel
825 mparse_readfd(struct mparse *curp, int fd, const char *file)
826 {
827 struct buf blk;
828 int with_mmap;
829 int save_filenc;
830
831 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
832 save_filenc = curp->filenc;
833 curp->filenc = curp->options &
834 (MPARSE_UTF8 | MPARSE_LATIN1);
835 mparse_parse_buffer(curp, blk, file);
836 curp->filenc = save_filenc;
837 if (with_mmap)
838 munmap(blk.buf, blk.sz);
839 else
840 free(blk.buf);
841 }
842 return curp->file_status;
843 }
844
845 int
846 mparse_open(struct mparse *curp, const char *file)
847 {
848 char *cp;
849 int fd;
850
851 curp->file = file;
852 cp = strrchr(file, '.');
853 curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz"));
854
855 /* First try to use the filename as it is. */
856
857 if ((fd = open(file, O_RDONLY)) != -1)
858 return fd;
859
860 /*
861 * If that doesn't work and the filename doesn't
862 * already end in .gz, try appending .gz.
863 */
864
865 if ( ! curp->gzip) {
866 mandoc_asprintf(&cp, "%s.gz", file);
867 fd = open(cp, O_RDONLY);
868 free(cp);
869 if (fd != -1) {
870 curp->gzip = 1;
871 return fd;
872 }
873 }
874
875 /* Neither worked, give up. */
876
877 mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
878 return -1;
879 }
880
881 struct mparse *
882 mparse_alloc(int options, enum mandocerr mmin, mandocmsg mmsg,
883 enum mandoc_os os_e, const char *os_s)
884 {
885 struct mparse *curp;
886
887 curp = mandoc_calloc(1, sizeof(struct mparse));
888
889 curp->options = options;
890 curp->mmin = mmin;
891 curp->mmsg = mmsg;
892 curp->os_s = os_s;
893
894 curp->roff = roff_alloc(curp, options);
895 curp->man = roff_man_alloc(curp->roff, curp, curp->os_s,
896 curp->options & MPARSE_QUICK ? 1 : 0);
897 if (curp->options & MPARSE_MDOC) {
898 curp->man->macroset = MACROSET_MDOC;
899 if (curp->man->mdocmac == NULL)
900 curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
901 } else if (curp->options & MPARSE_MAN) {
902 curp->man->macroset = MACROSET_MAN;
903 if (curp->man->manmac == NULL)
904 curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
905 }
906 curp->man->first->tok = TOKEN_NONE;
907 curp->man->meta.os_e = os_e;
908 return curp;
909 }
910
911 void
912 mparse_reset(struct mparse *curp)
913 {
914 roff_reset(curp->roff);
915 roff_man_reset(curp->man);
916 free_buf_list(curp->secondary);
917 curp->secondary = NULL;
918
919 free(curp->sodest);
920 curp->sodest = NULL;
921
922 curp->file_status = MANDOCLEVEL_OK;
923 curp->gzip = 0;
924 }
925
926 void
927 mparse_free(struct mparse *curp)
928 {
929 roffhash_free(curp->man->mdocmac);
930 roffhash_free(curp->man->manmac);
931 roff_man_free(curp->man);
932 roff_free(curp->roff);
933 free_buf_list(curp->secondary);
934 free(curp->sodest);
935 free(curp);
936 }
937
938 void
939 mparse_result(struct mparse *curp, struct roff_man **man,
940 char **sodest)
941 {
942
943 if (sodest && NULL != (*sodest = curp->sodest)) {
944 *man = NULL;
945 return;
946 }
947 if (man)
948 *man = curp->man;
949 }
950
951 void
952 mparse_updaterc(struct mparse *curp, enum mandoclevel *rc)
953 {
954 if (curp->file_status > *rc)
955 *rc = curp->file_status;
956 }
957
958 void
959 mandoc_vmsg(enum mandocerr t, struct mparse *m,
960 int ln, int pos, const char *fmt, ...)
961 {
962 char buf[256];
963 va_list ap;
964
965 va_start(ap, fmt);
966 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
967 va_end(ap);
968
969 mandoc_msg(t, m, ln, pos, buf);
970 }
971
972 void
973 mandoc_msg(enum mandocerr er, struct mparse *m,
974 int ln, int col, const char *msg)
975 {
976 enum mandoclevel level;
977
978 if (er < m->mmin && er != MANDOCERR_FILE)
979 return;
980
981 level = MANDOCLEVEL_UNSUPP;
982 while (er < mandoclimits[level])
983 level--;
984
985 if (m->mmsg)
986 (*m->mmsg)(er, level, m->file, ln, col, msg);
987
988 if (m->file_status < level)
989 m->file_status = level;
990 }
991
992 const char *
993 mparse_strerror(enum mandocerr er)
994 {
995
996 return mandocerrs[er];
997 }
998
999 const char *
1000 mparse_strlevel(enum mandoclevel lvl)
1001 {
1002 return mandoclevels[lvl];
1003 }
1004
1005 void
1006 mparse_copy(const struct mparse *p)
1007 {
1008 struct buf *buf;
1009
1010 for (buf = p->secondary; buf != NULL; buf = buf->next)
1011 puts(buf->buf);
1012 }