X-Git-Url: https://git.cameronkatri.com/mandoc.git/blobdiff_plain/2c217d985cd43cfa1b9b2155e4189511d8f122ae..cc281721217973092f10a3c7a71ce0004c65dbb8:/main.c

diff --git a/main.c b/main.c
index 90960915..4ad45ab2 100644
--- a/main.c
+++ b/main.c
@@ -1,6 +1,7 @@
-/*	$Id: main.c,v 1.74 2010/05/15 22:28:22 kristaps Exp $ */
+/*	$Id: main.c,v 1.101 2010/07/29 22:00:39 joerg Exp $ */
 /*
- * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
+ * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -22,6 +23,7 @@
 #include <sys/stat.h>
 
 #include <assert.h>
+#include <ctype.h>
 #include <fcntl.h>
 #include <stdio.h>
 #include <stdint.h>
@@ -30,10 +32,14 @@
 #include <unistd.h>
 
 #include "mandoc.h"
+#include "main.h"
 #include "mdoc.h"
 #include "man.h"
 #include "roff.h"
-#include "main.h"
+
+#ifndef MAP_FILE
+#define	MAP_FILE	0
+#endif
 
 #define	UNCONST(a)	((void *)(uintptr_t)(const void *)(a))
 
@@ -65,7 +71,9 @@ enum	outt {
 	OUTT_TREE,
 	OUTT_HTML,
 	OUTT_XHTML,
-	OUTT_LINT
+	OUTT_LINT,
+	OUTT_PS,
+	OUTT_PDF
 };
 
 struct	curparse {
@@ -81,17 +89,97 @@ struct	curparse {
 #define	FL_NIGN_MACRO	 (1 << 2) 	/* Don't ignore bad macros. */
 #define	FL_IGN_ERRORS	 (1 << 4)	/* Ignore failed parse. */
 #define	FL_STRICT	  FL_NIGN_ESCAPE | \
-			  FL_NIGN_MACRO
-	enum intt	  inttype;	/* Input parsers... */
-	struct man	 *man;
-	struct mdoc	 *mdoc;
-	struct roff	 *roff;
-	enum outt	  outtype;	/* Output devices... */
-	out_mdoc	  outmdoc;
-	out_man	  	  outman;
-	out_free	  outfree;
-	void		 *outdata;
-	char		  outopts[BUFSIZ];
+			  FL_NIGN_MACRO /* ignore nothing */
+	enum intt	  inttype;	/* which parser to use */
+	struct man	 *man;		/* man parser */
+	struct mdoc	 *mdoc;		/* mdoc parser */
+	struct roff	 *roff;		/* roff parser (!NULL) */
+	struct regset	  regs;		/* roff registers */
+	enum outt	  outtype; 	/* which output to use */
+	out_mdoc	  outmdoc;	/* mdoc output ptr */
+	out_man	  	  outman;	/* man output ptr */
+	out_free	  outfree;	/* free output ptr */
+	void		 *outdata;	/* data for output */
+	char		  outopts[BUFSIZ]; /* buf of output opts */
+};
+
+static	const char * const	mandocerrs[MANDOCERR_MAX] = {
+	"ok",
+
+	"generic warning",
+
+	"text should be uppercase",
+	"sections out of conventional order",
+	"section name repeats",
+	"out of order prologue",
+	"repeated prologue entry",
+	"list type must come first",
+	"bad standard",
+	"bad library",
+	"tab in non-literal context",
+	"bad escape sequence",
+	"unterminated quoted string",
+	"argument requires the width argument",
+	"superfluous width argument",
+	"ignoring argument",
+	"bad date argument",
+	"bad width argument",
+	"unknown manual section",
+	"section not in conventional manual section",
+	"end of line whitespace",
+	"blocks badly nested",
+	"scope open on exit",
+
+	"generic error",
+
+	"NAME section must come first",
+	"bad Boolean value",
+	"child violates parent syntax",
+	"bad AT&T symbol",
+	"list type repeated",
+	"display type repeated",
+	"argument repeated",
+	"manual name not yet set",
+	"obsolete macro ignored",
+	"empty macro ignored",
+	"macro not allowed in body",
+	"macro not allowed in prologue",
+	"bad character",
+	"bad NAME section contents",
+	"no blank lines",
+	"no text in this context",
+	"bad comment style",
+	"unknown macro will be lost",
+	"line scope broken",
+	"argument count wrong",
+	"request scope close w/none open",
+	"scope already open",
+	"macro requires line argument(s)",
+	"macro requires body argument(s)",
+	"macro requires argument(s)",
+	"no title in document",
+	"missing list type",
+	"missing display type",
+	"missing font type",
+	"line argument(s) will be lost",
+	"body argument(s) will be lost",
+
+	"generic fatal error",
+
+	"column syntax is inconsistent",
+	"displays may not be nested",
+	"unsupported display type",
+	"blocks badly nested",
+	"no such block is open",
+	"scope broken, syntax violated",
+	"line scope broken, syntax violated",
+	"argument count wrong, violates syntax",
+	"child violates parent syntax",
+	"argument count wrong, violates syntax",
+	"no document body",
+	"no document prologue",
+	"utsname system call failed",
+	"memory exhausted",
 };
 
 static	void		  fdesc(struct curparse *);
@@ -100,9 +188,7 @@ static	int		  foptions(int *, char *);
 static	struct man	 *man_init(struct curparse *);
 static	struct mdoc	 *mdoc_init(struct curparse *);
 static	struct roff	 *roff_init(struct curparse *);
-static	int		  merr(void *, int, int, const char *); /* DEPRECATED */
 static	int		  moptions(enum intt *, char *);
-static	int		  mwarn(void *, int, int, const char *); /* DEPRECATED */
 static	int		  mmsg(enum mandocerr, void *, 
 				int, int, const char *);
 static	int		  pset(const char *, int, struct curparse *,
@@ -113,8 +199,8 @@ static	void		  version(void) __attribute__((noreturn));
 static	int		  woptions(int *, char *);
 
 static	const char	 *progname;
-static 	int		  with_error;
-static	int		  with_warning;
+static	int		  with_fatal;
+static	int		  with_error;
 
 int
 main(int argc, char *argv[])
@@ -177,7 +263,7 @@ main(int argc, char *argv[])
 	while (*argv) {
 		ffile(*argv, &curp);
 
-		if (with_error && !(curp.fflags & FL_IGN_ERRORS))
+		if (with_fatal && !(curp.fflags & FL_IGN_ERRORS))
 			break;
 		++argv;
 	}
@@ -191,7 +277,7 @@ main(int argc, char *argv[])
 	if (curp.roff)
 		roff_free(curp.roff);
 
-	return((with_warning || with_error) ? 
+	return((with_fatal || with_error) ? 
 			EXIT_FAILURE :  EXIT_SUCCESS);
 }
 
@@ -220,10 +306,6 @@ static struct man *
 man_init(struct curparse *curp)
 {
 	int		 pflags;
-	struct man_cb	 mancb;
-
-	mancb.man_err = merr;
-	mancb.man_warn = mwarn;
 
 	/* Defaults from mandoc.1. */
 
@@ -234,7 +316,7 @@ man_init(struct curparse *curp)
 	if (curp->fflags & FL_NIGN_ESCAPE)
 		pflags &= ~MAN_IGN_ESCAPE;
 
-	return(man_alloc(curp, pflags, &mancb));
+	return(man_alloc(&curp->regs, curp, pflags, mmsg));
 }
 
 
@@ -242,7 +324,7 @@ static struct roff *
 roff_init(struct curparse *curp)
 {
 
-	return(roff_alloc(mmsg, curp));
+	return(roff_alloc(&curp->regs, mmsg, curp));
 }
 
 
@@ -250,10 +332,6 @@ static struct mdoc *
 mdoc_init(struct curparse *curp)
 {
 	int		 pflags;
-	struct mdoc_cb	 mdoccb;
-
-	mdoccb.mdoc_err = merr;
-	mdoccb.mdoc_warn = mwarn;
 
 	/* Defaults from mandoc.1. */
 
@@ -266,7 +344,7 @@ mdoc_init(struct curparse *curp)
 	if (curp->fflags & FL_NIGN_MACRO)
 		pflags &= ~MDOC_IGN_MACRO;
 
-	return(mdoc_alloc(curp, pflags, &mdoccb));
+	return(mdoc_alloc(&curp->regs, curp, pflags, mmsg));
 }
 
 
@@ -277,7 +355,7 @@ ffile(const char *file, struct curparse *curp)
 	curp->file = file;
 	if (-1 == (curp->fd = open(curp->file, O_RDONLY, 0))) {
 		perror(curp->file);
-		with_error = 1;
+		with_fatal = 1;
 		return;
 	}
 
@@ -318,7 +396,7 @@ read_whole_file(struct curparse *curp, struct buf *fb, int *with_mmap)
 
 	if (-1 == fstat(curp->fd, &st)) {
 		perror(curp->file);
-		with_error = 1;
+		with_fatal = 1;
 		return(0);
 	}
 
@@ -333,13 +411,13 @@ read_whole_file(struct curparse *curp, struct buf *fb, int *with_mmap)
 		if (st.st_size >= (1U << 31)) {
 			fprintf(stderr, "%s: input too large\n", 
 					curp->file);
-			with_error = 1;
+			with_fatal = 1;
 			return(0);
 		}
 		*with_mmap = 1;
 		fb->sz = (size_t)st.st_size;
 		fb->buf = mmap(NULL, fb->sz, PROT_READ, 
-				MAP_FILE, curp->fd, 0);
+				MAP_FILE|MAP_SHARED, curp->fd, 0);
 		if (fb->buf != MAP_FAILED)
 			return(1);
 	}
@@ -377,7 +455,7 @@ read_whole_file(struct curparse *curp, struct buf *fb, int *with_mmap)
 
 	free(fb->buf);
 	fb->buf = NULL;
-	with_error = 1;
+	with_fatal = 1;
 	return(0);
 }
 
@@ -386,7 +464,7 @@ static void
 fdesc(struct curparse *curp)
 {
 	struct buf	 ln, blk;
-	int		 i, pos, lnn, lnn_start, with_mmap;
+	int		 i, pos, lnn, lnn_start, with_mmap, of;
 	enum rofferr	 re;
 	struct man	*man;
 	struct mdoc	*mdoc;
@@ -395,6 +473,7 @@ fdesc(struct curparse *curp)
 	man = NULL;
 	mdoc = NULL;
 	roff = NULL;
+
 	memset(&ln, 0, sizeof(struct buf));
 
 	/*
@@ -419,6 +498,26 @@ fdesc(struct curparse *curp)
 				++lnn;
 				break;
 			}
+
+			/* 
+			 * Warn about bogus characters.  If you're using
+			 * non-ASCII encoding, you're screwing your
+			 * readers.  Since I'd rather this not happen,
+			 * I'll be helpful and drop these characters so
+			 * we don't display gibberish.  Note to manual
+			 * writers: use special characters.
+			 */
+
+			if ( ! isgraph((u_char)blk.buf[i]) &&
+					! isblank((u_char)blk.buf[i])) {
+				if ( ! mmsg(MANDOCERR_BADCHAR, curp, 
+						lnn_start, pos, 
+						"ignoring byte"))
+					goto bailout;
+				i++;
+				continue;
+			}
+
 			/* Trailing backslash is like a plain character. */
 			if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
 				if (pos >= (int)ln.sz)
@@ -467,22 +566,42 @@ fdesc(struct curparse *curp)
 				goto bailout;
 		ln.buf[pos] = '\0';
 
-		re = roff_parseln(roff, lnn_start, &ln.buf, &ln.sz);
+		/*
+		 * A significant amount of complexity is contained by
+		 * the roff preprocessor.  It's line-oriented but can be
+		 * expressed on one line, so we need at times to
+		 * readjust our starting point and re-run it.  The roff
+		 * preprocessor can also readjust the buffers with new
+		 * data, so we pass them in wholesale.
+		 */
+
+		of = 0;
+		do {
+			re = roff_parseln(roff, lnn_start, 
+					&ln.buf, &ln.sz, of, &of);
+		} while (ROFF_RERUN == re);
+
 		if (ROFF_IGN == re)
 			continue;
 		else if (ROFF_ERR == re)
 			goto bailout;
 
-		/* If unset, assign parser in pset(). */
+		/*
+		 * If input parsers have not been allocated, do so now.
+		 * We keep these instanced between parsers, but set them
+		 * locally per parse routine since we can use different
+		 * parsers with each one.
+		 */
 
-		if ( ! (man || mdoc) && ! pset(ln.buf, pos, curp, &man, &mdoc))
-			goto bailout;
+		if ( ! (man || mdoc))
+			if ( ! pset(ln.buf + of, pos - of, curp, &man, &mdoc))
+				goto bailout;
 
-		/* Pass down into parsers. */
+		/* Lastly, push down into the parsers themselves. */
 
-		if (man && ! man_parseln(man, lnn_start, ln.buf))
+		if (man && ! man_parseln(man, lnn_start, ln.buf, of))
 			goto bailout;
-		if (mdoc && ! mdoc_parseln(mdoc, lnn_start, ln.buf))
+		if (mdoc && ! mdoc_parseln(mdoc, lnn_start, ln.buf, of))
 			goto bailout;
 	}
 
@@ -493,6 +612,8 @@ fdesc(struct curparse *curp)
 		goto bailout;
 	}
 
+	/* Clean up the parse routine ASTs. */
+
 	if (mdoc && ! mdoc_endparse(mdoc))
 		goto bailout;
 	if (man && ! man_endparse(man))
@@ -506,12 +627,30 @@ fdesc(struct curparse *curp)
 		switch (curp->outtype) {
 		case (OUTT_XHTML):
 			curp->outdata = xhtml_alloc(curp->outopts);
-			curp->outman = html_man;
-			curp->outmdoc = html_mdoc;
-			curp->outfree = html_free;
 			break;
 		case (OUTT_HTML):
 			curp->outdata = html_alloc(curp->outopts);
+			break;
+		case (OUTT_ASCII):
+			curp->outdata = ascii_alloc(curp->outopts);
+			curp->outfree = ascii_free;
+			break;
+		case (OUTT_PDF):
+			curp->outdata = pdf_alloc(curp->outopts);
+			curp->outfree = pspdf_free;
+			break;
+		case (OUTT_PS):
+			curp->outdata = ps_alloc(curp->outopts);
+			curp->outfree = pspdf_free;
+			break;
+		default:
+			break;
+		}
+
+		switch (curp->outtype) {
+		case (OUTT_HTML):
+			/* FALLTHROUGH */
+		case (OUTT_XHTML):
 			curp->outman = html_man;
 			curp->outmdoc = html_mdoc;
 			curp->outfree = html_free;
@@ -520,13 +659,15 @@ fdesc(struct curparse *curp)
 			curp->outman = tree_man;
 			curp->outmdoc = tree_mdoc;
 			break;
-		case (OUTT_LINT):
-			break;
-		default:
-			curp->outdata = ascii_alloc(80);
+		case (OUTT_PDF):
+			/* FALLTHROUGH */
+		case (OUTT_ASCII):
+			/* FALLTHROUGH */
+		case (OUTT_PS):
 			curp->outman = terminal_man;
 			curp->outmdoc = terminal_mdoc;
-			curp->outfree = terminal_free;
+			break;
+		default:
 			break;
 		}
 	}
@@ -539,6 +680,7 @@ fdesc(struct curparse *curp)
 		(*curp->outmdoc)(curp->outdata, mdoc);
 
  cleanup:
+	memset(&curp->regs, 0, sizeof(struct regset));
 	if (mdoc)
 		mdoc_reset(mdoc);
 	if (man)
@@ -555,7 +697,7 @@ fdesc(struct curparse *curp)
 	return;
 
  bailout:
-	with_error = 1;
+	with_fatal = 1;
 	goto cleanup;
 }
 
@@ -574,7 +716,7 @@ pset(const char *buf, int pos, struct curparse *curp,
 	 * default to -man, which is more lenient.
 	 */
 
-	if (buf[0] == '.') {
+	if ('.' == buf[0] || '\'' == buf[0]) {
 		for (i = 1; buf[i]; i++)
 			if (' ' != buf[i] && '\t' != buf[i])
 				break;
@@ -651,6 +793,10 @@ toptions(struct curparse *curp, char *arg)
 		curp->outtype = OUTT_HTML;
 	else if (0 == strcmp(arg, "xhtml"))
 		curp->outtype = OUTT_XHTML;
+	else if (0 == strcmp(arg, "ps"))
+		curp->outtype = OUTT_PS;
+	else if (0 == strcmp(arg, "pdf"))
+		curp->outtype = OUTT_PDF;
 	else {
 		fprintf(stderr, "%s: Bad argument\n", arg);
 		return(0);
@@ -734,73 +880,41 @@ woptions(int *wflags, char *arg)
 }
 
 
-/* ARGSUSED */
-static int
-merr(void *arg, int line, int col, const char *msg)
-{
-	struct curparse *curp;
-
-	curp = (struct curparse *)arg;
-
-	(void)fprintf(stderr, "%s:%d:%d: error: %s\n", 
-			curp->file, line, col + 1, msg);
-
-	with_error = 1;
-
-	return(0);
-}
-
-
-static int
-mwarn(void *arg, int line, int col, const char *msg)
-{
-	struct curparse *curp;
-
-	curp = (struct curparse *)arg;
-
-	if ( ! (curp->wflags & WARN_WALL))
-		return(1);
-
-	(void)fprintf(stderr, "%s:%d:%d: warning: %s\n", 
-			curp->file, line, col + 1, msg);
-
-	with_warning = 1;
-	if (curp->wflags & WARN_WERR) {
-		with_error = 1;
-		return(0);
-	}
-
-	return(1);
-}
-
-static	const char * const	mandocerrs[MANDOCERR_MAX] = {
-	"ok",
-	"multi-line scope open on exit",
-	"request for scope closure when no matching scope is open",
-	"line arguments will be lost",
-	"memory exhausted"
-};
-
-/*
- * XXX: this is experimental code that will eventually become the
- * generic means of covering all warnings and errors!
- */
-/* ARGSUSED */
 static int
 mmsg(enum mandocerr t, void *arg, int ln, int col, const char *msg)
 {
-#if 0
 	struct curparse *cp;
+	const char *level;
+	int rc;
 
 	cp = (struct curparse *)arg;
+	level = NULL;
+	rc = 1;
+
+	if (t >= MANDOCERR_FATAL) {
+		with_fatal = 1;
+		level = "FATAL";
+		rc = 0;
+	} else {
+		if ( ! (WARN_WALL & cp->wflags))
+			return(1);
+		if (t >= MANDOCERR_ERROR) {
+			with_error = 1;
+			level = "ERROR";
+		}
+		if (WARN_WERR & cp->wflags) {
+			with_fatal = 1;
+			rc = 0;
+		}
+	}
 
-	fprintf(stderr, "%s:%d:%d: %s", cp->file, 
-			ln, col + 1, mandocerrs[t]);
-
+	fprintf(stderr, "%s:%d:%d:", cp->file, ln, col + 1);
+	if (level)
+		fprintf(stderr, " %s:", level);
+	fprintf(stderr, " %s", mandocerrs[t]);
 	if (msg)
 		fprintf(stderr, ": %s", msg);
-
 	fputc('\n', stderr);
-#endif
-	return(1);
+
+	return(rc);
 }