-/* $Id: macro.c,v 1.49 2009/01/22 14:56:21 kristaps Exp $ */
+/* $Id: macro.c,v 1.57 2009/03/06 14:13:47 kristaps Exp $ */
/*
* Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
*
/*
* This has scanning/parsing routines, each of which extracts a macro and
* its arguments and parameters, then knows how to progress to the next
- * macro. Macros are parsed according as follows:
- *
- * ELEMENT: TEXT | epsilon
- * BLOCK: HEAD PUNCT BODY PUNCT BLOCK_TAIL PUNCT
- * BLOCK_TAIL: TAIL | epsilon
- * HEAD: ELEMENT | TEXT | BLOCK | epsilon
- * BODY: ELEMENT | TEXT | BLOCK | epsilon
- * TAIL: TEXT | epsilon
- * PUNCT: TEXT (delimiters) | epsilon
- *
- * These are arranged into a parse tree, an example of which follows:
- *
- * ROOT
- * BLOCK (.Sh)
- * HEAD
- * TEXT (`NAME')
- * BODY
- * ELEMENT (.Nm)
- * TEXT (`mdocml')
- * ELEMENT (.Nd)
- * TEXT (`mdoc macro compiler')
- * BLOCK (.Op)
- * HEAD
- * ELEMENT (.Fl)
- * TEXT (`v')
- * BLOCK (.Op)
- * HEAD
- * ELEMENT (.Fl)
- * TEXT (`v')
- * ELEMENT (.Fl)
- * TEXT (`W')
- * ELEMENT (.Ns)
- * ELEMENT (.Ar)
- * TEXT (`err...')
- *
- * These types are always per-line except for block bodies, which may
- * span multiple lines. Macros are assigned a parsing routine, which
- * corresponds to the type, in the mdoc_macros table.
- *
- * Note that types are general: there can be several parsing routines
- * corresponding to a single type. The macro_text function, for
- * example, parses an ELEMENT type (see the function definition for
- * details) that may be interrupted by further macros; the
- * macro_constant function, on the other hand, parses an ELEMENT type
- * spanning a single line.
+ * macro.
+ */
+
+/*
+ * FIXME: don't use static mdoc_argv values, as they require us to do a
+ * complicated copy-over when actually assigning them to dynamic memory.
*/
#include "private.h"
+static int macro_obsolete(MACRO_PROT_ARGS);
+static int macro_constant(MACRO_PROT_ARGS);
+static int macro_constant_scoped(MACRO_PROT_ARGS);
+static int macro_constant_delimited(MACRO_PROT_ARGS);
+static int macro_text(MACRO_PROT_ARGS);
+static int macro_scoped(MACRO_PROT_ARGS);
+static int macro_scoped_close(MACRO_PROT_ARGS);
+static int macro_scoped_line(MACRO_PROT_ARGS);
+
#define REWIND_REWIND (1 << 0)
#define REWIND_NOHALT (1 << 1)
#define REWIND_HALT (1 << 2)
#define ENOPARMS (2)
#define EARGVLIM (3)
+/*
+ * Central table of the library: which routine parses each macro, and
+ * with which flags.
+ *
+ * The table holds MDOC_MAX rows and is looked up by macro token (for
+ * example, mdoc_macros[tok].flags), so the position of a row must
+ * agree with the numeric value of the token it describes.  The
+ * trailing comment on each row names that macro.  Presumably the rows
+ * follow the order in which the MDOC_* tokens are declared; confirm
+ * against that declaration before adding or reordering rows.
+ *
+ * Each row is a parsing routine followed by a bitwise OR of MDOC_*
+ * flags (0 for none).  The first row has no routine (NULL).
+ *
+ * NOTE(review): identifiers that begin with a double underscore are
+ * reserved for the implementation in C.  Renaming this array would
+ * need a check of every user of the symbol, so it is only flagged
+ * here.
+ */
+
+const struct mdoc_macro __mdoc_macros[MDOC_MAX] = {
+ { NULL, 0 }, /* \" */
+ { macro_constant, MDOC_PROLOGUE }, /* Dd */
+ { macro_constant, MDOC_PROLOGUE }, /* Dt */
+ { macro_constant, MDOC_PROLOGUE }, /* Os */
+ { macro_scoped, 0 }, /* Sh */
+ { macro_scoped, 0 }, /* Ss */
+ { macro_text, 0 }, /* Pp */
+ { macro_scoped_line, MDOC_PARSED }, /* D1 */
+ { macro_scoped_line, MDOC_PARSED }, /* Dl */
+ { macro_scoped, MDOC_EXPLICIT }, /* Bd */
+ { macro_scoped_close, MDOC_EXPLICIT }, /* Ed */
+ { macro_scoped, MDOC_EXPLICIT }, /* Bl */
+ { macro_scoped_close, MDOC_EXPLICIT }, /* El */
+ { macro_scoped, MDOC_PARSED }, /* It */
+ { macro_text, MDOC_CALLABLE | MDOC_PARSED }, /* Ad */
+ { macro_text, MDOC_PARSED }, /* An */
+ { macro_text, MDOC_CALLABLE | MDOC_PARSED }, /* Ar */
+ { macro_constant, 0 }, /* Cd */
+ { macro_text, MDOC_CALLABLE | MDOC_PARSED }, /* Cm */
+ { macro_text, MDOC_CALLABLE | MDOC_PARSED }, /* Dv */
+ { macro_text, MDOC_CALLABLE | MDOC_PARSED }, /* Er */
+ { macro_text, MDOC_CALLABLE | MDOC_PARSED }, /* Ev */
+ { macro_constant, 0 }, /* Ex */
+ { macro_text, MDOC_CALLABLE | MDOC_PARSED }, /* Fa */
+ { macro_constant, 0 }, /* Fd */
+ { macro_text, MDOC_CALLABLE | MDOC_PARSED }, /* Fl */
+ { macro_text, MDOC_CALLABLE | MDOC_PARSED }, /* Fn */
+ { macro_text, MDOC_PARSED }, /* Ft */
+ { macro_text, MDOC_CALLABLE | MDOC_PARSED }, /* Ic */
+ { macro_constant, 0 }, /* In */
+ { macro_text, MDOC_CALLABLE | MDOC_PARSED }, /* Li */
+ { macro_constant, 0 }, /* Nd */
+ { macro_text, MDOC_CALLABLE | MDOC_PARSED }, /* Nm */
+ { macro_scoped_line, MDOC_CALLABLE | MDOC_PARSED }, /* Op */
+ { macro_obsolete, 0 }, /* Ot */
+ { macro_text, MDOC_CALLABLE | MDOC_PARSED }, /* Pa */
+ { macro_constant, 0 }, /* Rv */
+ /* XXX - .St is supposed to be (but isn't) callable. */
+ { macro_constant_delimited, MDOC_PARSED }, /* St */
+ { macro_text, MDOC_CALLABLE | MDOC_PARSED }, /* Va */
+ { macro_text, MDOC_CALLABLE | MDOC_PARSED }, /* Vt */
+ { macro_text, MDOC_CALLABLE | MDOC_PARSED }, /* Xr */
+ { macro_constant, 0 }, /* %A */
+ { macro_constant, 0 }, /* %B */
+ { macro_constant, 0 }, /* %D */
+ { macro_constant, 0 }, /* %I */
+ { macro_constant, 0 }, /* %J */
+ { macro_constant, 0 }, /* %N */
+ { macro_constant, 0 }, /* %O */
+ { macro_constant, 0 }, /* %P */
+ { macro_constant, 0 }, /* %R */
+ { macro_constant, 0 }, /* %T */
+ { macro_constant, 0 }, /* %V */
+ { macro_scoped_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Ac */
+ { macro_constant_scoped, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Ao */
+ { macro_scoped_line, MDOC_CALLABLE | MDOC_PARSED }, /* Aq */
+ { macro_constant_delimited, 0 }, /* At */
+ { macro_scoped_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Bc */
+ { macro_scoped, MDOC_EXPLICIT }, /* Bf */
+ { macro_constant_scoped, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Bo */
+ { macro_scoped_line, MDOC_CALLABLE | MDOC_PARSED }, /* Bq */
+ { macro_constant_delimited, MDOC_PARSED }, /* Bsx */
+ { macro_constant_delimited, MDOC_PARSED }, /* Bx */
+ { macro_constant, 0 }, /* Db */
+ { macro_scoped_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Dc */
+ { macro_constant_scoped, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Do */
+ { macro_scoped_line, MDOC_CALLABLE | MDOC_PARSED }, /* Dq */
+ { macro_scoped_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Ec */
+ { macro_scoped_close, MDOC_EXPLICIT }, /* Ef */
+ { macro_text, MDOC_CALLABLE | MDOC_PARSED }, /* Em */
+ { macro_constant_scoped, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Eo */
+ { macro_constant_delimited, MDOC_PARSED }, /* Fx */
+ { macro_text, MDOC_PARSED }, /* Ms */
+ { macro_constant_delimited, MDOC_CALLABLE | MDOC_PARSED }, /* No */
+ { macro_constant_delimited, MDOC_CALLABLE | MDOC_PARSED }, /* Ns */
+ { macro_constant_delimited, MDOC_PARSED }, /* Nx */
+ { macro_constant_delimited, MDOC_PARSED }, /* Ox */
+ { macro_scoped_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Pc */
+ { macro_constant_delimited, MDOC_PARSED }, /* Pf */
+ { macro_constant_scoped, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Po */
+ { macro_scoped_line, MDOC_CALLABLE | MDOC_PARSED }, /* Pq */
+ { macro_scoped_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Qc */
+ { macro_scoped_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ql */
+ { macro_constant_scoped, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Qo */
+ { macro_scoped_line, MDOC_CALLABLE | MDOC_PARSED }, /* Qq */
+ { macro_scoped_close, MDOC_EXPLICIT }, /* Re */
+ { macro_scoped, MDOC_EXPLICIT }, /* Rs */
+ { macro_scoped_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Sc */
+ { macro_constant_scoped, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* So */
+ { macro_scoped_line, MDOC_CALLABLE | MDOC_PARSED }, /* Sq */
+ { macro_constant, 0 }, /* Sm */
+ { macro_text, MDOC_CALLABLE | MDOC_PARSED }, /* Sx */
+ { macro_text, MDOC_CALLABLE | MDOC_PARSED }, /* Sy */
+ { macro_text, MDOC_CALLABLE | MDOC_PARSED }, /* Tn */
+ { macro_constant_delimited, MDOC_PARSED }, /* Ux */
+ { macro_scoped_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Xc */
+ { macro_constant_scoped, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Xo */
+ /* XXX - .Fo is supposed to be (but isn't) callable. */
+ { macro_scoped, MDOC_EXPLICIT }, /* Fo */
+ { macro_scoped_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Fc */
+ { macro_constant_scoped, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Oo */
+ { macro_scoped_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Oc */
+ { macro_scoped, MDOC_EXPLICIT }, /* Bk */
+ { macro_scoped_close, MDOC_EXPLICIT }, /* Ek */
+ { macro_constant, 0 }, /* Bt */
+ { macro_constant, 0 }, /* Hf */
+ { macro_obsolete, 0 }, /* Fr */
+ { macro_constant, 0 }, /* Ud */
+ { macro_constant, 0 }, /* Lb */
+};
+
+const struct mdoc_macro * const mdoc_macros = __mdoc_macros;
+
static int
perr(struct mdoc *mdoc, int line, int pos, int type)
break;
else if (rewind_dobreak(tok, n))
continue;
- return(mdoc_perr(mdoc, line, ppos, "scope breaks prior %s", mdoc_node2a(n)));
+ return(mdoc_perr(mdoc, line, ppos,
+ "scope breaks %s", MDOC_ROOT == n->type ?
+ "<root>" : mdoc_macronames[n->tok]));
}
assert(n);
break;
else if (rewind_dobreak(tok, n))
continue;
- return(mdoc_perr(mdoc, line, ppos, "scope breaks prior %s", mdoc_node2a(n)));
+ return(mdoc_perr(mdoc, line, ppos,
+ "scope breaks %s", MDOC_ROOT == n->type ?
+ "<root>" : mdoc_macronames[n->tok]));
}
assert(n);
break;
else if (rewind_dobreak(tok, n))
continue;
- return(mdoc_perr(mdoc, line, ppos, "scope breaks prior %s", mdoc_node2a(n)));
+ return(mdoc_perr(mdoc, line, ppos,
+ "scope breaks %s", MDOC_ROOT == n->type ?
+ "<root>" : mdoc_macronames[n->tok]));
}
assert(n);
* Close out an explicit scope. This optionally parses a TAIL type with
* a set number of TEXT children.
*/
-int
+static int
macro_scoped_close(MACRO_PROT_ARGS)
{
int tt, j, c, lastarg, maxargs, flushed;
if ( ! (MDOC_CALLABLE & mdoc_macros[tok].flags)) {
if (0 == buf[*pos]) {
- if ( ! rewind_subblock(MDOC_BODY, mdoc, tok, line, ppos))
+ if ( ! rewind_subblock(MDOC_BODY, mdoc,
+ tok, line, ppos))
return(0);
return(rewind_expblock(mdoc, tok, line, ppos));
}
return(0);
else if (MDOC_MAX != c) {
if ( ! flushed) {
- if ( ! rewind_expblock(mdoc, tok, line, ppos))
+ if ( ! rewind_expblock(mdoc, tok,
+ line, ppos))
return(0);
flushed = 1;
}
* TEXT (`;')
* TEXT (`;')
*/
-int
+static int
macro_text(MACRO_PROT_ARGS)
{
int la, lastpunct, c, w, argc;
return(perr(mdoc, line, ppos, EARGVLIM));
}
- c = mdoc_elem_alloc(mdoc, line, ppos, tok, argc, argv);
+ c = mdoc_elem_alloc(mdoc, line, ppos,
+ tok, (size_t)argc, argv);
if (0 == c) {
mdoc_argv_free(argc, argv);
return(0);
}
+ /* FIXME: .Fl and .Ar handling of `|'. */
+
if (ARGS_QWORD != w && mdoc_isdelim(p)) {
if (0 == lastpunct && ! rewind_elem(mdoc, tok)) {
mdoc_argv_free(argc, argv);
}
lastpunct = 1;
} else if (lastpunct) {
- c = mdoc_elem_alloc(mdoc, line,
- ppos, tok, argc, argv);
+ c = mdoc_elem_alloc(mdoc, line, ppos,
+ tok, (size_t)argc, argv);
if (0 == c) {
mdoc_argv_free(argc, argv);
return(0);
* Note that the `.It' macro, possibly the most difficult (as it has
* embedded scope, etc.) is handled by this routine.
*/
-int
+static int
macro_scoped(MACRO_PROT_ARGS)
{
int c, lastarg, argc;
if (0 == buf[*pos]) {
if ( ! mdoc_head_alloc(mdoc, line, ppos, tok))
return(0);
- if ( ! rewind_subblock(MDOC_HEAD, mdoc, tok, line, ppos))
+ if ( ! rewind_subblock(MDOC_HEAD, mdoc,
+ tok, line, ppos))
return(0);
if ( ! mdoc_body_alloc(mdoc, line, ppos, tok))
return(0);
if (ARGS_ERROR == c)
return(0);
- if (ARGS_PUNCT == c)
- break;
if (ARGS_EOLN == c)
break;
-
if (ARGS_PHRASE == c) {
/*
if ( ! mdoc_phrase(mdoc, line, lastarg, buf))
break;
}
- if ( ! rewind_subblock(MDOC_HEAD, mdoc, tok, line, ppos))
- return(0);
if (1 == ppos && ! append_delims(mdoc, line, pos, buf))
return(0);
+ if ( ! rewind_subblock(MDOC_HEAD, mdoc, tok, line, ppos))
+ return(0);
if ( ! mdoc_body_alloc(mdoc, line, ppos, tok))
return(0);
* TEXT (`;')
* TEXT (`;')
*/
-int
+static int
macro_scoped_line(MACRO_PROT_ARGS)
{
int lastarg, c;
* TEXT (`b')
* TEXT (';')
*/
-int
+static int
macro_constant_scoped(MACRO_PROT_ARGS)
{
int lastarg, flushed, j, c, maxargs;
return(0);
else if (MDOC_MAX != c) {
if ( ! flushed) {
- if ( ! rewind_subblock(MDOC_HEAD, mdoc, tok, line, ppos))
+ if ( ! rewind_subblock(MDOC_HEAD, mdoc,
+ tok, line, ppos))
return(0);
flushed = 1;
- if ( ! mdoc_body_alloc(mdoc, line, ppos, tok))
+ if ( ! mdoc_body_alloc(mdoc, line,
+ ppos, tok))
return(0);
mdoc->next = MDOC_NEXT_CHILD;
}
- if ( ! mdoc_macro(mdoc, c, line, lastarg, pos, buf))
+ if ( ! mdoc_macro(mdoc, c, line, lastarg,
+ pos, buf))
return(0);
break;
}
if ( ! flushed && mdoc_isdelim(p)) {
- if ( ! rewind_subblock(MDOC_HEAD, mdoc, tok, line, ppos))
+ if ( ! rewind_subblock(MDOC_HEAD, mdoc,
+ tok, line, ppos))
return(0);
flushed = 1;
if ( ! mdoc_body_alloc(mdoc, line, ppos, tok))
* ELEMENT (.No)
* TEXT (`b')
*/
-int
+static int
macro_constant_delimited(MACRO_PROT_ARGS)
{
- int lastarg, flushed, j, c, maxargs, argc;
+ int lastarg, flushed, j, c, maxargs, argc,
+ igndelim, ignargs;
struct mdoc_arg argv[MDOC_LINEARG_MAX];
char *p;
lastarg = ppos;
flushed = 0;
+
+ /*
+ * Maximum arguments per macro. Some of these have none and
+ * exit as soon as they're parsed.
+ */
+
switch (tok) {
case (MDOC_No):
/* FALLTHROUGH */
case (MDOC_Ns):
/* FALLTHROUGH */
- case (MDOC_Pf):
- /* FALLTHROUGH */
case (MDOC_Ux):
- /* FALLTHROUGH */
- case (MDOC_St):
maxargs = 0;
break;
default:
break;
}
- for (argc = 0; argc < MDOC_LINEARG_MAX; argc++) {
- lastarg = *pos;
- c = mdoc_argv(mdoc, line, tok, &argv[argc], pos, buf);
- if (ARGV_EOLN == c)
- break;
- if (ARGV_WORD == c) {
- *pos = lastarg;
- break;
- } else if (ARGV_ARG == c)
- continue;
- mdoc_argv_free(argc, argv);
- return(0);
+ /*
+ * Whether to ignore delimiter characters. `Pf' accepts its
+ * first token as a parameter no matter what it looks like (if
+ * it's text).
+ */
+
+ switch (tok) {
+ case (MDOC_Pf):
+ igndelim = 1;
+ break;
+ default:
+ igndelim = 0;
+ break;
}
+ /*
+ * Whether to ignore arguments: `St', for example, handles its
+ * argument-like parameters as regular parameters.
+ */
+
+ switch (tok) {
+ case (MDOC_St):
+ ignargs = 1;
+ break;
+ default:
+ ignargs = 0;
+ break;
+ }
+
+ argc = 0;
+
+ if ( ! ignargs)
+ for ( ; argc < MDOC_LINEARG_MAX; argc++) {
+ lastarg = *pos;
+ c = mdoc_argv(mdoc, line, tok,
+ &argv[argc], pos, buf);
+ if (ARGV_EOLN == c)
+ break;
+ if (ARGV_WORD == c) {
+ *pos = lastarg;
+ break;
+ } else if (ARGV_ARG == c)
+ continue;
+ mdoc_argv_free(argc, argv);
+ return(0);
+ }
+
if (MDOC_LINEARG_MAX == argc) {
mdoc_argv_free(argc - 1, argv);
return(perr(mdoc, line, ppos, EARGVLIM));
}
- c = mdoc_elem_alloc(mdoc, line, ppos, tok, argc, argv);
+ c = mdoc_elem_alloc(mdoc, line, ppos,
+ tok, (size_t)argc, argv);
+
mdoc_argv_free(argc, argv);
if (0 == c)
break;
}
- if ( ! flushed && mdoc_isdelim(p)) {
+ if ( ! flushed && mdoc_isdelim(p) && ! igndelim) {
if ( ! rewind_elem(mdoc, tok))
return(0);
flushed = 1;
* A constant macro is the simplest classification. It spans an entire
* line.
*/
-int
+static int
macro_constant(MACRO_PROT_ARGS)
{
int c, w, la, argc;
return(perr(mdoc, line, ppos, EARGVLIM));
}
- c = mdoc_elem_alloc(mdoc, line, ppos, tok, argc, argv);
+ c = mdoc_elem_alloc(mdoc, line, ppos,
+ tok, (size_t)argc, argv);
+
mdoc_argv_free(argc, argv);
if (0 == c)
/* ARGSUSED */
-int
+static int
macro_obsolete(MACRO_PROT_ARGS)
{
continue;
if ( ! (MDOC_EXPLICIT & mdoc_macros[n->tok].flags))
continue;
- return(mdoc_nerr(mdoc, n, "macro scope still open on exit"));
+ return(mdoc_nerr(mdoc, n,
+ "macro scope still open on exit"));
}
return(rewind_last(mdoc, mdoc->first));