-/* $Id: macro.c,v 1.43 2009/01/19 17:51:32 kristaps Exp $ */
+/* $Id: macro.c,v 1.48 2009/01/20 20:56:21 kristaps Exp $ */
/*
* Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
*
#include <time.h>
#endif
-#include "private.h"
+/*
+ * This has scanning/parsing routines, each of which extracts a macro and
+ * its arguments and parameters, then knows how to progress to the next
+ * macro. Macros are parsed as follows:
+ *
+ * ELEMENT: TEXT | epsilon
+ * BLOCK: HEAD PUNCT BODY PUNCT BLOCK_TAIL PUNCT
+ * BLOCK_TAIL: TAIL | epsilon
+ * HEAD: ELEMENT | TEXT | BLOCK | epsilon
+ * BODY: ELEMENT | TEXT | BLOCK | epsilon
+ * TAIL: TEXT | epsilon
+ * PUNCT: TEXT (delimiters) | epsilon
+ *
+ * These are arranged into a parse tree, an example of which follows:
+ *
+ * ROOT
+ * BLOCK (.Sh)
+ * HEAD
+ * TEXT (`NAME')
+ * BODY
+ * ELEMENT (.Nm)
+ * TEXT (`mdocml')
+ * ELEMENT (.Nd)
+ * TEXT (`mdoc macro compiler')
+ * BLOCK (.Op)
+ * HEAD
+ * ELEMENT (.Fl)
+ * TEXT (`v')
+ * BLOCK (.Op)
+ * HEAD
+ * ELEMENT (.Fl)
+ * TEXT (`v')
+ * ELEMENT (.Fl)
+ * TEXT (`W')
+ * ELEMENT (.Ns)
+ * ELEMENT (.Ar)
+ * TEXT (`err...')
+ *
+ * These types are always per-line except for block bodies, which may
+ * span multiple lines. Macros are assigned a parsing routine, which
+ * corresponds to the type, in the mdoc_macros table.
+ *
+ * Note that types are general: there can be several parsing routines
+ * corresponding to a single type. The macro_text function, for
+ * example, parses an ELEMENT type (see the function definition for
+ * details) that may be interrupted by further macros; the
+ * macro_constant function, on the other hand, parses an ELEMENT type
+ * spanning a single line.
+ */
-/* FIXME: maxlineargs should be per LINE, no per TOKEN. */
+#include "private.h"
-static int rewind_alt(int);
-static int rewind_dohalt(int, enum mdoc_type,
- const struct mdoc_node *);
#define REWIND_REWIND (1 << 0)
#define REWIND_NOHALT (1 << 1)
#define REWIND_HALT (1 << 2)
+static int rewind_dohalt(int, enum mdoc_type,
+ const struct mdoc_node *);
+static int rewind_alt(int);
static int rewind_dobreak(int, const struct mdoc_node *);
-
-
static int rewind_elem(struct mdoc *, int);
static int rewind_impblock(struct mdoc *, int, int, int);
static int rewind_expblock(struct mdoc *, int, int, int);
case (MDOC_Qq):
/* FALLTHROUGH */
case (MDOC_Sq):
- assert(MDOC_BODY != type);
+ assert(MDOC_HEAD != type);
assert(MDOC_TAIL != type);
if (type == p->type && tok == p->tok)
return(REWIND_REWIND);
}
+/*
+ * Close out an explicit scope. This optionally parses a TAIL type with
+ * a set number of TEXT children.
+ */
int
macro_scoped_close(MACRO_PROT_ARGS)
{
flushed = 1;
}
- c = mdoc_args(mdoc, line, pos, buf, ARGS_DELIM, &p);
+ c = mdoc_args(mdoc, line, pos, buf, tok, &p);
if (ARGS_ERROR == c)
return(0);
if (ARGS_PUNCT == c)
}
+/*
+ * A general text macro. This is a complex case because of punctuation.
+ * If a text macro is followed by words, then punctuation, the macro is
+ * "stopped" and "reopened" following the punctuation. Thus, the
+ * following arises:
+ *
+ * .Fl a ; b
+ *
+ * ELEMENT (.Fl)
+ * TEXT (`a')
+ * TEXT (`;')
+ * ELEMENT (.Fl)
+ * TEXT (`b')
+ *
+ * This must handle the following situations:
+ *
+ * .Fl Ar b ; ;
+ *
+ * ELEMENT (.Fl)
+ * ELEMENT (.Ar)
+ * TEXT (`b')
+ * TEXT (`;')
+ * TEXT (`;')
+ */
int
macro_text(MACRO_PROT_ARGS)
{
- int la, lastpunct, c, fl, argc;
+ int la, lastpunct, c, w, argc;
struct mdoc_arg argv[MDOC_LINEARG_MAX];
char *p;
mdoc->next = MDOC_NEXT_CHILD;
- fl = ARGS_DELIM;
- if (MDOC_QUOTABLE & mdoc_macros[tok].flags)
- fl |= ARGS_QUOTED;
-
lastpunct = 0;
for (;;) {
la = *pos;
- c = mdoc_args(mdoc, line, pos, buf, fl, &p);
- if (ARGS_ERROR == c) {
+ w = mdoc_args(mdoc, line, pos, buf, tok, &p);
+ if (ARGS_ERROR == w) {
mdoc_argv_free(argc, argv);
return(0);
}
- if (ARGS_EOLN == c)
+ if (ARGS_EOLN == w)
break;
- if (ARGS_PUNCT == c)
+ if (ARGS_PUNCT == w)
break;
- if (-1 == (c = lookup(mdoc, line, la, tok, p)))
- return(0);
- else if (MDOC_MAX != c) {
+ c = ARGS_QWORD == w ? MDOC_MAX :
+ lookup(mdoc, line, la, tok, p);
+
+ if (MDOC_MAX != c && -1 != c) {
if (0 == lastpunct && ! rewind_elem(mdoc, tok)) {
mdoc_argv_free(argc, argv);
return(0);
}
mdoc_argv_free(argc, argv);
-
c = mdoc_macro(mdoc, c, line, la, pos, buf);
if (0 == c)
return(0);
if (ppos > 1)
return(1);
return(append_delims(mdoc, line, pos, buf));
+ } else if (-1 == c) {
+ mdoc_argv_free(argc, argv);
+ return(0);
}
- if (mdoc_isdelim(p)) {
+ if (ARGS_QWORD != w && mdoc_isdelim(p)) {
if (0 == lastpunct && ! rewind_elem(mdoc, tok)) {
mdoc_argv_free(argc, argv);
return(0);
}
+/*
+ * Handle explicit-scope (having a different closure token) and implicit
+ * scope (closing out prior scopes when re-invoked) macros. These
+ * constitute the BLOCK type and usually span multiple lines. These
+ * always have HEAD and sometimes have BODY types. In the multi-line
+ * case:
+ *
+ * .Bd -ragged
+ * Text.
+ * .Fl macro
+ * Another.
+ * .Ed
+ *
+ * BLOCK (.Bd)
+ * HEAD
+ * BODY
+ * TEXT (`Text.')
+ * ELEMENT (.Fl)
+ * TEXT (`macro')
+ * TEXT (`Another.')
+ *
+ * Note that the `.It' macro, possibly the most difficult (as it has
+ * embedded scope, etc.) is handled by this routine.
+ */
int
macro_scoped(MACRO_PROT_ARGS)
{
- int c, lastarg, argc, fl;
+ int c, lastarg, argc;
struct mdoc_arg argv[MDOC_LINEARG_MAX];
char *p;
assert ( ! (MDOC_CALLABLE & mdoc_macros[tok].flags));
+ /* First rewind extant implicit scope. */
+
if ( ! (MDOC_EXPLICIT & mdoc_macros[tok].flags)) {
if ( ! rewind_subblock(MDOC_BODY, mdoc, tok, line, ppos))
return(0);
return(0);
}
+ /* Parse arguments. */
+
for (argc = 0; argc < MDOC_LINEARG_MAX; argc++) {
lastarg = *pos;
c = mdoc_argv(mdoc, line, tok, &argv[argc], pos, buf);
return(0);
mdoc->next = MDOC_NEXT_CHILD;
- fl = ARGS_DELIM;
- if (MDOC_TABSEP & mdoc_macros[tok].flags)
- fl |= ARGS_TABSEP;
-
for (;;) {
lastarg = *pos;
- c = mdoc_args(mdoc, line, pos, buf, fl, &p);
+ c = mdoc_args(mdoc, line, pos, buf, tok, &p);
if (ARGS_ERROR == c)
return(0);
break;
if (ARGS_EOLN == c)
break;
+
+ /* FIXME: if .It -column, the lookup must be for a
+ * sub-line component. BLAH. */
if (-1 == (c = lookup(mdoc, line, lastarg, tok, p)))
return(0);
- else if (MDOC_MAX == c) {
+
+ if (MDOC_MAX == c) {
if ( ! mdoc_word_alloc(mdoc, line, lastarg, p))
return(0);
mdoc->next = MDOC_NEXT_SIBLING;
return(0);
break;
}
-
+
if ( ! rewind_subblock(MDOC_HEAD, mdoc, tok, line, ppos))
return(0);
if (1 == ppos && ! append_delims(mdoc, line, pos, buf))
}
+/*
+ * This handles the case of an implicitly-scoped macro (BLOCK) limited to
+ * a single line. Instead of being closed out by a subsequent call to
+ * another macro, the scope is closed at the end of line. These don't
+ * have BODY or TAIL types. Notice that the punctuation falls outside
+ * of the HEAD type.
+ *
+ * .Qq a Fl b Ar d ; ;
+ *
+ * BLOCK (Qq)
+ * HEAD
+ * TEXT (`a')
+ * ELEMENT (.Fl)
+ * TEXT (`b')
+ * ELEMENT (.Ar)
+ * TEXT (`d')
+ * TEXT (`;')
+ * TEXT (`;')
+ */
int
macro_scoped_line(MACRO_PROT_ARGS)
{
if ( ! mdoc_head_alloc(mdoc, line, ppos, tok))
return(0);
+ mdoc->next = MDOC_NEXT_SIBLING;
+ if ( ! mdoc_body_alloc(mdoc, line, ppos, tok))
+ return(0);
mdoc->next = MDOC_NEXT_CHILD;
/* XXX - no known argument macros. */
lastarg = ppos;
for (;;) {
lastarg = *pos;
- c = mdoc_args(mdoc, line, pos, buf, ARGS_DELIM, &p);
+ c = mdoc_args(mdoc, line, pos, buf, tok, &p);
if (ARGS_ERROR == c)
return(0);
}
if (1 == ppos) {
- if ( ! rewind_subblock(MDOC_HEAD, mdoc, tok, line, ppos))
+ if ( ! rewind_subblock(MDOC_BODY, mdoc, tok, line, ppos))
return(0);
if ( ! append_delims(mdoc, line, pos, buf))
return(0);
- } else if ( ! rewind_subblock(MDOC_HEAD, mdoc, tok, line, ppos))
+ } else if ( ! rewind_subblock(MDOC_BODY, mdoc, tok, line, ppos))
return(0);
return(rewind_impblock(mdoc, tok, line, ppos));
}
+/*
+ * A constant-scoped macro is like a simple-scoped macro (macro_scoped)
+ * except that it doesn't handle implicit scopes and explicit ones have
+ * a fixed number of TEXT children to the BODY.
+ *
+ * .Fl a So b Sc ;
+ *
+ * ELEMENT (.Fl)
+ * TEXT (`a')
+ * BLOCK (.So)
+ * HEAD
+ * BODY
+ * TEXT (`b')
+ * TEXT (`;')
+ */
int
macro_constant_scoped(MACRO_PROT_ARGS)
{
mdoc->next = MDOC_NEXT_CHILD;
}
- c = mdoc_args(mdoc, line, pos, buf, ARGS_DELIM, &p);
+ c = mdoc_args(mdoc, line, pos, buf, tok, &p);
if (ARGS_ERROR == c)
return(0);
if (ARGS_PUNCT == c)
}
+/*
+ * A delimited constant is very similar to the macros parsed by
+ * macro_text except that, in the event of punctuation, the macro isn't
+ * "re-opened" as it is in macro_text. Also, these macros have a fixed
+ * number of parameters.
+ *
+ * .Fl a No b
+ *
+ * ELEMENT (.Fl)
+ * TEXT (`a')
+ * ELEMENT (.No)
+ * TEXT (`b')
+ */
int
macro_constant_delimited(MACRO_PROT_ARGS)
{
flushed = 1;
}
- c = mdoc_args(mdoc, line, pos, buf, ARGS_DELIM, &p);
+ c = mdoc_args(mdoc, line, pos, buf, tok, &p);
if (ARGS_ERROR == c)
return(0);
if (ARGS_PUNCT == c)
}
+/*
+ * A constant macro is the simplest classification. It spans an entire
+ * line.
+ */
int
macro_constant(MACRO_PROT_ARGS)
{
- int c, lastarg, argc, fl;
+ int c, w, la, argc;
struct mdoc_arg argv[MDOC_LINEARG_MAX];
char *p;
- struct mdoc_node *n;
- fl = 0;
- if (MDOC_QUOTABLE & mdoc_macros[tok].flags)
- fl = ARGS_QUOTED;
+ assert( ! (MDOC_CALLABLE & mdoc_macros[tok].flags));
for (argc = 0; argc < MDOC_LINEARG_MAX; argc++) {
- lastarg = *pos;
+ la = *pos;
c = mdoc_argv(mdoc, line, tok, &argv[argc], pos, buf);
if (ARGV_EOLN == c)
break;
if (ARGV_WORD == c) {
- *pos = lastarg;
+ *pos = la;
break;
} else if (ARGV_ARG == c)
continue;
mdoc->next = MDOC_NEXT_CHILD;
for (;;) {
- lastarg = *pos;
- c = mdoc_args(mdoc, line, pos, buf, fl, &p);
- if (ARGS_ERROR == c)
+ la = *pos;
+ w = mdoc_args(mdoc, line, pos, buf, tok, &p);
+ if (ARGS_ERROR == w)
return(0);
- if (ARGS_EOLN == c)
+ if (ARGS_EOLN == w)
break;
- if (-1 == (c = lookup(mdoc, line, lastarg, tok, p)))
- return(0);
- else if (MDOC_MAX != c) {
+ c = ARGS_QWORD == w ? MDOC_MAX :
+ lookup(mdoc, line, la, tok, p);
+
+ if (MDOC_MAX != c && -1 != c) {
if ( ! rewind_elem(mdoc, tok))
return(0);
- return(mdoc_macro(mdoc, c, line,
- lastarg, pos, buf));
- }
-
- if ( ! mdoc_word_alloc(mdoc, line, lastarg, p))
+ return(mdoc_macro(mdoc, c, line, la, pos, buf));
+ } else if (-1 == c)
return(0);
- mdoc->next = MDOC_NEXT_SIBLING;
- }
-
- if ( ! rewind_elem(mdoc, tok))
- return(0);
- if ( ! (MDOC_NOKEEP & mdoc_macros[tok].flags))
- return(1);
- assert(mdoc->last->tok == tok);
- if (mdoc->last->parent->child == mdoc->last)
- mdoc->last->parent->child = mdoc->last->prev;
- if (mdoc->last->prev)
- mdoc->last->prev->next = NULL;
-
- n = mdoc->last;
- assert(NULL == mdoc->last->next);
-
- if (mdoc->last->prev) {
- mdoc->last = mdoc->last->prev;
+ if ( ! mdoc_word_alloc(mdoc, line, la, p))
+ return(0);
mdoc->next = MDOC_NEXT_SIBLING;
- } else {
- mdoc->last = mdoc->last->parent;
- mdoc->next = MDOC_NEXT_CHILD;
}
- mdoc_node_freelist(n);
-
- return(1);
+ return(rewind_elem(mdoc, tok));
}
}
+/*
+ * This is called at the end of parsing. It must traverse up the tree,
+ * closing out open [implicit] scopes. Obviously, open explicit scopes
+ * are errors.
+ */
int
macro_end(struct mdoc *mdoc)
{
continue;
if ( ! (MDOC_EXPLICIT & mdoc_macros[n->tok].flags))
continue;
- mdoc_nerr(mdoc, n, "macro scope still open on exit");
- return(0);
+ return(mdoc_nerr(mdoc, n, "macro scope still open on exit"));
}
return(rewind_last(mdoc, mdoc->first));