From 80d8b4991abbf63a591116b09dfb3b844b5b0405 Mon Sep 17 00:00:00 2001 From: Kristaps Dzonsons Date: Tue, 20 Jan 2009 12:51:28 +0000 Subject: Moved prologue-pruning into action.c. Added line-arg softmax. --- action.c | 53 ++++++++++++---- argv.c | 5 +- macro.c | 208 ++++++++++++++++++++++++++++++++++++++++++++++++++----------- mdoc.c | 47 ++++++++++---- mdocml.c | 16 +---- private.h | 13 ++-- validate.c | 32 ++-------- 7 files changed, 265 insertions(+), 109 deletions(-) diff --git a/action.c b/action.c index f9bc34d4..f04e650d 100644 --- a/action.c +++ b/action.c @@ -1,4 +1,4 @@ -/* $Id: action.c,v 1.12 2009/01/19 17:02:58 kristaps Exp $ */ +/* $Id: action.c,v 1.13 2009/01/20 12:51:28 kristaps Exp $ */ /* * Copyright (c) 2008 Kristaps Dzonsons * @@ -29,11 +29,13 @@ struct actions { /* Per-macro action routines. */ -static int post_sh(struct mdoc *); -static int post_os(struct mdoc *); -static int post_dt(struct mdoc *); -static int post_dd(struct mdoc *); -static int post_nm(struct mdoc *); +static int post_sh(struct mdoc *); +static int post_os(struct mdoc *); +static int post_dt(struct mdoc *); +static int post_dd(struct mdoc *); +static int post_nm(struct mdoc *); + +static int post_prologue(struct mdoc *); /* Array of macro action routines. */ @@ -226,7 +228,8 @@ post_dt(struct mdoc *mdoc) if (NULL == mdoc->meta.title) mdoc->meta.title = xstrdup("untitled"); - return(1); + + return(post_prologue(mdoc)); } @@ -244,7 +247,9 @@ post_os(struct mdoc *mdoc) mdoc->meta.os = xstrdup(buf[0] ? 
buf : "local"); mdoc->sec_lastn = mdoc->sec_last = SEC_BODY; - return(1); + mdoc->flags |= MDOC_BODYPARSE; + + return(post_prologue(mdoc)); } @@ -269,7 +274,7 @@ post_dd(struct mdoc *mdoc) assert(MDOC_TEXT == n->type); p = n->data.text.string; - if (xstrcmp(p, "$Mdocdate: January 19 2009 $")) { + if (xstrcmp(p, "$Mdocdate: January 20 2009 $")) { mdoc->meta.date = time(NULL); continue; } else if (xstrcmp(p, "$")) { @@ -285,15 +290,41 @@ post_dd(struct mdoc *mdoc) } if (mdoc->meta.date && NULL == n) - return(1); + return(post_prologue(mdoc)); else if (n) return(mdoc_err(mdoc, "invalid parameter syntax")); if ((mdoc->meta.date = mdoc_atotime(date))) - return(1); + return(post_prologue(mdoc)); return(mdoc_err(mdoc, "invalid parameter syntax")); } +static int +post_prologue(struct mdoc *mdoc) +{ + struct mdoc_node *n; + + if (mdoc->last->parent->child == mdoc->last) + mdoc->last->parent->child = mdoc->last->prev; + if (mdoc->last->prev) + mdoc->last->prev->next = NULL; + + n = mdoc->last; + assert(NULL == mdoc->last->next); + + if (mdoc->last->prev) { + mdoc->last = mdoc->last->prev; + mdoc->next = MDOC_NEXT_SIBLING; + } else { + mdoc->last = mdoc->last->parent; + mdoc->next = MDOC_NEXT_CHILD; + } + + mdoc_node_freelist(n); + return(1); +} + + int mdoc_action_post(struct mdoc *mdoc) { diff --git a/argv.c b/argv.c index a2d62608..8ebf89be 100644 --- a/argv.c +++ b/argv.c @@ -1,4 +1,4 @@ -/* $Id: argv.c,v 1.17 2009/01/19 17:02:58 kristaps Exp $ */ +/* $Id: argv.c,v 1.18 2009/01/20 12:51:28 kristaps Exp $ */ /* * Copyright (c) 2008 Kristaps Dzonsons * @@ -25,8 +25,9 @@ #include "private.h" - /* FIXME: .It called with -column and quoted arguments. */ +/* FIXME: if arguments are quoted, they should not be later parsed for + * macros. 
*/ static int lookup(int, const char *); static int parse(struct mdoc *, int, diff --git a/macro.c b/macro.c index 70cd340b..5c568901 100644 --- a/macro.c +++ b/macro.c @@ -1,4 +1,4 @@ -/* $Id: macro.c,v 1.43 2009/01/19 17:51:32 kristaps Exp $ */ +/* $Id: macro.c,v 1.44 2009/01/20 12:51:28 kristaps Exp $ */ /* * Copyright (c) 2008 Kristaps Dzonsons * @@ -25,19 +25,65 @@ #include #endif -#include "private.h" +/* + * This has scanning/parsing routines, each of which extracts a macro and + * its arguments and parameters, then knows how to progress to the next + * macro. Macros are parsed as follows: + * + * ELEMENT: TEXT | epsilon + * BLOCK: HEAD PUNCT BODY PUNCT BLOCK_TAIL PUNCT + * BLOCK_TAIL: TAIL | epsilon + * HEAD: ELEMENT | TEXT | BLOCK | epsilon + * BODY: ELEMENT | TEXT | BLOCK | epsilon + * TAIL: TEXT | epsilon + * PUNCT: TEXT (delimiters) | epsilon + * + * These are arranged into a parse tree, an example of which follows: + * + * ROOT + * BLOCK (.Sh) + * HEAD + * TEXT (`NAME') + * BODY + * ELEMENT (.Nm) + * TEXT (`mdocml') + * ELEMENT (.Nd) + * TEXT (`mdoc macro compiler') + * BLOCK (.Op) + * HEAD + * ELEMENT (.Fl) + * TEXT (`v') + * BLOCK (.Op) + * HEAD + * ELEMENT (.Fl) + * TEXT (`v') + * ELEMENT (.Fl) + * TEXT (`W') + * ELEMENT (.Ns) + * ELEMENT (.Ar) + * TEXT (`err...') + * + * These types are always per-line except for block bodies, which may + * span multiple lines. Macros are assigned a parsing routine, which + * corresponds to the type, in the mdoc_macros table. + * + * Note that types are general: there can be several parsing routines + * corresponding to a single type. The macro_text function, for + * example, parses an ELEMENT type (see the function definition for + * details) that may be interrupted by further macros; the + * macro_constant function, on the other hand, parses an ELEMENT type + * spanning a single line. + */ -/* FIXME: maxlineargs should be per LINE, no per TOKEN.
*/ +#include "private.h" -static int rewind_alt(int); -static int rewind_dohalt(int, enum mdoc_type, - const struct mdoc_node *); #define REWIND_REWIND (1 << 0) #define REWIND_NOHALT (1 << 1) #define REWIND_HALT (1 << 2) +static int rewind_dohalt(int, enum mdoc_type, + const struct mdoc_node *); +static int rewind_alt(int); static int rewind_dobreak(int, const struct mdoc_node *); - - static int rewind_elem(struct mdoc *, int); static int rewind_impblock(struct mdoc *, int, int, int); static int rewind_expblock(struct mdoc *, int, int, int); @@ -158,7 +204,7 @@ rewind_dohalt(int tok, enum mdoc_type type, const struct mdoc_node *p) case (MDOC_Qq): /* FALLTHROUGH */ case (MDOC_Sq): - assert(MDOC_BODY != type); + assert(MDOC_HEAD != type); assert(MDOC_TAIL != type); if (type == p->type && tok == p->tok) return(REWIND_REWIND); @@ -412,6 +458,10 @@ append_delims(struct mdoc *mdoc, int line, int *pos, char *buf) } +/* + * Close out an explicit scope. This optionally parses a TAIL type with + * a set number of TEXT children. + */ int macro_scoped_close(MACRO_PROT_ARGS) { @@ -497,6 +547,30 @@ macro_scoped_close(MACRO_PROT_ARGS) } +/* + * A general text macro. This is a complex case because of punctuation. + * If a text macro is followed by words, then punctuation, the macro is + * "stopped" and "reopened" following the punctuation. Thus, the + * following arises: + * + * .Fl a ; b + * + * ELEMENT (.Fl) + * TEXT (`a') + * TEXT (`;') + * ELEMENT (.Fl) + * TEXT (`b') + * + * This must handle the following situations: + * + * .Fl Ar b ; ; + * + * ELEMENT (.Fl) + * ELEMENT (.Ar) + * TEXT (`b') + * TEXT (`;') + * TEXT (`;') + */ int macro_text(MACRO_PROT_ARGS) { @@ -603,6 +677,30 @@ macro_text(MACRO_PROT_ARGS) } +/* + * Handle explicit-scope (having a different closure token) and implicit + * scope (closing out prior scopes when re-invoked) macros. These + * constitute the BLOCK type and usually span multiple lines. These + * always have HEAD and sometimes have BODY types. 
In the multi-line + * case: + * + * .Bd -ragged + * Text. + * .Fl macro + * Another. + * .Ed + * + * BLOCK (.Bd) + * HEAD + * BODY + * TEXT (`Text.') + * ELEMENT (.Fl) + * TEXT (`macro') + * TEXT (`Another.') + * + * Note that the `.It' macro, possibly the most difficult (as it has + * embedded scope, etc.) is handled by this routine. + */ int macro_scoped(MACRO_PROT_ARGS) { @@ -704,6 +802,25 @@ macro_scoped(MACRO_PROT_ARGS) } +/* + * This handles a case of implicitly-scoped macro (BLOCK) limited to a + * single line. Instead of being closed out by a subsequent call to + * another macro, the scope is closed at the end of line. These have + * an empty HEAD and no TAIL. Notice that the punctuation falls outside + * of the BODY type. + * + * .Qq a Fl b Ar d ; ; + * + * BLOCK (Qq) + * BODY + * TEXT (`a') + * ELEMENT (.Fl) + * TEXT (`b') + * ELEMENT (.Ar) + * TEXT (`d') + * TEXT (`;') + * TEXT (`;') + */ int macro_scoped_line(MACRO_PROT_ARGS) { @@ -716,6 +833,9 @@ macro_scoped_line(MACRO_PROT_ARGS) if ( ! mdoc_head_alloc(mdoc, line, ppos, tok)) return(0); + mdoc->next = MDOC_NEXT_SIBLING; + if ( ! mdoc_body_alloc(mdoc, line, ppos, tok)) + return(0); mdoc->next = MDOC_NEXT_CHILD; /* XXX - no known argument macros. */ @@ -747,16 +867,31 @@ macro_scoped_line(MACRO_PROT_ARGS) } if (1 == ppos) { - if ( ! rewind_subblock(MDOC_HEAD, mdoc, tok, line, ppos)) + if ( ! rewind_subblock(MDOC_BODY, mdoc, tok, line, ppos)) return(0); if ( ! append_delims(mdoc, line, pos, buf)) return(0); - } else if ( ! rewind_subblock(MDOC_HEAD, mdoc, tok, line, ppos)) + } else if ( ! rewind_subblock(MDOC_BODY, mdoc, tok, line, ppos)) return(0); return(rewind_impblock(mdoc, tok, line, ppos)); } +/* + * A constant-scoped macro is like a simple-scoped macro (macro_scoped) + * except that it doesn't handle implicit scopes and explicit ones have + * a fixed number of TEXT children to the BODY.
+ * + * .Fl a So b Sc ; + * + * ELEMENT (.Fl) + * TEXT (`a') + * BLOCK (.So) + * HEAD + * BODY + * TEXT (`b') + * TEXT (';') + */ int macro_constant_scoped(MACRO_PROT_ARGS) { @@ -856,6 +991,19 @@ macro_constant_scoped(MACRO_PROT_ARGS) } +/* + * A delimited constant is very similar to the macros parsed by + * macro_text except that, in the event of punctuation, the macro isn't + * "re-opened" as it is in macro_text. Also, these macros have a fixed + * number of parameters. + * + * .Fl a No b + * + * ELEMENT (.Fl) + * TEXT (`a') + * ELEMENT (.No) + * TEXT (`b') + */ int macro_constant_delimited(MACRO_PROT_ARGS) { @@ -958,13 +1106,18 @@ macro_constant_delimited(MACRO_PROT_ARGS) } +/* + * A constant macro is the simplest classification. It spans an entire + * line. + */ int macro_constant(MACRO_PROT_ARGS) { int c, lastarg, argc, fl; struct mdoc_arg argv[MDOC_LINEARG_MAX]; char *p; - struct mdoc_node *n; + + assert( ! (MDOC_CALLABLE & mdoc_macros[tok].flags)); fl = 0; if (MDOC_QUOTABLE & mdoc_macros[tok].flags) @@ -1020,31 +1173,7 @@ macro_constant(MACRO_PROT_ARGS) mdoc->next = MDOC_NEXT_SIBLING; } - if ( ! rewind_elem(mdoc, tok)) - return(0); - if ( ! (MDOC_NOKEEP & mdoc_macros[tok].flags)) - return(1); - - assert(mdoc->last->tok == tok); - if (mdoc->last->parent->child == mdoc->last) - mdoc->last->parent->child = mdoc->last->prev; - if (mdoc->last->prev) - mdoc->last->prev->next = NULL; - - n = mdoc->last; - assert(NULL == mdoc->last->next); - - if (mdoc->last->prev) { - mdoc->last = mdoc->last->prev; - mdoc->next = MDOC_NEXT_SIBLING; - } else { - mdoc->last = mdoc->last->parent; - mdoc->next = MDOC_NEXT_CHILD; - } - - mdoc_node_freelist(n); - - return(1); + return(rewind_elem(mdoc, tok)); } @@ -1057,6 +1186,11 @@ macro_obsolete(MACRO_PROT_ARGS) } +/* + * This is called at the end of parsing. It must traverse up the tree, + * closing out open [implicit] scopes. Obviously, open explicit scopes + * are errors. 
+ */ int macro_end(struct mdoc *mdoc) { diff --git a/mdoc.c b/mdoc.c index 3606a1fb..247f7593 100644 --- a/mdoc.c +++ b/mdoc.c @@ -1,4 +1,4 @@ -/* $Id: mdoc.c,v 1.38 2009/01/19 17:53:54 kristaps Exp $ */ +/* $Id: mdoc.c,v 1.39 2009/01/20 12:51:28 kristaps Exp $ */ /* * Copyright (c) 2008 Kristaps Dzonsons * @@ -85,9 +85,9 @@ const char *const __mdoc_argnames[MDOC_ARG_MAX] = { const struct mdoc_macro __mdoc_macros[MDOC_MAX] = { { NULL, 0 }, /* \" */ - { macro_constant, MDOC_PROLOGUE | MDOC_NOKEEP }, /* Dd */ - { macro_constant, MDOC_PROLOGUE | MDOC_NOKEEP }, /* Dt */ - { macro_constant, MDOC_PROLOGUE | MDOC_NOKEEP }, /* Os */ + { macro_constant, MDOC_PROLOGUE }, /* Dd */ + { macro_constant, MDOC_PROLOGUE }, /* Dt */ + { macro_constant, MDOC_PROLOGUE }, /* Os */ { macro_scoped, 0 }, /* Sh */ { macro_scoped, 0 }, /* Ss */ { macro_text, 0 }, /* Pp */ @@ -295,14 +295,15 @@ mdoc_parseln(struct mdoc *mdoc, int line, char *buf) if (MDOC_HALT & mdoc->flags) return(0); + mdoc->linetok = 0; + if ('.' != *buf) { - if (SEC_PROLOGUE != mdoc->sec_lastn) { - if ( ! mdoc_word_alloc(mdoc, line, 0, buf)) - return(0); - mdoc->next = MDOC_NEXT_SIBLING; - return(1); - } - return(mdoc_perr(mdoc, line, 0, "text disallowed")); + if ( ! (MDOC_BODYPARSE & mdoc->flags)) + return(mdoc_perr(mdoc, line, 0, "text disallowed")); + if ( ! mdoc_word_alloc(mdoc, line, 0, buf)) + return(0); + mdoc->next = MDOC_NEXT_SIBLING; + return(1); } if (buf[1] && '\\' == buf[1]) @@ -400,8 +401,8 @@ mdoc_macro(struct mdoc *mdoc, int tok, assert(mdoc_macros[tok].fp); if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) && - SEC_PROLOGUE == mdoc->sec_lastn) - return(mdoc_perr(mdoc, ln, ppos, "macro disallowed in document prologue")); + ! (MDOC_BODYPARSE & mdoc->flags)) + return(mdoc_perr(mdoc, ln, ppos, "macro disallowed: not in document body")); if (1 != ppos && ! 
(MDOC_CALLABLE & mdoc_macros[tok].flags)) return(mdoc_perr(mdoc, ln, ppos, "macro not callable")); return((*mdoc_macros[tok].fp)(mdoc, tok, ln, ppos, pos, buf)); @@ -417,6 +418,26 @@ mdoc_node_append(struct mdoc *mdoc, struct mdoc_node *p) assert(mdoc->first); assert(MDOC_ROOT != p->type); + /* See if we exceed the suggested per-line token maximum. */ + + switch (p->type) { + case (MDOC_TEXT): + /* FALLTHROUGH */ + case (MDOC_ELEM): + /* FALLTHROUGH */ + case (MDOC_BLOCK): + mdoc->linetok++; + break; + default: + break; + } + + if (mdoc->linetok > MDOC_LINEARG_SOFTMAX) + if ( ! mdoc_nwarn(mdoc, p, WARN_COMPAT, + "suggested %d tokens per line exceeded (has %d)", + MDOC_LINEARG_SOFTMAX, mdoc->linetok)) + return(0); + if (MDOC_TEXT == mdoc->last->type) on = ""; else if (MDOC_ROOT == mdoc->last->type) diff --git a/mdocml.c b/mdocml.c index d186abec..8d0433ab 100644 --- a/mdocml.c +++ b/mdocml.c @@ -1,4 +1,4 @@ - /* $Id: mdocml.c,v 1.49 2009/01/19 17:51:33 kristaps Exp $ */ + /* $Id: mdocml.c,v 1.50 2009/01/20 12:51:28 kristaps Exp $ */ /* * Copyright (c) 2008 Kristaps Dzonsons * @@ -34,19 +34,7 @@ #define MD_LINE_SZ (256) /* Max input line size. */ -/* - * Put this into a mdoctrans.h, which has: - * - * struct mdoc_trans; (opaque) - * - * struct mdoc_trans *mdoc_trans_alloc(const char *filter); - * - * mdoc_trans_free(struct mdoc_trans *); - * - * int mdoc_trans_getopt(struct mdoc_trans *, char *); - * - * int mdoc_trans_print(struct mdoc_trans *, const struct mdoc_node *); - */ +/* TODO: have a struct for each transformer.
*/ typedef int (*mdocprint)(const struct mdoc_node *); diff --git a/private.h b/private.h index bd859d59..fbcdc90d 100644 --- a/private.h +++ b/private.h @@ -1,4 +1,4 @@ -/* $Id: private.h,v 1.69 2009/01/19 17:51:33 kristaps Exp $ */ +/* $Id: private.h,v 1.70 2009/01/20 12:51:28 kristaps Exp $ */ /* * Copyright (c) 2008 Kristaps Dzonsons * @@ -30,8 +30,10 @@ struct mdoc { void *data; struct mdoc_cb cb; void *htab; + int linetok; int flags; #define MDOC_HALT (1 << 0) +#define MDOC_BODYPARSE (1 << 1) enum mdoc_next next; struct mdoc_node *last; struct mdoc_node *first; @@ -41,9 +43,13 @@ struct mdoc { }; -/* FIXME: it's 9 (this isn't used properly). */ +/* Hard-limit of macro arguments. */ -#define MDOC_LINEARG_MAX 12 +#define MDOC_LINEARG_MAX 9 + +/* Suggested limit of macro arguments. */ + +#define MDOC_LINEARG_SOFTMAX 9 #define MACRO_PROT_ARGS struct mdoc *mdoc, int tok, int line, \ int ppos, int *pos, char *buf @@ -57,7 +63,6 @@ struct mdoc_macro { #define MDOC_QUOTABLE (1 << 3) #define MDOC_PROLOGUE (1 << 4) #define MDOC_TABSEP (1 << 5) -#define MDOC_NOKEEP (1 << 6) }; #define mdoc_nwarn(mdoc, node, type, fmt, ...) \ diff --git a/validate.c b/validate.c index 0c1415a1..b68e6ad9 100644 --- a/validate.c +++ b/validate.c @@ -1,4 +1,4 @@ -/* $Id: validate.c,v 1.42 2009/01/19 23:11:43 kristaps Exp $ */ +/* $Id: validate.c,v 1.43 2009/01/20 12:51:28 kristaps Exp $ */ /* * Copyright (c) 2008 Kristaps Dzonsons * @@ -27,7 +27,6 @@ typedef int (*v_post)(struct mdoc *); /* FIXME: some sections should only occur in specific msecs. */ /* FIXME: ignoring Pp. */ /* FIXME: math symbols. */ -/* FIXME: make sure prologue is complete. */ /* FIXME: valid character-escape checks. */ /* FIXME: make sure required sections are included (NAME, ...). 
*/ @@ -72,7 +71,6 @@ static int pre_prologue(struct mdoc *, struct mdoc_node *); static int herr_ge1(struct mdoc *); static int herr_le1(struct mdoc *); -static int hwarn_ge1(struct mdoc *); static int herr_eq0(struct mdoc *); static int eerr_eq0(struct mdoc *); static int eerr_le1(struct mdoc *); @@ -82,7 +80,6 @@ static int eerr_ge1(struct mdoc *); static int ewarn_eq0(struct mdoc *); static int ewarn_eq1(struct mdoc *); static int bwarn_ge1(struct mdoc *); -static int berr_eq0(struct mdoc *); static int ewarn_ge1(struct mdoc *); static int ebool(struct mdoc *); static int post_sh(struct mdoc *); @@ -119,14 +116,13 @@ static v_post posts_bd[] = { herr_eq0, bwarn_ge1, NULL }; static v_post posts_text[] = { eerr_ge1, NULL }; static v_post posts_wtext[] = { ewarn_ge1, NULL }; static v_post posts_notext[] = { eerr_eq0, NULL }; -static v_post posts_wline[] = { hwarn_ge1, berr_eq0, NULL }; +static v_post posts_wline[] = { bwarn_ge1, herr_eq0, NULL }; static v_post posts_sh[] = { herr_ge1, bwarn_ge1, post_sh, NULL }; static v_post posts_bl[] = { herr_eq0, bwarn_ge1, post_bl, NULL }; static v_post posts_it[] = { post_it, NULL }; static v_post posts_in[] = { ewarn_eq1, NULL }; static v_post posts_ss[] = { herr_ge1, NULL }; static v_post posts_pp[] = { ewarn_eq0, NULL }; -static v_post posts_d1[] = { herr_ge1, NULL }; static v_post posts_ex[] = { eerr_le1, post_ex, NULL }; static v_post posts_an[] = { post_an, NULL }; static v_post posts_at[] = { post_at, NULL }; @@ -145,8 +141,8 @@ const struct valids mdoc_valids[MDOC_MAX] = { { pres_sh, posts_sh }, /* Sh */ { pres_ss, posts_ss }, /* Ss */ { NULL, posts_pp }, /* Pp */ - { pres_d1, posts_d1 }, /* D1 */ - { pres_d1, posts_d1 }, /* Dl */ + { pres_d1, posts_wline }, /* D1 */ + { pres_d1, posts_wline }, /* Dl */ { pres_bd, posts_bd }, /* Bd */ { NULL, NULL }, /* Ed */ { pres_bl, posts_bl }, /* Bl */ @@ -364,16 +360,6 @@ pre_check_parent(struct mdoc *mdoc, struct mdoc_node *node, } -static int -berr_eq0(struct mdoc *mdoc) -{ - 
- if (MDOC_BODY != mdoc->last->type) - return(1); - return(post_check_children_eq(mdoc, "body children", 0)); -} - - static int bwarn_ge1(struct mdoc *mdoc) { @@ -466,16 +452,6 @@ herr_eq0(struct mdoc *mdoc) } -static int -hwarn_ge1(struct mdoc *mdoc) -{ - - if (MDOC_HEAD != mdoc->last->type) - return(1); - return(post_check_children_wgt(mdoc, "parameters", 0)); -} - - static int herr_le1(struct mdoc *mdoc) { -- cgit v1.2.3-56-ge451