-/* $Id: mdoc_validate.c,v 1.376 2020/01/19 16:44:50 schwarze Exp $ */
+/* $Id: mdoc_validate.c,v 1.388 2020/10/30 13:24:33 schwarze Exp $ */
/*
- * Copyright (c) 2008-2012 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010-2020 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2008-2012 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010 Joerg Sonnenberger <joerg@netbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Validation module for mdoc(7) syntax trees used by mandoc(1).
*/
#include "config.h"
#include "libmandoc.h"
#include "roff_int.h"
#include "libmdoc.h"
+#include "tag.h"
/* FIXME: .Bl -diag can't have non-text children in HEAD. */
static void post_delim(POST_ARGS);
static void post_delim_nb(POST_ARGS);
static void post_dt(POST_ARGS);
+static void post_em(POST_ARGS);
static void post_en(POST_ARGS);
+static void post_er(POST_ARGS);
static void post_es(POST_ARGS);
static void post_eoln(POST_ARGS);
static void post_ex(POST_ARGS);
static void post_fa(POST_ARGS);
+static void post_fl(POST_ARGS);
static void post_fn(POST_ARGS);
static void post_fname(POST_ARGS);
static void post_fo(POST_ARGS);
static void post_hyph(POST_ARGS);
-static void post_ignpar(POST_ARGS);
static void post_it(POST_ARGS);
static void post_lb(POST_ARGS);
static void post_nd(POST_ARGS);
static void post_root(POST_ARGS);
static void post_rs(POST_ARGS);
static void post_rv(POST_ARGS);
+static void post_section(POST_ARGS);
static void post_sh(POST_ARGS);
static void post_sh_head(POST_ARGS);
static void post_sh_name(POST_ARGS);
static void post_st(POST_ARGS);
static void post_std(POST_ARGS);
static void post_sx(POST_ARGS);
+static void post_tag(POST_ARGS);
+static void post_tg(POST_ARGS);
static void post_useless(POST_ARGS);
static void post_xr(POST_ARGS);
static void post_xx(POST_ARGS);
post_dt, /* Dt */
post_os, /* Os */
post_sh, /* Sh */
- post_ignpar, /* Ss */
+ post_section, /* Ss */
post_par, /* Pp */
post_display, /* D1 */
post_display, /* Dl */
NULL, /* Ap */
post_defaults, /* Ar */
NULL, /* Cd */
- post_delim_nb, /* Cm */
- post_delim_nb, /* Dv */
- post_delim_nb, /* Er */
- post_delim_nb, /* Ev */
+ post_tag, /* Cm */
+ post_tag, /* Dv */
+ post_er, /* Er */
+ post_tag, /* Ev */
post_ex, /* Ex */
post_fa, /* Fa */
NULL, /* Fd */
- post_delim_nb, /* Fl */
+ post_fl, /* Fl */
post_fn, /* Fn */
post_delim_nb, /* Ft */
- post_delim_nb, /* Ic */
+ post_tag, /* Ic */
post_delim_nb, /* In */
- post_defaults, /* Li */
+ post_tag, /* Li */
post_nd, /* Nd */
post_nm, /* Nm */
post_delim_nb, /* Op */
NULL, /* Dq */
NULL, /* Ec */
NULL, /* Ef */
- post_delim_nb, /* Em */
+ post_em, /* Em */
NULL, /* Eo */
post_xx, /* Fx */
- post_delim_nb, /* Ms */
- NULL, /* No */
+ post_tag, /* Ms */
+ post_tag, /* No */
post_ns, /* Ns */
post_xx, /* Nx */
post_xx, /* Ox */
post_delim_nb, /* Sq */
post_sm, /* Sm */
post_sx, /* Sx */
- post_delim_nb, /* Sy */
+ post_em, /* Sy */
post_useless, /* Tn */
post_xx, /* Ux */
NULL, /* Xc */
NULL, /* %Q */
NULL, /* %U */
NULL, /* Ta */
+ post_tg, /* Tg */
};
#define RSORD_MAX 14 /* Number of `Rs' blocks. */
NULL
};
+/*
+ * Priority that post_fname() hands to tag_put(); post_fname()
+ * post-increments it on each such call, and other validators in
+ * this file assign TAG_STRONG to it again.
+ */
+static int fn_prio = TAG_STRONG;
+
/* Validate the subtree rooted at mdoc->last. */
void
mdoc->last= n;
}
+/*
+ * Validate a .Tg macro, found in mdoc->last, and register its tag.
+ * The tag is taken from the first child of the .Tg node; without
+ * any child, the first child of the following node is used if that
+ * child is a text node.  If no tag is found, if it is empty, or if
+ * it contains a blank, a tab, or a backslash, a message is issued
+ * and the .Tg node is deleted.  Children after the first one are
+ * deleted with a message.  Otherwise, tag_put() is called with
+ * TAG_MANUAL priority on a node selected according to the macro of
+ * the following node, or on the .Tg node itself; when a node other
+ * than the .Tg node receives the tag, NODE_NOPRT is set on the
+ * .Tg node.
+ */
+static void
+post_tg(POST_ARGS)
+{
+ struct roff_node *n; /* The .Tg node. */
+ struct roff_node *nch; /* The first child of the .Tg node. */
+ struct roff_node *nn; /* The next node after the .Tg node. */
+ struct roff_node *np; /* The parent of the next node. */
+ struct roff_node *nt; /* The TEXT node containing the tag. */
+ size_t len; /* The number of bytes in the tag. */
+
+ /* Find the next node. */
+ n = mdoc->last;
+ /*
+ * Walk up the parent chain until a node with a next sibling
+ * is found; nn ends up NULL if the chain ends without one.
+ */
+ for (nn = n; nn != NULL; nn = nn->parent) {
+ if (nn->next != NULL) {
+ nn = nn->next;
+ break;
+ }
+ }
+
+ /* Find the tag. */
+ nt = nch = n->child;
+ /* Without an argument, use the leading text of the next node. */
+ if (nch == NULL && nn != NULL && nn->child != NULL &&
+ nn->child->type == ROFFT_TEXT)
+ nt = nn->child;
+
+ /* Validate the tag. */
+ if (nt == NULL || *nt->string == '\0')
+ mandoc_msg(MANDOCERR_MACRO_EMPTY, n->line, n->pos, "Tg");
+ if (nt == NULL) {
+ roff_node_delete(mdoc, n);
+ return;
+ }
+ /* len is the length of the prefix free of blank, tab, and backslash. */
+ len = strcspn(nt->string, " \t\\");
+ if (nt->string[len] != '\0')
+ mandoc_msg(MANDOCERR_TG_SPC, nt->line,
+ nt->pos + len, "Tg %s", nt->string);
+
+ /* Keep only the first argument. */
+ if (nch != NULL && nch->next != NULL) {
+ mandoc_msg(MANDOCERR_ARG_EXCESS, nch->next->line,
+ nch->next->pos, "Tg ... %s", nch->next->string);
+ while (nch->next != NULL)
+ roff_node_delete(mdoc, nch->next);
+ }
+
+ /* Drop the macro if the first argument is invalid. */
+ if (len == 0 || nt->string[len] != '\0') {
+ roff_node_delete(mdoc, n);
+ return;
+ }
+
+ /* By default, tag the .Tg node itself. */
+ if (nn == NULL || nn->flags & NODE_ID)
+ nn = n;
+
+ /* Explicit tagging of specific macros. */
+ switch (nn->tok) {
+ case MDOC_Sh:
+ case MDOC_Ss:
+ case MDOC_Fo:
+ nn = nn->head->child == NULL ? n : nn->head;
+ break;
+ case MDOC_It:
+ /* The list type of the enclosing .Bl decides which part to tag. */
+ np = nn->parent;
+ while (np->tok != MDOC_Bl)
+ np = np->parent;
+ switch (np->norm->Bl.type) {
+ case LIST_column:
+ /* nn remains the .It node itself. */
+ break;
+ case LIST_diag:
+ case LIST_hang:
+ case LIST_inset:
+ case LIST_ohang:
+ case LIST_tag:
+ nn = nn->head;
+ break;
+ case LIST_bullet:
+ case LIST_dash:
+ case LIST_enum:
+ case LIST_hyphen:
+ case LIST_item:
+ nn = nn->body->child == NULL ? n : nn->body;
+ break;
+ default:
+ abort();
+ }
+ break;
+ case MDOC_Bd:
+ case MDOC_Bl:
+ case MDOC_D1:
+ case MDOC_Dl:
+ nn = nn->body->child == NULL ? n : nn->body;
+ break;
+ case MDOC_Pp:
+ /* nn remains the .Pp node itself. */
+ break;
+ case MDOC_Cm:
+ case MDOC_Dv:
+ case MDOC_Em:
+ case MDOC_Er:
+ case MDOC_Ev:
+ case MDOC_Fl:
+ case MDOC_Fn:
+ case MDOC_Ic:
+ case MDOC_Li:
+ case MDOC_Ms:
+ case MDOC_No:
+ case MDOC_Sy:
+ if (nn->child == NULL)
+ nn = n;
+ break;
+ default:
+ nn = n;
+ break;
+ }
+ tag_put(nt->string, TAG_MANUAL, nn);
+ /* Another node carries the tag now. */
+ if (nn != n)
+ n->flags |= NODE_NOPRT;
+}
+
static void
post_obsolete(POST_ARGS)
{
+/*
+ * Check the function name held in the first child of mdoc->last.
+ * A name starting with '(' and ending with ')' is accepted without
+ * a message.  Any other name containing '(' or ')' is reported
+ * with MANDOCERR_FN_PAREN at the position of the first offending
+ * byte.  A name free of parentheses is passed to tag_put() with
+ * the fn_prio priority, which is then incremented, if the node
+ * has the section SEC_DESCRIPTION or SEC_CUSTOM.
+ */
static void
post_fname(POST_ARGS)
{
- const struct roff_node *n;
+ struct roff_node *n, *nch;
const char *cp;
size_t pos;
- n = mdoc->last->child;
- cp = n->string;
+ n = mdoc->last;
+ nch = n->child;
+ cp = nch->string;
if (*cp == '(') {
if (cp[strlen(cp + 1)] == ')')
return;
pos = 0;
} else {
pos = strcspn(cp, "()");
- if (cp[pos] == '\0')
+ if (cp[pos] == '\0') {
+ if (n->sec == SEC_DESCRIPTION ||
+ n->sec == SEC_CUSTOM)
+ tag_put(NULL, fn_prio++, n);
return;
+ }
}
- mandoc_msg(MANDOCERR_FN_PAREN, n->line, n->pos + pos, "%s", cp);
+ mandoc_msg(MANDOCERR_FN_PAREN, nch->line, nch->pos + pos, "%s", cp);
}
+/*
+ * Validate the node in mdoc->last by running the function name
+ * check of post_fname() followed by post_fa() on it.
+ */
static void
post_fn(POST_ARGS)
{
-
post_fname(mdoc);
post_fa(mdoc);
}
+/*
+ * Supply default arguments for a macro used without any.
+ * If mdoc->last has children, only post_delim_nb() is run.
+ * Otherwise, text nodes are inserted as children: "file" and "..."
+ * for Ar, "~" for Pa and Mt.  Every inserted node gets NODE_NOSRC
+ * set, and mdoc->last is pointed back at the macro node afterwards.
+ * Any other macro reaching the switch aborts the program.
+ */
static void
post_defaults(POST_ARGS)
{
- struct roff_node *nn;
+ struct roff_node *n;
- if (mdoc->last->child != NULL) {
+ n = mdoc->last;
+ if (n->child != NULL) {
post_delim_nb(mdoc);
return;
}
-
- /*
- * The `Ar' defaults to "file ..." if no value is provided as an
- * argument; the `Mt' and `Pa' macros use "~"; the `Li' just
- * gets an empty string.
- */
-
- nn = mdoc->last;
- switch (nn->tok) {
+ mdoc->next = ROFF_NEXT_CHILD;
+ switch (n->tok) {
case MDOC_Ar:
- mdoc->next = ROFF_NEXT_CHILD;
- roff_word_alloc(mdoc, nn->line, nn->pos, "file");
- mdoc->last->flags |= NODE_NOSRC;
- roff_word_alloc(mdoc, nn->line, nn->pos, "...");
+ roff_word_alloc(mdoc, n->line, n->pos, "file");
mdoc->last->flags |= NODE_NOSRC;
+ roff_word_alloc(mdoc, n->line, n->pos, "...");
break;
case MDOC_Pa:
case MDOC_Mt:
- mdoc->next = ROFF_NEXT_CHILD;
- roff_word_alloc(mdoc, nn->line, nn->pos, "~");
- mdoc->last->flags |= NODE_NOSRC;
+ roff_word_alloc(mdoc, n->line, n->pos, "~");
break;
default:
abort();
}
- mdoc->last = nn;
+ /* Flag the word allocated last, then restore the macro node. */
+ mdoc->last->flags |= NODE_NOSRC;
+ mdoc->last = n;
}
static void
}
+/*
+ * Run post_tag() on mdoc->last, then additionally offer the node
+ * to tag_put() with TAG_FALLBACK priority.
+ */
static void
-post_en(POST_ARGS)
+post_em(POST_ARGS)
{
+ post_tag(mdoc);
+ tag_put(NULL, TAG_FALLBACK, mdoc->last);
+}
+/*
+ * Run post_obsolete() on mdoc->last and, if that node is a block,
+ * store the node saved in mdoc->last_es into its norm->Es.
+ */
+static void
+post_en(POST_ARGS)
+{
post_obsolete(mdoc);
if (mdoc->last->type == ROFFT_BLOCK)
mdoc->last->norm->Es = mdoc->last_es;
}
+/*
+ * If mdoc->last has the section SEC_ERRORS and either its parent
+ * is an It node, or its parent is a Bq node whose own grandparent
+ * is an It node, pass it to tag_put() with TAG_STRONG priority.
+ * In any case, run post_delim_nb() afterwards.
+ */
static void
-post_es(POST_ARGS)
+post_er(POST_ARGS)
{
+ struct roff_node *n;
+
+ n = mdoc->last;
+ if (n->sec == SEC_ERRORS &&
+ (n->parent->tok == MDOC_It ||
+ (n->parent->tok == MDOC_Bq &&
+ n->parent->parent->parent->tok == MDOC_It)))
+ tag_put(NULL, TAG_STRONG, n);
+ post_delim_nb(mdoc);
+}
+/*
+ * Pass mdoc->last to tag_put() with TAG_STRONG priority if both of
+ * the following hold:  The node has no previous sibling, or its
+ * previous sibling is a text node consisting of "|".  And the
+ * parent is an It node, or the parent is an Xo node whose parent
+ * has no previous sibling and whose grandparent is an It node.
+ * In any case, run post_delim_nb() afterwards.
+ */
+static void
+post_tag(POST_ARGS)
+{
+ struct roff_node *n;
+
+ n = mdoc->last;
+ if ((n->prev == NULL ||
+ (n->prev->type == ROFFT_TEXT &&
+ strcmp(n->prev->string, "|") == 0)) &&
+ (n->parent->tok == MDOC_It ||
+ (n->parent->tok == MDOC_Xo &&
+ n->parent->parent->prev == NULL &&
+ n->parent->parent->parent->tok == MDOC_It)))
+ tag_put(NULL, TAG_STRONG, n);
+ post_delim_nb(mdoc);
+}
+
+/*
+ * Run post_obsolete() on mdoc->last and remember that node
+ * in mdoc->last_es, where post_en() picks it up.
+ */
+static void
+post_es(POST_ARGS)
+{
post_obsolete(mdoc);
mdoc->last_es = mdoc->last;
}
+/*
+ * Merge an Fl node without children into the Fl node following
+ * it, then run post_tag() on mdoc->last.  The merge happens when
+ * the previous sibling of mdoc->last is a childless Fl node,
+ * mdoc->last has a child, and NODE_LINE is not set on mdoc->last.
+ */
+static void
+post_fl(POST_ARGS)
+{
+ struct roff_node *n;
+ char *cp;
+
+ /*
+ * Transform ".Fl Fl long" to ".Fl \-long",
+ * resulting for example in better HTML output.
+ */
+
+ n = mdoc->last;
+ if (n->prev != NULL && n->prev->tok == MDOC_Fl &&
+ n->prev->child == NULL && n->child != NULL &&
+ (n->flags & NODE_LINE) == 0) {
+ /* Replace the string of the first child by a "\-" prefixed copy. */
+ mandoc_asprintf(&cp, "\\-%s", n->child->string);
+ free(n->child->string);
+ n->child->string = cp;
+ roff_node_delete(mdoc, n->prev);
+ }
+ post_tag(mdoc);
+}
+
static void
post_xx(POST_ARGS)
{
if ((nch = nit->head->child) != NULL)
mandoc_msg(MANDOCERR_ARG_SKIP,
nit->line, nit->pos, "It %s",
- nch->string == NULL ? roff_name[nch->tok] :
- nch->string);
+ nch->type == ROFFT_TEXT ? nch->string :
+ roff_name[nch->tok]);
break;
case LIST_column:
cols = (int)nbl->norm->Bl.ncols;
static void
post_bl(POST_ARGS)
{
- struct roff_node *nparent, *nprev; /* of the Bl block */
- struct roff_node *nblock, *nbody; /* of the Bl */
+ struct roff_node *nbody; /* of the Bl */
struct roff_node *nchild, *nnext; /* of the Bl body */
const char *prev_Er;
int order;
if (nbody->end != ENDBODY_NOT)
return;
- nchild = nbody->child;
- if (nchild == NULL) {
- mandoc_msg(MANDOCERR_BLK_EMPTY,
- nbody->line, nbody->pos, "Bl");
- return;
+ /*
+ * Up to the first item, move nodes before the list,
+ * but leave transparent nodes where they are
+ * if they precede an item.
+ * The next non-transparent node is kept in nchild.
+ * It only needs to be updated after a non-transparent
+ * node was moved out, and at the very beginning
+ * when no node at all was moved yet.
+ */
+
+ nchild = mdoc->last;
+ for (;;) {
+ if (nchild == mdoc->last)
+ nchild = roff_node_child(nbody);
+ if (nchild == NULL) {
+ mdoc->last = nbody;
+ mandoc_msg(MANDOCERR_BLK_EMPTY,
+ nbody->line, nbody->pos, "Bl");
+ return;
+ }
+ if (nchild->tok == MDOC_It) {
+ mdoc->last = nbody;
+ break;
+ }
+ mandoc_msg(MANDOCERR_BL_MOVE, nbody->child->line,
+ nbody->child->pos, "%s", roff_name[nbody->child->tok]);
+ if (nbody->parent->prev == NULL) {
+ mdoc->last = nbody->parent->parent;
+ mdoc->next = ROFF_NEXT_CHILD;
+ } else {
+ mdoc->last = nbody->parent->prev;
+ mdoc->next = ROFF_NEXT_SIBLING;
+ }
+ roff_node_relink(mdoc, nbody->child);
}
+
+ /*
+ * We have reached the first item,
+ * so moving nodes out is no longer possible.
+ * But in .Bl -column, the first rows may be implicit,
+ * that is, they may not start with .It macros.
+ * Such rows may be followed by nodes generated on the
+ * roff level, for example .TS.
+ * Wrap such roff nodes into an implicit row.
+ */
+
while (nchild != NULL) {
- nnext = nchild->next;
- if (nchild->tok == MDOC_It ||
- (nchild->tok == MDOC_Sm &&
- nnext != NULL && nnext->tok == MDOC_It)) {
- nchild = nnext;
+ if (nchild->tok == MDOC_It) {
+ nchild = roff_node_next(nchild);
continue;
}
-
- /*
- * In .Bl -column, the first rows may be implicit,
- * that is, they may not start with .It macros.
- * Such rows may be followed by nodes generated on the
- * roff level, for example .TS, which cannot be moved
- * out of the list. In that case, wrap such roff nodes
- * into an implicit row.
- */
-
- if (nchild->prev != NULL) {
- mdoc->last = nchild;
- mdoc->next = ROFF_NEXT_SIBLING;
- roff_block_alloc(mdoc, nchild->line,
- nchild->pos, MDOC_It);
- roff_head_alloc(mdoc, nchild->line,
- nchild->pos, MDOC_It);
+ nnext = nchild->next;
+ mdoc->last = nchild->prev;
+ mdoc->next = ROFF_NEXT_SIBLING;
+ roff_block_alloc(mdoc, nchild->line, nchild->pos, MDOC_It);
+ roff_head_alloc(mdoc, nchild->line, nchild->pos, MDOC_It);
+ mdoc->next = ROFF_NEXT_SIBLING;
+ roff_body_alloc(mdoc, nchild->line, nchild->pos, MDOC_It);
+ while (nchild->tok != MDOC_It) {
+ roff_node_relink(mdoc, nchild);
+ if (nnext == NULL)
+ break;
+ nchild = nnext;
+ nnext = nchild->next;
mdoc->next = ROFF_NEXT_SIBLING;
- roff_body_alloc(mdoc, nchild->line,
- nchild->pos, MDOC_It);
- while (nchild->tok != MDOC_It) {
- roff_node_relink(mdoc, nchild);
- if ((nchild = nnext) == NULL)
- break;
- nnext = nchild->next;
- mdoc->next = ROFF_NEXT_SIBLING;
- }
- mdoc->last = nbody;
- continue;
}
-
- mandoc_msg(MANDOCERR_BL_MOVE, nchild->line, nchild->pos,
- "%s", roff_name[nchild->tok]);
-
- /*
- * Move the node out of the Bl block.
- * First, collect all required node pointers.
- */
-
- nblock = nbody->parent;
- nprev = nblock->prev;
- nparent = nblock->parent;
-
- /*
- * Unlink this child.
- */
-
- nbody->child = nnext;
- if (nnext == NULL)
- nbody->last = NULL;
- else
- nnext->prev = NULL;
-
- /*
- * Relink this child.
- */
-
- nchild->parent = nparent;
- nchild->prev = nprev;
- nchild->next = nblock;
-
- nblock->prev = nchild;
- if (nprev == NULL)
- nparent->child = nchild;
- else
- nprev->next = nchild;
-
- nchild = nnext;
+ mdoc->last = nbody;
}
if (mdoc->meta.os_e != MANDOC_OS_NETBSD)
+/*
+ * In every text child of mdoc->last, replace each '-' that stands
+ * between two alphabetic characters by ASCII_HYPH.  Right before
+ * such a replacement, if the node has NODE_ID set and no tag yet,
+ * a copy of the child's string is stored in n->tag.
+ * NOTE(review): the scan starts at the second byte of each string;
+ * unchanged context lines may be elided from this hunk, so confirm
+ * against the full file how empty strings are handled.
+ */
static void
post_hyph(POST_ARGS)
{
- struct roff_node *nch;
+ struct roff_node *n, *nch;
char *cp;
- for (nch = mdoc->last->child; nch != NULL; nch = nch->next) {
+ n = mdoc->last;
+ for (nch = n->child; nch != NULL; nch = nch->next) {
if (nch->type != ROFFT_TEXT)
continue;
cp = nch->string;
while (*(++cp) != '\0')
if (*cp == '-' &&
isalpha((unsigned char)cp[-1]) &&
- isalpha((unsigned char)cp[1]))
+ isalpha((unsigned char)cp[1])) {
+ if (n->tag == NULL && n->flags & NODE_ID)
+ n->tag = mandoc_strdup(nch->string);
*cp = ASCII_HYPH;
+ }
}
}
static void
post_sh(POST_ARGS)
{
-
- post_ignpar(mdoc);
+ post_section(mdoc);
switch (mdoc->last->type) {
case ROFFT_HEAD:
roff_setreg(mdoc->roff, "nS", 0, '=');
mdoc->flags &= ~MDOC_SYNOPSIS;
}
+ if (sec == SEC_DESCRIPTION)
+ fn_prio = TAG_STRONG;
/* Mark our last section. */
}
+/*
+ * Validate a section node in mdoc->last, depending on its type.
+ * For a block node, run post_prevpar().
+ * For a head node, obtain its text with deroff() and replace
+ * blanks by underscores; if the first child is a text node with
+ * exactly that string, call tag_put() with a NULL tag and
+ * TAG_STRONG priority, otherwise with the derived tag and
+ * TAG_FALLBACK priority; then run post_delim() and post_hyph().
+ * The code after the switch deletes a leading Pp, br, or sp child
+ * and a trailing Pp or br child, each with MANDOCERR_PAR_SKIP.
+ * NOTE(review): no case label leaving the switch without a return
+ * is visible in this hunk; presumably it is in elided context,
+ * confirm against the full file.
+ */
static void
-post_ignpar(POST_ARGS)
+post_section(POST_ARGS)
{
- struct roff_node *np;
+ struct roff_node *n, *nch;
+ char *cp, *tag;
- switch (mdoc->last->type) {
+ n = mdoc->last;
+ switch (n->type) {
case ROFFT_BLOCK:
post_prevpar(mdoc);
return;
case ROFFT_HEAD:
+ tag = NULL;
+ deroff(&tag, n);
+ if (tag != NULL) {
+ for (cp = tag; *cp != '\0'; cp++)
+ if (*cp == ' ')
+ *cp = '_';
+ if ((nch = n->child) != NULL &&
+ nch->type == ROFFT_TEXT &&
+ strcmp(nch->string, tag) == 0)
+ tag_put(NULL, TAG_STRONG, n);
+ else
+ tag_put(tag, TAG_FALLBACK, n);
+ free(tag);
+ }
post_delim(mdoc);
post_hyph(mdoc);
return;
default:
return;
}
-
- if ((np = mdoc->last->child) != NULL)
- if (np->tok == MDOC_Pp ||
- np->tok == ROFF_br || np->tok == ROFF_sp) {
- mandoc_msg(MANDOCERR_PAR_SKIP, np->line, np->pos,
- "%s after %s", roff_name[np->tok],
- roff_name[mdoc->last->tok]);
- roff_node_delete(mdoc, np);
- }
-
- if ((np = mdoc->last->last) != NULL)
- if (np->tok == MDOC_Pp || np->tok == ROFF_br) {
- mandoc_msg(MANDOCERR_PAR_SKIP, np->line, np->pos,
- "%s at the end of %s", roff_name[np->tok],
- roff_name[mdoc->last->tok]);
- roff_node_delete(mdoc, np);
- }
+ if ((nch = n->child) != NULL &&
+ (nch->tok == MDOC_Pp || nch->tok == ROFF_br ||
+ nch->tok == ROFF_sp)) {
+ mandoc_msg(MANDOCERR_PAR_SKIP, nch->line, nch->pos,
+ "%s after %s", roff_name[nch->tok],
+ roff_name[n->tok]);
+ roff_node_delete(mdoc, nch);
+ }
+ if ((nch = n->last) != NULL &&
+ (nch->tok == MDOC_Pp || nch->tok == ROFF_br)) {
+ mandoc_msg(MANDOCERR_PAR_SKIP, nch->line, nch->pos,
+ "%s at the end of %s", roff_name[nch->tok],
+ roff_name[n->tok]);
+ roff_node_delete(mdoc, nch);
+ }
}
+/*
+ * Delete a Pp or br node preceding mdoc->last, reporting it with
+ * MANDOCERR_PAR_SKIP.  Nothing is done unless mdoc->last is an
+ * element or a block and roff_node_prev() finds a node before it.
+ * The preceding node is also kept if mdoc->last is a Bl with the
+ * comp flag set, or an It whose parent has the Bl comp flag set.
+ */
static void
post_prevpar(POST_ARGS)
{
- struct roff_node *n;
+ struct roff_node *n, *np;
n = mdoc->last;
- if (NULL == n->prev)
- return;
if (n->type != ROFFT_ELEM && n->type != ROFFT_BLOCK)
return;
+ if ((np = roff_node_prev(n)) == NULL)
+ return;
/*
* Don't allow `Pp' prior to a paragraph-type
* block: `Pp' or non-compact `Bd' or `Bl'.
*/
- if (n->prev->tok != MDOC_Pp && n->prev->tok != ROFF_br)
+ if (np->tok != MDOC_Pp && np->tok != ROFF_br)
return;
if (n->tok == MDOC_Bl && n->norm->Bl.comp)
return;
if (n->tok == MDOC_It && n->parent->norm->Bl.comp)
return;
- mandoc_msg(MANDOCERR_PAR_SKIP, n->prev->line, n->prev->pos,
- "%s before %s", roff_name[n->prev->tok], roff_name[n->tok]);
- roff_node_delete(mdoc, n->prev);
+ mandoc_msg(MANDOCERR_PAR_SKIP, np->line, np->pos,
+ "%s before %s", roff_name[np->tok], roff_name[n->tok]);
+ roff_node_delete(mdoc, np);
}
static void
{
struct roff_node *np;
+ fn_prio = TAG_STRONG;
post_prevpar(mdoc);
np = mdoc->last;
mandoc_msg(MANDOCERR_MSEC_BAD,
nn->line, nn->pos, "Dt ... %s", nn->string);
mdoc->meta.vol = mandoc_strdup(nn->string);
- } else
+ } else {
mdoc->meta.vol = mandoc_strdup(cp);
+ if (mdoc->filesec != '\0' &&
+ mdoc->filesec != *nn->string &&
+ *nn->string >= '1' && *nn->string <= '9')
+ mandoc_msg(MANDOCERR_MSEC_FILE, nn->line, nn->pos,
+ "*.%c vs Dt ... %c", mdoc->filesec, *nn->string);
+ }
/* Optional third argument: architecture. */