-/* $Id: mdoc_validate.c,v 1.352 2017/08/02 13:29:04 schwarze Exp $ */
+/* $Id: mdoc_validate.c,v 1.364 2018/12/04 02:53:51 schwarze Exp $ */
/*
* Copyright (c) 2008-2012 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2010-2018 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2010 Joerg Sonnenberger <joerg@netbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
typedef void (*v_post)(POST_ARGS);
static int build_list(struct roff_man *, int);
-static void check_text(struct roff_man *, int, int, char *);
static void check_argv(struct roff_man *,
struct roff_node *, struct mdoc_argv *);
static void check_args(struct roff_man *, struct roff_node *);
+static void check_text(struct roff_man *, int, int, char *);
+static void check_text_em(struct roff_man *, int, int, char *);
static void check_toptext(struct roff_man *, int, int, const char *);
static int child_an(const struct roff_node *);
static size_t macro2len(enum roff_tok);
static void rewrite_macro2len(struct roff_man *, char **);
static int similar(const char *, const char *);
+static void post_abort(POST_ARGS);
static void post_an(POST_ARGS);
static void post_an_norm(POST_ARGS);
static void post_at(POST_ARGS);
static void post_xr(POST_ARGS);
static void post_xx(POST_ARGS);
-static const v_post __mdoc_valids[MDOC_MAX - MDOC_Dd] = {
+static const v_post mdoc_valids[MDOC_MAX - MDOC_Dd] = {
post_dd, /* Dd */
post_dt, /* Dt */
post_os, /* Os */
post_nd, /* Nd */
post_nm, /* Nm */
post_delim_nb, /* Op */
- post_obsolete, /* Ot */
+ post_abort, /* Ot */
post_defaults, /* Pa */
post_rv, /* Rv */
post_st, /* St */
post_hyph, /* %T */ /* FIXME: can be used outside Rs/Re. */
NULL, /* %V */
NULL, /* Ac */
- post_delim_nb, /* Ao */
+ NULL, /* Ao */
post_delim_nb, /* Aq */
post_at, /* At */
NULL, /* Bc */
post_bf, /* Bf */
- post_delim_nb, /* Bo */
+ NULL, /* Bo */
NULL, /* Bq */
post_xx, /* Bsx */
post_bx, /* Bx */
post_xx, /* Ox */
NULL, /* Pc */
NULL, /* Pf */
- post_delim_nb, /* Po */
+ NULL, /* Po */
post_delim_nb, /* Pq */
NULL, /* Qc */
post_delim_nb, /* Ql */
- post_delim_nb, /* Qo */
+ NULL, /* Qo */
post_delim_nb, /* Qq */
NULL, /* Re */
post_rs, /* Rs */
NULL, /* Sc */
- post_delim_nb, /* So */
+ NULL, /* So */
post_delim_nb, /* Sq */
post_sm, /* Sm */
post_sx, /* Sx */
NULL, /* Xo */
post_fo, /* Fo */
NULL, /* Fc */
- post_delim_nb, /* Oo */
+ NULL, /* Oo */
NULL, /* Oc */
post_bk, /* Bk */
NULL, /* Ek */
post_obsolete, /* Fr */
post_eoln, /* Ud */
post_lb, /* Lb */
- post_par, /* Lp */
+ post_abort, /* Lp */
post_delim_nb, /* Lk */
post_defaults, /* Mt */
post_delim_nb, /* Brq */
- post_delim_nb, /* Bro */
+ NULL, /* Bro */
NULL, /* Brc */
NULL, /* %C */
post_es, /* Es */
NULL, /* %U */
NULL, /* Ta */
};
-static const v_post *const mdoc_valids = __mdoc_valids - MDOC_Dd;
#define RSORD_MAX 14 /* Number of `Rs' blocks. */
};
+/* Validate the subtree rooted at mdoc->last. */
void
mdoc_node_validate(struct roff_man *mdoc)
{
- struct roff_node *n;
+ struct roff_node *n, *np;
const v_post *p;
+ /*
+ * Translate obsolete macros to modern macros first
+ * such that later code does not need to look
+ * for the obsolete versions.
+ */
+
n = mdoc->last;
+ switch (n->tok) {
+ case MDOC_Lp:
+ n->tok = MDOC_Pp;
+ break;
+ case MDOC_Ot:
+ post_obsolete(mdoc);
+ n->tok = MDOC_Ft;
+ break;
+ default:
+ break;
+ }
+
+ /*
+ * Iterate over all children, recursing into each one
+ * in turn, depth-first.
+ */
+
mdoc->last = mdoc->last->child;
while (mdoc->last != NULL) {
mdoc_node_validate(mdoc);
mdoc->last = mdoc->last->next;
}
+ /* Finally validate the macro itself. */
+
mdoc->last = n;
mdoc->next = ROFF_NEXT_SIBLING;
switch (n->type) {
case ROFFT_TEXT:
+ np = n->parent;
if (n->sec != SEC_SYNOPSIS ||
- (n->parent->tok != MDOC_Cd && n->parent->tok != MDOC_Fd))
+ (np->tok != MDOC_Cd && np->tok != MDOC_Fd))
check_text(mdoc, n->line, n->pos, n->string);
- if (n->parent->tok == MDOC_It ||
- (n->parent->type == ROFFT_BODY &&
- (n->parent->tok == MDOC_Sh ||
- n->parent->tok == MDOC_Ss)))
+ if (np->tok != MDOC_Ql && np->tok != MDOC_Dl &&
+ (np->tok != MDOC_Bd ||
+ (mdoc->flags & MDOC_LITERAL) == 0) &&
+ (np->tok != MDOC_It || np->type != ROFFT_HEAD ||
+ np->parent->parent->norm->Bl.type != LIST_diag))
+ check_text_em(mdoc, n->line, n->pos, n->string);
+ if (np->tok == MDOC_It || (np->type == ROFFT_BODY &&
+ (np->tok == MDOC_Sh || np->tok == MDOC_Ss)))
check_toptext(mdoc, n->line, n->pos, n->string);
break;
+ case ROFFT_COMMENT:
case ROFFT_EQN:
case ROFFT_TBL:
break;
/* Call the macro's postprocessor. */
if (n->tok < ROFF_MAX) {
- switch(n->tok) {
- case ROFF_br:
- case ROFF_sp:
- post_par(mdoc);
- break;
- default:
- roff_validate(mdoc);
- break;
- }
+ roff_validate(mdoc);
break;
}
assert(n->tok >= MDOC_Dd && n->tok < MDOC_MAX);
- p = mdoc_valids + n->tok;
+ p = mdoc_valids + (n->tok - MDOC_Dd);
if (*p)
(*p)(mdoc);
if (mdoc->last == n)
ln, pos + (int)(p - cp), NULL);
}
+/*
+ * Scan the text string p for a free-standing "--" that looks like
+ * an em-dash typed as two hyphens and report MANDOCERR_DASHDASH.
+ * At most one message is emitted per call.
+ * ln and pos are passed on to the message, with pos advanced
+ * to the column of the first '-' of the offending pair.
+ * The siblings of mdoc->last are consulted when the "--" sits
+ * at the very beginning or end of p.
+ */
+static void
+check_text_em(struct roff_man *mdoc, int ln, int pos, char *p)
+{
+ const struct roff_node *np, *nn;
+ char *cp;
+
+ /* Previous and next sibling of the node being validated. */
+
+ np = mdoc->last->prev;
+ nn = mdoc->last->next;
+
+ /* Look for em-dashes wrongly encoded as "--". */
+
+ for (cp = p; *cp != '\0'; cp++) {
+ if (cp[0] != '-' || cp[1] != '-')
+ continue;
+
+ /* From here on, cp points to the second '-' of the pair. */
+
+ cp++;
+
+ /* Skip input sequences of more than two '-'. */
+
+ if (cp[1] == '-') {
+ while (cp[1] == '-')
+ cp++;
+ continue;
+ }
+
+ /* Skip "--" directly attached to something else. */
+
+ if ((cp - p > 1 && cp[-2] != ' ') ||
+ (cp[1] != '\0' && cp[1] != ' '))
+ continue;
+
+ /* Require a letter right before or right afterwards. */
+
+ /*
+ * Before: if p holds a byte in front of the blank preceding
+ * the "--", test that byte; otherwise test the last byte
+ * of the previous sibling, provided it is a text node.
+ * After: if p holds a byte behind the blank following
+ * the "--", test that byte; otherwise test the first byte
+ * of the next sibling, provided it is a text node.
+ */
+
+ if ((cp - p > 2 ?
+ isalpha((unsigned char)cp[-3]) :
+ np != NULL &&
+ np->type == ROFFT_TEXT &&
+ *np->string != '\0' &&
+ isalpha((unsigned char)np->string[
+ strlen(np->string) - 1])) ||
+ (cp[1] != '\0' && cp[2] != '\0' ?
+ isalpha((unsigned char)cp[2]) :
+ nn != NULL &&
+ nn->type == ROFFT_TEXT &&
+ isalpha((unsigned char)*nn->string))) {
+ /* Subtract one because cp is on the second '-'. */
+ mandoc_msg(MANDOCERR_DASHDASH, mdoc->parse,
+ ln, pos + (int)(cp - p) - 1, NULL);
+ break;
+ }
+ }
+}
+
static void
check_toptext(struct roff_man *mdoc, int ln, int pos, const char *p)
{
}
}
+/*
+ * Validation handler that unconditionally calls abort().
+ * It fills the mdoc_valids slots for Ot and Lp: mdoc_node_validate()
+ * rewrites those tokens to Ft and Pp before looking up the handler,
+ * so this function is not expected to be reached.
+ */
+static void
+post_abort(POST_ARGS)
+{
+ abort();
+}
+
static void
post_delim(POST_ARGS)
{
/* At least three alphabetic words with a sentence ending. */
if (strchr("!.:?", *lc) != NULL && (tok == MDOC_Em ||
- tok == MDOC_Li || tok == MDOC_Po || tok == MDOC_Pq ||
- tok == MDOC_Sy)) {
+ tok == MDOC_Li || tok == MDOC_Pq || tok == MDOC_Sy)) {
nw = 0;
for (cp = lc - 1; cp >= nch->string; cp--) {
if (*cp == ' ') {
for (ic = 1;; ic++) {
roff_elem_alloc(mdoc, n->line, n->pos, tok);
mdoc->last->flags |= NODE_NOSRC;
- mdoc_node_relink(mdoc, n);
+ roff_node_relink(mdoc, n);
n = mdoc->last = mdoc->last->parent;
mdoc->next = ROFF_NEXT_SIBLING;
if (n->next == NULL)
roff_word_alloc(mdoc, n->line, n->pos, "library");
mdoc->last->flags = NODE_NOSRC;
- roff_word_alloc(mdoc, n->line, n->pos, "\\(Lq");
+ roff_word_alloc(mdoc, n->line, n->pos, "\\(lq");
mdoc->last->flags = NODE_DELIMO | NODE_NOSRC;
mdoc->last = mdoc->last->next;
- roff_word_alloc(mdoc, n->line, n->pos, "\\(Rq");
+ roff_word_alloc(mdoc, n->line, n->pos, "\\(rq");
mdoc->last->flags = NODE_DELIMC | NODE_NOSRC;
mdoc->last = n;
}
n->child->type == ROFFT_TEXT && mdoc->meta.msec != NULL)
mandoc_xr_add(mdoc->meta.msec, n->child->string, -1, -1);
- if (n->last != NULL &&
- (n->last->tok == MDOC_Pp ||
- n->last->tok == MDOC_Lp))
- mdoc_node_relink(mdoc, n->last);
+ if (n->last != NULL && n->last->tok == MDOC_Pp)
+ roff_node_relink(mdoc, n->last);
if (mdoc->meta.name == NULL)
deroff(&mdoc->meta.name, n);
mdoc->parse, n->line, n->pos, "Bd");
mdoc->next = ROFF_NEXT_SIBLING;
while (n->body->child != NULL)
- mdoc_node_relink(mdoc,
+ roff_node_relink(mdoc,
n->body->child);
roff_node_delete(mdoc, n);
break;
while (nc != NULL) {
switch (nc->tok) {
case MDOC_Pp:
- case MDOC_Lp:
case ROFF_br:
break;
default:
mandoc_msg(MANDOCERR_PAR_MOVE,
mdoc->parse, nc->line, nc->pos,
roff_name[nc->tok]);
- mdoc_node_relink(mdoc, nc);
+ roff_node_relink(mdoc, nc);
} else if (n->norm->Bl.comp == 0 &&
n->norm->Bl.type != LIST_column) {
mandoc_vmsg(MANDOCERR_PAR_SKIP,
roff_body_alloc(mdoc, nchild->line,
nchild->pos, MDOC_It);
while (nchild->tok != MDOC_It) {
- mdoc_node_relink(mdoc, nchild);
+ roff_node_relink(mdoc, nchild);
if ((nchild = nnext) == NULL)
break;
nnext = nchild->next;
mandoc_vmsg(MANDOCERR_SM_BAD,
mdoc->parse, nch->line, nch->pos,
"%s %s", roff_name[mdoc->last->tok], nch->string);
- mdoc_node_relink(mdoc, nch);
+ roff_node_relink(mdoc, nch);
return;
}
arch++;
if (*arch == NULL) {
n = mdoc->first->child;
- while (n->tok != MDOC_Dt)
+ while (n->tok != MDOC_Dt ||
+ n->child == NULL ||
+ n->child->next == NULL ||
+ n->child->next->next == NULL)
n = n->next;
n = n->child->next->next;
mandoc_vmsg(MANDOCERR_ARCH_BAD,
/* Check that we begin with a proper `Sh'. */
n = mdoc->first->child;
- while (n != NULL && n->tok >= MDOC_Dd &&
- mdoc_macros[n->tok].flags & MDOC_PROLOGUE)
+ while (n != NULL &&
+ (n->type == ROFFT_COMMENT ||
+ (n->tok >= MDOC_Dd &&
+ mdoc_macro(n->tok)->flags & MDOC_PROLOGUE)))
n = n->next;
if (n == NULL)
}
if ((np = mdoc->last->child) != NULL)
- if (np->tok == MDOC_Pp || np->tok == MDOC_Lp) {
+ if (np->tok == MDOC_Pp ||
+ np->tok == ROFF_br || np->tok == ROFF_sp) {
mandoc_vmsg(MANDOCERR_PAR_SKIP,
mdoc->parse, np->line, np->pos,
"%s after %s", roff_name[np->tok],
}
if ((np = mdoc->last->last) != NULL)
- if (np->tok == MDOC_Pp || np->tok == MDOC_Lp) {
+ if (np->tok == MDOC_Pp || np->tok == ROFF_br) {
mandoc_vmsg(MANDOCERR_PAR_SKIP, mdoc->parse,
np->line, np->pos, "%s at the end of %s",
roff_name[np->tok],
return;
/*
- * Don't allow prior `Lp' or `Pp' prior to a paragraph-type
- * block: `Lp', `Pp', or non-compact `Bd' or `Bl'.
+ * Don't allow `Pp' prior to a paragraph-type
+ * block: `Pp' or non-compact `Bd' or `Bl'.
*/
- if (n->prev->tok != MDOC_Pp &&
- n->prev->tok != MDOC_Lp &&
- n->prev->tok != ROFF_br)
+ if (n->prev->tok != MDOC_Pp && n->prev->tok != ROFF_br)
return;
if (n->tok == MDOC_Bl && n->norm->Bl.comp)
return;
{
struct roff_node *np;
- np = mdoc->last;
- if (np->tok != ROFF_br && np->tok != ROFF_sp)
- post_prevpar(mdoc);
+ post_prevpar(mdoc);
- if (np->tok == ROFF_sp) {
- if (np->child != NULL && np->child->next != NULL)
- mandoc_vmsg(MANDOCERR_ARG_EXCESS, mdoc->parse,
- np->child->next->line, np->child->next->pos,
- "sp ... %s", np->child->next->string);
- } else if (np->child != NULL)
+ np = mdoc->last;
+ if (np->child != NULL)
mandoc_vmsg(MANDOCERR_ARG_SKIP,
mdoc->parse, np->line, np->pos, "%s %s",
roff_name[np->tok], np->child->string);
-
- if ((np = mdoc->last->prev) == NULL) {
- np = mdoc->last->parent;
- if (np->tok != MDOC_Sh && np->tok != MDOC_Ss)
- return;
- } else if (np->tok != MDOC_Pp && np->tok != MDOC_Lp &&
- (mdoc->last->tok != ROFF_br ||
- (np->tok != ROFF_sp && np->tok != ROFF_br)))
- return;
-
- mandoc_vmsg(MANDOCERR_PAR_SKIP, mdoc->parse,
- mdoc->last->line, mdoc->last->pos, "%s after %s",
- roff_name[mdoc->last->tok], roff_name[np->tok]);
- roff_node_delete(mdoc, mdoc->last);
}
static void