X-Git-Url: https://git.cameronkatri.com/mandoc.git/blobdiff_plain/4aac2c71991f01c5694f74c4b7506926bc92ea17..ef2d1ed7915b0e62315dcecce1bc447b55254b7c:/mdoc_validate.c diff --git a/mdoc_validate.c b/mdoc_validate.c index 3a9b86f3..7a7457b9 100644 --- a/mdoc_validate.c +++ b/mdoc_validate.c @@ -1,7 +1,7 @@ -/* $Id: mdoc_validate.c,v 1.352 2017/08/02 13:29:04 schwarze Exp $ */ +/* $Id: mdoc_validate.c,v 1.364 2018/12/04 02:53:51 schwarze Exp $ */ /* * Copyright (c) 2008-2012 Kristaps Dzonsons - * Copyright (c) 2010-2017 Ingo Schwarze + * Copyright (c) 2010-2018 Ingo Schwarze * Copyright (c) 2010 Joerg Sonnenberger * * Permission to use, copy, modify, and distribute this software for any @@ -53,16 +53,18 @@ enum check_ineq { typedef void (*v_post)(POST_ARGS); static int build_list(struct roff_man *, int); -static void check_text(struct roff_man *, int, int, char *); static void check_argv(struct roff_man *, struct roff_node *, struct mdoc_argv *); static void check_args(struct roff_man *, struct roff_node *); +static void check_text(struct roff_man *, int, int, char *); +static void check_text_em(struct roff_man *, int, int, char *); static void check_toptext(struct roff_man *, int, int, const char *); static int child_an(const struct roff_node *); static size_t macro2len(enum roff_tok); static void rewrite_macro2len(struct roff_man *, char **); static int similar(const char *, const char *); +static void post_abort(POST_ARGS); static void post_an(POST_ARGS); static void post_an_norm(POST_ARGS); static void post_at(POST_ARGS); @@ -115,7 +117,7 @@ static void post_useless(POST_ARGS); static void post_xr(POST_ARGS); static void post_xx(POST_ARGS); -static const v_post __mdoc_valids[MDOC_MAX - MDOC_Dd] = { +static const v_post mdoc_valids[MDOC_MAX - MDOC_Dd] = { post_dd, /* Dd */ post_dt, /* Dt */ post_os, /* Os */ @@ -150,7 +152,7 @@ static const v_post __mdoc_valids[MDOC_MAX - MDOC_Dd] = { post_nd, /* Nd */ post_nm, /* Nm */ post_delim_nb, /* Op */ - post_obsolete, /* Ot */ + post_abort, /* Ot */ post_defaults, /* Pa */ post_rv, /* Rv */ post_st, /* St */ @@ -169,12 +171,12 @@ static const v_post __mdoc_valids[MDOC_MAX - MDOC_Dd] = { post_hyph, /* %T */ /* FIXME: can be used outside Rs/Re. */ NULL, /* %V */ NULL, /* Ac */ - post_delim_nb, /* Ao */ + NULL, /* Ao */ post_delim_nb, /* Aq */ post_at, /* At */ NULL, /* Bc */ post_bf, /* Bf */ - post_delim_nb, /* Bo */ + NULL, /* Bo */ NULL, /* Bq */ post_xx, /* Bsx */ post_bx, /* Bx */ @@ -194,16 +196,16 @@ static const v_post __mdoc_valids[MDOC_MAX - MDOC_Dd] = { post_xx, /* Ox */ NULL, /* Pc */ NULL, /* Pf */ - post_delim_nb, /* Po */ + NULL, /* Po */ post_delim_nb, /* Pq */ NULL, /* Qc */ post_delim_nb, /* Ql */ - post_delim_nb, /* Qo */ + NULL, /* Qo */ post_delim_nb, /* Qq */ NULL, /* Re */ post_rs, /* Rs */ NULL, /* Sc */ - post_delim_nb, /* So */ + NULL, /* So */ post_delim_nb, /* Sq */ post_sm, /* Sm */ post_sx, /* Sx */ @@ -214,7 +216,7 @@ static const v_post __mdoc_valids[MDOC_MAX - MDOC_Dd] = { NULL, /* Xo */ post_fo, /* Fo */ NULL, /* Fc */ - post_delim_nb, /* Oo */ + NULL, /* Oo */ NULL, /* Oc */ post_bk, /* Bk */ NULL, /* Ek */ @@ -223,11 +225,11 @@ static const v_post __mdoc_valids[MDOC_MAX - MDOC_Dd] = { post_obsolete, /* Fr */ post_eoln, /* Ud */ post_lb, /* Lb */ - post_par, /* Lp */ + post_abort, /* Lp */ post_delim_nb, /* Lk */ post_defaults, /* Mt */ post_delim_nb, /* Brq */ - post_delim_nb, /* Bro */ + NULL, /* Bro */ NULL, /* Brc */ NULL, /* %C */ post_es, /* Es */ @@ -237,7 +239,6 @@ static const v_post __mdoc_valids[MDOC_MAX - MDOC_Dd] = { NULL, /* %U */ NULL, /* Ta */ }; -static const v_post *const mdoc_valids = __mdoc_valids - MDOC_Dd; #define RSORD_MAX 14 /* Number of `Rs' blocks. */ @@ -285,13 +286,37 @@ static const char * const secnames[SEC__MAX] = { }; +/* Validate the subtree rooted at mdoc->last. */ void mdoc_node_validate(struct roff_man *mdoc) { - struct roff_node *n; + struct roff_node *n, *np; const v_post *p; + /* + * Translate obsolete macros to modern macros first + * such that later code does not need to look + * for the obsolete versions. + */ + n = mdoc->last; + switch (n->tok) { + case MDOC_Lp: + n->tok = MDOC_Pp; + break; + case MDOC_Ot: + post_obsolete(mdoc); + n->tok = MDOC_Ft; + break; + default: + break; + } + + /* + * Iterate over all children, recursing into each one + * in turn, depth-first. + */ + mdoc->last = mdoc->last->child; while (mdoc->last != NULL) { mdoc_node_validate(mdoc); @@ -301,19 +326,27 @@ mdoc_node_validate(struct roff_man *mdoc) mdoc->last = mdoc->last->next; } + /* Finally validate the macro itself. */ + mdoc->last = n; mdoc->next = ROFF_NEXT_SIBLING; switch (n->type) { case ROFFT_TEXT: + np = n->parent; if (n->sec != SEC_SYNOPSIS || - (n->parent->tok != MDOC_Cd && n->parent->tok != MDOC_Fd)) + (np->tok != MDOC_Cd && np->tok != MDOC_Fd)) check_text(mdoc, n->line, n->pos, n->string); - if (n->parent->tok == MDOC_It || - (n->parent->type == ROFFT_BODY && - (n->parent->tok == MDOC_Sh || - n->parent->tok == MDOC_Ss))) + if (np->tok != MDOC_Ql && np->tok != MDOC_Dl && + (np->tok != MDOC_Bd || + (mdoc->flags & MDOC_LITERAL) == 0) && + (np->tok != MDOC_It || np->type != ROFFT_HEAD || + np->parent->parent->norm->Bl.type != LIST_diag)) + check_text_em(mdoc, n->line, n->pos, n->string); + if (np->tok == MDOC_It || (np->type == ROFFT_BODY && + (np->tok == MDOC_Sh || np->tok == MDOC_Ss))) check_toptext(mdoc, n->line, n->pos, n->string); break; + case ROFFT_COMMENT: case ROFFT_EQN: case ROFFT_TBL: break; @@ -337,20 +370,12 @@ mdoc_node_validate(struct roff_man *mdoc) /* Call the macro's postprocessor. */ if (n->tok < ROFF_MAX) { - switch(n->tok) { - case ROFF_br: - case ROFF_sp: - post_par(mdoc); - break; - default: - roff_validate(mdoc); - break; - } + roff_validate(mdoc); break; } assert(n->tok >= MDOC_Dd && n->tok < MDOC_MAX); - p = mdoc_valids + n->tok; + p = mdoc_valids + (n->tok - MDOC_Dd); if (*p) (*p)(mdoc); if (mdoc->last == n) @@ -394,6 +419,57 @@ check_text(struct roff_man *mdoc, int ln, int pos, char *p) ln, pos + (int)(p - cp), NULL); } +static void +check_text_em(struct roff_man *mdoc, int ln, int pos, char *p) +{ + const struct roff_node *np, *nn; + char *cp; + + np = mdoc->last->prev; + nn = mdoc->last->next; + + /* Look for em-dashes wrongly encoded as "--". */ + + for (cp = p; *cp != '\0'; cp++) { + if (cp[0] != '-' || cp[1] != '-') + continue; + cp++; + + /* Skip input sequences of more than two '-'. */ + + if (cp[1] == '-') { + while (cp[1] == '-') + cp++; + continue; + } + + /* Skip "--" directly attached to something else. */ + + if ((cp - p > 1 && cp[-2] != ' ') || + (cp[1] != '\0' && cp[1] != ' ')) + continue; + + /* Require a letter right before or right afterwards. */ + + if ((cp - p > 2 ? + isalpha((unsigned char)cp[-3]) : + np != NULL && + np->type == ROFFT_TEXT && + *np->string != '\0' && + isalpha((unsigned char)np->string[ + strlen(np->string) - 1])) || + (cp[1] != '\0' && cp[2] != '\0' ? + isalpha((unsigned char)cp[2]) : + nn != NULL && + nn->type == ROFFT_TEXT && + isalpha((unsigned char)*nn->string))) { + mandoc_msg(MANDOCERR_DASHDASH, mdoc->parse, + ln, pos + (int)(cp - p) - 1, NULL); + break; + } + } +} + static void check_toptext(struct roff_man *mdoc, int ln, int pos, const char *p) { @@ -429,6 +505,12 @@ check_toptext(struct roff_man *mdoc, int ln, int pos, const char *p) } } +static void +post_abort(POST_ARGS) +{ + abort(); +} + static void post_delim(POST_ARGS) { @@ -530,8 +612,7 @@ post_delim_nb(POST_ARGS) /* At least three alphabetic words with a sentence ending. */ if (strchr("!.:?", *lc) != NULL && (tok == MDOC_Em || - tok == MDOC_Li || tok == MDOC_Po || tok == MDOC_Pq || - tok == MDOC_Sy)) { + tok == MDOC_Li || tok == MDOC_Pq || tok == MDOC_Sy)) { nw = 0; for (cp = lc - 1; cp >= nch->string; cp--) { if (*cp == ' ') { @@ -867,7 +948,7 @@ build_list(struct roff_man *mdoc, int tok) for (ic = 1;; ic++) { roff_elem_alloc(mdoc, n->line, n->pos, tok); mdoc->last->flags |= NODE_NOSRC; - mdoc_node_relink(mdoc, n); + roff_node_relink(mdoc, n); n = mdoc->last = mdoc->last->parent; mdoc->next = ROFF_NEXT_SIBLING; if (n->next == NULL) @@ -947,10 +1028,10 @@ post_lb(POST_ARGS) roff_word_alloc(mdoc, n->line, n->pos, "library"); mdoc->last->flags = NODE_NOSRC; - roff_word_alloc(mdoc, n->line, n->pos, "\\(Lq"); + roff_word_alloc(mdoc, n->line, n->pos, "\\(lq"); mdoc->last->flags = NODE_DELIMO | NODE_NOSRC; mdoc->last = mdoc->last->next; - roff_word_alloc(mdoc, n->line, n->pos, "\\(Rq"); + roff_word_alloc(mdoc, n->line, n->pos, "\\(rq"); mdoc->last->flags = NODE_DELIMC | NODE_NOSRC; mdoc->last = n; } @@ -1207,10 +1288,8 @@ post_nm(POST_ARGS) n->child->type == ROFFT_TEXT && mdoc->meta.msec != NULL) mandoc_xr_add(mdoc->meta.msec, n->child->string, -1, -1); - if (n->last != NULL && - (n->last->tok == MDOC_Pp || - n->last->tok == MDOC_Lp)) - mdoc_node_relink(mdoc, n->last); + if (n->last != NULL && n->last->tok == MDOC_Pp) + roff_node_relink(mdoc, n->last); if (mdoc->meta.name == NULL) deroff(&mdoc->meta.name, n); @@ -1289,7 +1368,7 @@ post_display(POST_ARGS) mdoc->parse, n->line, n->pos, "Bd"); mdoc->next = ROFF_NEXT_SIBLING; while (n->body->child != NULL) - mdoc_node_relink(mdoc, + roff_node_relink(mdoc, n->body->child); roff_node_delete(mdoc, n); break; @@ -1562,7 +1641,6 @@ post_bl_block(POST_ARGS) while (nc != NULL) { switch (nc->tok) { case MDOC_Pp: - case MDOC_Lp: case ROFF_br: break; default: @@ -1573,7 +1651,7 @@ post_bl_block(POST_ARGS) mandoc_msg(MANDOCERR_PAR_MOVE, mdoc->parse, nc->line, nc->pos, roff_name[nc->tok]); - mdoc_node_relink(mdoc, nc); + roff_node_relink(mdoc, nc); } else if (n->norm->Bl.comp == 0 && n->norm->Bl.type != LIST_column) { mandoc_vmsg(MANDOCERR_PAR_SKIP, @@ -1733,7 +1811,7 @@ post_bl(POST_ARGS) roff_body_alloc(mdoc, nchild->line, nchild->pos, MDOC_It); while (nchild->tok != MDOC_It) { - mdoc_node_relink(mdoc, nchild); + roff_node_relink(mdoc, nchild); if ((nchild = nnext) == NULL) break; nnext = nchild->next; @@ -1852,7 +1930,7 @@ post_sm(POST_ARGS) mandoc_vmsg(MANDOCERR_SM_BAD, mdoc->parse, nch->line, nch->pos, "%s %s", roff_name[mdoc->last->tok], nch->string); - mdoc_node_relink(mdoc, nch); + roff_node_relink(mdoc, nch); return; } @@ -1914,7 +1992,10 @@ post_root(POST_ARGS) arch++; if (*arch == NULL) { n = mdoc->first->child; - while (n->tok != MDOC_Dt) + while (n->tok != MDOC_Dt || + n->child == NULL || + n->child->next == NULL || + n->child->next->next == NULL) n = n->next; n = n->child->next->next; mandoc_vmsg(MANDOCERR_ARCH_BAD, @@ -1928,8 +2009,10 @@ post_root(POST_ARGS) /* Check that we begin with a proper `Sh'. */ n = mdoc->first->child; - while (n != NULL && n->tok >= MDOC_Dd && - mdoc_macros[n->tok].flags & MDOC_PROLOGUE) + while (n != NULL && + (n->type == ROFFT_COMMENT || + (n->tok >= MDOC_Dd && + mdoc_macro(n->tok)->flags & MDOC_PROLOGUE))) n = n->next; if (n == NULL) @@ -2431,7 +2514,8 @@ post_ignpar(POST_ARGS) } if ((np = mdoc->last->child) != NULL) - if (np->tok == MDOC_Pp || np->tok == MDOC_Lp) { + if (np->tok == MDOC_Pp || + np->tok == ROFF_br || np->tok == ROFF_sp) { mandoc_vmsg(MANDOCERR_PAR_SKIP, mdoc->parse, np->line, np->pos, "%s after %s", roff_name[np->tok], @@ -2440,7 +2524,7 @@ post_ignpar(POST_ARGS) } if ((np = mdoc->last->last) != NULL) - if (np->tok == MDOC_Pp || np->tok == MDOC_Lp) { + if (np->tok == MDOC_Pp || np->tok == ROFF_br) { mandoc_vmsg(MANDOCERR_PAR_SKIP, mdoc->parse, np->line, np->pos, "%s at the end of %s", roff_name[np->tok], @@ -2461,13 +2545,11 @@ post_prevpar(POST_ARGS) return; /* - * Don't allow prior `Lp' or `Pp' prior to a paragraph-type - * block: `Lp', `Pp', or non-compact `Bd' or `Bl'. + * Don't allow `Pp' prior to a paragraph-type + * block: `Pp' or non-compact `Bd' or `Bl'. */ - if (n->prev->tok != MDOC_Pp && - n->prev->tok != MDOC_Lp && - n->prev->tok != ROFF_br) + if (n->prev->tok != MDOC_Pp && n->prev->tok != ROFF_br) return; if (n->tok == MDOC_Bl && n->norm->Bl.comp) return; @@ -2487,33 +2569,13 @@ post_par(POST_ARGS) { struct roff_node *np; - np = mdoc->last; - if (np->tok != ROFF_br && np->tok != ROFF_sp) - post_prevpar(mdoc); + post_prevpar(mdoc); - if (np->tok == ROFF_sp) { - if (np->child != NULL && np->child->next != NULL) - mandoc_vmsg(MANDOCERR_ARG_EXCESS, mdoc->parse, - np->child->next->line, np->child->next->pos, - "sp ... %s", np->child->next->string); - } else if (np->child != NULL) + np = mdoc->last; + if (np->child != NULL) mandoc_vmsg(MANDOCERR_ARG_SKIP, mdoc->parse, np->line, np->pos, "%s %s", roff_name[np->tok], np->child->string); - - if ((np = mdoc->last->prev) == NULL) { - np = mdoc->last->parent; - if (np->tok != MDOC_Sh && np->tok != MDOC_Ss) - return; - } else if (np->tok != MDOC_Pp && np->tok != MDOC_Lp && - (mdoc->last->tok != ROFF_br || - (np->tok != ROFF_sp && np->tok != ROFF_br))) - return; - - mandoc_vmsg(MANDOCERR_PAR_SKIP, mdoc->parse, - mdoc->last->line, mdoc->last->pos, "%s after %s", - roff_name[mdoc->last->tok], roff_name[np->tok]); - roff_node_delete(mdoc, mdoc->last); } static void