X-Git-Url: https://git.cameronkatri.com/mandoc.git/blobdiff_plain/a756448888657802f810973d8bddadebeed29030..ef2d1ed7915b0e62315dcecce1bc447b55254b7c:/mdoc_validate.c diff --git a/mdoc_validate.c b/mdoc_validate.c index c90c0f0d..7a7457b9 100644 --- a/mdoc_validate.c +++ b/mdoc_validate.c @@ -1,7 +1,7 @@ -/* $Id: mdoc_validate.c,v 1.334 2017/06/10 16:54:16 schwarze Exp $ */ +/* $Id: mdoc_validate.c,v 1.364 2018/12/04 02:53:51 schwarze Exp $ */ /* * Copyright (c) 2008-2012 Kristaps Dzonsons - * Copyright (c) 2010-2017 Ingo Schwarze + * Copyright (c) 2010-2018 Ingo Schwarze * Copyright (c) 2010 Joerg Sonnenberger * * Permission to use, copy, modify, and distribute this software for any @@ -33,6 +33,7 @@ #include "mandoc_aux.h" #include "mandoc.h" +#include "mandoc_xr.h" #include "roff.h" #include "mdoc.h" #include "libmandoc.h" @@ -52,15 +53,18 @@ enum check_ineq { typedef void (*v_post)(POST_ARGS); static int build_list(struct roff_man *, int); -static void check_text(struct roff_man *, int, int, char *); -static void check_bsd(struct roff_man *, int, int, char *); static void check_argv(struct roff_man *, struct roff_node *, struct mdoc_argv *); static void check_args(struct roff_man *, struct roff_node *); +static void check_text(struct roff_man *, int, int, char *); +static void check_text_em(struct roff_man *, int, int, char *); +static void check_toptext(struct roff_man *, int, int, const char *); static int child_an(const struct roff_node *); static size_t macro2len(enum roff_tok); static void rewrite_macro2len(struct roff_man *, char **); +static int similar(const char *, const char *); +static void post_abort(POST_ARGS); static void post_an(POST_ARGS); static void post_an_norm(POST_ARGS); static void post_at(POST_ARGS); @@ -76,6 +80,7 @@ static void post_defaults(POST_ARGS); static void post_display(POST_ARGS); static void post_dd(POST_ARGS); static void post_delim(POST_ARGS); +static void post_delim_nb(POST_ARGS); static void post_dt(POST_ARGS); static void post_en(POST_ARGS); static void post_es(POST_ARGS); @@ -107,11 +112,12 @@ static void post_sh_authors(POST_ARGS); static void post_sm(POST_ARGS); static void post_st(POST_ARGS); static void post_std(POST_ARGS); +static void post_sx(POST_ARGS); static void post_useless(POST_ARGS); static void post_xr(POST_ARGS); static void post_xx(POST_ARGS); -static const v_post __mdoc_valids[MDOC_MAX - MDOC_Dd] = { +static const v_post mdoc_valids[MDOC_MAX - MDOC_Dd] = { post_dd, /* Dd */ post_dt, /* Dt */ post_os, /* Os */ @@ -125,33 +131,33 @@ static const v_post __mdoc_valids[MDOC_MAX - MDOC_Dd] = { post_bl, /* Bl */ NULL, /* El */ post_it, /* It */ - post_delim, /* Ad */ + post_delim_nb, /* Ad */ post_an, /* An */ NULL, /* Ap */ post_defaults, /* Ar */ NULL, /* Cd */ - post_delim, /* Cm */ - post_delim, /* Dv */ - post_delim, /* Er */ - post_delim, /* Ev */ + post_delim_nb, /* Cm */ + post_delim_nb, /* Dv */ + post_delim_nb, /* Er */ + post_delim_nb, /* Ev */ post_ex, /* Ex */ post_fa, /* Fa */ NULL, /* Fd */ - post_delim, /* Fl */ + post_delim_nb, /* Fl */ post_fn, /* Fn */ - post_delim, /* Ft */ - post_delim, /* Ic */ - post_delim, /* In */ + post_delim_nb, /* Ft */ + post_delim_nb, /* Ic */ + post_delim_nb, /* In */ post_defaults, /* Li */ post_nd, /* Nd */ post_nm, /* Nm */ - post_delim, /* Op */ - post_obsolete, /* Ot */ + post_delim_nb, /* Op */ + post_abort, /* Ot */ post_defaults, /* Pa */ post_rv, /* Rv */ post_st, /* St */ - post_delim, /* Va */ - post_delim, /* Vt */ + post_delim_nb, /* Va */ + post_delim_nb, /* Vt */ post_xr, /* Xr */ NULL, /* %A */ post_hyph, /* %B */ /* FIXME: can be used outside Rs/Re. */ @@ -165,12 +171,12 @@ static const v_post __mdoc_valids[MDOC_MAX - MDOC_Dd] = { post_hyph, /* %T */ /* FIXME: can be used outside Rs/Re. */ NULL, /* %V */ NULL, /* Ac */ - post_delim, /* Ao */ - post_delim, /* Aq */ + NULL, /* Ao */ + post_delim_nb, /* Aq */ post_at, /* At */ NULL, /* Bc */ post_bf, /* Bf */ - post_delim, /* Bo */ + NULL, /* Bo */ NULL, /* Bq */ post_xx, /* Bsx */ post_bx, /* Bx */ @@ -180,37 +186,37 @@ static const v_post __mdoc_valids[MDOC_MAX - MDOC_Dd] = { NULL, /* Dq */ NULL, /* Ec */ NULL, /* Ef */ - post_delim, /* Em */ + post_delim_nb, /* Em */ NULL, /* Eo */ post_xx, /* Fx */ - post_delim, /* Ms */ - post_delim, /* No */ + post_delim_nb, /* Ms */ + NULL, /* No */ post_ns, /* Ns */ post_xx, /* Nx */ post_xx, /* Ox */ NULL, /* Pc */ NULL, /* Pf */ - post_delim, /* Po */ - post_delim, /* Pq */ + NULL, /* Po */ + post_delim_nb, /* Pq */ NULL, /* Qc */ - post_delim, /* Ql */ - post_delim, /* Qo */ - post_delim, /* Qq */ + post_delim_nb, /* Ql */ + NULL, /* Qo */ + post_delim_nb, /* Qq */ NULL, /* Re */ post_rs, /* Rs */ NULL, /* Sc */ - post_delim, /* So */ - post_delim, /* Sq */ + NULL, /* So */ + post_delim_nb, /* Sq */ post_sm, /* Sm */ - post_hyph, /* Sx */ - post_delim, /* Sy */ + post_sx, /* Sx */ + post_delim_nb, /* Sy */ post_useless, /* Tn */ post_xx, /* Ux */ NULL, /* Xc */ NULL, /* Xo */ post_fo, /* Fo */ NULL, /* Fc */ - post_delim, /* Oo */ + NULL, /* Oo */ NULL, /* Oc */ post_bk, /* Bk */ NULL, /* Ek */ @@ -219,11 +225,11 @@ static const v_post __mdoc_valids[MDOC_MAX - MDOC_Dd] = { post_obsolete, /* Fr */ post_eoln, /* Ud */ post_lb, /* Lb */ - post_par, /* Lp */ - post_delim, /* Lk */ + post_abort, /* Lp */ + post_delim_nb, /* Lk */ post_defaults, /* Mt */ - post_delim, /* Brq */ - post_delim, /* Bro */ + post_delim_nb, /* Brq */ + NULL, /* Bro */ NULL, /* Brc */ NULL, /* %C */ post_es, /* Es */ @@ -233,7 +239,6 @@ static const v_post __mdoc_valids[MDOC_MAX - MDOC_Dd] = { NULL, /* %U */ NULL, /* Ta */ }; -static const v_post *const mdoc_valids = __mdoc_valids - MDOC_Dd; #define RSORD_MAX 14 /* Number of `Rs' blocks. */ @@ -281,13 +286,37 @@ static const char * const secnames[SEC__MAX] = { }; +/* Validate the subtree rooted at mdoc->last. */ void mdoc_node_validate(struct roff_man *mdoc) { - struct roff_node *n; + struct roff_node *n, *np; const v_post *p; + /* + * Translate obsolete macros to modern macros first + * such that later code does not need to look + * for the obsolete versions. + */ + n = mdoc->last; + switch (n->tok) { + case MDOC_Lp: + n->tok = MDOC_Pp; + break; + case MDOC_Ot: + post_obsolete(mdoc); + n->tok = MDOC_Ft; + break; + default: + break; + } + + /* + * Iterate over all children, recursing into each one + * in turn, depth-first. + */ + mdoc->last = mdoc->last->child; while (mdoc->last != NULL) { mdoc_node_validate(mdoc); @@ -297,18 +326,27 @@ mdoc_node_validate(struct roff_man *mdoc) mdoc->last = mdoc->last->next; } + /* Finally validate the macro itself. */ + mdoc->last = n; mdoc->next = ROFF_NEXT_SIBLING; switch (n->type) { case ROFFT_TEXT: + np = n->parent; if (n->sec != SEC_SYNOPSIS || - (n->parent->tok != MDOC_Cd && n->parent->tok != MDOC_Fd)) + (np->tok != MDOC_Cd && np->tok != MDOC_Fd)) check_text(mdoc, n->line, n->pos, n->string); - if (n->parent->tok == MDOC_Sh || - n->parent->tok == MDOC_Ss || - n->parent->tok == MDOC_It) - check_bsd(mdoc, n->line, n->pos, n->string); + if (np->tok != MDOC_Ql && np->tok != MDOC_Dl && + (np->tok != MDOC_Bd || + (mdoc->flags & MDOC_LITERAL) == 0) && + (np->tok != MDOC_It || np->type != ROFFT_HEAD || + np->parent->parent->norm->Bl.type != LIST_diag)) + check_text_em(mdoc, n->line, n->pos, n->string); + if (np->tok == MDOC_It || (np->type == ROFFT_BODY && + (np->tok == MDOC_Sh || np->tok == MDOC_Ss))) + check_toptext(mdoc, n->line, n->pos, n->string); break; + case ROFFT_COMMENT: case ROFFT_EQN: case ROFFT_TBL: break; @@ -332,20 +370,12 @@ mdoc_node_validate(struct roff_man *mdoc) /* Call the macro's postprocessor. */ if (n->tok < ROFF_MAX) { - switch(n->tok) { - case ROFF_br: - case ROFF_sp: - post_par(mdoc); - break; - default: - roff_validate(mdoc); - break; - } + roff_validate(mdoc); break; } assert(n->tok >= MDOC_Dd && n->tok < MDOC_MAX); - p = mdoc_valids + n->tok; + p = mdoc_valids + (n->tok - MDOC_Dd); if (*p) (*p)(mdoc); if (mdoc->last == n) @@ -390,9 +420,63 @@ check_text(struct roff_man *mdoc, int ln, int pos, char *p) } static void -check_bsd(struct roff_man *mdoc, int ln, int pos, char *p) +check_text_em(struct roff_man *mdoc, int ln, int pos, char *p) +{ + const struct roff_node *np, *nn; + char *cp; + + np = mdoc->last->prev; + nn = mdoc->last->next; + + /* Look for em-dashes wrongly encoded as "--". */ + + for (cp = p; *cp != '\0'; cp++) { + if (cp[0] != '-' || cp[1] != '-') + continue; + cp++; + + /* Skip input sequences of more than two '-'. */ + + if (cp[1] == '-') { + while (cp[1] == '-') + cp++; + continue; + } + + /* Skip "--" directly attached to something else. */ + + if ((cp - p > 1 && cp[-2] != ' ') || + (cp[1] != '\0' && cp[1] != ' ')) + continue; + + /* Require a letter right before or right afterwards. */ + + if ((cp - p > 2 ? + isalpha((unsigned char)cp[-3]) : + np != NULL && + np->type == ROFFT_TEXT && + *np->string != '\0' && + isalpha((unsigned char)np->string[ + strlen(np->string) - 1])) || + (cp[1] != '\0' && cp[2] != '\0' ? + isalpha((unsigned char)cp[2]) : + nn != NULL && + nn->type == ROFFT_TEXT && + isalpha((unsigned char)*nn->string))) { + mandoc_msg(MANDOCERR_DASHDASH, mdoc->parse, + ln, pos + (int)(cp - p) - 1, NULL); + break; + } + } +} + +static void +check_toptext(struct roff_man *mdoc, int ln, int pos, const char *p) { - const char *cp; + const char *cp, *cpr; + + if (*p == '\0') + return; if ((cp = strstr(p, "OpenBSD")) != NULL) mandoc_msg(MANDOCERR_BX, mdoc->parse, @@ -406,10 +490,57 @@ check_bsd(struct roff_man *mdoc, int ln, int pos, char *p) if ((cp = strstr(p, "DragonFly")) != NULL) mandoc_msg(MANDOCERR_BX, mdoc->parse, ln, pos + (cp - p), "Dx"); + + cp = p; + while ((cp = strstr(cp + 1, "()")) != NULL) { + for (cpr = cp - 1; cpr >= p; cpr--) + if (*cpr != '_' && !isalnum((unsigned char)*cpr)) + break; + if ((cpr < p || *cpr == ' ') && cpr + 1 < cp) { + cpr++; + mandoc_vmsg(MANDOCERR_FUNC, mdoc->parse, + ln, pos + (cpr - p), + "%.*s()", (int)(cp - cpr), cpr); + } + } +} + +static void +post_abort(POST_ARGS) +{ + abort(); } static void post_delim(POST_ARGS) +{ + const struct roff_node *nch; + const char *lc; + enum mdelim delim; + enum roff_tok tok; + + tok = mdoc->last->tok; + nch = mdoc->last->last; + if (nch == NULL || nch->type != ROFFT_TEXT) + return; + lc = strchr(nch->string, '\0') - 1; + if (lc < nch->string) + return; + delim = mdoc_isdelim(lc); + if (delim == DELIM_NONE || delim == DELIM_OPEN) + return; + if (*lc == ')' && (tok == MDOC_Nd || tok == MDOC_Sh || + tok == MDOC_Ss || tok == MDOC_Fo)) + return; + + mandoc_vmsg(MANDOCERR_DELIM, mdoc->parse, + nch->line, nch->pos + (lc - nch->string), + "%s%s %s", roff_name[tok], + nch == mdoc->last->child ? "" : " ...", nch->string); +} + +static void +post_delim_nb(POST_ARGS) { const struct roff_node *nch; const char *lc, *cp; @@ -481,8 +612,7 @@ post_delim(POST_ARGS) /* At least three alphabetic words with a sentence ending. */ if (strchr("!.:?", *lc) != NULL && (tok == MDOC_Em || - tok == MDOC_Li || tok == MDOC_No || tok == MDOC_Po || - tok == MDOC_Pq || tok == MDOC_Sy)) { + tok == MDOC_Li || tok == MDOC_Pq || tok == MDOC_Sy)) { nw = 0; for (cp = lc - 1; cp >= nch->string; cp--) { if (*cp == ' ') { @@ -497,7 +627,7 @@ post_delim(POST_ARGS) } } - mandoc_vmsg(MANDOCERR_DELIM, mdoc->parse, + mandoc_vmsg(MANDOCERR_DELIM_NB, mdoc->parse, nch->line, nch->pos + (lc - nch->string), "%s%s %s", roff_name[tok], nch == mdoc->last->child ? "" : " ...", nch->string); @@ -651,7 +781,7 @@ post_bl_norm(POST_ARGS) switch (n->norm->Bl.type) { case LIST_tag: - if (NULL == n->norm->Bl.width) + if (n->norm->Bl.width == NULL) mandoc_msg(MANDOCERR_BL_NOWIDTH, mdoc->parse, n->line, n->pos, "Bl -tag"); break; @@ -660,19 +790,20 @@ post_bl_norm(POST_ARGS) case LIST_ohang: case LIST_inset: case LIST_item: - if (n->norm->Bl.width) + if (n->norm->Bl.width != NULL) mandoc_vmsg(MANDOCERR_BL_SKIPW, mdoc->parse, wa->line, wa->pos, "Bl -%s", mdoc_argnames[mdoclt]); + n->norm->Bl.width = NULL; break; case LIST_bullet: case LIST_dash: case LIST_hyphen: - if (NULL == n->norm->Bl.width) + if (n->norm->Bl.width == NULL) n->norm->Bl.width = "2n"; break; case LIST_enum: - if (NULL == n->norm->Bl.width) + if (n->norm->Bl.width == NULL) n->norm->Bl.width = "3n"; break; default: @@ -817,7 +948,7 @@ build_list(struct roff_man *mdoc, int tok) for (ic = 1;; ic++) { roff_elem_alloc(mdoc, n->line, n->pos, tok); mdoc->last->flags |= NODE_NOSRC; - mdoc_node_relink(mdoc, n); + roff_node_relink(mdoc, n); n = mdoc->last = mdoc->last->parent; mdoc->next = ROFF_NEXT_SIBLING; if (n->next == NULL) @@ -878,7 +1009,7 @@ post_lb(POST_ARGS) struct roff_node *n; const char *p; - post_delim(mdoc); + post_delim_nb(mdoc); n = mdoc->last; assert(n->child->type == ROFFT_TEXT); @@ -897,10 +1028,10 @@ post_lb(POST_ARGS) roff_word_alloc(mdoc, n->line, n->pos, "library"); mdoc->last->flags = NODE_NOSRC; - roff_word_alloc(mdoc, n->line, n->pos, "\\(Lq"); + roff_word_alloc(mdoc, n->line, n->pos, "\\(lq"); mdoc->last->flags = NODE_DELIMO | NODE_NOSRC; mdoc->last = mdoc->last->next; - roff_word_alloc(mdoc, n->line, n->pos, "\\(Rq"); + roff_word_alloc(mdoc, n->line, n->pos, "\\(rq"); mdoc->last->flags = NODE_DELIMC | NODE_NOSRC; mdoc->last = n; } @@ -949,6 +1080,8 @@ post_std(POST_ARGS) { struct roff_node *n; + post_delim(mdoc); + n = mdoc->last; if (n->args && n->args->argc == 1) if (n->args->argv[0].arg == MDOC_Std) @@ -1116,7 +1249,8 @@ post_fo(POST_ARGS) "Fo ... %s", n->child->next->string); while (n->child != n->last) roff_node_delete(mdoc, n->last); - } + } else + post_delim(mdoc); post_fname(mdoc); } @@ -1140,7 +1274,7 @@ post_fa(POST_ARGS) break; } } - post_delim(mdoc); + post_delim_nb(mdoc); } static void @@ -1150,10 +1284,12 @@ post_nm(POST_ARGS) n = mdoc->last; - if (n->last != NULL && - (n->last->tok == MDOC_Pp || - n->last->tok == MDOC_Lp)) - mdoc_node_relink(mdoc, n->last); + if (n->sec == SEC_NAME && n->child != NULL && + n->child->type == ROFFT_TEXT && mdoc->meta.msec != NULL) + mandoc_xr_add(mdoc->meta.msec, n->child->string, -1, -1); + + if (n->last != NULL && n->last->tok == MDOC_Pp) + roff_node_relink(mdoc, n->last); if (mdoc->meta.name == NULL) deroff(&mdoc->meta.name, n); @@ -1163,11 +1299,18 @@ post_nm(POST_ARGS) mandoc_msg(MANDOCERR_NM_NONAME, mdoc->parse, n->line, n->pos, "Nm"); - if (n->type == ROFFT_ELEM) + switch (n->type) { + case ROFFT_ELEM: + post_delim_nb(mdoc); + break; + case ROFFT_HEAD: post_delim(mdoc); + break; + default: + return; + } - if ((n->type != ROFFT_ELEM && n->type != ROFFT_HEAD) || - (n->child != NULL && n->child->type == ROFFT_TEXT) || + if ((n->child != NULL && n->child->type == ROFFT_TEXT) || mdoc->meta.name == NULL) return; @@ -1181,7 +1324,6 @@ static void post_nd(POST_ARGS) { struct roff_node *n; - size_t sz; n = mdoc->last; @@ -1195,11 +1337,8 @@ post_nd(POST_ARGS) if (n->child == NULL) mandoc_msg(MANDOCERR_ND_EMPTY, mdoc->parse, n->line, n->pos, "Nd"); - else if (n->last->type == ROFFT_TEXT && - (sz = strlen(n->last->string)) != 0 && - n->last->string[sz - 1] == '.') - mandoc_msg(MANDOCERR_ND_DOT, mdoc->parse, - n->last->line, n->last->pos + sz - 1, NULL); + else + post_delim(mdoc); post_hyph(mdoc); } @@ -1229,7 +1368,7 @@ post_display(POST_ARGS) mdoc->parse, n->line, n->pos, "Bd"); mdoc->next = ROFF_NEXT_SIBLING; while (n->body->child != NULL) - mdoc_node_relink(mdoc, + roff_node_relink(mdoc, n->body->child); roff_node_delete(mdoc, n); break; @@ -1257,7 +1396,7 @@ post_defaults(POST_ARGS) struct roff_node *nn; if (mdoc->last->child != NULL) { - post_delim(mdoc); + post_delim_nb(mdoc); return; } @@ -1332,7 +1471,7 @@ post_an(POST_ARGS) mandoc_msg(MANDOCERR_MACRO_EMPTY, mdoc->parse, np->line, np->pos, "An"); else - post_delim(mdoc); + post_delim_nb(mdoc); } else if (nch != NULL) mandoc_vmsg(MANDOCERR_ARG_EXCESS, mdoc->parse, nch->line, nch->pos, "An ... %s", nch->string); @@ -1360,8 +1499,9 @@ post_xx(POST_ARGS) { struct roff_node *n; const char *os; + char *v; - post_delim(mdoc); + post_delim_nb(mdoc); n = mdoc->last; switch (n->tok) { @@ -1376,6 +1516,20 @@ post_xx(POST_ARGS) break; case MDOC_Nx: os = "NetBSD"; + if (n->child == NULL) + break; + v = n->child->string; + if ((v[0] != '0' && v[0] != '1') || v[1] != '.' || + v[2] < '0' || v[2] > '9' || + v[3] < 'a' || v[3] > 'z' || v[4] != '\0') + break; + n->child->flags |= NODE_NOPRT; + mdoc->next = ROFF_NEXT_CHILD; + roff_word_alloc(mdoc, n->child->line, n->child->pos, v); + v = mdoc->last->string; + v[3] = toupper((unsigned char)v[3]); + mdoc->last->flags |= NODE_NOSRC; + mdoc->last = n; break; case MDOC_Ox: os = "OpenBSD"; @@ -1442,15 +1596,30 @@ post_it(POST_ARGS) assert(nit->head->child == NULL); - i = 0; - for (nch = nit->child; nch != NULL; nch = nch->next) - if (nch->type == ROFFT_BODY) - i++; + if (nit->head->next->child == NULL && + nit->head->next->next == NULL) { + mandoc_msg(MANDOCERR_MACRO_EMPTY, mdoc->parse, + nit->line, nit->pos, "It"); + roff_node_delete(mdoc, nit); + break; + } + i = 0; + for (nch = nit->child; nch != NULL; nch = nch->next) { + if (nch->type != ROFFT_BODY) + continue; + if (i++ && nch->flags & NODE_LINE) + mandoc_msg(MANDOCERR_TA_LINE, mdoc->parse, + nch->line, nch->pos, "Ta"); + } if (i < cols || i > cols + 1) mandoc_vmsg(MANDOCERR_BL_COL, mdoc->parse, nit->line, nit->pos, "%d columns, %d cells", cols, i); + else if (nit->head->next->child != NULL && + nit->head->next->child->line > nit->line) + mandoc_msg(MANDOCERR_IT_NOARG, mdoc->parse, + nit->line, nit->pos, "Bl -column It"); break; default: abort(); @@ -1472,7 +1641,6 @@ post_bl_block(POST_ARGS) while (nc != NULL) { switch (nc->tok) { case MDOC_Pp: - case MDOC_Lp: case ROFF_br: break; default: @@ -1483,7 +1651,7 @@ post_bl_block(POST_ARGS) mandoc_msg(MANDOCERR_PAR_MOVE, mdoc->parse, nc->line, nc->pos, roff_name[nc->tok]); - mdoc_node_relink(mdoc, nc); + roff_node_relink(mdoc, nc); } else if (n->norm->Bl.comp == 0 && n->norm->Bl.type != LIST_column) { mandoc_vmsg(MANDOCERR_PAR_SKIP, @@ -1643,7 +1811,7 @@ post_bl(POST_ARGS) roff_body_alloc(mdoc, nchild->line, nchild->pos, MDOC_It); while (nchild->tok != MDOC_It) { - mdoc_node_relink(mdoc, nchild); + roff_node_relink(mdoc, nchild); if ((nchild = nnext) == NULL) break; nnext = nchild->next; @@ -1692,7 +1860,7 @@ post_bl(POST_ARGS) nchild = nnext; } - if (mdoc->meta.os_e != MDOC_OS_NETBSD) + if (mdoc->meta.os_e != MANDOC_OS_NETBSD) return; prev_Er = NULL; @@ -1711,11 +1879,12 @@ post_bl(POST_ARGS) if (order > 0) mandoc_vmsg(MANDOCERR_ER_ORDER, mdoc->parse, nnext->line, nnext->pos, - "Er %s %s", prev_Er, nnext->string); + "Er %s %s (NetBSD)", + prev_Er, nnext->string); else if (order == 0) mandoc_vmsg(MANDOCERR_ER_REP, mdoc->parse, nnext->line, nnext->pos, - "Er %s", prev_Er); + "Er %s (NetBSD)", prev_Er); } prev_Er = nnext->string; } @@ -1761,21 +1930,42 @@ post_sm(POST_ARGS) mandoc_vmsg(MANDOCERR_SM_BAD, mdoc->parse, nch->line, nch->pos, "%s %s", roff_name[mdoc->last->tok], nch->string); - mdoc_node_relink(mdoc, nch); + roff_node_relink(mdoc, nch); return; } static void post_root(POST_ARGS) { + const char *openbsd_arch[] = { + "alpha", "amd64", "arm64", "armv7", "hppa", "i386", + "landisk", "loongson", "luna88k", "macppc", "mips64", + "octeon", "sgi", "socppc", "sparc64", NULL + }; + const char *netbsd_arch[] = { + "acorn26", "acorn32", "algor", "alpha", "amiga", + "arc", "atari", + "bebox", "cats", "cesfic", "cobalt", "dreamcast", + "emips", "evbarm", "evbmips", "evbppc", "evbsh3", "evbsh5", + "hp300", "hpcarm", "hpcmips", "hpcsh", "hppa", + "i386", "ibmnws", "luna68k", + "mac68k", "macppc", "mipsco", "mmeye", "mvme68k", "mvmeppc", + "netwinder", "news68k", "newsmips", "next68k", + "pc532", "playstation2", "pmax", "pmppc", "prep", + "sandpoint", "sbmips", "sgimips", "shark", + "sparc", "sparc64", "sun2", "sun3", + "vax", "walnut", "x68k", "x86", "x86_64", "xen", NULL + }; + const char **arches[] = { NULL, netbsd_arch, openbsd_arch }; + struct roff_node *n; + const char **arch; /* Add missing prologue data. */ if (mdoc->meta.date == NULL) - mdoc->meta.date = mdoc->quick ? - mandoc_strdup("") : - mandoc_normdate(mdoc->parse, NULL, 0, 0); + mdoc->meta.date = mdoc->quick ? mandoc_strdup("") : + mandoc_normdate(mdoc, NULL, 0, 0); if (mdoc->meta.title == NULL) { mandoc_msg(MANDOCERR_DT_NOTITLE, @@ -1790,13 +1980,39 @@ post_root(POST_ARGS) mandoc_msg(MANDOCERR_OS_MISSING, mdoc->parse, 0, 0, NULL); mdoc->meta.os = mandoc_strdup(""); + } else if (mdoc->meta.os_e && + (mdoc->meta.rcsids & (1 << mdoc->meta.os_e)) == 0) + mandoc_msg(MANDOCERR_RCS_MISSING, mdoc->parse, 0, 0, + mdoc->meta.os_e == MANDOC_OS_OPENBSD ? + "(OpenBSD)" : "(NetBSD)"); + + if (mdoc->meta.arch != NULL && + (arch = arches[mdoc->meta.os_e]) != NULL) { + while (*arch != NULL && strcmp(*arch, mdoc->meta.arch)) + arch++; + if (*arch == NULL) { + n = mdoc->first->child; + while (n->tok != MDOC_Dt || + n->child == NULL || + n->child->next == NULL || + n->child->next->next == NULL) + n = n->next; + n = n->child->next->next; + mandoc_vmsg(MANDOCERR_ARCH_BAD, + mdoc->parse, n->line, n->pos, + "Dt ... %s %s", mdoc->meta.arch, + mdoc->meta.os_e == MANDOC_OS_OPENBSD ? + "(OpenBSD)" : "(NetBSD)"); + } } /* Check that we begin with a proper `Sh'. */ n = mdoc->first->child; - while (n != NULL && n->tok != TOKEN_NONE && - mdoc_macros[n->tok].flags & MDOC_PROLOGUE) + while (n != NULL && + (n->type == ROFFT_COMMENT || + (n->tok >= MDOC_Dd && + mdoc_macro(n->tok)->flags & MDOC_PROLOGUE))) n = n->next; if (n == NULL) @@ -1922,10 +2138,20 @@ post_hyph(POST_ARGS) static void post_ns(POST_ARGS) { + struct roff_node *n; - if (mdoc->last->flags & NODE_LINE) + n = mdoc->last; + if (n->flags & NODE_LINE || + (n->next != NULL && n->next->flags & NODE_DELIMC)) mandoc_msg(MANDOCERR_NS_SKIP, mdoc->parse, - mdoc->last->line, mdoc->last->pos, NULL); + n->line, n->pos, NULL); +} + +static void +post_sx(POST_ARGS) +{ + post_delim(mdoc); + post_hyph(mdoc); } static void @@ -2088,11 +2314,54 @@ post_sh_authors(POST_ARGS) mdoc->last->line, mdoc->last->pos, NULL); } +/* + * Return an upper bound for the string distance (allowing + * transpositions). Not a full Levenshtein implementation + * because Levenshtein is quadratic in the string length + * and this function is called for every standard name, + * so the check for each custom name would be cubic. + * The following crude heuristics is linear, resulting + * in quadratic behaviour for checking one custom name, + * which does not cause measurable slowdown. + */ +static int +similar(const char *s1, const char *s2) +{ + const int maxdist = 3; + int dist = 0; + + while (s1[0] != '\0' && s2[0] != '\0') { + if (s1[0] == s2[0]) { + s1++; + s2++; + continue; + } + if (++dist > maxdist) + return INT_MAX; + if (s1[1] == s2[1]) { /* replacement */ + s1++; + s2++; + } else if (s1[0] == s2[1] && s1[1] == s2[0]) { + s1 += 2; /* transposition */ + s2 += 2; + } else if (s1[0] == s2[1]) /* insertion */ + s2++; + else if (s1[1] == s2[0]) /* deletion */ + s1++; + else + return INT_MAX; + } + dist += strlen(s1) + strlen(s2); + return dist > maxdist ? INT_MAX : dist; +} + static void post_sh_head(POST_ARGS) { struct roff_node *nch; const char *goodsec; + const char *const *testsec; + int dist, mindist; enum roff_sec sec; /* @@ -2130,8 +2399,25 @@ post_sh_head(POST_ARGS) /* We don't care about custom sections after this. */ - if (sec == SEC_CUSTOM) + if (sec == SEC_CUSTOM) { + if ((nch = mdoc->last->child) == NULL || + nch->type != ROFFT_TEXT || nch->next != NULL) + return; + goodsec = NULL; + mindist = INT_MAX; + for (testsec = secnames + 1; *testsec != NULL; testsec++) { + dist = similar(nch->string, *testsec); + if (dist < mindist) { + goodsec = *testsec; + mindist = dist; + } + } + if (goodsec != NULL) + mandoc_vmsg(MANDOCERR_SEC_TYPO, mdoc->parse, + nch->line, nch->pos, "Sh %s instead of %s", + nch->string, goodsec); return; + } /* * Check whether our non-custom section is being repeated or is @@ -2197,9 +2483,15 @@ post_xr(POST_ARGS) if (nch->next == NULL) { mandoc_vmsg(MANDOCERR_XR_NOSEC, mdoc->parse, n->line, n->pos, "Xr %s", nch->string); - } else + } else { assert(nch->next == n->last); - post_delim(mdoc); + if(mandoc_xr_add(nch->next->string, nch->string, + nch->line, nch->pos)) + mandoc_vmsg(MANDOCERR_XR_SELF, mdoc->parse, + nch->line, nch->pos, "Xr %s %s", + nch->string, nch->next->string); + } + post_delim_nb(mdoc); } static void @@ -2212,6 +2504,7 @@ post_ignpar(POST_ARGS) post_prevpar(mdoc); return; case ROFFT_HEAD: + post_delim(mdoc); post_hyph(mdoc); return; case ROFFT_BODY: @@ -2221,7 +2514,8 @@ post_ignpar(POST_ARGS) } if ((np = mdoc->last->child) != NULL) - if (np->tok == MDOC_Pp || np->tok == MDOC_Lp) { + if (np->tok == MDOC_Pp || + np->tok == ROFF_br || np->tok == ROFF_sp) { mandoc_vmsg(MANDOCERR_PAR_SKIP, mdoc->parse, np->line, np->pos, "%s after %s", roff_name[np->tok], @@ -2230,7 +2524,7 @@ post_ignpar(POST_ARGS) } if ((np = mdoc->last->last) != NULL) - if (np->tok == MDOC_Pp || np->tok == MDOC_Lp) { + if (np->tok == MDOC_Pp || np->tok == ROFF_br) { mandoc_vmsg(MANDOCERR_PAR_SKIP, mdoc->parse, np->line, np->pos, "%s at the end of %s", roff_name[np->tok], @@ -2251,13 +2545,11 @@ post_prevpar(POST_ARGS) return; /* - * Don't allow prior `Lp' or `Pp' prior to a paragraph-type - * block: `Lp', `Pp', or non-compact `Bd' or `Bl'. + * Don't allow `Pp' prior to a paragraph-type + * block: `Pp' or non-compact `Bd' or `Bl'. */ - if (n->prev->tok != MDOC_Pp && - n->prev->tok != MDOC_Lp && - n->prev->tok != ROFF_br) + if (n->prev->tok != MDOC_Pp && n->prev->tok != ROFF_br) return; if (n->tok == MDOC_Bl && n->norm->Bl.comp) return; @@ -2277,33 +2569,13 @@ post_par(POST_ARGS) { struct roff_node *np; - np = mdoc->last; - if (np->tok != ROFF_br && np->tok != ROFF_sp) - post_prevpar(mdoc); + post_prevpar(mdoc); - if (np->tok == ROFF_sp) { - if (np->child != NULL && np->child->next != NULL) - mandoc_vmsg(MANDOCERR_ARG_EXCESS, mdoc->parse, - np->child->next->line, np->child->next->pos, - "sp ... %s", np->child->next->string); - } else if (np->child != NULL) + np = mdoc->last; + if (np->child != NULL) mandoc_vmsg(MANDOCERR_ARG_SKIP, mdoc->parse, np->line, np->pos, "%s %s", roff_name[np->tok], np->child->string); - - if ((np = mdoc->last->prev) == NULL) { - np = mdoc->last->parent; - if (np->tok != MDOC_Sh && np->tok != MDOC_Ss) - return; - } else if (np->tok != MDOC_Pp && np->tok != MDOC_Lp && - (mdoc->last->tok != ROFF_br || - (np->tok != ROFF_sp && np->tok != ROFF_br))) - return; - - mandoc_vmsg(MANDOCERR_PAR_SKIP, mdoc->parse, - mdoc->last->line, mdoc->last->pos, "%s after %s", - roff_name[mdoc->last->tok], roff_name[np->tok]); - roff_node_delete(mdoc, mdoc->last); } static void @@ -2331,7 +2603,7 @@ post_dd(POST_ARGS) if (n->child == NULL || n->child->string[0] == '\0') { mdoc->meta.date = mdoc->quick ? mandoc_strdup("") : - mandoc_normdate(mdoc->parse, NULL, n->line, n->pos); + mandoc_normdate(mdoc, NULL, n->line, n->pos); return; } @@ -2340,7 +2612,7 @@ post_dd(POST_ARGS) if (mdoc->quick) mdoc->meta.date = datestr; else { - mdoc->meta.date = mandoc_normdate(mdoc->parse, + mdoc->meta.date = mandoc_normdate(mdoc, datestr, n->line, n->pos); free(datestr); } @@ -2448,7 +2720,7 @@ post_bx(POST_ARGS) struct roff_node *n, *nch; const char *macro; - post_delim(mdoc); + post_delim_nb(mdoc); n = mdoc->last; nch = n->child; @@ -2514,6 +2786,8 @@ post_os(POST_ARGS) mandoc_msg(MANDOCERR_PROLOG_LATE, mdoc->parse, n->line, n->pos, "Os"); + post_delim(mdoc); + /* * Set the operating system by way of the `Os' macro. * The order of precedence is: @@ -2529,8 +2803,8 @@ post_os(POST_ARGS) if (mdoc->meta.os) goto out; - if (mdoc->defos) { - mdoc->meta.os = mandoc_strdup(mdoc->defos); + if (mdoc->os_s != NULL) { + mdoc->meta.os = mandoc_strdup(mdoc->os_s); goto out; } @@ -2549,9 +2823,43 @@ post_os(POST_ARGS) mdoc->meta.os = mandoc_strdup(defbuf); #endif /*!OSNAME*/ -out: mdoc->meta.os_e = strstr(mdoc->meta.os, "OpenBSD") != NULL ? - MDOC_OS_OPENBSD : strstr(mdoc->meta.os, "NetBSD") != NULL ? - MDOC_OS_NETBSD : MDOC_OS_OTHER; +out: + if (mdoc->meta.os_e == MANDOC_OS_OTHER) { + if (strstr(mdoc->meta.os, "OpenBSD") != NULL) + mdoc->meta.os_e = MANDOC_OS_OPENBSD; + else if (strstr(mdoc->meta.os, "NetBSD") != NULL) + mdoc->meta.os_e = MANDOC_OS_NETBSD; + } + + /* + * This is the earliest point where we can check + * Mdocdate conventions because we don't know + * the operating system earlier. + */ + + if (n->child != NULL) + mandoc_vmsg(MANDOCERR_OS_ARG, mdoc->parse, + n->child->line, n->child->pos, + "Os %s (%s)", n->child->string, + mdoc->meta.os_e == MANDOC_OS_OPENBSD ? + "OpenBSD" : "NetBSD"); + + while (n->tok != MDOC_Dd) + if ((n = n->prev) == NULL) + return; + if ((n = n->child) == NULL) + return; + if (strncmp(n->string, "$" "Mdocdate", 9)) { + if (mdoc->meta.os_e == MANDOC_OS_OPENBSD) + mandoc_vmsg(MANDOCERR_MDOCDATE_MISSING, + mdoc->parse, n->line, n->pos, + "Dd %s (OpenBSD)", n->string); + } else { + if (mdoc->meta.os_e == MANDOC_OS_NETBSD) + mandoc_vmsg(MANDOCERR_MDOCDATE, + mdoc->parse, n->line, n->pos, + "Dd %s (NetBSD)", n->string); + } } enum roff_sec