]> git.cameronkatri.com Git - mandoc.git/blobdiff - mdoc_validate.c
Mostly complete implementation of the 'c' (character available)
[mandoc.git] / mdoc_validate.c
index 38283674da3e7c79210206516c91c5ec93ee3729..57182df3f9faf3951ed005a4c7ebc9d7587a25e7 100644 (file)
@@ -1,7 +1,7 @@
-/*     $Id: mdoc_validate.c,v 1.322 2017/04/29 12:45:42 schwarze Exp $ */
+/*     $Id: mdoc_validate.c,v 1.362 2018/08/17 20:33:38 schwarze Exp $ */
 /*
  * Copyright (c) 2008-2012 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2010-2018 Ingo Schwarze <schwarze@openbsd.org>
  * Copyright (c) 2010 Joerg Sonnenberger <joerg@netbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
@@ -33,6 +33,7 @@
 
 #include "mandoc_aux.h"
 #include "mandoc.h"
+#include "mandoc_xr.h"
 #include "roff.h"
 #include "mdoc.h"
 #include "libmandoc.h"
@@ -52,13 +53,16 @@ enum        check_ineq {
 typedef        void    (*v_post)(POST_ARGS);
 
 static int      build_list(struct roff_man *, int);
-static void     check_text(struct roff_man *, int, int, char *);
 static void     check_argv(struct roff_man *,
                        struct roff_node *, struct mdoc_argv *);
 static void     check_args(struct roff_man *, struct roff_node *);
+static void     check_text(struct roff_man *, int, int, char *);
+static void     check_text_em(struct roff_man *, int, int, char *);
+static void     check_toptext(struct roff_man *, int, int, const char *);
 static int      child_an(const struct roff_node *);
 static size_t          macro2len(enum roff_tok);
 static void     rewrite_macro2len(struct roff_man *, char **);
+static int      similar(const char *, const char *);
 
 static void     post_an(POST_ARGS);
 static void     post_an_norm(POST_ARGS);
@@ -74,6 +78,8 @@ static        void     post_bx(POST_ARGS);
 static void     post_defaults(POST_ARGS);
 static void     post_display(POST_ARGS);
 static void     post_dd(POST_ARGS);
+static void     post_delim(POST_ARGS);
+static void     post_delim_nb(POST_ARGS);
 static void     post_dt(POST_ARGS);
 static void     post_en(POST_ARGS);
 static void     post_es(POST_ARGS);
@@ -105,10 +111,12 @@ static    void     post_sh_authors(POST_ARGS);
 static void     post_sm(POST_ARGS);
 static void     post_st(POST_ARGS);
 static void     post_std(POST_ARGS);
+static void     post_sx(POST_ARGS);
+static void     post_useless(POST_ARGS);
 static void     post_xr(POST_ARGS);
 static void     post_xx(POST_ARGS);
 
-static const v_post __mdoc_valids[MDOC_MAX - MDOC_Dd] = {
+static const v_post mdoc_valids[MDOC_MAX - MDOC_Dd] = {
        post_dd,        /* Dd */
        post_dt,        /* Dt */
        post_os,        /* Os */
@@ -122,33 +130,33 @@ static    const v_post __mdoc_valids[MDOC_MAX - MDOC_Dd] = {
        post_bl,        /* Bl */
        NULL,           /* El */
        post_it,        /* It */
-       NULL,           /* Ad */
+       post_delim_nb,  /* Ad */
        post_an,        /* An */
        NULL,           /* Ap */
        post_defaults,  /* Ar */
        NULL,           /* Cd */
-       NULL,           /* Cm */
-       NULL,           /* Dv */
-       NULL,           /* Er */
-       NULL,           /* Ev */
+       post_delim_nb,  /* Cm */
+       post_delim_nb,  /* Dv */
+       post_delim_nb,  /* Er */
+       post_delim_nb,  /* Ev */
        post_ex,        /* Ex */
        post_fa,        /* Fa */
        NULL,           /* Fd */
-       NULL,           /* Fl */
+       post_delim_nb,  /* Fl */
        post_fn,        /* Fn */
-       NULL,           /* Ft */
-       NULL,           /* Ic */
-       NULL,           /* In */
+       post_delim_nb,  /* Ft */
+       post_delim_nb,  /* Ic */
+       post_delim_nb,  /* In */
        post_defaults,  /* Li */
        post_nd,        /* Nd */
        post_nm,        /* Nm */
-       NULL,           /* Op */
+       post_delim_nb,  /* Op */
        post_obsolete,  /* Ot */
        post_defaults,  /* Pa */
        post_rv,        /* Rv */
        post_st,        /* St */
-       NULL,           /* Va */
-       NULL,           /* Vt */
+       post_delim_nb,  /* Va */
+       post_delim_nb,  /* Vt */
        post_xr,        /* Xr */
        NULL,           /* %A */
        post_hyph,      /* %B */ /* FIXME: can be used outside Rs/Re. */
@@ -163,7 +171,7 @@ static      const v_post __mdoc_valids[MDOC_MAX - MDOC_Dd] = {
        NULL,           /* %V */
        NULL,           /* Ac */
        NULL,           /* Ao */
-       NULL,           /* Aq */
+       post_delim_nb,  /* Aq */
        post_at,        /* At */
        NULL,           /* Bc */
        post_bf,        /* Bf */
@@ -177,10 +185,10 @@ static    const v_post __mdoc_valids[MDOC_MAX - MDOC_Dd] = {
        NULL,           /* Dq */
        NULL,           /* Ec */
        NULL,           /* Ef */
-       NULL,           /* Em */
+       post_delim_nb,  /* Em */
        NULL,           /* Eo */
        post_xx,        /* Fx */
-       NULL,           /* Ms */
+       post_delim_nb,  /* Ms */
        NULL,           /* No */
        post_ns,        /* Ns */
        post_xx,        /* Nx */
@@ -188,20 +196,20 @@ static    const v_post __mdoc_valids[MDOC_MAX - MDOC_Dd] = {
        NULL,           /* Pc */
        NULL,           /* Pf */
        NULL,           /* Po */
-       NULL,           /* Pq */
+       post_delim_nb,  /* Pq */
        NULL,           /* Qc */
-       NULL,           /* Ql */
+       post_delim_nb,  /* Ql */
        NULL,           /* Qo */
-       NULL,           /* Qq */
+       post_delim_nb,  /* Qq */
        NULL,           /* Re */
        post_rs,        /* Rs */
        NULL,           /* Sc */
        NULL,           /* So */
-       NULL,           /* Sq */
+       post_delim_nb,  /* Sq */
        post_sm,        /* Sm */
-       post_hyph,      /* Sx */
-       NULL,           /* Sy */
-       NULL,           /* Tn */
+       post_sx,        /* Sx */
+       post_delim_nb,  /* Sy */
+       post_useless,   /* Tn */
        post_xx,        /* Ux */
        NULL,           /* Xc */
        NULL,           /* Xo */
@@ -212,14 +220,14 @@ static    const v_post __mdoc_valids[MDOC_MAX - MDOC_Dd] = {
        post_bk,        /* Bk */
        NULL,           /* Ek */
        post_eoln,      /* Bt */
-       NULL,           /* Hf */
+       post_obsolete,  /* Hf */
        post_obsolete,  /* Fr */
        post_eoln,      /* Ud */
        post_lb,        /* Lb */
        post_par,       /* Lp */
-       NULL,           /* Lk */
+       post_delim_nb,  /* Lk */
        post_defaults,  /* Mt */
-       NULL,           /* Brq */
+       post_delim_nb,  /* Brq */
        NULL,           /* Bro */
        NULL,           /* Brc */
        NULL,           /* %C */
@@ -227,13 +235,9 @@ static     const v_post __mdoc_valids[MDOC_MAX - MDOC_Dd] = {
        post_en,        /* En */
        post_xx,        /* Dx */
        NULL,           /* %Q */
-       post_par,       /* br */
-       post_par,       /* sp */
        NULL,           /* %U */
        NULL,           /* Ta */
-       NULL,           /* ll */
 };
-static const v_post *const mdoc_valids = __mdoc_valids - MDOC_Dd;
 
 #define        RSORD_MAX 14 /* Number of `Rs' blocks. */
 
@@ -284,7 +288,7 @@ static      const char * const secnames[SEC__MAX] = {
 void
 mdoc_node_validate(struct roff_man *mdoc)
 {
-       struct roff_node *n;
+       struct roff_node *n, *np;
        const v_post *p;
 
        n = mdoc->last;
@@ -301,10 +305,21 @@ mdoc_node_validate(struct roff_man *mdoc)
        mdoc->next = ROFF_NEXT_SIBLING;
        switch (n->type) {
        case ROFFT_TEXT:
+               np = n->parent;
                if (n->sec != SEC_SYNOPSIS ||
-                   (n->parent->tok != MDOC_Cd && n->parent->tok != MDOC_Fd))
+                   (np->tok != MDOC_Cd && np->tok != MDOC_Fd))
                        check_text(mdoc, n->line, n->pos, n->string);
+               if (np->tok != MDOC_Ql && np->tok != MDOC_Dl &&
+                   (np->tok != MDOC_Bd ||
+                    (mdoc->flags & MDOC_LITERAL) == 0) &&
+                   (np->tok != MDOC_It || np->type != ROFFT_HEAD ||
+                    np->parent->parent->norm->Bl.type != LIST_diag))
+                       check_text_em(mdoc, n->line, n->pos, n->string);
+               if (np->tok == MDOC_It || (np->type == ROFFT_BODY &&
+                   (np->tok == MDOC_Sh || np->tok == MDOC_Ss)))
+                       check_toptext(mdoc, n->line, n->pos, n->string);
                break;
+       case ROFFT_COMMENT:
        case ROFFT_EQN:
        case ROFFT_TBL:
                break;
@@ -327,7 +342,21 @@ mdoc_node_validate(struct roff_man *mdoc)
 
                /* Call the macro's postprocessor. */
 
-               p = mdoc_valids + n->tok;
+               if (n->tok < ROFF_MAX) {
+                       switch(n->tok) {
+                       case ROFF_br:
+                       case ROFF_sp:
+                               post_par(mdoc);
+                               break;
+                       default:
+                               roff_validate(mdoc);
+                               break;
+                       }
+                       break;
+               }
+
+               assert(n->tok >= MDOC_Dd && n->tok < MDOC_MAX);
+               p = mdoc_valids + (n->tok - MDOC_Dd);
                if (*p)
                        (*p)(mdoc);
                if (mdoc->last == n)
@@ -371,6 +400,214 @@ check_text(struct roff_man *mdoc, int ln, int pos, char *p)
                    ln, pos + (int)(p - cp), NULL);
 }
 
+static void
+check_text_em(struct roff_man *mdoc, int ln, int pos, char *p)
+{
+       const struct roff_node  *np, *nn;
+       char                    *cp;
+
+       np = mdoc->last->prev;
+       nn = mdoc->last->next;
+
+       /* Look for em-dashes wrongly encoded as "--". */
+
+       for (cp = p; *cp != '\0'; cp++) {
+               if (cp[0] != '-' || cp[1] != '-')
+                       continue;
+               cp++;
+
+               /* Skip input sequences of more than two '-'. */
+
+               if (cp[1] == '-') {
+                       while (cp[1] == '-')
+                               cp++;
+                       continue;
+               }
+
+               /* Skip "--" directly attached to something else. */
+
+               if ((cp - p > 1 && cp[-2] != ' ') ||
+                   (cp[1] != '\0' && cp[1] != ' '))
+                       continue;
+
+               /* Require a letter right before or right afterwards. */
+
+               if ((cp - p > 2 ?
+                    isalpha((unsigned char)cp[-3]) :
+                    np != NULL &&
+                    np->type == ROFFT_TEXT &&
+                    *np->string != '\0' &&
+                    isalpha((unsigned char)np->string[
+                      strlen(np->string) - 1])) ||
+                   (cp[1] != '\0' && cp[2] != '\0' ?
+                    isalpha((unsigned char)cp[2]) :
+                    nn != NULL &&
+                    nn->type == ROFFT_TEXT &&
+                    isalpha((unsigned char)*nn->string))) {
+                       mandoc_msg(MANDOCERR_DASHDASH, mdoc->parse,
+                           ln, pos + (int)(cp - p) - 1, NULL);
+                       break;
+               }
+       }
+}
+
+static void
+check_toptext(struct roff_man *mdoc, int ln, int pos, const char *p)
+{
+       const char      *cp, *cpr;
+
+       if (*p == '\0')
+               return;
+
+       if ((cp = strstr(p, "OpenBSD")) != NULL)
+               mandoc_msg(MANDOCERR_BX, mdoc->parse,
+                   ln, pos + (cp - p), "Ox");
+       if ((cp = strstr(p, "NetBSD")) != NULL)
+               mandoc_msg(MANDOCERR_BX, mdoc->parse,
+                   ln, pos + (cp - p), "Nx");
+       if ((cp = strstr(p, "FreeBSD")) != NULL)
+               mandoc_msg(MANDOCERR_BX, mdoc->parse,
+                   ln, pos + (cp - p), "Fx");
+       if ((cp = strstr(p, "DragonFly")) != NULL)
+               mandoc_msg(MANDOCERR_BX, mdoc->parse,
+                   ln, pos + (cp - p), "Dx");
+
+       cp = p;
+       while ((cp = strstr(cp + 1, "()")) != NULL) {
+               for (cpr = cp - 1; cpr >= p; cpr--)
+                       if (*cpr != '_' && !isalnum((unsigned char)*cpr))
+                               break;
+               if ((cpr < p || *cpr == ' ') && cpr + 1 < cp) {
+                       cpr++;
+                       mandoc_vmsg(MANDOCERR_FUNC, mdoc->parse,
+                           ln, pos + (cpr - p),
+                           "%.*s()", (int)(cp - cpr), cpr);
+               }
+       }
+}
+
+static void
+post_delim(POST_ARGS)
+{
+       const struct roff_node  *nch;
+       const char              *lc;
+       enum mdelim              delim;
+       enum roff_tok            tok;
+
+       tok = mdoc->last->tok;
+       nch = mdoc->last->last;
+       if (nch == NULL || nch->type != ROFFT_TEXT)
+               return;
+       lc = strchr(nch->string, '\0') - 1;
+       if (lc < nch->string)
+               return;
+       delim = mdoc_isdelim(lc);
+       if (delim == DELIM_NONE || delim == DELIM_OPEN)
+               return;
+       if (*lc == ')' && (tok == MDOC_Nd || tok == MDOC_Sh ||
+           tok == MDOC_Ss || tok == MDOC_Fo))
+               return;
+
+       mandoc_vmsg(MANDOCERR_DELIM, mdoc->parse,
+           nch->line, nch->pos + (lc - nch->string),
+           "%s%s %s", roff_name[tok],
+           nch == mdoc->last->child ? "" : " ...", nch->string);
+}
+
+static void
+post_delim_nb(POST_ARGS)
+{
+       const struct roff_node  *nch;
+       const char              *lc, *cp;
+       int                      nw;
+       enum mdelim              delim;
+       enum roff_tok            tok;
+
+       /*
+        * Find candidates: at least two bytes,
+        * the last one a closing or middle delimiter.
+        */
+
+       tok = mdoc->last->tok;
+       nch = mdoc->last->last;
+       if (nch == NULL || nch->type != ROFFT_TEXT)
+               return;
+       lc = strchr(nch->string, '\0') - 1;
+       if (lc <= nch->string)
+               return;
+       delim = mdoc_isdelim(lc);
+       if (delim == DELIM_NONE || delim == DELIM_OPEN)
+               return;
+
+       /*
+        * Reduce false positives by allowing various cases.
+        */
+
+       /* Escaped delimiters. */
+       if (lc > nch->string + 1 && lc[-2] == '\\' &&
+           (lc[-1] == '&' || lc[-1] == 'e'))
+               return;
+
+       /* Specific byte sequences. */
+       switch (*lc) {
+       case ')':
+               for (cp = lc; cp >= nch->string; cp--)
+                       if (*cp == '(')
+                               return;
+               break;
+       case '.':
+               if (lc > nch->string + 1 && lc[-2] == '.' && lc[-1] == '.')
+                       return;
+               if (lc[-1] == '.')
+                       return;
+               break;
+       case ';':
+               if (tok == MDOC_Vt)
+                       return;
+               break;
+       case '?':
+               if (lc[-1] == '?')
+                       return;
+               break;
+       case ']':
+               for (cp = lc; cp >= nch->string; cp--)
+                       if (*cp == '[')
+                               return;
+               break;
+       case '|':
+               if (lc == nch->string + 1 && lc[-1] == '|')
+                       return;
+       default:
+               break;
+       }
+
+       /* Exactly two non-alphanumeric bytes. */
+       if (lc == nch->string + 1 && !isalnum((unsigned char)lc[-1]))
+               return;
+
+       /* At least three alphabetic words with a sentence ending. */
+       if (strchr("!.:?", *lc) != NULL && (tok == MDOC_Em ||
+           tok == MDOC_Li || tok == MDOC_Pq || tok == MDOC_Sy)) {
+               nw = 0;
+               for (cp = lc - 1; cp >= nch->string; cp--) {
+                       if (*cp == ' ') {
+                               nw++;
+                               if (cp > nch->string && cp[-1] == ',')
+                                       cp--;
+                       } else if (isalpha((unsigned int)*cp)) {
+                               if (nw > 1)
+                                       return;
+                       } else
+                               break;
+               }
+       }
+
+       mandoc_vmsg(MANDOCERR_DELIM_NB, mdoc->parse,
+           nch->line, nch->pos + (lc - nch->string),
+           "%s%s %s", roff_name[tok],
+           nch == mdoc->last->child ? "" : " ...", nch->string);
+}
+
 static void
 post_bl_norm(POST_ARGS)
 {
@@ -519,7 +756,7 @@ post_bl_norm(POST_ARGS)
 
        switch (n->norm->Bl.type) {
        case LIST_tag:
-               if (NULL == n->norm->Bl.width)
+               if (n->norm->Bl.width == NULL)
                        mandoc_msg(MANDOCERR_BL_NOWIDTH, mdoc->parse,
                            n->line, n->pos, "Bl -tag");
                break;
@@ -528,19 +765,20 @@ post_bl_norm(POST_ARGS)
        case LIST_ohang:
        case LIST_inset:
        case LIST_item:
-               if (n->norm->Bl.width)
+               if (n->norm->Bl.width != NULL)
                        mandoc_vmsg(MANDOCERR_BL_SKIPW, mdoc->parse,
                            wa->line, wa->pos, "Bl -%s",
                            mdoc_argnames[mdoclt]);
+               n->norm->Bl.width = NULL;
                break;
        case LIST_bullet:
        case LIST_dash:
        case LIST_hyphen:
-               if (NULL == n->norm->Bl.width)
+               if (n->norm->Bl.width == NULL)
                        n->norm->Bl.width = "2n";
                break;
        case LIST_enum:
-               if (NULL == n->norm->Bl.width)
+               if (n->norm->Bl.width == NULL)
                        n->norm->Bl.width = "3n";
                break;
        default:
@@ -660,6 +898,7 @@ post_eoln(POST_ARGS)
 {
        struct roff_node        *n;
 
+       post_useless(mdoc);
        n = mdoc->last;
        if (n->child != NULL)
                mandoc_vmsg(MANDOCERR_ARG_SKIP, mdoc->parse, n->line,
@@ -745,6 +984,8 @@ post_lb(POST_ARGS)
        struct roff_node        *n;
        const char              *p;
 
+       post_delim_nb(mdoc);
+
        n = mdoc->last;
        assert(n->child->type == ROFFT_TEXT);
        mdoc->next = ROFF_NEXT_CHILD;
@@ -757,12 +998,15 @@ post_lb(POST_ARGS)
                return;
        }
 
+       mandoc_vmsg(MANDOCERR_LB_BAD, mdoc->parse, n->child->line,
+           n->child->pos, "Lb %s", n->child->string);
+
        roff_word_alloc(mdoc, n->line, n->pos, "library");
        mdoc->last->flags = NODE_NOSRC;
-       roff_word_alloc(mdoc, n->line, n->pos, "\\(Lq");
+       roff_word_alloc(mdoc, n->line, n->pos, "\\(lq");
        mdoc->last->flags = NODE_DELIMO | NODE_NOSRC;
        mdoc->last = mdoc->last->next;
-       roff_word_alloc(mdoc, n->line, n->pos, "\\(Rq");
+       roff_word_alloc(mdoc, n->line, n->pos, "\\(rq");
        mdoc->last->flags = NODE_DELIMC | NODE_NOSRC;
        mdoc->last = n;
 }
@@ -811,6 +1055,8 @@ post_std(POST_ARGS)
 {
        struct roff_node *n;
 
+       post_delim(mdoc);
+
        n = mdoc->last;
        if (n->args && n->args->argc == 1)
                if (n->args->argv[0].arg == MDOC_Std)
@@ -855,6 +1101,16 @@ post_obsolete(POST_ARGS)
                    n->line, n->pos, roff_name[n->tok]);
 }
 
+static void
+post_useless(POST_ARGS)
+{
+       struct roff_node *n;
+
+       n = mdoc->last;
+       mandoc_msg(MANDOCERR_MACRO_USELESS, mdoc->parse,
+           n->line, n->pos, roff_name[n->tok]);
+}
+
 /*
  * Block macros.
  */
@@ -968,7 +1224,8 @@ post_fo(POST_ARGS)
                    "Fo ... %s", n->child->next->string);
                while (n->child != n->last)
                        roff_node_delete(mdoc, n->last);
-       }
+       } else
+               post_delim(mdoc);
 
        post_fname(mdoc);
 }
@@ -992,6 +1249,7 @@ post_fa(POST_ARGS)
                        break;
                }
        }
+       post_delim_nb(mdoc);
 }
 
 static void
@@ -1001,6 +1259,10 @@ post_nm(POST_ARGS)
 
        n = mdoc->last;
 
+       if (n->sec == SEC_NAME && n->child != NULL &&
+           n->child->type == ROFFT_TEXT && mdoc->meta.msec != NULL)
+               mandoc_xr_add(mdoc->meta.msec, n->child->string, -1, -1);
+
        if (n->last != NULL &&
            (n->last->tok == MDOC_Pp ||
             n->last->tok == MDOC_Lp))
@@ -1014,8 +1276,18 @@ post_nm(POST_ARGS)
                mandoc_msg(MANDOCERR_NM_NONAME, mdoc->parse,
                    n->line, n->pos, "Nm");
 
-       if ((n->type != ROFFT_ELEM && n->type != ROFFT_HEAD) ||
-           (n->child != NULL && n->child->type == ROFFT_TEXT) ||
+       switch (n->type) {
+       case ROFFT_ELEM:
+               post_delim_nb(mdoc);
+               break;
+       case ROFFT_HEAD:
+               post_delim(mdoc);
+               break;
+       default:
+               return;
+       }
+
+       if ((n->child != NULL && n->child->type == ROFFT_TEXT) ||
            mdoc->meta.name == NULL)
                return;
 
@@ -1042,6 +1314,8 @@ post_nd(POST_ARGS)
        if (n->child == NULL)
                mandoc_msg(MANDOCERR_ND_EMPTY, mdoc->parse,
                    n->line, n->pos, "Nd");
+       else
+               post_delim(mdoc);
 
        post_hyph(mdoc);
 }
@@ -1098,17 +1372,18 @@ post_defaults(POST_ARGS)
 {
        struct roff_node *nn;
 
+       if (mdoc->last->child != NULL) {
+               post_delim_nb(mdoc);
+               return;
+       }
+
        /*
         * The `Ar' defaults to "file ..." if no value is provided as an
         * argument; the `Mt' and `Pa' macros use "~"; the `Li' just
         * gets an empty string.
         */
 
-       if (mdoc->last->child != NULL)
-               return;
-
        nn = mdoc->last;
-
        switch (nn->tok) {
        case MDOC_Ar:
                mdoc->next = ROFF_NEXT_CHILD;
@@ -1172,6 +1447,8 @@ post_an(POST_ARGS)
                if (nch == NULL)
                        mandoc_msg(MANDOCERR_MACRO_EMPTY, mdoc->parse,
                            np->line, np->pos, "An");
+               else
+                       post_delim_nb(mdoc);
        } else if (nch != NULL)
                mandoc_vmsg(MANDOCERR_ARG_EXCESS, mdoc->parse,
                    nch->line, nch->pos, "An ... %s", nch->string);
@@ -1199,6 +1476,9 @@ post_xx(POST_ARGS)
 {
        struct roff_node        *n;
        const char              *os;
+       char                    *v;
+
+       post_delim_nb(mdoc);
 
        n = mdoc->last;
        switch (n->tok) {
@@ -1213,6 +1493,20 @@ post_xx(POST_ARGS)
                break;
        case MDOC_Nx:
                os = "NetBSD";
+               if (n->child == NULL)
+                       break;
+               v = n->child->string;
+               if ((v[0] != '0' && v[0] != '1') || v[1] != '.' ||
+                   v[2] < '0' || v[2] > '9' ||
+                   v[3] < 'a' || v[3] > 'z' || v[4] != '\0')
+                       break;
+               n->child->flags |= NODE_NOPRT;
+               mdoc->next = ROFF_NEXT_CHILD;
+               roff_word_alloc(mdoc, n->child->line, n->child->pos, v);
+               v = mdoc->last->string;
+               v[3] = toupper((unsigned char)v[3]);
+               mdoc->last->flags |= NODE_NOSRC;
+               mdoc->last = n;
                break;
        case MDOC_Ox:
                os = "OpenBSD";
@@ -1279,15 +1573,30 @@ post_it(POST_ARGS)
 
                assert(nit->head->child == NULL);
 
-               i = 0;
-               for (nch = nit->child; nch != NULL; nch = nch->next)
-                       if (nch->type == ROFFT_BODY)
-                               i++;
+               if (nit->head->next->child == NULL &&
+                   nit->head->next->next == NULL) {
+                       mandoc_msg(MANDOCERR_MACRO_EMPTY, mdoc->parse,
+                           nit->line, nit->pos, "It");
+                       roff_node_delete(mdoc, nit);
+                       break;
+               }
 
+               i = 0;
+               for (nch = nit->child; nch != NULL; nch = nch->next) {
+                       if (nch->type != ROFFT_BODY)
+                               continue;
+                       if (i++ && nch->flags & NODE_LINE)
+                               mandoc_msg(MANDOCERR_TA_LINE, mdoc->parse,
+                                   nch->line, nch->pos, "Ta");
+               }
                if (i < cols || i > cols + 1)
                        mandoc_vmsg(MANDOCERR_BL_COL,
                            mdoc->parse, nit->line, nit->pos,
                            "%d columns, %d cells", cols, i);
+               else if (nit->head->next->child != NULL &&
+                   nit->head->next->child->line > nit->line)
+                       mandoc_msg(MANDOCERR_IT_NOARG, mdoc->parse,
+                           nit->line, nit->pos, "Bl -column It");
                break;
        default:
                abort();
@@ -1310,7 +1619,7 @@ post_bl_block(POST_ARGS)
                        switch (nc->tok) {
                        case MDOC_Pp:
                        case MDOC_Lp:
-                       case MDOC_br:
+                       case ROFF_br:
                                break;
                        default:
                                nc = NULL;
@@ -1426,6 +1735,8 @@ post_bl(POST_ARGS)
        struct roff_node        *nparent, *nprev; /* of the Bl block */
        struct roff_node        *nblock, *nbody;  /* of the Bl */
        struct roff_node        *nchild, *nnext;  /* of the Bl body */
+       const char              *prev_Er;
+       int                      order;
 
        nbody = mdoc->last;
        switch (nbody->type) {
@@ -1526,6 +1837,35 @@ post_bl(POST_ARGS)
 
                nchild = nnext;
        }
+
+       if (mdoc->meta.os_e != MANDOC_OS_NETBSD)
+               return;
+
+       prev_Er = NULL;
+       for (nchild = nbody->child; nchild != NULL; nchild = nchild->next) {
+               if (nchild->tok != MDOC_It)
+                       continue;
+               if ((nnext = nchild->head->child) == NULL)
+                       continue;
+               if (nnext->type == ROFFT_BLOCK)
+                       nnext = nnext->body->child;
+               if (nnext == NULL || nnext->tok != MDOC_Er)
+                       continue;
+               nnext = nnext->child;
+               if (prev_Er != NULL) {
+                       order = strcmp(prev_Er, nnext->string);
+                       if (order > 0)
+                               mandoc_vmsg(MANDOCERR_ER_ORDER,
+                                   mdoc->parse, nnext->line, nnext->pos,
+                                   "Er %s %s (NetBSD)",
+                                   prev_Er, nnext->string);
+                       else if (order == 0)
+                               mandoc_vmsg(MANDOCERR_ER_REP,
+                                   mdoc->parse, nnext->line, nnext->pos,
+                                   "Er %s (NetBSD)", prev_Er);
+               }
+               prev_Er = nnext->string;
+       }
 }
 
 static void
@@ -1575,14 +1915,35 @@ post_sm(POST_ARGS)
 static void
 post_root(POST_ARGS)
 {
+       const char *openbsd_arch[] = {
+               "alpha", "amd64", "arm64", "armv7", "hppa", "i386",
+               "landisk", "loongson", "luna88k", "macppc", "mips64",
+               "octeon", "sgi", "socppc", "sparc64", NULL
+       };
+       const char *netbsd_arch[] = {
+               "acorn26", "acorn32", "algor", "alpha", "amiga",
+               "arc", "atari",
+               "bebox", "cats", "cesfic", "cobalt", "dreamcast",
+               "emips", "evbarm", "evbmips", "evbppc", "evbsh3", "evbsh5",
+               "hp300", "hpcarm", "hpcmips", "hpcsh", "hppa",
+               "i386", "ibmnws", "luna68k",
+               "mac68k", "macppc", "mipsco", "mmeye", "mvme68k", "mvmeppc",
+               "netwinder", "news68k", "newsmips", "next68k",
+               "pc532", "playstation2", "pmax", "pmppc", "prep",
+               "sandpoint", "sbmips", "sgimips", "shark",
+               "sparc", "sparc64", "sun2", "sun3",
+               "vax", "walnut", "x68k", "x86", "x86_64", "xen", NULL
+        };
+       const char **arches[] = { NULL, netbsd_arch, openbsd_arch };
+
        struct roff_node *n;
+       const char **arch;
 
        /* Add missing prologue data. */
 
        if (mdoc->meta.date == NULL)
-               mdoc->meta.date = mdoc->quick ?
-                   mandoc_strdup("") :
-                   mandoc_normdate(mdoc->parse, NULL, 0, 0);
+               mdoc->meta.date = mdoc->quick ? mandoc_strdup("") :
+                   mandoc_normdate(mdoc, NULL, 0, 0);
 
        if (mdoc->meta.title == NULL) {
                mandoc_msg(MANDOCERR_DT_NOTITLE,
@@ -1597,13 +1958,39 @@ post_root(POST_ARGS)
                mandoc_msg(MANDOCERR_OS_MISSING,
                    mdoc->parse, 0, 0, NULL);
                mdoc->meta.os = mandoc_strdup("");
+       } else if (mdoc->meta.os_e &&
+           (mdoc->meta.rcsids & (1 << mdoc->meta.os_e)) == 0)
+               mandoc_msg(MANDOCERR_RCS_MISSING, mdoc->parse, 0, 0,
+                   mdoc->meta.os_e == MANDOC_OS_OPENBSD ?
+                   "(OpenBSD)" : "(NetBSD)");
+
+       if (mdoc->meta.arch != NULL &&
+           (arch = arches[mdoc->meta.os_e]) != NULL) {
+               while (*arch != NULL && strcmp(*arch, mdoc->meta.arch))
+                       arch++;
+               if (*arch == NULL) {
+                       n = mdoc->first->child;
+                       while (n->tok != MDOC_Dt ||
+                           n->child == NULL ||
+                           n->child->next == NULL ||
+                           n->child->next->next == NULL)
+                               n = n->next;
+                       n = n->child->next->next;
+                       mandoc_vmsg(MANDOCERR_ARCH_BAD,
+                           mdoc->parse, n->line, n->pos,
+                           "Dt ... %s %s", mdoc->meta.arch,
+                           mdoc->meta.os_e == MANDOC_OS_OPENBSD ?
+                           "(OpenBSD)" : "(NetBSD)");
+               }
        }
 
        /* Check that we begin with a proper `Sh'. */
 
        n = mdoc->first->child;
-       while (n != NULL && n->tok != TOKEN_NONE &&
-           mdoc_macros[n->tok].flags & MDOC_PROLOGUE)
+       while (n != NULL &&
+           (n->type == ROFFT_COMMENT ||
+            (n->tok >= MDOC_Dd &&
+             mdoc_macro(n->tok)->flags & MDOC_PROLOGUE)))
                n = n->next;
 
        if (n == NULL)
@@ -1729,10 +2116,20 @@ post_hyph(POST_ARGS)
 static void
 post_ns(POST_ARGS)
 {
+       struct roff_node        *n;
 
-       if (mdoc->last->flags & NODE_LINE)
+       n = mdoc->last;
+       if (n->flags & NODE_LINE ||
+           (n->next != NULL && n->next->flags & NODE_DELIMC))
                mandoc_msg(MANDOCERR_NS_SKIP, mdoc->parse,
-                   mdoc->last->line, mdoc->last->pos, NULL);
+                   n->line, n->pos, NULL);
+}
+
+static void
+post_sx(POST_ARGS)
+{
+       post_delim(mdoc);
+       post_hyph(mdoc);
 }
 
 static void
@@ -1868,7 +2265,7 @@ post_sh_see_also(POST_ARGS)
                        if (isalpha((const unsigned char)*name))
                                return;
                lastpunct = n->string;
-               if (n->next == NULL)
+               if (n->next == NULL || n->next->tok == MDOC_Rs)
                        mandoc_vmsg(MANDOCERR_XR_PUNCT, mdoc->parse,
                            n->line, n->pos, "%s after %s(%s)",
                            lastpunct, lastname, lastsec);
@@ -1895,11 +2292,54 @@ post_sh_authors(POST_ARGS)
                    mdoc->last->line, mdoc->last->pos, NULL);
 }
 
+/*
+ * Return an upper bound for the string distance (allowing
+ * transpositions).  Not a full Levenshtein implementation
+ * because Levenshtein is quadratic in the string length
+ * and this function is called for every standard name,
+ * so the check for each custom name would be cubic.
+ * The following crude heuristics is linear, resulting
+ * in quadratic behaviour for checking one custom name,
+ * which does not cause measurable slowdown.
+ */
+static int
+similar(const char *s1, const char *s2)
+{
+       const int       maxdist = 3;
+       int             dist = 0;
+
+       while (s1[0] != '\0' && s2[0] != '\0') {
+               if (s1[0] == s2[0]) {
+                       s1++;
+                       s2++;
+                       continue;
+               }
+               if (++dist > maxdist)
+                       return INT_MAX;
+               if (s1[1] == s2[1]) {  /* replacement */
+                       s1++;
+                       s2++;
+               } else if (s1[0] == s2[1] && s1[1] == s2[0]) {
+                       s1 += 2;        /* transposition */
+                       s2 += 2;
+               } else if (s1[0] == s2[1])  /* insertion */
+                       s2++;
+               else if (s1[1] == s2[0])  /* deletion */
+                       s1++;
+               else
+                       return INT_MAX;
+       }
+       dist += strlen(s1) + strlen(s2);
+       return dist > maxdist ? INT_MAX : dist;
+}
+
 static void
 post_sh_head(POST_ARGS)
 {
        struct roff_node        *nch;
        const char              *goodsec;
+       const char *const       *testsec;
+       int                      dist, mindist;
        enum roff_sec            sec;
 
        /*
@@ -1937,8 +2377,25 @@ post_sh_head(POST_ARGS)
 
        /* We don't care about custom sections after this. */
 
-       if (sec == SEC_CUSTOM)
+       if (sec == SEC_CUSTOM) {
+               if ((nch = mdoc->last->child) == NULL ||
+                   nch->type != ROFFT_TEXT || nch->next != NULL)
+                       return;
+               goodsec = NULL;
+               mindist = INT_MAX;
+               for (testsec = secnames + 1; *testsec != NULL; testsec++) {
+                       dist = similar(nch->string, *testsec);
+                       if (dist < mindist) {
+                               goodsec = *testsec;
+                               mindist = dist;
+                       }
+               }
+               if (goodsec != NULL)
+                       mandoc_vmsg(MANDOCERR_SEC_TYPO, mdoc->parse,
+                           nch->line, nch->pos, "Sh %s instead of %s",
+                           nch->string, goodsec);
                return;
+       }
 
        /*
         * Check whether our non-custom section is being repeated or is
@@ -2004,9 +2461,15 @@ post_xr(POST_ARGS)
        if (nch->next == NULL) {
                mandoc_vmsg(MANDOCERR_XR_NOSEC, mdoc->parse,
                    n->line, n->pos, "Xr %s", nch->string);
-               return;
+       } else {
+               assert(nch->next == n->last);
+               if(mandoc_xr_add(nch->next->string, nch->string,
+                   nch->line, nch->pos))
+                       mandoc_vmsg(MANDOCERR_XR_SELF, mdoc->parse,
+                           nch->line, nch->pos, "Xr %s %s",
+                           nch->string, nch->next->string);
        }
-       assert(nch->next == n->last);
+       post_delim_nb(mdoc);
 }
 
 static void
@@ -2019,6 +2482,7 @@ post_ignpar(POST_ARGS)
                post_prevpar(mdoc);
                return;
        case ROFFT_HEAD:
+               post_delim(mdoc);
                post_hyph(mdoc);
                return;
        case ROFFT_BODY:
@@ -2064,7 +2528,7 @@ post_prevpar(POST_ARGS)
 
        if (n->prev->tok != MDOC_Pp &&
            n->prev->tok != MDOC_Lp &&
-           n->prev->tok != MDOC_br)
+           n->prev->tok != ROFF_br)
                return;
        if (n->tok == MDOC_Bl && n->norm->Bl.comp)
                return;
@@ -2085,10 +2549,10 @@ post_par(POST_ARGS)
        struct roff_node *np;
 
        np = mdoc->last;
-       if (np->tok != MDOC_br && np->tok != MDOC_sp)
+       if (np->tok != ROFF_br && np->tok != ROFF_sp)
                post_prevpar(mdoc);
 
-       if (np->tok == MDOC_sp) {
+       if (np->tok == ROFF_sp) {
                if (np->child != NULL && np->child->next != NULL)
                        mandoc_vmsg(MANDOCERR_ARG_EXCESS, mdoc->parse,
                            np->child->next->line, np->child->next->pos,
@@ -2103,8 +2567,8 @@ post_par(POST_ARGS)
                if (np->tok != MDOC_Sh && np->tok != MDOC_Ss)
                        return;
        } else if (np->tok != MDOC_Pp && np->tok != MDOC_Lp &&
-           (mdoc->last->tok != MDOC_br ||
-            (np->tok != MDOC_sp && np->tok != MDOC_br)))
+           (mdoc->last->tok != ROFF_br ||
+            (np->tok != ROFF_sp && np->tok != ROFF_br)))
                return;
 
        mandoc_vmsg(MANDOCERR_PAR_SKIP, mdoc->parse,
@@ -2138,7 +2602,7 @@ post_dd(POST_ARGS)
 
        if (n->child == NULL || n->child->string[0] == '\0') {
                mdoc->meta.date = mdoc->quick ? mandoc_strdup("") :
-                   mandoc_normdate(mdoc->parse, NULL, n->line, n->pos);
+                   mandoc_normdate(mdoc, NULL, n->line, n->pos);
                return;
        }
 
@@ -2147,7 +2611,7 @@ post_dd(POST_ARGS)
        if (mdoc->quick)
                mdoc->meta.date = datestr;
        else {
-               mdoc->meta.date = mandoc_normdate(mdoc->parse,
+               mdoc->meta.date = mandoc_normdate(mdoc,
                    datestr, n->line, n->pos);
                free(datestr);
        }
@@ -2253,11 +2717,21 @@ static void
 post_bx(POST_ARGS)
 {
        struct roff_node        *n, *nch;
+       const char              *macro;
+
+       post_delim_nb(mdoc);
 
        n = mdoc->last;
        nch = n->child;
 
        if (nch != NULL) {
+               macro = !strcmp(nch->string, "Open") ? "Ox" :
+                   !strcmp(nch->string, "Net") ? "Nx" :
+                   !strcmp(nch->string, "Free") ? "Fx" :
+                   !strcmp(nch->string, "DragonFly") ? "Dx" : NULL;
+               if (macro != NULL)
+                       mandoc_msg(MANDOCERR_BX, mdoc->parse,
+                           n->line, n->pos, macro);
                mdoc->last = nch;
                nch = nch->next;
                mdoc->next = ROFF_NEXT_SIBLING;
@@ -2311,6 +2785,8 @@ post_os(POST_ARGS)
                mandoc_msg(MANDOCERR_PROLOG_LATE, mdoc->parse,
                    n->line, n->pos, "Os");
 
+       post_delim(mdoc);
+
        /*
         * Set the operating system by way of the `Os' macro.
         * The order of precedence is:
@@ -2324,11 +2800,11 @@ post_os(POST_ARGS)
        mdoc->meta.os = NULL;
        deroff(&mdoc->meta.os, n);
        if (mdoc->meta.os)
-               return;
+               goto out;
 
-       if (mdoc->defos) {
-               mdoc->meta.os = mandoc_strdup(mdoc->defos);
-               return;
+       if (mdoc->os_s != NULL) {
+               mdoc->meta.os = mandoc_strdup(mdoc->os_s);
+               goto out;
        }
 
 #ifdef OSNAME
@@ -2345,6 +2821,44 @@ post_os(POST_ARGS)
        }
        mdoc->meta.os = mandoc_strdup(defbuf);
 #endif /*!OSNAME*/
+
+out:
+       if (mdoc->meta.os_e == MANDOC_OS_OTHER) {
+               if (strstr(mdoc->meta.os, "OpenBSD") != NULL)
+                       mdoc->meta.os_e = MANDOC_OS_OPENBSD;
+               else if (strstr(mdoc->meta.os, "NetBSD") != NULL)
+                       mdoc->meta.os_e = MANDOC_OS_NETBSD;
+       }
+
+       /*
+        * This is the earliest point where we can check
+        * Mdocdate conventions because we don't know
+        * the operating system earlier.
+        */
+
+       if (n->child != NULL)
+               mandoc_vmsg(MANDOCERR_OS_ARG, mdoc->parse,
+                   n->child->line, n->child->pos,
+                   "Os %s (%s)", n->child->string,
+                   mdoc->meta.os_e == MANDOC_OS_OPENBSD ?
+                   "OpenBSD" : "NetBSD");
+
+       while (n->tok != MDOC_Dd)
+               if ((n = n->prev) == NULL)
+                       return;
+       if ((n = n->child) == NULL)
+               return;
+       if (strncmp(n->string, "$" "Mdocdate", 9)) {
+               if (mdoc->meta.os_e == MANDOC_OS_OPENBSD)
+                       mandoc_vmsg(MANDOCERR_MDOCDATE_MISSING,
+                           mdoc->parse, n->line, n->pos,
+                           "Dd %s (OpenBSD)", n->string);
+       } else {
+               if (mdoc->meta.os_e == MANDOC_OS_NETBSD)
+                       mandoc_vmsg(MANDOCERR_MDOCDATE,
+                           mdoc->parse, n->line, n->pos,
+                           "Dd %s (NetBSD)", n->string);
+       }
 }
 
 enum roff_sec