]> git.cameronkatri.com Git - mandoc.git/blobdiff - mdoc_validate.c
remove stray byte from broken escape sequence; diff from jmc@
[mandoc.git] / mdoc_validate.c
index 0ea41db142dcf93a8359689f981d4d1f75cdec60..3dc4a6f1c0eacc6f848af3f1da5abf7102d925e2 100644 (file)
@@ -1,7 +1,7 @@
-/*     $Id: mdoc_validate.c,v 1.379 2020/02/27 21:43:44 schwarze Exp $ */
+/* $Id: mdoc_validate.c,v 1.388 2020/10/30 13:24:33 schwarze Exp $ */
 /*
- * Copyright (c) 2008-2012 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2010-2020 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2008-2012 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2010 Joerg Sonnenberger <joerg@netbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
@@ -15,6 +15,8 @@
  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Validation module for mdoc(7) syntax trees used by mandoc(1).
  */
 #include "config.h"
 
@@ -39,6 +41,7 @@
 #include "libmandoc.h"
 #include "roff_int.h"
 #include "libmdoc.h"
+#include "tag.h"
 
 /* FIXME: .Bl -diag can't have non-text children in HEAD. */
 
@@ -82,16 +85,18 @@ static      void     post_dd(POST_ARGS);
 static void     post_delim(POST_ARGS);
 static void     post_delim_nb(POST_ARGS);
 static void     post_dt(POST_ARGS);
+static void     post_em(POST_ARGS);
 static void     post_en(POST_ARGS);
+static void     post_er(POST_ARGS);
 static void     post_es(POST_ARGS);
 static void     post_eoln(POST_ARGS);
 static void     post_ex(POST_ARGS);
 static void     post_fa(POST_ARGS);
+static void     post_fl(POST_ARGS);
 static void     post_fn(POST_ARGS);
 static void     post_fname(POST_ARGS);
 static void     post_fo(POST_ARGS);
 static void     post_hyph(POST_ARGS);
-static void     post_ignpar(POST_ARGS);
 static void     post_it(POST_ARGS);
 static void     post_lb(POST_ARGS);
 static void     post_nd(POST_ARGS);
@@ -104,6 +109,7 @@ static      void     post_prevpar(POST_ARGS);
 static void     post_root(POST_ARGS);
 static void     post_rs(POST_ARGS);
 static void     post_rv(POST_ARGS);
+static void     post_section(POST_ARGS);
 static void     post_sh(POST_ARGS);
 static void     post_sh_head(POST_ARGS);
 static void     post_sh_name(POST_ARGS);
@@ -113,6 +119,7 @@ static      void     post_sm(POST_ARGS);
 static void     post_st(POST_ARGS);
 static void     post_std(POST_ARGS);
 static void     post_sx(POST_ARGS);
+static void     post_tag(POST_ARGS);
 static void     post_tg(POST_ARGS);
 static void     post_useless(POST_ARGS);
 static void     post_xr(POST_ARGS);
@@ -123,7 +130,7 @@ static      const v_post mdoc_valids[MDOC_MAX - MDOC_Dd] = {
        post_dt,        /* Dt */
        post_os,        /* Os */
        post_sh,        /* Sh */
-       post_ignpar,    /* Ss */
+       post_section,   /* Ss */
        post_par,       /* Pp */
        post_display,   /* D1 */
        post_display,   /* Dl */
@@ -137,19 +144,19 @@ static    const v_post mdoc_valids[MDOC_MAX - MDOC_Dd] = {
        NULL,           /* Ap */
        post_defaults,  /* Ar */
        NULL,           /* Cd */
-       post_delim_nb,  /* Cm */
-       post_delim_nb,  /* Dv */
-       post_delim_nb,  /* Er */
-       post_delim_nb,  /* Ev */
+       post_tag,       /* Cm */
+       post_tag,       /* Dv */
+       post_er,        /* Er */
+       post_tag,       /* Ev */
        post_ex,        /* Ex */
        post_fa,        /* Fa */
        NULL,           /* Fd */
-       post_delim_nb,  /* Fl */
+       post_fl,        /* Fl */
        post_fn,        /* Fn */
        post_delim_nb,  /* Ft */
-       post_delim_nb,  /* Ic */
+       post_tag,       /* Ic */
        post_delim_nb,  /* In */
-       post_defaults,  /* Li */
+       post_tag,       /* Li */
        post_nd,        /* Nd */
        post_nm,        /* Nm */
        post_delim_nb,  /* Op */
@@ -187,11 +194,11 @@ static    const v_post mdoc_valids[MDOC_MAX - MDOC_Dd] = {
        NULL,           /* Dq */
        NULL,           /* Ec */
        NULL,           /* Ef */
-       post_delim_nb,  /* Em */
+       post_em,        /* Em */
        NULL,           /* Eo */
        post_xx,        /* Fx */
-       post_delim_nb,  /* Ms */
-       NULL,           /* No */
+       post_tag,       /* Ms */
+       post_tag,       /* No */
        post_ns,        /* Ns */
        post_xx,        /* Nx */
        post_xx,        /* Ox */
@@ -210,7 +217,7 @@ static      const v_post mdoc_valids[MDOC_MAX - MDOC_Dd] = {
        post_delim_nb,  /* Sq */
        post_sm,        /* Sm */
        post_sx,        /* Sx */
-       post_delim_nb,  /* Sy */
+       post_em,        /* Sy */
        post_useless,   /* Tn */
        post_xx,        /* Ux */
        NULL,           /* Xc */
@@ -287,6 +294,8 @@ static      const char * const secnames[SEC__MAX] = {
        NULL
 };
 
+static int       fn_prio = TAG_STRONG;
+
 
 /* Validate the subtree rooted at mdoc->last. */
 void
@@ -1094,8 +1103,12 @@ post_st(POST_ARGS)
 static void
 post_tg(POST_ARGS)
 {
-       struct roff_node        *n, *nch, *nn;
-       size_t                  len;
+       struct roff_node *n;    /* The .Tg node. */
+       struct roff_node *nch;  /* The first child of the .Tg node. */
+       struct roff_node *nn;   /* The next node after the .Tg node. */
+       struct roff_node *np;   /* The parent of the next node. */
+       struct roff_node *nt;   /* The TEXT node containing the tag. */
+       size_t            len;  /* The number of bytes in the tag. */
 
        /* Find the next node. */
        n = mdoc->last;
@@ -1106,30 +1119,26 @@ post_tg(POST_ARGS)
                }
        }
 
-       /* Add the default argument, if needed. */
-       nch = n->child;
-       if (nch == NULL && nn != NULL && nn->child->type == ROFFT_TEXT) {
-               mdoc->next = ROFF_NEXT_CHILD;
-               roff_word_alloc(mdoc, n->line, n->pos, n->next->child->string);
-               nch = mdoc->last;
-               nch->flags |= NODE_NOSRC;
-               mdoc->last = n;
-       }
+       /* Find the tag. */
+       nt = nch = n->child;
+       if (nch == NULL && nn != NULL && nn->child != NULL &&
+           nn->child->type == ROFFT_TEXT)
+               nt = nn->child;
 
-       /* Validate the first argument. */
-       if (nch == NULL || *nch->string == '\0')
+       /* Validate the tag. */
+       if (nt == NULL || *nt->string == '\0')
                mandoc_msg(MANDOCERR_MACRO_EMPTY, n->line, n->pos, "Tg");
-       if (nch == NULL) {
+       if (nt == NULL) {
                roff_node_delete(mdoc, n);
                return;
        }
-       len = strcspn(nch->string, " \t");
-       if (nch->string[len] != '\0')
-               mandoc_msg(MANDOCERR_TG_SPC, nch->line, nch->pos + len + 1,
-                   "Tg %s", nch->string);
+       len = strcspn(nt->string, " \t\\");
+       if (nt->string[len] != '\0')
+               mandoc_msg(MANDOCERR_TG_SPC, nt->line,
+                   nt->pos + len, "Tg %s", nt->string);
 
        /* Keep only the first argument. */
-       if (nch->next != NULL) {
+       if (nch != NULL && nch->next != NULL) {
                mandoc_msg(MANDOCERR_ARG_EXCESS, nch->next->line,
                    nch->next->pos, "Tg ... %s", nch->next->string);
                while (nch->next != NULL)
@@ -1137,32 +1146,77 @@ post_tg(POST_ARGS)
        }
 
        /* Drop the macro if the first argument is invalid. */
-       if (len == 0 || nch->string[len] != '\0') {
+       if (len == 0 || nt->string[len] != '\0') {
                roff_node_delete(mdoc, n);
                return;
        }
 
-       /* By default, write a <mark> element. */
-       n->flags |= NODE_ID;
-       if (nn == NULL)
-               return;
+       /* By default, tag the .Tg node itself. */
+       if (nn == NULL || nn->flags & NODE_ID)
+               nn = n;
 
        /* Explicit tagging of specific macros. */
        switch (nn->tok) {
        case MDOC_Sh:
        case MDOC_Ss:
-               if (nn->head->flags & NODE_ID || nn->head->child == NULL)
+       case MDOC_Fo:
+               nn = nn->head->child == NULL ? n : nn->head;
+               break;
+       case MDOC_It:
+               np = nn->parent;
+               while (np->tok != MDOC_Bl)
+                       np = np->parent;
+               switch (np->norm->Bl.type) {
+               case LIST_column:
                        break;
-               n->flags |= NODE_NOPRT;
-               nn->head->flags |= NODE_ID | NODE_HREF;
-               assert(nn->head->string == NULL);
-               nn->head->string = mandoc_strdup(nch->string);
+               case LIST_diag:
+               case LIST_hang:
+               case LIST_inset:
+               case LIST_ohang:
+               case LIST_tag:
+                       nn = nn->head;
+                       break;
+               case LIST_bullet:
+               case LIST_dash:
+               case LIST_enum:
+               case LIST_hyphen:
+               case LIST_item:
+                       nn = nn->body->child == NULL ? n : nn->body;
+                       break;
+               default:
+                       abort();
+               }
+               break;
+       case MDOC_Bd:
+       case MDOC_Bl:
+       case MDOC_D1:
+       case MDOC_Dl:
+               nn = nn->body->child == NULL ? n : nn->body;
+               break;
+       case MDOC_Pp:
+               break;
+       case MDOC_Cm:
+       case MDOC_Dv:
+       case MDOC_Em:
+       case MDOC_Er:
+       case MDOC_Ev:
+       case MDOC_Fl:
+       case MDOC_Fn:
+       case MDOC_Ic:
+       case MDOC_Li:
+       case MDOC_Ms:
+       case MDOC_No:
+       case MDOC_Sy:
+               if (nn->child == NULL)
+                       nn = n;
                break;
        default:
+               nn = n;
                break;
        }
-       if (n->flags & NODE_NOPRT)
-               n->flags &= ~NODE_ID;
+       tag_put(nt->string, TAG_MANUAL, nn);
+       if (nn != n)
+               n->flags |= NODE_NOPRT;
 }
 
 static void
@@ -1257,28 +1311,32 @@ post_bf(POST_ARGS)
 static void
 post_fname(POST_ARGS)
 {
-       const struct roff_node  *n;
+       struct roff_node        *n, *nch;
        const char              *cp;
        size_t                   pos;
 
-       n = mdoc->last->child;
-       cp = n->string;
+       n = mdoc->last;
+       nch = n->child;
+       cp = nch->string;
        if (*cp == '(') {
                if (cp[strlen(cp + 1)] == ')')
                        return;
                pos = 0;
        } else {
                pos = strcspn(cp, "()");
-               if (cp[pos] == '\0')
+               if (cp[pos] == '\0') {
+                       if (n->sec == SEC_DESCRIPTION ||
+                           n->sec == SEC_CUSTOM)
+                               tag_put(NULL, fn_prio++, n);
                        return;
+               }
        }
-       mandoc_msg(MANDOCERR_FN_PAREN, n->line, n->pos + pos, "%s", cp);
+       mandoc_msg(MANDOCERR_FN_PAREN, nch->line, nch->pos + pos, "%s", cp);
 }
 
 static void
 post_fn(POST_ARGS)
 {
-
        post_fname(mdoc);
        post_fa(mdoc);
 }
@@ -1442,38 +1500,29 @@ post_display(POST_ARGS)
 static void
 post_defaults(POST_ARGS)
 {
-       struct roff_node *nn;
+       struct roff_node *n;
 
-       if (mdoc->last->child != NULL) {
+       n = mdoc->last;
+       if (n->child != NULL) {
                post_delim_nb(mdoc);
                return;
        }
-
-       /*
-        * The `Ar' defaults to "file ..." if no value is provided as an
-        * argument; the `Mt' and `Pa' macros use "~"; the `Li' just
-        * gets an empty string.
-        */
-
-       nn = mdoc->last;
-       switch (nn->tok) {
+       mdoc->next = ROFF_NEXT_CHILD;
+       switch (n->tok) {
        case MDOC_Ar:
-               mdoc->next = ROFF_NEXT_CHILD;
-               roff_word_alloc(mdoc, nn->line, nn->pos, "file");
-               mdoc->last->flags |= NODE_NOSRC;
-               roff_word_alloc(mdoc, nn->line, nn->pos, "...");
+               roff_word_alloc(mdoc, n->line, n->pos, "file");
                mdoc->last->flags |= NODE_NOSRC;
+               roff_word_alloc(mdoc, n->line, n->pos, "...");
                break;
        case MDOC_Pa:
        case MDOC_Mt:
-               mdoc->next = ROFF_NEXT_CHILD;
-               roff_word_alloc(mdoc, nn->line, nn->pos, "~");
-               mdoc->last->flags |= NODE_NOSRC;
+               roff_word_alloc(mdoc, n->line, n->pos, "~");
                break;
        default:
                abort();
        }
-       mdoc->last = nn;
+       mdoc->last->flags |= NODE_NOSRC;
+       mdoc->last = n;
 }
 
 static void
@@ -1527,22 +1576,81 @@ post_an(POST_ARGS)
 }
 
 static void
-post_en(POST_ARGS)
+post_em(POST_ARGS)
 {
+       post_tag(mdoc);
+       tag_put(NULL, TAG_FALLBACK, mdoc->last);
+}
 
+static void
+post_en(POST_ARGS)
+{
        post_obsolete(mdoc);
        if (mdoc->last->type == ROFFT_BLOCK)
                mdoc->last->norm->Es = mdoc->last_es;
 }
 
 static void
-post_es(POST_ARGS)
+post_er(POST_ARGS)
+{
+       struct roff_node *n;
+
+       n = mdoc->last;
+       if (n->sec == SEC_ERRORS &&
+           (n->parent->tok == MDOC_It ||
+            (n->parent->tok == MDOC_Bq &&
+             n->parent->parent->parent->tok == MDOC_It)))
+               tag_put(NULL, TAG_STRONG, n);
+       post_delim_nb(mdoc);
+}
+
+static void
+post_tag(POST_ARGS)
 {
+       struct roff_node *n;
 
+       n = mdoc->last;
+       if ((n->prev == NULL ||
+            (n->prev->type == ROFFT_TEXT &&
+             strcmp(n->prev->string, "|") == 0)) &&
+           (n->parent->tok == MDOC_It ||
+            (n->parent->tok == MDOC_Xo &&
+             n->parent->parent->prev == NULL &&
+             n->parent->parent->parent->tok == MDOC_It)))
+               tag_put(NULL, TAG_STRONG, n);
+       post_delim_nb(mdoc);
+}
+
+static void
+post_es(POST_ARGS)
+{
        post_obsolete(mdoc);
        mdoc->last_es = mdoc->last;
 }
 
+static void
+post_fl(POST_ARGS)
+{
+       struct roff_node        *n;
+       char                    *cp;
+
+       /*
+        * Transform ".Fl Fl long" to ".Fl \-long",
+        * resulting for example in better HTML output.
+        */
+
+       n = mdoc->last;
+       if (n->prev != NULL && n->prev->tok == MDOC_Fl &&
+           n->prev->child == NULL && n->child != NULL &&
+           (n->flags & NODE_LINE) == 0) {
+               mandoc_asprintf(&cp, "\\-%s", n->child->string);
+               free(n->child->string);
+               n->child->string = cp;
+               roff_node_delete(mdoc, n->prev);
+       }
+       post_tag(mdoc);
+}
+
 static void
 post_xx(POST_ARGS)
 {
@@ -1635,8 +1743,8 @@ post_it(POST_ARGS)
                if ((nch = nit->head->child) != NULL)
                        mandoc_msg(MANDOCERR_ARG_SKIP,
                            nit->line, nit->pos, "It %s",
-                           nch->string == NULL ? roff_name[nch->tok] :
-                           nch->string);
+                           nch->type == ROFFT_TEXT ? nch->string :
+                           roff_name[nch->tok]);
                break;
        case LIST_column:
                cols = (int)nbl->norm->Bl.ncols;
@@ -2114,10 +2222,11 @@ post_rs(POST_ARGS)
 static void
 post_hyph(POST_ARGS)
 {
-       struct roff_node        *nch;
+       struct roff_node        *n, *nch;
        char                    *cp;
 
-       for (nch = mdoc->last->child; nch != NULL; nch = nch->next) {
+       n = mdoc->last;
+       for (nch = n->child; nch != NULL; nch = nch->next) {
                if (nch->type != ROFFT_TEXT)
                        continue;
                cp = nch->string;
@@ -2126,8 +2235,11 @@ post_hyph(POST_ARGS)
                while (*(++cp) != '\0')
                        if (*cp == '-' &&
                            isalpha((unsigned char)cp[-1]) &&
-                           isalpha((unsigned char)cp[1]))
+                           isalpha((unsigned char)cp[1])) {
+                               if (n->tag == NULL && n->flags & NODE_ID)
+                                       n->tag = mandoc_strdup(nch->string);
                                *cp = ASCII_HYPH;
+                       }
        }
 }
 
@@ -2152,8 +2264,7 @@ post_sx(POST_ARGS)
 static void
 post_sh(POST_ARGS)
 {
-
-       post_ignpar(mdoc);
+       post_section(mdoc);
 
        switch (mdoc->last->type) {
        case ROFFT_HEAD:
@@ -2384,6 +2495,8 @@ post_sh_head(POST_ARGS)
                roff_setreg(mdoc->roff, "nS", 0, '=');
                mdoc->flags &= ~MDOC_SYNOPSIS;
        }
+       if (sec == SEC_DESCRIPTION)
+               fn_prio = TAG_STRONG;
 
        /* Mark our last section. */
 
@@ -2484,15 +2597,31 @@ post_xr(POST_ARGS)
 }
 
 static void
-post_ignpar(POST_ARGS)
+post_section(POST_ARGS)
 {
-       struct roff_node *np;
+       struct roff_node *n, *nch;
+       char             *cp, *tag;
 
-       switch (mdoc->last->type) {
+       n = mdoc->last;
+       switch (n->type) {
        case ROFFT_BLOCK:
                post_prevpar(mdoc);
                return;
        case ROFFT_HEAD:
+               tag = NULL;
+               deroff(&tag, n);
+               if (tag != NULL) {
+                       for (cp = tag; *cp != '\0'; cp++)
+                               if (*cp == ' ')
+                                       *cp = '_';
+                       if ((nch = n->child) != NULL &&
+                           nch->type == ROFFT_TEXT &&
+                           strcmp(nch->string, tag) == 0)
+                               tag_put(NULL, TAG_STRONG, n);
+                       else
+                               tag_put(tag, TAG_FALLBACK, n);
+                       free(tag);
+               }
                post_delim(mdoc);
                post_hyph(mdoc);
                return;
@@ -2501,23 +2630,21 @@ post_ignpar(POST_ARGS)
        default:
                return;
        }
-
-       if ((np = mdoc->last->child) != NULL)
-               if (np->tok == MDOC_Pp ||
-                   np->tok == ROFF_br || np->tok == ROFF_sp) {
-                       mandoc_msg(MANDOCERR_PAR_SKIP, np->line, np->pos,
-                           "%s after %s", roff_name[np->tok],
-                           roff_name[mdoc->last->tok]);
-                       roff_node_delete(mdoc, np);
-               }
-
-       if ((np = mdoc->last->last) != NULL)
-               if (np->tok == MDOC_Pp || np->tok == ROFF_br) {
-                       mandoc_msg(MANDOCERR_PAR_SKIP, np->line, np->pos,
-                           "%s at the end of %s", roff_name[np->tok],
-                           roff_name[mdoc->last->tok]);
-                       roff_node_delete(mdoc, np);
-               }
+       if ((nch = n->child) != NULL &&
+           (nch->tok == MDOC_Pp || nch->tok == ROFF_br ||
+            nch->tok == ROFF_sp)) {
+               mandoc_msg(MANDOCERR_PAR_SKIP, nch->line, nch->pos,
+                   "%s after %s", roff_name[nch->tok],
+                   roff_name[n->tok]);
+               roff_node_delete(mdoc, nch);
+       }
+       if ((nch = n->last) != NULL &&
+           (nch->tok == MDOC_Pp || nch->tok == ROFF_br)) {
+               mandoc_msg(MANDOCERR_PAR_SKIP, nch->line, nch->pos,
+                   "%s at the end of %s", roff_name[nch->tok],
+                   roff_name[n->tok]);
+               roff_node_delete(mdoc, nch);
+       }
 }
 
 static void
@@ -2555,6 +2682,7 @@ post_par(POST_ARGS)
 {
        struct roff_node *np;
 
+       fn_prio = TAG_STRONG;
        post_prevpar(mdoc);
 
        np = mdoc->last;
@@ -2661,8 +2789,14 @@ post_dt(POST_ARGS)
                mandoc_msg(MANDOCERR_MSEC_BAD,
                    nn->line, nn->pos, "Dt ... %s", nn->string);
                mdoc->meta.vol = mandoc_strdup(nn->string);
-       } else
+       } else {
                mdoc->meta.vol = mandoc_strdup(cp);
+               if (mdoc->filesec != '\0' &&
+                   mdoc->filesec != *nn->string &&
+                   *nn->string >= '1' && *nn->string <= '9')
+                       mandoc_msg(MANDOCERR_MSEC_FILE, nn->line, nn->pos,
+                           "*.%c vs Dt ... %c", mdoc->filesec, *nn->string);
+       }
 
        /* Optional third argument: architecture. */