aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/mdoc_validate.c
diff options
context:
space:
mode:
authorIngo Schwarze <schwarze@openbsd.org>2020-03-13 15:32:28 +0000
committerIngo Schwarze <schwarze@openbsd.org>2020-03-13 15:32:28 +0000
commitf10f0fe3970de778125a29d73e65e63f32c138e1 (patch)
tree757b21a5fff7cfdab542f40f9aa0e1306287415c /mdoc_validate.c
parent62450180320c529d836c4c25672879a7ce53221a (diff)
downloadmandoc-f10f0fe3970de778125a29d73e65e63f32c138e1.tar.gz
mandoc-f10f0fe3970de778125a29d73e65e63f32c138e1.tar.zst
mandoc-f10f0fe3970de778125a29d73e65e63f32c138e1.zip
Split tagging into a validation part including prioritization
in tag.{h,c} and {mdoc,man}_validate.c and into a formatting part including command line argument checking in term_tag.{h,c}, html.c, and {mdoc|man}_{term|html}.c. Immediate functional benefits include: * Improved prioritization of automatic tags for .Em and .Sy. * Avoiding bogus automatic tags when .Em, .Fn, or .Sy are explicitly tagged. * Explicit tagging of .Er and .Fl now works in HTML output. * Automatic tagging of .IP and .TP now works in HTML output. But mainly, this patch provides clean earth to build further improvements on. Technical changes: * Main program: Write a tag file for ASCII and UTF-8 output only. * All formatters: There is no more need to delay writing the tags. * mdoc(7)+man(7) formatters: No more need for elaborate syntax tree inspection. * HTML formatter: If available, use the "string" attribute as the tag. * HTML formatter: New function to write permalinks, to reduce code duplication. Style cleanup in the vicinity while here: * mdoc(7) terminal formatter: To set up bold font for children, defer to termp_bold_pre() rather than calling term_fontpush() manually. * mdoc(7) terminal formatter: Garbage collect some duplicate functions. * mdoc(7) HTML formatter: Unify <code> handling, delete redundant functions. * Where possible, use switch statements rather than if cascades. * Get rid of some more Yoda notation. The necessity for such changes was first discussed with kn@, but i didn't bother him with a request to review the resulting -673/+782 line patch.
Diffstat (limited to 'mdoc_validate.c')
-rw-r--r--mdoc_validate.c200
1 files changed, 126 insertions, 74 deletions
diff --git a/mdoc_validate.c b/mdoc_validate.c
index 0ea41db1..bae3d6b8 100644
--- a/mdoc_validate.c
+++ b/mdoc_validate.c
@@ -1,7 +1,7 @@
-/* $Id: mdoc_validate.c,v 1.379 2020/02/27 21:43:44 schwarze Exp $ */
+/* $Id: mdoc_validate.c,v 1.380 2020/03/13 15:32:28 schwarze Exp $ */
/*
- * Copyright (c) 2008-2012 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010-2020 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2008-2012 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010 Joerg Sonnenberger <joerg@netbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
@@ -15,6 +15,8 @@
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Validation module for mdoc(7) syntax trees used by mandoc(1).
*/
#include "config.h"
@@ -35,6 +37,7 @@
#include "mandoc.h"
#include "mandoc_xr.h"
#include "roff.h"
+#include "tag.h"
#include "mdoc.h"
#include "libmandoc.h"
#include "roff_int.h"
@@ -82,7 +85,9 @@ static void post_dd(POST_ARGS);
static void post_delim(POST_ARGS);
static void post_delim_nb(POST_ARGS);
static void post_dt(POST_ARGS);
+static void post_em(POST_ARGS);
static void post_en(POST_ARGS);
+static void post_er(POST_ARGS);
static void post_es(POST_ARGS);
static void post_eoln(POST_ARGS);
static void post_ex(POST_ARGS);
@@ -113,6 +118,7 @@ static void post_sm(POST_ARGS);
static void post_st(POST_ARGS);
static void post_std(POST_ARGS);
static void post_sx(POST_ARGS);
+static void post_tag(POST_ARGS);
static void post_tg(POST_ARGS);
static void post_useless(POST_ARGS);
static void post_xr(POST_ARGS);
@@ -137,19 +143,19 @@ static const v_post mdoc_valids[MDOC_MAX - MDOC_Dd] = {
NULL, /* Ap */
post_defaults, /* Ar */
NULL, /* Cd */
- post_delim_nb, /* Cm */
- post_delim_nb, /* Dv */
- post_delim_nb, /* Er */
- post_delim_nb, /* Ev */
+ post_tag, /* Cm */
+ post_tag, /* Dv */
+ post_er, /* Er */
+ post_tag, /* Ev */
post_ex, /* Ex */
post_fa, /* Fa */
NULL, /* Fd */
- post_delim_nb, /* Fl */
+ post_tag, /* Fl */
post_fn, /* Fn */
post_delim_nb, /* Ft */
- post_delim_nb, /* Ic */
+ post_tag, /* Ic */
post_delim_nb, /* In */
- post_defaults, /* Li */
+ post_tag, /* Li */
post_nd, /* Nd */
post_nm, /* Nm */
post_delim_nb, /* Op */
@@ -187,11 +193,11 @@ static const v_post mdoc_valids[MDOC_MAX - MDOC_Dd] = {
NULL, /* Dq */
NULL, /* Ec */
NULL, /* Ef */
- post_delim_nb, /* Em */
+ post_em, /* Em */
NULL, /* Eo */
post_xx, /* Fx */
- post_delim_nb, /* Ms */
- NULL, /* No */
+ post_tag, /* Ms */
+ post_tag, /* No */
post_ns, /* Ns */
post_xx, /* Nx */
post_xx, /* Ox */
@@ -210,7 +216,7 @@ static const v_post mdoc_valids[MDOC_MAX - MDOC_Dd] = {
post_delim_nb, /* Sq */
post_sm, /* Sm */
post_sx, /* Sx */
- post_delim_nb, /* Sy */
+ post_em, /* Sy */
post_useless, /* Tn */
post_xx, /* Ux */
NULL, /* Xc */
@@ -287,6 +293,8 @@ static const char * const secnames[SEC__MAX] = {
NULL
};
+static int fn_prio = TAG_STRONG;
+
/* Validate the subtree rooted at mdoc->last. */
void
@@ -1094,8 +1102,11 @@ post_st(POST_ARGS)
static void
post_tg(POST_ARGS)
{
- struct roff_node *n, *nch, *nn;
- size_t len;
+ struct roff_node *n; /* The .Tg node. */
+ struct roff_node *nch; /* The first child of the .Tg node. */
+ struct roff_node *nn; /* The next node after the .Tg node. */
+ struct roff_node *nt; /* The TEXT node containing the tag. */
+ size_t len; /* The number of bytes in the tag. */
/* Find the next node. */
n = mdoc->last;
@@ -1106,30 +1117,26 @@ post_tg(POST_ARGS)
}
}
- /* Add the default argument, if needed. */
- nch = n->child;
- if (nch == NULL && nn != NULL && nn->child->type == ROFFT_TEXT) {
- mdoc->next = ROFF_NEXT_CHILD;
- roff_word_alloc(mdoc, n->line, n->pos, n->next->child->string);
- nch = mdoc->last;
- nch->flags |= NODE_NOSRC;
- mdoc->last = n;
- }
+ /* Find the tag. */
+ nt = nch = n->child;
+ if (nch == NULL && nn != NULL && nn->child != NULL &&
+ nn->child->type == ROFFT_TEXT)
+ nt = nn->child;
- /* Validate the first argument. */
- if (nch == NULL || *nch->string == '\0')
+ /* Validate the tag. */
+ if (nt == NULL || *nt->string == '\0')
mandoc_msg(MANDOCERR_MACRO_EMPTY, n->line, n->pos, "Tg");
- if (nch == NULL) {
+ if (nt == NULL) {
roff_node_delete(mdoc, n);
return;
}
- len = strcspn(nch->string, " \t");
- if (nch->string[len] != '\0')
- mandoc_msg(MANDOCERR_TG_SPC, nch->line, nch->pos + len + 1,
- "Tg %s", nch->string);
+ len = strcspn(nt->string, " \t\\");
+ if (nt->string[len] != '\0')
+ mandoc_msg(MANDOCERR_TG_SPC, nt->line,
+ nt->pos + len, "Tg %s", nt->string);
/* Keep only the first argument. */
- if (nch->next != NULL) {
+ if (nch != NULL && nch->next != NULL) {
mandoc_msg(MANDOCERR_ARG_EXCESS, nch->next->line,
nch->next->pos, "Tg ... %s", nch->next->string);
while (nch->next != NULL)
@@ -1137,32 +1144,44 @@ post_tg(POST_ARGS)
}
/* Drop the macro if the first argument is invalid. */
- if (len == 0 || nch->string[len] != '\0') {
+ if (len == 0 || nt->string[len] != '\0') {
roff_node_delete(mdoc, n);
return;
}
- /* By default, write a <mark> element. */
- n->flags |= NODE_ID;
+ /* By default, tag the .Tg node itself. */
if (nn == NULL)
- return;
+ nn = n;
/* Explicit tagging of specific macros. */
switch (nn->tok) {
case MDOC_Sh:
case MDOC_Ss:
- if (nn->head->flags & NODE_ID || nn->head->child == NULL)
+ case MDOC_Fo:
+ nn = nn->head;
+ /* FALLTHROUGH */
+ case MDOC_Cm:
+ case MDOC_Dv:
+ case MDOC_Em:
+ case MDOC_Er:
+ case MDOC_Ev:
+ case MDOC_Fl:
+ case MDOC_Fn:
+ case MDOC_Ic:
+ case MDOC_Li:
+ case MDOC_Ms:
+ case MDOC_No:
+ case MDOC_Sy:
+ if (nn->child != NULL && (nn->flags & NODE_ID) == 0)
break;
- n->flags |= NODE_NOPRT;
- nn->head->flags |= NODE_ID | NODE_HREF;
- assert(nn->head->string == NULL);
- nn->head->string = mandoc_strdup(nch->string);
- break;
+ /* FALLTHROUGH */
default:
+ nn = n;
break;
}
- if (n->flags & NODE_NOPRT)
- n->flags &= ~NODE_ID;
+ tag_put(nt->string, TAG_MANUAL, nn);
+ if (nn != n)
+ n->flags |= NODE_NOPRT;
}
static void
@@ -1257,28 +1276,32 @@ post_bf(POST_ARGS)
static void
post_fname(POST_ARGS)
{
- const struct roff_node *n;
+ struct roff_node *n, *nch;
const char *cp;
size_t pos;
- n = mdoc->last->child;
- cp = n->string;
+ n = mdoc->last;
+ nch = n->child;
+ cp = nch->string;
if (*cp == '(') {
if (cp[strlen(cp + 1)] == ')')
return;
pos = 0;
} else {
pos = strcspn(cp, "()");
- if (cp[pos] == '\0')
+ if (cp[pos] == '\0') {
+ if (n->sec == SEC_DESCRIPTION ||
+ n->sec == SEC_CUSTOM)
+ tag_put(NULL, fn_prio++, n);
return;
+ }
}
- mandoc_msg(MANDOCERR_FN_PAREN, n->line, n->pos + pos, "%s", cp);
+ mandoc_msg(MANDOCERR_FN_PAREN, nch->line, nch->pos + pos, "%s", cp);
}
static void
post_fn(POST_ARGS)
{
-
post_fname(mdoc);
post_fa(mdoc);
}
@@ -1442,38 +1465,29 @@ post_display(POST_ARGS)
static void
post_defaults(POST_ARGS)
{
- struct roff_node *nn;
+ struct roff_node *n;
- if (mdoc->last->child != NULL) {
+ n = mdoc->last;
+ if (n->child != NULL) {
post_delim_nb(mdoc);
return;
}
-
- /*
- * The `Ar' defaults to "file ..." if no value is provided as an
- * argument; the `Mt' and `Pa' macros use "~"; the `Li' just
- * gets an empty string.
- */
-
- nn = mdoc->last;
- switch (nn->tok) {
+ mdoc->next = ROFF_NEXT_CHILD;
+ switch (n->tok) {
case MDOC_Ar:
- mdoc->next = ROFF_NEXT_CHILD;
- roff_word_alloc(mdoc, nn->line, nn->pos, "file");
- mdoc->last->flags |= NODE_NOSRC;
- roff_word_alloc(mdoc, nn->line, nn->pos, "...");
+ roff_word_alloc(mdoc, n->line, n->pos, "file");
mdoc->last->flags |= NODE_NOSRC;
+ roff_word_alloc(mdoc, n->line, n->pos, "...");
break;
case MDOC_Pa:
case MDOC_Mt:
- mdoc->next = ROFF_NEXT_CHILD;
- roff_word_alloc(mdoc, nn->line, nn->pos, "~");
- mdoc->last->flags |= NODE_NOSRC;
+ roff_word_alloc(mdoc, n->line, n->pos, "~");
break;
default:
abort();
}
- mdoc->last = nn;
+ mdoc->last->flags |= NODE_NOSRC;
+ mdoc->last = n;
}
static void
@@ -1527,18 +1541,54 @@ post_an(POST_ARGS)
}
static void
-post_en(POST_ARGS)
+post_em(POST_ARGS)
{
+ post_tag(mdoc);
+ tag_put(NULL, TAG_FALLBACK, mdoc->last);
+}
+static void
+post_en(POST_ARGS)
+{
post_obsolete(mdoc);
if (mdoc->last->type == ROFFT_BLOCK)
mdoc->last->norm->Es = mdoc->last_es;
}
static void
-post_es(POST_ARGS)
+post_er(POST_ARGS)
+{
+ struct roff_node *n;
+
+ n = mdoc->last;
+ if (n->sec == SEC_ERRORS &&
+ (n->parent->tok == MDOC_It ||
+ (n->parent->tok == MDOC_Bq &&
+ n->parent->parent->parent->tok == MDOC_It)))
+ tag_put(NULL, TAG_STRONG, n);
+ post_delim_nb(mdoc);
+}
+
+static void
+post_tag(POST_ARGS)
{
+ struct roff_node *n;
+
+ n = mdoc->last;
+ if ((n->prev == NULL ||
+ (n->prev->type == ROFFT_TEXT &&
+ strcmp(n->prev->string, "|") == 0)) &&
+ (n->parent->tok == MDOC_It ||
+ (n->parent->tok == MDOC_Xo &&
+ n->parent->parent->prev == NULL &&
+ n->parent->parent->parent->tok == MDOC_It)))
+ tag_put(NULL, TAG_STRONG, n);
+ post_delim_nb(mdoc);
+}
+static void
+post_es(POST_ARGS)
+{
post_obsolete(mdoc);
mdoc->last_es = mdoc->last;
}
@@ -1635,8 +1685,8 @@ post_it(POST_ARGS)
if ((nch = nit->head->child) != NULL)
mandoc_msg(MANDOCERR_ARG_SKIP,
nit->line, nit->pos, "It %s",
- nch->string == NULL ? roff_name[nch->tok] :
- nch->string);
+ nch->type == ROFFT_TEXT ? nch->string :
+ roff_name[nch->tok]);
break;
case LIST_column:
cols = (int)nbl->norm->Bl.ncols;
@@ -2152,7 +2202,6 @@ post_sx(POST_ARGS)
static void
post_sh(POST_ARGS)
{
-
post_ignpar(mdoc);
switch (mdoc->last->type) {
@@ -2384,6 +2433,8 @@ post_sh_head(POST_ARGS)
roff_setreg(mdoc->roff, "nS", 0, '=');
mdoc->flags &= ~MDOC_SYNOPSIS;
}
+ if (sec == SEC_DESCRIPTION)
+ fn_prio = TAG_STRONG;
/* Mark our last section. */
@@ -2555,6 +2606,7 @@ post_par(POST_ARGS)
{
struct roff_node *np;
+ fn_prio = TAG_STRONG;
post_prevpar(mdoc);
np = mdoc->last;