aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/man_validate.c
diff options
context:
space:
mode:
authorIngo Schwarze <schwarze@openbsd.org>2020-03-13 15:32:28 +0000
committerIngo Schwarze <schwarze@openbsd.org>2020-03-13 15:32:28 +0000
commitf10f0fe3970de778125a29d73e65e63f32c138e1 (patch)
tree757b21a5fff7cfdab542f40f9aa0e1306287415c /man_validate.c
parent62450180320c529d836c4c25672879a7ce53221a (diff)
downloadmandoc-f10f0fe3970de778125a29d73e65e63f32c138e1.tar.gz
mandoc-f10f0fe3970de778125a29d73e65e63f32c138e1.tar.zst
mandoc-f10f0fe3970de778125a29d73e65e63f32c138e1.zip
Split tagging into a validation part including prioritization
in tag.{h,c} and {mdoc,man}_validate.c and into a formatting part including command line argument checking in term_tag.{h,c}, html.c, and {mdoc|man}_{term|html}.c. Immediate functional benefits include: * Improved prioritization of automatic tags for .Em and .Sy. * Avoiding bogus automatic tags when .Em, .Fn, or .Sy are explicitly tagged. * Explicit tagging of .Er and .Fl now works in HTML output. * Automatic tagging of .IP and .TP now works in HTML output. But mainly, this patch provides clean earth to build further improvements on. Technical changes: * Main program: Write a tag file for ASCII and UTF-8 output only. * All formatters: There is no more need to delay writing the tags. * mdoc(7)+man(7) formatters: No more need for elaborate syntax tree inspection. * HTML formatter: If available, use the "string" attribute as the tag. * HTML formatter: New function to write permalinks, to reduce code duplication. Style cleanup in the vicinity while here: * mdoc(7) terminal formatter: To set up bold font for children, defer to termp_bold_pre() rather than calling term_fontpush() manually. * mdoc(7) terminal formatter: Garbage collect some duplicate functions. * mdoc(7) HTML formatter: Unify <code> handling, delete redundant functions. * Where possible, use switch statements rather than if cascades. * Get rid of some more Yoda notation. The necessity for such changes was first discussed with kn@, but i didn't bother him with a request to review the resulting -673/+782 line patch.
Diffstat (limited to 'man_validate.c')
-rw-r--r--man_validate.c108
1 files changed, 103 insertions, 5 deletions
diff --git a/man_validate.c b/man_validate.c
index 374793e4..0c2ed8a4 100644
--- a/man_validate.c
+++ b/man_validate.c
@@ -1,7 +1,7 @@
-/* $Id: man_validate.c,v 1.150 2020/01/19 16:44:50 schwarze Exp $ */
+/* $Id: man_validate.c,v 1.151 2020/03/13 15:32:28 schwarze Exp $ */
/*
- * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010, 2012-2020 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -14,6 +14,8 @@
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Validation module for man(7) syntax trees used by mandoc(1).
*/
#include "config.h"
@@ -32,6 +34,7 @@
#include "mandoc_aux.h"
#include "mandoc.h"
#include "roff.h"
+#include "tag.h"
#include "man.h"
#include "libmandoc.h"
#include "roff_int.h"
@@ -45,6 +48,7 @@ static void check_abort(CHKARGS) __attribute__((__noreturn__));
static void check_par(CHKARGS);
static void check_part(CHKARGS);
static void check_root(CHKARGS);
+static void check_tag(struct roff_node *, struct roff_node *);
static void check_text(CHKARGS);
static void post_AT(CHKARGS);
@@ -54,6 +58,7 @@ static void post_IP(CHKARGS);
static void post_OP(CHKARGS);
static void post_SH(CHKARGS);
static void post_TH(CHKARGS);
+static void post_TP(CHKARGS);
static void post_UC(CHKARGS);
static void post_UR(CHKARGS);
static void post_in(CHKARGS);
@@ -62,8 +67,8 @@ static const v_check man_valids[MAN_MAX - MAN_TH] = {
post_TH, /* TH */
post_SH, /* SH */
post_SH, /* SS */
- NULL, /* TP */
- NULL, /* TQ */
+ post_TP, /* TP */
+ post_TP, /* TQ */
check_abort,/* LP */
check_par, /* PP */
check_abort,/* P */
@@ -201,6 +206,66 @@ check_abort(CHKARGS)
abort();
}
+/*
+ * Skip leading whitespace, dashes, backslashes, and font escapes,
+ * then create a tag if the first following byte is a letter.
+ * Priority is high unless whitespace is present.
+ */
+static void
+check_tag(struct roff_node *n, struct roff_node *nt)
+{
+ const char *cp, *arg;
+ int prio, sz;
+
+ if (nt == NULL || nt->type != ROFFT_TEXT)
+ return;
+
+ cp = nt->string;
+ prio = TAG_STRONG;
+ for (;;) {
+ switch (*cp) {
+ case ' ':
+ case '\t':
+ prio = TAG_WEAK;
+ /* FALLTHROUGH */
+ case '-':
+ cp++;
+ break;
+ case '\\':
+ cp++;
+ switch (mandoc_escape(&cp, &arg, &sz)) {
+ case ESCAPE_FONT:
+ case ESCAPE_FONTBOLD:
+ case ESCAPE_FONTITALIC:
+ case ESCAPE_FONTBI:
+ case ESCAPE_FONTROMAN:
+ case ESCAPE_FONTCW:
+ case ESCAPE_FONTPREV:
+ case ESCAPE_IGNORE:
+ break;
+ case ESCAPE_SPECIAL:
+ if (sz != 1)
+ return;
+ switch (*arg) {
+ case '-':
+ case 'e':
+ break;
+ default:
+ return;
+ }
+ break;
+ default:
+ return;
+ }
+ break;
+ default:
+ if (isalpha((unsigned char)*cp))
+ tag_put(cp, prio, n);
+ return;
+ }
+ }
+}
+
static void
check_text(CHKARGS)
{
@@ -332,12 +397,14 @@ check_par(CHKARGS)
static void
post_IP(CHKARGS)
{
-
switch (n->type) {
case ROFFT_BLOCK:
if (n->head->child == NULL && n->body->child == NULL)
roff_node_delete(man, n);
break;
+ case ROFFT_HEAD:
+ check_tag(n, n->child);
+ break;
case ROFFT_BODY:
if (n->parent->head->child == NULL && n->child == NULL)
mandoc_msg(MANDOCERR_PAR_SKIP, n->line, n->pos,
@@ -348,6 +415,37 @@ post_IP(CHKARGS)
}
}
+/*
+ * The first next-line element in the head is the tag.
+ * If that's a font macro, use its first child instead.
+ */
+static void
+post_TP(CHKARGS)
+{
+ struct roff_node *nt;
+
+ if (n->type != ROFFT_HEAD || (nt = n->child) == NULL)
+ return;
+
+ while ((nt->flags & NODE_LINE) == 0)
+ if ((nt = nt->next) == NULL)
+ return;
+
+ switch (nt->tok) {
+ case MAN_B:
+ case MAN_BI:
+ case MAN_BR:
+ case MAN_I:
+ case MAN_IB:
+ case MAN_IR:
+ nt = nt->child;
+ break;
+ default:
+ break;
+ }
+ check_tag(n, nt);
+}
+
static void
post_TH(CHKARGS)
{