From f10f0fe3970de778125a29d73e65e63f32c138e1 Mon Sep 17 00:00:00 2001 From: Ingo Schwarze Date: Fri, 13 Mar 2020 15:32:28 +0000 Subject: Split tagging into a validation part including prioritization in tag.{h,c} and {mdoc,man}_validate.c and into a formatting part including command line argument checking in term_tag.{h,c}, html.c, and {mdoc|man}_{term|html}.c. Immediate functional benefits include: * Improved prioritization of automatic tags for .Em and .Sy. * Avoiding bogus automatic tags when .Em, .Fn, or .Sy are explicitly tagged. * Explicit tagging of .Er and .Fl now works in HTML output. * Automatic tagging of .IP and .TP now works in HTML output. But mainly, this patch provides clean earth to build further improvements on. Technical changes: * Main program: Write a tag file for ASCII and UTF-8 output only. * All formatters: There is no more need to delay writing the tags. * mdoc(7)+man(7) formatters: No more need for elaborate syntax tree inspection. * HTML formatter: If available, use the "string" attribute as the tag. * HTML formatter: New function to write permalinks, to reduce code duplication. Style cleanup in the vicinity while here: * mdoc(7) terminal formatter: To set up bold font for children, defer to termp_bold_pre() rather than calling term_fontpush() manually. * mdoc(7) terminal formatter: Garbage collect some duplicate functions. * mdoc(7) HTML formatter: Unify handling, delete redundant functions. * Where possible, use switch statements rather than if cascades. * Get rid of some more Yoda notation. The necessity for such changes was first discussed with kn@, but i didn't bother him with a request to review the resulting -673/+782 line patch. --- html.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 68 insertions(+), 10 deletions(-) (limited to 'html.c') diff --git a/html.c b/html.c index babe237c..769acce8 100644 --- a/html.c +++ b/html.c @@ -1,7 +1,7 @@ -/* $Id: html.c,v 1.263 2020/02/27 22:28:13 schwarze Exp $ */ +/* $Id: html.c,v 1.264 2020/03/13 15:32:28 schwarze Exp $ */ /* - * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons * Copyright (c) 2011-2015, 2017-2020 Ingo Schwarze + * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -14,6 +14,9 @@ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Common functions for mandoc(1) HTML formatters. + * For use by individual formatters and by the main program. */ #include "config.h" @@ -321,6 +324,18 @@ html_fillmode(struct html *h, enum roff_tok want) return had; } +/* + * Allocate a string to be used for the "id=" attribute of an HTML + * element and/or as a segment identifier for a URI in an element. + * The function may fail and return NULL if the node lacks text data + * to create the attribute from. + * If the "unique" argument is 0, the caller is responsible for + * free(3)ing the returned string after using it. + * If the "unique" argument is non-zero, the "id_unique" ohash table + * is used for de-duplication and owns the returned string, so the + * caller must not free(3) it. In this case, it will be freed + * automatically by html_reset() or html_free(). + */ char * html_make_id(const struct roff_node *n, int unique) { @@ -329,14 +344,30 @@ html_make_id(const struct roff_node *n, int unique) unsigned int slot; int suffix; - for (nch = n->child; nch != NULL; nch = nch->next) - if (nch->type != ROFFT_TEXT) - return NULL; - - buf = NULL; - deroff(&buf, n); - if (buf == NULL) - return NULL; + if (n->string != NULL) + buf = mandoc_strdup(n->string); + else { + switch (n->tok) { + case MDOC_Sh: + case MDOC_Ss: + case MDOC_Sx: + case MAN_SH: + case MAN_SS: + for (nch = n->child; nch != NULL; nch = nch->next) + if (nch->type != ROFFT_TEXT) + return NULL; + buf = NULL; + deroff(&buf, n); + if (buf == NULL) + return NULL; + break; + default: + if (n->child->type != ROFFT_TEXT) + return NULL; + buf = mandoc_strdup(n->child->string); + break; + } + } /* * In ID attributes, only use ASCII characters that are @@ -736,6 +767,33 @@ print_otag(struct html *h, enum htmltag tag, const char *fmt, ...) return t; } +/* + * Print an element with an optional "id=" attribute. + * If there is an "id=" attribute, also add a permalink: + * outside if it's a phrasing element, or inside otherwise. + */ +struct tag * +print_otag_id(struct html *h, enum htmltag elemtype, const char *cattr, + struct roff_node *n) +{ + struct tag *ret, *t; + const char *id; + + ret = NULL; + id = NULL; + if (n->flags & NODE_ID) + id = html_make_id(n, 1); + if (id != NULL && htmltags[elemtype].flags & HTML_INPHRASE) + ret = print_otag(h, TAG_A, "chR", "permalink", id); + t = print_otag(h, elemtype, "ci", cattr, id); + if (ret == NULL) { + ret = t; + if (id != NULL) + print_otag(h, TAG_A, "chR", "permalink", id); + } + return ret; +} + static void print_ctag(struct html *h, struct tag *tag) { -- cgit v1.2.3-56-ge451