From f10f0fe3970de778125a29d73e65e63f32c138e1 Mon Sep 17 00:00:00 2001 From: Ingo Schwarze Date: Fri, 13 Mar 2020 15:32:28 +0000 Subject: Split tagging into a validation part including prioritization in tag.{h,c} and {mdoc,man}_validate.c and into a formatting part including command line argument checking in term_tag.{h,c}, html.c, and {mdoc|man}_{term|html}.c. Immediate functional benefits include: * Improved prioritization of automatic tags for .Em and .Sy. * Avoiding bogus automatic tags when .Em, .Fn, or .Sy are explicitly tagged. * Explicit tagging of .Er and .Fl now works in HTML output. * Automatic tagging of .IP and .TP now works in HTML output. But mainly, this patch provides clean earth to build further improvements on. Technical changes: * Main program: Write a tag file for ASCII and UTF-8 output only. * All formatters: There is no more need to delay writing the tags. * mdoc(7)+man(7) formatters: No more need for elaborate syntax tree inspection. * HTML formatter: If available, use the "string" attribute as the tag. * HTML formatter: New function to write permalinks, to reduce code duplication. Style cleanup in the vicinity while here: * mdoc(7) terminal formatter: To set up bold font for children, defer to termp_bold_pre() rather than calling term_fontpush() manually. * mdoc(7) terminal formatter: Garbage collect some duplicate functions. * mdoc(7) HTML formatter: Unify handling, delete redundant functions. * Where possible, use switch statements rather than if cascades. * Get rid of some more Yoda notation. The necessity for such changes was first discussed with kn@, but i didn't bother him with a request to review the resulting -673/+782 line patch. --- tag.c | 280 ++++++++++++++++++------------------------------------------------ 1 file changed, 76 insertions(+), 204 deletions(-) (limited to 'tag.c') diff --git a/tag.c b/tag.c index 6df91c8b..50e74f48 100644 --- a/tag.c +++ b/tag.c @@ -1,4 +1,4 @@ -/* $Id: tag.c,v 1.27 2020/01/20 10:37:15 schwarze Exp $ */ +/* $Id: tag.c,v 1.28 2020/03/13 15:32:29 schwarze Exp $ */ /* * Copyright (c) 2015,2016,2018,2019,2020 Ingo Schwarze * @@ -13,134 +13,67 @@ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Functions to tag syntax tree nodes. + * For internal use by mandoc(1) validation modules only. */ #include "config.h" #include #include -#include #include -#include #include -#include -#include #include #include -#include #include "mandoc_aux.h" #include "mandoc_ohash.h" -#include "mandoc.h" +#include "roff.h" #include "tag.h" struct tag_entry { - size_t *lines; - size_t maxlines; - size_t nlines; + struct roff_node **nodes; + size_t maxnodes; + size_t nnodes; int prio; char s[]; }; -static void tag_signal(int) __attribute__((__noreturn__)); - static struct ohash tag_data; -static struct tag_files tag_files; /* - * Prepare for using a pager. - * Not all pagers are capable of using a tag file, - * but for simplicity, create it anyway. + * Set up the ohash table to collect nodes + * where various marked-up terms are documented. */ -struct tag_files * -tag_init(char *tagname) +void +tag_alloc(void) { - struct sigaction sa; - int ofd; - - ofd = -1; - tag_files.tfd = -1; - tag_files.tcpgid = -1; - tag_files.tagname = tagname; - - /* Clean up when dying from a signal. */ - - memset(&sa, 0, sizeof(sa)); - sigfillset(&sa.sa_mask); - sa.sa_handler = tag_signal; - sigaction(SIGHUP, &sa, NULL); - sigaction(SIGINT, &sa, NULL); - sigaction(SIGTERM, &sa, NULL); - - /* - * POSIX requires that a process calling tcsetpgrp(3) - * from the background gets a SIGTTOU signal. - * In that case, do not stop. - */ - - sa.sa_handler = SIG_IGN; - sigaction(SIGTTOU, &sa, NULL); - - /* Save the original standard output for use by the pager. */ - - if ((tag_files.ofd = dup(STDOUT_FILENO)) == -1) { - mandoc_msg(MANDOCERR_DUP, 0, 0, "%s", strerror(errno)); - goto fail; - } + mandoc_ohash_init(&tag_data, 4, offsetof(struct tag_entry, s)); +} - /* Create both temporary output files. */ +void +tag_free(void) +{ + struct tag_entry *entry; + unsigned int slot; - (void)strlcpy(tag_files.ofn, "/tmp/man.XXXXXXXXXX", - sizeof(tag_files.ofn)); - (void)strlcpy(tag_files.tfn, "/tmp/man.XXXXXXXXXX", - sizeof(tag_files.tfn)); - if ((ofd = mkstemp(tag_files.ofn)) == -1) { - mandoc_msg(MANDOCERR_MKSTEMP, 0, 0, - "%s: %s", tag_files.ofn, strerror(errno)); - goto fail; - } - if ((tag_files.tfd = mkstemp(tag_files.tfn)) == -1) { - mandoc_msg(MANDOCERR_MKSTEMP, 0, 0, - "%s: %s", tag_files.tfn, strerror(errno)); - goto fail; - } - if (dup2(ofd, STDOUT_FILENO) == -1) { - mandoc_msg(MANDOCERR_DUP, 0, 0, "%s", strerror(errno)); - goto fail; + entry = ohash_first(&tag_data, &slot); + while (entry != NULL) { + free(entry->nodes); + free(entry); + entry = ohash_next(&tag_data, &slot); } - close(ofd); - - /* - * Set up the ohash table to collect output line numbers - * where various marked-up terms are documented. - */ - - mandoc_ohash_init(&tag_data, 4, offsetof(struct tag_entry, s)); - return &tag_files; - -fail: - tag_unlink(); - if (ofd != -1) - close(ofd); - if (tag_files.ofd != -1) - close(tag_files.ofd); - if (tag_files.tfd != -1) - close(tag_files.tfd); - *tag_files.ofn = '\0'; - *tag_files.tfn = '\0'; - tag_files.ofd = -1; - tag_files.tfd = -1; - tag_files.tagname = NULL; - return NULL; + ohash_delete(&tag_data); } /* - * Set the line number where a term is defined, + * Set a node where a term is defined, * unless it is already defined at a lower priority. */ void -tag_put(const char *s, int prio, size_t line) +tag_put(const char *s, int prio, struct roff_node *n) { struct tag_entry *entry; const char *se; @@ -148,11 +81,14 @@ tag_put(const char *s, int prio, size_t line) unsigned int slot; assert(prio <= TAG_FALLBACK); - if (tag_files.tfd <= 0) - return; - if (s[0] == '\\' && (s[1] == '&' || s[1] == 'e')) - s += 2; + if (s == NULL) { + if (n->child == NULL || n->child->type != ROFFT_TEXT) + return; + s = n->child->string; + if (s[0] == '\\' && (s[1] == '&' || s[1] == 'e')) + s += 2; + } /* * Skip whitespace and escapes and whatever follows, @@ -170,131 +106,67 @@ tag_put(const char *s, int prio, size_t line) slot = ohash_qlookupi(&tag_data, s, &se); entry = ohash_find(&tag_data, slot); - if (entry == NULL) { - - /* Build a new entry. */ + /* Build a new entry. */ + if (entry == NULL) { entry = mandoc_malloc(sizeof(*entry) + len + 1); memcpy(entry->s, s, len); entry->s[len] = '\0'; - entry->lines = NULL; - entry->maxlines = entry->nlines = 0; + entry->nodes = NULL; + entry->maxnodes = entry->nnodes = 0; ohash_insert(&tag_data, slot, entry); + } - } else { - - /* - * Lower priority numbers take precedence, - * but TAG_FALLBACK is special. - * A tag with priority TAG_FALLBACK is only used - * if the tag occurs exactly once. - */ + /* + * Lower priority numbers take precedence. + * If a better entry is already present, ignore the new one. + */ - if (prio == TAG_FALLBACK) { - if (entry->prio == TAG_FALLBACK) - entry->prio = TAG_DELETE; + else if (entry->prio < prio) return; - } - /* A better entry is already present, ignore the new one. */ - - if (entry->prio < prio) - return; + /* + * If the existing entry is worse, clear it. + * In addition, a tag with priority TAG_FALLBACK + * is only used if the tag occurs exactly once. + */ - /* The existing entry is worse, clear it. */ + else if (entry->prio > prio || prio == TAG_FALLBACK) { + while (entry->nnodes > 0) + entry->nodes[--entry->nnodes]->flags &= ~NODE_ID; - if (entry->prio > prio) - entry->nlines = 0; + if (prio == TAG_FALLBACK) { + entry->prio = TAG_DELETE; + return; + } } - /* Remember the new line. */ + /* Remember the new node. */ - if (entry->maxlines == entry->nlines) { - entry->maxlines += 4; - entry->lines = mandoc_reallocarray(entry->lines, - entry->maxlines, sizeof(*entry->lines)); + if (entry->maxnodes == entry->nnodes) { + entry->maxnodes += 4; + entry->nodes = mandoc_reallocarray(entry->nodes, + entry->maxnodes, sizeof(*entry->nodes)); } - entry->lines[entry->nlines++] = line; + entry->nodes[entry->nnodes++] = n; entry->prio = prio; -} - -/* - * Write out the tags file using the previously collected - * information and clear the ohash table while going along. - */ -void -tag_write(void) -{ - FILE *stream; - struct tag_entry *entry; - size_t i; - unsigned int slot; - int empty; - - if (tag_files.tfd <= 0) - return; - if (tag_files.tagname != NULL && ohash_find(&tag_data, - ohash_qlookup(&tag_data, tag_files.tagname)) == NULL) { - mandoc_msg(MANDOCERR_TAG, 0, 0, "%s", tag_files.tagname); - tag_files.tagname = NULL; - } - if ((stream = fdopen(tag_files.tfd, "w")) == NULL) - mandoc_msg(MANDOCERR_FDOPEN, 0, 0, "%s", strerror(errno)); - empty = 1; - entry = ohash_first(&tag_data, &slot); - while (entry != NULL) { - if (stream != NULL && entry->prio < TAG_DELETE) { - for (i = 0; i < entry->nlines; i++) { - fprintf(stream, "%s %s %zu\n", - entry->s, tag_files.ofn, entry->lines[i]); - empty = 0; - } - } - free(entry->lines); - free(entry); - entry = ohash_next(&tag_data, &slot); - } - ohash_delete(&tag_data); - if (stream != NULL) - fclose(stream); - else - close(tag_files.tfd); - tag_files.tfd = -1; - if (empty) { - unlink(tag_files.tfn); - *tag_files.tfn = '\0'; + n->flags |= NODE_ID; + if (n->child == NULL || n->child->string != s || *se != '\0') { + assert(n->string == NULL); + n->string = mandoc_strndup(s, len); } } -void -tag_unlink(void) +enum tag_result +tag_check(const char *test_tag) { - pid_t tc_pgid; + unsigned int slot; - if (tag_files.tcpgid != -1) { - tc_pgid = tcgetpgrp(tag_files.ofd); - if (tc_pgid == tag_files.pager_pid || - tc_pgid == getpgid(0) || - getpgid(tc_pgid) == -1) - (void)tcsetpgrp(tag_files.ofd, tag_files.tcpgid); - } - if (*tag_files.ofn != '\0') - unlink(tag_files.ofn); - if (*tag_files.tfn != '\0') - unlink(tag_files.tfn); -} - -static void -tag_signal(int signum) -{ - struct sigaction sa; - - tag_unlink(); - memset(&sa, 0, sizeof(sa)); - sigemptyset(&sa.sa_mask); - sa.sa_handler = SIG_DFL; - sigaction(signum, &sa, NULL); - kill(getpid(), signum); - /* NOTREACHED */ - _exit(1); + if (ohash_first(&tag_data, &slot) == NULL) + return TAG_EMPTY; + else if (test_tag != NULL && ohash_find(&tag_data, + ohash_qlookup(&tag_data, test_tag)) == NULL) + return TAG_MISS; + else + return TAG_OK; } -- cgit v1.2.3-56-ge451