/* $Id: tag.c,v 1.36 2020/04/19 16:36:16 schwarze Exp $ */
/*
 * Copyright (c) 2015,2016,2018,2019,2020 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Functions to tag syntax tree nodes.
 * For internal use by mandoc(1) validation modules only.
 */
#include "config.h"

#include <sys/types.h>

#include <assert.h>
#include <limits.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include "mandoc_aux.h"
#include "mandoc_ohash.h"
#include "roff.h"
#include "mdoc.h"
#include "roff_int.h"
#include "tag.h"

/*
 * One entry of the tag table:
 * a term together with the syntax tree nodes tagged with it.
 * Entries are allocated with extra room behind the structure
 * to hold the term in the flexible array member.
 */
struct tag_entry {
	struct roff_node **nodes;	/* nodes recorded for this term */
	size_t	 maxnodes;		/* slots allocated in nodes[] */
	size_t	 nnodes;		/* slots in use in nodes[] */
	int	 prio;			/* lower numbers take precedence */
	char	 s[];			/* the term, NUL-terminated */
};

static void	 tag_move_href(struct roff_man *,
			struct roff_node *, const char *);
static void	 tag_move_id(struct roff_node *);

/* The table of all terms seen so far; see tag_alloc() and tag_free(). */
static struct ohash	 tag_data;


/*
 * Set up the ohash table to collect nodes
 * where various marked-up terms are documented.
*/ void tag_alloc(void) { mandoc_ohash_init(&tag_data, 4, offsetof(struct tag_entry, s)); } void tag_free(void) { struct tag_entry *entry; unsigned int slot; if (tag_data.info.free == NULL) return; entry = ohash_first(&tag_data, &slot); while (entry != NULL) { free(entry->nodes); free(entry); entry = ohash_next(&tag_data, &slot); } ohash_delete(&tag_data); tag_data.info.free = NULL; } /* * Set a node where a term is defined, * unless it is already defined at a lower priority. */ void tag_put(const char *s, int prio, struct roff_node *n) { struct tag_entry *entry; struct roff_node *nold; const char *se; size_t len; unsigned int slot; assert(prio <= TAG_FALLBACK); if (s == NULL) { if (n->child == NULL || n->child->type != ROFFT_TEXT) return; s = n->child->string; switch (s[0]) { case '-': s++; break; case '\\': switch (s[1]) { case '&': case '-': case 'e': s += 2; break; default: break; } break; default: break; } } /* * Skip whitespace and escapes and whatever follows, * and if there is any, downgrade the priority. */ len = strcspn(s, " \t\\"); if (len == 0) return; se = s + len; if (*se != '\0' && prio < TAG_WEAK) prio = TAG_WEAK; slot = ohash_qlookupi(&tag_data, s, &se); entry = ohash_find(&tag_data, slot); /* Build a new entry. */ if (entry == NULL) { entry = mandoc_malloc(sizeof(*entry) + len + 1); memcpy(entry->s, s, len); entry->s[len] = '\0'; entry->nodes = NULL; entry->maxnodes = entry->nnodes = 0; ohash_insert(&tag_data, slot, entry); } /* * Lower priority numbers take precedence. * If a better entry is already present, ignore the new one. */ else if (entry->prio < prio) return; /* * If the existing entry is worse, clear it. * In addition, a tag with priority TAG_FALLBACK * is only used if the tag occurs exactly once. 
*/ else if (entry->prio > prio || prio == TAG_FALLBACK) { while (entry->nnodes > 0) { nold = entry->nodes[--entry->nnodes]; nold->flags &= ~NODE_ID; free(nold->tag); nold->tag = NULL; } if (prio == TAG_FALLBACK) { entry->prio = TAG_DELETE; return; } } /* Remember the new node. */ if (entry->maxnodes == entry->nnodes) { entry->maxnodes += 4; entry->nodes = mandoc_reallocarray(entry->nodes, entry->maxnodes, sizeof(*entry->nodes)); } entry->nodes[entry->nnodes++] = n; entry->prio = prio; n->flags |= NODE_ID; if (n->child == NULL || n->child->string != s || *se != '\0') { assert(n->tag == NULL); n->tag = mandoc_strndup(s, len); } } int tag_exists(const char *tag) { return ohash_find(&tag_data, ohash_qlookup(&tag_data, tag)) != NULL; } /* * For in-line elements, move the link target * to the enclosing paragraph when appropriate. */ static void tag_move_id(struct roff_node *n) { struct roff_node *np; np = n; for (;;) { if (np->prev != NULL) np = np->prev; else if ((np = np->parent) == NULL) return; switch (np->tok) { case MDOC_It: switch (np->parent->parent->norm->Bl.type) { case LIST_column: /* Target the ROFFT_BLOCK = <tr>. */ np = np->parent; break; case LIST_diag: case LIST_hang: case LIST_inset: case LIST_ohang: case LIST_tag: /* Target the ROFFT_HEAD = <dt>. */ np = np->parent->head; break; default: /* Target the ROFF_BODY = <li>. */ break; } /* FALLTHROUGH */ case MDOC_Pp: /* Target the ROFFT_ELEM = <p>. */ if (np->tag == NULL) { np->tag = mandoc_strdup(n->tag == NULL ? n->child->string : n->tag); np->flags |= NODE_ID; n->flags &= ~NODE_ID; } return; case MDOC_Sh: case MDOC_Ss: case MDOC_Bd: case MDOC_Bl: case MDOC_D1: case MDOC_Dl: case MDOC_Rs: /* Do not move past major blocks. */ return; default: /* * Move past in-line content and partial * blocks, for example .It Xo or .It Bq Er. */ break; } } } /* * When a paragraph is tagged and starts with text, * move the permalink to the first few words. 
*/ static void tag_move_href(struct roff_man *man, struct roff_node *n, const char *tag) { char *cp; if (n == NULL || n->type != ROFFT_TEXT || *n->string == '\0' || *n->string == ' ') return; cp = n->string; while (cp != NULL && cp - n->string < 5) cp = strchr(cp + 1, ' '); /* If the first text node is longer, split it. */ if (cp != NULL && cp[1] != '\0') { man->last = n; man->next = ROFF_NEXT_SIBLING; roff_word_alloc(man, n->line, n->pos + (cp - n->string), cp + 1); man->last->flags = n->flags & ~NODE_LINE; *cp = '\0'; } assert(n->tag == NULL); n->tag = mandoc_strdup(tag); n->flags |= NODE_HREF; } /* * When all tags have been set, decide where to put * the associated permalinks, and maybe move some tags * to the beginning of the respective paragraphs. */ void tag_postprocess(struct roff_man *man, struct roff_node *n) { if (n->flags & NODE_ID) { switch (n->tok) { case MDOC_Pp: tag_move_href(man, n->next, n->tag); break; case MDOC_Bd: case MDOC_D1: case MDOC_Dl: tag_move_href(man, n->child, n->tag); break; case MDOC_Bl: /* XXX No permalink for now. */ break; default: if (n->type == ROFFT_ELEM || n->tok == MDOC_Fo) tag_move_id(n); if (n->tok != MDOC_Tg) n->flags |= NODE_HREF; else if ((n->flags & NODE_ID) == 0) { n->flags |= NODE_NOPRT; free(n->tag); n->tag = NULL; } break; } } for (n = n->child; n != NULL; n = n->next) tag_postprocess(man, n); }