-/* $Id: mdoc_markdown.c,v 1.11 2017/03/08 17:40:55 schwarze Exp $ */
+/* $Id: mdoc_markdown.c,v 1.37 2021/08/10 12:55:03 schwarze Exp $ */
/*
- * Copyright (c) 2017 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2017, 2018, 2020 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Markdown formatter for mdoc(7) used by mandoc(1).
*/
+#include "config.h"
+
#include <sys/types.h>
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
#include "mandoc_aux.h"
#include "main.h"
struct md_act {
- int (*cond)(struct roff_node *n);
- int (*pre)(struct roff_node *n);
- void (*post)(struct roff_node *n);
+ int (*cond)(struct roff_node *); /* if non-NULL, must return nonzero for pre to run */
+ int (*pre)(struct roff_node *); /* return value decides whether children are processed */
+ void (*post)(struct roff_node *); /* NOTE(review): call site not in view; presumably run after the node */
const char *prefix; /* pre-node string constant */
const char *suffix; /* post-node string constant */
};
static void md_nodelist(struct roff_node *);
static void md_node(struct roff_node *);
-static const char *md_stack(char c);
+static const char *md_stack(char);
static void md_preword(void);
static void md_rawword(const char *);
static void md_word(const char *);
static void md_named(const char *);
static void md_char(unsigned char);
+static void md_uri(const char *);
static int md_cond_head(struct roff_node *);
static int md_cond_body(struct roff_node *);
+static int md_pre_abort(struct roff_node *);
static int md_pre_raw(struct roff_node *);
static int md_pre_word(struct roff_node *);
static int md_pre_skip(struct roff_node *);
static int md_pre_In(struct roff_node *);
static int md_pre_It(struct roff_node *);
static int md_pre_Lk(struct roff_node *);
+static int md_pre_Mt(struct roff_node *);
static int md_pre_Nd(struct roff_node *);
static int md_pre_Nm(struct roff_node *);
static int md_pre_No(struct roff_node *);
static void md_post_Vt(struct roff_node *);
static void md_post__T(struct roff_node *);
-static const struct md_act md_acts[MDOC_MAX + 1] = {
- { NULL, md_pre_Ap, NULL, NULL, NULL }, /* Ap */
+static const struct md_act md_acts[MDOC_MAX - MDOC_Dd] = {
{ NULL, NULL, NULL, NULL, NULL }, /* Dd */
{ NULL, NULL, NULL, NULL, NULL }, /* Dt */
{ NULL, NULL, NULL, NULL, NULL }, /* Os */
{ NULL, md_pre_It, md_post_It, NULL, NULL }, /* It */
{ NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Ad */
{ NULL, md_pre_An, NULL, NULL, NULL }, /* An */
+ { NULL, md_pre_Ap, NULL, NULL, NULL }, /* Ap */
{ NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Ar */
{ NULL, md_pre_raw, md_post_raw, "**", "**" }, /* Cd */
{ NULL, md_pre_raw, md_post_raw, "**", "**" }, /* Cm */
{ md_cond_head, md_pre_Nd, NULL, NULL, NULL }, /* Nd */
{ NULL, md_pre_Nm, md_post_Nm, "**", "**" }, /* Nm */
{ md_cond_body, md_pre_word, md_post_word, "[", "]" }, /* Op */
- { NULL, md_pre_Fd, md_post_raw, "*", "*" }, /* Ot */
+ { NULL, md_pre_abort, NULL, NULL, NULL }, /* Ot */
{ NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Pa */
{ NULL, NULL, NULL, NULL, NULL }, /* Rv */
{ NULL, NULL, NULL, NULL, NULL }, /* St */
{ NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Fr */
{ NULL, NULL, NULL, NULL, NULL }, /* Ud */
{ NULL, NULL, md_post_Lb, NULL, NULL }, /* Lb */
- { NULL, md_pre_Pp, NULL, NULL, NULL }, /* Lp */
+ { NULL, md_pre_abort, NULL, NULL, NULL }, /* Lp */
{ NULL, md_pre_Lk, NULL, NULL, NULL }, /* Lk */
- { NULL, md_pre_raw, md_post_raw, "<", ">" }, /* Mt */
+ { NULL, md_pre_Mt, NULL, NULL, NULL }, /* Mt */
{ md_cond_body, md_pre_word, md_post_word, "{", "}" }, /* Brq */
{ md_cond_body, md_pre_word, md_post_word, "{", "}" }, /* Bro */
{ NULL, NULL, NULL, NULL, NULL }, /* Brc */
{ md_cond_body, md_pre_En, md_post_En, NULL, NULL }, /* En */
{ NULL, NULL, NULL, NULL, NULL }, /* Dx */
{ NULL, NULL, md_post_pc, NULL, NULL }, /* %Q */
- { NULL, md_pre_br, NULL, NULL, NULL }, /* br */
- { NULL, md_pre_Pp, NULL, NULL, NULL }, /* sp */
{ NULL, md_pre_Lk, md_post_pc, NULL, NULL }, /* %U */
{ NULL, NULL, NULL, NULL, NULL }, /* Ta */
- { NULL, NULL, NULL, NULL, NULL }, /* ll */
- { NULL, NULL, NULL, NULL, NULL }, /* ROOT */
+ { NULL, md_pre_skip, NULL, NULL, NULL }, /* Tg */
};
+static const struct md_act *md_act(enum roff_tok);
static int outflags;
#define MD_spc (1 << 0) /* Blank character before next word. */
#define ESC_BOL (1 << 0) /* "#*+-" near the beginning of a line. */
#define ESC_NUM (1 << 1) /* "." after a leading number. */
#define ESC_HYP (1 << 2) /* "(" immediately after "]". */
-#define ESC_PAR (1 << 3) /* ")" when "(" is open. */
#define ESC_SQU (1 << 4) /* "]" when "[" is open. */
#define ESC_FON (1 << 5) /* "*" immediately after unrelated "*". */
#define ESC_EOL (1 << 6) /* " " at the and of a line. */
static int code_blocks, quote_blocks, list_blocks;
static int outcount;
+
+/*
+ * Map an mdoc(7) macro token to its entry in md_acts[].
+ * The table holds MDOC_MAX - MDOC_Dd entries, indexed by tok - MDOC_Dd,
+ * so MDOC_MAX itself is one past the last valid token and is rejected
+ * together with everything below MDOC_Dd.
+ */
+static const struct md_act *
+md_act(enum roff_tok tok)
+{
+	assert(tok >= MDOC_Dd && tok < MDOC_MAX);
+	return md_acts + (tok - MDOC_Dd);
+}
+
+/*
+ * Format one mdoc(7) document as markdown.
+ * Through md_word(), first write a header made of the title, the
+ * optional manual section in parentheses, a dash, the volume, and the
+ * optional architecture in parentheses.  Then format the node tree
+ * starting at mdoc->first->child with md_nodelist().  Finally write
+ * a footer made of the operating system, a dash, and the date,
+ * followed by a newline character.
+ * The arg parameter is not used by this function.
+ */
void
-markdown_mdoc(void *arg, const struct roff_man *mdoc)
+markdown_mdoc(void *arg, const struct roff_meta *mdoc)
{
outflags = MD_Sm;
- md_word(mdoc->meta.title);
- if (mdoc->meta.msec != NULL) {
+ md_word(mdoc->title);
+ if (mdoc->msec != NULL) {
outflags &= ~MD_spc;
md_word("(");
- md_word(mdoc->meta.msec);
+ md_word(mdoc->msec);
md_word(")");
}
md_word("-");
- md_word(mdoc->meta.vol);
- if (mdoc->meta.arch != NULL) {
+ md_word(mdoc->vol);
+ if (mdoc->arch != NULL) {
md_word("(");
- md_word(mdoc->meta.arch);
+ md_word(mdoc->arch);
md_word(")");
}
outflags |= MD_sp;
md_nodelist(mdoc->first->child);
outflags |= MD_sp;
- md_word(mdoc->meta.os);
+ md_word(mdoc->os);
md_word("-");
- md_word(mdoc->meta.date);
+ md_word(mdoc->date);
putchar('\n');
}
const struct md_act *act;
int cond, process_children;
- if (n->flags & NODE_NOPRT)
+ if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
return;
if (outflags & MD_nonl)
outflags &= ~(MD_nl | MD_sp);
- else if (outflags & MD_spc && n->flags & NODE_LINE)
+ else if (outflags & MD_spc &&
+ n->flags & NODE_LINE &&
+ !roff_node_transparent(n))
outflags |= MD_nl;
act = NULL;
process_children = 1;
n->flags &= ~NODE_ENDED;
- switch (n->type) {
- case ROFFT_TEXT:
+ if (n->type == ROFFT_TEXT) {
if (n->flags & NODE_DELIMC)
outflags &= ~(MD_spc | MD_spc_force);
else if (outflags & MD_Sm)
outflags &= ~(MD_spc | MD_spc_force);
else if (outflags & MD_Sm)
outflags |= MD_spc;
- break;
- default:
- act = md_acts + n->tok;
+ } else if (n->tok < ROFF_MAX) {
+ switch (n->tok) {
+ case ROFF_br:
+ process_children = md_pre_br(n);
+ break;
+ case ROFF_sp:
+ process_children = md_pre_Pp(n);
+ break;
+ default:
+ process_children = 0;
+ break;
+ }
+ } else {
+ act = md_act(n->tok);
cond = act->cond == NULL || (*act->cond)(n);
if (cond && act->pre != NULL &&
(n->end == ENDBODY_NOT || n->child != NULL))
process_children = (*act->pre)(n);
- break;
}
if (process_children && n->child != NULL)
while (*s != '\0') {
switch(*s) {
- case '(':
- escflags |= ESC_PAR;
- break;
- case ')':
- escflags |= ~ESC_PAR;
- break;
case '*':
if (s[1] == '\0')
escflags |= ESC_FON;
{
const char *seq, *prevfont, *currfont, *nextfont;
char c;
- int bs, sz, uc;
+ int bs, sz, uc, breakline;
/* No spacing before closing delimiters. */
if (s[0] != '\0' && s[1] == '\0' &&
if ((s[0] == '(' || s[0] == '[') && s[1] == '\0')
outflags &= ~MD_spc;
+ breakline = 0;
prevfont = currfont = "";
while ((c = *s++) != '\0') {
bs = 0;
bs = escflags & ESC_HYP && !code_blocks;
break;
case ')':
- bs = escflags & ESC_PAR && !code_blocks;
+ bs = escflags & ESC_NUM && !code_blocks;
break;
case '*':
case '[':
case ESCAPE_SPECIAL:
uc = mchars_spec2cp(seq, sz);
break;
+ case ESCAPE_UNDEF:
+ uc = *seq;
+ break;
+ case ESCAPE_DEVICE:
+ md_rawword("markdown");
+ continue;
case ESCAPE_FONTBOLD:
+ case ESCAPE_FONTCB:
nextfont = "**";
break;
case ESCAPE_FONTITALIC:
+ case ESCAPE_FONTCI:
nextfont = "*";
break;
case ESCAPE_FONTBI:
nextfont = "***";
break;
case ESCAPE_FONT:
+ case ESCAPE_FONTCR:
case ESCAPE_FONTROMAN:
nextfont = "";
break;
case ESCAPE_FONTPREV:
nextfont = prevfont;
break;
+ case ESCAPE_BREAK:
+ breakline = 1;
+ break;
case ESCAPE_NOSPACE:
case ESCAPE_SKIPCHAR:
case ESCAPE_OVERSTRIKE:
if (bs)
putchar('\\');
md_char(c);
+ if (breakline &&
+ (*s == '\0' || *s == ' ' || *s == ASCII_NBRSP)) {
+ printf(" \n");
+ breakline = 0;
+ while (*s == ' ' || *s == ASCII_NBRSP)
+ s++;
+ }
}
if (*currfont != '\0') {
outflags &= ~MD_spc;
return n->type == ROFFT_BODY;
}
+/*
+ * Pre-node handler that unconditionally terminates the program
+ * with abort(3); it never returns and does not look at n.
+ * The md_acts[] table installs it for the Ot and Lp entries.
+ * NOTE(review): presumably nodes with these tokens are never expected
+ * to reach this formatter - confirm against the parser.
+ */
+static int
+md_pre_abort(struct roff_node *n)
+{
+ abort();
+}
+
+/*
+ * Pre-node handler: if the md_acts[] entry for this node's token
+ * has a prefix string, write it with md_rawword() and clear MD_spc.
+ * A prefix starting with a backquote also increments code_blocks.
+ * Always returns 1.
+ */
static int
md_pre_raw(struct roff_node *n)
{
const char *prefix;
- if ((prefix = md_acts[n->tok].prefix) != NULL) {
+ if ((prefix = md_act(n->tok)->prefix) != NULL) {
md_rawword(prefix);
outflags &= ~MD_spc;
+ if (*prefix == '`')
+ code_blocks++;
}
return 1;
}
{
const char *suffix;
- if ((suffix = md_acts[n->tok].suffix) != NULL) {
+ if ((suffix = md_act(n->tok)->suffix) != NULL) {
outflags &= ~(MD_spc | MD_nl);
md_rawword(suffix);
+ if (*suffix == '`')
+ code_blocks--;
}
}
{
const char *prefix;
- if ((prefix = md_acts[n->tok].prefix) != NULL) {
+ if ((prefix = md_act(n->tok)->prefix) != NULL) {
md_word(prefix);
outflags &= ~MD_spc;
}
{
const char *suffix;
- if ((suffix = md_acts[n->tok].suffix) != NULL) {
+ if ((suffix = md_act(n->tok)->suffix) != NULL) {
outflags &= ~(MD_spc | MD_nl);
md_word(suffix);
}
static void
md_post_pc(struct roff_node *n)
{
+ struct roff_node *nn;
+
md_post_raw(n);
if (n->parent->tok != MDOC_Rs)
return;
- if (n->next != NULL) {
+
+ if ((nn = roff_node_next(n)) != NULL) {
md_word(",");
- if (n->prev != NULL &&
- n->prev->tok == n->tok &&
- n->next->tok == n->tok)
+ if (nn->tok == n->tok &&
+ (nn = roff_node_prev(n)) != NULL &&
+ nn->tok == n->tok)
md_word("and");
} else {
md_word(".");
static void
md_pre_syn(struct roff_node *n)
{
- if (n->prev == NULL || ! (n->flags & NODE_SYNPRETTY))
+ struct roff_node *np;
+
+ if ((n->flags & NODE_SYNPRETTY) == 0 ||
+ (np = roff_node_prev(n)) == NULL)
return;
- if (n->prev->tok == n->tok &&
+ if (np->tok == n->tok &&
n->tok != MDOC_Ft &&
n->tok != MDOC_Fo &&
n->tok != MDOC_Fn) {
return;
}
- switch (n->prev->tok) {
+ switch (np->tok) {
case MDOC_Fd:
case MDOC_Fn:
case MDOC_Fo:
+/*
+ * Post-node handler for Fa: write a comma with md_word() when the
+ * node returned by roff_node_next() exists and is an Fa node, too.
+ */
static void
md_post_Fa(struct roff_node *n)
{
- if (n->next != NULL && n->next->tok == MDOC_Fa)
+ struct roff_node *nn;
+
+ if ((nn = roff_node_next(n)) != NULL && nn->tok == MDOC_Fa)
md_word(",");
}
+/*
+ * Post-node handler for Fl: run md_post_raw(), then clear MD_spc
+ * if this node has no children and the node returned by
+ * roff_node_next() exists, is not a text node, and does not have
+ * the NODE_LINE flag set.
+ */
static void
md_post_Fl(struct roff_node *n)
{
+ struct roff_node *nn;
+
md_post_raw(n);
- if (n->child == NULL && n->next != NULL &&
- n->next->type != ROFFT_TEXT && !(n->next->flags & NODE_LINE))
+ if (n->child == NULL && (nn = roff_node_next(n)) != NULL &&
+ nn->type != ROFFT_TEXT && (nn->flags & NODE_LINE) == 0)
outflags &= ~MD_spc;
}
while ((n = n->prev) != NULL && n->type != ROFFT_HEAD)
i++;
- /*
+ /*
* If a width was specified for this column,
* subtract what printed, and
* add the same spacing as in mdoc_term.c.
outflags |= MD_br;
}
+/*
+ * Write the string s to standard output for use as a link target.
+ * Each of the bytes '%', '(', ')', '<', and '>' is replaced by a
+ * percent sign followed by its two-digit uppercase hexadecimal value;
+ * all other bytes are copied unchanged.  The global outcount is
+ * advanced by the number of bytes written.
+ */
+static void
+md_uri(const char *s)
+{
+	const char	*cp;
+
+	for (cp = s; *cp != '\0'; cp++) {
+		/* The common case: no encoding needed. */
+		if (strchr("%()<>", *cp) == NULL) {
+			putchar(*cp);
+			outcount++;
+			continue;
+		}
+		printf("%%%2.2hhX", *cp);
+		outcount += 3;
+	}
+}
+
+/*
+ * Pre-node handler for Lk: write the node as "[text](target)".
+ * The first child supplies the link target, which is written with
+ * md_uri().  The children after it, up to but not including the
+ * trailing run of NODE_DELIMC nodes, supply the link text; if there
+ * are none, the target string itself is used as the text.
+ * The trailing NODE_DELIMC nodes are written after the link.
+ * Returns 0 in all cases, including when the node has no children,
+ * in which case nothing is written.
+ */
static int
md_pre_Lk(struct roff_node *n)
{
- const struct roff_node *link, *descr;
- const unsigned char *s;
+ const struct roff_node *link, *descr, *punct;
if ((link = n->child) == NULL)
return 0;
- if ((descr = link->next) != NULL) {
- md_rawword("[");
- outflags &= ~MD_spc;
- while (descr != NULL) {
- md_word(descr->string);
- descr = descr->next;
- }
- outflags &= ~MD_spc;
- md_rawword("](");
- } else
- md_rawword("<");
+ /* Find beginning of trailing punctuation. */
+ punct = n->last;
+ while (punct != link && punct->flags & NODE_DELIMC)
+ punct = punct->prev;
+ punct = punct->next;
+
+ /* Link text. */
+ descr = link->next;
+ if (descr == punct)
+ descr = link; /* no text */
+ md_rawword("[");
+ outflags &= ~MD_spc;
+ do {
+ md_word(descr->string);
+ descr = descr->next;
+ } while (descr != punct);
+ outflags &= ~MD_spc;
- for (s = link->string; *s != '\0'; s++) {
- if (strchr("%)<>", *s) != NULL) {
- printf("%%%2.2hhX", *s);
- outcount += 3;
- } else {
- putchar(*s);
+ /* Link target. */
+ md_rawword("](");
+ md_uri(link->string);
+ outflags &= ~MD_spc;
+ md_rawword(")");
+
+ /* Trailing punctuation. */
+ while (punct != NULL) {
+ md_word(punct->string);
+ punct = punct->next;
+ }
+ return 0;
+}
+
+/*
+ * Pre-node handler for Mt: write the node as "[text](mailto:target)".
+ * The link text consists of the strings of all children, written
+ * with md_word().  The target consists of the same strings, written
+ * with md_uri() and separated from each other by single space
+ * characters.  Returns 0.
+ */
+static int
+md_pre_Mt(struct roff_node *n)
+{
+ const struct roff_node *nch;
+
+ md_rawword("[");
+ outflags &= ~MD_spc;
+ for (nch = n->child; nch != NULL; nch = nch->next)
+ md_word(nch->string);
+ outflags &= ~MD_spc;
+ md_rawword("](mailto:");
+ for (nch = n->child; nch != NULL; nch = nch->next) {
+ md_uri(nch->string);
+ if (nch->next != NULL) {
+ putchar(' ');
outcount++;
}
}
-
outflags &= ~MD_spc;
- md_rawword(link->next == NULL ? ">" : ")");
+ md_rawword(")");
return 0;
}