/* $Id: roff.c,v 1.73 2010/05/15 22:28:22 kristaps Exp $ */ /* * Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include <assert.h> #include <stdlib.h> #include <string.h> #include "mandoc.h" #include "roff.h" enum rofft { ROFF_de, ROFF_dei, ROFF_am, ROFF_ami, ROFF_ig, ROFF_close, ROFF_MAX }; struct roff { struct roffnode *last; /* leaf of stack */ mandocmsg msg; /* err/warn/fatal messages */ void *data; /* privdata for messages */ }; struct roffnode { enum rofft tok; /* type of node */ struct roffnode *parent; /* up one in stack */ char *end; /* custom end-token */ int line; /* parse line */ int col; /* parse col */ }; #define ROFF_ARGS struct roff *r, /* parse ctx */ \ enum rofft tok, /* tok of macro */ \ char **bufp, /* input buffer */ \ size_t *szp, /* size of input buffer */ \ int ln, /* parse line */ \ int ppos /* current pos in buffer */ typedef enum rofferr (*roffproc)(ROFF_ARGS); struct roffmac { const char *name; /* macro name */ roffproc sub; /* child of control black */ roffproc new; /* root of stack (type = ROFF_MAX) */ }; static enum rofferr roff_new_close(ROFF_ARGS); static enum rofferr roff_new_ig(ROFF_ARGS); static enum rofferr roff_sub_ig(ROFF_ARGS); const struct roffmac roffs[ROFF_MAX] = { { "de", roff_sub_ig, roff_new_ig }, { "dei", roff_sub_ig, roff_new_ig }, { "am", roff_sub_ig, roff_new_ig }, { "ami", roff_sub_ig, roff_new_ig }, { "ig", roff_sub_ig, roff_new_ig }, { ".", NULL, roff_new_close }, }; static void roff_free1(struct roff *); static enum rofft roff_hash_find(const char *); static int roffnode_push(struct roff *, enum rofft, int, int); static void roffnode_pop(struct roff *); static enum rofft roff_parse(const char *, int *); /* * Look up a roff token by its name. Returns ROFF_MAX if no macro by * the nil-terminated string name could be found. */ static enum rofft roff_hash_find(const char *p) { int i; /* FIXME: make this be fast and efficient. */ for (i = 0; i < (int)ROFF_MAX; i++) if (0 == strcmp(roffs[i].name, p)) return((enum rofft)i); return(ROFF_MAX); } /* * Pop the current node off of the stack of roff instructions currently * pending. */ static void roffnode_pop(struct roff *r) { struct roffnode *p; if (NULL == (p = r->last)) return; r->last = p->parent; free(p); } /* * Push a roff node onto the instruction stack. This must later be * removed with roffnode_pop(). */ static int roffnode_push(struct roff *r, enum rofft tok, int line, int col) { struct roffnode *p; if (NULL == (p = calloc(1, sizeof(struct roffnode)))) { (*r->msg)(MANDOCERR_MEM, r->data, line, col, NULL); return(0); } p->tok = tok; p->parent = r->last; p->line = line; p->col = col; r->last = p; return(1); } static void roff_free1(struct roff *r) { while (r->last) roffnode_pop(r); } void roff_reset(struct roff *r) { roff_free1(r); } void roff_free(struct roff *r) { roff_free1(r); free(r); } struct roff * roff_alloc(const mandocmsg msg, void *data) { struct roff *r; if (NULL == (r = calloc(1, sizeof(struct roff)))) { (*msg)(MANDOCERR_MEM, data, 0, 0, NULL); return(0); } r->msg = msg; r->data = data; return(r); } enum rofferr roff_parseln(struct roff *r, int ln, char **bufp, size_t *szp) { enum rofft t; int ppos; if (NULL != r->last) { /* * If there's a node on the stack, then jump directly * into its processing function. */ t = r->last->tok; assert(roffs[t].sub); return((*roffs[t].sub)(r, t, bufp, szp, ln, 0)); } else if ('.' != (*bufp)[0] && NULL == r->last) /* Return when in free text without a context. */ return(ROFF_CONT); /* There's nothing on the stack: make us anew. */ if (ROFF_MAX == (t = roff_parse(*bufp, &ppos))) return(ROFF_CONT); assert(roffs[t].new); return((*roffs[t].new)(r, t, bufp, szp, ln, ppos)); } /* * Parse a roff node's type from the input buffer. This must be in the * form of ".foo xxx" in the usual way. */ static enum rofft roff_parse(const char *buf, int *pos) { int j; char mac[5]; enum rofft t; assert('.' == buf[0]); *pos = 1; while (buf[*pos] && (' ' == buf[*pos] || '\t' == buf[*pos])) (*pos)++; if ('\0' == buf[*pos]) return(ROFF_MAX); for (j = 0; j < 4; j++, (*pos)++) if ('\0' == (mac[j] = buf[*pos])) break; else if (' ' == buf[*pos]) break; if (j == 4 || j < 1) return(ROFF_MAX); mac[j] = '\0'; if (ROFF_MAX == (t = roff_hash_find(mac))) return(t); while (buf[*pos] && ' ' == buf[*pos]) (*pos)++; return(t); } /* ARGSUSED */ static enum rofferr roff_sub_ig(ROFF_ARGS) { int i, j; /* Ignore free-text lines. */ if ('.' != (*bufp)[ppos]) return(ROFF_IGN); if (r->last->end) { i = ppos + 1; while ((*bufp)[i] && ' ' == (*bufp)[i]) i++; for (j = 0; r->last->end[j]; i++, j++) if ((*bufp)[i] != r->last->end[j]) return(ROFF_IGN); if (r->last->end[j]) return(ROFF_IGN); if ((*bufp)[i] && ' ' != (*bufp)[i]) return(ROFF_IGN); while (' ' == (*bufp)[i]) i++; } else if (ROFF_close != roff_parse(*bufp, &i)) return(ROFF_IGN); roffnode_pop(r); if ('\0' == (*bufp)[i]) return(ROFF_IGN); if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, i, NULL)) return(ROFF_ERR); return(ROFF_IGN); } /* ARGSUSED */ static enum rofferr roff_new_close(ROFF_ARGS) { if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL)) return(ROFF_ERR); return(ROFF_IGN); } /* ARGSUSED */ static enum rofferr roff_new_ig(ROFF_ARGS) { int i; if ( ! roffnode_push(r, tok, ln, ppos)) return(ROFF_ERR); if (ROFF_ig != tok) { while ((*bufp)[ppos] && ' ' != (*bufp)[ppos]) ppos++; while (' ' == (*bufp)[ppos]) ppos++; } i = (int)ppos; while ((*bufp)[i] && ' ' != (*bufp)[i]) i++; if (i == (int)ppos) return(ROFF_IGN); if ((*bufp)[i]) if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, i, NULL)) return(ROFF_ERR); /* * If the macro has arguments, the first argument (up to the * next whitespace) is interpreted as an argument marking the * macro close. Thus, `.ig foo' will close at `.foo'. * * NOTE: the closing macro `.foo' in the above case is not * allowed to have leading spaces with old groff! Thus `.foo' * != `. foo'. Oh yeah, everything after the `.foo' is lost. * Merry fucking Christmas. */ r->last->end = malloc((size_t)(i - ppos) + 1); if (NULL == r->last->end) { (*r->msg)(MANDOCERR_MEM, r->data, ln, ppos, NULL); return(ROFF_ERR); } memcpy(r->last->end, &(*bufp)[ppos], (size_t)(i - ppos)); r->last->end[i - ppos] = '\0'; return(ROFF_IGN); } int roff_endparse(struct roff *r) { if (NULL == r->last) return(1); return((*r->msg)(MANDOCERR_SCOPEEXIT, r->data, r->last->line, r->last->col, NULL)); }