-/* $Id: mdoc.h,v 1.15 2009/01/05 17:57:08 kristaps Exp $ */
+/* $Id: mdoc.h,v 1.33 2009/02/23 09:33:34 kristaps Exp $ */
/*
* Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
*
#ifndef MDOC_H
#define MDOC_H
-#define MDOC_LINEARG_MAX 8
+/*
+ * This library implements a validating scanner/parser for ``mdoc'' roff
+ * macro documents, a.k.a. BSD manual page documents. The mdoc.c file
+ * drives the parser, while macro.c describes the macro ontologies.
+ * validate.c pre- and post-validates parsed macros, and action.c
+ * performs actions on parsed and validated macros.
+ */
+
+/* What follows is a list of ALL possible macros. */
#define MDOC___ 0
#define MDOC_Dd 1
#define MDOC_Ud 105
#define MDOC_MAX 106
+/* What follows is a list of ALL possible macro arguments. */
+
#define MDOC_Split 0
#define MDOC_Nosplit 1
#define MDOC_Ragged 2
#define MDOC_Symbolic 61
#define MDOC_ARG_MAX 62
-enum mdoc_err {
- ERR_SYNTAX_QUOTE, /* NOTUSED */
- ERR_SYNTAX_UNQUOTE,
- ERR_SYNTAX_NOPUNCT,
- ERR_SYNTAX_WS,
- ERR_SYNTAX_ARG,
- ERR_SYNTAX_ARGFORM,
- ERR_SYNTAX_ARGVAL,
- ERR_SYNTAX_ARGBAD,
- ERR_SYNTAX_ARGMANY,
- ERR_MACRO_NOTSUP,
- ERR_MACRO_NOTCALL,
- ERR_SCOPE_BREAK,
- ERR_SCOPE_NOCTX,
- ERR_SCOPE_NONEST,
- ERR_SEC_PROLOGUE,
- ERR_SEC_NPROLOGUE,
- ERR_SEC_PROLOGUE_OO,
- ERR_SEC_PROLOGUE_REP,
- ERR_SEC_NAME,
- ERR_ARGS_EQ0,
- ERR_ARGS_EQ1,
- ERR_ARGS_GE1,
- ERR_ARGS_LE2,
- ERR_ARGS_MANY,
- ERR_SYNTAX_CHILDHEAD,
- ERR_SYNTAX_CHILDBODY,
- ERR_SYNTAX_EMPTYBODY,
- ERR_SYNTAX_EMPTYHEAD,
- ERR_SYNTAX_NOTEXT
+/*
+ * Warnings are either syntax or groff-compatibility.  This class is the
+ * enum argument of the mdoc_warn call-back in struct mdoc_cb, which
+ * takes a const char * next to it.
+ */
+enum mdoc_warn {
+ WARN_SYNTAX,
+ WARN_COMPAT
};
+/* Possible values for the `At' macro. */
enum mdoc_att {
ATT_DEFAULT = 0,
ATT_v1,
ATT_V4
};
-enum mdoc_warn {
- WARN_SYNTAX_WS_EOLN,
- WARN_SYNTAX_MACLIKE,
- WARN_SYNTAX_ARGLIKE,
- WARN_SYNTAX_QUOTED,
- WARN_SYNTAX_EMPTYBODY,
- WARN_IGN_AFTER_BLK,
- WARN_IGN_BEFORE_BLK,
- WARN_IGN_OBSOLETE,
- WARN_SEC_OO,
- WARN_ARGS_GE1,
- WARN_ARGS_EQ0,
- WARN_COMPAT_TROFF
-};
-
+/*
+ * An argument to a macro (multiple values = `It -column').
+ * NOTE(review): arg is presumably one of the MDOC_* argument values
+ * below MDOC_ARG_MAX, and sz presumably counts the entries of value --
+ * confirm against the parser.
+ */
struct mdoc_arg {
int arg;
+ int line; /* presumably input line of the argument -- TODO confirm */
+ int pos; /* presumably position within that line -- TODO confirm */
size_t sz;
char **value;
};
+/*
+ * Type of a syntax node (the type member of struct mdoc_node).  union
+ * mdoc_data has members named text, elem and block only; the other
+ * types listed here have no member in that union.
+ */
enum mdoc_type {
MDOC_TEXT,
MDOC_ELEM,
MDOC_HEAD,
MDOC_TAIL,
MDOC_BODY,
- MDOC_BLOCK
+ MDOC_BLOCK,
+ MDOC_ROOT /* presumably the root node of the tree -- TODO confirm */
};
+/*
+ * Manual section.  Held in struct mdoc_meta (msec) and accepted by
+ * mdoc_msec2a(), which returns a const char *.
+ */
enum mdoc_msec {
MSEC_DEFAULT = 0,
MSEC_1,
MSEC_paper
};
+/*
+ * Section (named/unnamed) of `Ss'.  Held per node in struct mdoc_node
+ * (sec).
+ * NOTE(review): mdoc(7) documents `Ss' as the subsection macro and `Sh'
+ * as the section macro -- confirm that `Ss' is what is meant here.
+ */
enum mdoc_sec {
SEC_PROLOGUE = 0,
SEC_BODY,
SEC_CUSTOM
};
+/*
+ * Volume of `Dt'.  Held in struct mdoc_meta (vol) and accepted by
+ * mdoc_vol2a(), which returns a const char *.
+ */
enum mdoc_vol {
VOL_DEFAULT = 0,
VOL_AMD,
VOL_USD
};
+/*
+ * Architecture of `Dt'.  Held in struct mdoc_meta (arch) and accepted
+ * by mdoc_arch2a(), which returns a const char *.  Only ARCH_DEFAULT
+ * has an explicit value; every other enumerator is numbered by its
+ * position in this list.
+ */
enum mdoc_arch {
ARCH_DEFAULT = 0,
ARCH_alpha,
ARCH_amd64,
ARCH_amiga,
ARCH_arc,
+ ARCH_arm, /* NOTE(review): placed mid-list, so ARCH_armish and later are one higher than without it -- confirm any value-indexed table agrees */
ARCH_armish,
ARCH_aviion,
ARCH_hp300,
ARCH_zaurus
};
+/*
+ * Information from prologue.  title, os and name are pointers, so the
+ * strings themselves live outside this struct.
+ * NOTE(review): this header does not show who allocates or frees those
+ * strings -- confirm ownership in the implementation.
+ */
struct mdoc_meta {
enum mdoc_msec msec;
enum mdoc_vol vol;
enum mdoc_arch arch;
time_t date;
-#define META_TITLE_SZ (64)
- char title[META_TITLE_SZ];
-#define META_OS_SZ (64)
- char os[META_OS_SZ];
+ char *title;
+ char *os;
+ char *name;
};
+/* Text-only node. */
struct mdoc_text {
char *string;
};
+/*
+ * Block (scoped) node.  argc/argv hold the macro arguments (struct
+ * mdoc_arg).  The token is not stored here; struct mdoc_node carries
+ * it in its tok member.
+ */
struct mdoc_block {
- int tok;
size_t argc;
struct mdoc_arg *argv;
+ struct mdoc_node *head; /* presumably the block's MDOC_HEAD node -- TODO confirm */
+ struct mdoc_node *body; /* presumably the block's MDOC_BODY node -- TODO confirm */
+ struct mdoc_node *tail; /* presumably the block's MDOC_TAIL node -- TODO confirm */
};
-struct mdoc_head {
- int tok;
-};
-
-struct mdoc_tail {
- int tok;
-};
-
-struct mdoc_body {
- int tok;
-};
-
+/*
+ * In-line element node.  argc/argv hold the macro arguments (struct
+ * mdoc_arg).  The token is not stored here; struct mdoc_node carries
+ * it in its tok member.
+ * NOTE(review): sz presumably counts the entries of args -- confirm.
+ */
struct mdoc_elem {
size_t sz;
char **args;
- int tok;
size_t argc;
struct mdoc_arg *argv;
};
+/*
+ * Type-specific data of an AST node (the data member of struct
+ * mdoc_node).  Members exist for text, element and block nodes only.
+ * NOTE(review): presumably the node's type member tells which union
+ * member is valid -- confirm.
+ */
union mdoc_data {
struct mdoc_text text;
struct mdoc_elem elem;
- struct mdoc_body body;
- struct mdoc_head head;
- struct mdoc_tail tail;
struct mdoc_block block;
};
+/*
+ * Node in AST.  parent, child, next and prev link a node to other
+ * nodes; type and data describe the node itself.
+ */
struct mdoc_node {
struct mdoc_node *parent;
struct mdoc_node *child;
struct mdoc_node *next;
struct mdoc_node *prev;
+ int line; /* presumably input line of this node -- TODO confirm */
+ int pos; /* presumably position within that line -- TODO confirm */
+ int tok; /* presumably one of the MDOC_* macro values -- TODO confirm */
+ int flags; /* bit flags: MDOC_VALID, MDOC_ACTED */
+#define MDOC_VALID (1 << 0) /* presumably set once the node is validated (validate.c) -- confirm */
+#define MDOC_ACTED (1 << 1) /* presumably set once actions have run (action.c) -- confirm */
enum mdoc_type type;
union mdoc_data data;
+ enum mdoc_sec sec; /* presumably the section the node belongs to -- TODO confirm */
};
+/*
+ * Call-backs for parse messages.  Each takes a void pointer and two
+ * ints and ends with a const char pointer; mdoc_warn also takes the
+ * warning class (enum mdoc_warn).  mdoc_err and mdoc_warn return int,
+ * mdoc_msg returns nothing.
+ * NOTE(review): the void pointer is presumably the `data' handed to
+ * mdoc_alloc(), the two ints a line and a position, and the string the
+ * message text; the meaning of the int return value is not visible in
+ * this header -- confirm against mdoc.c.
+ */
struct mdoc_cb {
- int (*mdoc_err)(void *, int, int, enum mdoc_err);
- int (*mdoc_warn)(void *, int, int, enum mdoc_warn);
- void (*mdoc_msg)(void *, int, const char *);
+ void (*mdoc_msg)(void *, int, int, const char *);
+ int (*mdoc_err)(void *, int, int, const char *);
+ int (*mdoc_warn)(void *, int, int,
+ enum mdoc_warn, const char *);
};
+/* Global table of macro names (`Bd', `Ed', etc.). */
extern const char *const *mdoc_macronames;
+
+/* Global table of argument names (`column', `tag', etc.). */
extern const char *const *mdoc_argnames;
__BEGIN_DECLS
struct mdoc;
+/* Free a parser instance allocated with mdoc_alloc(). */
void mdoc_free(struct mdoc *);
+
+/* Allocate a new parser instance. */
struct mdoc *mdoc_alloc(void *data, const struct mdoc_cb *);
-int mdoc_parseln(struct mdoc *, int, char *buf);
-const struct mdoc_node
- *mdoc_result(struct mdoc *);
+
+/*
+ * Parse a single line in a stream (boolean retval).  buf is not
+ * const-qualified, so the callee is allowed to modify it.
+ * NOTE(review): the int is presumably the line number of buf -- confirm.
+ */
+int mdoc_parseln(struct mdoc *, int, char *buf);
+
+/*
+ * Get the first node of the parse result (call after mdoc_endparse!).
+ * The function shares its name with struct mdoc_node; tags and ordinary
+ * identifiers are in separate name spaces in C, so the two coexist.
+ */
+const struct mdoc_node *mdoc_node(const struct mdoc *);
+
+/*
+ * Get the meta-information of the parse result (call after
+ * mdoc_endparse!).  Shares its name with struct mdoc_meta in the same
+ * way.
+ */
+const struct mdoc_meta *mdoc_meta(const struct mdoc *);
+
+/* Signal end of parse sequence (boolean retval). */
+int mdoc_endparse(struct mdoc *);
+
+/*
+ * The following are utility functions.  Each *2a function takes one
+ * enum value and returns a const char *.
+ * NOTE(review): mdoc_isdelim presumably returns non-zero when the
+ * string is a delimiter -- confirm.
+ */
+const char *mdoc_arch2a(enum mdoc_arch);
+const char *mdoc_vol2a(enum mdoc_vol);
+const char *mdoc_msec2a(enum mdoc_msec);
+int mdoc_isdelim(const char *);
__END_DECLS