-/* $Id: mdoc.h,v 1.1 2008/12/15 01:54:58 kristaps Exp $ */
+/* $Id: mdoc.h,v 1.36 2009/03/02 12:09:32 kristaps Exp $ */
/*
* Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
*
#ifndef MDOC_H
#define MDOC_H
-#define MDOC_LINEARG_MAX 9
+/*
+ * This library implements a validating scanner/parser for ``mdoc'' roff
+ * macro documents, a.k.a. BSD manual page documents. The mdoc.c file
+ * drives the parser, while macro.c describes the macro ontologies.
+ * validate.c pre- and post-validates parsed macros, and action.c
+ * performs actions on parsed and validated macros.
+ */
+
+/*
+ * What follows is a list of ALL possible macros.  Each macro name maps
+ * to an integer identifier; MDOC_MAX is one greater than the last
+ * identifier (MDOC_Ud).
+ */
#define MDOC___ 0
#define MDOC_Dd 1
#define MDOC_Ud 105
#define MDOC_MAX 106
+/*
+ * What follows is a list of ALL possible macro arguments, each mapped
+ * to an integer identifier.
+ */
+
#define MDOC_Split 0
#define MDOC_Nosplit 1
#define MDOC_Ragged 2
#define MDOC_svid4 57
#define MDOC_Filled 58
#define MDOC_Words 59
-#define MDOC_ARG_MAX 60
-
-enum mdoc_err {
- ERR_SYNTAX_QUOTE,
- ERR_SYNTAX_WS,
- ERR_MACRO_NOTSUP,
- ERR_MACRO_NOTCALL,
- ERR_SCOPE_BREAK,
- ERR_ARGS_GE1
-};
+#define MDOC_Emphasis 60
+#define MDOC_Symbolic 61
+#define MDOC_ARG_MAX 62 /* One greater than the last argument identifier. */
+/*
+ * Category of a warning: either syntax or groff-compatibility.  A value
+ * of this type is passed to the mdoc_warn call-back of struct mdoc_cb.
+ */
enum mdoc_warn {
- WARN_SYNTAX_WS_EOLN,
- WARN_SYNTAX_MACLIKE,
- WARN_ARGS_GE1
+ WARN_SYNTAX, /* Syntax warning. */
+ WARN_COMPAT /* groff-compatibility warning. */
};
+/*
+ * Possible values for the `At' macro.  ATT_DEFAULT is explicitly zero.
+ * mdoc_att2a() accepts a value of this type and returns a string, and
+ * mdoc_atoatt() accepts a string and returns a value of this type.
+ */
+enum mdoc_att {
+ ATT_DEFAULT = 0,
+ ATT_v1,
+ ATT_v2,
+ ATT_v3,
+ ATT_v4,
+ ATT_v5,
+ ATT_v6,
+ ATT_v7,
+ ATT_32v,
+ ATT_V,
+ ATT_V1,
+ ATT_V2,
+ ATT_V3,
+ ATT_V4
+};
+
+/*
+ * An argument to a macro.  An argument may carry several values (as in
+ * `It -column'), held in `value'; `sz' is presumably the number of
+ * values and `arg' one of the MDOC_* argument identifiers -- TODO
+ * confirm against the parser.
+ */
struct mdoc_arg {
int arg;
+ int line; /* presumably the input line of the argument; verify */
+ int pos; /* presumably the position within that line; verify */
size_t sz;
char **value;
};
+/* Type of a syntax node (the `type' member of struct mdoc_node). */
enum mdoc_type {
MDOC_TEXT,
MDOC_ELEM,
MDOC_HEAD,
+ MDOC_TAIL, /* NOTE(review): added mid-enum; later values shift by one */
MDOC_BODY,
- MDOC_BLOCK
+ MDOC_BLOCK,
+ MDOC_ROOT /* presumably the top-most node of the tree; confirm */
+};
+
+/*
+ * Manual section.  Recorded in the `msec' member of struct mdoc_meta;
+ * mdoc_msec2a() accepts a value of this type and returns a string.
+ * MSEC_DEFAULT is explicitly zero.
+ */
+enum mdoc_msec {
+ MSEC_DEFAULT = 0,
+ MSEC_1,
+ MSEC_2,
+ MSEC_3,
+ MSEC_3f,
+ MSEC_3p,
+ MSEC_4,
+ MSEC_5,
+ MSEC_6,
+ MSEC_7,
+ MSEC_8,
+ MSEC_9,
+ MSEC_X11,
+ MSEC_X11R6,
+ MSEC_local,
+ MSEC_n,
+ MSEC_unass,
+ MSEC_draft,
+ MSEC_paper
+};
+
+/*
+ * Section (named/unnamed) of `Ss'.  Every struct mdoc_node carries one
+ * of these values in its `sec' member.  SEC_PROLOGUE is explicitly zero.
+ * NOTE(review): the enumerators (NAME, SYNOPSIS, ...) look like `Sh'
+ * section titles rather than `Ss' sub-sections -- confirm which macro
+ * is meant.
+ */
+enum mdoc_sec {
+ SEC_PROLOGUE = 0,
+ SEC_BODY,
+ SEC_NAME,
+ SEC_SYNOPSIS,
+ SEC_DESCRIPTION,
+ SEC_RETURN_VALUES,
+ SEC_ENVIRONMENT,
+ SEC_FILES,
+ SEC_EXAMPLES,
+ SEC_DIAGNOSTICS,
+ SEC_ERRORS,
+ SEC_SEE_ALSO,
+ SEC_STANDARDS,
+ SEC_HISTORY,
+ SEC_AUTHORS,
+ SEC_CAVEATS,
+ SEC_BUGS,
+ SEC_CUSTOM
+};
+
+/*
+ * Volume of `Dt'.  Recorded in the `vol' member of struct mdoc_meta;
+ * mdoc_vol2a() accepts a value of this type and returns a string.
+ * VOL_DEFAULT is explicitly zero.
+ */
+enum mdoc_vol {
+ VOL_DEFAULT = 0,
+ VOL_AMD,
+ VOL_IND,
+ VOL_KM,
+ VOL_LOCAL,
+ VOL_PRM,
+ VOL_PS1,
+ VOL_SMM,
+ VOL_URM,
+ VOL_USD
+};
+
+/*
+ * Architecture of `Dt'.  Recorded in the `arch' member of struct
+ * mdoc_meta; mdoc_arch2a() accepts a value of this type and returns a
+ * string.  ARCH_DEFAULT is explicitly zero.
+ */
+enum mdoc_arch {
+ ARCH_DEFAULT = 0,
+ ARCH_alpha,
+ ARCH_amd64,
+ ARCH_amiga,
+ ARCH_arc,
+ ARCH_arm,
+ ARCH_armish,
+ ARCH_aviion,
+ ARCH_hp300,
+ ARCH_hppa,
+ ARCH_hppa64,
+ ARCH_i386,
+ ARCH_landisk,
+ ARCH_luna88k,
+ ARCH_mac68k,
+ ARCH_macppc,
+ ARCH_mvme68k,
+ ARCH_mvme88k,
+ ARCH_mvmeppc,
+ ARCH_pmax,
+ ARCH_sgi,
+ ARCH_socppc,
+ ARCH_sparc,
+ ARCH_sparc64,
+ ARCH_sun3,
+ ARCH_vax,
+ ARCH_zaurus
+};
+
+/*
+ * Information from prologue.  Obtained through mdoc_meta(), which is
+ * only to be called after mdoc_endparse().
+ * NOTE(review): time_t needs <time.h> (or <sys/types.h>); no #include
+ * is visible in this excerpt -- confirm that includers provide it.
+ */
+struct mdoc_meta {
+ enum mdoc_msec msec; /* Manual section. */
+ enum mdoc_vol vol; /* Volume of `Dt'. */
+ enum mdoc_arch arch; /* Architecture of `Dt'. */
+ time_t date;
+ char *title;
+ char *os;
+ char *name;
};
+/*
+ * Text-only node: the `text' member of union mdoc_data, holding a
+ * single string (presumably used by MDOC_TEXT nodes -- confirm).
+ */
struct mdoc_text {
char *string;
};
+/*
+ * Block (scoped) node: the `block' member of union mdoc_data.  Holds
+ * the macro arguments (argv; argc is presumably their count) and
+ * pointers to three further nodes: head, body and tail.
+ */
struct mdoc_block {
- int tok;
size_t argc;
struct mdoc_arg *argv;
+ struct mdoc_node *head; /* presumably the MDOC_HEAD node; confirm */
+ struct mdoc_node *body; /* presumably the MDOC_BODY node; confirm */
+ struct mdoc_node *tail; /* presumably the MDOC_TAIL node; confirm */
};
-struct mdoc_head {
- size_t sz;
- char **args;
- int tok;
-};
-
-struct mdoc_body {
- int tok;
-};
-
+/*
+ * In-line element node: the `elem' member of union mdoc_data.  Holds
+ * only the macro arguments (argv; argc is presumably their count).
+ */
struct mdoc_elem {
- size_t sz;
- char **args;
- int tok;
size_t argc;
struct mdoc_arg *argv;
};
+/*
+ * Type-specific data of an AST node: text, in-line element or block.
+ * Embedded in struct mdoc_node as its `data' member.
+ */
union mdoc_data {
struct mdoc_text text;
struct mdoc_elem elem;
- struct mdoc_body body;
- struct mdoc_head head;
struct mdoc_block block;
};
+/*
+ * Node in AST.  Nodes are linked through the parent, child, next and
+ * prev pointers; `type' names the kind of node and `data' holds the
+ * type-specific data.
+ */
struct mdoc_node {
struct mdoc_node *parent;
struct mdoc_node *child;
struct mdoc_node *next;
+ struct mdoc_node *prev; /* presumably the sibling opposite `next' */
+ int line; /* presumably the input line of the node; verify */
+ int pos; /* presumably the position within that line; verify */
+ int tok; /* presumably an MDOC_* macro identifier; verify */
+ int flags; /* presumably a mask of the MDOC_* bits below; verify */
+#define MDOC_VALID (1 << 0) /* bit 0; presumably set once validated */
+#define MDOC_ACTED (1 << 1) /* bit 1; presumably set once acted upon */
enum mdoc_type type;
union mdoc_data data;
+ enum mdoc_sec sec; /* Section value, see enum mdoc_sec. */
};
+/*
+ * Call-backs for parse messages; a pointer to this structure is given
+ * to mdoc_alloc().  Every call-back receives a void pointer, two ints
+ * and a string; mdoc_warn additionally receives the warning category.
+ * NOTE(review): the void pointer is presumably the `data' argument of
+ * mdoc_alloc() and the ints a line and position -- confirm in mdoc.c.
+ */
struct mdoc_cb {
- int (*mdoc_err)(void *, int, int, enum mdoc_err);
- int (*mdoc_warn)(void *, int, int, enum mdoc_warn);
- void (*mdoc_msg)(void *, int, const char *);
+ void (*mdoc_msg)(void *, int, int, const char *);
+ int (*mdoc_err)(void *, int, int, const char *);
+ int (*mdoc_warn)(void *, int, int,
+ enum mdoc_warn, const char *);
};
+/*
+ * Global table of macro names (`Bd', `Ed', etc.); presumably indexed
+ * by the MDOC_* macro identifiers -- confirm at the definition.
+ */
extern const char *const *mdoc_macronames;
+
+/*
+ * Global table of argument names (`column', `tag', etc.); presumably
+ * indexed by the MDOC_* argument identifiers -- confirm at the
+ * definition.
+ */
extern const char *const *mdoc_argnames;
__BEGIN_DECLS
struct mdoc;
+/* Free a parser instance allocated with mdoc_alloc(). */
void mdoc_free(struct mdoc *);
+
+/*
+ * Allocate a new parser instance.  `data' is an opaque pointer,
+ * presumably handed back as the first argument of the mdoc_cb
+ * call-backs -- confirm in mdoc.c.
+ */
struct mdoc *mdoc_alloc(void *data, const struct mdoc_cb *);
-int mdoc_parseln(struct mdoc *, char *buf);
-const struct mdoc_node
- *mdoc_result(struct mdoc *);
+/*
+ * Parse a single line in a stream (boolean retval).  The int argument
+ * is presumably the line number of `buf' -- TODO confirm.
+ */
+int mdoc_parseln(struct mdoc *, int, char *buf);
+
+/* Get the first node of the result (only after mdoc_endparse!). */
+const struct mdoc_node *mdoc_node(const struct mdoc *);
+
+/* Get the meta-information of the result (only after mdoc_endparse!). */
+const struct mdoc_meta *mdoc_meta(const struct mdoc *);
+
+/* Signal end of parse sequence (boolean retval). */
+int mdoc_endparse(struct mdoc *);
+
+/*
+ * The following are utility functions.  The *2a() functions accept an
+ * enumeration (or int) value and return a string; mdoc_atoatt()
+ * accepts a string and returns an enum mdoc_att.
+ */
+const char *mdoc_arch2a(enum mdoc_arch);
+const char *mdoc_vol2a(enum mdoc_vol);
+const char *mdoc_msec2a(enum mdoc_msec);
+const char *mdoc_att2a(enum mdoc_att);
+enum mdoc_att mdoc_atoatt(const char *);
+const char *mdoc_st2a(int);
+int mdoc_isdelim(const char *);
__END_DECLS