-/* $Id: mdoc.h,v 1.24 2009/01/17 14:04:25 kristaps Exp $ */
+/* $Id: mdoc.h,v 1.54 2009/03/31 13:50:19 kristaps Exp $ */
/*
- * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
+ * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the
#ifndef MDOC_H
#define MDOC_H
+#include <time.h>
+
+/*
+ * This library implements a validating scanner/parser for ``mdoc'' roff
+ * macro documents, a.k.a. BSD manual page documents. The mdoc.c file
+ * drives the parser, while macro.c describes the macro ontologies;
+ * validate.c pre- and post-validates parsed macros, and action.c
+ * performs actions on parsed and validated macros.
+ */
+
/* What follows is a list of ALL possible macros. */
#define MDOC___ 0
#define MDOC_Hf 103
#define MDOC_Fr 104
#define MDOC_Ud 105
-#define MDOC_MAX 106
+#define MDOC_Lb 106
+#define MDOC_Ap 107
+#define MDOC_Lp 108
+#define MDOC_Lk 109
+#define MDOC_Mt 110
+#define MDOC_Brq 111
+#define MDOC_Bro 112
+#define MDOC_Brc 113
+#define MDOC__C 114
+#define MDOC_Es 115
+#define MDOC_En 116
+#define MDOC_Dx 117
+#define MDOC__Q 118
+#define MDOC_MAX 119
/* What follows is a list of ALL possible macro arguments. */
#define MDOC_Width 18
#define MDOC_Compact 19
#define MDOC_Std 20
-#define MDOC_p1003_1_88 21
-#define MDOC_p1003_1_90 22
-#define MDOC_p1003_1_96 23
-#define MDOC_p1003_1_2001 24
-#define MDOC_p1003_1_2004 25
-#define MDOC_p1003_1 26
-#define MDOC_p1003_1b 27
-#define MDOC_p1003_1b_93 28
-#define MDOC_p1003_1c_95 29
-#define MDOC_p1003_1g_2000 30
-#define MDOC_p1003_2_92 31
-#define MDOC_p1387_2_95 32
-#define MDOC_p1003_2 33
-#define MDOC_p1387_2 34
-#define MDOC_isoC_90 35
-#define MDOC_isoC_amd1 36
-#define MDOC_isoC_tcor1 37
-#define MDOC_isoC_tcor2 38
-#define MDOC_isoC_99 39
-#define MDOC_ansiC 40
-#define MDOC_ansiC_89 41
-#define MDOC_ansiC_99 42
-#define MDOC_ieee754 43
-#define MDOC_iso8802_3 44
-#define MDOC_xpg3 45
-#define MDOC_xpg4 46
-#define MDOC_xpg4_2 47
-#define MDOC_xpg4_3 48
-#define MDOC_xbd5 49
-#define MDOC_xcu5 50
-#define MDOC_xsh5 51
-#define MDOC_xns5 52
-#define MDOC_xns5_2d2_0 53
-#define MDOC_xcurses4_2 54
-#define MDOC_susv2 55
-#define MDOC_susv3 56
-#define MDOC_svid4 57
-#define MDOC_Filled 58
-#define MDOC_Words 59
-#define MDOC_Emphasis 60
-#define MDOC_Symbolic 61
-#define MDOC_ARG_MAX 62
+#define MDOC_Filled 21
+#define MDOC_Words 22
+#define MDOC_Emphasis 23
+#define MDOC_Symbolic 24
+#define MDOC_Nested 25
+#define MDOC_ARG_MAX 26
+/* Warnings concern either syntax or groff compatibility. */
enum mdoc_warn {
- WARN_SYNTAX, /* Syntax warn (at line/col). */
- WARN_COMPAT /* Groff compat warn (at line/col). */
-};
-
-/* Possible values for the `At' macro. */
-enum mdoc_att {
- ATT_DEFAULT = 0,
- ATT_v1,
- ATT_v2,
- ATT_v3,
- ATT_v4,
- ATT_v5,
- ATT_v6,
- ATT_v7,
- ATT_32v,
- ATT_V1,
- ATT_V2,
- ATT_V3,
- ATT_V4
-};
-
-/* An argument to a macro (multiple values = `It -column'). */
-struct mdoc_arg {
- int arg;
- int line;
- int pos;
- size_t sz;
- char **value;
+ WARN_SYNTAX,
+ WARN_COMPAT
};
-/*
- * Simplified grammar of syntax tree:
- *
- * MDOC_ROOT: root of tree
- * MDOC_TEXT: free-form text
- * MDOC_ELEM: elem [args] MDOC_TEXT...
- * MDOC_BLOCK, MDOC_HEAD, MDOC_BODY, MDOC_TAIL:
- * MDOC_BLOCK:
- * MDOC_HEAD [args] (MDOC_TEXT|MDOC_ELEM|MDOC_BLOCK)...
- * MDOC_BODY (MDOC_TEXT|MDOC_ELEM|MDOC_BLOCK)...
- * MDOC_TAIL (optional) (MDOC_TEXT|MDOC_ELEM|MDOC_BLOCK)...
- */
-
/* Type of a syntax node. */
enum mdoc_type {
MDOC_TEXT,
MDOC_ROOT
};
-/* Manual section. */
-enum mdoc_msec {
- MSEC_DEFAULT = 0,
- MSEC_1,
- MSEC_2,
- MSEC_3,
- MSEC_3f,
- MSEC_3p,
- MSEC_4,
- MSEC_5,
- MSEC_6,
- MSEC_7,
- MSEC_8,
- MSEC_9,
- MSEC_X11,
- MSEC_X11R6,
- MSEC_local,
- MSEC_n,
- MSEC_unass,
- MSEC_draft,
- MSEC_paper
-};
-
-/* Section (named/unnamed) of `Ss'. */
+/* Section (named/unnamed) of `Sh'. */
enum mdoc_sec {
- SEC_PROLOGUE = 0,
- SEC_BODY,
- SEC_NAME,
- SEC_SYNOPSIS,
- SEC_DESCRIPTION,
- SEC_RETURN_VALUES,
- SEC_ENVIRONMENT,
- SEC_FILES,
- SEC_EXAMPLES,
- SEC_DIAGNOSTICS,
- SEC_ERRORS,
- SEC_SEE_ALSO,
- SEC_STANDARDS,
- SEC_HISTORY,
- SEC_AUTHORS,
- SEC_CAVEATS,
- SEC_BUGS,
+ SEC_PROLOGUE = 0,
+ SEC_BODY = 1,
+ SEC_NAME = 2,
+ SEC_LIBRARY = 3,
+ SEC_SYNOPSIS = 4,
+ SEC_DESCRIPTION = 5,
+ SEC_IMPLEMENTATION = 6,
+ SEC_RETURN_VALUES = 7,
+ SEC_ENVIRONMENT = 8,
+ SEC_FILES = 9,
+ SEC_EXAMPLES = 10,
+ SEC_DIAGNOSTICS = 11,
+ SEC_COMPATIBILITY = 12,
+ SEC_ERRORS = 13,
+ SEC_SEE_ALSO = 14,
+ SEC_STANDARDS = 15,
+ SEC_HISTORY = 16,
+ SEC_AUTHORS = 17,
+ SEC_CAVEATS = 18,
+ SEC_BUGS = 19,
SEC_CUSTOM
};
-/* Volume of `Dt'. */
-enum mdoc_vol {
- VOL_DEFAULT = 0,
- VOL_AMD,
- VOL_IND,
- VOL_KM,
- VOL_LOCAL,
- VOL_PRM,
- VOL_PS1,
- VOL_SMM,
- VOL_URM,
- VOL_USD
-};
-
-/* Architecture of `Dt'. */
-enum mdoc_arch {
- ARCH_DEFAULT = 0,
- ARCH_alpha,
- ARCH_amd64,
- ARCH_amiga,
- ARCH_arc,
- ARCH_arm,
- ARCH_armish,
- ARCH_aviion,
- ARCH_hp300,
- ARCH_hppa,
- ARCH_hppa64,
- ARCH_i386,
- ARCH_landisk,
- ARCH_luna88k,
- ARCH_mac68k,
- ARCH_macppc,
- ARCH_mvme68k,
- ARCH_mvme88k,
- ARCH_mvmeppc,
- ARCH_pmax,
- ARCH_sgi,
- ARCH_socppc,
- ARCH_sparc,
- ARCH_sparc64,
- ARCH_sun3,
- ARCH_vax,
- ARCH_zaurus
-};
-
-/* Meta-information from prologue. */
+/* Information from prologue. */
struct mdoc_meta {
- enum mdoc_msec msec;
- enum mdoc_vol vol;
- enum mdoc_arch arch;
+ int msec; /* manual section */
+ char *vol; /* volume of `Dt' */
+ char *arch; /* architecture of `Dt' */
time_t date;
-#define META_TITLE_SZ (64)
- char title[META_TITLE_SZ];
-#define META_OS_SZ (64)
- char os[META_OS_SZ];
-};
-
-struct mdoc_text {
- char *string;
+ char *title; /* document title -- presumably from `Dt'; confirm */
+ char *os; /* operating system -- presumably from `Os'; confirm */
+ char *name; /* NOTE(review): setter not visible in this header -- confirm meaning */
};
-struct mdoc_block {
- size_t argc;
- struct mdoc_arg *argv;
- struct mdoc_node *head;
- struct mdoc_node *body;
- struct mdoc_node *tail;
-};
-
-struct mdoc_elem {
+/* An argument to a macro (multiple values = `It -column'). */
+struct mdoc_argv {
+ int arg;
+ int line;
+ int pos;
size_t sz;
- char **args;
- size_t argc;
- struct mdoc_arg *argv;
+ char **value;
};
-union mdoc_data {
- struct mdoc_text text;
- struct mdoc_elem elem;
- struct mdoc_block block;
+struct mdoc_arg { /* Set of arguments attached to a macro node (see struct mdoc_node `args'). */
+ size_t argc; /* presumably the number of entries in argv -- TODO confirm */
+ struct mdoc_argv *argv; /* the individual arguments */
+ unsigned int refcnt; /* presumably a reference count -- confirm against users */
};
-/* Syntax node in parse tree. */
+/* Node in the abstract syntax tree (AST). */
struct mdoc_node {
struct mdoc_node *parent;
struct mdoc_node *child;
int line;
int pos;
int tok;
+ int flags; /* bit flags; see MDOC_VALID/MDOC_ACTED just below */
+#define MDOC_VALID (1 << 0) /* presumably set once the node has been validated (validate.c) -- confirm */
+#define MDOC_ACTED (1 << 1) /* presumably set once the node has been acted upon (action.c) -- confirm */
enum mdoc_type type;
- union mdoc_data data;
+ enum mdoc_sec sec; /* presumably the section the node appears in -- TODO confirm */
+
+ /* FIXME: union/struct this with #defines. */
+ struct mdoc_arg *args; /* BLOCK/ELEM */
+ struct mdoc_node *head; /* BLOCK */
+ struct mdoc_node *body; /* BLOCK */
+ struct mdoc_node *tail; /* BLOCK */
+ char *string; /* TEXT */
};
+#define MDOC_IGN_SCOPE (1 << 0) /* Ignore scope violations. */
+#define MDOC_IGN_ESCAPE (1 << 1) /* Ignore bad escape sequences. */
+#define MDOC_IGN_MACRO (1 << 2) /* Ignore unknown macros. */
+
/* Call-backs for parse messages. */
struct mdoc_cb {
void (*mdoc_msg)(void *, int, int, const char *);
enum mdoc_warn, const char *);
};
+/* Global table of macro names (`Bd', `Ed', etc.). */
extern const char *const *mdoc_macronames;
+
+/* Global table of argument names (`column', `tag', etc.). */
extern const char *const *mdoc_argnames;
__BEGIN_DECLS
void mdoc_free(struct mdoc *);
/* Allocate a new parser instance. */
-struct mdoc *mdoc_alloc(void *data, const struct mdoc_cb *);
+struct mdoc *mdoc_alloc(void *, int, const struct mdoc_cb *);
+
+/* Reset the parser so that it is ready for another parse. */
+int mdoc_reset(struct mdoc *);
-/* Parse a single line (boolean retval). */
+/* Parse a single line in a stream (boolean retval). */
int mdoc_parseln(struct mdoc *, int, char *buf);
-/* Get parse result or NULL. */
-const struct mdoc_node *mdoc_result(struct mdoc *);
+/* Get the first node of the parse result (call after mdoc_endparse!). */
+const struct mdoc_node *mdoc_node(const struct mdoc *);
+
+/* Get result meta-information (after mdoc_endparse!). */
+const struct mdoc_meta *mdoc_meta(const struct mdoc *);
/* Signal end of parse sequence (boolean retval). */
int mdoc_endparse(struct mdoc *);
+/* The following are utility functions. */
+
+const char *mdoc_a2att(const char *);
+const char *mdoc_a2lib(const char *);
+const char *mdoc_a2st(const char *);
+
__END_DECLS
#endif /*!MDOC_H*/