X-Git-Url: https://git.cameronkatri.com/mandoc.git/blobdiff_plain/bbca9de4c44fc5e7d5aa68015495daccc2f69081..581ec294b3151ee015247639a62753ba3748aa44:/mdoc.h

diff --git a/mdoc.h b/mdoc.h
index 00cc5f6a..58ad8131 100644
--- a/mdoc.h
+++ b/mdoc.h
@@ -1,6 +1,6 @@
-/*	$Id: mdoc.h,v 1.78 2010/05/13 06:22:11 kristaps Exp $ */
+/*	$Id: mdoc.h,v 1.131 2014/07/29 13:58:18 schwarze Exp $ */
 /*
- * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
+ * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -17,16 +17,6 @@
 #ifndef MDOC_H
 #define MDOC_H
 
-/*
- * This library implements a validating scanner/parser for ``mdoc'' roff
- * macro documents, a.k.a. BSD manual page documents.  The mdoc.c file
- * drives the parser, while macro.c describes the macro ontologies.
- * validate.c pre- and post-validates parsed macros, and action.c
- * performs actions on parsed and validated macros.
- */
-
-/* What follows is a list of ALL possible macros. */
-
 enum	mdoct {
 	MDOC_Ap = 0,
 	MDOC_Dd,
@@ -149,41 +139,42 @@ enum	mdoct {
 	MDOC_br,
 	MDOC_sp,
 	MDOC__U,
+	MDOC_Ta,
+	MDOC_ll,
 	MDOC_MAX
 };
 
-/* What follows is a list of ALL possible macro arguments. */
-
-#define	MDOC_Split	 0
-#define	MDOC_Nosplit	 1
-#define	MDOC_Ragged	 2
-#define	MDOC_Unfilled	 3
-#define	MDOC_Literal	 4
-#define	MDOC_File	 5
-#define	MDOC_Offset	 6
-#define	MDOC_Bullet	 7
-#define	MDOC_Dash	 8
-#define	MDOC_Hyphen	 9
-#define	MDOC_Item	 10
-#define	MDOC_Enum	 11
-#define	MDOC_Tag	 12
-#define	MDOC_Diag	 13
-#define	MDOC_Hang	 14
-#define	MDOC_Ohang	 15
-#define	MDOC_Inset	 16
-#define	MDOC_Column	 17
-#define	MDOC_Width	 18
-#define	MDOC_Compact	 19
-#define	MDOC_Std	 20
-#define	MDOC_Filled	 21
-#define	MDOC_Words	 22
-#define	MDOC_Emphasis	 23
-#define	MDOC_Symbolic	 24
-#define	MDOC_Nested	 25
-#define	MDOC_Centred	 26
-#define	MDOC_ARG_MAX	 27
+enum	mdocargt {
+	MDOC_Split, /* -split */
+	MDOC_Nosplit, /* -nosplit */
+	MDOC_Ragged, /* -ragged */
+	MDOC_Unfilled, /* -unfilled */
+	MDOC_Literal, /* -literal */
+	MDOC_File, /* -file */
+	MDOC_Offset, /* -offset */
+	MDOC_Bullet, /* -bullet */
+	MDOC_Dash, /* -dash */
+	MDOC_Hyphen, /* -hyphen */
+	MDOC_Item, /* -item */
+	MDOC_Enum, /* -enum */
+	MDOC_Tag, /* -tag */
+	MDOC_Diag, /* -diag */
+	MDOC_Hang, /* -hang */
+	MDOC_Ohang, /* -ohang */
+	MDOC_Inset, /* -inset */
+	MDOC_Column, /* -column */
+	MDOC_Width, /* -width */
+	MDOC_Compact, /* -compact */
+	MDOC_Std, /* -std */
+	MDOC_Filled, /* -filled */
+	MDOC_Words, /* -words */
+	MDOC_Emphasis, /* -emphasis */
+	MDOC_Symbolic, /* -symbolic */
+	MDOC_Nested, /* -nested */
+	MDOC_Centred, /* -centered */
+	MDOC_ARG_MAX
+};
 
-/* Type of a syntax node. */
 enum	mdoc_type {
 	MDOC_TEXT,
 	MDOC_ELEM,
@@ -191,118 +182,217 @@ enum	mdoc_type {
 	MDOC_TAIL,
 	MDOC_BODY,
 	MDOC_BLOCK,
+	MDOC_TBL,
+	MDOC_EQN,
 	MDOC_ROOT
 };
 
-/* Section (named/unnamed) of `Sh'. */
+/*
+ * Section (named/unnamed) of `Sh'.  Note that these appear in the
+ * conventional order imposed by mdoc.7.  In the case of SEC_NONE, no
+ * section has been invoked (this shouldn't happen).  SEC_CUSTOM refers
+ * to other sections.
+ */
 enum	mdoc_sec {
-	SEC_NONE,		/* No section, yet. */
-	SEC_NAME,
-	SEC_LIBRARY,
-	SEC_SYNOPSIS,
-	SEC_DESCRIPTION,
-	SEC_IMPLEMENTATION,
-	SEC_RETURN_VALUES,
-	SEC_ENVIRONMENT, 
-	SEC_FILES,
-	SEC_EXIT_STATUS,
-	SEC_EXAMPLES,
-	SEC_DIAGNOSTICS,
-	SEC_COMPATIBILITY,
-	SEC_ERRORS,
-	SEC_SEE_ALSO,
-	SEC_STANDARDS,
-	SEC_HISTORY,
-	SEC_AUTHORS,
-	SEC_CAVEATS,
-	SEC_BUGS,
-	SEC_SECURITY,
-	SEC_CUSTOM,		/* User-defined. */
+	SEC_NONE = 0,
+	SEC_NAME, /* NAME */
+	SEC_LIBRARY, /* LIBRARY */
+	SEC_SYNOPSIS, /* SYNOPSIS */
+	SEC_DESCRIPTION, /* DESCRIPTION */
+	SEC_CONTEXT, /* CONTEXT */
+	SEC_IMPLEMENTATION, /* IMPLEMENTATION NOTES */
+	SEC_RETURN_VALUES, /* RETURN VALUES */
+	SEC_ENVIRONMENT,  /* ENVIRONMENT */
+	SEC_FILES, /* FILES */
+	SEC_EXIT_STATUS, /* EXIT STATUS */
+	SEC_EXAMPLES, /* EXAMPLES */
+	SEC_DIAGNOSTICS, /* DIAGNOSTICS */
+	SEC_COMPATIBILITY, /* COMPATIBILITY */
+	SEC_ERRORS, /* ERRORS */
+	SEC_SEE_ALSO, /* SEE ALSO */
+	SEC_STANDARDS, /* STANDARDS */
+	SEC_HISTORY, /* HISTORY */
+	SEC_AUTHORS, /* AUTHORS */
+	SEC_CAVEATS, /* CAVEATS */
+	SEC_BUGS, /* BUGS */
+	SEC_SECURITY, /* SECURITY */
+	SEC_CUSTOM,
 	SEC__MAX
 };
 
-/* Information from prologue. */
 struct	mdoc_meta {
-	int		  msec;
-	char		 *vol;
-	char		 *arch;
-	time_t		  date;
-	char		 *title;
-	char		 *os;
-	char		 *name;
+	char		 *msec; /* `Dt' section (1, 3p, etc.) */
+	char		 *vol; /* `Dt' volume (implied) */
+	char		 *arch; /* `Dt' arch (i386, etc.) */
+	char		 *date; /* `Dd' normalised date */
+	char		 *title; /* `Dt' title (FOO, etc.) */
+	char		 *os; /* `Os' system (OpenBSD, etc.) */
+	char		 *name; /* leading `Nm' name */
 };
 
-/* An argument to a macro (multiple values = `It -column'). */
+/*
+ * An argument to a macro (multiple values = `-column xxx yyy').
+ */
 struct	mdoc_argv {
-	int	  	  arg;
+	enum mdocargt	  arg; /* type of argument */
 	int		  line;
 	int		  pos;
-	size_t		  sz;
-	char		**value;
+	size_t		  sz; /* elements in "value" */
+	char		**value; /* argument strings */
 };
 
-struct 	mdoc_arg {
+/*
+ * Reference-counted macro arguments.  These are refcounted because
+ * blocks have multiple instances of the same arguments spread across
+ * the HEAD, BODY, TAIL, and BLOCK node types.
+ */
+struct	mdoc_arg {
 	size_t		  argc;
 	struct mdoc_argv *argv;
 	unsigned int	  refcnt;
 };
 
-/* Node in AST. */
+/*
+ * Indicates that a BODY's formatting has ended, but the scope is still
+ * open.  Used for syntax-broken blocks.
+ */
+enum	mdoc_endbody {
+	ENDBODY_NOT = 0,
+	ENDBODY_SPACE, /* is broken: append a space */
+	ENDBODY_NOSPACE /* is broken: don't append a space */
+};
+
+enum	mdoc_list {
+	LIST__NONE = 0,
+	LIST_bullet, /* -bullet */
+	LIST_column, /* -column */
+	LIST_dash, /* -dash */
+	LIST_diag, /* -diag */
+	LIST_enum, /* -enum */
+	LIST_hang, /* -hang */
+	LIST_hyphen, /* -hyphen */
+	LIST_inset, /* -inset */
+	LIST_item, /* -item */
+	LIST_ohang, /* -ohang */
+	LIST_tag, /* -tag */
+	LIST_MAX
+};
+
+enum	mdoc_disp {
+	DISP__NONE = 0,
+	DISP_centered, /* -centered */
+	DISP_ragged, /* -ragged */
+	DISP_unfilled, /* -unfilled */
+	DISP_filled, /* -filled */
+	DISP_literal /* -literal */
+};
+
+enum	mdoc_auth {
+	AUTH__NONE = 0,
+	AUTH_split, /* -split */
+	AUTH_nosplit /* -nosplit */
+};
+
+enum	mdoc_font {
+	FONT__NONE = 0,
+	FONT_Em, /* Em, -emphasis */
+	FONT_Li, /* Li, -literal */
+	FONT_Sy /* Sy, -symbolic */
+};
+
+struct	mdoc_bd {
+	const char	 *offs; /* -offset */
+	enum mdoc_disp	  type; /* -ragged, etc. */
+	int		  comp; /* -compact */
+};
+
+struct	mdoc_bl {
+	const char	 *width; /* -width */
+	const char	 *offs; /* -offset */
+	enum mdoc_list	  type; /* -tag, -enum, etc. */
+	int		  comp; /* -compact */
+	size_t		  ncols; /* -column arg count */
+	const char	**cols; /* -column val ptr */
+	int		  count; /* -enum counter */
+};
+
+struct	mdoc_bf {
+	enum mdoc_font	  font; /* font */
+};
+
+struct	mdoc_an {
+	enum mdoc_auth	  auth; /* -split, etc. */
+};
+
+struct	mdoc_rs {
+	int		  quote_T; /* whether to quote %T */
+};
+
+/*
+ * Consists of normalised node arguments.  These should be used instead
+ * of iterating through the mdoc_arg pointers of a node: defaults are
+ * provided, etc.
+ */
+union	mdoc_data {
+	struct mdoc_an	  An;
+	struct mdoc_bd	  Bd;
+	struct mdoc_bf	  Bf;
+	struct mdoc_bl	  Bl;
+	struct mdoc_node *Es;
+	struct mdoc_rs	  Rs;
+};
+
+/*
+ * Single node in tree-linked AST.
+ */
 struct	mdoc_node {
 	struct mdoc_node *parent; /* parent AST node */
 	struct mdoc_node *child; /* first child AST node */
+	struct mdoc_node *last; /* last child AST node */
 	struct mdoc_node *next; /* sibling AST node */
 	struct mdoc_node *prev; /* prior sibling AST node */
 	int		  nchild; /* number children */
 	int		  line; /* parse line */
 	int		  pos; /* parse column */
+	int		  lastline; /* the node ends on this line */
 	enum mdoct	  tok; /* tok or MDOC__MAX if none */
 	int		  flags;
 #define	MDOC_VALID	 (1 << 0) /* has been validated */
-#define	MDOC_ACTED	 (1 << 1) /* has been acted upon */
 #define	MDOC_EOS	 (1 << 2) /* at sentence boundary */
 #define	MDOC_LINE	 (1 << 3) /* first macro/text on line */
+#define	MDOC_SYNPRETTY	 (1 << 4) /* SYNOPSIS-style formatting */
+#define	MDOC_ENDED	 (1 << 5) /* rendering has been ended */
+#define	MDOC_DELIMO	 (1 << 6)
+#define	MDOC_DELIMC	 (1 << 7)
 	enum mdoc_type	  type; /* AST node type */
 	enum mdoc_sec	  sec; /* current named section */
-	struct mdoc_arg	 *args; 	/* BLOCK/ELEM */
-#ifdef	UGLY
-	struct mdoc_node *pending;	/* BLOCK */
-#endif
-	struct mdoc_node *head;		/* BLOCK */
-	struct mdoc_node *body;		/* BLOCK */
-	struct mdoc_node *tail;		/* BLOCK */
-	char		 *string;	/* TEXT */
+	union mdoc_data	 *norm; /* normalised args */
+	const void	 *prev_font; /* before entering this node */
+	/* FIXME: these can be union'd to shave a few bytes. */
+	struct mdoc_arg	 *args; /* BLOCK/ELEM */
+	struct mdoc_node *pending; /* BLOCK */
+	struct mdoc_node *head; /* BLOCK */
+	struct mdoc_node *body; /* BLOCK */
+	struct mdoc_node *tail; /* BLOCK */
+	char		 *string; /* TEXT */
+	const struct tbl_span *span; /* TBL */
+	const struct eqn *eqn; /* EQN */
+	enum mdoc_endbody end; /* BODY */
 };
 
-#define	MDOC_IGN_SCOPE	 (1 << 0) /* Ignore scope violations. */
-#define	MDOC_IGN_ESCAPE	 (1 << 1) /* Ignore bad escape sequences. */
-#define	MDOC_IGN_MACRO	 (1 << 2) /* Ignore unknown macros. */
-
-/* Call-backs for parse messages. */
-
-struct	mdoc_cb {
-	int	(*mdoc_err)(void *, int, int, const char *);
-	int	(*mdoc_warn)(void *, int, int, const char *);
-};
-
-/* See mdoc.3 for documentation. */
-
+/* Names of macros.  Index is enum mdoct. */
 extern	const char *const *mdoc_macronames;
+
+/* Names of macro args.  Index is enum mdocargt. */
 extern	const char *const *mdoc_argnames;
 
 __BEGIN_DECLS
 
 struct	mdoc;
 
-/* See mdoc.3 for documentation. */
-
-void	 	  mdoc_free(struct mdoc *);
-struct	mdoc	 *mdoc_alloc(void *, int, const struct mdoc_cb *);
-void		  mdoc_reset(struct mdoc *);
-int	 	  mdoc_parseln(struct mdoc *, int, char *buf);
 const struct mdoc_node *mdoc_node(const struct mdoc *);
 const struct mdoc_meta *mdoc_meta(const struct mdoc *);
-int		  mdoc_endparse(struct mdoc *);
+void mdoc_deroff(char **, const struct mdoc_node *);
 
 __END_DECLS