+.\" $Id: mdoc.3,v 1.13 2009/02/27 09:14:02 kristaps Exp $
+.\"
+.\" Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the
+.\" above copyright notice and this permission notice appear in all
+.\" copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
+.\" WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
+.\" WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
+.\" AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+.\" DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+.\" PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+.\" TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+.\" PERFORMANCE OF THIS SOFTWARE.
.\"
-.Dd $Mdocdate: January 16 2009 $
+.Dd $Mdocdate: February 27 2009 $
.Dt mdoc 3
.Os
-.\"
+.\" SECTION
.Sh NAME
.Nm mdoc_alloc ,
.Nm mdoc_parseln ,
.Nm mdoc_endparse ,
-.Nm mdoc_result ,
+.Nm mdoc_node ,
+.Nm mdoc_meta ,
.Nm mdoc_free
-.Nd mdoc macro compiler
-.\"
+.Nd mdoc macro compiler library
+.\" SECTION
.Sh SYNOPSIS
-.In mdoc.h
+.Fd #include <mdoc.h>
+.Vt extern const char * const * mdoc_macronames;
+.Vt extern const char * const * mdoc_argnames;
.Ft "struct mdoc *"
.Fn mdoc_alloc "void *data" "const struct mdoc_cb *cb"
.Ft void
-.Fn mdoc_free "struct mdoc *"
+.Fn mdoc_free "struct mdoc *mdoc"
.Ft int
-.Fn mdoc_parseln "struct mdoc *" "int" "char *buf"
+.Fn mdoc_parseln "struct mdoc *mdoc" "int line" "char *buf"
.Ft "const struct mdoc_node *"
-.Fn mdoc_result "struct mdoc *"
+.Fn mdoc_node "struct mdoc *mdoc"
+.Ft "const struct mdoc_meta *"
+.Fn mdoc_meta "struct mdoc *mdoc"
.Ft int
-.Fn mdoc_endparse "struct mdoc *"
-.\"
+.Fn mdoc_endparse "struct mdoc *mdoc"
+.\" SECTION
.Sh DESCRIPTION
The
.Nm mdoc
-library parses lines of mdoc-macro text into an abstract syntax tree.
+library parses lines of mdoc input into an abstract syntax tree.
+.Dq mdoc ,
+which is used to format BSD manual pages, is a macro package of the
+.Dq roff
+language. The
+.Nm
+library implements only those macros documented in the
+.Xr mdoc 7
+and
+.Xr mdoc.samples 7
+manuals. Documents with
+.Xr refer 1 ,
+.Xr eqn 1
+and other pre-processor sections aren't accommodated.
+.\" PARAGRAPH
+.Pp
+.Nm
+is
+.Ud
+.\" PARAGRAPH
+.Pp
In general, applications initiate a parsing sequence with
.Fn mdoc_alloc ,
parse each line in a document with
.Fn mdoc_parseln ,
close the parsing session with
.Fn mdoc_endparse ,
operate over the syntax tree returned by
-.Fn mdoc_result ,
+.Fn mdoc_node
+and
+.Fn mdoc_meta ,
then free all allocated memory with
.Fn mdoc_free .
See the
.Sx EXAMPLES
section for a full example.
-.\" The following requests should be uncommented and used where appropriate.
-.\" This next request is for sections 2, 3, and 9 function return values only.
-.\" .Sh RETURN VALUES
-.\" .Sh EXAMPLES
-.\" The next request is for sections 2, 3, and 9 error and signal handling only.
-.\" .Sh ERRORS
-.\" .Sh SEE ALSO
-.\" .Xr foobar 1
-.\" .Sh STANDARDS
-.\" .Sh HISTORY
-.\" .Sh AUTHORS
-.\" .Sh CAVEATS
-.\" .Sh BUGS
+.\" PARAGRAPH
+.Pp
+This section further defines the
+.Sx Types ,
+.Sx Functions
+and
+.Sx Variables
+available to programmers. Following that,
+.Sx Character Encoding
+describes input format. Lastly,
+.Sx Abstract Syntax Tree
+documents the output tree.
+.\" SUBSECTION
+.Ss Types
+Both functions (see
+.Sx Functions )
+and variables (see
+.Sx Variables )
+may use the following types:
+.Bl -ohang -offset "XXXX"
+.\" LIST-ITEM
+.It Vt struct mdoc
+An opaque type defined in
+.Pa mdoc.c .
+Its values are only used privately within the library.
+.\" LIST-ITEM
+.It Vt struct mdoc_cb
+A set of message callbacks defined in
+.Pa mdoc.h .
+.\" LIST-ITEM
+.It Vt struct mdoc_node
+A parsed node. Defined in
+.Pa mdoc.h .
+See
+.Sx Abstract Syntax Tree
+for details.
+.El
+.\" SUBSECTION
+.Ss Functions
+Function descriptions follow:
+.Bl -ohang -offset "XXXX"
+.\" LIST-ITEM
+.It Fn mdoc_alloc
+Allocates a parsing structure. The
+.Fa data
+pointer is passed to callbacks in
+.Fa cb ,
+which are documented further in the header file. Returns NULL on
+failure. If non-NULL, the pointer must be freed with
+.Fn mdoc_free .
+.\" LIST-ITEM
+.It Fn mdoc_free
+Free all resources of a parser. The pointer is no longer valid after
+invocation.
+.\" LIST-ITEM
+.It Fn mdoc_parseln
+Parse a nil-terminated line of input. This line should not contain the
+trailing newline. Returns 0 on failure, 1 on success. The input buffer
+.Fa buf
+is modified by this function.
+.\" LIST-ITEM
+.It Fn mdoc_endparse
+Signals that the parse is complete. Note that if
+.Fn mdoc_endparse
+is called subsequent to
+.Fn mdoc_node ,
+the resulting tree is incomplete. Returns 0 on failure, 1 on success.
+.\" LIST-ITEM
+.It Fn mdoc_node
+Returns the first node of the parse. Note that if
+.Fn mdoc_parseln
+or
+.Fn mdoc_endparse
+return 0, the tree will be incomplete.
+.It Fn mdoc_meta
+Returns the document's parsed meta-data. If this information has not
+yet been supplied or
+.Fn mdoc_parseln
+or
+.Fn mdoc_endparse
+return 0, the data will be incomplete.
+.El
+.\" SUBSECTION
+.Ss Variables
+The following variables are also defined:
+.Bl -ohang -offset "XXXX"
+.\" LIST-ITEM
+.It Va mdoc_macronames
+An array of string-ified token names.
+.\" LIST-ITEM
+.It Va mdoc_argnames
+An array of string-ified token argument names.
+.El
+.\" SUBSECTION
+.Ss Character Encoding
+The
+.Xr mdoc 3
+library accepts only printable ASCII characters as defined by
+.Xr isprint 3 .
+Non-ASCII character sequences are delimited in various ways. All are
+preceded by an escape character
+.Sq \\
+and followed by either an open-parenthesis
+.Sq \&(
+for two-character sequences; an open-bracket
+.Sq \&[
+for n-character sequences (terminated at a close-bracket
+.Sq \&] ) ;
+an asterisk and open-parenthesis
+.Sq \&*(
+for two-character sequences;
+an asterisk and non-open-parenthesis
+.Sq \&*
+for single-character sequences; or one of a small set of standalone
+single characters for other escapes.
+.\" PARAGRAPH
+.Pp
+Examples:
+.Pp
+.Bl -tag -width "XXXXXXXX" -offset "XXXX" -compact
+.\" LIST-ITEM
+.It \\*(<=
+prints
+.Dq \*(<=
+.Pq less-equal
+.\" LIST-ITEM
+.It \\(<-
+prints
+.Dq \(<-
+.Pq left-arrow
+.\" LIST-ITEM
+.It \\[<-]
+also prints
+.Dq \(<-
+.Pq left-arrow
+.\" LIST-ITEM
+.It \\*(Ba
+prints
+.Dq \*(Ba
+.Pq bar
+.\" LIST-ITEM
+.It \\*q
+prints
+.Dq \*q
+.Pq double-quote
+.El
+.\" PARAGRAPH
+.Pp
+All escaped sequences are syntax-checked, but it's up to the front-end
+system to correctly render them to the output device.
+.\" SUBSECTION
+.Ss Abstract Syntax Tree
+The
+.Nm
+functions produce an abstract syntax tree (AST) describing the input
+lines in a regular form. It may be reviewed at any time with
+.Fn mdoc_node ;
+however, if called before
+.Fn mdoc_endparse ,
+or after
+.Fn mdoc_endparse
+or
+.Fn mdoc_parseln
+fail, it may be incomplete.
+.\" PARAGRAPH
+.Pp
+The AST is composed of
+.Vt struct mdoc_node
+nodes with block, head, body, element, root and text types as declared
+by the
+.Va type
+field. Each node also provides its parse point (the
+.Va line ,
+.Va sec ,
+and
+.Va pos
+fields), its position in the tree (the
+.Va parent ,
+.Va child ,
+.Va next
+and
+.Va prev
+fields) and type-specific data (the
+.Va data
+field).
+.\" PARAGRAPH
+.Pp
+The tree itself is arranged according to the following normal form,
+where capitalised non-terminals represent nodes.
+.Pp
+.Bl -tag -width "ELEMENTXX" -compact -offset "XXXX"
+.\" LIST-ITEM
+.It ROOT
+\(<- mnode+
+.It mnode
+\(<- BLOCK | ELEMENT | TEXT
+.It BLOCK
+\(<- (HEAD [TEXT])+ [BODY [TEXT]] [TAIL [TEXT]]
+.It BLOCK
+\(<- BODY [TEXT] [TAIL [TEXT]]
+.It ELEMENT
+\(<- TEXT*
+.It HEAD
+\(<- mnode+
+.It BODY
+\(<- mnode+
+.It TAIL
+\(<- mnode+
+.It TEXT
+\(<- [[:alpha:]]*
+.El
+.\" PARAGRAPH
+.Pp
+Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
+the BLOCK production. These refer to punctuation marks. Furthermore,
+although a TEXT node will generally have a non-zero-length string, in
+the specific case of
+.Sq \&.Bd \-literal ,
+an empty line will produce a zero-length string.
+.\" PARAGRAPH
+.Pp
+The rule-of-thumb for mapping node types to macros follows. In-line
+elements, such as
+.Sq \&.Em foo ,
+are classified as ELEMENT nodes, which can only contain text.
+Multi-line elements, such as
+.Sq \&.Sh ,
+are BLOCK elements, where the HEAD constitutes line contents and the
+BODY constitutes subsequent lines. In-line elements with matching
+pairs, such as
+.Sq \&.So
+and
+.Sq \&.Sc ,
+are BLOCK elements with no HEAD tag. The only exception to this is
+.Sq \&.Eo
+and
+.Sq \&.Ec ,
+which have HEAD and TAIL nodes corresponding to the enclosure string.
+TEXT nodes, obviously, constitute text, and the ROOT node is the
+document's root.
+.\" SECTION
+.Sh EXAMPLES
+The following example reads lines from stdin and parses them, operating
+on the finished parse tree with
+.Fn parsed .
+Note that, if the last line of the file isn't newline-terminated, this
+will truncate the file's last character (see
+.Xr fgetln 3 ) .
+Further, this example does not error-check nor free memory upon failure.
+.Bd -literal -offset "XXXX"
+struct mdoc *mdoc;
+struct mdoc_node *node;
+char *buf;
+size_t len;
+int line;
+
+line = 1;
+mdoc = mdoc_alloc(NULL, NULL);
+
+while ((buf = fgetln(fp, &len))) {
+ buf[len - 1] = '\\0';
+ if ( ! mdoc_parseln(mdoc, line, buf))
+ errx(1, "mdoc_parseln");
+ line++;
+}
+
+if ( ! mdoc_endparse(mdoc))
+ errx(1, "mdoc_endparse");
+if (NULL == (node = mdoc_node(mdoc)))
+ errx(1, "mdoc_node");
+
+parsed(mdoc, node);
+mdoc_free(mdoc);
+.Ed
+.\" SECTION
+.Sh SEE ALSO
+.Xr mdoc 7 ,
+.Xr mdoc.samples 7 ,
+.Xr groff 1 ,
+.Xr mdocml 1
+.\" SECTION
+.Sh AUTHORS
+The
+.Nm
+library was written by
+.An Kristaps Dzonsons Aq kristaps@kth.se .
+.\" SECTION
+.Sh BUGS
+Bugs, un-implemented macros and incompatibilities are documented in this
+section. The baseline for determining whether macro parsing is
+.Qq incompatible
+is the default
+.Xr groff 1
+system bundled with
+.Ox .
+.\" PARAGRAPH
+.Pp
+Un-implemented: the
+.Sq \&Xc
+and
+.Sq \&Xo
+macros aren't handled when used to span lines for the
+.Sq \&It
+macro. Such usage is specifically discouraged in
+.Xr mdoc.samples 7 .
+.\" PARAGRAPH
+.Pp
+Bugs: when
+.Sq \&It \-column
+is invoked, whitespace is not stripped around
+.Sq \&Ta
+or tab-character separators.
+.\" PARAGRAPH
+.Pp
+Bugs: elements within columns for
+.Sq \&It \-column
+are not yet supported.
+.\" PARAGRAPH
+.Pp
+Incompatible: the
+.Sq \&At
+macro only accepts a single parameter. Furthermore, several macros
+.Pf ( Sq \&Pp ,
+.Sq \&It ,
+and possibly others) accept multiple arguments with a warning.
+.\" PARAGRAPH
+.Pp
+Incompatible: only those macros specified by
+.Xr mdoc.samples 7
+and
+.Xr mdoc 7
+for
+.Ox
+are supported; support for
+.Nx
+and other
+.Bx
+systems is in progress.