-.\" $Id: mandoc.3,v 1.10 2011/05/24 21:41:11 kristaps Exp $
+.\" $Id: mandoc.3,v 1.29 2014/11/26 23:42:14 schwarze Exp $
.\"
.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
.\" Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
-.Dd $Mdocdate: May 24 2011 $
+.Dd $Mdocdate: November 26 2014 $
.Dt MANDOC 3
.Os
.Sh NAME
.Nm mandoc ,
-.Nm mandoc_escape ,
+.Nm man_deroff ,
.Nm man_meta ,
+.Nm man_mparse ,
.Nm man_node ,
-.Nm mchars_alloc ,
-.Nm mchars_free ,
-.Nm mchars_num2char ,
-.Nm mchars_num2uc ,
-.Nm mchars_spec2cp ,
-.Nm mchars_spec2str ,
+.Nm mdoc_deroff ,
.Nm mdoc_meta ,
.Nm mdoc_node ,
.Nm mparse_alloc ,
.Nm mparse_free ,
+.Nm mparse_getkeep ,
+.Nm mparse_keep ,
+.Nm mparse_open ,
.Nm mparse_readfd ,
.Nm mparse_reset ,
.Nm mparse_result ,
.Nm mparse_strerror ,
.Nm mparse_strlevel
+.Nm mparse_wait ,
.Nd mandoc macro compiler library
.Sh LIBRARY
-.Lb mandoc
+.Lb libmandoc
.Sh SYNOPSIS
-.In man.h
-.In mdoc.h
+.In sys/types.h
.In mandoc.h
-.Ft "enum mandoc_esc"
-.Fo mandoc_escape
-.Fa "const char **in"
-.Fa "const char **seq"
-.Fa "int *len"
-.Fc
-.Ft "const struct man_meta *"
-.Fo man_meta
-.Fa "const struct man *man"
-.Fc
-.Ft "const struct man_node *"
-.Fo man_node
-.Fa "const struct man *man"
+.Fd "#define ASCII_NBRSP"
+.Fd "#define ASCII_HYPH"
+.Fd "#define ASCII_BREAK"
+.Ft struct mparse *
+.Fo mparse_alloc
+.Fa "int options"
+.Fa "enum mandoclevel wlevel"
+.Fa "mandocmsg mmsg"
+.Fa "const struct mchars *mchars"
+.Fa "char *defos"
.Fc
-.Ft "struct mchars *"
-.Fn mchars_alloc
.Ft void
-.Fn mchars_free "struct mchars *p"
-.Ft char
-.Fn mchars_num2char "const char *cp" "size_t sz"
-.Ft int
-.Fn mchars_num2uc "const char *cp" "size_t sz"
-.Ft "const char *"
-.Fo mchars_spec2str
-.Fa "struct mchars *p"
-.Fa "const char *cp"
-.Fa "size_t sz"
-.Fa "size_t *rsz"
+.Fo (*mandocmsg)
+.Fa "enum mandocerr errtype"
+.Fa "enum mandoclevel level"
+.Fa "const char *file"
+.Fa "int line"
+.Fa "int col"
+.Fa "const char *msg"
.Fc
-.Ft int
-.Fo mchars_spec2cp
-.Fa "struct mchars *p"
-.Fa "const char *cp"
-.Fa "size_t sz"
-.Ft "const char *"
-.Fc
-.Ft "const struct mdoc_meta *"
-.Fo mdoc_meta
-.Fa "const struct mdoc *mdoc"
+.Ft void
+.Fo mparse_free
+.Fa "struct mparse *parse"
.Fc
-.Ft "const struct mdoc_node *"
-.Fo mdoc_node
-.Fa "const struct mdoc *mdoc"
+.Ft const char *
+.Fo mparse_getkeep
+.Fa "const struct mparse *parse"
.Fc
.Ft void
-.Fo mparse_alloc
-.Fa "enum mparset type"
-.Fa "enum mandoclevel wlevel"
-.Fa "mandocmsg msg"
-.Fa "void *msgarg"
+.Fo mparse_keep
+.Fa "struct mparse *parse"
.Fc
-.Ft void
-.Fo mparse_free
+.Ft "enum mandoclevel"
+.Fo mparse_open
.Fa "struct mparse *parse"
+.Fa "int *fd"
+.Fa "const char *fname"
.Fc
.Ft "enum mandoclevel"
.Fo mparse_readfd
.Fa "struct mparse *parse"
.Fa "struct mdoc **mdoc"
.Fa "struct man **man"
+.Fa "char **sodest"
.Fc
.Ft "const char *"
.Fo mparse_strerror
.Fo mparse_strlevel
.Fa "enum mandoclevel"
.Fc
-.Vt extern const char * const * man_macronames;
+.Ft "enum mandoclevel"
+.Fo mparse_wait
+.Fa "struct mparse *parse"
+.Fc
+.In sys/types.h
+.In mandoc.h
+.In mdoc.h
+.Ft void
+.Fo mdoc_deroff
+.Fa "char **dest"
+.Fa "const struct mdoc_node *node"
+.Fc
+.Ft "const struct mdoc_meta *"
+.Fo mdoc_meta
+.Fa "const struct mdoc *mdoc"
+.Fc
+.Ft "const struct mdoc_node *"
+.Fo mdoc_node
+.Fa "const struct mdoc *mdoc"
+.Fc
.Vt extern const char * const * mdoc_argnames;
.Vt extern const char * const * mdoc_macronames;
-.Fd "#define ASCII_NBRSP"
-.Fd "#define ASCII_HYPH"
+.In sys/types.h
+.In mandoc.h
+.In man.h
+.Ft void
+.Fo man_deroff
+.Fa "char **dest"
+.Fa "const struct man_node *node"
+.Fc
+.Ft "const struct man_meta *"
+.Fo man_meta
+.Fa "const struct man *man"
+.Fc
+.Ft "const struct mparse *"
+.Fo man_mparse
+.Fa "const struct man *man"
+.Fc
+.Ft "const struct man_node *"
+.Fo man_node
+.Fa "const struct man *man"
+.Fc
+.Vt extern const char * const * man_macronames;
.Sh DESCRIPTION
The
.Nm mandoc
.Bl -enum
.It
initiate a parsing sequence with
+.Xr mchars_alloc 3
+and
.Fn mparse_alloc ;
.It
parse files or file descriptors with
.Fn man_node ;
.It
free all allocated memory with
-.Fn mparse_free ,
+.Fn mparse_free
+and
+.Xr mchars_free 3 ,
or invoke
.Fn mparse_reset
and parse new files.
.El
-.Pp
-The
-.Nm
-library also contains routines for translating character strings into glyphs
-.Pq see Fn mchars_alloc
-and parsing escape sequences from strings
-.Pq see Fn mandoc_escape .
-.Pp
-This library is
-.Ud
.Sh REFERENCE
This section documents the functions, types, and variables available
via
-.In mandoc.h .
+.In mandoc.h ,
+with the exception of those documented in
+.Xr mandoc_escape 3
+and
+.Xr mchars_alloc 3 .
.Ss Types
.Bl -ohang
-.It Vt "enum mandoc_esc"
.It Vt "enum mandocerr"
+A fatal error, error, or warning message during parsing.
.It Vt "enum mandoclevel"
+A classification of an
+.Vt "enum mandocerr"
+as regards system operation.
.It Vt "struct mchars"
-An opaque pointer to an object allowing for translation between
-character strings and glyphs.
-See
-.Fn mchars_alloc .
-.It Vt "enum mparset"
+An opaque pointer to a a character table.
+Created with
+.Xr mchars_alloc 3
+and freed with
+.Xr mchars_free 3 .
.It Vt "struct mparse"
+An opaque pointer to a running parse sequence.
+Created with
+.Fn mparse_alloc
+and freed with
+.Fn mparse_free .
+This may be used across parsed input if
+.Fn mparse_reset
+is called between parses.
.It Vt "mandocmsg"
+A prototype for a function to handle fatal error, error, and warning
+messages emitted by the parser.
.El
.Ss Functions
.Bl -ohang
-.It Fn mandoc_escape
-Scan an escape sequence, i.e., a character string beginning with
-.Sq \e .
-Pass a pointer to this string as
-.Va end ;
-it will be set to the supremum of the parsed escape sequence unless
-returning ESCAPE_ERROR, in which case the string is bogus and should be
-thrown away.
-If not ESCAPE_ERROR or ESCAPE_IGNORE,
-.Va start
-is set to the first relevant character of the substring (font, glyph,
-whatever) of length
-.Va sz .
-Both
-.Va start
-and
-.Va sz
-may be NULL.
+.It Fn man_deroff
+Obtain a text-only representation of a
+.Vt struct man_node ,
+including text contained in its child nodes.
+To be used on children of the pointer returned from
+.Fn man_node .
+When it is no longer needed, the pointer returned from
+.Fn man_deroff
+can be passed to
+.Xr free 3 .
.It Fn man_meta
-Obtain the meta-data of a successful parse.
+Obtain the meta-data of a successful
+.Xr man 7
+parse.
This may only be used on a pointer returned by
.Fn mparse_result .
+Declared in
+.In man.h ,
+implemented in
+.Pa man.c .
+.It Fn man_mparse
+Get the parser used for the current output.
+Declared in
+.In man.h ,
+implemented in
+.Pa man.c .
.It Fn man_node
-Obtain the root node of a successful parse.
+Obtain the root node of a successful
+.Xr man 7
+parse.
This may only be used on a pointer returned by
.Fn mparse_result .
-.It Fn mchars_alloc
-Allocate an
-.Vt "struct mchars *"
-object for translating special characters into glyphs.
-See
-.Xr mandoc_char 7
-for an overview of special characters.
-The object must be freed with
-.Fn mchars_free .
-.It Fn mchars_free
-Free an object created with
-.Fn mchars_alloc .
-.It Fn mchars_num2char
-Convert a character index (e.g., the \eN\(aq\(aq escape) into a
-printable ASCII character.
-Returns \e0 (the nil character) if the input sequence is malformed.
-.It Fn mchars_num2uc
-Convert a hexadecimal character index (e.g., the \e[uNNNN] escape) into
-a Unicode codepoint.
-Returns \e0 (the nil character) if the input sequence is malformed.
-.It Fn mchars_spec2cp
-Convert a special character into a valid Unicode codepoint.
-Returns \-1 on failure or a non-zero Unicode codepoint on success.
-.It Fn mchars_spec2str
-Convert a special character into an ASCII string.
-Returns NULL on failure.
+Declared in
+.In man.h ,
+implemented in
+.Pa man.c .
+.It Fn mdoc_deroff
+Obtain a text-only representation of a
+.Vt struct mdoc_node ,
+including text contained in its child nodes.
+To be used on children of the pointer returned from
+.Fn mdoc_node .
+When it is no longer needed, the pointer returned from
+.Fn mdoc_deroff
+can be passed to
+.Xr free 3 .
.It Fn mdoc_meta
-Obtain the meta-data of a successful parse.
+Obtain the meta-data of a successful
+.Xr mdoc
+parse.
This may only be used on a pointer returned by
.Fn mparse_result .
+Declared in
+.In mdoc.h ,
+implemented in
+.Pa mdoc.c .
.It Fn mdoc_node
-Obtain the root node of a successful parse.
+Obtain the root node of a successful
+.Xr mdoc
+parse.
This may only be used on a pointer returned by
.Fn mparse_result .
+Declared in
+.In mdoc.h ,
+implemented in
+.Pa mdoc.c .
.It Fn mparse_alloc
Allocate a parser.
+The arguments have the following effect:
+.Bl -tag -offset 5n -width inttype
+.It Ar options
+When the
+.Dv MPARSE_MDOC
+or
+.Dv MPARSE_MAN
+bit is set, only that parser is used.
+Otherwise, the document type is automatically detected.
+.Pp
+When the
+.Dv MPARSE_SO
+bit is set,
+.Xr roff 7
+.Ic \&so
+file inclusion requests are always honoured.
+Otherwise, if the request is the only content in an input file,
+only the file name is remembered, to be returned in the
+.Fa sodest
+argument of
+.Fn mparse_result .
+.Pp
+When the
+.Dv MPARSE_QUICK
+bit is set, parsing is aborted after the NAME section.
+This is for example useful in
+.Xr makewhatis 8
+.Fl Q
+to quickly build minimal databases.
+.It Ar wlevel
+Can be set to
+.Dv MANDOCLEVEL_FATAL ,
+.Dv MANDOCLEVEL_ERROR ,
+or
+.Dv MANDOCLEVEL_WARNING .
+Messages below the selected level will be suppressed.
+.It Ar mmsg
+A callback function to handle errors and warnings.
+See
+.Pa main.c
+for an example.
+.It Ar mchars
+An opaque pointer to a a character table obtained from
+.Xr mchars_alloc 3 .
+.It Ar defos
+A default string for the
+.Xr mdoc 7
+.Sq \&Os
+macro, overriding the
+.Dv OSNAME
+preprocessor definition and the results of
+.Xr uname 3 .
+.El
+.Pp
The same parser may be used for multiple files so long as
.Fn mparse_reset
is called between parses.
.Fn mparse_free
must be called to free the memory allocated by this function.
+Declared in
+.In mandoc.h ,
+implemented in
+.Pa read.c .
.It Fn mparse_free
Free all memory allocated by
.Fn mparse_alloc .
+Declared in
+.In mandoc.h ,
+implemented in
+.Pa read.c .
+.It Fn mparse_getkeep
+Acquire the keep buffer.
+Must follow a call of
+.Fn mparse_keep .
+Declared in
+.In mandoc.h ,
+implemented in
+.Pa read.c .
+.It Fn mparse_keep
+Instruct the parser to retain a copy of its parsed input.
+This can be acquired with subsequent
+.Fn mparse_getkeep
+calls.
+Declared in
+.In mandoc.h ,
+implemented in
+.Pa read.c .
+.It Fn mparse_open
+If the
+.Fa fname
+ends in
+.Pa .gz ,
+open with
+.Xr gunzip 1 ;
+otherwise, with
+.Xr open 2 .
+If
+.Xr open 2
+fails, append
+.Pa .gz
+and try with
+.Xr gunzip 1 .
+Return a file descriptor open for reading in
+.Fa fd ,
+or -1 on failure.
+It can be passed to
+.Fn mparse_readfd
+or used directly.
+Declared in
+.In mandoc.h ,
+implemented in
+.Pa read.c .
.It Fn mparse_readfd
Parse a file or file descriptor.
If
.Va fd
-is -1,
+is -1, open
.Va fname
-is opened for reading.
+with
+.Fn mparse_open .
Otherwise,
.Va fname
is assumed to be the name associated with
.Va fd .
-This may be called multiple times with different parameters; however,
+Calls
+.Fn mparse_wait
+before returning.
+This function may be called multiple times with different parameters; however,
.Fn mparse_reset
should be invoked between parses.
+Declared in
+.In mandoc.h ,
+implemented in
+.Pa read.c .
.It Fn mparse_reset
Reset a parser so that
.Fn mparse_readfd
may be used again.
+Declared in
+.In mandoc.h ,
+implemented in
+.Pa read.c .
.It Fn mparse_result
Obtain the result of a parse.
Only successful parses
.Fn mparse_readfd
returned less than MANDOCLEVEL_FATAL
.Pc
-should invoke this function, in which case one of the two pointers will
+should invoke this function, in which case one of the three pointers will
be filled in.
+Declared in
+.In mandoc.h ,
+implemented in
+.Pa read.c .
.It Fn mparse_strerror
Return a statically-allocated string representation of an error code.
+Declared in
+.In mandoc.h ,
+implemented in
+.Pa read.c .
.It Fn mparse_strlevel
Return a statically-allocated string representation of a level code.
+Declared in
+.In mandoc.h ,
+implemented in
+.Pa read.c .
+.It Fn mparse_wait
+Bury a
+.Xr gunzip 1
+child process that was spawned with
+.Fn mparse_open .
+To be called after the parse sequence is complete.
+Not needed after
+.Fn mparse_readfd ,
+but does no harm in that case, either.
+Returns
+.Dv MANDOCLEVEL_OK
+on success and
+.Dv MANDOCLEVEL_SYSERR
+on failure, that is, when
+.Xr wait 2
+fails, or when
+.Xr gunzip 1
+died from a signal or exited with non-zero status.
+Declared in
+.In mandoc.h ,
+implemented in
+.Pa read.c .
.El
.Ss Variables
.Bl -ohang
.Xr mdoc 7
and
.Xr man 7
-syntax trees.
+syntax trees and strings.
+.Ss Man and Mdoc Strings
+Strings may be extracted from mdoc and man meta-data, or from text
+nodes (MDOC_TEXT and MAN_TEXT, respectively).
+These strings have special non-printing formatting cues embedded in the
+text itself, as well as
+.Xr roff 7
+escapes preserved from input.
+Implementing systems will need to handle both situations to produce
+human-readable text.
+In general, strings may be assumed to consist of 7-bit ASCII characters.
+.Pp
+The following non-printing characters may be embedded in text strings:
+.Bl -tag -width Ds
+.It Dv ASCII_NBRSP
+A non-breaking space character.
+.It Dv ASCII_HYPH
+A soft hyphen.
+.It Dv ASCII_BREAK
+A breakable zero-width space.
+.El
+.Pp
+Escape characters are also passed verbatim into text strings.
+An escape character is a sequence of characters beginning with the
+backslash
+.Pq Sq \e .
+To construct human-readable text, these should be intercepted with
+.Xr mandoc_escape 3
+and converted with one the functions described in
+.Xr mchars_alloc 3 .
.Ss Man Abstract Syntax Tree
This AST is governed by the ontological rules dictated in
.Xr man 7
.It ELEMENT
\(<- ELEMENT | TEXT*
.It TEXT
-\(<- [[:alpha:]]*
+\(<- [[:ascii:]]*
.El
.Pp
The only elements capable of nesting other elements are those with
-next-lint scope as documented in
+next-line scope as documented in
.Xr man 7 .
.Ss Mdoc Abstract Syntax Tree
This AST is governed by the ontological
.It TAIL
\(<- mnode*
.It TEXT
-\(<- [[:printable:],0x1e]*
+\(<- [[:ascii:]]*
.El
.Pp
Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
.Xr mandoc 1
are unable to render them in any meaningful way.
Furthermore, behaviour when encountering badly-nested blocks is not
-consistent across troff implementations, especially when using multiple
+consistent across troff implementations, especially when using multiple
levels of badly-nested blocks.
.Sh SEE ALSO
.Xr mandoc 1 ,
+.Xr mandoc_escape 3 ,
+.Xr mandoc_malloc 3 ,
+.Xr mchars_alloc 3 ,
.Xr eqn 7 ,
.Xr man 7 ,
.Xr mandoc_char 7 ,
The
.Nm
library was written by
-.An Kristaps Dzonsons Aq kristaps@bsd.lv .
+.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv .