X-Git-Url: https://git.cameronkatri.com/mandoc.git/blobdiff_plain/a2bdf1f864112905d51fe6770a86ecd3823ce2c9..f1120c542b0577e236401251c9af324555049408:/mandoc.3 diff --git a/mandoc.3 b/mandoc.3 index acaf4e59..4d0b20d6 100644 --- a/mandoc.3 +++ b/mandoc.3 @@ -1,4 +1,4 @@ -.\" $Id: mandoc.3,v 1.8 2011/05/17 12:22:15 kristaps Exp $ +.\" $Id: mandoc.3,v 1.17 2012/01/13 15:27:14 joerg Exp $ .\" .\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons .\" Copyright (c) 2010 Ingo Schwarze @@ -15,26 +15,27 @@ .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. .\" -.Dd $Mdocdate: May 17 2011 $ +.Dd $Mdocdate: January 13 2012 $ .Dt MANDOC 3 .Os .Sh NAME .Nm mandoc , .Nm mandoc_escape , .Nm man_meta , +.Nm man_mparse , .Nm man_node , .Nm mchars_alloc , .Nm mchars_free , .Nm mchars_num2char , .Nm mchars_num2uc , -.Nm mchars_res2cp , -.Nm mchars_res2str , .Nm mchars_spec2cp , .Nm mchars_spec2str , .Nm mdoc_meta , .Nm mdoc_node , .Nm mparse_alloc , .Nm mparse_free , +.Nm mparse_getkeep , +.Nm mparse_keep , .Nm mparse_readfd , .Nm mparse_reset , .Nm mparse_result , @@ -49,14 +50,18 @@ .In mandoc.h .Ft "enum mandoc_esc" .Fo mandoc_escape -.Fa "const char **in" -.Fa "const char **seq" -.Fa "int *len" +.Fa "const char **end" +.Fa "const char **start" +.Fa "int *sz" .Fc .Ft "const struct man_meta *" .Fo man_meta .Fa "const struct man *man" .Fc +.Ft "const struct mparse *" +.Fo man_mparse +.Fa "const struct man *man" +.Fc .Ft "const struct man_node *" .Fo man_node .Fa "const struct man *man" @@ -70,29 +75,15 @@ .Ft int .Fn mchars_num2uc "const char *cp" "size_t sz" .Ft "const char *" -.Fo mchars_res2str -.Fa "struct mchars *p" -.Fa "const char *cp" -.Fa "size_t sz" -.Fa "size_t *rsz" -.Fc -.Ft int -.Fo mchars_res2cp -.Fa "struct mchars *p" -.Fa "const char *cp" -.Fa "size_t sz" -.Ft "const char *" -.Fc -.Ft "const char *" .Fo mchars_spec2str -.Fa "struct mchars *p" +.Fa "const struct mchars *p" .Fa 
"const char *cp" .Fa "size_t sz" .Fa "size_t *rsz" .Fc .Ft int .Fo mchars_spec2cp -.Fa "struct mchars *p" +.Fa "const struct mchars *p" .Fa "const char *cp" .Fa "size_t sz" .Ft "const char *" @@ -116,6 +107,14 @@ .Fo mparse_free .Fa "struct mparse *parse" .Fc +.Ft "const char *" +.Fo mparse_getkeep +.Fa "const struct mparse *parse" +.Fc +.Ft void +.Fo mparse_keep +.Fa "struct mparse *parse" +.Fc .Ft "enum mandoclevel" .Fo mparse_readfd .Fa "struct mparse *parse" @@ -193,9 +192,6 @@ library also contains routines for translating character strings into glyphs .Pq see Fn mchars_alloc and parsing escape sequences from strings .Pq see Fn mandoc_escape . -.Pp -This library is -.Ud .Sh REFERENCE This section documents the functions, types, and variables available via @@ -203,28 +199,54 @@ via .Ss Types .Bl -ohang .It Vt "enum mandoc_esc" +An escape sequence classification. .It Vt "enum mandocerr" +A fatal error, error, or warning message during parsing. .It Vt "enum mandoclevel" +A classification of an +.Vt "enum mandocerr" +as regards system operation. .It Vt "struct mchars" An opaque pointer to an object allowing for translation between character strings and glyphs. See .Fn mchars_alloc . .It Vt "enum mparset" +The type of parser when reading input. +This should usually be +.Dv MPARSE_AUTO +for auto-detection. .It Vt "struct mparse" +An opaque pointer to a running parse sequence. +Created with +.Fn mparse_alloc +and freed with +.Fn mparse_free . +This may be used across parsed input if +.Fn mparse_reset +is called between parses. .It Vt "mandocmsg" +A prototype for a function to handle fatal error, error, and warning +messages emitted by the parser. .El .Ss Functions .Bl -ohang .It Fn mandoc_escape Scan an escape sequence, i.e., a character string beginning with .Sq \e . 
-Pass a pointer to this string as +Pass a pointer to the character after the +.Sq \e +as .Va end ; it will be set to the supremum of the parsed escape sequence unless -returning ESCAPE_ERROR, in which case the string is bogus and should be +returning +.Dv ESCAPE_ERROR , +in which case the string is bogus and should be thrown away. -If not ESCAPE_ERROR or ESCAPE_IGNORE, +If not +.Dv ESCAPE_ERROR +or +.Dv ESCAPE_IGNORE , .Va start is set to the first relevant character of the substring (font, glyph, whatever) of length @@ -233,11 +255,14 @@ Both .Va start and .Va sz -may be NULL. +may be +.Dv NULL . .It Fn man_meta Obtain the meta-data of a successful parse. This may only be used on a pointer returned by .Fn mparse_result . +.It Fn man_mparse +Get the parser used for the current output. .It Fn man_node Obtain the root node of a successful parse. This may only be used on a pointer returned by @@ -262,22 +287,14 @@ Returns \e0 (the nil character) if the input sequence is malformed. Convert a hexadecimal character index (e.g., the \e[uNNNN] escape) into a Unicode codepoint. Returns \e0 (the nil character) if the input sequence is malformed. -.It Fn mchars_res2cp -Convert a predefined character into a valid Unicode codepoint. -Returns \-1 on failure and 0 if no code-point exists (if this occurs, -the caller should fall back to -.Fn mchars_res2str ) . -.It Fn mchars_res2str -Convert a predefined character into an ASCII string. -Returns NULL on failure. .It Fn mchars_spec2cp Convert a special character into a valid Unicode codepoint. -Returns \-1 on failure and 0 if no code-point exists (if this occurs, -the caller should fall back to -.Fn mchars_spec2str ) . +Returns \-1 on failure or a non-zero Unicode codepoint on success. .It Fn mchars_spec2str Convert a special character into an ASCII string. -Returns NULL on failure. +Returns +.Dv NULL +on failure. .It Fn mdoc_meta Obtain the meta-data of a successful parse. 
This may only be used on a pointer returned by @@ -296,6 +313,15 @@ must be called to free the memory allocated by this function. .It Fn mparse_free Free all memory allocated by .Fn mparse_alloc . +.It Fn mparse_getkeep +Acquire the keep buffer. +Must follow a call of +.Fn mparse_keep . +.It Fn mparse_keep +Instruct the parser to retain a copy of its parsed input. +This can be acquired with subsequent +.Fn mparse_getkeep +calls. .It Fn mparse_readfd Parse a file or file descriptor. If @@ -346,7 +372,36 @@ This section consists of structural documentation for .Xr mdoc 7 and .Xr man 7 -syntax trees. +syntax trees and strings. +.Ss Man and Mdoc Strings +Strings may be extracted from mdoc and man meta-data, or from text +nodes (MDOC_TEXT and MAN_TEXT, respectively). +These strings have special non-printing formatting cues embedded in the +text itself, as well as +.Xr roff 7 +escapes preserved from input. +Implementing systems will need to handle both situations to produce +human-readable text. +In general, strings may be assumed to consist of 7-bit ASCII characters. +.Pp +The following non-printing characters may be embedded in text strings: +.Bl -tag -width Ds +.It Dv ASCII_NBRSP +A non-breaking space character. +.It Dv ASCII_HYPH +A soft hyphen. +.El +.Pp +Escape sequences are also passed verbatim into text strings. +An escape sequence is a sequence of characters beginning with the +backslash +.Pq Sq \e . +To construct human-readable text, these should be intercepted with +.Fn mandoc_escape +and converted with one of +.Fn mchars_num2char , +.Fn mchars_spec2str , +and so on. .Ss Man Abstract Syntax Tree This AST is governed by the ontological rules dictated in .Xr man 7 @@ -387,7 +442,7 @@ where capitalised non-terminals represent nodes. .It ELEMENT \(<- ELEMENT | TEXT* .It TEXT -\(<- [[:alpha:]]* +\(<- [[:ascii:]]* .El .Pp The only elements capable of nesting other elements are those with @@ -446,7 +501,7 @@ where capitalised non-terminals represent nodes. 
.It TAIL \(<- mnode* .It TEXT -\(<- [[:printable:],0x1e]* +\(<- [[:ascii:]]* .El .Pp Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of @@ -541,4 +596,5 @@ levels of badly-nested blocks. The .Nm library was written by -.An Kristaps Dzonsons Aq kristaps@bsd.lv . +.An Kristaps Dzonsons , +.Mt kristaps@bsd.lv .