X-Git-Url: https://git.cameronkatri.com/mandoc.git/blobdiff_plain/89fbd7032339cc31f5c189d02050375f532bf281..92d36306307652f916347656b5c31928db651c45:/mdoc.3?ds=inline diff --git a/mdoc.3 b/mdoc.3 index 0adf9302..3620b155 100644 --- a/mdoc.3 +++ b/mdoc.3 @@ -1,217 +1,210 @@ -.\" $Id: mdoc.3,v 1.10 2009/02/24 13:46:54 kristaps Exp $ +.\" $Id: mdoc.3,v 1.49 2010/08/20 01:02:07 schwarze Exp $ .\" -.\" Copyright (c) 2009 Kristaps Dzonsons +.\" Copyright (c) 2009, 2010 Kristaps Dzonsons +.\" Copyright (c) 2010 Ingo Schwarze .\" .\" Permission to use, copy, modify, and distribute this software for any -.\" purpose with or without fee is hereby granted, provided that the -.\" above copyright notice and this permission notice appear in all -.\" copies. +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. .\" -.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL -.\" WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED -.\" WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE -.\" AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL -.\" DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR -.\" PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER -.\" TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR -.\" PERFORMANCE OF THIS SOFTWARE. -.\" -.Dd $Mdocdate: February 24 2009 $ -.Dt mdoc 3 +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: August 20 2010 $ +.Dt MDOC 3 .Os -.\" SECTION .Sh NAME +.Nm mdoc , .Nm mdoc_alloc , -.Nm mdoc_parseln , .Nm mdoc_endparse , -.Nm mdoc_node , +.Nm mdoc_free , .Nm mdoc_meta , -.Nm mdoc_free +.Nm mdoc_node , +.Nm mdoc_parseln , +.Nm mdoc_reset .Nd mdoc macro compiler library -.\" SECTION .Sh SYNOPSIS -.Fd #include <mdoc.h> +.In mandoc.h +.In mdoc.h .Vt extern const char * const * mdoc_macronames; .Vt extern const char * const * mdoc_argnames; .Ft "struct mdoc *" -.Fn mdoc_alloc "void *data" "const struct mdoc_cb *cb" +.Fo mdoc_alloc +.Fa "struct regset *regs" +.Fa "void *data" +.Fa "mandocmsg msgs" +.Fc +.Ft int +.Fn mdoc_endparse "struct mdoc *mdoc" .Ft void .Fn mdoc_free "struct mdoc *mdoc" -.Ft int -.Fn mdoc_parseln "struct mdoc *mdoc" "int line" "char *buf" -.Ft "const struct mdoc_node *" -.Fn mdoc_node "struct mdoc *mdoc" .Ft "const struct mdoc_meta *" -.Fn mdoc_meta "struct mdoc *mdoc" +.Fn mdoc_meta "const struct mdoc *mdoc" +.Ft "const struct mdoc_node *" +.Fn mdoc_node "const struct mdoc *mdoc" .Ft int -.Fn mdoc_endparse "struct mdoc *mdoc" -.\" SECTION +.Fo mdoc_parseln +.Fa "struct mdoc *mdoc" +.Fa "int line" +.Fa "char *buf" +.Fc +.Ft int +.Fn mdoc_reset "struct mdoc *mdoc" .Sh DESCRIPTION The .Nm mdoc -library parses lines of mdoc input into an abstract syntax tree. -.Dq mdoc , -which is used to format BSD manual pages, is a macro package of the -.Dq roff -language. The -.Nm -library implements only those macros documented in the +library parses lines of .Xr mdoc 7 -and -.Xr mdoc.samples 7 -manuals. -.\" PARAGRAPH -.Pp -.Nm -is -.Ud -.\" PARAGRAPH +input +into an abstract syntax tree (AST). 
.Pp In general, applications initiate a parsing sequence with .Fn mdoc_alloc , -parse each line in a document with +parse each line in a document with .Fn mdoc_parseln , close the parsing session with .Fn mdoc_endparse , operate over the syntax tree returned by -.Fn mdoc_node +.Fn mdoc_node and .Fn mdoc_meta , then free all allocated memory with .Fn mdoc_free . +The +.Fn mdoc_reset +function may be used in order to reset the parser for another input +sequence. See the .Sx EXAMPLES -section for a full example. -.\" PARAGRAPH +section for a simple example. .Pp -This section further defines the +This section further defines the .Sx Types , -.Sx Functions +.Sx Functions and .Sx Variables -available to programmers. Following that, -.Sx Character Encoding -describes input format. Lastly, -.Sx Abstract Syntax Tree , -documents the output tree. -.\" SUBSECTION +available to programmers. +Following that, the +.Sx Abstract Syntax Tree +section documents the output tree. .Ss Types Both functions (see .Sx Functions ) and variables (see .Sx Variables ) may use the following types: -.Bl -ohang -offset "XXXX" -.\" LIST-ITEM +.Bl -ohang .It Vt struct mdoc An opaque type defined in .Pa mdoc.c . Its values are only used privately within the library. -.\" LIST-ITEM -.It Vt struct mdoc_cb -A set of message callbacks defined in -.Pa mdoc.h . -.\" LIST-ITEM .It Vt struct mdoc_node -A parsed node. Defined in +A parsed node. +Defined in .Pa mdoc.h . -See +See .Sx Abstract Syntax Tree for details. +.It Vt mandocmsg +A function callback type defined in +.Pa mandoc.h . .El -.\" SUBSECTION .Ss Functions Function descriptions follow: -.Bl -ohang -offset "XXXX" -.\" LIST-ITEM +.Bl -ohang .It Fn mdoc_alloc -Allocates a parsing structure. The +Allocates a parsing structure. +The .Fa data -pointer is passed to callbacks in -.Fa cb , -which are documented further in the header file. Returns NULL on -failure. If non-NULL, the pointer must be freed with +pointer is passed to +.Fa msgs . 
+Returns NULL on failure. +If non-NULL, the pointer must be freed with .Fn mdoc_free . -.\" LIST-ITEM +.It Fn mdoc_reset +Reset the parser for another parse routine. +After its use, +.Fn mdoc_parseln +behaves as if invoked for the first time. +If it returns 0, memory could not be allocated. .It Fn mdoc_free -Free all resources of a parser. The pointer is no longer valid after -invocation. -.\" LIST-ITEM +Free all resources of a parser. +The pointer is no longer valid after invocation. .It Fn mdoc_parseln -Parse a nil-terminated line of input. This line should not contain the -trailing newline. Returns 0 on failure, 1 on success. The input buffer +Parse a nil-terminated line of input. +This line should not contain the trailing newline. +Returns 0 on failure, 1 on success. +The input buffer .Fa buf is modified by this function. -.\" LIST-ITEM .It Fn mdoc_endparse -Signals that the parse is complete. Note that if +Signals that the parse is complete. +Note that if .Fn mdoc_endparse is called subsequent to .Fn mdoc_node , -the resulting tree is incomplete. Returns 0 on failure, 1 on success. -.\" LIST-ITEM +the resulting tree is incomplete. +Returns 0 on failure, 1 on success. .It Fn mdoc_node -Returns the first node of the parse. Note that if +Returns the first node of the parse. +Note that if .Fn mdoc_parseln or .Fn mdoc_endparse return 0, the tree will be incomplete. .It Fn mdoc_meta -Returns the document's parsed meta-data. If this information has not -yet been supplied or +Returns the document's parsed meta-data. +If this information has not yet been supplied or .Fn mdoc_parseln or .Fn mdoc_endparse return 0, the data will be incomplete. .El -.\" SUBSECTION .Ss Variables The following variables are also defined: -.Bl -ohang -offset "XXXX" -.\" LIST-ITEM +.Bl -ohang .It Va mdoc_macronames An array of string-ified token names. -.\" LIST-ITEM .It Va mdoc_argnames An array of string-ified token argument names. 
.El -.\" SUBSECTION -.Ss Character Encoding -The -.Xr mdoc 3 -library accepts only printable ASCII characters as defined by -.Xr isprint 3 . -Non-ASCII character sequences are escaped with an escape character -.Sq \\ -and followed by either an open-parenthesis -.Sq \&( -for two-character sequences; an open-bracket -.Sq \&[ -for n-character sequences (terminated at a close-bracket -.Sq \&] ) ; -or one of a small set of single characters for other escapes. -.\" SUBSECTION .Ss Abstract Syntax Tree -The +The .Nm -functions produce an abstract syntax tree (AST) describing the input -lines in a regular form. It may be reviewed at any time with +functions produce an abstract syntax tree (AST) describing input in a +regular form. +It may be reviewed at any time with .Fn mdoc_nodes ; however, if called before .Fn mdoc_endparse , or after -.Fn mdoc_endparse +.Fn mdoc_endparse or .Fn mdoc_parseln fail, it may be incomplete. -.\" PARAGRAPH .Pp -The AST is composed of +This AST is governed by the ontological +rules dictated in +.Xr mdoc 7 +and derives its terminology accordingly. +.Qq In-line +elements described in +.Xr mdoc 7 +are described simply as +.Qq elements . +.Pp +The AST is composed of .Vt struct mdoc_node nodes with block, head, body, element, root and text types as declared by the .Va type -field. Each node also provides its parse point (the +field. +Each node also provides its parse point (the .Va line , .Va sec , and @@ -219,162 +212,156 @@ and fields), its position in the tree (the .Va parent , .Va child , -.Va next +.Va nchild , +.Va next and -.Va prev -fields) and type-specific data (the -.Va data -field). -.\" PARAGRAPH +.Va prev +fields) and some type-specific data, in particular, for nodes generated +from macros, the generating macro in the +.Va tok +field. .Pp The tree itself is arranged according to the following normal form, where capitalised non-terminals represent nodes. 
.Pp -.Bl -tag -width "ELEMENTXX" -compact -offset "XXXX" -.\" LIST-ITEM +.Bl -tag -width "ELEMENTXX" -compact .It ROOT \(<- mnode+ .It mnode \(<- BLOCK | ELEMENT | TEXT .It BLOCK -\(<- (HEAD [TEXT])+ [BODY [TEXT]] [TAIL [TEXT]] -.It BLOCK -\(<- BODY [TEXT] [TAIL [TEXT]] +\(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]] .It ELEMENT \(<- TEXT* .It HEAD -\(<- mnode+ +\(<- mnode* .It BODY -\(<- mnode+ +\(<- mnode* [ENDBODY mnode*] .It TAIL -\(<- mnode+ +\(<- mnode* .It TEXT -\(<- [[:alpha:]]* +\(<- [[:printable:],0x1e]* .El -.\" PARAGRAPH .Pp Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of -the BLOCK production. These refer to punctuation marks. Furthermore, -although a TEXT node will generally have a non-zero-length string, in -the specific case of +the BLOCK production: these refer to punctuation marks. +Furthermore, although a TEXT node will generally have a non-zero-length +string, in the specific case of .Sq \&.Bd \-literal , an empty line will produce a zero-length string. -.\" PARAGRAPH +Multiple body parts are only found in invocations of +.Sq \&Bl \-column , +where a new body introduces a new phrase. +.Ss Badly-nested Blocks +The ENDBODY node is available to end the formatting associated +with a given block before the physical end of that block. +It has a non-null +.Va end +field, is of the BODY +.Va type , +has the same +.Va tok +as the BLOCK it is ending, and has a +.Va pending +field pointing to that BLOCK's BODY node. +It is an indirect child of that BODY node +and has no children of its own. .Pp -The rule-of-thumb for mapping node types to macros follows. In-line -elements, such as -.Sq \&.Em foo , -are classified as ELEMENT nodes, which can only contain text. -Multi-line elements, such as -.Sq \&.Sh , -are BLOCK elements, where the HEAD constitutes line contents and the -BODY constitutes subsequent lines. In-line elements with matching -pairs, such as -.Sq \&.So -and -.Sq \&.Sc , -are BLOCK elements with no HEAD tag. 
The only exception to this is -.Sq \&.Eo +An ENDBODY node is generated when a block ends while one of its child +blocks is still open, like in the following example: +.Bd -literal -offset indent +\&.Ao ao +\&.Bo bo ac +\&.Ac bc +\&.Bc end +.Ed +.Pp +This example results in the following block structure: +.Bd -literal -offset indent +BLOCK Ao + HEAD Ao + BODY Ao + TEXT ao + BLOCK Bo, pending -> Ao + HEAD Bo + BODY Bo + TEXT bo + TEXT ac + ENDBODY Ao, pending -> Ao + TEXT bc +TEXT end +.Ed +.Pp +Here, the formatting of the +.Sq \&Ao +block extends from TEXT ao to TEXT ac, +while the formatting of the +.Sq \&Bo +block extends from TEXT bo to TEXT bc. +It renders as follows in +.Fl T Ns Cm ascii +mode: +.Pp +.Dl bc] end +.Pp +Support for badly-nested blocks is only provided for backward +compatibility with some older +.Xr mdoc 7 +implementations. +Using badly-nested blocks is +.Em strongly discouraged : +the +.Fl T Ns Cm html and -.Sq \&.Ec , -which has a HEAD and TAIL node corresponding to the enclosure string. -TEXT nodes, obviously, constitute text, and the ROOT node is the -document's root. -.\" SECTION +.Fl T Ns Cm xhtml +front-ends are unable to render them in any meaningful way. +Furthermore, behaviour when encountering badly-nested blocks is not +consistent across troff implementations, especially when using multiple +levels of badly-nested blocks. .Sh EXAMPLES The following example reads lines from stdin and parses them, operating -on the finished parse tree with +on the finished parse tree with .Fn parsed . -Note that, if the last line of the file isn't newline-terminated, this -will truncate the file's last character (see -.Xr fgetln 3 ) . -Further, this example does not error-check nor free memory upon failure. -.Bd -literal -offset "XXXX" +This example does not error-check nor free memory upon failure. 
+.Bd -literal -offset indent +struct regset regs; struct mdoc *mdoc; -struct mdoc_node *node; +const struct mdoc_node *node; char *buf; size_t len; int line; +bzero(&regs, sizeof(struct regset)); line = 1; -mdoc = mdoc_alloc(NULL, NULL); +mdoc = mdoc_alloc(&regs, NULL, NULL); +buf = NULL; +alloc_len = 0; -while ((buf = fgetln(fp, &len))) { - buf[len - 1] = '\\0'; - if ( ! mdoc_parseln(mdoc, line, buf)) - errx(1, "mdoc_parseln"); - line++; +while ((len = getline(&buf, &alloc_len, stdin)) >= 0) { + if (len && buflen[len - 1] = '\en') + buf[len - 1] = '\e0'; + if ( ! mdoc_parseln(mdoc, line, buf)) + errx(1, "mdoc_parseln"); + line++; } if ( ! mdoc_endparse(mdoc)) - errx(1, "mdoc_endparse"); + errx(1, "mdoc_endparse"); if (NULL == (node = mdoc_node(mdoc))) - errx(1, "mdoc_node"); + errx(1, "mdoc_node"); parsed(mdoc, node); mdoc_free(mdoc); .Ed -.\" SECTION +.Pp +Please see +.Pa main.c +in the source archive for a rigorous reference. .Sh SEE ALSO -.Xr mdoc 7 , -.Xr mdoc.samples 7 , -.Xr groff 1 , -.Xr mdocml 1 -.\" SECTION +.Xr mandoc 1 , +.Xr mdoc 7 .Sh AUTHORS The .Nm -utility was written by -.An Kristaps Dzonsons Aq kristaps@kth.se . -.\" SECTION -.Sh BUGS -Bugs, un-implemented macros and incompabilities are documented in this -section. The baseline for determining whether macro parsing is -.Qq incompatible -is the default -.Xr groff 1 -system bundled with -.Ox . -.\" PARAGRAPH -.Pp -Un-implemented: the -.Sq \&Xc -and -.Sq \&Xo -macros aren't handled when used to span lines for the -.Sq \&It -macro. Such usage is specifically discouraged in -.Xr mdoc.samples 7 . -.\" PARAGRAPH -.Pp -Bugs: when -.Sq \&It \-column -is invoked, whitespace is not stripped around -.Sq \&Ta -or tab-character separators. -.\" PARAGRAPH -.Pp -Bugs: elements within columns for -.Sq \&It \-column -are not yet supported. -.\" PARAGRAPH -.Pp -Incompatible: the -.Sq \&At -macro only accepts a single parameter. 
Furthermore, several macros -.Pf ( Sq \&Pp , -.Sq \&It , -and possibly others) accept multiple arguments with a warning. -.\" PARAGRAPH -.Pp -Incompatible: only those macros specified by -.Xr mdoc.samples 7 -and -.Xr mdoc 7 -for -.Ox -are supported; support for -.Nx -and other -.Bx -systems is in progress. +library was written by +.An Kristaps Dzonsons Aq kristaps@bsd.lv .