X-Git-Url: https://git.cameronkatri.com/mandoc.git/blobdiff_plain/9019392a2931ff28d10dc8c3b961c1263fd4646f..101604fb661df3209a20d07b6b9070d8d3fbffac:/mandoc.3 diff --git a/mandoc.3 b/mandoc.3 index bc6aa904..4ecfbdeb 100644 --- a/mandoc.3 +++ b/mandoc.3 @@ -1,7 +1,7 @@ -.\" $Id: mandoc.3,v 1.20 2013/09/16 22:54:38 schwarze Exp $ +.\" $Id: mandoc.3,v 1.44 2018/12/30 00:49:55 schwarze Exp $ .\" .\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons -.\" Copyright (c) 2010 Ingo Schwarze +.\" Copyright (c) 2010-2017 Ingo Schwarze .\" .\" Permission to use, copy, modify, and distribute this software for any .\" purpose with or without fee is hereby granted, provided that the above @@ -15,106 +15,48 @@ .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. .\" -.Dd $Mdocdate: September 16 2013 $ +.Dd $Mdocdate: December 30 2018 $ .Dt MANDOC 3 .Os .Sh NAME .Nm mandoc , -.Nm mandoc_escape , -.Nm man_meta , -.Nm man_mparse , -.Nm man_node , -.Nm mchars_alloc , -.Nm mchars_free , -.Nm mchars_num2char , -.Nm mchars_num2uc , -.Nm mchars_spec2cp , -.Nm mchars_spec2str , -.Nm mdoc_meta , -.Nm mdoc_node , +.Nm deroff , .Nm mparse_alloc , +.Nm mparse_copy , .Nm mparse_free , -.Nm mparse_getkeep , -.Nm mparse_keep , +.Nm mparse_open , .Nm mparse_readfd , .Nm mparse_reset , -.Nm mparse_result , -.Nm mparse_strerror , -.Nm mparse_strlevel +.Nm mparse_result .Nd mandoc macro compiler library -.Sh LIBRARY -.Lb mandoc .Sh SYNOPSIS -.In man.h -.In mdoc.h +.In sys/types.h +.In stdio.h .In mandoc.h -.Ft "enum mandoc_esc" -.Fo mandoc_escape -.Fa "const char **end" -.Fa "const char **start" -.Fa "int *sz" -.Fc -.Ft "const struct man_meta *" -.Fo man_meta -.Fa "const struct man *man" -.Fc -.Ft "const struct mparse *" -.Fo man_mparse -.Fa "const struct man *man" -.Fc -.Ft "const struct man_node *" -.Fo man_node -.Fa "const struct man *man" -.Fc -.Ft "struct mchars *" -.Fn mchars_alloc "void" -.Ft void -.Fn 
mchars_free "struct mchars *p" -.Ft char -.Fn mchars_num2char "const char *cp" "size_t sz" -.Ft int -.Fn mchars_num2uc "const char *cp" "size_t sz" -.Ft "const char *" -.Fo mchars_spec2str -.Fa "const struct mchars *p" -.Fa "const char *cp" -.Fa "size_t sz" -.Fa "size_t *rsz" -.Fc -.Ft int -.Fo mchars_spec2cp -.Fa "const struct mchars *p" -.Fa "const char *cp" -.Fa "size_t sz" -.Fc -.Ft "const struct mdoc_meta *" -.Fo mdoc_meta -.Fa "const struct mdoc *mdoc" -.Fc -.Ft "const struct mdoc_node *" -.Fo mdoc_node -.Fa "const struct mdoc *mdoc" -.Fc -.Ft void +.Pp +.Fd "#define ASCII_NBRSP" +.Fd "#define ASCII_HYPH" +.Fd "#define ASCII_BREAK" +.Ft struct mparse * .Fo mparse_alloc -.Fa "enum mparset type" -.Fa "enum mandoclevel wlevel" -.Fa "mandocmsg msg" -.Fa "void *msgarg" +.Fa "int options" +.Fa "enum mandoc_os os_e" +.Fa "char *os_s" .Fc .Ft void .Fo mparse_free .Fa "struct mparse *parse" .Fc .Ft void -.Fo mparse_getkeep +.Fo mparse_copy .Fa "const struct mparse *parse" .Fc -.Ft void -.Fo mparse_keep +.Ft int +.Fo mparse_open .Fa "struct mparse *parse" +.Fa "const char *fname" .Fc -.Ft "enum mandoclevel" +.Ft void .Fo mparse_readfd .Fa "struct mparse *parse" .Fa "int fd" @@ -124,25 +66,25 @@ .Fo mparse_reset .Fa "struct mparse *parse" .Fc -.Ft void +.Ft struct roff_meta * .Fo mparse_result .Fa "struct mparse *parse" -.Fa "struct mdoc **mdoc" -.Fa "struct man **man" -.Fc -.Ft "const char *" -.Fo mparse_strerror -.Fa "enum mandocerr" .Fc -.Ft "const char *" -.Fo mparse_strlevel -.Fa "enum mandoclevel" +.In roff.h +.Ft void +.Fo deroff +.Fa "char **dest" +.Fa "const struct roff_node *node" .Fc -.Vt extern const char * const * man_macronames; +.In sys/types.h +.In mandoc.h +.In mdoc.h .Vt extern const char * const * mdoc_argnames; .Vt extern const char * const * mdoc_macronames; -.Fd "#define ASCII_NBRSP" -.Fd "#define ASCII_HYPH" +.In sys/types.h +.In mandoc.h +.In man.h +.Vt extern const char * const * man_macronames; .Sh DESCRIPTION The .Nm mandoc @@ -165,56 +107,59 
@@ The following describes a general parse sequence: .Bl -enum .It initiate a parsing sequence with +.Xr mchars_alloc 3 +and .Fn mparse_alloc ; .It -parse files or file descriptors with +open a file with +.Xr open 2 +or +.Fn mparse_open ; +.It +parse it with .Fn mparse_readfd ; .It -retrieve a parsed syntax tree, if the parse was successful, with +close it with +.Xr close 2 ; +.It +retrieve the syntax tree with .Fn mparse_result ; .It -iterate over parse nodes with -.Fn mdoc_node -or -.Fn man_node ; +if information about the validity of the input is needed, fetch it with +.Fn mparse_updaterc ; +.It +iterate over parse nodes starting from the +.Fa first +member of the returned +.Vt struct roff_meta ; .It free all allocated memory with -.Fn mparse_free , +.Fn mparse_free +and +.Xr mchars_free 3 , or invoke .Fn mparse_reset -and parse new files. +and go back to step 2 to parse new files. .El -.Pp -The -.Nm -library also contains routines for translating character strings into glyphs -.Pq see Fn mchars_alloc -and parsing escape sequences from strings -.Pq see Fn mandoc_escape . .Sh REFERENCE This section documents the functions, types, and variables available via -.In mandoc.h . +.In mandoc.h , +with the exception of those documented in +.Xr mandoc_escape 3 +and +.Xr mchars_alloc 3 . .Ss Types .Bl -ohang -.It Vt "enum mandoc_esc" -An escape sequence classification. .It Vt "enum mandocerr" -A fatal error, error, or warning message during parsing. +An error or warning message during parsing. .It Vt "enum mandoclevel" A classification of an -.Vt "enum mandoclevel" +.Vt "enum mandocerr" as regards system operation. -.It Vt "struct mchars" -An opaque pointer to an object allowing for translation between -character strings and glyphs. -See -.Fn mchars_alloc . -.It Vt "enum mparset" -The type of parser when reading input. -This should usually be -.Dv MPARSE_AUTO -for auto-detection. +See the DIAGNOSTICS section in +.Xr mandoc 1 +regarding the meanings of the levels. 
.It Vt "struct mparse" An opaque pointer to a running parse sequence. Created with @@ -224,134 +169,82 @@ and freed with This may be used across parsed input if .Fn mparse_reset is called between parses. -.It Vt "mandocmsg" -A prototype for a function to handle fatal error, error, and warning -messages emitted by the parser. .El .Ss Functions .Bl -ohang -.It Fn mandoc_escape -Scan an escape sequence, i.e., a character string beginning with -.Sq \e . -Pass a pointer to the character after the -.Sq \e -as -.Va end ; -it will be set to the supremum of the parsed escape sequence unless -returning -.Dv ESCAPE_ERROR , -in which case the string is bogus and should be -thrown away. -If not -.Dv ESCAPE_ERROR -or -.Dv ESCAPE_IGNORE , -.Va start -is set to the first relevant character of the substring (font, glyph, -whatever) of length -.Va sz . -Both -.Va start -and -.Va sz -may be -.Dv NULL . -Declared in -.In mandoc.h , -implemented in -.Pa mandoc.c . -.It Fn man_meta -Obtain the meta-data of a successful parse. -This may only be used on a pointer returned by -.Fn mparse_result . -Declared in -.In man.h , -implemented in -.Pa man.c . -.It Fn man_mparse -Get the parser used for the current output. -Declared in -.In man.h , -implemented in -.Pa man.c . -.It Fn man_node -Obtain the root node of a successful parse. -This may only be used on a pointer returned by -.Fn mparse_result . -Declared in -.In man.h , -implemented in -.Pa man.c . -.It Fn mchars_alloc -Allocate an -.Vt "struct mchars *" -object for translating special characters into glyphs. -See -.Xr mandoc_char 7 -for an overview of special characters. -The object must be freed with -.Fn mchars_free . -Declared in -.In mandoc.h , -implemented in -.Pa chars.c . -.It Fn mchars_free -Free an object created with -.Fn mchars_alloc . -Declared in -.In mandoc.h , -implemented in -.Pa chars.c . -.It Fn mchars_num2char -Convert a character index (e.g., the \eN\(aq\(aq escape) into a -printable ASCII character. 
-Returns \e0 (the nil character) if the input sequence is malformed. -Declared in -.In mandoc.h , -implemented in -.Pa chars.c . -.It Fn mchars_num2uc -Convert a hexadecimal character index (e.g., the \e[uNNNN] escape) into -a Unicode codepoint. -Returns \e0 (the nil character) if the input sequence is malformed. -Declared in -.In mandoc.h , -implemented in -.Pa chars.c . -.It Fn mchars_spec2cp -Convert a special character into a valid Unicode codepoint. -Returns \-1 on failure or a non-zero Unicode codepoint on success. -Declared in -.In mandoc.h , -implemented in -.Pa chars.c . -.It Fn mchars_spec2str -Convert a special character into an ASCII string. -Returns -.Dv NULL -on failure. -Declared in -.In mandoc.h , -implemented in -.Pa chars.c . -.It Fn mdoc_meta -Obtain the meta-data of a successful parse. -This may only be used on a pointer returned by -.Fn mparse_result . -Declared in -.In mdoc.h , -implemented in -.Pa mdoc.c . -.It Fn mdoc_node -Obtain the root node of a successful parse. -This may only be used on a pointer returned by -.Fn mparse_result . -Declared in -.In mdoc.h , -implemented in -.Pa mdoc.c . +.It Fn deroff +Obtain a text-only representation of a +.Vt struct roff_node , +including text contained in its child nodes. +To be used on children of the +.Fa first +member of +.Vt struct roff_meta . +When it is no longer needed, the pointer returned from +.Fn deroff +can be passed to +.Xr free 3 . .It Fn mparse_alloc Allocate a parser. +The arguments have the following effect: +.Bl -tag -offset 5n -width inttype +.It Ar options +When the +.Dv MPARSE_MDOC +or +.Dv MPARSE_MAN +bit is set, only that parser is used. +Otherwise, the document type is automatically detected. +.Pp +When the +.Dv MPARSE_SO +bit is set, +.Xr roff 7 +.Ic \&so +file inclusion requests are always honoured. +Otherwise, if the request is the only content in an input file, +only the file name is remembered, to be returned in the +.Fa sodest +field of +.Vt struct roff_meta . 
+.Pp +When the +.Dv MPARSE_QUICK +bit is set, parsing is aborted after the NAME section. +This is for example useful in +.Xr makewhatis 8 +.Fl Q +to quickly build minimal databases. +.Pp +When the +.Dv MPARSE_VALIDATE +bit is set, +.Fn mparse_result +runs the validation functions before returning the syntax tree. +This is almost always required, except in certain debugging scenarios, +for example to dump unvalidated syntax trees. +.It Ar os_e +Operating system to check base system conventions for. +If +.Dv MANDOC_OS_OTHER , +the system is automatically detected from +.Ic \&Os , +.Fl Ios , +or +.Xr uname 3 . +.It Ar os_s +A default string for the +.Xr mdoc 7 +.Ic \&Os +macro, overriding the +.Dv OSNAME +preprocessor definition and the results of +.Xr uname 3 . +Passing +.Dv NULL +sets no default. +.El +.Pp The same parser may be used for multiple files so long as .Fn mparse_reset is called between parses. @@ -368,35 +261,40 @@ Declared in .In mandoc.h , implemented in .Pa read.c . -.It Fn mparse_getkeep -Acquire the keep buffer. -Must follow a call of -.Fn mparse_keep . +.It Fn mparse_copy +Dump a copy of the input to the standard output; used for +.Fl man T Ns Cm man . Declared in .In mandoc.h , implemented in .Pa read.c . -.It Fn mparse_keep -Instruct the parser to retain a copy of its parsed input. -This can be acquired with subsequent -.Fn mparse_getkeep -calls. +.It Fn mparse_open +Open the file for reading. +If that fails and +.Fa fname +does not already end in +.Ql .gz , +try again after appending +.Ql .gz . +Save the information whether the file is zipped or not. +Return a file descriptor open for reading or -1 on failure. +It can be passed to +.Fn mparse_readfd +or used directly. Declared in .In mandoc.h , implemented in .Pa read.c . .It Fn mparse_readfd -Parse a file or file descriptor. -If -.Va fd -is -1, -.Va fname -is opened for reading. -Otherwise, -.Va fname -is assumed to be the name associated with -.Va fd . 
-This may be called multiple times with different parameters; however, +Parse a file descriptor opened with +.Xr open 2 +or +.Fn mparse_open . +Pass the associated filename in +.Va fname . +This function may be called multiple times with different parameters; however, +.Xr close 2 +and .Fn mparse_reset should be invoked between parses. Declared in @@ -413,26 +311,6 @@ implemented in .Pa read.c . .It Fn mparse_result Obtain the result of a parse. -Only successful parses -.Po -i.e., those where -.Fn mparse_readfd -returned less than MANDOCLEVEL_FATAL -.Pc -should invoke this function, in which case one of the two pointers will -be filled in. -Declared in -.In mandoc.h , -implemented in -.Pa read.c . -.It Fn mparse_strerror -Return a statically-allocated string representation of an error code. -Declared in -.In mandoc.h , -implemented in -.Pa read.c . -.It Fn mparse_strlevel -Return a statically-allocated string representation of a level code. Declared in .In mandoc.h , implemented in @@ -441,13 +319,19 @@ implemented in .Ss Variables .Bl -ohang .It Va man_macronames -The string representation of a man macro as indexed by +The string representation of a +.Xr man 7 +macro as indexed by .Vt "enum mant" . .It Va mdoc_argnames -The string representation of a mdoc macro argument as indexed by +The string representation of an +.Xr mdoc 7 +macro argument as indexed by .Vt "enum mdocargt" . .It Va mdoc_macronames -The string representation of a mdoc macro as indexed by +The string representation of an +.Xr mdoc 7 +macro as indexed by .Vt "enum mdoct" . .El .Sh IMPLEMENTATION NOTES @@ -473,6 +357,8 @@ The following non-printing characters may be embedded in text strings: A non-breaking space character. .It Dv ASCII_HYPH A soft hyphen. +.It Dv ASCII_BREAK +A breakable zero-width space. .El .Pp Escape characters are also passed verbatim into text strings. @@ -480,26 +366,24 @@ An escape character is a sequence of characters beginning with the backslash .Pq Sq \e . 
To construct human-readable text, these should be intercepted with -.Fn mandoc_escape -and converted with one of -.Fn mchars_num2char , -.Fn mchars_spec2str , -and so on. +.Xr mandoc_escape 3 +and converted with one of the functions described in +.Xr mchars_alloc 3 . .Ss Man Abstract Syntax Tree This AST is governed by the ontological rules dictated in .Xr man 7 and derives its terminology accordingly. .Pp The AST is composed of -.Vt struct man_node +.Vt struct roff_node nodes with element, root and text types as declared by the .Va type field. Each node also provides its parse point (the .Va line , -.Va sec , +.Va pos , and -.Va pos +.Va sec fields), its position in the tree (the .Va parent , .Va child , @@ -529,7 +413,7 @@ where capitalised non-terminals represent nodes. .El .Pp The only elements capable of nesting other elements are those with -next-lint scope as documented in +next-line scope as documented in .Xr man 7 . .Ss Mdoc Abstract Syntax Tree This AST is governed by the ontological @@ -543,20 +427,20 @@ are described simply as .Qq elements . .Pp The AST is composed of -.Vt struct mdoc_node +.Vt struct roff_node nodes with block, head, body, element, root and text types as declared by the .Va type field. Each node also provides its parse point (the .Va line , -.Va sec , +.Va pos , and -.Va pos +.Va sec fields), its position in the tree (the .Va parent , .Va child , -.Va nchild , +.Va last , .Va next and .Va prev @@ -640,10 +524,10 @@ TEXT end .Ed .Pp Here, the formatting of the -.Sq \&Ao +.Ic \&Ao block extends from TEXT ao to TEXT ac, while the formatting of the -.Sq \&Bo +.Ic \&Bo block extends from TEXT bo to TEXT bc. It renders as follows in .Fl T Ns Cm ascii @@ -659,16 +543,21 @@ Using badly-nested blocks is .Em strongly discouraged ; for example, the .Fl T Ns Cm html -and -.Fl T Ns Cm xhtml -front-ends to +front-end to .Xr mandoc 1 -are unable to render them in any meaningful way. +is unable to render them in any meaningful way. 
Furthermore, behaviour when encountering badly-nested blocks is not -consistent across troff implementations, especially when using multiple +consistent across troff implementations, especially when using multiple levels of badly-nested blocks. .Sh SEE ALSO .Xr mandoc 1 , +.Xr man.cgi 3 , +.Xr mandoc_escape 3 , +.Xr mandoc_headers 3 , +.Xr mandoc_malloc 3 , +.Xr mansearch 3 , +.Xr mchars_alloc 3 , +.Xr tbl 3 , .Xr eqn 7 , .Xr man 7 , .Xr mandoc_char 7 , @@ -676,7 +565,10 @@ levels of badly-nested blocks. .Xr roff 7 , .Xr tbl 7 .Sh AUTHORS +.An -nosplit The .Nm library was written by -.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv . +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv +and is maintained by +.An Ingo Schwarze Aq Mt schwarze@openbsd.org .