]> git.cameronkatri.com Git - mandoc.git/blob - mandoc.3
If -Tman is specified and input is -man, echo the preprocessed (`so'
[mandoc.git] / mandoc.3
1 .\" $Id: mandoc.3,v 1.14 2011/10/06 22:29:12 kristaps Exp $
2 .\"
3 .\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 .\" Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
5 .\"
6 .\" Permission to use, copy, modify, and distribute this software for any
7 .\" purpose with or without fee is hereby granted, provided that the above
8 .\" copyright notice and this permission notice appear in all copies.
9 .\"
10 .\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 .\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 .\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 .\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 .\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 .\"
18 .Dd $Mdocdate: October 6 2011 $
19 .Dt MANDOC 3
20 .Os
21 .Sh NAME
22 .Nm mandoc ,
23 .Nm mandoc_escape ,
24 .Nm man_meta ,
25 .Nm man_mparse ,
26 .Nm man_node ,
27 .Nm mchars_alloc ,
28 .Nm mchars_free ,
29 .Nm mchars_num2char ,
30 .Nm mchars_num2uc ,
31 .Nm mchars_spec2cp ,
32 .Nm mchars_spec2str ,
33 .Nm mdoc_meta ,
34 .Nm mdoc_node ,
35 .Nm mparse_alloc ,
36 .Nm mparse_free ,
37 .Nm mparse_getkeep ,
38 .Nm mparse_keep ,
39 .Nm mparse_readfd ,
40 .Nm mparse_reset ,
41 .Nm mparse_result ,
42 .Nm mparse_strerror ,
43 .Nm mparse_strlevel
44 .Nd mandoc macro compiler library
45 .Sh LIBRARY
46 .Lb mandoc
47 .Sh SYNOPSIS
48 .In man.h
49 .In mdoc.h
50 .In mandoc.h
51 .Ft "enum mandoc_esc"
52 .Fo mandoc_escape
53 .Fa "const char **end"
54 .Fa "const char **start"
55 .Fa "int *sz"
56 .Fc
57 .Ft "const struct man_meta *"
58 .Fo man_meta
59 .Fa "const struct man *man"
60 .Fc
61 .Ft "const struct mparse *"
62 .Fo man_mparse
63 .Fa "const struct man *man"
64 .Fc
65 .Ft "const struct man_node *"
66 .Fo man_node
67 .Fa "const struct man *man"
68 .Fc
69 .Ft "struct mchars *"
70 .Fn mchars_alloc
71 .Ft void
72 .Fn mchars_free "struct mchars *p"
73 .Ft char
74 .Fn mchars_num2char "const char *cp" "size_t sz"
75 .Ft int
76 .Fn mchars_num2uc "const char *cp" "size_t sz"
77 .Ft "const char *"
78 .Fo mchars_spec2str
79 .Fa "struct mchars *p"
80 .Fa "const char *cp"
81 .Fa "size_t sz"
82 .Fa "size_t *rsz"
83 .Fc
84 .Ft int
85 .Fo mchars_spec2cp
86 .Fa "struct mchars *p"
87 .Fa "const char *cp"
88 .Fa "size_t sz"
90 .Fc
91 .Ft "const struct mdoc_meta *"
92 .Fo mdoc_meta
93 .Fa "const struct mdoc *mdoc"
94 .Fc
95 .Ft "const struct mdoc_node *"
96 .Fo mdoc_node
97 .Fa "const struct mdoc *mdoc"
98 .Fc
99 .Ft "struct mparse *"
100 .Fo mparse_alloc
101 .Fa "enum mparset type"
102 .Fa "enum mandoclevel wlevel"
103 .Fa "mandocmsg msg"
104 .Fa "void *msgarg"
105 .Fc
106 .Ft void
107 .Fo mparse_free
108 .Fa "struct mparse *parse"
109 .Fc
110 .Ft "const char *"
111 .Fo mparse_getkeep
112 .Fa "const struct mparse *parse"
113 .Fc
114 .Ft void
115 .Fo mparse_keep
116 .Fa "struct mparse *parse"
117 .Fc
118 .Ft "enum mandoclevel"
119 .Fo mparse_readfd
120 .Fa "struct mparse *parse"
121 .Fa "int fd"
122 .Fa "const char *fname"
123 .Fc
124 .Ft void
125 .Fo mparse_reset
126 .Fa "struct mparse *parse"
127 .Fc
128 .Ft void
129 .Fo mparse_result
130 .Fa "struct mparse *parse"
131 .Fa "struct mdoc **mdoc"
132 .Fa "struct man **man"
133 .Fc
134 .Ft "const char *"
135 .Fo mparse_strerror
136 .Fa "enum mandocerr"
137 .Fc
138 .Ft "const char *"
139 .Fo mparse_strlevel
140 .Fa "enum mandoclevel"
141 .Fc
142 .Vt extern const char * const * man_macronames;
143 .Vt extern const char * const * mdoc_argnames;
144 .Vt extern const char * const * mdoc_macronames;
145 .Fd "#define ASCII_NBRSP"
146 .Fd "#define ASCII_HYPH"
147 .Sh DESCRIPTION
148 The
149 .Nm mandoc
150 library parses a
151 .Ux
152 manual into an abstract syntax tree (AST).
153 .Ux
154 manuals are composed of
155 .Xr mdoc 7
156 or
157 .Xr man 7 ,
158 and may be mixed with
159 .Xr roff 7 ,
160 .Xr tbl 7 ,
161 and
162 .Xr eqn 7
163 invocations.
164 .Pp
165 The following describes a general parse sequence:
166 .Bl -enum
167 .It
168 initiate a parsing sequence with
169 .Fn mparse_alloc ;
170 .It
171 parse files or file descriptors with
172 .Fn mparse_readfd ;
173 .It
174 retrieve a parsed syntax tree, if the parse was successful, with
175 .Fn mparse_result ;
176 .It
177 iterate over parse nodes with
178 .Fn mdoc_node
179 or
180 .Fn man_node ;
181 .It
182 free all allocated memory with
183 .Fn mparse_free ,
184 or invoke
185 .Fn mparse_reset
186 and parse new files.
187 .El
188 .Pp
189 The
190 .Nm
191 library also contains routines for translating character strings into glyphs
192 .Pq see Fn mchars_alloc
193 and parsing escape sequences from strings
194 .Pq see Fn mandoc_escape .
195 .Sh REFERENCE
196 This section documents the functions, types, and variables available
197 via
198 .In mandoc.h .
199 .Ss Types
200 .Bl -ohang
201 .It Vt "enum mandoc_esc"
202 An escape sequence classification.
203 .It Vt "enum mandocerr"
204 A fatal error, error, or warning message during parsing.
205 .It Vt "enum mandoclevel"
206 A classification of an
207 .Vt "enum mandocerr"
208 as regards system operation.
209 .It Vt "struct mchars"
210 An opaque pointer to an object allowing for translation between
211 character strings and glyphs.
212 See
213 .Fn mchars_alloc .
214 .It Vt "enum mparset"
215 The type of parser when reading input.
216 This should usually be
217 .Dv MPARSE_AUTO
218 for auto-detection.
219 .It Vt "struct mparse"
220 An opaque pointer to a running parse sequence.
221 Created with
222 .Fn mparse_alloc
223 and freed with
224 .Fn mparse_free .
225 This may be used across parsed input if
226 .Fn mparse_reset
227 is called between parses.
228 .It Vt "mandocmsg"
229 A prototype for a function to handle fatal error, error, and warning
230 messages emitted by the parser.
231 .El
232 .Ss Functions
233 .Bl -ohang
234 .It Fn mandoc_escape
235 Scan an escape sequence, i.e., a character string beginning with
236 .Sq \e .
237 Pass a pointer to this string as
238 .Va end ;
239 it will be set to the supremum of the parsed escape sequence unless
240 returning
241 .Dv ESCAPE_ERROR ,
242 in which case the string is bogus and should be
243 thrown away.
244 If not
245 .Dv ESCAPE_ERROR
246 or
247 .Dv ESCAPE_IGNORE ,
248 .Va start
249 is set to the first relevant character of the substring (font, glyph,
250 whatever) of length
251 .Va sz .
252 Both
253 .Va start
254 and
255 .Va sz
256 may be
257 .Dv NULL .
258 .It Fn man_meta
259 Obtain the meta-data of a successful parse.
260 This may only be used on a pointer returned by
261 .Fn mparse_result .
262 .It Fn man_mparse
263 Get the parser used for the current output.
264 .It Fn man_node
265 Obtain the root node of a successful parse.
266 This may only be used on a pointer returned by
267 .Fn mparse_result .
268 .It Fn mchars_alloc
269 Allocate an
270 .Vt "struct mchars *"
271 object for translating special characters into glyphs.
272 See
273 .Xr mandoc_char 7
274 for an overview of special characters.
275 The object must be freed with
276 .Fn mchars_free .
277 .It Fn mchars_free
278 Free an object created with
279 .Fn mchars_alloc .
280 .It Fn mchars_num2char
281 Convert a character index (e.g., the \eN\(aq\(aq escape) into a
282 printable ASCII character.
283 Returns \e0 (the nil character) if the input sequence is malformed.
284 .It Fn mchars_num2uc
285 Convert a hexadecimal character index (e.g., the \e[uNNNN] escape) into
286 a Unicode codepoint.
287 Returns \e0 (the nil character) if the input sequence is malformed.
288 .It Fn mchars_spec2cp
289 Convert a special character into a valid Unicode codepoint.
290 Returns \-1 on failure or a non-zero Unicode codepoint on success.
291 .It Fn mchars_spec2str
292 Convert a special character into an ASCII string.
293 Returns
294 .Dv NULL
295 on failure.
296 .It Fn mdoc_meta
297 Obtain the meta-data of a successful parse.
298 This may only be used on a pointer returned by
299 .Fn mparse_result .
300 .It Fn mdoc_node
301 Obtain the root node of a successful parse.
302 This may only be used on a pointer returned by
303 .Fn mparse_result .
304 .It Fn mparse_alloc
305 Allocate a parser.
306 The same parser may be used for multiple files so long as
307 .Fn mparse_reset
308 is called between parses.
309 .Fn mparse_free
310 must be called to free the memory allocated by this function.
311 .It Fn mparse_free
312 Free all memory allocated by
313 .Fn mparse_alloc .
314 .It Fn mparse_getkeep
315 Acquire the keep buffer.
316 Must follow a call of
317 .Fn mparse_keep .
318 .It Fn mparse_keep
319 Instruct the parser to retain a copy of its parsed input.
320 This can be acquired with subsequent
321 .Fn mparse_getkeep
322 calls.
323 .It Fn mparse_readfd
324 Parse a file or file descriptor.
325 If
326 .Va fd
327 is \-1,
328 .Va fname
329 is opened for reading.
330 Otherwise,
331 .Va fname
332 is assumed to be the name associated with
333 .Va fd .
334 This may be called multiple times with different parameters; however,
335 .Fn mparse_reset
336 should be invoked between parses.
337 .It Fn mparse_reset
338 Reset a parser so that
339 .Fn mparse_readfd
340 may be used again.
341 .It Fn mparse_result
342 Obtain the result of a parse.
343 Only successful parses
344 .Po
345 i.e., those where
346 .Fn mparse_readfd
347 returned less than MANDOCLEVEL_FATAL
348 .Pc
349 should invoke this function, in which case one of the two pointers will
350 be filled in.
351 .It Fn mparse_strerror
352 Return a statically-allocated string representation of an error code.
353 .It Fn mparse_strlevel
354 Return a statically-allocated string representation of a level code.
355 .El
356 .Ss Variables
357 .Bl -ohang
358 .It Va man_macronames
359 The string representation of a man macro as indexed by
360 .Vt "enum mant" .
361 .It Va mdoc_argnames
362 The string representation of a mdoc macro argument as indexed by
363 .Vt "enum mdocargt" .
364 .It Va mdoc_macronames
365 The string representation of a mdoc macro as indexed by
366 .Vt "enum mdoct" .
367 .El
368 .Sh IMPLEMENTATION NOTES
369 This section consists of structural documentation for
370 .Xr mdoc 7
371 and
372 .Xr man 7
373 syntax trees and strings.
374 .Ss Man and Mdoc Strings
375 Strings may be extracted from mdoc and man meta-data, or from text
376 nodes (MDOC_TEXT and MAN_TEXT, respectively).
377 These strings have special non-printing formatting cues embedded in the
378 text itself, as well as
379 .Xr roff 7
380 escapes preserved from input.
381 Implementing systems will need to handle both situations to produce
382 human-readable text.
383 In general, strings may be assumed to consist of 7-bit ASCII characters.
384 .Pp
385 The following non-printing characters may be embedded in text strings:
386 .Bl -tag -width Ds
387 .It Dv ASCII_NBRSP
388 A non-breaking space character.
389 .It Dv ASCII_HYPH
390 A soft hyphen.
391 .El
392 .Pp
393 Escape characters are also passed verbatim into text strings.
394 An escape character is a sequence of characters beginning with the
395 backslash
396 .Pq Sq \e .
397 To construct human-readable text, these should be intercepted with
398 .Fn mandoc_escape
399 and converted with one of
400 .Fn mchars_num2char ,
401 .Fn mchars_spec2str ,
402 and so on.
403 .Ss Man Abstract Syntax Tree
404 This AST is governed by the ontological rules dictated in
405 .Xr man 7
406 and derives its terminology accordingly.
407 .Pp
408 The AST is composed of
409 .Vt struct man_node
410 nodes with block, head, body, element, root and text types as declared by the
411 .Va type
412 field.
413 Each node also provides its parse point (the
414 .Va line ,
415 .Va sec ,
416 and
417 .Va pos
418 fields), its position in the tree (the
419 .Va parent ,
420 .Va child ,
421 .Va next
422 and
423 .Va prev
424 fields) and some type-specific data.
425 .Pp
426 The tree itself is arranged according to the following normal form,
427 where capitalised non-terminals represent nodes.
428 .Pp
429 .Bl -tag -width "ELEMENTXX" -compact
430 .It ROOT
431 \(<- mnode+
432 .It mnode
433 \(<- ELEMENT | TEXT | BLOCK
434 .It BLOCK
435 \(<- HEAD BODY
436 .It HEAD
437 \(<- mnode*
438 .It BODY
439 \(<- mnode*
440 .It ELEMENT
441 \(<- ELEMENT | TEXT*
442 .It TEXT
443 \(<- [[:ascii:]]*
444 .El
445 .Pp
446 The only elements capable of nesting other elements are those with
447 next-line scope as documented in
448 .Xr man 7 .
449 .Ss Mdoc Abstract Syntax Tree
450 This AST is governed by the ontological
451 rules dictated in
452 .Xr mdoc 7
453 and derives its terminology accordingly.
454 .Qq In-line
455 elements described in
456 .Xr mdoc 7
457 are described simply as
458 .Qq elements .
459 .Pp
460 The AST is composed of
461 .Vt struct mdoc_node
462 nodes with block, head, body, element, root and text types as declared
463 by the
464 .Va type
465 field.
466 Each node also provides its parse point (the
467 .Va line ,
468 .Va sec ,
469 and
470 .Va pos
471 fields), its position in the tree (the
472 .Va parent ,
473 .Va child ,
474 .Va nchild ,
475 .Va next
476 and
477 .Va prev
478 fields) and some type-specific data, in particular, for nodes generated
479 from macros, the generating macro in the
480 .Va tok
481 field.
482 .Pp
483 The tree itself is arranged according to the following normal form,
484 where capitalised non-terminals represent nodes.
485 .Pp
486 .Bl -tag -width "ELEMENTXX" -compact
487 .It ROOT
488 \(<- mnode+
489 .It mnode
490 \(<- BLOCK | ELEMENT | TEXT
491 .It BLOCK
492 \(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
493 .It ELEMENT
494 \(<- TEXT*
495 .It HEAD
496 \(<- mnode*
497 .It BODY
498 \(<- mnode* [ENDBODY mnode*]
499 .It TAIL
500 \(<- mnode*
501 .It TEXT
502 \(<- [[:ascii:]]*
503 .El
504 .Pp
505 Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
506 the BLOCK production: these refer to punctuation marks.
507 Furthermore, although a TEXT node will generally have a non-zero-length
508 string, in the specific case of
509 .Sq \&.Bd \-literal ,
510 an empty line will produce a zero-length string.
511 Multiple body parts are only found in invocations of
512 .Sq \&Bl \-column ,
513 where a new body introduces a new phrase.
514 .Pp
515 The
516 .Xr mdoc 7
517 syntax tree accommodates broken block structures as well.
518 The ENDBODY node is available to end the formatting associated
519 with a given block before the physical end of that block.
520 It has a non-null
521 .Va end
522 field, is of the BODY
523 .Va type ,
524 has the same
525 .Va tok
526 as the BLOCK it is ending, and has a
527 .Va pending
528 field pointing to that BLOCK's BODY node.
529 It is an indirect child of that BODY node
530 and has no children of its own.
531 .Pp
532 An ENDBODY node is generated when a block ends while one of its child
533 blocks is still open, like in the following example:
534 .Bd -literal -offset indent
535 \&.Ao ao
536 \&.Bo bo ac
537 \&.Ac bc
538 \&.Bc end
539 .Ed
540 .Pp
541 This example results in the following block structure:
542 .Bd -literal -offset indent
543 BLOCK Ao
544     HEAD Ao
545     BODY Ao
546         TEXT ao
547         BLOCK Bo, pending -> Ao
548             HEAD Bo
549             BODY Bo
550                 TEXT bo
551                 TEXT ac
552                 ENDBODY Ao, pending -> Ao
553                 TEXT bc
554 TEXT end
555 .Ed
556 .Pp
557 Here, the formatting of the
558 .Sq \&Ao
559 block extends from TEXT ao to TEXT ac,
560 while the formatting of the
561 .Sq \&Bo
562 block extends from TEXT bo to TEXT bc.
563 It renders as follows in
564 .Fl T Ns Cm ascii
565 mode:
566 .Pp
567 .Dl <ao [bo ac> bc] end
568 .Pp
569 Support for badly-nested blocks is only provided for backward
570 compatibility with some older
571 .Xr mdoc 7
572 implementations.
573 Using badly-nested blocks is
574 .Em strongly discouraged ;
575 for example, the
576 .Fl T Ns Cm html
577 and
578 .Fl T Ns Cm xhtml
579 front-ends to
580 .Xr mandoc 1
581 are unable to render them in any meaningful way.
582 Furthermore, behaviour when encountering badly-nested blocks is not
583 consistent across troff implementations, especially when using multiple
584 levels of badly-nested blocks.
585 .Sh SEE ALSO
586 .Xr mandoc 1 ,
587 .Xr eqn 7 ,
588 .Xr man 7 ,
589 .Xr mandoc_char 7 ,
590 .Xr mdoc 7 ,
591 .Xr roff 7 ,
592 .Xr tbl 7
593 .Sh AUTHORS
594 The
595 .Nm
596 library was written by
597 .An Kristaps Dzonsons ,
598 .Mt kristaps@bsd.lv .