]> git.cameronkatri.com Git - mandoc.git/blob - mandoc.3
document new SEE ALSO .Xr warnings
[mandoc.git] / mandoc.3
1 .\" $Id: mandoc.3,v 1.26 2014/09/03 23:21:47 schwarze Exp $
2 .\"
3 .\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 .\" Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
5 .\"
6 .\" Permission to use, copy, modify, and distribute this software for any
7 .\" purpose with or without fee is hereby granted, provided that the above
8 .\" copyright notice and this permission notice appear in all copies.
9 .\"
10 .\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 .\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 .\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 .\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 .\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 .\"
18 .Dd $Mdocdate: September 3 2014 $
19 .Dt MANDOC 3
20 .Os
21 .Sh NAME
22 .Nm mandoc ,
23 .Nm man_deroff ,
24 .Nm man_meta ,
25 .Nm man_mparse ,
26 .Nm man_node ,
27 .Nm mdoc_deroff ,
28 .Nm mdoc_meta ,
29 .Nm mdoc_node ,
30 .Nm mparse_alloc ,
31 .Nm mparse_free ,
32 .Nm mparse_getkeep ,
33 .Nm mparse_keep ,
34 .Nm mparse_open ,
35 .Nm mparse_readfd ,
36 .Nm mparse_reset ,
37 .Nm mparse_result ,
38 .Nm mparse_strerror ,
39 .Nm mparse_strlevel ,
40 .Nm mparse_wait
41 .Nd mandoc macro compiler library
42 .Sh LIBRARY
43 .Lb libmandoc
44 .Sh SYNOPSIS
45 .In sys/types.h
46 .In mandoc.h
47 .Fd "#define ASCII_NBRSP"
48 .Fd "#define ASCII_HYPH"
49 .Fd "#define ASCII_BREAK"
50 .Ft struct mparse *
51 .Fo mparse_alloc
52 .Fa "int options"
53 .Fa "enum mandoclevel wlevel"
54 .Fa "mandocmsg mmsg"
55 .Fa "char *defos"
56 .Fc
57 .Ft void
58 .Fo (*mandocmsg)
59 .Fa "enum mandocerr errtype"
60 .Fa "enum mandoclevel level"
61 .Fa "const char *file"
62 .Fa "int line"
63 .Fa "int col"
64 .Fa "const char *msg"
65 .Fc
66 .Ft void
67 .Fo mparse_free
68 .Fa "struct mparse *parse"
69 .Fc
70 .Ft const char *
71 .Fo mparse_getkeep
72 .Fa "const struct mparse *parse"
73 .Fc
74 .Ft void
75 .Fo mparse_keep
76 .Fa "struct mparse *parse"
77 .Fc
78 .Ft "enum mandoclevel"
79 .Fo mparse_open
80 .Fa "struct mparse *parse"
81 .Fa "int *fd"
82 .Fa "const char *fname"
83 .Fa "pid_t *child_pid"
84 .Fc
85 .Ft "enum mandoclevel"
86 .Fo mparse_readfd
87 .Fa "struct mparse *parse"
88 .Fa "int fd"
89 .Fa "const char *fname"
90 .Fc
91 .Ft void
92 .Fo mparse_reset
93 .Fa "struct mparse *parse"
94 .Fc
95 .Ft void
96 .Fo mparse_result
97 .Fa "struct mparse *parse"
98 .Fa "struct mdoc **mdoc"
99 .Fa "struct man **man"
100 .Fa "char **sodest"
101 .Fc
102 .Ft "const char *"
103 .Fo mparse_strerror
104 .Fa "enum mandocerr"
105 .Fc
106 .Ft "const char *"
107 .Fo mparse_strlevel
108 .Fa "enum mandoclevel"
109 .Fc
110 .Ft "enum mandoclevel"
111 .Fo mparse_wait
112 .Fa "struct mparse *parse"
113 .Fa "pid_t child_pid"
114 .Fc
115 .In sys/types.h
116 .In mandoc.h
117 .In mdoc.h
118 .Ft void
119 .Fo mdoc_deroff
120 .Fa "char **dest"
121 .Fa "const struct mdoc_node *node"
122 .Fc
123 .Ft "const struct mdoc_meta *"
124 .Fo mdoc_meta
125 .Fa "const struct mdoc *mdoc"
126 .Fc
127 .Ft "const struct mdoc_node *"
128 .Fo mdoc_node
129 .Fa "const struct mdoc *mdoc"
130 .Fc
131 .Vt extern const char * const * mdoc_argnames;
132 .Vt extern const char * const * mdoc_macronames;
133 .In sys/types.h
134 .In mandoc.h
135 .In man.h
136 .Ft void
137 .Fo man_deroff
138 .Fa "char **dest"
139 .Fa "const struct man_node *node"
140 .Fc
141 .Ft "const struct man_meta *"
142 .Fo man_meta
143 .Fa "const struct man *man"
144 .Fc
145 .Ft "const struct mparse *"
146 .Fo man_mparse
147 .Fa "const struct man *man"
148 .Fc
149 .Ft "const struct man_node *"
150 .Fo man_node
151 .Fa "const struct man *man"
152 .Fc
153 .Vt extern const char * const * man_macronames;
154 .Sh DESCRIPTION
155 The
156 .Nm mandoc
157 library parses a
158 .Ux
159 manual into an abstract syntax tree (AST).
160 .Ux
161 manuals are composed of
162 .Xr mdoc 7
163 or
164 .Xr man 7 ,
165 and may be mixed with
166 .Xr roff 7 ,
167 .Xr tbl 7 ,
168 and
169 .Xr eqn 7
170 invocations.
171 .Pp
172 The following describes a general parse sequence:
173 .Bl -enum
174 .It
175 initiate a parsing sequence with
176 .Fn mparse_alloc ;
177 .It
178 parse files or file descriptors with
179 .Fn mparse_readfd ;
180 .It
181 retrieve a parsed syntax tree, if the parse was successful, with
182 .Fn mparse_result ;
183 .It
184 iterate over parse nodes with
185 .Fn mdoc_node
186 or
187 .Fn man_node ;
188 .It
189 free all allocated memory with
190 .Fn mparse_free ,
191 or invoke
192 .Fn mparse_reset
193 and parse new files.
194 .El
195 .Sh REFERENCE
196 This section documents the functions, types, and variables available
197 via
198 .In mandoc.h ,
199 with the exception of those documented in
200 .Xr mandoc_escape 3
201 and
202 .Xr mchars_alloc 3 .
203 .Ss Types
204 .Bl -ohang
205 .It Vt "enum mandocerr"
206 A fatal error, error, or warning message during parsing.
207 .It Vt "enum mandoclevel"
208 A classification of an
209 .Vt "enum mandocerr"
210 as regards system operation.
211 .It Vt "struct mparse"
212 An opaque pointer to a running parse sequence.
213 Created with
214 .Fn mparse_alloc
215 and freed with
216 .Fn mparse_free .
217 This may be used across parsed input if
218 .Fn mparse_reset
219 is called between parses.
220 .It Vt "mandocmsg"
221 A prototype for a function to handle fatal error, error, and warning
222 messages emitted by the parser.
223 .El
224 .Ss Functions
225 .Bl -ohang
226 .It Fn man_deroff
227 Obtain a text-only representation of a
228 .Vt struct man_node ,
229 including text contained in its child nodes.
230 To be used on children of the pointer returned from
231 .Fn man_node .
232 When it is no longer needed, the pointer returned from
233 .Fn man_deroff
234 can be passed to
235 .Xr free 3 .
236 .It Fn man_meta
237 Obtain the meta-data of a successful
238 .Xr man 7
239 parse.
240 This may only be used on a pointer returned by
241 .Fn mparse_result .
242 Declared in
243 .In man.h ,
244 implemented in
245 .Pa man.c .
246 .It Fn man_mparse
247 Get the parser used for the current output.
248 Declared in
249 .In man.h ,
250 implemented in
251 .Pa man.c .
252 .It Fn man_node
253 Obtain the root node of a successful
254 .Xr man 7
255 parse.
256 This may only be used on a pointer returned by
257 .Fn mparse_result .
258 Declared in
259 .In man.h ,
260 implemented in
261 .Pa man.c .
262 .It Fn mdoc_deroff
263 Obtain a text-only representation of a
264 .Vt struct mdoc_node ,
265 including text contained in its child nodes.
266 To be used on children of the pointer returned from
267 .Fn mdoc_node .
268 When it is no longer needed, the pointer returned from
269 .Fn mdoc_deroff
270 can be passed to
271 .Xr free 3 .
272 .It Fn mdoc_meta
273 Obtain the meta-data of a successful
274 .Xr mdoc 7
275 parse.
276 This may only be used on a pointer returned by
277 .Fn mparse_result .
278 Declared in
279 .In mdoc.h ,
280 implemented in
281 .Pa mdoc.c .
282 .It Fn mdoc_node
283 Obtain the root node of a successful
284 .Xr mdoc 7
285 parse.
286 This may only be used on a pointer returned by
287 .Fn mparse_result .
288 Declared in
289 .In mdoc.h ,
290 implemented in
291 .Pa mdoc.c .
292 .It Fn mparse_alloc
293 Allocate a parser.
294 The arguments have the following effect:
295 .Bl -tag -offset 5n -width inttype
296 .It Ar options
297 When the
298 .Dv MPARSE_MDOC
299 or
300 .Dv MPARSE_MAN
301 bit is set, only that parser is used.
302 Otherwise, the document type is automatically detected.
303 .Pp
304 When the
305 .Dv MPARSE_SO
306 bit is set,
307 .Xr roff 7
308 .Ic \&so
309 file inclusion requests are always honoured.
310 Otherwise, if the request is the only content in an input file,
311 only the file name is remembered, to be returned in the
312 .Fa sodest
313 argument of
314 .Fn mparse_result .
315 .Pp
316 When the
317 .Dv MPARSE_QUICK
318 bit is set, parsing is aborted after the NAME section.
319 This is for example useful in
320 .Xr makewhatis 8
321 .Fl Q
322 to quickly build minimal databases.
323 .It Ar wlevel
324 Can be set to
325 .Dv MANDOCLEVEL_FATAL ,
326 .Dv MANDOCLEVEL_ERROR ,
327 or
328 .Dv MANDOCLEVEL_WARNING .
329 Messages below the selected level will be suppressed.
330 .It Ar mmsg
331 A callback function to handle errors and warnings.
332 See
333 .Pa main.c
334 for an example.
335 .It Ar defos
336 A default string for the
337 .Xr mdoc 7
338 .Sq \&Os
339 macro, overriding the
340 .Dv OSNAME
341 preprocessor definition and the results of
342 .Xr uname 3 .
343 .El
344 .Pp
345 The same parser may be used for multiple files so long as
346 .Fn mparse_reset
347 is called between parses.
348 .Fn mparse_free
349 must be called to free the memory allocated by this function.
350 Declared in
351 .In mandoc.h ,
352 implemented in
353 .Pa read.c .
354 .It Fn mparse_free
355 Free all memory allocated by
356 .Fn mparse_alloc .
357 Declared in
358 .In mandoc.h ,
359 implemented in
360 .Pa read.c .
361 .It Fn mparse_getkeep
362 Acquire the keep buffer.
363 Must follow a call of
364 .Fn mparse_keep .
365 Declared in
366 .In mandoc.h ,
367 implemented in
368 .Pa read.c .
369 .It Fn mparse_keep
370 Instruct the parser to retain a copy of its parsed input.
371 This can be acquired with subsequent
372 .Fn mparse_getkeep
373 calls.
374 Declared in
375 .In mandoc.h ,
376 implemented in
377 .Pa read.c .
378 .It Fn mparse_open
379 If the
380 .Fa fname
381 ends in
382 .Pa .gz ,
383 open with
384 .Xr gunzip 1 ;
385 otherwise, with
386 .Xr open 2 .
387 Return a file descriptor open for reading in
388 .Fa fd ,
389 or -1 on failure.
390 It can be passed to
391 .Fn mparse_readfd
392 or used directly.
393 If applicable, return the
394 .Xr gunzip 1
395 child process ID in
396 .Fa child_pid ,
397 or otherwise 0.
398 If non-zero, it should be passed to
399 .Fn mparse_wait
400 after completing the parse sequence.
401 Declared in
402 .In mandoc.h ,
403 implemented in
404 .Pa read.c .
405 .It Fn mparse_readfd
406 Parse a file or file descriptor.
407 If
408 .Va fd
409 is -1,
410 .Va fname
411 is opened for reading.
412 Otherwise,
413 .Va fname
414 is assumed to be the name associated with
415 .Va fd .
416 This may be called multiple times with different parameters; however,
417 .Fn mparse_reset
418 should be invoked between parses.
419 Declared in
420 .In mandoc.h ,
421 implemented in
422 .Pa read.c .
423 .It Fn mparse_reset
424 Reset a parser so that
425 .Fn mparse_readfd
426 may be used again.
427 Declared in
428 .In mandoc.h ,
429 implemented in
430 .Pa read.c .
431 .It Fn mparse_result
432 Obtain the result of a parse.
433 Only successful parses
434 .Po
435 i.e., those where
436 .Fn mparse_readfd
437 returned less than MANDOCLEVEL_FATAL
438 .Pc
439 should invoke this function, in which case one of the three pointers will
440 be filled in.
441 Declared in
442 .In mandoc.h ,
443 implemented in
444 .Pa read.c .
445 .It Fn mparse_strerror
446 Return a statically-allocated string representation of an error code.
447 Declared in
448 .In mandoc.h ,
449 implemented in
450 .Pa read.c .
451 .It Fn mparse_strlevel
452 Return a statically-allocated string representation of a level code.
453 Declared in
454 .In mandoc.h ,
455 implemented in
456 .Pa read.c .
457 .It Fn mparse_wait
458 Bury a
459 .Xr gunzip 1
460 child process
461 .Fa child_pid
462 that was spawned with
463 .Fn mparse_open .
464 To be called after the parse sequence is complete.
465 Returns
466 .Dv MANDOCLEVEL_OK
467 on success and
468 .Dv MANDOCLEVEL_SYSERR
469 on failure, that is, when
470 .Xr wait 2
471 fails, or when
472 .Xr gunzip 1
473 died from a signal or exited with non-zero status.
474 Declared in
475 .In mandoc.h ,
476 implemented in
477 .Pa read.c .
478 .El
479 .Ss Variables
480 .Bl -ohang
481 .It Va man_macronames
482 The string representation of a man macro as indexed by
483 .Vt "enum mant" .
484 .It Va mdoc_argnames
485 The string representation of a mdoc macro argument as indexed by
486 .Vt "enum mdocargt" .
487 .It Va mdoc_macronames
488 The string representation of a mdoc macro as indexed by
489 .Vt "enum mdoct" .
490 .El
491 .Sh IMPLEMENTATION NOTES
492 This section consists of structural documentation for
493 .Xr mdoc 7
494 and
495 .Xr man 7
496 syntax trees and strings.
497 .Ss Man and Mdoc Strings
498 Strings may be extracted from mdoc and man meta-data, or from text
499 nodes (MDOC_TEXT and MAN_TEXT, respectively).
500 These strings have special non-printing formatting cues embedded in the
501 text itself, as well as
502 .Xr roff 7
503 escapes preserved from input.
504 Implementing systems will need to handle both situations to produce
505 human-readable text.
506 In general, strings may be assumed to consist of 7-bit ASCII characters.
507 .Pp
508 The following non-printing characters may be embedded in text strings:
509 .Bl -tag -width Ds
510 .It Dv ASCII_NBRSP
511 A non-breaking space character.
512 .It Dv ASCII_HYPH
513 A soft hyphen.
514 .It Dv ASCII_BREAK
515 A breakable zero-width space.
516 .El
517 .Pp
518 Escape characters are also passed verbatim into text strings.
519 An escape character is a sequence of characters beginning with the
520 backslash
521 .Pq Sq \e .
522 To construct human-readable text, these should be intercepted with
523 .Xr mandoc_escape 3
524 and converted with one of the functions described in
525 .Xr mchars_alloc 3 .
526 .Ss Man Abstract Syntax Tree
527 This AST is governed by the ontological rules dictated in
528 .Xr man 7
529 and derives its terminology accordingly.
530 .Pp
531 The AST is composed of
532 .Vt struct man_node
533 nodes with element, root and text types as declared by the
534 .Va type
535 field.
536 Each node also provides its parse point (the
537 .Va line ,
538 .Va sec ,
539 and
540 .Va pos
541 fields), its position in the tree (the
542 .Va parent ,
543 .Va child ,
544 .Va next
545 and
546 .Va prev
547 fields) and some type-specific data.
548 .Pp
549 The tree itself is arranged according to the following normal form,
550 where capitalised non-terminals represent nodes.
551 .Pp
552 .Bl -tag -width "ELEMENTXX" -compact
553 .It ROOT
554 \(<- mnode+
555 .It mnode
556 \(<- ELEMENT | TEXT | BLOCK
557 .It BLOCK
558 \(<- HEAD BODY
559 .It HEAD
560 \(<- mnode*
561 .It BODY
562 \(<- mnode*
563 .It ELEMENT
564 \(<- ELEMENT | TEXT*
565 .It TEXT
566 \(<- [[:ascii:]]*
567 .El
568 .Pp
569 The only elements capable of nesting other elements are those with
570 next-line scope as documented in
571 .Xr man 7 .
572 .Ss Mdoc Abstract Syntax Tree
573 This AST is governed by the ontological
574 rules dictated in
575 .Xr mdoc 7
576 and derives its terminology accordingly.
577 .Qq In-line
578 elements described in
579 .Xr mdoc 7
580 are described simply as
581 .Qq elements .
582 .Pp
583 The AST is composed of
584 .Vt struct mdoc_node
585 nodes with block, head, body, element, root and text types as declared
586 by the
587 .Va type
588 field.
589 Each node also provides its parse point (the
590 .Va line ,
591 .Va sec ,
592 and
593 .Va pos
594 fields), its position in the tree (the
595 .Va parent ,
596 .Va child ,
597 .Va nchild ,
598 .Va next
599 and
600 .Va prev
601 fields) and some type-specific data, in particular, for nodes generated
602 from macros, the generating macro in the
603 .Va tok
604 field.
605 .Pp
606 The tree itself is arranged according to the following normal form,
607 where capitalised non-terminals represent nodes.
608 .Pp
609 .Bl -tag -width "ELEMENTXX" -compact
610 .It ROOT
611 \(<- mnode+
612 .It mnode
613 \(<- BLOCK | ELEMENT | TEXT
614 .It BLOCK
615 \(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
616 .It ELEMENT
617 \(<- TEXT*
618 .It HEAD
619 \(<- mnode*
620 .It BODY
621 \(<- mnode* [ENDBODY mnode*]
622 .It TAIL
623 \(<- mnode*
624 .It TEXT
625 \(<- [[:ascii:]]*
626 .El
627 .Pp
628 Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
629 the BLOCK production: these refer to punctuation marks.
630 Furthermore, although a TEXT node will generally have a non-zero-length
631 string, in the specific case of
632 .Sq \&.Bd \-literal ,
633 an empty line will produce a zero-length string.
634 Multiple body parts are only found in invocations of
635 .Sq \&Bl \-column ,
636 where a new body introduces a new phrase.
637 .Pp
638 The
639 .Xr mdoc 7
640 syntax tree accommodates broken block structures as well.
641 The ENDBODY node is available to end the formatting associated
642 with a given block before the physical end of that block.
643 It has a non-null
644 .Va end
645 field, is of the BODY
646 .Va type ,
647 has the same
648 .Va tok
649 as the BLOCK it is ending, and has a
650 .Va pending
651 field pointing to that BLOCK's BODY node.
652 It is an indirect child of that BODY node
653 and has no children of its own.
654 .Pp
655 An ENDBODY node is generated when a block ends while one of its child
656 blocks is still open, like in the following example:
657 .Bd -literal -offset indent
658 \&.Ao ao
659 \&.Bo bo ac
660 \&.Ac bc
661 \&.Bc end
662 .Ed
663 .Pp
664 This example results in the following block structure:
665 .Bd -literal -offset indent
666 BLOCK Ao
667     HEAD Ao
668     BODY Ao
669         TEXT ao
670         BLOCK Bo, pending -> Ao
671             HEAD Bo
672             BODY Bo
673                 TEXT bo
674                 TEXT ac
675                 ENDBODY Ao, pending -> Ao
676                 TEXT bc
677 TEXT end
678 .Ed
679 .Pp
680 Here, the formatting of the
681 .Sq \&Ao
682 block extends from TEXT ao to TEXT ac,
683 while the formatting of the
684 .Sq \&Bo
685 block extends from TEXT bo to TEXT bc.
686 It renders as follows in
687 .Fl T Ns Cm ascii
688 mode:
689 .Pp
690 .Dl <ao [bo ac> bc] end
691 .Pp
692 Support for badly-nested blocks is only provided for backward
693 compatibility with some older
694 .Xr mdoc 7
695 implementations.
696 Using badly-nested blocks is
697 .Em strongly discouraged ;
698 for example, the
699 .Fl T Ns Cm html
700 and
701 .Fl T Ns Cm xhtml
702 front-ends to
703 .Xr mandoc 1
704 are unable to render them in any meaningful way.
705 Furthermore, behaviour when encountering badly-nested blocks is not
706 consistent across troff implementations, especially when using multiple
707 levels of badly-nested blocks.
708 .Sh SEE ALSO
709 .Xr mandoc 1 ,
710 .Xr mandoc_escape 3 ,
711 .Xr mandoc_malloc 3 ,
712 .Xr mchars_alloc 3 ,
713 .Xr eqn 7 ,
714 .Xr man 7 ,
715 .Xr mandoc_char 7 ,
716 .Xr mdoc 7 ,
717 .Xr roff 7 ,
718 .Xr tbl 7
719 .Sh AUTHORS
720 The
721 .Nm
722 library was written by
723 .An Kristaps Dzonsons Aq Mt kristaps@bsd.lv .