]> git.cameronkatri.com Git - mandoc.git/blob - mandoc.3
Fix two minibugs reported by Thomas Klausner <wiz at NetBSD>:
[mandoc.git] / mandoc.3
1 .\" $Id: mandoc.3,v 1.27 2014/10/28 17:36:19 schwarze Exp $
2 .\"
3 .\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 .\" Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
5 .\"
6 .\" Permission to use, copy, modify, and distribute this software for any
7 .\" purpose with or without fee is hereby granted, provided that the above
8 .\" copyright notice and this permission notice appear in all copies.
9 .\"
10 .\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 .\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 .\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 .\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 .\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 .\"
18 .Dd $Mdocdate: October 28 2014 $
19 .Dt MANDOC 3
20 .Os
21 .Sh NAME
22 .Nm mandoc ,
23 .Nm man_deroff ,
24 .Nm man_meta ,
25 .Nm man_mparse ,
26 .Nm man_node ,
27 .Nm mdoc_deroff ,
28 .Nm mdoc_meta ,
29 .Nm mdoc_node ,
30 .Nm mparse_alloc ,
31 .Nm mparse_free ,
32 .Nm mparse_getkeep ,
33 .Nm mparse_keep ,
34 .Nm mparse_open ,
35 .Nm mparse_readfd ,
36 .Nm mparse_reset ,
37 .Nm mparse_result ,
38 .Nm mparse_strerror ,
39 .Nm mparse_strlevel ,
40 .Nm mparse_wait
41 .Nd mandoc macro compiler library
42 .Sh LIBRARY
43 .Lb libmandoc
44 .Sh SYNOPSIS
45 .In sys/types.h
46 .In mandoc.h
47 .Fd "#define ASCII_NBRSP"
48 .Fd "#define ASCII_HYPH"
49 .Fd "#define ASCII_BREAK"
50 .Ft struct mparse *
51 .Fo mparse_alloc
52 .Fa "int options"
53 .Fa "enum mandoclevel wlevel"
54 .Fa "mandocmsg mmsg"
55 .Fa "const struct mchars *mchars"
56 .Fa "char *defos"
57 .Fc
58 .Ft void
59 .Fo (*mandocmsg)
60 .Fa "enum mandocerr errtype"
61 .Fa "enum mandoclevel level"
62 .Fa "const char *file"
63 .Fa "int line"
64 .Fa "int col"
65 .Fa "const char *msg"
66 .Fc
67 .Ft void
68 .Fo mparse_free
69 .Fa "struct mparse *parse"
70 .Fc
71 .Ft const char *
72 .Fo mparse_getkeep
73 .Fa "const struct mparse *parse"
74 .Fc
75 .Ft void
76 .Fo mparse_keep
77 .Fa "struct mparse *parse"
78 .Fc
79 .Ft "enum mandoclevel"
80 .Fo mparse_open
81 .Fa "struct mparse *parse"
82 .Fa "int *fd"
83 .Fa "const char *fname"
84 .Fa "pid_t *child_pid"
85 .Fc
86 .Ft "enum mandoclevel"
87 .Fo mparse_readfd
88 .Fa "struct mparse *parse"
89 .Fa "int fd"
90 .Fa "const char *fname"
91 .Fc
92 .Ft void
93 .Fo mparse_reset
94 .Fa "struct mparse *parse"
95 .Fc
96 .Ft void
97 .Fo mparse_result
98 .Fa "struct mparse *parse"
99 .Fa "struct mdoc **mdoc"
100 .Fa "struct man **man"
101 .Fa "char **sodest"
102 .Fc
103 .Ft "const char *"
104 .Fo mparse_strerror
105 .Fa "enum mandocerr"
106 .Fc
107 .Ft "const char *"
108 .Fo mparse_strlevel
109 .Fa "enum mandoclevel"
110 .Fc
111 .Ft "enum mandoclevel"
112 .Fo mparse_wait
113 .Fa "struct mparse *parse"
114 .Fa "pid_t child_pid"
115 .Fc
116 .In sys/types.h
117 .In mandoc.h
118 .In mdoc.h
119 .Ft void
120 .Fo mdoc_deroff
121 .Fa "char **dest"
122 .Fa "const struct mdoc_node *node"
123 .Fc
124 .Ft "const struct mdoc_meta *"
125 .Fo mdoc_meta
126 .Fa "const struct mdoc *mdoc"
127 .Fc
128 .Ft "const struct mdoc_node *"
129 .Fo mdoc_node
130 .Fa "const struct mdoc *mdoc"
131 .Fc
132 .Vt extern const char * const * mdoc_argnames;
133 .Vt extern const char * const * mdoc_macronames;
134 .In sys/types.h
135 .In mandoc.h
136 .In man.h
137 .Ft void
138 .Fo man_deroff
139 .Fa "char **dest"
140 .Fa "const struct man_node *node"
141 .Fc
142 .Ft "const struct man_meta *"
143 .Fo man_meta
144 .Fa "const struct man *man"
145 .Fc
146 .Ft "const struct mparse *"
147 .Fo man_mparse
148 .Fa "const struct man *man"
149 .Fc
150 .Ft "const struct man_node *"
151 .Fo man_node
152 .Fa "const struct man *man"
153 .Fc
154 .Vt extern const char * const * man_macronames;
155 .Sh DESCRIPTION
156 The
157 .Nm mandoc
158 library parses a
159 .Ux
160 manual into an abstract syntax tree (AST).
161 .Ux
162 manuals are composed of
163 .Xr mdoc 7
164 or
165 .Xr man 7 ,
166 and may be mixed with
167 .Xr roff 7 ,
168 .Xr tbl 7 ,
169 and
170 .Xr eqn 7
171 invocations.
172 .Pp
173 The following describes a general parse sequence:
174 .Bl -enum
175 .It
176 initiate a parsing sequence with
177 .Xr mchars_alloc 3
178 and
179 .Fn mparse_alloc ;
180 .It
181 parse files or file descriptors with
182 .Fn mparse_readfd ;
183 .It
184 retrieve a parsed syntax tree, if the parse was successful, with
185 .Fn mparse_result ;
186 .It
187 iterate over parse nodes with
188 .Fn mdoc_node
189 or
190 .Fn man_node ;
191 .It
192 free all allocated memory with
193 .Fn mparse_free
194 and
195 .Xr mchars_free 3 ,
196 or invoke
197 .Fn mparse_reset
198 and parse new files.
199 .El
200 .Sh REFERENCE
201 This section documents the functions, types, and variables available
202 via
203 .In mandoc.h ,
204 with the exception of those documented in
205 .Xr mandoc_escape 3
206 and
207 .Xr mchars_alloc 3 .
208 .Ss Types
209 .Bl -ohang
210 .It Vt "enum mandocerr"
211 A fatal error, error, or warning message during parsing.
212 .It Vt "enum mandoclevel"
213 A classification of an
214 .Vt "enum mandocerr"
215 as regards system operation.
216 .It Vt "struct mchars"
217 An opaque pointer to a character table.
218 Created with
219 .Xr mchars_alloc 3
220 and freed with
221 .Xr mchars_free 3 .
222 .It Vt "struct mparse"
223 An opaque pointer to a running parse sequence.
224 Created with
225 .Fn mparse_alloc
226 and freed with
227 .Fn mparse_free .
228 This may be used across parsed input if
229 .Fn mparse_reset
230 is called between parses.
231 .It Vt "mandocmsg"
232 A prototype for a function to handle fatal error, error, and warning
233 messages emitted by the parser.
234 .El
235 .Ss Functions
236 .Bl -ohang
237 .It Fn man_deroff
238 Obtain a text-only representation of a
239 .Vt struct man_node ,
240 including text contained in its child nodes.
241 To be used on children of the pointer returned from
242 .Fn man_node .
243 When it is no longer needed, the pointer returned from
244 .Fn man_deroff
245 can be passed to
246 .Xr free 3 .
247 .It Fn man_meta
248 Obtain the meta-data of a successful
249 .Xr man 7
250 parse.
251 This may only be used on a pointer returned by
252 .Fn mparse_result .
253 Declared in
254 .In man.h ,
255 implemented in
256 .Pa man.c .
257 .It Fn man_mparse
258 Get the parser used for the current output.
259 Declared in
260 .In man.h ,
261 implemented in
262 .Pa man.c .
263 .It Fn man_node
264 Obtain the root node of a successful
265 .Xr man 7
266 parse.
267 This may only be used on a pointer returned by
268 .Fn mparse_result .
269 Declared in
270 .In man.h ,
271 implemented in
272 .Pa man.c .
273 .It Fn mdoc_deroff
274 Obtain a text-only representation of a
275 .Vt struct mdoc_node ,
276 including text contained in its child nodes.
277 To be used on children of the pointer returned from
278 .Fn mdoc_node .
279 When it is no longer needed, the pointer returned from
280 .Fn mdoc_deroff
281 can be passed to
282 .Xr free 3 .
283 .It Fn mdoc_meta
284 Obtain the meta-data of a successful
285 .Xr mdoc 7
286 parse.
287 This may only be used on a pointer returned by
288 .Fn mparse_result .
289 Declared in
290 .In mdoc.h ,
291 implemented in
292 .Pa mdoc.c .
293 .It Fn mdoc_node
294 Obtain the root node of a successful
295 .Xr mdoc 7
296 parse.
297 This may only be used on a pointer returned by
298 .Fn mparse_result .
299 Declared in
300 .In mdoc.h ,
301 implemented in
302 .Pa mdoc.c .
303 .It Fn mparse_alloc
304 Allocate a parser.
305 The arguments have the following effect:
306 .Bl -tag -offset 5n -width inttype
307 .It Ar options
308 When the
309 .Dv MPARSE_MDOC
310 or
311 .Dv MPARSE_MAN
312 bit is set, only that parser is used.
313 Otherwise, the document type is automatically detected.
314 .Pp
315 When the
316 .Dv MPARSE_SO
317 bit is set,
318 .Xr roff 7
319 .Ic \&so
320 file inclusion requests are always honoured.
321 Otherwise, if the request is the only content in an input file,
322 only the file name is remembered, to be returned in the
323 .Fa sodest
324 argument of
325 .Fn mparse_result .
326 .Pp
327 When the
328 .Dv MPARSE_QUICK
329 bit is set, parsing is aborted after the NAME section.
330 This is for example useful in
331 .Xr makewhatis 8
332 .Fl Q
333 to quickly build minimal databases.
334 .It Ar wlevel
335 Can be set to
336 .Dv MANDOCLEVEL_FATAL ,
337 .Dv MANDOCLEVEL_ERROR ,
338 or
339 .Dv MANDOCLEVEL_WARNING .
340 Messages below the selected level will be suppressed.
341 .It Ar mmsg
342 A callback function to handle errors and warnings.
343 See
344 .Pa main.c
345 for an example.
346 .It Ar mchars
347 An opaque pointer to a character table obtained from
348 .Xr mchars_alloc 3 .
349 .It Ar defos
350 A default string for the
351 .Xr mdoc 7
352 .Sq \&Os
353 macro, overriding the
354 .Dv OSNAME
355 preprocessor definition and the results of
356 .Xr uname 3 .
357 .El
358 .Pp
359 The same parser may be used for multiple files so long as
360 .Fn mparse_reset
361 is called between parses.
362 .Fn mparse_free
363 must be called to free the memory allocated by this function.
364 Declared in
365 .In mandoc.h ,
366 implemented in
367 .Pa read.c .
368 .It Fn mparse_free
369 Free all memory allocated by
370 .Fn mparse_alloc .
371 Declared in
372 .In mandoc.h ,
373 implemented in
374 .Pa read.c .
375 .It Fn mparse_getkeep
376 Acquire the keep buffer.
377 Must follow a call of
378 .Fn mparse_keep .
379 Declared in
380 .In mandoc.h ,
381 implemented in
382 .Pa read.c .
383 .It Fn mparse_keep
384 Instruct the parser to retain a copy of its parsed input.
385 This can be acquired with subsequent
386 .Fn mparse_getkeep
387 calls.
388 Declared in
389 .In mandoc.h ,
390 implemented in
391 .Pa read.c .
392 .It Fn mparse_open
393 If the
394 .Fa fname
395 ends in
396 .Pa .gz ,
397 open with
398 .Xr gunzip 1 ;
399 otherwise, with
400 .Xr open 2 .
401 Return a file descriptor open for reading in
402 .Fa fd ,
403 or -1 on failure.
404 It can be passed to
405 .Fn mparse_readfd
406 or used directly.
407 If applicable, return the
408 .Xr gunzip 1
409 child process ID in
410 .Fa child_pid ,
411 or otherwise 0.
412 If non-zero, it should be passed to
413 .Fn mparse_wait
414 after completing the parse sequence.
415 Declared in
416 .In mandoc.h ,
417 implemented in
418 .Pa read.c .
419 .It Fn mparse_readfd
420 Parse a file or file descriptor.
421 If
422 .Fa fd
423 is -1,
424 .Fa fname
425 is opened for reading.
426 Otherwise,
427 .Fa fname
428 is assumed to be the name associated with
429 .Fa fd .
430 This may be called multiple times with different parameters; however,
431 .Fn mparse_reset
432 should be invoked between parses.
433 Declared in
434 .In mandoc.h ,
435 implemented in
436 .Pa read.c .
437 .It Fn mparse_reset
438 Reset a parser so that
439 .Fn mparse_readfd
440 may be used again.
441 Declared in
442 .In mandoc.h ,
443 implemented in
444 .Pa read.c .
445 .It Fn mparse_result
446 Obtain the result of a parse.
447 Only successful parses
448 .Po
449 i.e., those where
450 .Fn mparse_readfd
451 returned less than MANDOCLEVEL_FATAL
452 .Pc
453 should invoke this function, in which case one of the three pointers will
454 be filled in.
455 Declared in
456 .In mandoc.h ,
457 implemented in
458 .Pa read.c .
459 .It Fn mparse_strerror
460 Return a statically-allocated string representation of an error code.
461 Declared in
462 .In mandoc.h ,
463 implemented in
464 .Pa read.c .
465 .It Fn mparse_strlevel
466 Return a statically-allocated string representation of a level code.
467 Declared in
468 .In mandoc.h ,
469 implemented in
470 .Pa read.c .
471 .It Fn mparse_wait
472 Bury a
473 .Xr gunzip 1
474 child process
475 .Fa child_pid
476 that was spawned with
477 .Fn mparse_open .
478 To be called after the parse sequence is complete.
479 Returns
480 .Dv MANDOCLEVEL_OK
481 on success and
482 .Dv MANDOCLEVEL_SYSERR
483 on failure, that is, when
484 .Xr wait 2
485 fails, or when
486 .Xr gunzip 1
487 died from a signal or exited with non-zero status.
488 Declared in
489 .In mandoc.h ,
490 implemented in
491 .Pa read.c .
492 .El
493 .Ss Variables
494 .Bl -ohang
495 .It Va man_macronames
496 The string representation of a man macro as indexed by
497 .Vt "enum mant" .
498 .It Va mdoc_argnames
499 The string representation of a mdoc macro argument as indexed by
500 .Vt "enum mdocargt" .
501 .It Va mdoc_macronames
502 The string representation of a mdoc macro as indexed by
503 .Vt "enum mdoct" .
504 .El
505 .Sh IMPLEMENTATION NOTES
506 This section consists of structural documentation for
507 .Xr mdoc 7
508 and
509 .Xr man 7
510 syntax trees and strings.
511 .Ss Man and Mdoc Strings
512 Strings may be extracted from mdoc and man meta-data, or from text
513 nodes (MDOC_TEXT and MAN_TEXT, respectively).
514 These strings have special non-printing formatting cues embedded in the
515 text itself, as well as
516 .Xr roff 7
517 escapes preserved from input.
518 Implementing systems will need to handle both situations to produce
519 human-readable text.
520 In general, strings may be assumed to consist of 7-bit ASCII characters.
521 .Pp
522 The following non-printing characters may be embedded in text strings:
523 .Bl -tag -width Ds
524 .It Dv ASCII_NBRSP
525 A non-breaking space character.
526 .It Dv ASCII_HYPH
527 A soft hyphen.
528 .It Dv ASCII_BREAK
529 A breakable zero-width space.
530 .El
531 .Pp
532 Escape characters are also passed verbatim into text strings.
533 An escape character is a sequence of characters beginning with the
534 backslash
535 .Pq Sq \e .
536 To construct human-readable text, these should be intercepted with
537 .Xr mandoc_escape 3
538 and converted with one of the functions described in
539 .Xr mchars_alloc 3 .
540 .Ss Man Abstract Syntax Tree
541 This AST is governed by the ontological rules dictated in
542 .Xr man 7
543 and derives its terminology accordingly.
544 .Pp
545 The AST is composed of
546 .Vt struct man_node
547 nodes with element, root and text types as declared by the
548 .Va type
549 field.
550 Each node also provides its parse point (the
551 .Va line ,
552 .Va sec ,
553 and
554 .Va pos
555 fields), its position in the tree (the
556 .Va parent ,
557 .Va child ,
558 .Va next
559 and
560 .Va prev
561 fields) and some type-specific data.
562 .Pp
563 The tree itself is arranged according to the following normal form,
564 where capitalised non-terminals represent nodes.
565 .Pp
566 .Bl -tag -width "ELEMENTXX" -compact
567 .It ROOT
568 \(<- mnode+
569 .It mnode
570 \(<- ELEMENT | TEXT | BLOCK
571 .It BLOCK
572 \(<- HEAD BODY
573 .It HEAD
574 \(<- mnode*
575 .It BODY
576 \(<- mnode*
577 .It ELEMENT
578 \(<- ELEMENT | TEXT*
579 .It TEXT
580 \(<- [[:ascii:]]*
581 .El
582 .Pp
583 The only elements capable of nesting other elements are those with
584 next-line scope as documented in
585 .Xr man 7 .
586 .Ss Mdoc Abstract Syntax Tree
587 This AST is governed by the ontological
588 rules dictated in
589 .Xr mdoc 7
590 and derives its terminology accordingly.
591 .Qq In-line
592 elements described in
593 .Xr mdoc 7
594 are described simply as
595 .Qq elements .
596 .Pp
597 The AST is composed of
598 .Vt struct mdoc_node
599 nodes with block, head, body, element, root and text types as declared
600 by the
601 .Va type
602 field.
603 Each node also provides its parse point (the
604 .Va line ,
605 .Va sec ,
606 and
607 .Va pos
608 fields), its position in the tree (the
609 .Va parent ,
610 .Va child ,
611 .Va nchild ,
612 .Va next
613 and
614 .Va prev
615 fields) and some type-specific data, in particular, for nodes generated
616 from macros, the generating macro in the
617 .Va tok
618 field.
619 .Pp
620 The tree itself is arranged according to the following normal form,
621 where capitalised non-terminals represent nodes.
622 .Pp
623 .Bl -tag -width "ELEMENTXX" -compact
624 .It ROOT
625 \(<- mnode+
626 .It mnode
627 \(<- BLOCK | ELEMENT | TEXT
628 .It BLOCK
629 \(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
630 .It ELEMENT
631 \(<- TEXT*
632 .It HEAD
633 \(<- mnode*
634 .It BODY
635 \(<- mnode* [ENDBODY mnode*]
636 .It TAIL
637 \(<- mnode*
638 .It TEXT
639 \(<- [[:ascii:]]*
640 .El
641 .Pp
642 Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
643 the BLOCK production: these refer to punctuation marks.
644 Furthermore, although a TEXT node will generally have a non-zero-length
645 string, in the specific case of
646 .Sq \&.Bd \-literal ,
647 an empty line will produce a zero-length string.
648 Multiple body parts are only found in invocations of
649 .Sq \&Bl \-column ,
650 where a new body introduces a new phrase.
651 .Pp
652 The
653 .Xr mdoc 7
654 syntax tree accommodates broken block structures as well.
655 The ENDBODY node is available to end the formatting associated
656 with a given block before the physical end of that block.
657 It has a non-null
658 .Va end
659 field, is of the BODY
660 .Va type ,
661 has the same
662 .Va tok
663 as the BLOCK it is ending, and has a
664 .Va pending
665 field pointing to that BLOCK's BODY node.
666 It is an indirect child of that BODY node
667 and has no children of its own.
668 .Pp
669 An ENDBODY node is generated when a block ends while one of its child
670 blocks is still open, like in the following example:
671 .Bd -literal -offset indent
672 \&.Ao ao
673 \&.Bo bo ac
674 \&.Ac bc
675 \&.Bc end
676 .Ed
677 .Pp
678 This example results in the following block structure:
679 .Bd -literal -offset indent
680 BLOCK Ao
681 HEAD Ao
682 BODY Ao
683 TEXT ao
684 BLOCK Bo, pending -> Ao
685 HEAD Bo
686 BODY Bo
687 TEXT bo
688 TEXT ac
689 ENDBODY Ao, pending -> Ao
690 TEXT bc
691 TEXT end
692 .Ed
693 .Pp
694 Here, the formatting of the
695 .Sq \&Ao
696 block extends from TEXT ao to TEXT ac,
697 while the formatting of the
698 .Sq \&Bo
699 block extends from TEXT bo to TEXT bc.
700 It renders as follows in
701 .Fl T Ns Cm ascii
702 mode:
703 .Pp
704 .Dl <ao [bo ac> bc] end
705 .Pp
706 Support for badly-nested blocks is only provided for backward
707 compatibility with some older
708 .Xr mdoc 7
709 implementations.
710 Using badly-nested blocks is
711 .Em strongly discouraged ;
712 for example, the
713 .Fl T Ns Cm html
714 and
715 .Fl T Ns Cm xhtml
716 front-ends to
717 .Xr mandoc 1
718 are unable to render them in any meaningful way.
719 Furthermore, behaviour when encountering badly-nested blocks is not
720 consistent across troff implementations, especially when using multiple
721 levels of badly-nested blocks.
722 .Sh SEE ALSO
723 .Xr mandoc 1 ,
724 .Xr mandoc_escape 3 ,
725 .Xr mandoc_malloc 3 ,
726 .Xr mchars_alloc 3 ,
727 .Xr eqn 7 ,
728 .Xr man 7 ,
729 .Xr mandoc_char 7 ,
730 .Xr mdoc 7 ,
731 .Xr roff 7 ,
732 .Xr tbl 7
733 .Sh AUTHORS
734 The
735 .Nm
736 library was written by
737 .An Kristaps Dzonsons Aq Mt kristaps@bsd.lv .