]> git.cameronkatri.com Git - mandoc.git/blob - mandoc.3
Don't dereference NULL pointers when formatting missing denominators,
[mandoc.git] / mandoc.3
1 .\" $Id: mandoc.3,v 1.29 2014/11/26 23:42:14 schwarze Exp $
2 .\"
3 .\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 .\" Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
5 .\"
6 .\" Permission to use, copy, modify, and distribute this software for any
7 .\" purpose with or without fee is hereby granted, provided that the above
8 .\" copyright notice and this permission notice appear in all copies.
9 .\"
10 .\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 .\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 .\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 .\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 .\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 .\"
18 .Dd $Mdocdate: November 26 2014 $
19 .Dt MANDOC 3
20 .Os
21 .Sh NAME
22 .Nm mandoc ,
23 .Nm man_deroff ,
24 .Nm man_meta ,
25 .Nm man_mparse ,
26 .Nm man_node ,
27 .Nm mdoc_deroff ,
28 .Nm mdoc_meta ,
29 .Nm mdoc_node ,
30 .Nm mparse_alloc ,
31 .Nm mparse_free ,
32 .Nm mparse_getkeep ,
33 .Nm mparse_keep ,
34 .Nm mparse_open ,
35 .Nm mparse_readfd ,
36 .Nm mparse_reset ,
37 .Nm mparse_result ,
38 .Nm mparse_strerror ,
39 .Nm mparse_strlevel ,
40 .Nm mparse_wait
41 .Nd mandoc macro compiler library
42 .Sh LIBRARY
43 .Lb libmandoc
44 .Sh SYNOPSIS
45 .In sys/types.h
46 .In mandoc.h
47 .Fd "#define ASCII_NBRSP"
48 .Fd "#define ASCII_HYPH"
49 .Fd "#define ASCII_BREAK"
50 .Ft struct mparse *
51 .Fo mparse_alloc
52 .Fa "int options"
53 .Fa "enum mandoclevel wlevel"
54 .Fa "mandocmsg mmsg"
55 .Fa "const struct mchars *mchars"
56 .Fa "char *defos"
57 .Fc
58 .Ft void
59 .Fo (*mandocmsg)
60 .Fa "enum mandocerr errtype"
61 .Fa "enum mandoclevel level"
62 .Fa "const char *file"
63 .Fa "int line"
64 .Fa "int col"
65 .Fa "const char *msg"
66 .Fc
67 .Ft void
68 .Fo mparse_free
69 .Fa "struct mparse *parse"
70 .Fc
71 .Ft const char *
72 .Fo mparse_getkeep
73 .Fa "const struct mparse *parse"
74 .Fc
75 .Ft void
76 .Fo mparse_keep
77 .Fa "struct mparse *parse"
78 .Fc
79 .Ft "enum mandoclevel"
80 .Fo mparse_open
81 .Fa "struct mparse *parse"
82 .Fa "int *fd"
83 .Fa "const char *fname"
84 .Fc
85 .Ft "enum mandoclevel"
86 .Fo mparse_readfd
87 .Fa "struct mparse *parse"
88 .Fa "int fd"
89 .Fa "const char *fname"
90 .Fc
91 .Ft void
92 .Fo mparse_reset
93 .Fa "struct mparse *parse"
94 .Fc
95 .Ft void
96 .Fo mparse_result
97 .Fa "struct mparse *parse"
98 .Fa "struct mdoc **mdoc"
99 .Fa "struct man **man"
100 .Fa "char **sodest"
101 .Fc
102 .Ft "const char *"
103 .Fo mparse_strerror
104 .Fa "enum mandocerr"
105 .Fc
106 .Ft "const char *"
107 .Fo mparse_strlevel
108 .Fa "enum mandoclevel"
109 .Fc
110 .Ft "enum mandoclevel"
111 .Fo mparse_wait
112 .Fa "struct mparse *parse"
113 .Fc
114 .In sys/types.h
115 .In mandoc.h
116 .In mdoc.h
117 .Ft void
118 .Fo mdoc_deroff
119 .Fa "char **dest"
120 .Fa "const struct mdoc_node *node"
121 .Fc
122 .Ft "const struct mdoc_meta *"
123 .Fo mdoc_meta
124 .Fa "const struct mdoc *mdoc"
125 .Fc
126 .Ft "const struct mdoc_node *"
127 .Fo mdoc_node
128 .Fa "const struct mdoc *mdoc"
129 .Fc
130 .Vt extern const char * const * mdoc_argnames;
131 .Vt extern const char * const * mdoc_macronames;
132 .In sys/types.h
133 .In mandoc.h
134 .In man.h
135 .Ft void
136 .Fo man_deroff
137 .Fa "char **dest"
138 .Fa "const struct man_node *node"
139 .Fc
140 .Ft "const struct man_meta *"
141 .Fo man_meta
142 .Fa "const struct man *man"
143 .Fc
144 .Ft "const struct mparse *"
145 .Fo man_mparse
146 .Fa "const struct man *man"
147 .Fc
148 .Ft "const struct man_node *"
149 .Fo man_node
150 .Fa "const struct man *man"
151 .Fc
152 .Vt extern const char * const * man_macronames;
153 .Sh DESCRIPTION
154 The
155 .Nm mandoc
156 library parses a
157 .Ux
158 manual into an abstract syntax tree (AST).
159 .Ux
160 manuals are composed of
161 .Xr mdoc 7
162 or
163 .Xr man 7 ,
164 and may be mixed with
165 .Xr roff 7 ,
166 .Xr tbl 7 ,
167 and
168 .Xr eqn 7
169 invocations.
170 .Pp
171 The following describes a general parse sequence:
172 .Bl -enum
173 .It
174 initiate a parsing sequence with
175 .Xr mchars_alloc 3
176 and
177 .Fn mparse_alloc ;
178 .It
179 parse files or file descriptors with
180 .Fn mparse_readfd ;
181 .It
182 retrieve a parsed syntax tree, if the parse was successful, with
183 .Fn mparse_result ;
184 .It
185 iterate over parse nodes with
186 .Fn mdoc_node
187 or
188 .Fn man_node ;
189 .It
190 free all allocated memory with
191 .Fn mparse_free
192 and
193 .Xr mchars_free 3 ,
194 or invoke
195 .Fn mparse_reset
196 and parse new files.
197 .El
198 .Sh REFERENCE
199 This section documents the functions, types, and variables available
200 via
201 .In mandoc.h ,
202 with the exception of those documented in
203 .Xr mandoc_escape 3
204 and
205 .Xr mchars_alloc 3 .
206 .Ss Types
207 .Bl -ohang
208 .It Vt "enum mandocerr"
209 A fatal error, error, or warning message during parsing.
210 .It Vt "enum mandoclevel"
211 A classification of an
212 .Vt "enum mandocerr"
213 as regards system operation.
214 .It Vt "struct mchars"
215 An opaque pointer to a character table.
216 Created with
217 .Xr mchars_alloc 3
218 and freed with
219 .Xr mchars_free 3 .
220 .It Vt "struct mparse"
221 An opaque pointer to a running parse sequence.
222 Created with
223 .Fn mparse_alloc
224 and freed with
225 .Fn mparse_free .
226 This may be used across parsed input if
227 .Fn mparse_reset
228 is called between parses.
229 .It Vt "mandocmsg"
230 A prototype for a function to handle fatal error, error, and warning
231 messages emitted by the parser.
232 .El
233 .Ss Functions
234 .Bl -ohang
235 .It Fn man_deroff
236 Obtain a text-only representation of a
237 .Vt struct man_node ,
238 including text contained in its child nodes.
239 To be used on children of the pointer returned from
240 .Fn man_node .
241 When it is no longer needed, the pointer returned from
242 .Fn man_deroff
243 can be passed to
244 .Xr free 3 .
245 .It Fn man_meta
246 Obtain the meta-data of a successful
247 .Xr man 7
248 parse.
249 This may only be used on a pointer returned by
250 .Fn mparse_result .
251 Declared in
252 .In man.h ,
253 implemented in
254 .Pa man.c .
255 .It Fn man_mparse
256 Get the parser used for the current output.
257 Declared in
258 .In man.h ,
259 implemented in
260 .Pa man.c .
261 .It Fn man_node
262 Obtain the root node of a successful
263 .Xr man 7
264 parse.
265 This may only be used on a pointer returned by
266 .Fn mparse_result .
267 Declared in
268 .In man.h ,
269 implemented in
270 .Pa man.c .
271 .It Fn mdoc_deroff
272 Obtain a text-only representation of a
273 .Vt struct mdoc_node ,
274 including text contained in its child nodes.
275 To be used on children of the pointer returned from
276 .Fn mdoc_node .
277 When it is no longer needed, the pointer returned from
278 .Fn mdoc_deroff
279 can be passed to
280 .Xr free 3 .
281 .It Fn mdoc_meta
282 Obtain the meta-data of a successful
283 .Xr mdoc 7
284 parse.
285 This may only be used on a pointer returned by
286 .Fn mparse_result .
287 Declared in
288 .In mdoc.h ,
289 implemented in
290 .Pa mdoc.c .
291 .It Fn mdoc_node
292 Obtain the root node of a successful
293 .Xr mdoc 7
294 parse.
295 This may only be used on a pointer returned by
296 .Fn mparse_result .
297 Declared in
298 .In mdoc.h ,
299 implemented in
300 .Pa mdoc.c .
301 .It Fn mparse_alloc
302 Allocate a parser.
303 The arguments have the following effect:
304 .Bl -tag -offset 5n -width inttype
305 .It Ar options
306 When the
307 .Dv MPARSE_MDOC
308 or
309 .Dv MPARSE_MAN
310 bit is set, only that parser is used.
311 Otherwise, the document type is automatically detected.
312 .Pp
313 When the
314 .Dv MPARSE_SO
315 bit is set,
316 .Xr roff 7
317 .Ic \&so
318 file inclusion requests are always honoured.
319 Otherwise, if the request is the only content in an input file,
320 only the file name is remembered, to be returned in the
321 .Fa sodest
322 argument of
323 .Fn mparse_result .
324 .Pp
325 When the
326 .Dv MPARSE_QUICK
327 bit is set, parsing is aborted after the NAME section.
328 This is for example useful in
329 .Xr makewhatis 8
330 .Fl Q
331 to quickly build minimal databases.
332 .It Ar wlevel
333 Can be set to
334 .Dv MANDOCLEVEL_FATAL ,
335 .Dv MANDOCLEVEL_ERROR ,
336 or
337 .Dv MANDOCLEVEL_WARNING .
338 Messages below the selected level will be suppressed.
339 .It Ar mmsg
340 A callback function to handle errors and warnings.
341 See
342 .Pa main.c
343 for an example.
344 .It Ar mchars
345 An opaque pointer to a character table obtained from
346 .Xr mchars_alloc 3 .
347 .It Ar defos
348 A default string for the
349 .Xr mdoc 7
350 .Sq \&Os
351 macro, overriding the
352 .Dv OSNAME
353 preprocessor definition and the results of
354 .Xr uname 3 .
355 .El
356 .Pp
357 The same parser may be used for multiple files so long as
358 .Fn mparse_reset
359 is called between parses.
360 .Fn mparse_free
361 must be called to free the memory allocated by this function.
362 Declared in
363 .In mandoc.h ,
364 implemented in
365 .Pa read.c .
366 .It Fn mparse_free
367 Free all memory allocated by
368 .Fn mparse_alloc .
369 Declared in
370 .In mandoc.h ,
371 implemented in
372 .Pa read.c .
373 .It Fn mparse_getkeep
374 Acquire the keep buffer.
375 Must follow a call of
376 .Fn mparse_keep .
377 Declared in
378 .In mandoc.h ,
379 implemented in
380 .Pa read.c .
381 .It Fn mparse_keep
382 Instruct the parser to retain a copy of its parsed input.
383 This can be acquired with subsequent
384 .Fn mparse_getkeep
385 calls.
386 Declared in
387 .In mandoc.h ,
388 implemented in
389 .Pa read.c .
390 .It Fn mparse_open
391 If the
392 .Fa fname
393 ends in
394 .Pa .gz ,
395 open with
396 .Xr gunzip 1 ;
397 otherwise, with
398 .Xr open 2 .
399 If
400 .Xr open 2
401 fails, append
402 .Pa .gz
403 and try with
404 .Xr gunzip 1 .
405 Return a file descriptor open for reading in
406 .Fa fd ,
407 or -1 on failure.
408 It can be passed to
409 .Fn mparse_readfd
410 or used directly.
411 Declared in
412 .In mandoc.h ,
413 implemented in
414 .Pa read.c .
415 .It Fn mparse_readfd
416 Parse a file or file descriptor.
417 If
418 .Va fd
419 is -1, open
420 .Va fname
421 with
422 .Fn mparse_open .
423 Otherwise,
424 .Va fname
425 is assumed to be the name associated with
426 .Va fd .
427 Calls
428 .Fn mparse_wait
429 before returning.
430 This function may be called multiple times with different parameters; however,
431 .Fn mparse_reset
432 should be invoked between parses.
433 Declared in
434 .In mandoc.h ,
435 implemented in
436 .Pa read.c .
437 .It Fn mparse_reset
438 Reset a parser so that
439 .Fn mparse_readfd
440 may be used again.
441 Declared in
442 .In mandoc.h ,
443 implemented in
444 .Pa read.c .
445 .It Fn mparse_result
446 Obtain the result of a parse.
447 Only successful parses
448 .Po
449 i.e., those where
450 .Fn mparse_readfd
451 returned less than MANDOCLEVEL_FATAL
452 .Pc
453 should invoke this function, in which case one of the three pointers will
454 be filled in.
455 Declared in
456 .In mandoc.h ,
457 implemented in
458 .Pa read.c .
459 .It Fn mparse_strerror
460 Return a statically-allocated string representation of an error code.
461 Declared in
462 .In mandoc.h ,
463 implemented in
464 .Pa read.c .
465 .It Fn mparse_strlevel
466 Return a statically-allocated string representation of a level code.
467 Declared in
468 .In mandoc.h ,
469 implemented in
470 .Pa read.c .
471 .It Fn mparse_wait
472 Bury a
473 .Xr gunzip 1
474 child process that was spawned with
475 .Fn mparse_open .
476 To be called after the parse sequence is complete.
477 Not needed after
478 .Fn mparse_readfd ,
479 but does no harm in that case, either.
480 Returns
481 .Dv MANDOCLEVEL_OK
482 on success and
483 .Dv MANDOCLEVEL_SYSERR
484 on failure, that is, when
485 .Xr wait 2
486 fails, or when
487 .Xr gunzip 1
488 died from a signal or exited with non-zero status.
489 Declared in
490 .In mandoc.h ,
491 implemented in
492 .Pa read.c .
493 .El
494 .Ss Variables
495 .Bl -ohang
496 .It Va man_macronames
497 The string representation of a man macro as indexed by
498 .Vt "enum mant" .
499 .It Va mdoc_argnames
500 The string representation of a mdoc macro argument as indexed by
501 .Vt "enum mdocargt" .
502 .It Va mdoc_macronames
503 The string representation of a mdoc macro as indexed by
504 .Vt "enum mdoct" .
505 .El
506 .Sh IMPLEMENTATION NOTES
507 This section consists of structural documentation for
508 .Xr mdoc 7
509 and
510 .Xr man 7
511 syntax trees and strings.
512 .Ss Man and Mdoc Strings
513 Strings may be extracted from mdoc and man meta-data, or from text
514 nodes (MDOC_TEXT and MAN_TEXT, respectively).
515 These strings have special non-printing formatting cues embedded in the
516 text itself, as well as
517 .Xr roff 7
518 escapes preserved from input.
519 Implementing systems will need to handle both situations to produce
520 human-readable text.
521 In general, strings may be assumed to consist of 7-bit ASCII characters.
522 .Pp
523 The following non-printing characters may be embedded in text strings:
524 .Bl -tag -width Ds
525 .It Dv ASCII_NBRSP
526 A non-breaking space character.
527 .It Dv ASCII_HYPH
528 A soft hyphen.
529 .It Dv ASCII_BREAK
530 A breakable zero-width space.
531 .El
532 .Pp
533 Escape characters are also passed verbatim into text strings.
534 An escape character is a sequence of characters beginning with the
535 backslash
536 .Pq Sq \e .
537 To construct human-readable text, these should be intercepted with
538 .Xr mandoc_escape 3
539 and converted with one of the functions described in
540 .Xr mchars_alloc 3 .
541 .Ss Man Abstract Syntax Tree
542 This AST is governed by the ontological rules dictated in
543 .Xr man 7
544 and derives its terminology accordingly.
545 .Pp
546 The AST is composed of
547 .Vt struct man_node
548 nodes with element, root and text types as declared by the
549 .Va type
550 field.
551 Each node also provides its parse point (the
552 .Va line ,
553 .Va sec ,
554 and
555 .Va pos
556 fields), its position in the tree (the
557 .Va parent ,
558 .Va child ,
559 .Va next
560 and
561 .Va prev
562 fields) and some type-specific data.
563 .Pp
564 The tree itself is arranged according to the following normal form,
565 where capitalised non-terminals represent nodes.
566 .Pp
567 .Bl -tag -width "ELEMENTXX" -compact
568 .It ROOT
569 \(<- mnode+
570 .It mnode
571 \(<- ELEMENT | TEXT | BLOCK
572 .It BLOCK
573 \(<- HEAD BODY
574 .It HEAD
575 \(<- mnode*
576 .It BODY
577 \(<- mnode*
578 .It ELEMENT
579 \(<- ELEMENT | TEXT*
580 .It TEXT
581 \(<- [[:ascii:]]*
582 .El
583 .Pp
584 The only elements capable of nesting other elements are those with
585 next-line scope as documented in
586 .Xr man 7 .
587 .Ss Mdoc Abstract Syntax Tree
588 This AST is governed by the ontological
589 rules dictated in
590 .Xr mdoc 7
591 and derives its terminology accordingly.
592 .Qq In-line
593 elements described in
594 .Xr mdoc 7
595 are described simply as
596 .Qq elements .
597 .Pp
598 The AST is composed of
599 .Vt struct mdoc_node
600 nodes with block, head, body, element, root and text types as declared
601 by the
602 .Va type
603 field.
604 Each node also provides its parse point (the
605 .Va line ,
606 .Va sec ,
607 and
608 .Va pos
609 fields), its position in the tree (the
610 .Va parent ,
611 .Va child ,
612 .Va nchild ,
613 .Va next
614 and
615 .Va prev
616 fields) and some type-specific data, in particular, for nodes generated
617 from macros, the generating macro in the
618 .Va tok
619 field.
620 .Pp
621 The tree itself is arranged according to the following normal form,
622 where capitalised non-terminals represent nodes.
623 .Pp
624 .Bl -tag -width "ELEMENTXX" -compact
625 .It ROOT
626 \(<- mnode+
627 .It mnode
628 \(<- BLOCK | ELEMENT | TEXT
629 .It BLOCK
630 \(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
631 .It ELEMENT
632 \(<- TEXT*
633 .It HEAD
634 \(<- mnode*
635 .It BODY
636 \(<- mnode* [ENDBODY mnode*]
637 .It TAIL
638 \(<- mnode*
639 .It TEXT
640 \(<- [[:ascii:]]*
641 .El
642 .Pp
643 Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
644 the BLOCK production: these refer to punctuation marks.
645 Furthermore, although a TEXT node will generally have a non-zero-length
646 string, in the specific case of
647 .Sq \&.Bd \-literal ,
648 an empty line will produce a zero-length string.
649 Multiple body parts are only found in invocations of
650 .Sq \&Bl \-column ,
651 where a new body introduces a new phrase.
652 .Pp
653 The
654 .Xr mdoc 7
655 syntax tree accommodates broken block structures as well.
656 The ENDBODY node is available to end the formatting associated
657 with a given block before the physical end of that block.
658 It has a non-null
659 .Va end
660 field, is of the BODY
661 .Va type ,
662 has the same
663 .Va tok
664 as the BLOCK it is ending, and has a
665 .Va pending
666 field pointing to that BLOCK's BODY node.
667 It is an indirect child of that BODY node
668 and has no children of its own.
669 .Pp
670 An ENDBODY node is generated when a block ends while one of its child
671 blocks is still open, like in the following example:
672 .Bd -literal -offset indent
673 \&.Ao ao
674 \&.Bo bo ac
675 \&.Ac bc
676 \&.Bc end
677 .Ed
678 .Pp
679 This example results in the following block structure:
680 .Bd -literal -offset indent
681 BLOCK Ao
682     HEAD Ao
683     BODY Ao
684         TEXT ao
685         BLOCK Bo, pending -> Ao
686             HEAD Bo
687             BODY Bo
688                 TEXT bo
689                 TEXT ac
690                 ENDBODY Ao, pending -> Ao
691                 TEXT bc
692 TEXT end
693 .Ed
694 .Pp
695 Here, the formatting of the
696 .Sq \&Ao
697 block extends from TEXT ao to TEXT ac,
698 while the formatting of the
699 .Sq \&Bo
700 block extends from TEXT bo to TEXT bc.
701 It renders as follows in
702 .Fl T Ns Cm ascii
703 mode:
704 .Pp
705 .Dl <ao [bo ac> bc] end
706 .Pp
707 Support for badly-nested blocks is only provided for backward
708 compatibility with some older
709 .Xr mdoc 7
710 implementations.
711 Using badly-nested blocks is
712 .Em strongly discouraged ;
713 for example, the
714 .Fl T Ns Cm html
715 and
716 .Fl T Ns Cm xhtml
717 front-ends to
718 .Xr mandoc 1
719 are unable to render them in any meaningful way.
720 Furthermore, behaviour when encountering badly-nested blocks is not
721 consistent across troff implementations, especially when using multiple
722 levels of badly-nested blocks.
723 .Sh SEE ALSO
724 .Xr mandoc 1 ,
725 .Xr mandoc_escape 3 ,
726 .Xr mandoc_malloc 3 ,
727 .Xr mchars_alloc 3 ,
728 .Xr eqn 7 ,
729 .Xr man 7 ,
730 .Xr mandoc_char 7 ,
731 .Xr mdoc 7 ,
732 .Xr roff 7 ,
733 .Xr tbl 7
734 .Sh AUTHORS
735 The
736 .Nm
737 library was written by
738 .An Kristaps Dzonsons Aq Mt kristaps@bsd.lv .