]> git.cameronkatri.com Git - mandoc.git/blob - mandoc.3
Do not dereference a NULL pointer if a .Bl macro has
[mandoc.git] / mandoc.3
1 .\" $Id: mandoc.3,v 1.25 2014/08/05 05:48:56 schwarze Exp $
2 .\"
3 .\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 .\" Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
5 .\"
6 .\" Permission to use, copy, modify, and distribute this software for any
7 .\" purpose with or without fee is hereby granted, provided that the above
8 .\" copyright notice and this permission notice appear in all copies.
9 .\"
10 .\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 .\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 .\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 .\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 .\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 .\"
18 .Dd $Mdocdate: August 5 2014 $
19 .Dt MANDOC 3
20 .Os
21 .Sh NAME
22 .Nm mandoc ,
23 .Nm man_deroff ,
24 .Nm man_meta ,
25 .Nm man_mparse ,
26 .Nm man_node ,
27 .Nm mdoc_deroff ,
28 .Nm mdoc_meta ,
29 .Nm mdoc_node ,
30 .Nm mparse_alloc ,
31 .Nm mparse_free ,
32 .Nm mparse_getkeep ,
33 .Nm mparse_keep ,
34 .Nm mparse_readfd ,
35 .Nm mparse_reset ,
36 .Nm mparse_result ,
37 .Nm mparse_strerror ,
38 .Nm mparse_strlevel
39 .Nd mandoc macro compiler library
40 .Sh LIBRARY
41 .Lb libmandoc
42 .Sh SYNOPSIS
43 .In sys/types.h
44 .In mandoc.h
45 .Fd "#define ASCII_NBRSP"
46 .Fd "#define ASCII_HYPH"
47 .Fd "#define ASCII_BREAK"
48 .Ft struct mparse *
49 .Fo mparse_alloc
50 .Fa "int options"
51 .Fa "enum mandoclevel wlevel"
52 .Fa "mandocmsg mmsg"
53 .Fa "char *defos"
54 .Fc
55 .Ft void
56 .Fo (*mandocmsg)
57 .Fa "enum mandocerr errtype"
58 .Fa "enum mandoclevel level"
59 .Fa "const char *file"
60 .Fa "int line"
61 .Fa "int col"
62 .Fa "const char *msg"
63 .Fc
64 .Ft void
65 .Fo mparse_free
66 .Fa "struct mparse *parse"
67 .Fc
68 .Ft const char *
69 .Fo mparse_getkeep
70 .Fa "const struct mparse *parse"
71 .Fc
72 .Ft void
73 .Fo mparse_keep
74 .Fa "struct mparse *parse"
75 .Fc
76 .Ft "enum mandoclevel"
77 .Fo mparse_readfd
78 .Fa "struct mparse *parse"
79 .Fa "int fd"
80 .Fa "const char *fname"
81 .Fc
82 .Ft void
83 .Fo mparse_reset
84 .Fa "struct mparse *parse"
85 .Fc
86 .Ft void
87 .Fo mparse_result
88 .Fa "struct mparse *parse"
89 .Fa "struct mdoc **mdoc"
90 .Fa "struct man **man"
91 .Fa "char **sodest"
92 .Fc
93 .Ft "const char *"
94 .Fo mparse_strerror
95 .Fa "enum mandocerr"
96 .Fc
97 .Ft "const char *"
98 .Fo mparse_strlevel
99 .Fa "enum mandoclevel"
100 .Fc
101 .In sys/types.h
102 .In mandoc.h
103 .In mdoc.h
104 .Ft void
105 .Fo mdoc_deroff
106 .Fa "char **dest"
107 .Fa "const struct mdoc_node *node"
108 .Fc
109 .Ft "const struct mdoc_meta *"
110 .Fo mdoc_meta
111 .Fa "const struct mdoc *mdoc"
112 .Fc
113 .Ft "const struct mdoc_node *"
114 .Fo mdoc_node
115 .Fa "const struct mdoc *mdoc"
116 .Fc
117 .Vt extern const char * const * mdoc_argnames;
118 .Vt extern const char * const * mdoc_macronames;
119 .In sys/types.h
120 .In mandoc.h
121 .In man.h
122 .Ft void
123 .Fo man_deroff
124 .Fa "char **dest"
125 .Fa "const struct man_node *node"
126 .Fc
127 .Ft "const struct man_meta *"
128 .Fo man_meta
129 .Fa "const struct man *man"
130 .Fc
131 .Ft "const struct mparse *"
132 .Fo man_mparse
133 .Fa "const struct man *man"
134 .Fc
135 .Ft "const struct man_node *"
136 .Fo man_node
137 .Fa "const struct man *man"
138 .Fc
139 .Vt extern const char * const * man_macronames;
140 .Sh DESCRIPTION
141 The
142 .Nm mandoc
143 library parses a
144 .Ux
145 manual into an abstract syntax tree (AST).
146 .Ux
147 manuals are composed of
148 .Xr mdoc 7
149 or
150 .Xr man 7 ,
151 and may be mixed with
152 .Xr roff 7 ,
153 .Xr tbl 7 ,
154 and
155 .Xr eqn 7
156 invocations.
157 .Pp
158 The following describes a general parse sequence:
159 .Bl -enum
160 .It
161 initiate a parsing sequence with
162 .Fn mparse_alloc ;
163 .It
164 parse files or file descriptors with
165 .Fn mparse_readfd ;
166 .It
167 retrieve a parsed syntax tree, if the parse was successful, with
168 .Fn mparse_result ;
169 .It
170 iterate over parse nodes with
171 .Fn mdoc_node
172 or
173 .Fn man_node ;
174 .It
175 free all allocated memory with
176 .Fn mparse_free ,
177 or invoke
178 .Fn mparse_reset
179 and parse new files.
180 .El
181 .Sh REFERENCE
182 This section documents the functions, types, and variables available
183 via
184 .In mandoc.h ,
185 with the exception of those documented in
186 .Xr mandoc_escape 3
187 and
188 .Xr mchars_alloc 3 .
189 .Ss Types
190 .Bl -ohang
191 .It Vt "enum mandocerr"
192 A fatal error, error, or warning message during parsing.
193 .It Vt "enum mandoclevel"
194 A classification of an
195 .Vt "enum mandocerr"
196 as regards system operation.
197 .It Vt "struct mparse"
198 An opaque pointer to a running parse sequence.
199 Created with
200 .Fn mparse_alloc
201 and freed with
202 .Fn mparse_free .
203 This may be used across parsed input if
204 .Fn mparse_reset
205 is called between parses.
206 .It Vt "mandocmsg"
207 A prototype for a function to handle fatal error, error, and warning
208 messages emitted by the parser.
209 .El
210 .Ss Functions
211 .Bl -ohang
212 .It Fn man_deroff
213 Obtain a text-only representation of a
214 .Vt struct man_node ,
215 including text contained in its child nodes.
216 To be used on children of the pointer returned from
217 .Fn man_node .
218 When it is no longer needed, the pointer returned from
219 .Fn man_deroff
220 can be passed to
221 .Xr free 3 .
222 .It Fn man_meta
223 Obtain the meta-data of a successful
224 .Xr man 7
225 parse.
226 This may only be used on a pointer returned by
227 .Fn mparse_result .
228 Declared in
229 .In man.h ,
230 implemented in
231 .Pa man.c .
232 .It Fn man_mparse
233 Get the parser used for the current output.
234 Declared in
235 .In man.h ,
236 implemented in
237 .Pa man.c .
238 .It Fn man_node
239 Obtain the root node of a successful
240 .Xr man 7
241 parse.
242 This may only be used on a pointer returned by
243 .Fn mparse_result .
244 Declared in
245 .In man.h ,
246 implemented in
247 .Pa man.c .
248 .It Fn mdoc_deroff
249 Obtain a text-only representation of a
250 .Vt struct mdoc_node ,
251 including text contained in its child nodes.
252 To be used on children of the pointer returned from
253 .Fn mdoc_node .
254 When it is no longer needed, the pointer returned from
255 .Fn mdoc_deroff
256 can be passed to
257 .Xr free 3 .
258 .It Fn mdoc_meta
259 Obtain the meta-data of a successful
260 .Xr mdoc 7
261 parse.
262 This may only be used on a pointer returned by
263 .Fn mparse_result .
264 Declared in
265 .In mdoc.h ,
266 implemented in
267 .Pa mdoc.c .
268 .It Fn mdoc_node
269 Obtain the root node of a successful
270 .Xr mdoc 7
271 parse.
272 This may only be used on a pointer returned by
273 .Fn mparse_result .
274 Declared in
275 .In mdoc.h ,
276 implemented in
277 .Pa mdoc.c .
278 .It Fn mparse_alloc
279 Allocate a parser.
280 The arguments have the following effect:
281 .Bl -tag -offset 5n -width inttype
282 .It Ar options
283 When the
284 .Dv MPARSE_MDOC
285 or
286 .Dv MPARSE_MAN
287 bit is set, only that parser is used.
288 Otherwise, the document type is automatically detected.
289 .Pp
290 When the
291 .Dv MPARSE_SO
292 bit is set,
293 .Xr roff 7
294 .Ic \&so
295 file inclusion requests are always honoured.
296 Otherwise, if the request is the only content in an input file,
297 only the file name is remembered, to be returned in the
298 .Fa sodest
299 argument of
300 .Fn mparse_result .
301 .Pp
302 When the
303 .Dv MPARSE_QUICK
304 bit is set, parsing is aborted after the NAME section.
305 This is for example useful in
306 .Xr makewhatis 8
307 .Fl Q
308 to quickly build minimal databases.
309 .It Ar wlevel
310 Can be set to
311 .Dv MANDOCLEVEL_FATAL ,
312 .Dv MANDOCLEVEL_ERROR ,
313 or
314 .Dv MANDOCLEVEL_WARNING .
315 Messages below the selected level will be suppressed.
316 .It Ar mmsg
317 A callback function to handle errors and warnings.
318 See
319 .Pa main.c
320 for an example.
321 .It Ar defos
322 A default string for the
323 .Xr mdoc 7
324 .Sq \&Os
325 macro, overriding the
326 .Dv OSNAME
327 preprocessor definition and the results of
328 .Xr uname 3 .
329 .El
330 .Pp
331 The same parser may be used for multiple files so long as
332 .Fn mparse_reset
333 is called between parses.
334 .Fn mparse_free
335 must be called to free the memory allocated by this function.
336 Declared in
337 .In mandoc.h ,
338 implemented in
339 .Pa read.c .
340 .It Fn mparse_free
341 Free all memory allocated by
342 .Fn mparse_alloc .
343 Declared in
344 .In mandoc.h ,
345 implemented in
346 .Pa read.c .
347 .It Fn mparse_getkeep
348 Acquire the keep buffer.
349 Must follow a call of
350 .Fn mparse_keep .
351 Declared in
352 .In mandoc.h ,
353 implemented in
354 .Pa read.c .
355 .It Fn mparse_keep
356 Instruct the parser to retain a copy of its parsed input.
357 This can be acquired with subsequent
358 .Fn mparse_getkeep
359 calls.
360 Declared in
361 .In mandoc.h ,
362 implemented in
363 .Pa read.c .
364 .It Fn mparse_readfd
365 Parse a file or file descriptor.
366 If
367 .Va fd
368 is -1,
369 .Va fname
370 is opened for reading.
371 Otherwise,
372 .Va fname
373 is assumed to be the name associated with
374 .Va fd .
375 This may be called multiple times with different parameters; however,
376 .Fn mparse_reset
377 should be invoked between parses.
378 Declared in
379 .In mandoc.h ,
380 implemented in
381 .Pa read.c .
382 .It Fn mparse_reset
383 Reset a parser so that
384 .Fn mparse_readfd
385 may be used again.
386 Declared in
387 .In mandoc.h ,
388 implemented in
389 .Pa read.c .
390 .It Fn mparse_result
391 Obtain the result of a parse.
392 Only successful parses
393 .Po
394 i.e., those where
395 .Fn mparse_readfd
396 returned less than MANDOCLEVEL_FATAL
397 .Pc
398 should invoke this function, in which case one of the three pointers will
399 be filled in.
400 Declared in
401 .In mandoc.h ,
402 implemented in
403 .Pa read.c .
404 .It Fn mparse_strerror
405 Return a statically-allocated string representation of an error code.
406 Declared in
407 .In mandoc.h ,
408 implemented in
409 .Pa read.c .
410 .It Fn mparse_strlevel
411 Return a statically-allocated string representation of a level code.
412 Declared in
413 .In mandoc.h ,
414 implemented in
415 .Pa read.c .
416 .El
417 .Ss Variables
418 .Bl -ohang
419 .It Va man_macronames
420 The string representation of a man macro as indexed by
421 .Vt "enum mant" .
422 .It Va mdoc_argnames
423 The string representation of a mdoc macro argument as indexed by
424 .Vt "enum mdocargt" .
425 .It Va mdoc_macronames
426 The string representation of a mdoc macro as indexed by
427 .Vt "enum mdoct" .
428 .El
429 .Sh IMPLEMENTATION NOTES
430 This section consists of structural documentation for
431 .Xr mdoc 7
432 and
433 .Xr man 7
434 syntax trees and strings.
435 .Ss Man and Mdoc Strings
436 Strings may be extracted from mdoc and man meta-data, or from text
437 nodes (MDOC_TEXT and MAN_TEXT, respectively).
438 These strings have special non-printing formatting cues embedded in the
439 text itself, as well as
440 .Xr roff 7
441 escapes preserved from input.
442 Implementing systems will need to handle both situations to produce
443 human-readable text.
444 In general, strings may be assumed to consist of 7-bit ASCII characters.
445 .Pp
446 The following non-printing characters may be embedded in text strings:
447 .Bl -tag -width Ds
448 .It Dv ASCII_NBRSP
449 A non-breaking space character.
450 .It Dv ASCII_HYPH
451 A soft hyphen.
452 .It Dv ASCII_BREAK
453 A breakable zero-width space.
454 .El
455 .Pp
456 Escape characters are also passed verbatim into text strings.
457 An escape character is a sequence of characters beginning with the
458 backslash
459 .Pq Sq \e .
460 To construct human-readable text, these should be intercepted with
461 .Xr mandoc_escape 3
462 and converted with one of the functions described in
463 .Xr mchars_alloc 3 .
464 .Ss Man Abstract Syntax Tree
465 This AST is governed by the ontological rules dictated in
466 .Xr man 7
467 and derives its terminology accordingly.
468 .Pp
469 The AST is composed of
470 .Vt struct man_node
471 nodes with block, head, body, element, root and text types as declared by the
472 .Va type
473 field.
474 Each node also provides its parse point (the
475 .Va line ,
476 .Va sec ,
477 and
478 .Va pos
479 fields), its position in the tree (the
480 .Va parent ,
481 .Va child ,
482 .Va next
483 and
484 .Va prev
485 fields) and some type-specific data.
486 .Pp
487 The tree itself is arranged according to the following normal form,
488 where capitalised non-terminals represent nodes.
489 .Pp
490 .Bl -tag -width "ELEMENTXX" -compact
491 .It ROOT
492 \(<- mnode+
493 .It mnode
494 \(<- ELEMENT | TEXT | BLOCK
495 .It BLOCK
496 \(<- HEAD BODY
497 .It HEAD
498 \(<- mnode*
499 .It BODY
500 \(<- mnode*
501 .It ELEMENT
502 \(<- ELEMENT | TEXT*
503 .It TEXT
504 \(<- [[:ascii:]]*
505 .El
506 .Pp
507 The only elements capable of nesting other elements are those with
508 next-line scope as documented in
509 .Xr man 7 .
510 .Ss Mdoc Abstract Syntax Tree
511 This AST is governed by the ontological
512 rules dictated in
513 .Xr mdoc 7
514 and derives its terminology accordingly.
515 .Qq In-line
516 elements described in
517 .Xr mdoc 7
518 are described simply as
519 .Qq elements .
520 .Pp
521 The AST is composed of
522 .Vt struct mdoc_node
523 nodes with block, head, body, element, root and text types as declared
524 by the
525 .Va type
526 field.
527 Each node also provides its parse point (the
528 .Va line ,
529 .Va sec ,
530 and
531 .Va pos
532 fields), its position in the tree (the
533 .Va parent ,
534 .Va child ,
535 .Va nchild ,
536 .Va next
537 and
538 .Va prev
539 fields) and some type-specific data, in particular, for nodes generated
540 from macros, the generating macro in the
541 .Va tok
542 field.
543 .Pp
544 The tree itself is arranged according to the following normal form,
545 where capitalised non-terminals represent nodes.
546 .Pp
547 .Bl -tag -width "ELEMENTXX" -compact
548 .It ROOT
549 \(<- mnode+
550 .It mnode
551 \(<- BLOCK | ELEMENT | TEXT
552 .It BLOCK
553 \(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
554 .It ELEMENT
555 \(<- TEXT*
556 .It HEAD
557 \(<- mnode*
558 .It BODY
559 \(<- mnode* [ENDBODY mnode*]
560 .It TAIL
561 \(<- mnode*
562 .It TEXT
563 \(<- [[:ascii:]]*
564 .El
565 .Pp
566 Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
567 the BLOCK production: these refer to punctuation marks.
568 Furthermore, although a TEXT node will generally have a non-zero-length
569 string, in the specific case of
570 .Sq \&.Bd \-literal ,
571 an empty line will produce a zero-length string.
572 Multiple body parts are only found in invocations of
573 .Sq \&Bl \-column ,
574 where a new body introduces a new phrase.
575 .Pp
576 The
577 .Xr mdoc 7
578 syntax tree accommodates broken block structures as well.
579 The ENDBODY node is available to end the formatting associated
580 with a given block before the physical end of that block.
581 It has a non-null
582 .Va end
583 field, is of the BODY
584 .Va type ,
585 has the same
586 .Va tok
587 as the BLOCK it is ending, and has a
588 .Va pending
589 field pointing to that BLOCK's BODY node.
590 It is an indirect child of that BODY node
591 and has no children of its own.
592 .Pp
593 An ENDBODY node is generated when a block ends while one of its child
594 blocks is still open, as in the following example:
595 .Bd -literal -offset indent
596 \&.Ao ao
597 \&.Bo bo ac
598 \&.Ac bc
599 \&.Bc end
600 .Ed
601 .Pp
602 This example results in the following block structure:
603 .Bd -literal -offset indent
604 BLOCK Ao
605 HEAD Ao
606 BODY Ao
607 TEXT ao
608 BLOCK Bo, pending -> Ao
609 HEAD Bo
610 BODY Bo
611 TEXT bo
612 TEXT ac
613 ENDBODY Ao, pending -> Ao
614 TEXT bc
615 TEXT end
616 .Ed
617 .Pp
618 Here, the formatting of the
619 .Sq \&Ao
620 block extends from TEXT ao to TEXT ac,
621 while the formatting of the
622 .Sq \&Bo
623 block extends from TEXT bo to TEXT bc.
624 It renders as follows in
625 .Fl T Ns Cm ascii
626 mode:
627 .Pp
628 .Dl <ao [bo ac> bc] end
629 .Pp
630 Support for badly-nested blocks is only provided for backward
631 compatibility with some older
632 .Xr mdoc 7
633 implementations.
634 Using badly-nested blocks is
635 .Em strongly discouraged ;
636 for example, the
637 .Fl T Ns Cm html
638 and
639 .Fl T Ns Cm xhtml
640 front-ends to
641 .Xr mandoc 1
642 are unable to render them in any meaningful way.
643 Furthermore, behaviour when encountering badly-nested blocks is not
644 consistent across troff implementations, especially when using multiple
645 levels of badly-nested blocks.
646 .Sh SEE ALSO
647 .Xr mandoc 1 ,
648 .Xr mandoc_escape 3 ,
649 .Xr mandoc_malloc 3 ,
650 .Xr mchars_alloc 3 ,
651 .Xr eqn 7 ,
652 .Xr man 7 ,
653 .Xr mandoc_char 7 ,
654 .Xr mdoc 7 ,
655 .Xr roff 7 ,
656 .Xr tbl 7
657 .Sh AUTHORS
658 The
659 .Nm
660 library was written by
661 .An Kristaps Dzonsons Aq Mt kristaps@bsd.lv .