]> git.cameronkatri.com Git - mandoc.git/blob - mandoc.3
Use "-" rather than "\(hy" for the heads of .Bl -dash and -hyphen lists.
[mandoc.git] / mandoc.3
1 .\" $Id: mandoc.3,v 1.32 2015/07/19 06:05:16 schwarze Exp $
2 .\"
3 .\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 .\" Copyright (c) 2010, 2013, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
5 .\"
6 .\" Permission to use, copy, modify, and distribute this software for any
7 .\" purpose with or without fee is hereby granted, provided that the above
8 .\" copyright notice and this permission notice appear in all copies.
9 .\"
10 .\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 .\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 .\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 .\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 .\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 .\"
18 .Dd $Mdocdate: July 19 2015 $
19 .Dt MANDOC 3
20 .Os
21 .Sh NAME
22 .Nm mandoc ,
23 .Nm man_deroff ,
24 .Nm man_meta ,
25 .Nm man_mparse ,
26 .Nm man_node ,
27 .Nm mdoc_deroff ,
28 .Nm mdoc_meta ,
29 .Nm mdoc_node ,
30 .Nm mparse_alloc ,
31 .Nm mparse_free ,
32 .Nm mparse_getkeep ,
33 .Nm mparse_keep ,
34 .Nm mparse_open ,
35 .Nm mparse_readfd ,
36 .Nm mparse_reset ,
37 .Nm mparse_result ,
38 .Nm mparse_strerror ,
39 .Nm mparse_strlevel
40 .Nd mandoc macro compiler library
41 .Sh SYNOPSIS
42 .In sys/types.h
43 .In mandoc.h
44 .Pp
45 .Fd "#define ASCII_NBRSP"
46 .Fd "#define ASCII_HYPH"
47 .Fd "#define ASCII_BREAK"
48 .Ft struct mparse *
49 .Fo mparse_alloc
50 .Fa "int options"
51 .Fa "enum mandoclevel wlevel"
52 .Fa "mandocmsg mmsg"
53 .Fa "const struct mchars *mchars"
54 .Fa "char *defos"
55 .Fc
56 .Ft void
57 .Fo (*mandocmsg)
58 .Fa "enum mandocerr errtype"
59 .Fa "enum mandoclevel level"
60 .Fa "const char *file"
61 .Fa "int line"
62 .Fa "int col"
63 .Fa "const char *msg"
64 .Fc
65 .Ft void
66 .Fo mparse_free
67 .Fa "struct mparse *parse"
68 .Fc
69 .Ft const char *
70 .Fo mparse_getkeep
71 .Fa "const struct mparse *parse"
72 .Fc
73 .Ft void
74 .Fo mparse_keep
75 .Fa "struct mparse *parse"
76 .Fc
77 .Ft "enum mandoclevel"
78 .Fo mparse_open
79 .Fa "struct mparse *parse"
80 .Fa "int *fd"
81 .Fa "const char *fname"
82 .Fc
83 .Ft "enum mandoclevel"
84 .Fo mparse_readfd
85 .Fa "struct mparse *parse"
86 .Fa "int fd"
87 .Fa "const char *fname"
88 .Fc
89 .Ft void
90 .Fo mparse_reset
91 .Fa "struct mparse *parse"
92 .Fc
93 .Ft void
94 .Fo mparse_result
95 .Fa "struct mparse *parse"
96 .Fa "struct mdoc **mdoc"
97 .Fa "struct man **man"
98 .Fa "char **sodest"
99 .Fc
100 .Ft "const char *"
101 .Fo mparse_strerror
102 .Fa "enum mandocerr"
103 .Fc
104 .Ft "const char *"
105 .Fo mparse_strlevel
106 .Fa "enum mandoclevel"
107 .Fc
108 .In sys/types.h
109 .In mandoc.h
110 .In mdoc.h
111 .Ft void
112 .Fo mdoc_deroff
113 .Fa "char **dest"
114 .Fa "const struct mdoc_node *node"
115 .Fc
116 .Ft "const struct mdoc_meta *"
117 .Fo mdoc_meta
118 .Fa "const struct mdoc *mdoc"
119 .Fc
120 .Ft "const struct mdoc_node *"
121 .Fo mdoc_node
122 .Fa "const struct mdoc *mdoc"
123 .Fc
124 .Vt extern const char * const * mdoc_argnames;
125 .Vt extern const char * const * mdoc_macronames;
126 .In sys/types.h
127 .In mandoc.h
128 .In man.h
129 .Ft void
130 .Fo man_deroff
131 .Fa "char **dest"
132 .Fa "const struct man_node *node"
133 .Fc
134 .Ft "const struct man_meta *"
135 .Fo man_meta
136 .Fa "const struct man *man"
137 .Fc
138 .Ft "const struct mparse *"
139 .Fo man_mparse
140 .Fa "const struct man *man"
141 .Fc
142 .Ft "const struct man_node *"
143 .Fo man_node
144 .Fa "const struct man *man"
145 .Fc
146 .Vt extern const char * const * man_macronames;
147 .Sh DESCRIPTION
148 The
149 .Nm mandoc
150 library parses a
151 .Ux
152 manual into an abstract syntax tree (AST).
153 .Ux
154 manuals are composed of
155 .Xr mdoc 7
156 or
157 .Xr man 7 ,
158 and may be mixed with
159 .Xr roff 7 ,
160 .Xr tbl 7 ,
161 and
162 .Xr eqn 7
163 invocations.
164 .Pp
165 The following describes a general parse sequence:
166 .Bl -enum
167 .It
168 initiate a parsing sequence with
169 .Xr mchars_alloc 3
170 and
171 .Fn mparse_alloc ;
172 .It
173 open a file with
174 .Xr open 2
175 or
176 .Fn mparse_open ;
177 .It
178 parse it with
179 .Fn mparse_readfd ;
180 .It
181 retrieve the syntax tree with
182 .Fn mparse_result ;
183 .It
184 iterate over parse nodes with
185 .Fn mdoc_node
186 or
187 .Fn man_node ;
188 .It
189 free all allocated memory with
190 .Fn mparse_free
191 and
192 .Xr mchars_free 3 ,
193 or invoke
194 .Fn mparse_reset
195 and parse new files.
196 .El
197 .Sh REFERENCE
198 This section documents the functions, types, and variables available
199 via
200 .In mandoc.h ,
201 with the exception of those documented in
202 .Xr mandoc_escape 3
203 and
204 .Xr mchars_alloc 3 .
205 .Ss Types
206 .Bl -ohang
207 .It Vt "enum mandocerr"
208 An error or warning message during parsing.
209 .It Vt "enum mandoclevel"
210 A classification of an
211 .Vt "enum mandocerr"
212 as regards system operation.
213 .It Vt "struct mchars"
214 An opaque pointer to a character table.
215 Created with
216 .Xr mchars_alloc 3
217 and freed with
218 .Xr mchars_free 3 .
219 .It Vt "struct mparse"
220 An opaque pointer to a running parse sequence.
221 Created with
222 .Fn mparse_alloc
223 and freed with
224 .Fn mparse_free .
225 This may be used across parsed input if
226 .Fn mparse_reset
227 is called between parses.
228 .It Vt "mandocmsg"
229 A prototype for a function to handle error and warning
230 messages emitted by the parser.
231 .El
232 .Ss Functions
233 .Bl -ohang
234 .It Fn man_deroff
235 Obtain a text-only representation of a
236 .Vt struct man_node ,
237 including text contained in its child nodes.
238 To be used on children of the pointer returned from
239 .Fn man_node .
240 When it is no longer needed, the pointer returned from
241 .Fn man_deroff
242 can be passed to
243 .Xr free 3 .
244 .It Fn man_meta
245 Obtain the meta-data of a successful
246 .Xr man 7
247 parse.
248 This may only be used on a pointer returned by
249 .Fn mparse_result .
250 Declared in
251 .In man.h ,
252 implemented in
253 .Pa man.c .
254 .It Fn man_mparse
255 Get the parser used for the current output.
256 Declared in
257 .In man.h ,
258 implemented in
259 .Pa man.c .
260 .It Fn man_node
261 Obtain the root node of a successful
262 .Xr man 7
263 parse.
264 This may only be used on a pointer returned by
265 .Fn mparse_result .
266 Declared in
267 .In man.h ,
268 implemented in
269 .Pa man.c .
270 .It Fn mdoc_deroff
271 Obtain a text-only representation of a
272 .Vt struct mdoc_node ,
273 including text contained in its child nodes.
274 To be used on children of the pointer returned from
275 .Fn mdoc_node .
276 When it is no longer needed, the pointer returned from
277 .Fn mdoc_deroff
278 can be passed to
279 .Xr free 3 .
280 .It Fn mdoc_meta
281 Obtain the meta-data of a successful
282 .Xr mdoc 7
283 parse.
284 This may only be used on a pointer returned by
285 .Fn mparse_result .
286 Declared in
287 .In mdoc.h ,
288 implemented in
289 .Pa mdoc.c .
290 .It Fn mdoc_node
291 Obtain the root node of a successful
292 .Xr mdoc 7
293 parse.
294 This may only be used on a pointer returned by
295 .Fn mparse_result .
296 Declared in
297 .In mdoc.h ,
298 implemented in
299 .Pa mdoc.c .
300 .It Fn mparse_alloc
301 Allocate a parser.
302 The arguments have the following effect:
303 .Bl -tag -offset 5n -width inttype
304 .It Ar options
305 When the
306 .Dv MPARSE_MDOC
307 or
308 .Dv MPARSE_MAN
309 bit is set, only that parser is used.
310 Otherwise, the document type is automatically detected.
311 .Pp
312 When the
313 .Dv MPARSE_SO
314 bit is set,
315 .Xr roff 7
316 .Ic \&so
317 file inclusion requests are always honoured.
318 Otherwise, if the request is the only content in an input file,
319 only the file name is remembered, to be returned in the
320 .Fa sodest
321 argument of
322 .Fn mparse_result .
323 .Pp
324 When the
325 .Dv MPARSE_QUICK
326 bit is set, parsing is aborted after the NAME section.
327 This is for example useful in
328 .Xr makewhatis 8
329 .Fl Q
330 to quickly build minimal databases.
331 .It Ar wlevel
332 Can be set to
333 .Dv MANDOCLEVEL_BADARG ,
334 .Dv MANDOCLEVEL_ERROR ,
335 or
336 .Dv MANDOCLEVEL_WARNING .
337 Messages below the selected level will be suppressed.
338 .It Ar mmsg
339 A callback function to handle errors and warnings.
340 See
341 .Pa main.c
342 for an example.
343 .It Ar mchars
344 An opaque pointer to a character table obtained from
345 .Xr mchars_alloc 3 .
346 .It Ar defos
347 A default string for the
348 .Xr mdoc 7
349 .Sq \&Os
350 macro, overriding the
351 .Dv OSNAME
352 preprocessor definition and the results of
353 .Xr uname 3 .
354 .El
355 .Pp
356 The same parser may be used for multiple files so long as
357 .Fn mparse_reset
358 is called between parses.
359 .Fn mparse_free
360 must be called to free the memory allocated by this function.
361 Declared in
362 .In mandoc.h ,
363 implemented in
364 .Pa read.c .
365 .It Fn mparse_free
366 Free all memory allocated by
367 .Fn mparse_alloc .
368 Declared in
369 .In mandoc.h ,
370 implemented in
371 .Pa read.c .
372 .It Fn mparse_getkeep
373 Acquire the keep buffer.
374 Must follow a call of
375 .Fn mparse_keep .
376 Declared in
377 .In mandoc.h ,
378 implemented in
379 .Pa read.c .
380 .It Fn mparse_keep
381 Instruct the parser to retain a copy of its parsed input.
382 This can be acquired with subsequent
383 .Fn mparse_getkeep
384 calls.
385 Declared in
386 .In mandoc.h ,
387 implemented in
388 .Pa read.c .
389 .It Fn mparse_open
390 Open the file for reading.
391 If that fails and
392 .Fa fname
393 does not already end in
394 .Ql .gz ,
395 try again after appending
396 .Ql .gz .
397 Remember whether the file is zipped or not.
398 Return a file descriptor open for reading in
399 .Fa fd ,
400 or -1 on failure.
401 It can be passed to
402 .Fn mparse_readfd
403 or used directly.
404 Declared in
405 .In mandoc.h ,
406 implemented in
407 .Pa read.c .
408 .It Fn mparse_readfd
409 Parse a file descriptor opened with
410 .Xr open 2
411 or
412 .Fn mparse_open .
413 Pass the associated filename in
414 .Va fname .
415 This function may be called multiple times with different parameters; however,
416 .Fn mparse_reset
417 should be invoked between parses.
418 Declared in
419 .In mandoc.h ,
420 implemented in
421 .Pa read.c .
422 .It Fn mparse_reset
423 Reset a parser so that
424 .Fn mparse_readfd
425 may be used again.
426 Declared in
427 .In mandoc.h ,
428 implemented in
429 .Pa read.c .
430 .It Fn mparse_result
431 Obtain the result of a parse.
432 One of the three pointers will be filled in.
433 Declared in
434 .In mandoc.h ,
435 implemented in
436 .Pa read.c .
437 .It Fn mparse_strerror
438 Return a statically-allocated string representation of an error code.
439 Declared in
440 .In mandoc.h ,
441 implemented in
442 .Pa read.c .
443 .It Fn mparse_strlevel
444 Return a statically-allocated string representation of a level code.
445 Declared in
446 .In mandoc.h ,
447 implemented in
448 .Pa read.c .
449 .El
450 .Ss Variables
451 .Bl -ohang
452 .It Va man_macronames
453 The string representation of a man macro as indexed by
454 .Vt "enum mant" .
455 .It Va mdoc_argnames
456 The string representation of a mdoc macro argument as indexed by
457 .Vt "enum mdocargt" .
458 .It Va mdoc_macronames
459 The string representation of a mdoc macro as indexed by
460 .Vt "enum mdoct" .
461 .El
462 .Sh IMPLEMENTATION NOTES
463 This section consists of structural documentation for
464 .Xr mdoc 7
465 and
466 .Xr man 7
467 syntax trees and strings.
468 .Ss Man and Mdoc Strings
469 Strings may be extracted from mdoc and man meta-data, or from text
470 nodes (MDOC_TEXT and MAN_TEXT, respectively).
471 These strings have special non-printing formatting cues embedded in the
472 text itself, as well as
473 .Xr roff 7
474 escapes preserved from input.
475 Implementing systems will need to handle both situations to produce
476 human-readable text.
477 In general, strings may be assumed to consist of 7-bit ASCII characters.
478 .Pp
479 The following non-printing characters may be embedded in text strings:
480 .Bl -tag -width Ds
481 .It Dv ASCII_NBRSP
482 A non-breaking space character.
483 .It Dv ASCII_HYPH
484 A soft hyphen.
485 .It Dv ASCII_BREAK
486 A breakable zero-width space.
487 .El
488 .Pp
489 Escape characters are also passed verbatim into text strings.
490 An escape character is a sequence of characters beginning with the
491 backslash
492 .Pq Sq \e .
493 To construct human-readable text, these should be intercepted with
494 .Xr mandoc_escape 3
495 and converted with one of the functions described in
496 .Xr mchars_alloc 3 .
497 .Ss Man Abstract Syntax Tree
498 This AST is governed by the ontological rules dictated in
499 .Xr man 7
500 and derives its terminology accordingly.
501 .Pp
502 The AST is composed of
503 .Vt struct man_node
504 nodes with element, root and text types as declared by the
505 .Va type
506 field.
507 Each node also provides its parse point (the
508 .Va line ,
509 .Va sec ,
510 and
511 .Va pos
512 fields), its position in the tree (the
513 .Va parent ,
514 .Va child ,
515 .Va next
516 and
517 .Va prev
518 fields) and some type-specific data.
519 .Pp
520 The tree itself is arranged according to the following normal form,
521 where capitalised non-terminals represent nodes.
522 .Pp
523 .Bl -tag -width "ELEMENTXX" -compact
524 .It ROOT
525 \(<- mnode+
526 .It mnode
527 \(<- ELEMENT | TEXT | BLOCK
528 .It BLOCK
529 \(<- HEAD BODY
530 .It HEAD
531 \(<- mnode*
532 .It BODY
533 \(<- mnode*
534 .It ELEMENT
535 \(<- ELEMENT | TEXT*
536 .It TEXT
537 \(<- [[:ascii:]]*
538 .El
539 .Pp
540 The only elements capable of nesting other elements are those with
541 next-line scope as documented in
542 .Xr man 7 .
543 .Ss Mdoc Abstract Syntax Tree
544 This AST is governed by the ontological
545 rules dictated in
546 .Xr mdoc 7
547 and derives its terminology accordingly.
548 .Qq In-line
549 elements described in
550 .Xr mdoc 7
551 are described simply as
552 .Qq elements .
553 .Pp
554 The AST is composed of
555 .Vt struct mdoc_node
556 nodes with block, head, body, element, root and text types as declared
557 by the
558 .Va type
559 field.
560 Each node also provides its parse point (the
561 .Va line ,
562 .Va sec ,
563 and
564 .Va pos
565 fields), its position in the tree (the
566 .Va parent ,
567 .Va child ,
568 .Va nchild ,
569 .Va next
570 and
571 .Va prev
572 fields) and some type-specific data, in particular, for nodes generated
573 from macros, the generating macro in the
574 .Va tok
575 field.
576 .Pp
577 The tree itself is arranged according to the following normal form,
578 where capitalised non-terminals represent nodes.
579 .Pp
580 .Bl -tag -width "ELEMENTXX" -compact
581 .It ROOT
582 \(<- mnode+
583 .It mnode
584 \(<- BLOCK | ELEMENT | TEXT
585 .It BLOCK
586 \(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
587 .It ELEMENT
588 \(<- TEXT*
589 .It HEAD
590 \(<- mnode*
591 .It BODY
592 \(<- mnode* [ENDBODY mnode*]
593 .It TAIL
594 \(<- mnode*
595 .It TEXT
596 \(<- [[:ascii:]]*
597 .El
598 .Pp
599 Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
600 the BLOCK production: these refer to punctuation marks.
601 Furthermore, although a TEXT node will generally have a non-zero-length
602 string, in the specific case of
603 .Sq \&.Bd \-literal ,
604 an empty line will produce a zero-length string.
605 Multiple body parts are only found in invocations of
606 .Sq \&Bl \-column ,
607 where a new body introduces a new phrase.
608 .Pp
609 The
610 .Xr mdoc 7
611 syntax tree accommodates broken block structures as well.
612 The ENDBODY node is available to end the formatting associated
613 with a given block before the physical end of that block.
614 It has a non-null
615 .Va end
616 field, is of the BODY
617 .Va type ,
618 has the same
619 .Va tok
620 as the BLOCK it is ending, and has a
621 .Va pending
622 field pointing to that BLOCK's BODY node.
623 It is an indirect child of that BODY node
624 and has no children of its own.
625 .Pp
626 An ENDBODY node is generated when a block ends while one of its child
627 blocks is still open, like in the following example:
628 .Bd -literal -offset indent
629 \&.Ao ao
630 \&.Bo bo ac
631 \&.Ac bc
632 \&.Bc end
633 .Ed
634 .Pp
635 This example results in the following block structure:
636 .Bd -literal -offset indent
637 BLOCK Ao
638 HEAD Ao
639 BODY Ao
640 TEXT ao
641 BLOCK Bo, pending -> Ao
642 HEAD Bo
643 BODY Bo
644 TEXT bo
645 TEXT ac
646 ENDBODY Ao, pending -> Ao
647 TEXT bc
648 TEXT end
649 .Ed
650 .Pp
651 Here, the formatting of the
652 .Sq \&Ao
653 block extends from TEXT ao to TEXT ac,
654 while the formatting of the
655 .Sq \&Bo
656 block extends from TEXT bo to TEXT bc.
657 It renders as follows in
658 .Fl T Ns Cm ascii
659 mode:
660 .Pp
661 .Dl <ao [bo ac> bc] end
662 .Pp
663 Support for badly-nested blocks is only provided for backward
664 compatibility with some older
665 .Xr mdoc 7
666 implementations.
667 Using badly-nested blocks is
668 .Em strongly discouraged ;
669 for example, the
670 .Fl T Ns Cm html
671 and
672 .Fl T Ns Cm xhtml
673 front-ends to
674 .Xr mandoc 1
675 are unable to render them in any meaningful way.
676 Furthermore, behaviour when encountering badly-nested blocks is not
677 consistent across troff implementations, especially when using multiple
678 levels of badly-nested blocks.
679 .Sh SEE ALSO
680 .Xr mandoc 1 ,
681 .Xr mandoc_escape 3 ,
682 .Xr mandoc_malloc 3 ,
683 .Xr mchars_alloc 3 ,
684 .Xr eqn 7 ,
685 .Xr man 7 ,
686 .Xr mandoc_char 7 ,
687 .Xr mdoc 7 ,
688 .Xr roff 7 ,
689 .Xr tbl 7
690 .Sh AUTHORS
691 The
692 .Nm
693 library was written by
694 .An Kristaps Dzonsons Aq Mt kristaps@bsd.lv .