]> git.cameronkatri.com Git - mandoc.git/blob - mandoc.3
The st_size member of struct stat is off_t, which is signed,
[mandoc.git] / mandoc.3
1 .\" $Id: mandoc.3,v 1.31 2015/01/15 04:26:40 schwarze Exp $
2 .\"
3 .\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 .\" Copyright (c) 2010, 2013, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
5 .\"
6 .\" Permission to use, copy, modify, and distribute this software for any
7 .\" purpose with or without fee is hereby granted, provided that the above
8 .\" copyright notice and this permission notice appear in all copies.
9 .\"
10 .\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 .\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 .\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 .\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 .\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 .\"
18 .Dd $Mdocdate: January 15 2015 $
19 .Dt MANDOC 3
20 .Os
21 .Sh NAME
22 .Nm mandoc ,
23 .Nm man_deroff ,
24 .Nm man_meta ,
25 .Nm man_mparse ,
26 .Nm man_node ,
27 .Nm mdoc_deroff ,
28 .Nm mdoc_meta ,
29 .Nm mdoc_node ,
30 .Nm mparse_alloc ,
31 .Nm mparse_free ,
32 .Nm mparse_getkeep ,
33 .Nm mparse_keep ,
34 .Nm mparse_open ,
35 .Nm mparse_readfd ,
36 .Nm mparse_reset ,
37 .Nm mparse_result ,
38 .Nm mparse_strerror ,
39 .Nm mparse_strlevel ,
40 .Nm mparse_wait
41 .Nd mandoc macro compiler library
42 .Sh SYNOPSIS
43 .In sys/types.h
44 .In mandoc.h
45 .Pp
46 .Fd "#define ASCII_NBRSP"
47 .Fd "#define ASCII_HYPH"
48 .Fd "#define ASCII_BREAK"
49 .Ft struct mparse *
50 .Fo mparse_alloc
51 .Fa "int options"
52 .Fa "enum mandoclevel wlevel"
53 .Fa "mandocmsg mmsg"
54 .Fa "const struct mchars *mchars"
55 .Fa "char *defos"
56 .Fc
57 .Ft void
58 .Fo (*mandocmsg)
59 .Fa "enum mandocerr errtype"
60 .Fa "enum mandoclevel level"
61 .Fa "const char *file"
62 .Fa "int line"
63 .Fa "int col"
64 .Fa "const char *msg"
65 .Fc
66 .Ft void
67 .Fo mparse_free
68 .Fa "struct mparse *parse"
69 .Fc
70 .Ft const char *
71 .Fo mparse_getkeep
72 .Fa "const struct mparse *parse"
73 .Fc
74 .Ft void
75 .Fo mparse_keep
76 .Fa "struct mparse *parse"
77 .Fc
78 .Ft "enum mandoclevel"
79 .Fo mparse_open
80 .Fa "struct mparse *parse"
81 .Fa "int *fd"
82 .Fa "const char *fname"
83 .Fc
84 .Ft "enum mandoclevel"
85 .Fo mparse_readfd
86 .Fa "struct mparse *parse"
87 .Fa "int fd"
88 .Fa "const char *fname"
89 .Fc
90 .Ft void
91 .Fo mparse_reset
92 .Fa "struct mparse *parse"
93 .Fc
94 .Ft void
95 .Fo mparse_result
96 .Fa "struct mparse *parse"
97 .Fa "struct mdoc **mdoc"
98 .Fa "struct man **man"
99 .Fa "char **sodest"
100 .Fc
101 .Ft "const char *"
102 .Fo mparse_strerror
103 .Fa "enum mandocerr"
104 .Fc
105 .Ft "const char *"
106 .Fo mparse_strlevel
107 .Fa "enum mandoclevel"
108 .Fc
109 .Ft "enum mandoclevel"
110 .Fo mparse_wait
111 .Fa "struct mparse *parse"
112 .Fc
113 .In sys/types.h
114 .In mandoc.h
115 .In mdoc.h
116 .Ft void
117 .Fo mdoc_deroff
118 .Fa "char **dest"
119 .Fa "const struct mdoc_node *node"
120 .Fc
121 .Ft "const struct mdoc_meta *"
122 .Fo mdoc_meta
123 .Fa "const struct mdoc *mdoc"
124 .Fc
125 .Ft "const struct mdoc_node *"
126 .Fo mdoc_node
127 .Fa "const struct mdoc *mdoc"
128 .Fc
129 .Vt extern const char * const * mdoc_argnames;
130 .Vt extern const char * const * mdoc_macronames;
131 .In sys/types.h
132 .In mandoc.h
133 .In man.h
134 .Ft void
135 .Fo man_deroff
136 .Fa "char **dest"
137 .Fa "const struct man_node *node"
138 .Fc
139 .Ft "const struct man_meta *"
140 .Fo man_meta
141 .Fa "const struct man *man"
142 .Fc
143 .Ft "const struct mparse *"
144 .Fo man_mparse
145 .Fa "const struct man *man"
146 .Fc
147 .Ft "const struct man_node *"
148 .Fo man_node
149 .Fa "const struct man *man"
150 .Fc
151 .Vt extern const char * const * man_macronames;
152 .Sh DESCRIPTION
153 The
154 .Nm mandoc
155 library parses a
156 .Ux
157 manual into an abstract syntax tree (AST).
158 .Ux
159 manuals are composed of
160 .Xr mdoc 7
161 or
162 .Xr man 7 ,
163 and may be mixed with
164 .Xr roff 7 ,
165 .Xr tbl 7 ,
166 and
167 .Xr eqn 7
168 invocations.
169 .Pp
170 The following describes a general parse sequence:
171 .Bl -enum
172 .It
173 initiate a parsing sequence with
174 .Xr mchars_alloc 3
175 and
176 .Fn mparse_alloc ;
177 .It
178 open a file with
179 .Xr open 2
180 or
181 .Fn mparse_open ;
182 .It
183 parse it with
184 .Fn mparse_readfd ;
185 .It
186 retrieve the syntax tree with
187 .Fn mparse_result ;
188 .It
189 iterate over parse nodes with
190 .Fn mdoc_node
191 or
192 .Fn man_node ;
193 .It
194 free all allocated memory with
195 .Fn mparse_free
196 and
197 .Xr mchars_free 3 ,
198 or invoke
199 .Fn mparse_reset
200 and parse new files.
201 .El
202 .Sh REFERENCE
203 This section documents the functions, types, and variables available
204 via
205 .In mandoc.h ,
206 with the exception of those documented in
207 .Xr mandoc_escape 3
208 and
209 .Xr mchars_alloc 3 .
210 .Ss Types
211 .Bl -ohang
212 .It Vt "enum mandocerr"
213 An error or warning message during parsing.
214 .It Vt "enum mandoclevel"
215 A classification of an
216 .Vt "enum mandocerr"
217 as regards system operation.
218 .It Vt "struct mchars"
219 An opaque pointer to a character table.
220 Created with
221 .Xr mchars_alloc 3
222 and freed with
223 .Xr mchars_free 3 .
224 .It Vt "struct mparse"
225 An opaque pointer to a running parse sequence.
226 Created with
227 .Fn mparse_alloc
228 and freed with
229 .Fn mparse_free .
230 This may be used across parsed input if
231 .Fn mparse_reset
232 is called between parses.
233 .It Vt "mandocmsg"
234 A prototype for a function to handle error and warning
235 messages emitted by the parser.
236 .El
237 .Ss Functions
238 .Bl -ohang
239 .It Fn man_deroff
240 Obtain a text-only representation of a
241 .Vt struct man_node ,
242 including text contained in its child nodes.
243 To be used on children of the pointer returned from
244 .Fn man_node .
245 When it is no longer needed, the pointer returned from
246 .Fn man_deroff
247 can be passed to
248 .Xr free 3 .
249 .It Fn man_meta
250 Obtain the meta-data of a successful
251 .Xr man 7
252 parse.
253 This may only be used on a pointer returned by
254 .Fn mparse_result .
255 Declared in
256 .In man.h ,
257 implemented in
258 .Pa man.c .
259 .It Fn man_mparse
260 Get the parser used for the current output.
261 Declared in
262 .In man.h ,
263 implemented in
264 .Pa man.c .
265 .It Fn man_node
266 Obtain the root node of a successful
267 .Xr man 7
268 parse.
269 This may only be used on a pointer returned by
270 .Fn mparse_result .
271 Declared in
272 .In man.h ,
273 implemented in
274 .Pa man.c .
275 .It Fn mdoc_deroff
276 Obtain a text-only representation of a
277 .Vt struct mdoc_node ,
278 including text contained in its child nodes.
279 To be used on children of the pointer returned from
280 .Fn mdoc_node .
281 When it is no longer needed, the pointer returned from
282 .Fn mdoc_deroff
283 can be passed to
284 .Xr free 3 .
285 .It Fn mdoc_meta
286 Obtain the meta-data of a successful
287 .Xr mdoc 7
288 parse.
289 This may only be used on a pointer returned by
290 .Fn mparse_result .
291 Declared in
292 .In mdoc.h ,
293 implemented in
294 .Pa mdoc.c .
295 .It Fn mdoc_node
296 Obtain the root node of a successful
297 .Xr mdoc 7
298 parse.
299 This may only be used on a pointer returned by
300 .Fn mparse_result .
301 Declared in
302 .In mdoc.h ,
303 implemented in
304 .Pa mdoc.c .
305 .It Fn mparse_alloc
306 Allocate a parser.
307 The arguments have the following effect:
308 .Bl -tag -offset 5n -width inttype
309 .It Ar options
310 When the
311 .Dv MPARSE_MDOC
312 or
313 .Dv MPARSE_MAN
314 bit is set, only that parser is used.
315 Otherwise, the document type is automatically detected.
316 .Pp
317 When the
318 .Dv MPARSE_SO
319 bit is set,
320 .Xr roff 7
321 .Ic \&so
322 file inclusion requests are always honoured.
323 Otherwise, if the request is the only content in an input file,
324 only the file name is remembered, to be returned in the
325 .Fa sodest
326 argument of
327 .Fn mparse_result .
328 .Pp
329 When the
330 .Dv MPARSE_QUICK
331 bit is set, parsing is aborted after the NAME section.
332 This is for example useful in
333 .Xr makewhatis 8
334 .Fl Q
335 to quickly build minimal databases.
336 .It Ar wlevel
337 Can be set to
338 .Dv MANDOCLEVEL_BADARG ,
339 .Dv MANDOCLEVEL_ERROR ,
340 or
341 .Dv MANDOCLEVEL_WARNING .
342 Messages below the selected level will be suppressed.
343 .It Ar mmsg
344 A callback function to handle errors and warnings.
345 See
346 .Pa main.c
347 for an example.
348 .It Ar mchars
349 An opaque pointer to a character table obtained from
350 .Xr mchars_alloc 3 .
351 .It Ar defos
352 A default string for the
353 .Xr mdoc 7
354 .Sq \&Os
355 macro, overriding the
356 .Dv OSNAME
357 preprocessor definition and the results of
358 .Xr uname 3 .
359 .El
360 .Pp
361 The same parser may be used for multiple files so long as
362 .Fn mparse_reset
363 is called between parses.
364 .Fn mparse_free
365 must be called to free the memory allocated by this function.
366 Declared in
367 .In mandoc.h ,
368 implemented in
369 .Pa read.c .
370 .It Fn mparse_free
371 Free all memory allocated by
372 .Fn mparse_alloc .
373 Declared in
374 .In mandoc.h ,
375 implemented in
376 .Pa read.c .
377 .It Fn mparse_getkeep
378 Acquire the keep buffer.
379 Must follow a call of
380 .Fn mparse_keep .
381 Declared in
382 .In mandoc.h ,
383 implemented in
384 .Pa read.c .
385 .It Fn mparse_keep
386 Instruct the parser to retain a copy of its parsed input.
387 This can be acquired with subsequent
388 .Fn mparse_getkeep
389 calls.
390 Declared in
391 .In mandoc.h ,
392 implemented in
393 .Pa read.c .
394 .It Fn mparse_open
395 If the
396 .Fa fname
397 ends in
398 .Pa .gz ,
399 open with
400 .Xr gunzip 1 ;
401 otherwise, with
402 .Xr open 2 .
403 If
404 .Xr open 2
405 fails, append
406 .Pa .gz
407 and try with
408 .Xr gunzip 1 .
409 Return a file descriptor open for reading in
410 .Fa fd ,
411 or -1 on failure.
412 It can be passed to
413 .Fn mparse_readfd
414 or used directly.
415 Declared in
416 .In mandoc.h ,
417 implemented in
418 .Pa read.c .
419 .It Fn mparse_readfd
420 Parse a file descriptor opened with
421 .Xr open 2
422 or
423 .Fn mparse_open .
424 Pass the associated filename in
425 .Va fname .
426 Calls
427 .Fn mparse_wait
428 before returning.
429 This function may be called multiple times with different parameters; however,
430 .Fn mparse_reset
431 should be invoked between parses.
432 Declared in
433 .In mandoc.h ,
434 implemented in
435 .Pa read.c .
436 .It Fn mparse_reset
437 Reset a parser so that
438 .Fn mparse_readfd
439 may be used again.
440 Declared in
441 .In mandoc.h ,
442 implemented in
443 .Pa read.c .
444 .It Fn mparse_result
445 Obtain the result of a parse.
446 One of the three pointers will be filled in.
447 Declared in
448 .In mandoc.h ,
449 implemented in
450 .Pa read.c .
451 .It Fn mparse_strerror
452 Return a statically-allocated string representation of an error code.
453 Declared in
454 .In mandoc.h ,
455 implemented in
456 .Pa read.c .
457 .It Fn mparse_strlevel
458 Return a statically-allocated string representation of a level code.
459 Declared in
460 .In mandoc.h ,
461 implemented in
462 .Pa read.c .
463 .It Fn mparse_wait
464 Bury a
465 .Xr gunzip 1
466 child process that was spawned with
467 .Fn mparse_open .
468 To be called after the parse sequence is complete.
469 Not needed after
470 .Fn mparse_readfd ,
471 but does no harm in that case, either.
472 Returns
473 .Dv MANDOCLEVEL_OK
474 on success and
475 .Dv MANDOCLEVEL_SYSERR
476 on failure, that is, when
477 .Xr wait 2
478 fails, or when
479 .Xr gunzip 1
480 died from a signal or exited with non-zero status.
481 Declared in
482 .In mandoc.h ,
483 implemented in
484 .Pa read.c .
485 .El
486 .Ss Variables
487 .Bl -ohang
488 .It Va man_macronames
489 The string representation of a man macro as indexed by
490 .Vt "enum mant" .
491 .It Va mdoc_argnames
492 The string representation of a mdoc macro argument as indexed by
493 .Vt "enum mdocargt" .
494 .It Va mdoc_macronames
495 The string representation of a mdoc macro as indexed by
496 .Vt "enum mdoct" .
497 .El
498 .Sh IMPLEMENTATION NOTES
499 This section consists of structural documentation for
500 .Xr mdoc 7
501 and
502 .Xr man 7
503 syntax trees and strings.
504 .Ss Man and Mdoc Strings
505 Strings may be extracted from mdoc and man meta-data, or from text
506 nodes (MDOC_TEXT and MAN_TEXT, respectively).
507 These strings have special non-printing formatting cues embedded in the
508 text itself, as well as
509 .Xr roff 7
510 escapes preserved from input.
511 Implementing systems will need to handle both situations to produce
512 human-readable text.
513 In general, strings may be assumed to consist of 7-bit ASCII characters.
514 .Pp
515 The following non-printing characters may be embedded in text strings:
516 .Bl -tag -width Ds
517 .It Dv ASCII_NBRSP
518 A non-breaking space character.
519 .It Dv ASCII_HYPH
520 A soft hyphen.
521 .It Dv ASCII_BREAK
522 A breakable zero-width space.
523 .El
524 .Pp
525 Escape characters are also passed verbatim into text strings.
526 An escape character is a sequence of characters beginning with the
527 backslash
528 .Pq Sq \e .
529 To construct human-readable text, these should be intercepted with
530 .Xr mandoc_escape 3
531 and converted with one of the functions described in
532 .Xr mchars_alloc 3 .
533 .Ss Man Abstract Syntax Tree
534 This AST is governed by the ontological rules dictated in
535 .Xr man 7
536 and derives its terminology accordingly.
537 .Pp
538 The AST is composed of
539 .Vt struct man_node
540 nodes with element, root and text types as declared by the
541 .Va type
542 field.
543 Each node also provides its parse point (the
544 .Va line ,
545 .Va sec ,
546 and
547 .Va pos
548 fields), its position in the tree (the
549 .Va parent ,
550 .Va child ,
551 .Va next
552 and
553 .Va prev
554 fields) and some type-specific data.
555 .Pp
556 The tree itself is arranged according to the following normal form,
557 where capitalised non-terminals represent nodes.
558 .Pp
559 .Bl -tag -width "ELEMENTXX" -compact
560 .It ROOT
561 \(<- mnode+
562 .It mnode
563 \(<- ELEMENT | TEXT | BLOCK
564 .It BLOCK
565 \(<- HEAD BODY
566 .It HEAD
567 \(<- mnode*
568 .It BODY
569 \(<- mnode*
570 .It ELEMENT
571 \(<- ELEMENT | TEXT*
572 .It TEXT
573 \(<- [[:ascii:]]*
574 .El
575 .Pp
576 The only elements capable of nesting other elements are those with
577 next-line scope as documented in
578 .Xr man 7 .
579 .Ss Mdoc Abstract Syntax Tree
580 This AST is governed by the ontological
581 rules dictated in
582 .Xr mdoc 7
583 and derives its terminology accordingly.
584 .Qq In-line
585 elements described in
586 .Xr mdoc 7
587 are described simply as
588 .Qq elements .
589 .Pp
590 The AST is composed of
591 .Vt struct mdoc_node
592 nodes with block, head, body, element, root and text types as declared
593 by the
594 .Va type
595 field.
596 Each node also provides its parse point (the
597 .Va line ,
598 .Va sec ,
599 and
600 .Va pos
601 fields), its position in the tree (the
602 .Va parent ,
603 .Va child ,
604 .Va nchild ,
605 .Va next
606 and
607 .Va prev
608 fields) and some type-specific data, in particular, for nodes generated
609 from macros, the generating macro in the
610 .Va tok
611 field.
612 .Pp
613 The tree itself is arranged according to the following normal form,
614 where capitalised non-terminals represent nodes.
615 .Pp
616 .Bl -tag -width "ELEMENTXX" -compact
617 .It ROOT
618 \(<- mnode+
619 .It mnode
620 \(<- BLOCK | ELEMENT | TEXT
621 .It BLOCK
622 \(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
623 .It ELEMENT
624 \(<- TEXT*
625 .It HEAD
626 \(<- mnode*
627 .It BODY
628 \(<- mnode* [ENDBODY mnode*]
629 .It TAIL
630 \(<- mnode*
631 .It TEXT
632 \(<- [[:ascii:]]*
633 .El
634 .Pp
635 Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
636 the BLOCK production: these refer to punctuation marks.
637 Furthermore, although a TEXT node will generally have a non-zero-length
638 string, in the specific case of
639 .Sq \&.Bd \-literal ,
640 an empty line will produce a zero-length string.
641 Multiple body parts are only found in invocations of
642 .Sq \&Bl \-column ,
643 where a new body introduces a new phrase.
644 .Pp
645 The
646 .Xr mdoc 7
647 syntax tree accommodates broken block structures as well.
648 The ENDBODY node is available to end the formatting associated
649 with a given block before the physical end of that block.
650 It has a non-null
651 .Va end
652 field, is of the BODY
653 .Va type ,
654 has the same
655 .Va tok
656 as the BLOCK it is ending, and has a
657 .Va pending
658 field pointing to that BLOCK's BODY node.
659 It is an indirect child of that BODY node
660 and has no children of its own.
661 .Pp
662 An ENDBODY node is generated when a block ends while one of its child
663 blocks is still open, like in the following example:
664 .Bd -literal -offset indent
665 \&.Ao ao
666 \&.Bo bo ac
667 \&.Ac bc
668 \&.Bc end
669 .Ed
670 .Pp
671 This example results in the following block structure:
672 .Bd -literal -offset indent
673 BLOCK Ao
674     HEAD Ao
675     BODY Ao
676         TEXT ao
677         BLOCK Bo, pending -> Ao
678             HEAD Bo
679             BODY Bo
680                 TEXT bo
681                 TEXT ac
682                 ENDBODY Ao, pending -> Ao
683                 TEXT bc
684 TEXT end
685 .Ed
686 .Pp
687 Here, the formatting of the
688 .Sq \&Ao
689 block extends from TEXT ao to TEXT ac,
690 while the formatting of the
691 .Sq \&Bo
692 block extends from TEXT bo to TEXT bc.
693 It renders as follows in
694 .Fl T Ns Cm ascii
695 mode:
696 .Pp
697 .Dl <ao [bo ac> bc] end
698 .Pp
699 Support for badly-nested blocks is only provided for backward
700 compatibility with some older
701 .Xr mdoc 7
702 implementations.
703 Using badly-nested blocks is
704 .Em strongly discouraged ;
705 for example, the
706 .Fl T Ns Cm html
707 and
708 .Fl T Ns Cm xhtml
709 front-ends to
710 .Xr mandoc 1
711 are unable to render them in any meaningful way.
712 Furthermore, behaviour when encountering badly-nested blocks is not
713 consistent across troff implementations, especially when using multiple
714 levels of badly-nested blocks.
715 .Sh SEE ALSO
716 .Xr mandoc 1 ,
717 .Xr mandoc_escape 3 ,
718 .Xr mandoc_malloc 3 ,
719 .Xr mchars_alloc 3 ,
720 .Xr eqn 7 ,
721 .Xr man 7 ,
722 .Xr mandoc_char 7 ,
723 .Xr mdoc 7 ,
724 .Xr roff 7 ,
725 .Xr tbl 7
726 .Sh AUTHORS
727 The
728 .Nm
729 library was written by
730 .An Kristaps Dzonsons Aq Mt kristaps@bsd.lv .