]> git.cameronkatri.com Git - mandoc.git/blob - mandoc.3
Cache the result of uname(3) such that we don't need to call it
[mandoc.git] / mandoc.3
1 .\" $Id: mandoc.3,v 1.23 2014/01/05 20:26:36 schwarze Exp $
2 .\"
3 .\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 .\" Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
5 .\"
6 .\" Permission to use, copy, modify, and distribute this software for any
7 .\" purpose with or without fee is hereby granted, provided that the above
8 .\" copyright notice and this permission notice appear in all copies.
9 .\"
10 .\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 .\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 .\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 .\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 .\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 .\"
18 .Dd $Mdocdate: January 5 2014 $
19 .Dt MANDOC 3
20 .Os
21 .Sh NAME
22 .Nm mandoc ,
23 .Nm mandoc_escape ,
24 .Nm man_meta ,
25 .Nm man_mparse ,
26 .Nm man_node ,
27 .Nm mchars_alloc ,
28 .Nm mchars_free ,
29 .Nm mchars_num2char ,
30 .Nm mchars_num2uc ,
31 .Nm mchars_spec2cp ,
32 .Nm mchars_spec2str ,
33 .Nm mdoc_meta ,
34 .Nm mdoc_node ,
35 .Nm mparse_alloc ,
36 .Nm mparse_free ,
37 .Nm mparse_getkeep ,
38 .Nm mparse_keep ,
39 .Nm mparse_readfd ,
40 .Nm mparse_reset ,
41 .Nm mparse_result ,
42 .Nm mparse_strerror ,
43 .Nm mparse_strlevel
44 .Nd mandoc macro compiler library
45 .Sh LIBRARY
46 .Lb libmandoc
47 .Sh SYNOPSIS
48 .In man.h
49 .In mdoc.h
50 .In mandoc.h
51 .Ft "enum mandoc_esc"
52 .Fo mandoc_escape
53 .Fa "const char **end"
54 .Fa "const char **start"
55 .Fa "int *sz"
56 .Fc
57 .Ft "const struct man_meta *"
58 .Fo man_meta
59 .Fa "const struct man *man"
60 .Fc
61 .Ft "const struct mparse *"
62 .Fo man_mparse
63 .Fa "const struct man *man"
64 .Fc
65 .Ft "const struct man_node *"
66 .Fo man_node
67 .Fa "const struct man *man"
68 .Fc
69 .Ft "struct mchars *"
70 .Fn mchars_alloc "void"
71 .Ft void
72 .Fn mchars_free "struct mchars *p"
73 .Ft char
74 .Fn mchars_num2char "const char *cp" "size_t sz"
75 .Ft int
76 .Fn mchars_num2uc "const char *cp" "size_t sz"
77 .Ft "const char *"
78 .Fo mchars_spec2str
79 .Fa "const struct mchars *p"
80 .Fa "const char *cp"
81 .Fa "size_t sz"
82 .Fa "size_t *rsz"
83 .Fc
84 .Ft int
85 .Fo mchars_spec2cp
86 .Fa "const struct mchars *p"
87 .Fa "const char *cp"
88 .Fa "size_t sz"
89 .Fc
90 .Ft "const struct mdoc_meta *"
91 .Fo mdoc_meta
92 .Fa "const struct mdoc *mdoc"
93 .Fc
94 .Ft "const struct mdoc_node *"
95 .Fo mdoc_node
96 .Fa "const struct mdoc *mdoc"
97 .Fc
98 .Ft "struct mparse *"
99 .Fo mparse_alloc
100 .Fa "enum mparset inttype"
101 .Fa "enum mandoclevel wlevel"
102 .Fa "mandocmsg mmsg"
103 .Fa "char *defos"
104 .Fa "int quick"
105 .Fc
106 .Ft void
107 .Fo mparse_free
108 .Fa "struct mparse *parse"
109 .Fc
110 .Ft "const char *"
111 .Fo mparse_getkeep
112 .Fa "const struct mparse *parse"
113 .Fc
114 .Ft void
115 .Fo mparse_keep
116 .Fa "struct mparse *parse"
117 .Fc
118 .Ft "enum mandoclevel"
119 .Fo mparse_readfd
120 .Fa "struct mparse *parse"
121 .Fa "int fd"
122 .Fa "const char *fname"
123 .Fc
124 .Ft void
125 .Fo mparse_reset
126 .Fa "struct mparse *parse"
127 .Fc
128 .Ft void
129 .Fo mparse_result
130 .Fa "struct mparse *parse"
131 .Fa "struct mdoc **mdoc"
132 .Fa "struct man **man"
133 .Fc
134 .Ft "const char *"
135 .Fo mparse_strerror
136 .Fa "enum mandocerr"
137 .Fc
138 .Ft "const char *"
139 .Fo mparse_strlevel
140 .Fa "enum mandoclevel"
141 .Fc
142 .Vt extern const char * const * man_macronames;
143 .Vt extern const char * const * mdoc_argnames;
144 .Vt extern const char * const * mdoc_macronames;
145 .Fd "#define ASCII_NBRSP"
146 .Fd "#define ASCII_HYPH"
147 .Sh DESCRIPTION
148 The
149 .Nm mandoc
150 library parses a
151 .Ux
152 manual into an abstract syntax tree (AST).
153 .Ux
154 manuals are composed of
155 .Xr mdoc 7
156 or
157 .Xr man 7 ,
158 and may be mixed with
159 .Xr roff 7 ,
160 .Xr tbl 7 ,
161 and
162 .Xr eqn 7
163 invocations.
164 .Pp
165 The following describes a general parse sequence:
166 .Bl -enum
167 .It
168 initiate a parsing sequence with
169 .Fn mparse_alloc ;
170 .It
171 parse files or file descriptors with
172 .Fn mparse_readfd ;
173 .It
174 retrieve a parsed syntax tree, if the parse was successful, with
175 .Fn mparse_result ;
176 .It
177 iterate over parse nodes with
178 .Fn mdoc_node
179 or
180 .Fn man_node ;
181 .It
182 free all allocated memory with
183 .Fn mparse_free ,
184 or invoke
185 .Fn mparse_reset
186 and parse new files.
187 .El
188 .Pp
189 The
190 .Nm
191 library also contains routines for translating character strings into glyphs
192 .Pq see Fn mchars_alloc
193 and parsing escape sequences from strings
194 .Pq see Fn mandoc_escape .
195 .Sh REFERENCE
196 This section documents the functions, types, and variables available
197 via
198 .In mandoc.h .
199 .Ss Types
200 .Bl -ohang
201 .It Vt "enum mandoc_esc"
202 An escape sequence classification.
203 .It Vt "enum mandocerr"
204 A fatal error, error, or warning message during parsing.
205 .It Vt "enum mandoclevel"
206 A classification of an
207 .Vt "enum mandocerr"
208 as regards system operation.
209 .It Vt "struct mchars"
210 An opaque pointer to an object allowing for translation between
211 character strings and glyphs.
212 See
213 .Fn mchars_alloc .
214 .It Vt "enum mparset"
215 The type of parser when reading input.
216 This should usually be
217 .Dv MPARSE_AUTO
218 for auto-detection.
219 .It Vt "struct mparse"
220 An opaque pointer to a running parse sequence.
221 Created with
222 .Fn mparse_alloc
223 and freed with
224 .Fn mparse_free .
225 This may be used across parsed input if
226 .Fn mparse_reset
227 is called between parses.
228 .It Vt "mandocmsg"
229 A prototype for a function to handle fatal error, error, and warning
230 messages emitted by the parser.
231 .El
232 .Ss Functions
233 .Bl -ohang
234 .It Fn mandoc_escape
235 Scan an escape sequence, i.e., a character string beginning with
236 .Sq \e .
237 Pass a pointer to the character after the
238 .Sq \e
239 as
240 .Va end ;
241 it will be set to the supremum of the parsed escape sequence unless
242 returning
243 .Dv ESCAPE_ERROR ,
244 in which case the string is bogus and should be
245 thrown away.
246 If not
247 .Dv ESCAPE_ERROR
248 or
249 .Dv ESCAPE_IGNORE ,
250 .Va start
251 is set to the first relevant character of the substring (font, glyph,
252 whatever) of length
253 .Va sz .
254 Both
255 .Va start
256 and
257 .Va sz
258 may be
259 .Dv NULL .
260 Declared in
261 .In mandoc.h ,
262 implemented in
263 .Pa mandoc.c .
264 .It Fn man_meta
265 Obtain the meta-data of a successful parse.
266 This may only be used on a pointer returned by
267 .Fn mparse_result .
268 Declared in
269 .In man.h ,
270 implemented in
271 .Pa man.c .
272 .It Fn man_mparse
273 Get the parser used for the current output.
274 Declared in
275 .In man.h ,
276 implemented in
277 .Pa man.c .
278 .It Fn man_node
279 Obtain the root node of a successful parse.
280 This may only be used on a pointer returned by
281 .Fn mparse_result .
282 Declared in
283 .In man.h ,
284 implemented in
285 .Pa man.c .
286 .It Fn mchars_alloc
287 Allocate an
288 .Vt "struct mchars *"
289 object for translating special characters into glyphs.
290 See
291 .Xr mandoc_char 7
292 for an overview of special characters.
293 The object must be freed with
294 .Fn mchars_free .
295 Declared in
296 .In mandoc.h ,
297 implemented in
298 .Pa chars.c .
299 .It Fn mchars_free
300 Free an object created with
301 .Fn mchars_alloc .
302 Declared in
303 .In mandoc.h ,
304 implemented in
305 .Pa chars.c .
306 .It Fn mchars_num2char
307 Convert a character index (e.g., the \eN\(aq\(aq escape) into a
308 printable ASCII character.
309 Returns \e0 (the nil character) if the input sequence is malformed.
310 Declared in
311 .In mandoc.h ,
312 implemented in
313 .Pa chars.c .
314 .It Fn mchars_num2uc
315 Convert a hexadecimal character index (e.g., the \e[uNNNN] escape) into
316 a Unicode codepoint.
317 Returns \e0 (the nil character) if the input sequence is malformed.
318 Declared in
319 .In mandoc.h ,
320 implemented in
321 .Pa chars.c .
322 .It Fn mchars_spec2cp
323 Convert a special character into a valid Unicode codepoint.
324 Returns \-1 on failure or a non-zero Unicode codepoint on success.
325 Declared in
326 .In mandoc.h ,
327 implemented in
328 .Pa chars.c .
329 .It Fn mchars_spec2str
330 Convert a special character into an ASCII string.
331 Returns
332 .Dv NULL
333 on failure.
334 Declared in
335 .In mandoc.h ,
336 implemented in
337 .Pa chars.c .
338 .It Fn mdoc_meta
339 Obtain the meta-data of a successful parse.
340 This may only be used on a pointer returned by
341 .Fn mparse_result .
342 Declared in
343 .In mdoc.h ,
344 implemented in
345 .Pa mdoc.c .
346 .It Fn mdoc_node
347 Obtain the root node of a successful parse.
348 This may only be used on a pointer returned by
349 .Fn mparse_result .
350 Declared in
351 .In mdoc.h ,
352 implemented in
353 .Pa mdoc.c .
354 .It Fn mparse_alloc
355 Allocate a parser.
356 The arguments have the following effect:
357 .Bl -tag -offset 5n -width inttype
358 .It Ar inttype
359 When set to
360 .Dv MPARSE_MDOC
361 or
362 .Dv MPARSE_MAN ,
363 only that parser will be used.
364 With
365 .Dv MPARSE_AUTO ,
366 the document type will be automatically detected.
367 .It Ar wlevel
368 Can be set to
369 .Dv MANDOCLEVEL_FATAL ,
370 .Dv MANDOCLEVEL_ERROR ,
371 or
372 .Dv MANDOCLEVEL_WARNING .
373 Messages below the selected level will be suppressed.
374 .It Ar mmsg
375 A callback function to handle errors and warnings.
376 See
377 .Pa main.c
378 for an example.
379 .It Ar defos
380 A default string for the
381 .Xr mdoc 7
382 .Sq \&Os
383 macro, overriding the
384 .Dv OSNAME
385 preprocessor definition and the results of
386 .Xr uname 3 .
387 .It Ar quick
388 When set, parsing is aborted after the NAME section.
389 This is for example useful to quickly build minimal databases.
390 .El
391 .Pp
392 The same parser may be used for multiple files so long as
393 .Fn mparse_reset
394 is called between parses.
395 .Fn mparse_free
396 must be called to free the memory allocated by this function.
397 Declared in
398 .In mandoc.h ,
399 implemented in
400 .Pa read.c .
401 .It Fn mparse_free
402 Free all memory allocated by
403 .Fn mparse_alloc .
404 Declared in
405 .In mandoc.h ,
406 implemented in
407 .Pa read.c .
408 .It Fn mparse_getkeep
409 Acquire the keep buffer.
410 Must follow a call of
411 .Fn mparse_keep .
412 Declared in
413 .In mandoc.h ,
414 implemented in
415 .Pa read.c .
416 .It Fn mparse_keep
417 Instruct the parser to retain a copy of its parsed input.
418 This can be acquired with subsequent
419 .Fn mparse_getkeep
420 calls.
421 Declared in
422 .In mandoc.h ,
423 implemented in
424 .Pa read.c .
425 .It Fn mparse_readfd
426 Parse a file or file descriptor.
427 If
428 .Va fd
429 is \-1,
430 .Va fname
431 is opened for reading.
432 Otherwise,
433 .Va fname
434 is assumed to be the name associated with
435 .Va fd .
436 This may be called multiple times with different parameters; however,
437 .Fn mparse_reset
438 should be invoked between parses.
439 Declared in
440 .In mandoc.h ,
441 implemented in
442 .Pa read.c .
443 .It Fn mparse_reset
444 Reset a parser so that
445 .Fn mparse_readfd
446 may be used again.
447 Declared in
448 .In mandoc.h ,
449 implemented in
450 .Pa read.c .
451 .It Fn mparse_result
452 Obtain the result of a parse.
453 Only successful parses
454 .Po
455 i.e., those where
456 .Fn mparse_readfd
457 returned less than MANDOCLEVEL_FATAL
458 .Pc
459 should invoke this function, in which case one of the two pointers will
460 be filled in.
461 Declared in
462 .In mandoc.h ,
463 implemented in
464 .Pa read.c .
465 .It Fn mparse_strerror
466 Return a statically-allocated string representation of an error code.
467 Declared in
468 .In mandoc.h ,
469 implemented in
470 .Pa read.c .
471 .It Fn mparse_strlevel
472 Return a statically-allocated string representation of a level code.
473 Declared in
474 .In mandoc.h ,
475 implemented in
476 .Pa read.c .
477 .El
478 .Ss Variables
479 .Bl -ohang
480 .It Va man_macronames
481 The string representation of a man macro as indexed by
482 .Vt "enum mant" .
483 .It Va mdoc_argnames
484 The string representation of a mdoc macro argument as indexed by
485 .Vt "enum mdocargt" .
486 .It Va mdoc_macronames
487 The string representation of a mdoc macro as indexed by
488 .Vt "enum mdoct" .
489 .El
490 .Sh IMPLEMENTATION NOTES
491 This section consists of structural documentation for
492 .Xr mdoc 7
493 and
494 .Xr man 7
495 syntax trees and strings.
496 .Ss Man and Mdoc Strings
497 Strings may be extracted from mdoc and man meta-data, or from text
498 nodes (MDOC_TEXT and MAN_TEXT, respectively).
499 These strings have special non-printing formatting cues embedded in the
500 text itself, as well as
501 .Xr roff 7
502 escapes preserved from input.
503 Implementing systems will need to handle both situations to produce
504 human-readable text.
505 In general, strings may be assumed to consist of 7-bit ASCII characters.
506 .Pp
507 The following non-printing characters may be embedded in text strings:
508 .Bl -tag -width Ds
509 .It Dv ASCII_NBRSP
510 A non-breaking space character.
511 .It Dv ASCII_HYPH
512 A soft hyphen.
513 .El
514 .Pp
515 Escape sequences are also passed verbatim into text strings.
516 An escape sequence is a string of characters beginning with the
517 backslash
518 .Pq Sq \e .
519 To construct human-readable text, these should be intercepted with
520 .Fn mandoc_escape
521 and converted with one of
522 .Fn mchars_num2char ,
523 .Fn mchars_spec2str ,
524 and so on.
525 .Ss Man Abstract Syntax Tree
526 This AST is governed by the ontological rules dictated in
527 .Xr man 7
528 and derives its terminology accordingly.
529 .Pp
530 The AST is composed of
531 .Vt struct man_node
532 nodes with element, root and text types as declared by the
533 .Va type
534 field.
535 Each node also provides its parse point (the
536 .Va line ,
537 .Va sec ,
538 and
539 .Va pos
540 fields), its position in the tree (the
541 .Va parent ,
542 .Va child ,
543 .Va next
544 and
545 .Va prev
546 fields) and some type-specific data.
547 .Pp
548 The tree itself is arranged according to the following normal form,
549 where capitalised non-terminals represent nodes.
550 .Pp
551 .Bl -tag -width "ELEMENTXX" -compact
552 .It ROOT
553 \(<- mnode+
554 .It mnode
555 \(<- ELEMENT | TEXT | BLOCK
556 .It BLOCK
557 \(<- HEAD BODY
558 .It HEAD
559 \(<- mnode*
560 .It BODY
561 \(<- mnode*
562 .It ELEMENT
563 \(<- ELEMENT | TEXT*
564 .It TEXT
565 \(<- [[:ascii:]]*
566 .El
567 .Pp
568 The only elements capable of nesting other elements are those with
569 next-line scope as documented in
570 .Xr man 7 .
571 .Ss Mdoc Abstract Syntax Tree
572 This AST is governed by the ontological
573 rules dictated in
574 .Xr mdoc 7
575 and derives its terminology accordingly.
576 .Qq In-line
577 elements described in
578 .Xr mdoc 7
579 are described simply as
580 .Qq elements .
581 .Pp
582 The AST is composed of
583 .Vt struct mdoc_node
584 nodes with block, head, body, element, root and text types as declared
585 by the
586 .Va type
587 field.
588 Each node also provides its parse point (the
589 .Va line ,
590 .Va sec ,
591 and
592 .Va pos
593 fields), its position in the tree (the
594 .Va parent ,
595 .Va child ,
596 .Va nchild ,
597 .Va next
598 and
599 .Va prev
600 fields) and some type-specific data, in particular, for nodes generated
601 from macros, the generating macro in the
602 .Va tok
603 field.
604 .Pp
605 The tree itself is arranged according to the following normal form,
606 where capitalised non-terminals represent nodes.
607 .Pp
608 .Bl -tag -width "ELEMENTXX" -compact
609 .It ROOT
610 \(<- mnode+
611 .It mnode
612 \(<- BLOCK | ELEMENT | TEXT
613 .It BLOCK
614 \(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
615 .It ELEMENT
616 \(<- TEXT*
617 .It HEAD
618 \(<- mnode*
619 .It BODY
620 \(<- mnode* [ENDBODY mnode*]
621 .It TAIL
622 \(<- mnode*
623 .It TEXT
624 \(<- [[:ascii:]]*
625 .El
626 .Pp
627 Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
628 the BLOCK production: these refer to punctuation marks.
629 Furthermore, although a TEXT node will generally have a non-zero-length
630 string, in the specific case of
631 .Sq \&.Bd \-literal ,
632 an empty line will produce a zero-length string.
633 Multiple body parts are only found in invocations of
634 .Sq \&Bl \-column ,
635 where a new body introduces a new phrase.
636 .Pp
637 The
638 .Xr mdoc 7
639 syntax tree accommodates broken block structures as well.
640 The ENDBODY node is available to end the formatting associated
641 with a given block before the physical end of that block.
642 It has a non-null
643 .Va end
644 field, is of the BODY
645 .Va type ,
646 has the same
647 .Va tok
648 as the BLOCK it is ending, and has a
649 .Va pending
650 field pointing to that BLOCK's BODY node.
651 It is an indirect child of that BODY node
652 and has no children of its own.
653 .Pp
654 An ENDBODY node is generated when a block ends while one of its child
655 blocks is still open, like in the following example:
656 .Bd -literal -offset indent
657 \&.Ao ao
658 \&.Bo bo ac
659 \&.Ac bc
660 \&.Bc end
661 .Ed
662 .Pp
663 This example results in the following block structure:
664 .Bd -literal -offset indent
665 BLOCK Ao
666 HEAD Ao
667 BODY Ao
668 TEXT ao
669 BLOCK Bo, pending -> Ao
670 HEAD Bo
671 BODY Bo
672 TEXT bo
673 TEXT ac
674 ENDBODY Ao, pending -> Ao
675 TEXT bc
676 TEXT end
677 .Ed
678 .Pp
679 Here, the formatting of the
680 .Sq \&Ao
681 block extends from TEXT ao to TEXT ac,
682 while the formatting of the
683 .Sq \&Bo
684 block extends from TEXT bo to TEXT bc.
685 It renders as follows in
686 .Fl T Ns Cm ascii
687 mode:
688 .Pp
689 .Dl <ao [bo ac> bc] end
690 .Pp
691 Support for badly-nested blocks is only provided for backward
692 compatibility with some older
693 .Xr mdoc 7
694 implementations.
695 Using badly-nested blocks is
696 .Em strongly discouraged ;
697 for example, the
698 .Fl T Ns Cm html
699 and
700 .Fl T Ns Cm xhtml
701 front-ends to
702 .Xr mandoc 1
703 are unable to render them in any meaningful way.
704 Furthermore, behaviour when encountering badly-nested blocks is not
705 consistent across troff implementations, especially when using multiple
706 levels of badly-nested blocks.
707 .Sh SEE ALSO
708 .Xr mandoc 1 ,
709 .Xr eqn 7 ,
710 .Xr man 7 ,
711 .Xr mandoc_char 7 ,
712 .Xr mdoc 7 ,
713 .Xr roff 7 ,
714 .Xr tbl 7
715 .Sh AUTHORS
716 The
717 .Nm
718 library was written by
719 .An Kristaps Dzonsons Aq Mt kristaps@bsd.lv .