]> git.cameronkatri.com Git - mandoc.git/blob - mandoc.3
first steps toward the 1.14.4 release
[mandoc.git] / mandoc.3
1 .\" $Id: mandoc.3,v 1.41 2017/07/04 23:40:01 schwarze Exp $
2 .\"
3 .\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 .\" Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
5 .\"
6 .\" Permission to use, copy, modify, and distribute this software for any
7 .\" purpose with or without fee is hereby granted, provided that the above
8 .\" copyright notice and this permission notice appear in all copies.
9 .\"
10 .\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 .\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 .\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 .\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 .\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 .\"
18 .Dd $Mdocdate: July 4 2017 $
19 .Dt MANDOC 3
20 .Os
21 .Sh NAME
22 .Nm mandoc ,
23 .Nm deroff ,
24 .Nm mandocmsg ,
25 .Nm man_mparse ,
26 .Nm man_validate ,
27 .Nm mdoc_validate ,
28 .Nm mparse_alloc ,
29 .Nm mparse_free ,
30 .Nm mparse_getkeep ,
31 .Nm mparse_keep ,
32 .Nm mparse_open ,
33 .Nm mparse_readfd ,
34 .Nm mparse_reset ,
35 .Nm mparse_result ,
36 .Nm mparse_strerror ,
37 .Nm mparse_strlevel ,
38 .Nm mparse_updaterc
39 .Nd mandoc macro compiler library
40 .Sh SYNOPSIS
41 .In sys/types.h
42 .In mandoc.h
43 .Pp
44 .Fd "#define ASCII_NBRSP"
45 .Fd "#define ASCII_HYPH"
46 .Fd "#define ASCII_BREAK"
47 .Ft struct mparse *
48 .Fo mparse_alloc
49 .Fa "int options"
50 .Fa "enum mandocerr mmin"
51 .Fa "mandocmsg mmsg"
52 .Fa "enum mandoc_os os_e"
53 .Fa "char *os_s"
54 .Fc
55 .Ft void
56 .Fo (*mandocmsg)
57 .Fa "enum mandocerr errtype"
58 .Fa "enum mandoclevel level"
59 .Fa "const char *file"
60 .Fa "int line"
61 .Fa "int col"
62 .Fa "const char *msg"
63 .Fc
64 .Ft void
65 .Fo mparse_free
66 .Fa "struct mparse *parse"
67 .Fc
68 .Ft const char *
69 .Fo mparse_getkeep
70 .Fa "const struct mparse *parse"
71 .Fc
72 .Ft void
73 .Fo mparse_keep
74 .Fa "struct mparse *parse"
75 .Fc
76 .Ft int
77 .Fo mparse_open
78 .Fa "struct mparse *parse"
79 .Fa "const char *fname"
80 .Fc
81 .Ft "enum mandoclevel"
82 .Fo mparse_readfd
83 .Fa "struct mparse *parse"
84 .Fa "int fd"
85 .Fa "const char *fname"
86 .Fc
87 .Ft void
88 .Fo mparse_reset
89 .Fa "struct mparse *parse"
90 .Fc
91 .Ft void
92 .Fo mparse_result
93 .Fa "struct mparse *parse"
94 .Fa "struct roff_man **man"
95 .Fa "char **sodest"
96 .Fc
97 .Ft "const char *"
98 .Fo mparse_strerror
99 .Fa "enum mandocerr"
100 .Fc
101 .Ft "const char *"
102 .Fo mparse_strlevel
103 .Fa "enum mandoclevel"
104 .Fc
105 .Ft void
106 .Fo mparse_updaterc
107 .Fa "struct mparse *parse"
108 .Fa "enum mandoclevel *rc"
109 .Fc
110 .In roff.h
111 .Ft void
112 .Fo deroff
113 .Fa "char **dest"
114 .Fa "const struct roff_node *node"
115 .Fc
116 .In sys/types.h
117 .In mandoc.h
118 .In mdoc.h
119 .Vt extern const char * const * mdoc_argnames;
120 .Vt extern const char * const * mdoc_macronames;
121 .Ft void
122 .Fo mdoc_validate
123 .Fa "struct roff_man *mdoc"
124 .Fc
125 .In sys/types.h
126 .In mandoc.h
127 .In man.h
128 .Vt extern const char * const * man_macronames;
129 .Ft "const struct mparse *"
130 .Fo man_mparse
131 .Fa "const struct roff_man *man"
132 .Fc
133 .Ft void
134 .Fo man_validate
135 .Fa "struct roff_man *man"
136 .Fc
137 .Sh DESCRIPTION
138 The
139 .Nm mandoc
140 library parses a
141 .Ux
142 manual into an abstract syntax tree (AST).
143 .Ux
144 manuals are composed of
145 .Xr mdoc 7
146 or
147 .Xr man 7 ,
148 and may be mixed with
149 .Xr roff 7 ,
150 .Xr tbl 7 ,
151 and
152 .Xr eqn 7
153 invocations.
154 .Pp
155 The following describes a general parse sequence:
156 .Bl -enum
157 .It
158 initiate a parsing sequence with
159 .Xr mchars_alloc 3
160 and
161 .Fn mparse_alloc ;
162 .It
163 open a file with
164 .Xr open 2
165 or
166 .Fn mparse_open ;
167 .It
168 parse it with
169 .Fn mparse_readfd ;
170 .It
171 close it with
172 .Xr close 2 ;
173 .It
174 retrieve the syntax tree with
175 .Fn mparse_result ;
176 .It
177 depending on whether the
178 .Fa macroset
179 member of the returned
180 .Vt struct roff_man
181 is
182 .Dv MACROSET_MDOC
183 or
184 .Dv MACROSET_MAN ,
185 validate it with
186 .Fn mdoc_validate
187 or
188 .Fn man_validate ,
189 respectively;
190 .It
191 if information about the validity of the input is needed, fetch it with
192 .Fn mparse_updaterc ;
193 .It
194 iterate over parse nodes starting from the
195 .Fa first
196 member of the returned
197 .Vt struct roff_man ;
198 .It
199 free all allocated memory with
200 .Fn mparse_free
201 and
202 .Xr mchars_free 3 ,
203 or invoke
204 .Fn mparse_reset
205 and go back to step 2 to parse new files.
206 .El
207 .Sh REFERENCE
208 This section documents the functions, types, and variables available
209 via
210 .In mandoc.h ,
211 with the exception of those documented in
212 .Xr mandoc_escape 3
213 and
214 .Xr mchars_alloc 3 .
215 .Ss Types
216 .Bl -ohang
217 .It Vt "enum mandocerr"
218 An error or warning message during parsing.
219 .It Vt "enum mandoclevel"
220 A classification of an
221 .Vt "enum mandocerr"
222 as regards system operation.
223 See the DIAGNOSTICS section in
224 .Xr mandoc 1
225 regarding the meanings of the levels.
226 .It Vt "struct mparse"
227 An opaque pointer to a running parse sequence.
228 Created with
229 .Fn mparse_alloc
230 and freed with
231 .Fn mparse_free .
232 This may be used across parsed input if
233 .Fn mparse_reset
234 is called between parses.
235 .It Vt "mandocmsg"
236 A prototype for a function to handle error and warning
237 messages emitted by the parser.
238 .El
239 .Ss Functions
240 .Bl -ohang
241 .It Fn deroff
242 Obtain a text-only representation of a
243 .Vt struct roff_node ,
244 including text contained in its child nodes.
245 To be used on children of the
246 .Fa first
247 member of
248 .Vt struct roff_man .
249 When it is no longer needed, the pointer returned from
250 .Fn deroff
251 can be passed to
252 .Xr free 3 .
253 .It Fn man_mparse
254 Get the parser used for the current output.
255 Declared in
256 .In man.h ,
257 implemented in
258 .Pa man.c .
259 .It Fn man_validate
260 Validate the
261 .Dv MACROSET_MAN
262 parse tree obtained with
263 .Fn mparse_result .
264 Declared in
265 .In man.h ,
266 implemented in
267 .Pa man.c .
268 .It Fn mdoc_validate
269 Validate the
270 .Dv MACROSET_MDOC
271 parse tree obtained with
272 .Fn mparse_result .
273 Declared in
274 .In mdoc.h ,
275 implemented in
276 .Pa mdoc.c .
277 .It Fn mparse_alloc
278 Allocate a parser.
279 The arguments have the following effect:
280 .Bl -tag -offset 5n -width inttype
281 .It Ar options
282 When the
283 .Dv MPARSE_MDOC
284 or
285 .Dv MPARSE_MAN
286 bit is set, only that parser is used.
287 Otherwise, the document type is automatically detected.
288 .Pp
289 When the
290 .Dv MPARSE_SO
291 bit is set,
292 .Xr roff 7
293 .Ic \&so
294 file inclusion requests are always honoured.
295 Otherwise, if the request is the only content in an input file,
296 only the file name is remembered, to be returned in the
297 .Fa sodest
298 argument of
299 .Fn mparse_result .
300 .Pp
301 When the
302 .Dv MPARSE_QUICK
303 bit is set, parsing is aborted after the NAME section.
304 This is for example useful in
305 .Xr makewhatis 8
306 .Fl Q
307 to quickly build minimal databases.
308 .It Ar mmin
309 Can be set to
310 .Dv MANDOCERR_BASE ,
311 .Dv MANDOCERR_STYLE ,
312 .Dv MANDOCERR_WARNING ,
313 .Dv MANDOCERR_ERROR ,
314 .Dv MANDOCERR_UNSUPP ,
315 or
316 .Dv MANDOCERR_MAX .
317 Messages below the selected level will be suppressed.
318 .It Ar mmsg
319 A callback function to handle errors and warnings.
320 See
321 .Pa main.c
322 for an example.
323 If printing of error messages is not desired,
324 .Dv NULL
325 may be passed.
326 .It Ar os_e
327 Operating system to check base system conventions for.
328 If
329 .Dv MANDOC_OS_OTHER ,
330 the system is automatically detected from
331 .Ic \&Os ,
332 .Fl Ios ,
333 or
334 .Xr uname 3 .
335 .It Ar os_s
336 A default string for the
337 .Xr mdoc 7
338 .Ic \&Os
339 macro, overriding the
340 .Dv OSNAME
341 preprocessor definition and the results of
342 .Xr uname 3 .
343 Passing
344 .Dv NULL
345 sets no default.
346 .El
347 .Pp
348 The same parser may be used for multiple files so long as
349 .Fn mparse_reset
350 is called between parses.
351 .Fn mparse_free
352 must be called to free the memory allocated by this function.
353 Declared in
354 .In mandoc.h ,
355 implemented in
356 .Pa read.c .
357 .It Fn mparse_free
358 Free all memory allocated by
359 .Fn mparse_alloc .
360 Declared in
361 .In mandoc.h ,
362 implemented in
363 .Pa read.c .
364 .It Fn mparse_getkeep
365 Acquire the keep buffer.
366 Must follow a call of
367 .Fn mparse_keep .
368 Declared in
369 .In mandoc.h ,
370 implemented in
371 .Pa read.c .
372 .It Fn mparse_keep
373 Instruct the parser to retain a copy of its parsed input.
374 This can be acquired with subsequent
375 .Fn mparse_getkeep
376 calls.
377 Declared in
378 .In mandoc.h ,
379 implemented in
380 .Pa read.c .
381 .It Fn mparse_open
382 Open the file for reading.
383 If that fails and
384 .Fa fname
385 does not already end in
386 .Ql .gz ,
387 try again after appending
388 .Ql .gz .
389 Save the information whether the file is zipped or not.
390 Return a file descriptor open for reading or -1 on failure.
391 It can be passed to
392 .Fn mparse_readfd
393 or used directly.
394 Declared in
395 .In mandoc.h ,
396 implemented in
397 .Pa read.c .
398 .It Fn mparse_readfd
399 Parse a file descriptor opened with
400 .Xr open 2
401 or
402 .Fn mparse_open .
403 Pass the associated filename in
404 .Fa fname .
405 This function may be called multiple times with different parameters; however,
406 .Xr close 2
407 and
408 .Fn mparse_reset
409 should be invoked between parses.
410 Declared in
411 .In mandoc.h ,
412 implemented in
413 .Pa read.c .
414 .It Fn mparse_reset
415 Reset a parser so that
416 .Fn mparse_readfd
417 may be used again.
418 Declared in
419 .In mandoc.h ,
420 implemented in
421 .Pa read.c .
422 .It Fn mparse_result
423 Obtain the result of a parse.
424 One of the two pointers will be filled in.
425 Declared in
426 .In mandoc.h ,
427 implemented in
428 .Pa read.c .
429 .It Fn mparse_strerror
430 Return a statically-allocated string representation of an error code.
431 Declared in
432 .In mandoc.h ,
433 implemented in
434 .Pa read.c .
435 .It Fn mparse_strlevel
436 Return a statically-allocated string representation of a level code.
437 Declared in
438 .In mandoc.h ,
439 implemented in
440 .Pa read.c .
441 .It Fn mparse_updaterc
442 If the highest warning or error level that occurred during the current
443 .Fa parse
444 is higher than
445 .Pf * Fa rc ,
446 update
447 .Pf * Fa rc
448 accordingly.
449 This is useful after calling
450 .Fn mdoc_validate
451 or
452 .Fn man_validate .
453 Declared in
454 .In mandoc.h ,
455 implemented in
456 .Pa read.c .
457 .El
458 .Ss Variables
459 .Bl -ohang
460 .It Va man_macronames
461 The string representation of a
462 .Xr man 7
463 macro as indexed by
464 .Vt "enum mant" .
465 .It Va mdoc_argnames
466 The string representation of an
467 .Xr mdoc 7
468 macro argument as indexed by
469 .Vt "enum mdocargt" .
470 .It Va mdoc_macronames
471 The string representation of an
472 .Xr mdoc 7
473 macro as indexed by
474 .Vt "enum mdoct" .
475 .El
476 .Sh IMPLEMENTATION NOTES
477 This section consists of structural documentation for
478 .Xr mdoc 7
479 and
480 .Xr man 7
481 syntax trees and strings.
482 .Ss Man and Mdoc Strings
483 Strings may be extracted from mdoc and man meta-data, or from text
484 nodes (MDOC_TEXT and MAN_TEXT, respectively).
485 These strings have special non-printing formatting cues embedded in the
486 text itself, as well as
487 .Xr roff 7
488 escapes preserved from input.
489 Implementing systems will need to handle both situations to produce
490 human-readable text.
491 In general, strings may be assumed to consist of 7-bit ASCII characters.
492 .Pp
493 The following non-printing characters may be embedded in text strings:
494 .Bl -tag -width Ds
495 .It Dv ASCII_NBRSP
496 A non-breaking space character.
497 .It Dv ASCII_HYPH
498 A soft hyphen.
499 .It Dv ASCII_BREAK
500 A breakable zero-width space.
501 .El
502 .Pp
503 Escape characters are also passed verbatim into text strings.
504 An escape character is a sequence of characters beginning with the
505 backslash
506 .Pq Sq \e .
507 To construct human-readable text, these should be intercepted with
508 .Xr mandoc_escape 3
509 and converted with one of the functions described in
510 .Xr mchars_alloc 3 .
511 .Ss Man Abstract Syntax Tree
512 This AST is governed by the ontological rules dictated in
513 .Xr man 7
514 and derives its terminology accordingly.
515 .Pp
516 The AST is composed of
517 .Vt struct roff_node
518 nodes with block, head, body, element, root and text types as declared by the
519 .Va type
520 field.
521 Each node also provides its parse point (the
522 .Va line ,
523 .Va pos ,
524 and
525 .Va sec
526 fields), its position in the tree (the
527 .Va parent ,
528 .Va child ,
529 .Va next
530 and
531 .Va prev
532 fields) and some type-specific data.
533 .Pp
534 The tree itself is arranged according to the following normal form,
535 where capitalised non-terminals represent nodes.
536 .Pp
537 .Bl -tag -width "ELEMENTXX" -compact
538 .It ROOT
539 \(<- mnode+
540 .It mnode
541 \(<- ELEMENT | TEXT | BLOCK
542 .It BLOCK
543 \(<- HEAD BODY
544 .It HEAD
545 \(<- mnode*
546 .It BODY
547 \(<- mnode*
548 .It ELEMENT
549 \(<- ELEMENT | TEXT*
550 .It TEXT
551 \(<- [[:ascii:]]*
552 .El
553 .Pp
554 The only elements capable of nesting other elements are those with
555 next-line scope as documented in
556 .Xr man 7 .
557 .Ss Mdoc Abstract Syntax Tree
558 This AST is governed by the ontological
559 rules dictated in
560 .Xr mdoc 7
561 and derives its terminology accordingly.
562 .Qq In-line
563 elements described in
564 .Xr mdoc 7
565 are described simply as
566 .Qq elements .
567 .Pp
568 The AST is composed of
569 .Vt struct roff_node
570 nodes with block, head, body, element, root and text types as declared
571 by the
572 .Va type
573 field.
574 Each node also provides its parse point (the
575 .Va line ,
576 .Va pos ,
577 and
578 .Va sec
579 fields), its position in the tree (the
580 .Va parent ,
581 .Va child ,
582 .Va last ,
583 .Va next
584 and
585 .Va prev
586 fields) and some type-specific data, in particular, for nodes generated
587 from macros, the generating macro in the
588 .Va tok
589 field.
590 .Pp
591 The tree itself is arranged according to the following normal form,
592 where capitalised non-terminals represent nodes.
593 .Pp
594 .Bl -tag -width "ELEMENTXX" -compact
595 .It ROOT
596 \(<- mnode+
597 .It mnode
598 \(<- BLOCK | ELEMENT | TEXT
599 .It BLOCK
600 \(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
601 .It ELEMENT
602 \(<- TEXT*
603 .It HEAD
604 \(<- mnode*
605 .It BODY
606 \(<- mnode* [ENDBODY mnode*]
607 .It TAIL
608 \(<- mnode*
609 .It TEXT
610 \(<- [[:ascii:]]*
611 .El
612 .Pp
613 Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
614 the BLOCK production: these refer to punctuation marks.
615 Furthermore, although a TEXT node will generally have a non-zero-length
616 string, in the specific case of
617 .Sq \&.Bl \-column ,
618 an empty line will produce a zero-length string.
619 Multiple body parts are only found in invocations of
620 .Sq \&Bl \-column ,
621 where a new body introduces a new phrase.
622 .Pp
623 The
624 .Xr mdoc 7
625 syntax tree accommodates broken block structures as well.
626 The ENDBODY node is available to end the formatting associated
627 with a given block before the physical end of that block.
628 It has a non-null
629 .Va end
630 field, is of the BODY
631 .Va type ,
632 has the same
633 .Va tok
634 as the BLOCK it is ending, and has a
635 .Va pending
636 field pointing to that BLOCK's BODY node.
637 It is an indirect child of that BODY node
638 and has no children of its own.
639 .Pp
640 An ENDBODY node is generated when a block ends while one of its child
641 blocks is still open, like in the following example:
642 .Bd -literal -offset indent
643 \&.Ao ao
644 \&.Bo bo ac
645 \&.Ac bc
646 \&.Bc end
647 .Ed
648 .Pp
649 This example results in the following block structure:
650 .Bd -literal -offset indent
651 BLOCK Ao
652 HEAD Ao
653 BODY Ao
654 TEXT ao
655 BLOCK Bo, pending -> Ao
656 HEAD Bo
657 BODY Bo
658 TEXT bo
659 TEXT ac
660 ENDBODY Ao, pending -> Ao
661 TEXT bc
662 TEXT end
663 .Ed
664 .Pp
665 Here, the formatting of the
666 .Ic \&Ao
667 block extends from TEXT ao to TEXT ac,
668 while the formatting of the
669 .Ic \&Bo
670 block extends from TEXT bo to TEXT bc.
671 It renders as follows in
672 .Fl T Ns Cm ascii
673 mode:
674 .Pp
675 .Dl <ao [bo ac> bc] end
676 .Pp
677 Support for badly-nested blocks is only provided for backward
678 compatibility with some older
679 .Xr mdoc 7
680 implementations.
681 Using badly-nested blocks is
682 .Em strongly discouraged ;
683 for example, the
684 .Fl T Ns Cm html
685 front-end to
686 .Xr mandoc 1
687 is unable to render them in any meaningful way.
688 Furthermore, behaviour when encountering badly-nested blocks is not
689 consistent across troff implementations, especially when using multiple
690 levels of badly-nested blocks.
691 .Sh SEE ALSO
692 .Xr mandoc 1 ,
693 .Xr man.cgi 3 ,
694 .Xr mandoc_escape 3 ,
695 .Xr mandoc_headers 3 ,
696 .Xr mandoc_malloc 3 ,
697 .Xr mansearch 3 ,
698 .Xr mchars_alloc 3 ,
699 .Xr tbl 3 ,
700 .Xr eqn 7 ,
701 .Xr man 7 ,
702 .Xr mandoc_char 7 ,
703 .Xr mdoc 7 ,
704 .Xr roff 7 ,
705 .Xr tbl 7
706 .Sh AUTHORS
707 .An -nosplit
708 The
709 .Nm
710 library was written by
711 .An Kristaps Dzonsons Aq Mt kristaps@bsd.lv
712 and is maintained by
713 .An Ingo Schwarze Aq Mt schwarze@openbsd.org .