]> git.cameronkatri.com Git - mandoc.git/blob - mandoc.3
ba042f545c478d3e291e932f3e769c05ff309661
[mandoc.git] / mandoc.3
1 .\" $Id: mandoc.3,v 1.42 2018/08/23 19:33:27 schwarze Exp $
2 .\"
3 .\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 .\" Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
5 .\"
6 .\" Permission to use, copy, modify, and distribute this software for any
7 .\" purpose with or without fee is hereby granted, provided that the above
8 .\" copyright notice and this permission notice appear in all copies.
9 .\"
10 .\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 .\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 .\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 .\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 .\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 .\"
18 .Dd $Mdocdate: August 23 2018 $
19 .Dt MANDOC 3
20 .Os
21 .Sh NAME
22 .Nm mandoc ,
23 .Nm deroff ,
24 .Nm mandocmsg ,
25 .Nm man_validate ,
26 .Nm mdoc_validate ,
27 .Nm mparse_alloc ,
28 .Nm mparse_copy ,
29 .Nm mparse_free ,
30 .Nm mparse_open ,
31 .Nm mparse_readfd ,
32 .Nm mparse_reset ,
33 .Nm mparse_result ,
34 .Nm mparse_strerror ,
35 .Nm mparse_strlevel ,
36 .Nm mparse_updaterc
37 .Nd mandoc macro compiler library
38 .Sh SYNOPSIS
39 .In sys/types.h
40 .In mandoc.h
41 .Pp
42 .Fd "#define ASCII_NBRSP"
43 .Fd "#define ASCII_HYPH"
44 .Fd "#define ASCII_BREAK"
45 .Ft struct mparse *
46 .Fo mparse_alloc
47 .Fa "int options"
48 .Fa "enum mandocerr mmin"
49 .Fa "mandocmsg mmsg"
50 .Fa "enum mandoc_os os_e"
51 .Fa "char *os_s"
52 .Fc
53 .Ft void
54 .Fo (*mandocmsg)
55 .Fa "enum mandocerr errtype"
56 .Fa "enum mandoclevel level"
57 .Fa "const char *file"
58 .Fa "int line"
59 .Fa "int col"
60 .Fa "const char *msg"
61 .Fc
62 .Ft void
63 .Fo mparse_free
64 .Fa "struct mparse *parse"
65 .Fc
66 .Ft void
67 .Fo mparse_copy
68 .Fa "const struct mparse *parse"
69 .Fc
70 .Ft int
71 .Fo mparse_open
72 .Fa "struct mparse *parse"
73 .Fa "const char *fname"
74 .Fc
75 .Ft "enum mandoclevel"
76 .Fo mparse_readfd
77 .Fa "struct mparse *parse"
78 .Fa "int fd"
79 .Fa "const char *fname"
80 .Fc
81 .Ft void
82 .Fo mparse_reset
83 .Fa "struct mparse *parse"
84 .Fc
85 .Ft void
86 .Fo mparse_result
87 .Fa "struct mparse *parse"
88 .Fa "struct roff_man **man"
89 .Fa "char **sodest"
90 .Fc
91 .Ft "const char *"
92 .Fo mparse_strerror
93 .Fa "enum mandocerr"
94 .Fc
95 .Ft "const char *"
96 .Fo mparse_strlevel
97 .Fa "enum mandoclevel"
98 .Fc
99 .Ft void
100 .Fo mparse_updaterc
101 .Fa "struct mparse *parse"
102 .Fa "enum mandoclevel *rc"
103 .Fc
104 .In roff.h
105 .Ft void
106 .Fo deroff
107 .Fa "char **dest"
108 .Fa "const struct roff_node *node"
109 .Fc
110 .In sys/types.h
111 .In mandoc.h
112 .In mdoc.h
113 .Vt extern const char * const * mdoc_argnames;
114 .Vt extern const char * const * mdoc_macronames;
115 .Ft void
116 .Fo mdoc_validate
117 .Fa "struct roff_man *mdoc"
118 .Fc
119 .In sys/types.h
120 .In mandoc.h
121 .In man.h
122 .Vt extern const char * const * man_macronames;
123 .Ft void
124 .Fo man_validate
125 .Fa "struct roff_man *man"
126 .Fc
127 .Sh DESCRIPTION
128 The
129 .Nm mandoc
130 library parses a
131 .Ux
132 manual into an abstract syntax tree (AST).
133 .Ux
134 manuals are composed of
135 .Xr mdoc 7
136 or
137 .Xr man 7 ,
138 and may be mixed with
139 .Xr roff 7 ,
140 .Xr tbl 7 ,
141 and
142 .Xr eqn 7
143 invocations.
144 .Pp
145 The following describes a general parse sequence:
146 .Bl -enum
147 .It
148 initiate a parsing sequence with
149 .Xr mchars_alloc 3
150 and
151 .Fn mparse_alloc ;
152 .It
153 open a file with
154 .Xr open 2
155 or
156 .Fn mparse_open ;
157 .It
158 parse it with
159 .Fn mparse_readfd ;
160 .It
161 close it with
162 .Xr close 2 ;
163 .It
164 retrieve the syntax tree with
165 .Fn mparse_result ;
166 .It
167 depending on whether the
168 .Fa macroset
169 member of the returned
170 .Vt struct roff_man
171 is
172 .Dv MACROSET_MDOC
173 or
174 .Dv MACROSET_MAN ,
175 validate it with
176 .Fn mdoc_validate
177 or
178 .Fn man_validate ,
179 respectively;
180 .It
181 if information about the validity of the input is needed, fetch it with
182 .Fn mparse_updaterc ;
183 .It
184 iterate over parse nodes starting from the
185 .Fa first
186 member of the returned
187 .Vt struct roff_man ;
188 .It
189 free all allocated memory with
190 .Fn mparse_free
191 and
192 .Xr mchars_free 3 ,
193 or invoke
194 .Fn mparse_reset
195 and go back to step 2 to parse new files.
196 .El
197 .Sh REFERENCE
198 This section documents the functions, types, and variables available
199 via
200 .In mandoc.h ,
201 with the exception of those documented in
202 .Xr mandoc_escape 3
203 and
204 .Xr mchars_alloc 3 .
205 .Ss Types
206 .Bl -ohang
207 .It Vt "enum mandocerr"
208 An error or warning message during parsing.
209 .It Vt "enum mandoclevel"
210 A classification of an
211 .Vt "enum mandocerr"
212 as regards system operation.
213 See the DIAGNOSTICS section in
214 .Xr mandoc 1
215 regarding the meanings of the levels.
216 .It Vt "struct mparse"
217 An opaque pointer to a running parse sequence.
218 Created with
219 .Fn mparse_alloc
220 and freed with
221 .Fn mparse_free .
222 This may be used across parsed input if
223 .Fn mparse_reset
224 is called between parses.
225 .It Vt "mandocmsg"
226 A prototype for a function to handle error and warning
227 messages emitted by the parser.
228 .El
229 .Ss Functions
230 .Bl -ohang
231 .It Fn deroff
232 Obtain a text-only representation of a
233 .Vt struct roff_node ,
234 including text contained in its child nodes.
235 To be used on children of the
236 .Fa first
237 member of
238 .Vt struct roff_man .
239 When it is no longer needed, the pointer returned from
240 .Fn deroff
241 can be passed to
242 .Xr free 3 .
243 .It Fn man_validate
244 Validate the
245 .Dv MACROSET_MAN
246 parse tree obtained with
247 .Fn mparse_result .
248 Declared in
249 .In man.h ,
250 implemented in
251 .Pa man.c .
252 .It Fn mdoc_validate
253 Validate the
254 .Dv MACROSET_MDOC
255 parse tree obtained with
256 .Fn mparse_result .
257 Declared in
258 .In mdoc.h ,
259 implemented in
260 .Pa mdoc.c .
261 .It Fn mparse_alloc
262 Allocate a parser.
263 The arguments have the following effect:
264 .Bl -tag -offset 5n -width inttype
265 .It Ar options
266 When the
267 .Dv MPARSE_MDOC
268 or
269 .Dv MPARSE_MAN
270 bit is set, only that parser is used.
271 Otherwise, the document type is automatically detected.
272 .Pp
273 When the
274 .Dv MPARSE_SO
275 bit is set,
276 .Xr roff 7
277 .Ic \&so
278 file inclusion requests are always honoured.
279 Otherwise, if the request is the only content in an input file,
280 only the file name is remembered, to be returned in the
281 .Fa sodest
282 argument of
283 .Fn mparse_result .
284 .Pp
285 When the
286 .Dv MPARSE_QUICK
287 bit is set, parsing is aborted after the NAME section.
288 This is for example useful in
289 .Xr makewhatis 8
290 .Fl Q
291 to quickly build minimal databases.
292 .It Ar mmin
293 Can be set to
294 .Dv MANDOCERR_BASE ,
295 .Dv MANDOCERR_STYLE ,
296 .Dv MANDOCERR_WARNING ,
297 .Dv MANDOCERR_ERROR ,
298 .Dv MANDOCERR_UNSUPP ,
299 or
300 .Dv MANDOCERR_MAX .
301 Messages below the selected level will be suppressed.
302 .It Ar mmsg
303 A callback function to handle errors and warnings.
304 See
305 .Pa main.c
306 for an example.
307 If printing of error messages is not desired,
308 .Dv NULL
309 may be passed.
310 .It Ar os_e
311 Operating system to check base system conventions for.
312 If
313 .Dv MANDOC_OS_OTHER ,
314 the system is automatically detected from
315 .Ic \&Os ,
316 .Fl Ios ,
317 or
318 .Xr uname 3 .
319 .It Ar os_s
320 A default string for the
321 .Xr mdoc 7
322 .Ic \&Os
323 macro, overriding the
324 .Dv OSNAME
325 preprocessor definition and the results of
326 .Xr uname 3 .
327 Passing
328 .Dv NULL
329 sets no default.
330 .El
331 .Pp
332 The same parser may be used for multiple files so long as
333 .Fn mparse_reset
334 is called between parses.
335 .Fn mparse_free
336 must be called to free the memory allocated by this function.
337 Declared in
338 .In mandoc.h ,
339 implemented in
340 .Pa read.c .
341 .It Fn mparse_free
342 Free all memory allocated by
343 .Fn mparse_alloc .
344 Declared in
345 .In mandoc.h ,
346 implemented in
347 .Pa read.c .
348 .It Fn mparse_copy
349 Dump a copy of the input to the standard output; used for
350 .Fl man T Ns Cm man .
351 Declared in
352 .In mandoc.h ,
353 implemented in
354 .Pa read.c .
355 .It Fn mparse_open
356 Open the file for reading.
357 If that fails and
358 .Fa fname
359 does not already end in
360 .Ql .gz ,
361 try again after appending
362 .Ql .gz .
363 Save the information whether the file is zipped or not.
364 Return a file descriptor open for reading or -1 on failure.
365 It can be passed to
366 .Fn mparse_readfd
367 or used directly.
368 Declared in
369 .In mandoc.h ,
370 implemented in
371 .Pa read.c .
372 .It Fn mparse_readfd
373 Parse a file descriptor opened with
374 .Xr open 2
375 or
376 .Fn mparse_open .
377 Pass the associated filename in
378 .Fa fname .
379 This function may be called multiple times with different parameters; however,
380 .Xr close 2
381 and
382 .Fn mparse_reset
383 should be invoked between parses.
384 Declared in
385 .In mandoc.h ,
386 implemented in
387 .Pa read.c .
388 .It Fn mparse_reset
389 Reset a parser so that
390 .Fn mparse_readfd
391 may be used again.
392 Declared in
393 .In mandoc.h ,
394 implemented in
395 .Pa read.c .
396 .It Fn mparse_result
397 Obtain the result of a parse.
398 One of the two pointers will be filled in.
399 Declared in
400 .In mandoc.h ,
401 implemented in
402 .Pa read.c .
403 .It Fn mparse_strerror
404 Return a statically-allocated string representation of an error code.
405 Declared in
406 .In mandoc.h ,
407 implemented in
408 .Pa read.c .
409 .It Fn mparse_strlevel
410 Return a statically-allocated string representation of a level code.
411 Declared in
412 .In mandoc.h ,
413 implemented in
414 .Pa read.c .
415 .It Fn mparse_updaterc
416 If the highest warning or error level that occurred during the current
417 .Fa parse
418 is higher than
419 .Pf * Fa rc ,
420 update
421 .Pf * Fa rc
422 accordingly.
423 This is useful after calling
424 .Fn mdoc_validate
425 or
426 .Fn man_validate .
427 Declared in
428 .In mandoc.h ,
429 implemented in
430 .Pa read.c .
431 .El
432 .Ss Variables
433 .Bl -ohang
434 .It Va man_macronames
435 The string representation of a
436 .Xr man 7
437 macro as indexed by
438 .Vt "enum mant" .
439 .It Va mdoc_argnames
440 The string representation of an
441 .Xr mdoc 7
442 macro argument as indexed by
443 .Vt "enum mdocargt" .
444 .It Va mdoc_macronames
445 The string representation of an
446 .Xr mdoc 7
447 macro as indexed by
448 .Vt "enum mdoct" .
449 .El
450 .Sh IMPLEMENTATION NOTES
451 This section consists of structural documentation for
452 .Xr mdoc 7
453 and
454 .Xr man 7
455 syntax trees and strings.
456 .Ss Man and Mdoc Strings
457 Strings may be extracted from mdoc and man meta-data, or from text
458 nodes (MDOC_TEXT and MAN_TEXT, respectively).
459 These strings have special non-printing formatting cues embedded in the
460 text itself, as well as
461 .Xr roff 7
462 escapes preserved from input.
463 Implementing systems will need to handle both situations to produce
464 human-readable text.
465 In general, strings may be assumed to consist of 7-bit ASCII characters.
466 .Pp
467 The following non-printing characters may be embedded in text strings:
468 .Bl -tag -width Ds
469 .It Dv ASCII_NBRSP
470 A non-breaking space character.
471 .It Dv ASCII_HYPH
472 A soft hyphen.
473 .It Dv ASCII_BREAK
474 A breakable zero-width space.
475 .El
476 .Pp
477 Escape characters are also passed verbatim into text strings.
478 An escape character is a sequence of characters beginning with the
479 backslash
480 .Pq Sq \e .
481 To construct human-readable text, these should be intercepted with
482 .Xr mandoc_escape 3
483 and converted with one of the functions described in
484 .Xr mchars_alloc 3 .
485 .Ss Man Abstract Syntax Tree
486 This AST is governed by the ontological rules dictated in
487 .Xr man 7
488 and derives its terminology accordingly.
489 .Pp
490 The AST is composed of
491 .Vt struct roff_node
492 nodes with element, root and text types as declared by the
493 .Va type
494 field.
495 Each node also provides its parse point (the
496 .Va line ,
497 .Va pos ,
498 and
499 .Va sec
500 fields), its position in the tree (the
501 .Va parent ,
502 .Va child ,
503 .Va next
504 and
505 .Va prev
506 fields) and some type-specific data.
507 .Pp
508 The tree itself is arranged according to the following normal form,
509 where capitalised non-terminals represent nodes.
510 .Pp
511 .Bl -tag -width "ELEMENTXX" -compact
512 .It ROOT
513 \(<- mnode+
514 .It mnode
515 \(<- ELEMENT | TEXT | BLOCK
516 .It BLOCK
517 \(<- HEAD BODY
518 .It HEAD
519 \(<- mnode*
520 .It BODY
521 \(<- mnode*
522 .It ELEMENT
523 \(<- ELEMENT | TEXT*
524 .It TEXT
525 \(<- [[:ascii:]]*
526 .El
527 .Pp
528 The only elements capable of nesting other elements are those with
529 next-line scope as documented in
530 .Xr man 7 .
531 .Ss Mdoc Abstract Syntax Tree
532 This AST is governed by the ontological
533 rules dictated in
534 .Xr mdoc 7
535 and derives its terminology accordingly.
536 .Qq In-line
537 elements described in
538 .Xr mdoc 7
539 are described simply as
540 .Qq elements .
541 .Pp
542 The AST is composed of
543 .Vt struct roff_node
544 nodes with block, head, body, element, root and text types as declared
545 by the
546 .Va type
547 field.
548 Each node also provides its parse point (the
549 .Va line ,
550 .Va pos ,
551 and
552 .Va sec
553 fields), its position in the tree (the
554 .Va parent ,
555 .Va child ,
556 .Va last ,
557 .Va next
558 and
559 .Va prev
560 fields) and some type-specific data, in particular, for nodes generated
561 from macros, the generating macro in the
562 .Va tok
563 field.
564 .Pp
565 The tree itself is arranged according to the following normal form,
566 where capitalised non-terminals represent nodes.
567 .Pp
568 .Bl -tag -width "ELEMENTXX" -compact
569 .It ROOT
570 \(<- mnode+
571 .It mnode
572 \(<- BLOCK | ELEMENT | TEXT
573 .It BLOCK
574 \(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
575 .It ELEMENT
576 \(<- TEXT*
577 .It HEAD
578 \(<- mnode*
579 .It BODY
580 \(<- mnode* [ENDBODY mnode*]
581 .It TAIL
582 \(<- mnode*
583 .It TEXT
584 \(<- [[:ascii:]]*
585 .El
586 .Pp
587 Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
588 the BLOCK production: these refer to punctuation marks.
589 Furthermore, although a TEXT node will generally have a non-zero-length
590 string, in the specific case of
591 .Sq \&.Bd \-literal ,
592 an empty line will produce a zero-length string.
593 Multiple body parts are only found in invocations of
594 .Sq \&.Bl \-column ,
595 where a new body introduces a new phrase.
596 .Pp
597 The
598 .Xr mdoc 7
599 syntax tree accommodates broken block structures as well.
600 The ENDBODY node is available to end the formatting associated
601 with a given block before the physical end of that block.
602 It has a non-null
603 .Va end
604 field, is of the BODY
605 .Va type ,
606 has the same
607 .Va tok
608 as the BLOCK it is ending, and has a
609 .Va pending
610 field pointing to that BLOCK's BODY node.
611 It is an indirect child of that BODY node
612 and has no children of its own.
613 .Pp
614 An ENDBODY node is generated when a block ends while one of its child
615 blocks is still open, like in the following example:
616 .Bd -literal -offset indent
617 \&.Ao ao
618 \&.Bo bo ac
619 \&.Ac bc
620 \&.Bc end
621 .Ed
622 .Pp
623 This example results in the following block structure:
624 .Bd -literal -offset indent
625 BLOCK Ao
626     HEAD Ao
627     BODY Ao
628         TEXT ao
629         BLOCK Bo, pending -> Ao
630             HEAD Bo
631             BODY Bo
632                 TEXT bo
633                 TEXT ac
634                 ENDBODY Ao, pending -> Ao
635                 TEXT bc
636 TEXT end
637 .Ed
638 .Pp
639 Here, the formatting of the
640 .Ic \&Ao
641 block extends from TEXT ao to TEXT ac,
642 while the formatting of the
643 .Ic \&Bo
644 block extends from TEXT bo to TEXT bc.
645 It renders as follows in
646 .Fl T Ns Cm ascii
647 mode:
648 .Pp
649 .Dl <ao [bo ac> bc] end
650 .Pp
651 Support for badly-nested blocks is only provided for backward
652 compatibility with some older
653 .Xr mdoc 7
654 implementations.
655 Using badly-nested blocks is
656 .Em strongly discouraged ;
657 for example, the
658 .Fl T Ns Cm html
659 front-end to
660 .Xr mandoc 1
661 is unable to render them in any meaningful way.
662 Furthermore, behaviour when encountering badly-nested blocks is not
663 consistent across troff implementations, especially when using multiple
664 levels of badly-nested blocks.
665 .Sh SEE ALSO
666 .Xr mandoc 1 ,
667 .Xr man.cgi 3 ,
668 .Xr mandoc_escape 3 ,
669 .Xr mandoc_headers 3 ,
670 .Xr mandoc_malloc 3 ,
671 .Xr mansearch 3 ,
672 .Xr mchars_alloc 3 ,
673 .Xr tbl 3 ,
674 .Xr eqn 7 ,
675 .Xr man 7 ,
676 .Xr mandoc_char 7 ,
677 .Xr mdoc 7 ,
678 .Xr roff 7 ,
679 .Xr tbl 7
680 .Sh AUTHORS
681 .An -nosplit
682 The
683 .Nm
684 library was written by
685 .An Kristaps Dzonsons Aq Mt kristaps@bsd.lv
686 and is maintained by
687 .An Ingo Schwarze Aq Mt schwarze@openbsd.org .