]> git.cameronkatri.com Git - mandoc.git/blob - mandoc.3
66148c4d12a6042db12a1243264b03ff5936e12b
[mandoc.git] / mandoc.3
1 .\" $Id: mandoc.3,v 1.6 2011/05/01 10:40:52 kristaps Exp $
2 .\"
3 .\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 .\" Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
5 .\"
6 .\" Permission to use, copy, modify, and distribute this software for any
7 .\" purpose with or without fee is hereby granted, provided that the above
8 .\" copyright notice and this permission notice appear in all copies.
9 .\"
10 .\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 .\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 .\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 .\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 .\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 .\"
18 .Dd $Mdocdate: May 1 2011 $
19 .Dt MANDOC 3
20 .Os
21 .Sh NAME
22 .Nm mandoc ,
23 .Nm mandoc_escape ,
24 .Nm man_meta ,
25 .Nm man_node ,
26 .Nm mchars_alloc ,
27 .Nm mchars_free ,
28 .Nm mchars_num2char ,
29 .Nm mchars_res2cp ,
30 .Nm mchars_res2str ,
31 .Nm mchars_spec2cp ,
32 .Nm mchars_spec2str ,
33 .Nm mdoc_meta ,
34 .Nm mdoc_node ,
35 .Nm mparse_alloc ,
36 .Nm mparse_free ,
37 .Nm mparse_readfd ,
38 .Nm mparse_reset ,
39 .Nm mparse_result ,
40 .Nm mparse_strerror ,
41 .Nm mparse_strlevel
42 .Nd mandoc macro compiler library
43 .Sh SYNOPSIS
44 .In man.h
45 .In mdoc.h
46 .In mandoc.h
47 .Ft "enum mandoc_esc"
48 .Fo mandoc_escape
49 .Fa "const char **in"
50 .Fa "const char **seq"
51 .Fa "int *len"
52 .Fc
53 .Ft "const struct man_meta *"
54 .Fo man_meta
55 .Fa "const struct man *man"
56 .Fc
57 .Ft "const struct man_node *"
58 .Fo man_node
59 .Fa "const struct man *man"
60 .Fc
61 .Ft "struct mchars *"
62 .Fn mchars_alloc
63 .Ft void
64 .Fn mchars_free "struct mchars *p"
65 .Ft char
66 .Fn mchars_num2char "const char *cp" "size_t sz"
67 .Ft "const char *"
68 .Fo mchars_res2str
69 .Fa "struct mchars *p"
70 .Fa "const char *cp"
71 .Fa "size_t sz"
72 .Fa "size_t *rsz"
73 .Fc
74 .Ft int
75 .Fo mchars_res2cp
76 .Fa "struct mchars *p"
77 .Fa "const char *cp"
78 .Fa "size_t sz"
80 .Fc
81 .Ft "const char *"
82 .Fo mchars_spec2str
83 .Fa "struct mchars *p"
84 .Fa "const char *cp"
85 .Fa "size_t sz"
86 .Fa "size_t *rsz"
87 .Fc
88 .Ft int
89 .Fo mchars_spec2cp
90 .Fa "struct mchars *p"
91 .Fa "const char *cp"
92 .Fa "size_t sz"
94 .Fc
95 .Ft "const struct mdoc_meta *"
96 .Fo mdoc_meta
97 .Fa "const struct mdoc *mdoc"
98 .Fc
99 .Ft "const struct mdoc_node *"
100 .Fo mdoc_node
101 .Fa "const struct mdoc *mdoc"
102 .Fc
103 .Ft "struct mparse *"
104 .Fo mparse_alloc
105 .Fa "enum mparset type"
106 .Fa "enum mandoclevel wlevel"
107 .Fa "mandocmsg msg"
108 .Fa "void *msgarg"
109 .Fc
110 .Ft void
111 .Fo mparse_free
112 .Fa "struct mparse *parse"
113 .Fc
114 .Ft "enum mandoclevel"
115 .Fo mparse_readfd
116 .Fa "struct mparse *parse"
117 .Fa "int fd"
118 .Fa "const char *fname"
119 .Fc
120 .Ft void
121 .Fo mparse_reset
122 .Fa "struct mparse *parse"
123 .Fc
124 .Ft void
125 .Fo mparse_result
126 .Fa "struct mparse *parse"
127 .Fa "struct mdoc **mdoc"
128 .Fa "struct man **man"
129 .Fc
130 .Ft "const char *"
131 .Fo mparse_strerror
132 .Fa "enum mandocerr"
133 .Fc
134 .Ft "const char *"
135 .Fo mparse_strlevel
136 .Fa "enum mandoclevel"
137 .Fc
138 .Vt extern const char * const * man_macronames;
139 .Vt extern const char * const * mdoc_argnames;
140 .Vt extern const char * const * mdoc_macronames;
141 .Fd "#define ASCII_NBRSP"
142 .Fd "#define ASCII_HYPH"
143 .Sh DESCRIPTION
144 The
145 .Nm mandoc
146 library parses a
147 .Ux
148 manual into an abstract syntax tree (AST).
149 .Ux
150 manuals are composed of
151 .Xr mdoc 7
152 or
153 .Xr man 7 ,
154 and may be mixed with
155 .Xr roff 7 ,
156 .Xr tbl 7 ,
157 and
158 .Xr eqn 7
159 invocations.
160 .Pp
161 The following describes a general parse sequence:
162 .Bl -enum
163 .It
164 initiate a parsing sequence with
165 .Fn mparse_alloc ;
166 .It
167 parse files or file descriptors with
168 .Fn mparse_readfd ;
169 .It
170 retrieve a parsed syntax tree, if the parse was successful, with
171 .Fn mparse_result ;
172 .It
173 iterate over parse nodes with
174 .Fn mdoc_node
175 or
176 .Fn man_node ;
177 .It
178 free all allocated memory with
179 .Fn mparse_free ,
180 or invoke
181 .Fn mparse_reset
182 and parse new files.
183 .El
184 .Pp
185 The
186 .Nm
187 library also contains routines for translating character strings into glyphs
188 .Pq see Fn mchars_alloc
189 and parsing escape sequences from strings
190 .Pq see Fn mandoc_escape .
191 .Sh REFERENCE
192 This section documents the functions, types, and variables available
193 via
194 .In mandoc.h .
195 .Ss Types
196 .Bl -ohang
197 .It Vt "enum mandoc_esc"
198 .It Vt "enum mandocerr"
199 .It Vt "enum mandoclevel"
200 .It Vt "struct mchars"
201 An opaque pointer to an object allowing for translation between
202 character strings and glyphs.
203 See
204 .Fn mchars_alloc .
205 .It Vt "enum mparset"
206 .It Vt "struct mparse"
207 .It Vt "mandocmsg"
208 .El
209 .Ss Functions
210 .Bl -ohang
211 .It Fn mandoc_escape
212 Scan an escape sequence, i.e., a character string beginning with
213 .Sq \e .
214 Pass a pointer to this string as
215 .Va in ;
216 it will be set to the supremum of the parsed escape sequence unless
217 returning ESCAPE_ERROR, in which case the string is bogus and should be
218 thrown away.
219 If not ESCAPE_ERROR or ESCAPE_IGNORE,
220 .Va seq
221 is set to the first relevant character of the substring (font, glyph,
222 whatever) of length
223 .Va len .
224 Both
225 .Va seq
226 and
227 .Va len
228 may be NULL.
229 .It Fn man_meta
230 Obtain the meta-data of a successful parse.
231 This may only be used on a pointer returned by
232 .Fn mparse_result .
233 .It Fn man_node
234 Obtain the root node of a successful parse.
235 This may only be used on a pointer returned by
236 .Fn mparse_result .
237 .It Fn mchars_alloc
238 Allocate a
239 .Vt "struct mchars *"
240 object for translating special characters into glyphs.
241 See
242 .Xr mandoc_char 7
243 for an overview of special characters.
244 The object must be freed with
245 .Fn mchars_free .
246 .It Fn mchars_free
247 Free an object created with
248 .Fn mchars_alloc .
249 .It Fn mchars_num2char
250 Convert a character index as found in \eN\(aq\(aq into a printable
251 character.
252 Returns \e0 (the nil character) if the input sequence is malformed.
253 .It Fn mchars_res2cp
254 Convert a predefined character into a valid Unicode codepoint.
255 Returns \-1 on failure and 0 if no code-point exists (if this occurs,
256 the caller should fall back to
257 .Fn mchars_res2str ) .
258 .It Fn mchars_res2str
259 Convert a predefined character into an ASCII string.
260 Returns NULL on failure.
261 .It Fn mchars_spec2cp
262 Convert a special character into a valid Unicode codepoint.
263 Returns \-1 on failure and 0 if no code-point exists (if this occurs,
264 the caller should fall back to
265 .Fn mchars_spec2str ) .
266 .It Fn mchars_spec2str
267 Convert a special character into an ASCII string.
268 Returns NULL on failure.
269 .It Fn mdoc_meta
270 Obtain the meta-data of a successful parse.
271 This may only be used on a pointer returned by
272 .Fn mparse_result .
273 .It Fn mdoc_node
274 Obtain the root node of a successful parse.
275 This may only be used on a pointer returned by
276 .Fn mparse_result .
277 .It Fn mparse_alloc
278 Allocate a parser.
279 The same parser may be used for multiple files so long as
280 .Fn mparse_reset
281 is called between parses.
282 .Fn mparse_free
283 must be called to free the memory allocated by this function.
284 .It Fn mparse_free
285 Free all memory allocated by
286 .Fn mparse_alloc .
287 .It Fn mparse_readfd
288 Parse a file or file descriptor.
289 If
290 .Va fd
291 is \-1,
292 .Va fname
293 is opened for reading.
294 Otherwise,
295 .Va fname
296 is assumed to be the name associated with
297 .Va fd .
298 This may be called multiple times with different parameters; however,
299 .Fn mparse_reset
300 should be invoked between parses.
301 .It Fn mparse_reset
302 Reset a parser so that
303 .Fn mparse_readfd
304 may be used again.
305 .It Fn mparse_result
306 Obtain the result of a parse.
307 Only successful parses
308 .Po
309 i.e., those where
310 .Fn mparse_readfd
311 returned less than MANDOCLEVEL_FATAL
312 .Pc
313 should invoke this function, in which case one of the two pointers will
314 be filled in.
315 .It Fn mparse_strerror
316 Return a statically-allocated string representation of an error code.
317 .It Fn mparse_strlevel
318 Return a statically-allocated string representation of a level code.
319 .El
320 .Ss Variables
321 .Bl -ohang
322 .It Va man_macronames
323 The string representation of a man macro as indexed by
324 .Vt "enum mant" .
325 .It Va mdoc_argnames
326 The string representation of an mdoc macro argument as indexed by
327 .Vt "enum mdocargt" .
328 .It Va mdoc_macronames
329 The string representation of an mdoc macro as indexed by
330 .Vt "enum mdoct" .
331 .El
332 .Sh IMPLEMENTATION NOTES
333 This section consists of structural documentation for
334 .Xr mdoc 7
335 and
336 .Xr man 7
337 syntax trees.
338 .Ss Man Abstract Syntax Tree
339 This AST is governed by the ontological rules dictated in
340 .Xr man 7
341 and derives its terminology accordingly.
342 .Pp
343 The AST is composed of
344 .Vt struct man_node
345 nodes with block, head, body, element, root and text types as declared by the
346 .Va type
347 field.
348 Each node also provides its parse point (the
349 .Va line ,
350 .Va sec ,
351 and
352 .Va pos
353 fields), its position in the tree (the
354 .Va parent ,
355 .Va child ,
356 .Va next
357 and
358 .Va prev
359 fields) and some type-specific data.
360 .Pp
361 The tree itself is arranged according to the following normal form,
362 where capitalised non-terminals represent nodes.
363 .Pp
364 .Bl -tag -width "ELEMENTXX" -compact
365 .It ROOT
366 \(<- mnode+
367 .It mnode
368 \(<- ELEMENT | TEXT | BLOCK
369 .It BLOCK
370 \(<- HEAD BODY
371 .It HEAD
372 \(<- mnode*
373 .It BODY
374 \(<- mnode*
375 .It ELEMENT
376 \(<- ELEMENT | TEXT*
377 .It TEXT
378 \(<- [[:alpha:]]*
379 .El
380 .Pp
381 The only elements capable of nesting other elements are those with
382 next-line scope as documented in
383 .Xr man 7 .
384 .Ss Mdoc Abstract Syntax Tree
385 This AST is governed by the ontological
386 rules dictated in
387 .Xr mdoc 7
388 and derives its terminology accordingly.
389 .Qq In-line
390 elements described in
391 .Xr mdoc 7
392 are described simply as
393 .Qq elements .
394 .Pp
395 The AST is composed of
396 .Vt struct mdoc_node
397 nodes with block, head, body, element, root and text types as declared
398 by the
399 .Va type
400 field.
401 Each node also provides its parse point (the
402 .Va line ,
403 .Va sec ,
404 and
405 .Va pos
406 fields), its position in the tree (the
407 .Va parent ,
408 .Va child ,
409 .Va nchild ,
410 .Va next
411 and
412 .Va prev
413 fields) and some type-specific data, in particular, for nodes generated
414 from macros, the generating macro in the
415 .Va tok
416 field.
417 .Pp
418 The tree itself is arranged according to the following normal form,
419 where capitalised non-terminals represent nodes.
420 .Pp
421 .Bl -tag -width "ELEMENTXX" -compact
422 .It ROOT
423 \(<- mnode+
424 .It mnode
425 \(<- BLOCK | ELEMENT | TEXT
426 .It BLOCK
427 \(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
428 .It ELEMENT
429 \(<- TEXT*
430 .It HEAD
431 \(<- mnode*
432 .It BODY
433 \(<- mnode* [ENDBODY mnode*]
434 .It TAIL
435 \(<- mnode*
436 .It TEXT
437 \(<- [[:printable:],0x1e]*
438 .El
439 .Pp
440 Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
441 the BLOCK production: these refer to punctuation marks.
442 Furthermore, although a TEXT node will generally have a non-zero-length
443 string, in the specific case of
444 .Sq \&.Bd \-literal ,
445 an empty line will produce a zero-length string.
446 Multiple body parts are only found in invocations of
447 .Sq \&Bl \-column ,
448 where a new body introduces a new phrase.
449 .Pp
450 The
451 .Xr mdoc 7
452 syntax tree accommodates broken block structures as well.
453 The ENDBODY node is available to end the formatting associated
454 with a given block before the physical end of that block.
455 It has a non-null
456 .Va end
457 field, is of the BODY
458 .Va type ,
459 has the same
460 .Va tok
461 as the BLOCK it is ending, and has a
462 .Va pending
463 field pointing to that BLOCK's BODY node.
464 It is an indirect child of that BODY node
465 and has no children of its own.
466 .Pp
467 An ENDBODY node is generated when a block ends while one of its child
468 blocks is still open, like in the following example:
469 .Bd -literal -offset indent
470 \&.Ao ao
471 \&.Bo bo ac
472 \&.Ac bc
473 \&.Bc end
474 .Ed
475 .Pp
476 This example results in the following block structure:
477 .Bd -literal -offset indent
478 BLOCK Ao
479 HEAD Ao
480 BODY Ao
481 TEXT ao
482 BLOCK Bo, pending -> Ao
483 HEAD Bo
484 BODY Bo
485 TEXT bo
486 TEXT ac
487 ENDBODY Ao, pending -> Ao
488 TEXT bc
489 TEXT end
490 .Ed
491 .Pp
492 Here, the formatting of the
493 .Sq \&Ao
494 block extends from TEXT ao to TEXT ac,
495 while the formatting of the
496 .Sq \&Bo
497 block extends from TEXT bo to TEXT bc.
498 It renders as follows in
499 .Fl T Ns Cm ascii
500 mode:
501 .Pp
502 .Dl <ao [bo ac> bc] end
503 .Pp
504 Support for badly-nested blocks is only provided for backward
505 compatibility with some older
506 .Xr mdoc 7
507 implementations.
508 Using badly-nested blocks is
509 .Em strongly discouraged ;
510 for example, the
511 .Fl T Ns Cm html
512 and
513 .Fl T Ns Cm xhtml
514 front-ends to
515 .Xr mandoc 1
516 are unable to render them in any meaningful way.
517 Furthermore, behaviour when encountering badly-nested blocks is not
518 consistent across troff implementations, especially when using multiple
519 levels of badly-nested blocks.
520 .Sh SEE ALSO
521 .Xr mandoc 1 ,
522 .Xr eqn 7 ,
523 .Xr man 7 ,
524 .Xr mandoc_char 7 ,
525 .Xr mdoc 7 ,
526 .Xr roff 7 ,
527 .Xr tbl 7
528 .Sh AUTHORS
529 The
530 .Nm
531 library was written by
532 .An Kristaps Dzonsons Aq kristaps@bsd.lv .