]> git.cameronkatri.com Git - mandoc.git/blob - mandoc.3
Add support for 1/2, 1/4, and 3/4 (needed by eqn).
[mandoc.git] / mandoc.3
1 .\" $Id: mandoc.3,v 1.12 2011/07/11 08:43:27 kristaps Exp $
2 .\"
3 .\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 .\" Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
5 .\"
6 .\" Permission to use, copy, modify, and distribute this software for any
7 .\" purpose with or without fee is hereby granted, provided that the above
8 .\" copyright notice and this permission notice appear in all copies.
9 .\"
10 .\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 .\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 .\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 .\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 .\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 .\"
18 .Dd $Mdocdate: July 11 2011 $
19 .Dt MANDOC 3
20 .Os
21 .Sh NAME
22 .Nm mandoc ,
23 .Nm mandoc_escape ,
24 .Nm man_meta ,
25 .Nm man_node ,
26 .Nm mchars_alloc ,
27 .Nm mchars_free ,
28 .Nm mchars_num2char ,
29 .Nm mchars_num2uc ,
30 .Nm mchars_spec2cp ,
31 .Nm mchars_spec2str ,
32 .Nm mdoc_meta ,
33 .Nm mdoc_node ,
34 .Nm mparse_alloc ,
35 .Nm mparse_free ,
36 .Nm mparse_readfd ,
37 .Nm mparse_reset ,
38 .Nm mparse_result ,
39 .Nm mparse_strerror ,
40 .Nm mparse_strlevel
41 .Nd mandoc macro compiler library
42 .Sh LIBRARY
43 .Lb mandoc
44 .Sh SYNOPSIS
45 .In man.h
46 .In mdoc.h
47 .In mandoc.h
48 .Ft "enum mandoc_esc"
49 .Fo mandoc_escape
50 .Fa "const char **in"
51 .Fa "const char **seq"
52 .Fa "int *len"
53 .Fc
54 .Ft "const struct man_meta *"
55 .Fo man_meta
56 .Fa "const struct man *man"
57 .Fc
58 .Ft "const struct man_node *"
59 .Fo man_node
60 .Fa "const struct man *man"
61 .Fc
62 .Ft "struct mchars *"
63 .Fn mchars_alloc
64 .Ft void
65 .Fn mchars_free "struct mchars *p"
66 .Ft char
67 .Fn mchars_num2char "const char *cp" "size_t sz"
68 .Ft int
69 .Fn mchars_num2uc "const char *cp" "size_t sz"
70 .Ft "const char *"
71 .Fo mchars_spec2str
72 .Fa "struct mchars *p"
73 .Fa "const char *cp"
74 .Fa "size_t sz"
75 .Fa "size_t *rsz"
76 .Fc
77 .Ft int
78 .Fo mchars_spec2cp
79 .Fa "struct mchars *p"
80 .Fa "const char *cp"
81 .Fa "size_t sz"
83 .Fc
84 .Ft "const struct mdoc_meta *"
85 .Fo mdoc_meta
86 .Fa "const struct mdoc *mdoc"
87 .Fc
88 .Ft "const struct mdoc_node *"
89 .Fo mdoc_node
90 .Fa "const struct mdoc *mdoc"
91 .Fc
92 .Ft "struct mparse *"
93 .Fo mparse_alloc
94 .Fa "enum mparset type"
95 .Fa "enum mandoclevel wlevel"
96 .Fa "mandocmsg msg"
97 .Fa "void *msgarg"
98 .Fc
99 .Ft void
100 .Fo mparse_free
101 .Fa "struct mparse *parse"
102 .Fc
103 .Ft "enum mandoclevel"
104 .Fo mparse_readfd
105 .Fa "struct mparse *parse"
106 .Fa "int fd"
107 .Fa "const char *fname"
108 .Fc
109 .Ft void
110 .Fo mparse_reset
111 .Fa "struct mparse *parse"
112 .Fc
113 .Ft void
114 .Fo mparse_result
115 .Fa "struct mparse *parse"
116 .Fa "struct mdoc **mdoc"
117 .Fa "struct man **man"
118 .Fc
119 .Ft "const char *"
120 .Fo mparse_strerror
121 .Fa "enum mandocerr"
122 .Fc
123 .Ft "const char *"
124 .Fo mparse_strlevel
125 .Fa "enum mandoclevel"
126 .Fc
127 .Vt extern const char * const * man_macronames;
128 .Vt extern const char * const * mdoc_argnames;
129 .Vt extern const char * const * mdoc_macronames;
130 .Fd "#define ASCII_NBRSP"
131 .Fd "#define ASCII_HYPH"
132 .Sh DESCRIPTION
133 The
134 .Nm mandoc
135 library parses a
136 .Ux
137 manual into an abstract syntax tree (AST).
138 .Ux
139 manuals are composed of
140 .Xr mdoc 7
141 or
142 .Xr man 7 ,
143 and may be mixed with
144 .Xr roff 7 ,
145 .Xr tbl 7 ,
146 and
147 .Xr eqn 7
148 invocations.
149 .Pp
150 The following describes a general parse sequence:
151 .Bl -enum
152 .It
153 initiate a parsing sequence with
154 .Fn mparse_alloc ;
155 .It
156 parse files or file descriptors with
157 .Fn mparse_readfd ;
158 .It
159 retrieve a parsed syntax tree, if the parse was successful, with
160 .Fn mparse_result ;
161 .It
162 iterate over parse nodes with
163 .Fn mdoc_node
164 or
165 .Fn man_node ;
166 .It
167 free all allocated memory with
168 .Fn mparse_free ,
169 or invoke
170 .Fn mparse_reset
171 and parse new files.
172 .El
173 .Pp
174 The
175 .Nm
176 library also contains routines for translating character strings into glyphs
177 .Pq see Fn mchars_alloc
178 and parsing escape sequences from strings
179 .Pq see Fn mandoc_escape .
180 .Sh REFERENCE
181 This section documents the functions, types, and variables available
182 via
183 .In mandoc.h .
184 .Ss Types
185 .Bl -ohang
186 .It Vt "enum mandoc_esc"
187 An escape sequence classification.
188 .It Vt "enum mandocerr"
189 A fatal error, error, or warning message during parsing.
190 .It Vt "enum mandoclevel"
191 A classification of an
192 .Vt "enum mandocerr"
193 as regards system operation.
194 .It Vt "struct mchars"
195 An opaque pointer to an object allowing for translation between
196 character strings and glyphs.
197 See
198 .Fn mchars_alloc .
199 .It Vt "enum mparset"
200 The type of parser when reading input.
201 This should usually be
202 .Dv MPARSE_AUTO
203 for auto-detection.
204 .It Vt "struct mparse"
205 An opaque pointer to a running parse sequence.
206 Created with
207 .Fn mparse_alloc
208 and freed with
209 .Fn mparse_free .
210 This may be used across parsed input if
211 .Fn mparse_reset
212 is called between parses.
213 .It Vt "mandocmsg"
214 A prototype for a function to handle fatal error, error, and warning
215 messages emitted by the parser.
216 .El
217 .Ss Functions
218 .Bl -ohang
219 .It Fn mandoc_escape
220 Scan an escape sequence, i.e., a character string beginning with
221 .Sq \e .
222 Pass a pointer to this string as
223 .Va in ;
224 it will be set to the supremum of the parsed escape sequence unless
225 returning
226 .Dv ESCAPE_ERROR ,
227 in which case the string is bogus and should be
228 thrown away.
229 If not
230 .Dv ESCAPE_ERROR
231 or
232 .Dv ESCAPE_IGNORE ,
233 .Va seq
234 is set to the first relevant character of the substring (font, glyph,
235 whatever) of length
236 .Va len .
237 Both
238 .Va seq
239 and
240 .Va len
241 may be
242 .Dv NULL .
243 .It Fn man_meta
244 Obtain the meta-data of a successful parse.
245 This may only be used on a pointer returned by
246 .Fn mparse_result .
247 .It Fn man_node
248 Obtain the root node of a successful parse.
249 This may only be used on a pointer returned by
250 .Fn mparse_result .
251 .It Fn mchars_alloc
252 Allocate an
253 .Vt "struct mchars *"
254 object for translating special characters into glyphs.
255 See
256 .Xr mandoc_char 7
257 for an overview of special characters.
258 The object must be freed with
259 .Fn mchars_free .
260 .It Fn mchars_free
261 Free an object created with
262 .Fn mchars_alloc .
263 .It Fn mchars_num2char
264 Convert a character index (e.g., the \eN\(aq\(aq escape) into a
265 printable ASCII character.
266 Returns \e0 (the nil character) if the input sequence is malformed.
267 .It Fn mchars_num2uc
268 Convert a hexadecimal character index (e.g., the \e[uNNNN] escape) into
269 a Unicode codepoint.
270 Returns \e0 (the nil character) if the input sequence is malformed.
271 .It Fn mchars_spec2cp
272 Convert a special character into a valid Unicode codepoint.
273 Returns \-1 on failure or a non-zero Unicode codepoint on success.
274 .It Fn mchars_spec2str
275 Convert a special character into an ASCII string.
276 Returns
277 .Dv NULL
278 on failure.
279 .It Fn mdoc_meta
280 Obtain the meta-data of a successful parse.
281 This may only be used on a pointer returned by
282 .Fn mparse_result .
283 .It Fn mdoc_node
284 Obtain the root node of a successful parse.
285 This may only be used on a pointer returned by
286 .Fn mparse_result .
287 .It Fn mparse_alloc
288 Allocate a parser.
289 The same parser may be used for multiple files so long as
290 .Fn mparse_reset
291 is called between parses.
292 .Fn mparse_free
293 must be called to free the memory allocated by this function.
294 .It Fn mparse_free
295 Free all memory allocated by
296 .Fn mparse_alloc .
297 .It Fn mparse_readfd
298 Parse a file or file descriptor.
299 If
300 .Va fd
301 is \-1,
302 .Va fname
303 is opened for reading.
304 Otherwise,
305 .Va fname
306 is assumed to be the name associated with
307 .Va fd .
308 This may be called multiple times with different parameters; however,
309 .Fn mparse_reset
310 should be invoked between parses.
311 .It Fn mparse_reset
312 Reset a parser so that
313 .Fn mparse_readfd
314 may be used again.
315 .It Fn mparse_result
316 Obtain the result of a parse.
317 Only successful parses
318 .Po
319 i.e., those where
320 .Fn mparse_readfd
321 returned less than MANDOCLEVEL_FATAL
322 .Pc
323 should invoke this function, in which case one of the two pointers will
324 be filled in.
325 .It Fn mparse_strerror
326 Return a statically-allocated string representation of an error code.
327 .It Fn mparse_strlevel
328 Return a statically-allocated string representation of a level code.
329 .El
330 .Ss Variables
331 .Bl -ohang
332 .It Va man_macronames
333 The string representation of a man macro as indexed by
334 .Vt "enum mant" .
335 .It Va mdoc_argnames
336 The string representation of a mdoc macro argument as indexed by
337 .Vt "enum mdocargt" .
338 .It Va mdoc_macronames
339 The string representation of a mdoc macro as indexed by
340 .Vt "enum mdoct" .
341 .El
342 .Sh IMPLEMENTATION NOTES
343 This section consists of structural documentation for
344 .Xr mdoc 7
345 and
346 .Xr man 7
347 syntax trees and strings.
348 .Ss Man and Mdoc Strings
349 Strings may be extracted from mdoc and man meta-data, or from text
350 nodes (MDOC_TEXT and MAN_TEXT, respectively).
351 These strings have special non-printing formatting cues embedded in the
352 text itself, as well as
353 .Xr roff 7
354 escapes preserved from input.
355 Implementing systems will need to handle both situations to produce
356 human-readable text.
357 In general, strings may be assumed to consist of 7-bit ASCII characters.
358 .Pp
359 The following non-printing characters may be embedded in text strings:
360 .Bl -tag -width Ds
361 .It Dv ASCII_NBRSP
362 A non-breaking space character.
363 .It Dv ASCII_HYPH
364 A soft hyphen.
365 .El
366 .Pp
367 Escape sequences are also passed verbatim into text strings.
368 An escape sequence is a sequence of characters beginning with the
369 backslash
370 .Pq Sq \e .
371 To construct human-readable text, these should be intercepted with
372 .Fn mandoc_escape
373 and converted with one of
374 .Fn mchars_num2char ,
375 .Fn mchars_spec2str ,
376 and so on.
377 .Ss Man Abstract Syntax Tree
378 This AST is governed by the ontological rules dictated in
379 .Xr man 7
380 and derives its terminology accordingly.
381 .Pp
382 The AST is composed of
383 .Vt struct man_node
384 nodes with block, head, body, element, root and text types as declared by the
385 .Va type
386 field.
387 Each node also provides its parse point (the
388 .Va line ,
389 .Va sec ,
390 and
391 .Va pos
392 fields), its position in the tree (the
393 .Va parent ,
394 .Va child ,
395 .Va next
396 and
397 .Va prev
398 fields) and some type-specific data.
399 .Pp
400 The tree itself is arranged according to the following normal form,
401 where capitalised non-terminals represent nodes.
402 .Pp
403 .Bl -tag -width "ELEMENTXX" -compact
404 .It ROOT
405 \(<- mnode+
406 .It mnode
407 \(<- ELEMENT | TEXT | BLOCK
408 .It BLOCK
409 \(<- HEAD BODY
410 .It HEAD
411 \(<- mnode*
412 .It BODY
413 \(<- mnode*
414 .It ELEMENT
415 \(<- ELEMENT | TEXT*
416 .It TEXT
417 \(<- [[:ascii:]]*
418 .El
419 .Pp
420 The only elements capable of nesting other elements are those with
421 next-line scope as documented in
422 .Xr man 7 .
423 .Ss Mdoc Abstract Syntax Tree
424 This AST is governed by the ontological
425 rules dictated in
426 .Xr mdoc 7
427 and derives its terminology accordingly.
428 .Qq In-line
429 elements described in
430 .Xr mdoc 7
431 are described simply as
432 .Qq elements .
433 .Pp
434 The AST is composed of
435 .Vt struct mdoc_node
436 nodes with block, head, body, element, root and text types as declared
437 by the
438 .Va type
439 field.
440 Each node also provides its parse point (the
441 .Va line ,
442 .Va sec ,
443 and
444 .Va pos
445 fields), its position in the tree (the
446 .Va parent ,
447 .Va child ,
448 .Va nchild ,
449 .Va next
450 and
451 .Va prev
452 fields) and some type-specific data, in particular, for nodes generated
453 from macros, the generating macro in the
454 .Va tok
455 field.
456 .Pp
457 The tree itself is arranged according to the following normal form,
458 where capitalised non-terminals represent nodes.
459 .Pp
460 .Bl -tag -width "ELEMENTXX" -compact
461 .It ROOT
462 \(<- mnode+
463 .It mnode
464 \(<- BLOCK | ELEMENT | TEXT
465 .It BLOCK
466 \(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
467 .It ELEMENT
468 \(<- TEXT*
469 .It HEAD
470 \(<- mnode*
471 .It BODY
472 \(<- mnode* [ENDBODY mnode*]
473 .It TAIL
474 \(<- mnode*
475 .It TEXT
476 \(<- [[:ascii:]]*
477 .El
478 .Pp
479 Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
480 the BLOCK production: these refer to punctuation marks.
481 Furthermore, although a TEXT node will generally have a non-zero-length
482 string, in the specific case of
483 .Sq \&.Bd \-literal ,
484 an empty line will produce a zero-length string.
485 Multiple body parts are only found in invocations of
486 .Sq \&Bl \-column ,
487 where a new body introduces a new phrase.
488 .Pp
489 The
490 .Xr mdoc 7
491 syntax tree accommodates broken block structures as well.
492 The ENDBODY node is available to end the formatting associated
493 with a given block before the physical end of that block.
494 It has a non-null
495 .Va end
496 field, is of the BODY
497 .Va type ,
498 has the same
499 .Va tok
500 as the BLOCK it is ending, and has a
501 .Va pending
502 field pointing to that BLOCK's BODY node.
503 It is an indirect child of that BODY node
504 and has no children of its own.
505 .Pp
506 An ENDBODY node is generated when a block ends while one of its child
507 blocks is still open, like in the following example:
508 .Bd -literal -offset indent
509 \&.Ao ao
510 \&.Bo bo ac
511 \&.Ac bc
512 \&.Bc end
513 .Ed
514 .Pp
515 This example results in the following block structure:
516 .Bd -literal -offset indent
517 BLOCK Ao
518     HEAD Ao
519     BODY Ao
520         TEXT ao
521         BLOCK Bo, pending -> Ao
522             HEAD Bo
523             BODY Bo
524                 TEXT bo
525                 TEXT ac
526                 ENDBODY Ao, pending -> Ao
527                 TEXT bc
528 TEXT end
529 .Ed
530 .Pp
531 Here, the formatting of the
532 .Sq \&Ao
533 block extends from TEXT ao to TEXT ac,
534 while the formatting of the
535 .Sq \&Bo
536 block extends from TEXT bo to TEXT bc.
537 It renders as follows in
538 .Fl T Ns Cm ascii
539 mode:
540 .Pp
541 .Dl <ao [bo ac> bc] end
542 .Pp
543 Support for badly-nested blocks is only provided for backward
544 compatibility with some older
545 .Xr mdoc 7
546 implementations.
547 Using badly-nested blocks is
548 .Em strongly discouraged ;
549 for example, the
550 .Fl T Ns Cm html
551 and
552 .Fl T Ns Cm xhtml
553 front-ends to
554 .Xr mandoc 1
555 are unable to render them in any meaningful way.
556 Furthermore, behaviour when encountering badly-nested blocks is not
557 consistent across troff implementations, especially when using multiple
558 levels of badly-nested blocks.
559 .Sh SEE ALSO
560 .Xr mandoc 1 ,
561 .Xr eqn 7 ,
562 .Xr man 7 ,
563 .Xr mandoc_char 7 ,
564 .Xr mdoc 7 ,
565 .Xr roff 7 ,
566 .Xr tbl 7
567 .Sh AUTHORS
568 The
569 .Nm
570 library was written by
571 .An Kristaps Dzonsons Aq kristaps@bsd.lv .