]> git.cameronkatri.com Git - mandoc.git/blob - mandoc.3
Implement line breaking of the generated HTML code at space characters
[mandoc.git] / mandoc.3
1 .\" $Id: mandoc.3,v 1.38 2017/01/09 01:37:03 schwarze Exp $
2 .\"
3 .\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 .\" Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
5 .\"
6 .\" Permission to use, copy, modify, and distribute this software for any
7 .\" purpose with or without fee is hereby granted, provided that the above
8 .\" copyright notice and this permission notice appear in all copies.
9 .\"
10 .\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 .\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 .\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 .\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 .\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 .\"
18 .Dd $Mdocdate: January 9 2017 $
19 .Dt MANDOC 3
20 .Os
21 .Sh NAME
22 .Nm mandoc ,
23 .Nm deroff ,
24 .Nm mandocmsg ,
25 .Nm man_mparse ,
26 .Nm man_validate ,
27 .Nm mdoc_validate ,
28 .Nm mparse_alloc ,
29 .Nm mparse_free ,
30 .Nm mparse_getkeep ,
31 .Nm mparse_keep ,
32 .Nm mparse_open ,
33 .Nm mparse_readfd ,
34 .Nm mparse_reset ,
35 .Nm mparse_result ,
36 .Nm mparse_strerror ,
37 .Nm mparse_strlevel ,
38 .Nm mparse_updaterc
39 .Nd mandoc macro compiler library
40 .Sh SYNOPSIS
41 .In sys/types.h
42 .In mandoc.h
43 .Pp
44 .Fd "#define ASCII_NBRSP"
45 .Fd "#define ASCII_HYPH"
46 .Fd "#define ASCII_BREAK"
47 .Ft struct mparse *
48 .Fo mparse_alloc
49 .Fa "int options"
50 .Fa "enum mandoclevel wlevel"
51 .Fa "mandocmsg mmsg"
52 .Fa "char *defos"
53 .Fc
54 .Ft void
55 .Fo (*mandocmsg)
56 .Fa "enum mandocerr errtype"
57 .Fa "enum mandoclevel level"
58 .Fa "const char *file"
59 .Fa "int line"
60 .Fa "int col"
61 .Fa "const char *msg"
62 .Fc
63 .Ft void
64 .Fo mparse_free
65 .Fa "struct mparse *parse"
66 .Fc
67 .Ft const char *
68 .Fo mparse_getkeep
69 .Fa "const struct mparse *parse"
70 .Fc
71 .Ft void
72 .Fo mparse_keep
73 .Fa "struct mparse *parse"
74 .Fc
75 .Ft int
76 .Fo mparse_open
77 .Fa "struct mparse *parse"
78 .Fa "const char *fname"
79 .Fc
80 .Ft "enum mandoclevel"
81 .Fo mparse_readfd
82 .Fa "struct mparse *parse"
83 .Fa "int fd"
84 .Fa "const char *fname"
85 .Fc
86 .Ft void
87 .Fo mparse_reset
88 .Fa "struct mparse *parse"
89 .Fc
90 .Ft void
91 .Fo mparse_result
92 .Fa "struct mparse *parse"
93 .Fa "struct roff_man **man"
94 .Fa "char **sodest"
95 .Fc
96 .Ft "const char *"
97 .Fo mparse_strerror
98 .Fa "enum mandocerr"
99 .Fc
100 .Ft "const char *"
101 .Fo mparse_strlevel
102 .Fa "enum mandoclevel"
103 .Fc
104 .Ft void
105 .Fo mparse_updaterc
106 .Fa "struct mparse *parse"
107 .Fa "enum mandoclevel *rc"
108 .Fc
109 .In roff.h
110 .Ft void
111 .Fo deroff
112 .Fa "char **dest"
113 .Fa "const struct roff_node *node"
114 .Fc
115 .In sys/types.h
116 .In mandoc.h
117 .In mdoc.h
118 .Vt extern const char * const * mdoc_argnames;
119 .Vt extern const char * const * mdoc_macronames;
120 .Ft void
121 .Fo mdoc_validate
122 .Fa "struct roff_man *mdoc"
123 .Fc
124 .In sys/types.h
125 .In mandoc.h
126 .In man.h
127 .Vt extern const char * const * man_macronames;
128 .Ft "const struct mparse *"
129 .Fo man_mparse
130 .Fa "const struct roff_man *man"
131 .Fc
132 .Ft void
133 .Fo man_validate
134 .Fa "struct roff_man *man"
135 .Fc
136 .Sh DESCRIPTION
137 The
138 .Nm mandoc
139 library parses a
140 .Ux
141 manual into an abstract syntax tree (AST).
142 .Ux
143 manuals are composed of
144 .Xr mdoc 7
145 or
146 .Xr man 7 ,
147 and may be mixed with
148 .Xr roff 7 ,
149 .Xr tbl 7 ,
150 and
151 .Xr eqn 7
152 invocations.
153 .Pp
154 The following describes a general parse sequence:
155 .Bl -enum
156 .It
157 initiate a parsing sequence with
158 .Xr mchars_alloc 3
159 and
160 .Fn mparse_alloc ;
161 .It
162 open a file with
163 .Xr open 2
164 or
165 .Fn mparse_open ;
166 .It
167 parse it with
168 .Fn mparse_readfd ;
169 .It
170 close it with
171 .Xr close 2 ;
172 .It
173 retrieve the syntax tree with
174 .Fn mparse_result ;
175 .It
176 depending on whether the
177 .Fa macroset
178 member of the returned
179 .Vt struct roff_man
180 is
181 .Dv MACROSET_MDOC
182 or
183 .Dv MACROSET_MAN ,
184 validate it with
185 .Fn mdoc_validate
186 or
187 .Fn man_validate ,
188 respectively;
189 .It
190 if information about the validity of the input is needed, fetch it with
191 .Fn mparse_updaterc ;
192 .It
193 iterate over parse nodes starting from the
194 .Fa first
195 member of the returned
196 .Vt struct roff_man ;
197 .It
198 free all allocated memory with
199 .Fn mparse_free
200 and
201 .Xr mchars_free 3 ,
202 or invoke
203 .Fn mparse_reset
204 and go back to step 2 to parse new files.
205 .El
206 .Sh REFERENCE
207 This section documents the functions, types, and variables available
208 via
209 .In mandoc.h ,
210 with the exception of those documented in
211 .Xr mandoc_escape 3
212 and
213 .Xr mchars_alloc 3 .
214 .Ss Types
215 .Bl -ohang
216 .It Vt "enum mandocerr"
217 An error or warning message during parsing.
218 .It Vt "enum mandoclevel"
219 A classification of an
220 .Vt "enum mandocerr"
221 as regards system operation.
222 See the DIAGNOSTICS section in
223 .Xr mandoc 1
224 regarding the meanings of the levels.
225 .It Vt "struct mparse"
226 An opaque pointer to a running parse sequence.
227 Created with
228 .Fn mparse_alloc
229 and freed with
230 .Fn mparse_free .
231 This may be used across parsed input if
232 .Fn mparse_reset
233 is called between parses.
234 .It Vt "mandocmsg"
235 A prototype for a function to handle error and warning
236 messages emitted by the parser.
237 .El
238 .Ss Functions
239 .Bl -ohang
240 .It Fn deroff
241 Obtain a text-only representation of a
242 .Vt struct roff_node ,
243 including text contained in its child nodes.
244 To be used on children of the
245 .Fa first
246 member of
247 .Vt struct roff_man .
248 When it is no longer needed, the pointer returned from
249 .Fn deroff
250 can be passed to
251 .Xr free 3 .
252 .It Fn man_mparse
253 Get the parser used for the current output.
254 Declared in
255 .In man.h ,
256 implemented in
257 .Pa man.c .
258 .It Fn man_validate
259 Validate the
260 .Dv MACROSET_MAN
261 parse tree obtained with
262 .Fn mparse_result .
263 Declared in
264 .In man.h ,
265 implemented in
266 .Pa man.c .
267 .It Fn mdoc_validate
268 Validate the
269 .Dv MACROSET_MDOC
270 parse tree obtained with
271 .Fn mparse_result .
272 Declared in
273 .In mdoc.h ,
274 implemented in
275 .Pa mdoc.c .
276 .It Fn mparse_alloc
277 Allocate a parser.
278 The arguments have the following effect:
279 .Bl -tag -offset 5n -width inttype
280 .It Ar options
281 When the
282 .Dv MPARSE_MDOC
283 or
284 .Dv MPARSE_MAN
285 bit is set, only that parser is used.
286 Otherwise, the document type is automatically detected.
287 .Pp
288 When the
289 .Dv MPARSE_SO
290 bit is set,
291 .Xr roff 7
292 .Ic \&so
293 file inclusion requests are always honoured.
294 Otherwise, if the request is the only content in an input file,
295 only the file name is remembered, to be returned in the
296 .Fa sodest
297 argument of
298 .Fn mparse_result .
299 .Pp
300 When the
301 .Dv MPARSE_QUICK
302 bit is set, parsing is aborted after the NAME section.
303 This is for example useful in
304 .Xr makewhatis 8
305 .Fl Q
306 to quickly build minimal databases.
307 .It Ar wlevel
308 Can be set to
309 .Dv MANDOCLEVEL_BADARG ,
310 .Dv MANDOCLEVEL_ERROR ,
311 or
312 .Dv MANDOCLEVEL_WARNING .
313 Messages below the selected level will be suppressed.
314 .It Ar mmsg
315 A callback function to handle errors and warnings.
316 See
317 .Pa main.c
318 for an example.
319 If printing of error messages is not desired,
320 .Dv NULL
321 may be passed.
322 .It Ar defos
323 A default string for the
324 .Xr mdoc 7
325 .Sq \&Os
326 macro, overriding the
327 .Dv OSNAME
328 preprocessor definition and the results of
329 .Xr uname 3 .
330 Passing
331 .Dv NULL
332 sets no default.
333 .El
334 .Pp
335 The same parser may be used for multiple files so long as
336 .Fn mparse_reset
337 is called between parses.
338 .Fn mparse_free
339 must be called to free the memory allocated by this function.
340 Declared in
341 .In mandoc.h ,
342 implemented in
343 .Pa read.c .
344 .It Fn mparse_free
345 Free all memory allocated by
346 .Fn mparse_alloc .
347 Declared in
348 .In mandoc.h ,
349 implemented in
350 .Pa read.c .
351 .It Fn mparse_getkeep
352 Acquire the keep buffer.
353 Must follow a call of
354 .Fn mparse_keep .
355 Declared in
356 .In mandoc.h ,
357 implemented in
358 .Pa read.c .
359 .It Fn mparse_keep
360 Instruct the parser to retain a copy of its parsed input.
361 This can be acquired with subsequent
362 .Fn mparse_getkeep
363 calls.
364 Declared in
365 .In mandoc.h ,
366 implemented in
367 .Pa read.c .
368 .It Fn mparse_open
369 Open the file for reading.
370 If that fails and
371 .Fa fname
372 does not already end in
373 .Ql .gz ,
374 try again after appending
375 .Ql .gz .
376 Save the information whether the file is zipped or not.
377 Return a file descriptor open for reading or -1 on failure.
378 It can be passed to
379 .Fn mparse_readfd
380 or used directly.
381 Declared in
382 .In mandoc.h ,
383 implemented in
384 .Pa read.c .
385 .It Fn mparse_readfd
386 Parse a file descriptor opened with
387 .Xr open 2
388 or
389 .Fn mparse_open .
390 Pass the associated filename in
391 .Va fname .
392 This function may be called multiple times with different parameters; however,
393 .Xr close 2
394 and
395 .Fn mparse_reset
396 should be invoked between parses.
397 Declared in
398 .In mandoc.h ,
399 implemented in
400 .Pa read.c .
401 .It Fn mparse_reset
402 Reset a parser so that
403 .Fn mparse_readfd
404 may be used again.
405 Declared in
406 .In mandoc.h ,
407 implemented in
408 .Pa read.c .
409 .It Fn mparse_result
410 Obtain the result of a parse.
411 One of the two pointers will be filled in.
412 Declared in
413 .In mandoc.h ,
414 implemented in
415 .Pa read.c .
416 .It Fn mparse_strerror
417 Return a statically-allocated string representation of an error code.
418 Declared in
419 .In mandoc.h ,
420 implemented in
421 .Pa read.c .
422 .It Fn mparse_strlevel
423 Return a statically-allocated string representation of a level code.
424 Declared in
425 .In mandoc.h ,
426 implemented in
427 .Pa read.c .
428 .It Fn mparse_updaterc
429 If the highest warning or error level that occurred during the current
430 .Fa parse
431 is higher than
432 .Pf * Fa rc ,
433 update
434 .Pf * Fa rc
435 accordingly.
436 This is useful after calling
437 .Fn mdoc_validate
438 or
439 .Fn man_validate .
440 Declared in
441 .In mandoc.h ,
442 implemented in
443 .Pa read.c .
444 .El
445 .Ss Variables
446 .Bl -ohang
447 .It Va man_macronames
448 The string representation of a
449 .Xr man 7
450 macro as indexed by
451 .Vt "enum mant" .
452 .It Va mdoc_argnames
453 The string representation of an
454 .Xr mdoc 7
455 macro argument as indexed by
456 .Vt "enum mdocargt" .
457 .It Va mdoc_macronames
458 The string representation of an
459 .Xr mdoc 7
460 macro as indexed by
461 .Vt "enum mdoct" .
462 .El
463 .Sh IMPLEMENTATION NOTES
464 This section consists of structural documentation for
465 .Xr mdoc 7
466 and
467 .Xr man 7
468 syntax trees and strings.
469 .Ss Man and Mdoc Strings
470 Strings may be extracted from mdoc and man meta-data, or from text
471 nodes (MDOC_TEXT and MAN_TEXT, respectively).
472 These strings have special non-printing formatting cues embedded in the
473 text itself, as well as
474 .Xr roff 7
475 escapes preserved from input.
476 Implementing systems will need to handle both situations to produce
477 human-readable text.
478 In general, strings may be assumed to consist of 7-bit ASCII characters.
479 .Pp
480 The following non-printing characters may be embedded in text strings:
481 .Bl -tag -width Ds
482 .It Dv ASCII_NBRSP
483 A non-breaking space character.
484 .It Dv ASCII_HYPH
485 A soft hyphen.
486 .It Dv ASCII_BREAK
487 A breakable zero-width space.
488 .El
489 .Pp
490 Escape characters are also passed verbatim into text strings.
491 An escape character is a sequence of characters beginning with the
492 backslash
493 .Pq Sq \e .
494 To construct human-readable text, these should be intercepted with
495 .Xr mandoc_escape 3
496 and converted with one of the functions described in
497 .Xr mchars_alloc 3 .
498 .Ss Man Abstract Syntax Tree
499 This AST is governed by the ontological rules dictated in
500 .Xr man 7
501 and derives its terminology accordingly.
502 .Pp
503 The AST is composed of
504 .Vt struct roff_node
505 nodes with element, root and text types as declared by the
506 .Va type
507 field.
508 Each node also provides its parse point (the
509 .Va line ,
510 .Va pos ,
511 and
512 .Va sec
513 fields), its position in the tree (the
514 .Va parent ,
515 .Va child ,
516 .Va next
517 and
518 .Va prev
519 fields) and some type-specific data.
520 .Pp
521 The tree itself is arranged according to the following normal form,
522 where capitalised non-terminals represent nodes.
523 .Pp
524 .Bl -tag -width "ELEMENTXX" -compact
525 .It ROOT
526 \(<- mnode+
527 .It mnode
528 \(<- ELEMENT | TEXT | BLOCK
529 .It BLOCK
530 \(<- HEAD BODY
531 .It HEAD
532 \(<- mnode*
533 .It BODY
534 \(<- mnode*
535 .It ELEMENT
536 \(<- ELEMENT | TEXT*
537 .It TEXT
538 \(<- [[:ascii:]]*
539 .El
540 .Pp
541 The only elements capable of nesting other elements are those with
542 next-line scope as documented in
543 .Xr man 7 .
544 .Ss Mdoc Abstract Syntax Tree
545 This AST is governed by the ontological
546 rules dictated in
547 .Xr mdoc 7
548 and derives its terminology accordingly.
549 .Qq In-line
550 elements described in
551 .Xr mdoc 7
552 are described simply as
553 .Qq elements .
554 .Pp
555 The AST is composed of
556 .Vt struct roff_node
557 nodes with block, head, body, element, root and text types as declared
558 by the
559 .Va type
560 field.
561 Each node also provides its parse point (the
562 .Va line ,
563 .Va pos ,
564 and
565 .Va sec
566 fields), its position in the tree (the
567 .Va parent ,
568 .Va child ,
569 .Va last ,
570 .Va next
571 and
572 .Va prev
573 fields) and some type-specific data, in particular, for nodes generated
574 from macros, the generating macro in the
575 .Va tok
576 field.
577 .Pp
578 The tree itself is arranged according to the following normal form,
579 where capitalised non-terminals represent nodes.
580 .Pp
581 .Bl -tag -width "ELEMENTXX" -compact
582 .It ROOT
583 \(<- mnode+
584 .It mnode
585 \(<- BLOCK | ELEMENT | TEXT
586 .It BLOCK
587 \(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
588 .It ELEMENT
589 \(<- TEXT*
590 .It HEAD
591 \(<- mnode*
592 .It BODY
593 \(<- mnode* [ENDBODY mnode*]
594 .It TAIL
595 \(<- mnode*
596 .It TEXT
597 \(<- [[:ascii:]]*
598 .El
599 .Pp
600 Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
601 the BLOCK production: these refer to punctuation marks.
602 Furthermore, although a TEXT node will generally have a non-zero-length
603 string, in the specific case of
604 .Sq \&.Bd \-literal ,
605 an empty line will produce a zero-length string.
606 Multiple body parts are only found in invocations of
607 .Sq \&Bl \-column ,
608 where a new body introduces a new phrase.
609 .Pp
610 The
611 .Xr mdoc 7
612 syntax tree accommodates broken block structures as well.
613 The ENDBODY node is available to end the formatting associated
614 with a given block before the physical end of that block.
615 It has a non-null
616 .Va end
617 field, is of the BODY
618 .Va type ,
619 has the same
620 .Va tok
621 as the BLOCK it is ending, and has a
622 .Va pending
623 field pointing to that BLOCK's BODY node.
624 It is an indirect child of that BODY node
625 and has no children of its own.
626 .Pp
627 An ENDBODY node is generated when a block ends while one of its child
628 blocks is still open, like in the following example:
629 .Bd -literal -offset indent
630 \&.Ao ao
631 \&.Bo bo ac
632 \&.Ac bc
633 \&.Bc end
634 .Ed
635 .Pp
636 This example results in the following block structure:
637 .Bd -literal -offset indent
638 BLOCK Ao
639 HEAD Ao
640 BODY Ao
641 TEXT ao
642 BLOCK Bo, pending -> Ao
643 HEAD Bo
644 BODY Bo
645 TEXT bo
646 TEXT ac
647 ENDBODY Ao, pending -> Ao
648 TEXT bc
649 TEXT end
650 .Ed
651 .Pp
652 Here, the formatting of the
653 .Sq \&Ao
654 block extends from TEXT ao to TEXT ac,
655 while the formatting of the
656 .Sq \&Bo
657 block extends from TEXT bo to TEXT bc.
658 It renders as follows in
659 .Fl T Ns Cm ascii
660 mode:
661 .Pp
662 .Dl <ao [bo ac> bc] end
663 .Pp
664 Support for badly-nested blocks is only provided for backward
665 compatibility with some older
666 .Xr mdoc 7
667 implementations.
668 Using badly-nested blocks is
669 .Em strongly discouraged ;
670 for example, the
671 .Fl T Ns Cm html
672 and
673 .Fl T Ns Cm xhtml
674 front-ends to
675 .Xr mandoc 1
676 are unable to render them in any meaningful way.
677 Furthermore, behaviour when encountering badly-nested blocks is not
678 consistent across troff implementations, especially when using multiple
679 levels of badly-nested blocks.
680 .Sh SEE ALSO
681 .Xr mandoc 1 ,
682 .Xr man.cgi 3 ,
683 .Xr mandoc_escape 3 ,
684 .Xr mandoc_headers 3 ,
685 .Xr mandoc_malloc 3 ,
686 .Xr mansearch 3 ,
687 .Xr mchars_alloc 3 ,
688 .Xr tbl 3 ,
689 .Xr eqn 7 ,
690 .Xr man 7 ,
691 .Xr mandoc_char 7 ,
692 .Xr mdoc 7 ,
693 .Xr roff 7 ,
694 .Xr tbl 7
695 .Sh AUTHORS
696 .An -nosplit
697 The
698 .Nm
699 library was written by
700 .An Kristaps Dzonsons Aq Mt kristaps@bsd.lv
701 and is maintained by
702 .An Ingo Schwarze Aq Mt schwarze@openbsd.org .