]> git.cameronkatri.com Git - mandoc.git/blob - mdoc.3
8b15b9983f853d0170f17978ce61c976a393e265
[mandoc.git] / mdoc.3
1 .\" $Id: mdoc.3,v 1.44 2010/06/27 16:18:13 kristaps Exp $
2 .\"
3 .\" Copyright (c) 2009-2010 Kristaps Dzonsons <kristaps@bsd.lv>
4 .\"
5 .\" Permission to use, copy, modify, and distribute this software for any
6 .\" purpose with or without fee is hereby granted, provided that the above
7 .\" copyright notice and this permission notice appear in all copies.
8 .\"
9 .\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 .\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 .\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 .\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 .\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 .\"
17 .Dd $Mdocdate: June 27 2010 $
18 .Dt MDOC 3
19 .Os
20 .Sh NAME
21 .Nm mdoc ,
22 .Nm mdoc_alloc ,
23 .Nm mdoc_endparse ,
24 .Nm mdoc_free ,
25 .Nm mdoc_meta ,
26 .Nm mdoc_node ,
27 .Nm mdoc_parseln ,
28 .Nm mdoc_reset
29 .Nd mdoc macro compiler library
30 .Sh SYNOPSIS
31 .In mandoc.h
32 .In regs.h
33 .In mdoc.h
34 .Vt extern const char * const * mdoc_macronames;
35 .Vt extern const char * const * mdoc_argnames;
36 .Ft "struct mdoc *"
37 .Fo mdoc_alloc
38 .Fa "struct regset *regs"
39 .Fa "void *data"
40 .Fa "int pflags"
41 .Fa "mandocmsg msgs"
42 .Fc
43 .Ft int
44 .Fn mdoc_endparse "struct mdoc *mdoc"
45 .Ft void
46 .Fn mdoc_free "struct mdoc *mdoc"
47 .Ft "const struct mdoc_meta *"
48 .Fn mdoc_meta "const struct mdoc *mdoc"
49 .Ft "const struct mdoc_node *"
50 .Fn mdoc_node "const struct mdoc *mdoc"
51 .Ft int
52 .Fo mdoc_parseln
53 .Fa "struct mdoc *mdoc"
54 .Fa "int line"
55 .Fa "char *buf"
56 .Fc
57 .Ft int
58 .Fn mdoc_reset "struct mdoc *mdoc"
59 .Sh DESCRIPTION
60 The
61 .Nm mdoc
62 library parses lines of
63 .Xr mdoc 7
64 input
65 into an abstract syntax tree (AST).
66 .Pp
67 In general, applications initiate a parsing sequence with
68 .Fn mdoc_alloc ,
69 parse each line in a document with
70 .Fn mdoc_parseln ,
71 close the parsing session with
72 .Fn mdoc_endparse ,
73 operate over the syntax tree returned by
74 .Fn mdoc_node
75 and
76 .Fn mdoc_meta ,
77 then free all allocated memory with
78 .Fn mdoc_free .
79 The
80 .Fn mdoc_reset
81 function may be used in order to reset the parser for another input
82 sequence.
83 See the
84 .Sx EXAMPLES
85 section for a simple example.
86 .Pp
87 This section further defines the
88 .Sx Types ,
89 .Sx Functions
90 and
91 .Sx Variables
92 available to programmers.
93 Following that, the
94 .Sx Abstract Syntax Tree
95 section documents the output tree.
96 .Ss Types
97 Both functions (see
98 .Sx Functions )
99 and variables (see
100 .Sx Variables )
101 may use the following types:
102 .Bl -ohang
103 .It Vt struct mdoc
104 An opaque type defined in
105 .Pa mdoc.c .
106 Its values are only used privately within the library.
107 .It Vt struct mdoc_node
108 A parsed node.
109 Defined in
110 .Pa mdoc.h .
111 See
112 .Sx Abstract Syntax Tree
113 for details.
114 .It Vt mandocmsg
115 A function callback type defined in
116 .Pa mandoc.h .
117 .El
118 .Ss Functions
119 Function descriptions follow:
120 .Bl -ohang
121 .It Fn mdoc_alloc
122 Allocates a parsing structure.
123 The
124 .Fa data
125 pointer is passed to
126 .Fa msgs .
127 The
128 .Fa pflags
129 arguments are defined in
130 .Pa mdoc.h .
131 Returns NULL on failure.
132 If non-NULL, the pointer must be freed with
133 .Fn mdoc_free .
134 .It Fn mdoc_reset
135 Reset the parser for another parse routine.
136 After its use,
137 .Fn mdoc_parseln
138 behaves as if invoked for the first time.
139 If it returns 0, memory could not be allocated.
140 .It Fn mdoc_free
141 Free all resources of a parser.
142 The pointer is no longer valid after invocation.
143 .It Fn mdoc_parseln
144 Parse a NUL-terminated line of input.
145 This line should not contain the trailing newline.
146 Returns 0 on failure, 1 on success.
147 The input buffer
148 .Fa buf
149 is modified by this function.
150 .It Fn mdoc_endparse
151 Signals that the parse is complete.
152 Note that if
153 .Fn mdoc_endparse
154 is called subsequent to
155 .Fn mdoc_node ,
156 the resulting tree is incomplete.
157 Returns 0 on failure, 1 on success.
158 .It Fn mdoc_node
159 Returns the first node of the parse.
160 Note that if
161 .Fn mdoc_parseln
162 or
163 .Fn mdoc_endparse
164 returns 0, the tree will be incomplete.
165 .It Fn mdoc_meta
166 Returns the document's parsed meta-data.
167 If this information has not yet been supplied or
168 .Fn mdoc_parseln
169 or
170 .Fn mdoc_endparse
171 returns 0, the data will be incomplete.
172 .El
173 .Ss Variables
174 The following variables are also defined:
175 .Bl -ohang
176 .It Va mdoc_macronames
177 An array of string-ified token names.
178 .It Va mdoc_argnames
179 An array of string-ified token argument names.
180 .El
181 .Ss Abstract Syntax Tree
182 The
183 .Nm
184 functions produce an abstract syntax tree (AST) describing input in a
185 regular form.
186 It may be reviewed at any time with
187 .Fn mdoc_node ;
188 however, if called before
189 .Fn mdoc_endparse ,
190 or after
191 .Fn mdoc_endparse
192 or
193 .Fn mdoc_parseln
194 fails, it may be incomplete.
195 .Pp
196 This AST is governed by the ontological
197 rules dictated in
198 .Xr mdoc 7
199 and derives its terminology accordingly.
200 .Qq In-line
201 elements described in
202 .Xr mdoc 7
203 are described simply as
204 .Qq elements .
205 .Pp
206 The AST is composed of
207 .Vt struct mdoc_node
208 nodes with block, head, body, element, root and text types as declared
209 by the
210 .Va type
211 field.
212 Each node also provides its parse point (the
213 .Va line ,
214 .Va sec ,
215 and
216 .Va pos
217 fields), its position in the tree (the
218 .Va parent ,
219 .Va child ,
220 .Va next
221 and
222 .Va prev
223 fields) and some type-specific data.
224 .Pp
225 The tree itself is arranged according to the following normal form,
226 where capitalised non-terminals represent nodes.
227 .Pp
228 .Bl -tag -width "ELEMENTXX" -compact
229 .It ROOT
230 \(<- mnode+
231 .It mnode
232 \(<- BLOCK | ELEMENT | TEXT
233 .It BLOCK
234 \(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
235 .It ELEMENT
236 \(<- TEXT*
237 .It HEAD
238 \(<- mnode+
239 .It BODY
240 \(<- mnode+
241 .It TAIL
242 \(<- mnode+
243 .It TEXT
244 \(<- [[:printable:],0x1e]*
245 .El
246 .Pp
247 Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
248 the BLOCK production: these refer to punctuation marks.
249 Furthermore, although a TEXT node will generally have a non-zero-length
250 string, in the specific case of
251 .Sq \&.Bd \-literal ,
252 an empty line will produce a zero-length string.
253 Multiple body parts are only found in invocations of
254 .Sq \&.Bl \-column ,
255 where a new body introduces a new phrase.
256 .Sh EXAMPLES
257 The following example reads lines from stdin and parses them, operating
258 on the finished parse tree with
259 .Fn parsed .
260 This example does not error-check nor free memory upon failure.
261 .Bd -literal -offset indent
262 struct regset regs;
263 struct mdoc *mdoc;
264 const struct mdoc_node *node;
265 char *buf;
266 ssize_t len; size_t alloc_len;
267 int line;
268
269 bzero(&regs, sizeof(struct regset));
270 line = 1;
271 mdoc = mdoc_alloc(&regs, NULL, 0, NULL);
272 buf = NULL;
273 alloc_len = 0;
274
275 while ((len = getline(&buf, &alloc_len, stdin)) >= 0) {
276 if (len && buf[len - 1] == '\en')
277 buf[len - 1] = '\e0';
278 if ( ! mdoc_parseln(mdoc, line, buf))
279 errx(1, "mdoc_parseln");
280 line++;
281 }
282
283 if ( ! mdoc_endparse(mdoc))
284 errx(1, "mdoc_endparse");
285 if (NULL == (node = mdoc_node(mdoc)))
286 errx(1, "mdoc_node");
287
288 parsed(mdoc, node);
289 mdoc_free(mdoc);
290 .Ed
291 .Pp
292 Please see
293 .Pa main.c
294 in the source archive for a rigorous reference.
295 .Sh SEE ALSO
296 .Xr mandoc 1 ,
297 .Xr mdoc 7
298 .Sh AUTHORS
299 The
300 .Nm
301 library was written by
302 .An Kristaps Dzonsons Aq kristaps@bsd.lv .