]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
*** empty log message ***
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.13 2008/11/27 17:27:50 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the
7 * above copyright notice and this permission notice appear in all
8 * copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12 * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13 * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
16 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 * PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include <assert.h>
20 #include <ctype.h>
21 #include <err.h>
22 #include <stdarg.h>
23 #include <stdlib.h>
24 #include <stdio.h>
25 #include <string.h>
26 #include <time.h>
27
28 #include "libmdocml.h"
29 #include "private.h"
30
31 /* FIXME: warn if Pp occurs before/after Sh etc. (see mdoc.samples). */
32
33 /* FIXME: warn about "X section only" macros. */
34
35 /* FIXME: warn about empty lists. */
36
37 /* FIXME: ; : } ) (etc.) after text macros? */
38
39 /* FIXME: NAME section needs specific elements. */
40
/* Capacity of the per-line argument vectors used by the parser. */
#define	ROFF_MAXARG	  32

/* Direction in which a macro callback is invoked. */
enum	roffd {
	ROFF_ENTER = 0,		/* The macro is being processed/opened. */
	ROFF_EXIT = 1		/* A previously opened scope is closed. */
};

/* Broad class of a macro, as recorded in the token table. */
enum	rofftype {
	ROFF_COMMENT = 0,
	ROFF_TEXT = 1,
	ROFF_LAYOUT = 2,
	ROFF_SPECIAL = 3
};
54
struct	rofftree;

/*
 * Parameter list shared by every macro callback: the token id, the
 * parse tree, the words of the macro line (NULL when a scope is being
 * closed by the parser) and the direction of the call.
 */
#define	ROFFCALL_ARGS						\
	int tok, struct rofftree *tree,				\
	const char *argv[], enum roffd type
60
61 struct rofftok {
62 int (*cb)(ROFFCALL_ARGS); /* Callback. */
63 const int *args; /* Args (or NULL). */
64 const int *parents;
65 const int *children;
66 int ctx;
67 enum rofftype type; /* Type of macro. */
68 int flags;
69 #define ROFF_PARSED (1 << 0) /* "Parsed". */
70 #define ROFF_CALLABLE (1 << 1) /* "Callable". */
71 #define ROFF_QUOTES (1 << 2) /* Quoted args. */
72 #define ROFF_SHALLOW (1 << 3) /* Nesting block. */
73 };
74
/* Description of one macro argument (e.g. `-offset'). */
struct	roffarg {
	int		  flags;	/* Bit-set of the ROFF_* below. */
};

/* Values for roffarg.flags (also used by the `tokenargs' table). */
#define	ROFF_VALUE	 (1 << 0)	/* Has a value. */
79
/*
 * One open scope.  Nodes are chained through `parent', with
 * rofftree.last pointing at the innermost one.
 */
struct	roffnode {
	int		  tok;		/* Token id. */
	struct roffnode	 *parent;	/* Enclosing node (or NULL). */
	size_t		  line;		/* Input line it was parsed at. */
};
85
/* Complete parser state for one input document. */
struct	rofftree {
	struct roffnode	 *last;			/* Last parsed node. */
	time_t		  date;			/* `Dd' results. */
	char		 *cur;			/* Line currently being parsed. */
	char		  os[64];		/* `Os' results. */
	char		  title[64];		/* `Dt' results. */
	char		  section[64];		/* `Dt' results. */
	char		  volume[64];		/* `Dt' results. */
	int		  state;		/* Bit-set of the ROFF_* below. */
	struct md_mbuf	 *mbuf;			/* Output (or NULL). */
	const struct md_args *args;		/* Global args. */
	const struct md_rbuf *rbuf;		/* Input. */
	const struct roffcb *cb;		/* Front-end callbacks. */
};

/* Values for rofftree.state. */
#define	ROFF_PRELUDE	 (1 << 1)	/* In roff prelude. */
/* FIXME: if we had prev ptrs, this wouldn't be necessary. */
#define	ROFF_PRELUDE_Os	 (1 << 2)	/* `Os' is parsed. */
#define	ROFF_PRELUDE_Dt	 (1 << 3)	/* `Dt' is parsed. */
#define	ROFF_PRELUDE_Dd	 (1 << 4)	/* `Dd' is parsed. */
#define	ROFF_BODY	 (1 << 5)	/* In roff body. */
106
/* Prelude macro callbacks. */
static	int	  roff_Dd(ROFFCALL_ARGS);
static	int	  roff_Dt(ROFFCALL_ARGS);
static	int	  roff_Os(ROFFCALL_ARGS);

/* Generic macro callbacks. */
static	int	  roff_layout(ROFFCALL_ARGS);
static	int	  roff_text(ROFFCALL_ARGS);
static	int	  roff_comment(ROFFCALL_ARGS);
static	int	  roff_close(ROFFCALL_ARGS);
static	int	  roff_special(ROFFCALL_ARGS);

/* Scope-stack maintenance. */
static	struct roffnode *roffnode_new(int tokid, struct rofftree *tree);
static	void	  roffnode_free(int tokid, struct rofftree *tree);

/* Diagnostics. */
static	void	  roff_warn(const struct rofftree *tree,
			const char *pos, char *fmt, ...);
static	void	  roff_err(const struct rofftree *tree,
			const char *pos, char *fmt, ...);

/* Lookup and line-parsing helpers. */
static	int	  roffscan(int tok, const int *tokv);
static	int	  rofffindtok(const char *buf);
static	int	  rofffindarg(const char *name);
static	int	  rofffindcallable(const char *name);
static	int	  roffargs(const struct rofftree *tree,
			int tok, char *buf, char **argv);
static	int	  roffargok(int tokid, int argid);
static	int	  roffnextopt(const struct rofftree *tree,
			int tok, const char ***in, char **val);
static	int	  roffparse(struct rofftree *tree, char *buf, size_t sz);
static	int	  textparse(const struct rofftree *tree,
			const char *buf, size_t sz);
137
138
139 static const int roffarg_An[] = { ROFF_Split, ROFF_Nosplit,
140 ROFF_ARGMAX };
141 static const int roffarg_Bd[] = { ROFF_Ragged, ROFF_Unfilled,
142 ROFF_Literal, ROFF_File, ROFF_Offset, ROFF_Filled,
143 ROFF_Compact, ROFF_ARGMAX };
144 static const int roffarg_Bk[] = { ROFF_Words, ROFF_ARGMAX };
145 static const int roffarg_Ex[] = { ROFF_Std, ROFF_ARGMAX };
146 static const int roffarg_Rv[] = { ROFF_Std, ROFF_ARGMAX };
147 static const int roffarg_Bl[] = { ROFF_Bullet, ROFF_Dash,
148 ROFF_Hyphen, ROFF_Item, ROFF_Enum, ROFF_Tag, ROFF_Diag,
149 ROFF_Hang, ROFF_Ohang, ROFF_Inset, ROFF_Column, ROFF_Offset,
150 ROFF_Width, ROFF_Compact, ROFF_ARGMAX };
151 static const int roffarg_St[] = {
152 ROFF_p1003_1_88, ROFF_p1003_1_90, ROFF_p1003_1_96,
153 ROFF_p1003_1_2001, ROFF_p1003_1_2004, ROFF_p1003_1,
154 ROFF_p1003_1b, ROFF_p1003_1b_93, ROFF_p1003_1c_95,
155 ROFF_p1003_1g_2000, ROFF_p1003_2_92, ROFF_p1387_2_95,
156 ROFF_p1003_2, ROFF_p1387_2, ROFF_isoC_90, ROFF_isoC_amd1,
157 ROFF_isoC_tcor1, ROFF_isoC_tcor2, ROFF_isoC_99, ROFF_ansiC,
158 ROFF_ansiC_89, ROFF_ansiC_99, ROFF_ieee754, ROFF_iso8802_3,
159 ROFF_xpg3, ROFF_xpg4, ROFF_xpg4_2, ROFF_xpg4_3, ROFF_xbd5,
160 ROFF_xcu5, ROFF_xsh5, ROFF_xns5, ROFF_xns5_2d2_0,
161 ROFF_xcurses4_2, ROFF_susv2, ROFF_susv3, ROFF_svid4,
162 ROFF_ARGMAX };
163
164 static const int roffchild_Bl[] = { ROFF_It, ROFF_El, ROFF_MAX };
165 static const int roffchild_Fo[] = { ROFF_Fa, ROFF_Fc, ROFF_MAX };
166 static const int roffchild_Oo[] = { ROFF_Op, ROFF_Oc, ROFF_MAX };
167 static const int roffchild_Rs[] = { ROFF_Re, ROFF__A, ROFF__B,
168 ROFF__D, ROFF__I, ROFF__J, ROFF__N, ROFF__O, ROFF__P,
169 ROFF__R, ROFF__T, ROFF__V, ROFF_MAX };
170
171 static const int roffparent_El[] = { ROFF_Bl, ROFF_It, ROFF_MAX };
172 static const int roffparent_Fc[] = { ROFF_Fo, ROFF_Fa, ROFF_MAX };
173 static const int roffparent_Oc[] = { ROFF_Oo, ROFF_Oc, ROFF_MAX };
174 static const int roffparent_It[] = { ROFF_Bl, ROFF_It, ROFF_MAX };
175 static const int roffparent_Re[] = { ROFF_Rs, ROFF_MAX };
176
177 /*
 * Table of all known tokens, indexed by token id; rows appear in the
 * same order as the names in toknamesp.  The columns follow the
 * members of struct rofftok:
 *
 *   callback, accepted arguments, accepted parents, accepted children,
 *   scope context, macro type, flags
 *
 * A NULL callback marks a macro that roffparse() and the callable
 * scan in roff_layout()/roff_text() reject as unsupported.  A NULL
 * parent or child list places no restriction (see roffscan()).
 */
178 static const struct rofftok tokens[ROFF_MAX] =
179 {
180 {roff_comment, NULL, NULL, NULL, 0, ROFF_COMMENT, 0 }, /* \" */
181 { roff_Dd, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* Dd */
182 { roff_Dt, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* Dt */
183 { roff_Os, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_QUOTES }, /* Os */
184 { roff_layout, NULL, NULL, NULL, ROFF_Sh, ROFF_LAYOUT, ROFF_PARSED }, /* Sh */
185 { roff_layout, NULL, NULL, NULL, ROFF_Ss, ROFF_LAYOUT, ROFF_PARSED }, /* Ss */
186 { roff_text, NULL, NULL, NULL, ROFF_Pp, ROFF_TEXT, 0 }, /* Pp */
187 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* D1 */
188 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* Dl */
189 { roff_layout, roffarg_Bd, NULL, NULL, 0, ROFF_LAYOUT, 0 }, /* Bd */
190 { roff_close, NULL, NULL, NULL, ROFF_Bd, ROFF_LAYOUT, 0 }, /* Ed */
191 { roff_layout, roffarg_Bl, NULL, roffchild_Bl, 0, ROFF_LAYOUT, 0 }, /* Bl */
192 { roff_close, NULL, roffparent_El, NULL, ROFF_Bl, ROFF_LAYOUT, 0 }, /* El */
193 { roff_layout, NULL, roffparent_It, NULL, ROFF_It, ROFF_LAYOUT, ROFF_SHALLOW }, /* It */
194 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Ad */
195 { roff_text, roffarg_An, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* An */
196 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Ar */
197 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_QUOTES }, /* Cd */ /* XXX man.4 only */
198 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Cm */
199 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Dv */ /* XXX needs arg */
200 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Er */ /* XXX needs arg */
201 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Ev */ /* XXX needs arg */
202 { roff_text, roffarg_Ex, NULL, NULL, 0, ROFF_TEXT, 0 }, /* Ex */
203 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Fa */ /* XXX needs arg */
204 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* Fd */
205 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Fl */
206 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Fn */ /* XXX needs arg */ /* FIXME */
207 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* Ft */
208 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Ic */ /* XXX needs arg */
209 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* In */
210 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Li */
211 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_QUOTES }, /* Nd */
212 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Nm */ /* FIXME */
213 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Op */
214 { NULL, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* Ot */ /* XXX deprecated */
215 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Pa */
216 { roff_text, roffarg_Rv, NULL, NULL, 0, ROFF_TEXT, 0 }, /* Rv */
217 { roff_text, roffarg_St, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* St */
218 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Va */
219 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Vt */ /* XXX needs arg */
220 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Xr */ /* XXX needs arg */
221 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* %A */
222 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE}, /* %B */
223 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* %D */
224 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE}, /* %I */
225 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE}, /* %J */
226 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* %N */
227 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* %O */
228 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* %P */
229 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* %R */
230 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* %T */
231 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* %V */
232 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Ac */
233 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Ao */
234 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Aq */
235 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* At */ /* XXX at most 2 args */
236 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Bc */
237 { NULL, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* Bf */ /* FIXME */
238 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Bo */
239 { NULL, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Bq */
240 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* Bsx */
241 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* Bx */
242 {roff_special, NULL, NULL, NULL, 0, ROFF_SPECIAL, 0 }, /* Db */
243 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Dc */
244 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Do */
245 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Dq */
246 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Ec */
247 { NULL, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* Ef */ /* FIXME */
248 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Em */ /* XXX needs arg */
249 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Eo */
250 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* Fx */
251 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* Ms */
252 { NULL, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* No */
253 { NULL, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Ns */
254 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* Nx */
255 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* Ox */
256 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Pc */
257 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* Pf */
258 { roff_text, NULL, NULL, NULL, 0, ROFF_LAYOUT, ROFF_PARSED | ROFF_CALLABLE }, /* Po */
259 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Pq */
260 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Qc */
261 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Ql */
262 { roff_layout, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Qo */
263 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Qq */
264 { roff_close, NULL, roffparent_Re, NULL, ROFF_Rs, ROFF_LAYOUT, 0 }, /* Re */
265 { roff_layout, NULL, NULL, roffchild_Rs, 0, ROFF_LAYOUT, 0 }, /* Rs */
266 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Sc */
267 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* So */
268 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Sq */
269 {roff_special, NULL, NULL, NULL, 0, ROFF_SPECIAL, 0 }, /* Sm */
270 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Sx */
271 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Sy */
272 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Tn */
273 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* Ux */
274 { NULL, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Xc */
275 { NULL, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Xo */
276 { roff_layout, NULL, NULL, NULL, 0, ROFF_LAYOUT, 0 }, /* Fo */
277 { roff_close, NULL, roffparent_Fc, NULL, ROFF_Fo, ROFF_LAYOUT, 0 }, /* Fc */
278 { roff_layout, NULL, NULL, NULL, 0, ROFF_LAYOUT, 0 }, /* Oo */
279 { roff_close, NULL, roffparent_Oc, NULL, ROFF_Oo, ROFF_LAYOUT, 0 }, /* Oc */
280 { roff_layout, roffarg_Bk, NULL, NULL, 0, ROFF_LAYOUT, 0 }, /* Bk */
281 { roff_close, NULL, NULL, NULL, ROFF_Bk, ROFF_LAYOUT, 0 }, /* Ek */
282 };
283
284 /* Table of all known token arguments. */
285 static const int tokenargs[ROFF_ARGMAX] =
286 {
287 0, 0, 0, 0,
288 0, ROFF_VALUE, ROFF_VALUE, 0,
289 0, 0, 0, 0,
290 0, 0, 0, 0,
291 0, 0, 0, 0,
292 0, 0, 0, 0,
293 0, 0, 0, 0,
294 0, 0, 0, 0,
295 0, 0, 0, 0,
296 0, 0, 0, 0,
297 0, 0, 0, 0,
298 0, 0, 0, 0,
299 0, 0, 0, 0,
300 0, 0, 0, 0,
301 0, 0, 0, 0,
302 };
303
304 const char *const toknamesp[ROFF_MAX] =
305 {
306 "\\\"", "Dd", "Dt", "Os",
307 "Sh", "Ss", "Pp", "D1",
308 "Dl", "Bd", "Ed", "Bl",
309 "El", "It", "Ad", "An",
310 "Ar", "Cd", "Cm", "Dv",
311 "Er", "Ev", "Ex", "Fa",
312 "Fd", "Fl", "Fn", "Ft",
313 "Ic", "In", "Li", "Nd",
314 "Nm", "Op", "Ot", "Pa",
315 "Rv", "St", "Va", "Vt",
316 "Xr", "\%A", "\%B", "\%D",
317 "\%I", "\%J", "\%N", "\%O",
318 "\%P", "\%R", "\%T", "\%V",
319 "Ac", "Ao", "Aq", "At",
320 "Bc", "Bf", "Bo", "Bq",
321 "Bsx", "Bx", "Db", "Dc",
322 "Do", "Dq", "Ec", "Ef",
323 "Em", "Eo", "Fx", "Ms",
324 "No", "Ns", "Nx", "Ox",
325 "Pc", "Pf", "Po", "Pq",
326 "Qc", "Ql", "Qo", "Qq",
327 "Re", "Rs", "Sc", "So",
328 "Sq", "Sm", "Sx", "Sy",
329 "Tn", "Ux", "Xc", "Xo",
330 "Fo", "Fc", "Oo", "Oc",
331 "Bk", "Ek",
332 };
333
334 const char *const tokargnamesp[ROFF_ARGMAX] =
335 {
336 "split", "nosplit", "ragged",
337 "unfilled", "literal", "file",
338 "offset", "bullet", "dash",
339 "hyphen", "item", "enum",
340 "tag", "diag", "hang",
341 "ohang", "inset", "column",
342 "width", "compact", "std",
343 "p1003.1-88", "p1003.1-90", "p1003.1-96",
344 "p1003.1-2001", "p1003.1-2004", "p1003.1",
345 "p1003.1b", "p1003.1b-93", "p1003.1c-95",
346 "p1003.1g-2000", "p1003.2-92", "p1387.2-95",
347 "p1003.2", "p1387.2", "isoC-90",
348 "isoC-amd1", "isoC-tcor1", "isoC-tcor2",
349 "isoC-99", "ansiC", "ansiC-89",
350 "ansiC-99", "ieee754", "iso8802-3",
351 "xpg3", "xpg4", "xpg4.2",
352 "xpg4.3", "xbd5", "xcu5",
353 "xsh5", "xns5", "xns5.2d2.0",
354 "xcurses4.2", "susv2", "susv3",
355 "svid4", "filled", "words",
356 };
357
358
359 const char *const *toknames = toknamesp;
360 const char *const *tokargnames = tokargnamesp;
361
362
363 int
364 roff_free(struct rofftree *tree, int flush)
365 {
366 int error;
367
368 assert(tree->mbuf);
369 if ( ! flush)
370 tree->mbuf = NULL;
371
372 /* LINTED */
373 while (tree->last)
374 if ( ! (*tokens[tree->last->tok].cb)
375 (tree->last->tok, tree, NULL, ROFF_EXIT))
376 /* Disallow flushing. */
377 tree->mbuf = NULL;
378
379 error = tree->mbuf ? 0 : 1;
380
381 if (tree->mbuf && (ROFF_PRELUDE & tree->state)) {
382 /*roff_warn(tree, "prelude never finished");*/
383 error = 1;
384 }
385
386 free(tree);
387 return(error ? 0 : 1);
388 }
389
390
391 struct rofftree *
392 roff_alloc(const struct md_args *args, struct md_mbuf *out,
393 const struct md_rbuf *in, const struct roffcb *cb)
394 {
395 struct rofftree *tree;
396
397 if (NULL == (tree = calloc(1, sizeof(struct rofftree))))
398 err(1, "calloc");
399
400 tree->state = ROFF_PRELUDE;
401 tree->args = args;
402 tree->mbuf = out;
403 tree->rbuf = in;
404 tree->cb = cb;
405
406 return(tree);
407 }
408
409
410 int
411 roff_engine(struct rofftree *tree, char *buf, size_t sz)
412 {
413
414 tree->cur = buf;
415
416 if (0 == sz) {
417 roff_warn(tree, buf, "blank line");
418 return(0);
419 } else if ('.' != *buf)
420 return(textparse(tree, buf, sz));
421
422 return(roffparse(tree, buf, sz));
423 }
424
425
/*
 * Handle one line of free-form text.  Currently a placeholder that
 * accepts every line and always returns 1.
 */
static int
textparse(const struct rofftree *tree, const char *buf, size_t sz)
{

	/* Print text. */

	(void)tree;
	(void)buf;
	(void)sz;
	return(1);
}
433
434
435 static int
436 roffargs(const struct rofftree *tree,
437 int tok, char *buf, char **argv)
438 {
439 int i;
440 char *p;
441
442 assert(tok >= 0 && tok < ROFF_MAX);
443 assert('.' == *buf);
444
445 p = buf;
446
447 /* LINTED */
448 for (i = 0; *buf && i < ROFF_MAXARG; i++) {
449 if ('\"' == *buf) {
450 argv[i] = ++buf;
451 while (*buf && '\"' != *buf)
452 buf++;
453 if (0 == *buf) {
454 roff_err(tree, argv[i], "unclosed "
455 "quote in argument "
456 "list for `%s'",
457 toknames[tok]);
458 return(0);
459 }
460 } else {
461 argv[i] = buf++;
462 while (*buf && ! isspace(*buf))
463 buf++;
464 if (0 == *buf)
465 continue;
466 }
467 *buf++ = 0;
468 while (*buf && isspace(*buf))
469 buf++;
470 }
471
472 assert(i > 0);
473 if (ROFF_MAXARG == i && *buf) {
474 roff_err(tree, p, "too many arguments for `%s'", toknames
475 [tok]);
476 return(0);
477 }
478
479 argv[i] = NULL;
480 return(1);
481 }
482
483
484 /* XXX */
485 static int
486 roffscan(int tok, const int *tokv)
487 {
488
489 if (NULL == tokv)
490 return(1);
491
492 for ( ; ROFF_MAX != *tokv; tokv++)
493 if (tok == *tokv)
494 return(1);
495
496 return(0);
497 }
498
499
500 static int
501 roffparse(struct rofftree *tree, char *buf, size_t sz)
502 {
503 int tok, t;
504 struct roffnode *n;
505 char *argv[ROFF_MAXARG];
506 const char **argvp;
507
508 assert(sz > 0);
509
510 if (ROFF_MAX == (tok = rofffindtok(buf + 1))) {
511 roff_err(tree, buf + 1, "bogus line macro");
512 return(0);
513 } else if (NULL == tokens[tok].cb) {
514 roff_err(tree, buf + 1, "unsupported macro `%s'",
515 toknames[tok]);
516 return(0);
517 } else if (ROFF_COMMENT == tokens[tok].type)
518 return(1);
519
520 if ( ! roffargs(tree, tok, buf, argv))
521 return(0);
522
523 argvp = (const char **)argv;
524
525 /*
526 * Prelude macros break some assumptions, so branch now.
527 */
528
529 if (ROFF_PRELUDE & tree->state) {
530 assert(NULL == tree->last);
531 return((*tokens[tok].cb)(tok, tree, argvp, ROFF_ENTER));
532 } else
533 assert(tree->last);
534
535 assert(ROFF_BODY & tree->state);
536
537 /*
538 * First check that our possible parents and parent's possible
539 * children are satisfied.
540 */
541
542 if ( ! roffscan(tree->last->tok, tokens[tok].parents)) {
543 warnx("%s: invalid parent `%s' for `%s' (line %zu)",
544 tree->rbuf->name,
545 toknames[tree->last->tok],
546 toknames[tok], tree->rbuf->line);
547 return(0);
548 }
549
550 if ( ! roffscan(tok, tokens[tree->last->tok].children)) {
551 warnx("%s: invalid child `%s' for `%s' (line %zu)",
552 tree->rbuf->name, toknames[tok],
553 toknames[tree->last->tok],
554 tree->rbuf->line);
555 return(0);
556 }
557
558 /*
559 * Branch if we're not a layout token.
560 */
561
562 if (ROFF_LAYOUT != tokens[tok].type)
563 return((*tokens[tok].cb)(tok, tree, argvp, ROFF_ENTER));
564
565 /*
566 * Check our scope rules.
567 */
568
569 if (0 == tokens[tok].ctx)
570 return((*tokens[tok].cb)(tok, tree, argvp, ROFF_ENTER));
571
572 /*
573 * First consider implicit-end tags, like as follows:
574 * .Sh SECTION 1
575 * .Sh SECTION 2
576 * In this, we want to close the scope of the NAME section. If
577 * there's an intermediary implicit-end tag, such as
578 * .Sh SECTION 1
579 * .Ss Subsection 1
580 * .Sh SECTION 2
581 * then it must be closed as well.
582 */
583
584 if (tok == tokens[tok].ctx) {
585 /*
586 * First search up to the point where we must close.
587 * If one doesn't exist, then we can open a new scope.
588 */
589
590 for (n = tree->last; n; n = n->parent) {
591 assert(0 == tokens[n->tok].ctx ||
592 n->tok == tokens[n->tok].ctx);
593 if (n->tok == tok)
594 break;
595 if (ROFF_SHALLOW & tokens[tok].flags) {
596 n = NULL;
597 break;
598 }
599 }
600
601 /*
602 * Create a new scope, as no previous one exists to
603 * close out.
604 */
605
606 if (NULL == n)
607 return((*tokens[tok].cb)(tok, tree, argvp, ROFF_ENTER));
608
609 /*
610 * Close out all intermediary scoped blocks, then hang
611 * the current scope from our predecessor's parent.
612 */
613
614 do {
615 t = tree->last->tok;
616 if ( ! (*tokens[t].cb)(t, tree, NULL, ROFF_EXIT))
617 return(0);
618 } while (t != tok);
619
620 return((*tokens[tok].cb)(tok, tree, argvp, ROFF_ENTER));
621 }
622
623 /*
624 * Now consider explicit-end tags, where we want to close back
625 * to a specific tag. Example:
626 * .Bl
627 * .It Item.
628 * .El
629 * In this, the `El' tag closes out the scope of `Bl'.
630 */
631
632 assert(tree->last);
633 assert(tok != tokens[tok].ctx && 0 != tokens[tok].ctx);
634
635 do {
636 t = tree->last->tok;
637 if ( ! (*tokens[t].cb)(t, tree, NULL, ROFF_EXIT))
638 return(0);
639 } while (t != tokens[tok].ctx);
640
641 assert(tree->last);
642 return(1);
643 }
644
645
646 static int
647 rofffindarg(const char *name)
648 {
649 size_t i;
650
651 /* FIXME: use a table, this is slow but ok for now. */
652
653 /* LINTED */
654 for (i = 0; i < ROFF_ARGMAX; i++)
655 /* LINTED */
656 if (0 == strcmp(name, tokargnames[i]))
657 return((int)i);
658
659 return(ROFF_ARGMAX);
660 }
661
662
663 static int
664 rofffindtok(const char *buf)
665 {
666 char token[4];
667 size_t i;
668
669 for (i = 0; *buf && ! isspace(*buf) && i < 3; i++, buf++)
670 token[i] = *buf;
671
672 if (i == 3)
673 return(ROFF_MAX);
674
675 token[i] = 0;
676
677 /* FIXME: use a table, this is slow but ok for now. */
678
679 /* LINTED */
680 for (i = 0; i < ROFF_MAX; i++)
681 /* LINTED */
682 if (0 == strcmp(toknames[i], token))
683 return((int)i);
684
685 return(ROFF_MAX);
686 }
687
688
689 static int
690 rofffindcallable(const char *name)
691 {
692 int c;
693
694 if (ROFF_MAX == (c = rofffindtok(name)))
695 return(ROFF_MAX);
696 assert(c >= 0 && c < ROFF_MAX);
697 return(ROFF_CALLABLE & tokens[c].flags ? c : ROFF_MAX);
698 }
699
700
701 static struct roffnode *
702 roffnode_new(int tokid, struct rofftree *tree)
703 {
704 struct roffnode *p;
705
706 if (NULL == (p = malloc(sizeof(struct roffnode))))
707 err(1, "malloc");
708
709 p->line = tree->rbuf->line;
710 p->tok = tokid;
711 p->parent = tree->last;
712 tree->last = p;
713
714 return(p);
715 }
716
717
718 static int
719 roffargok(int tokid, int argid)
720 {
721 const int *c;
722
723 if (NULL == (c = tokens[tokid].args))
724 return(0);
725
726 for ( ; ROFF_ARGMAX != *c; c++)
727 if (argid == *c)
728 return(1);
729
730 return(0);
731 }
732
733
734 static void
735 roffnode_free(int tokid, struct rofftree *tree)
736 {
737 struct roffnode *p;
738
739 assert(tree->last);
740 assert(tree->last->tok == tokid);
741
742 p = tree->last;
743 tree->last = tree->last->parent;
744 free(p);
745 }
746
747
748 static int
749 roffnextopt(const struct rofftree *tree, int tok,
750 const char ***in, char **val)
751 {
752 const char *arg, **argv;
753 int v;
754
755 *val = NULL;
756 argv = *in;
757 assert(argv);
758
759 if (NULL == (arg = *argv))
760 return(-1);
761 if ('-' != *arg)
762 return(-1);
763
764 if (ROFF_ARGMAX == (v = rofffindarg(arg + 1))) {
765 roff_warn(tree, arg, "argument-like parameter `%s' to "
766 "`%s'", &arg[1], toknames[tok]);
767 return(-1);
768 }
769
770 if ( ! roffargok(tok, v)) {
771 roff_warn(tree, arg, "invalid argument parameter `%s' to "
772 "`%s'", tokargnames[v], toknames[tok]);
773 return(-1);
774 }
775
776 if ( ! (ROFF_VALUE & tokenargs[v]))
777 return(v);
778
779 *in = ++argv;
780
781 if (NULL == *argv) {
782 roff_err(tree, arg, "empty value of `%s' for `%s'",
783 tokargnames[v], toknames[tok]);
784 return(ROFF_ARGMAX);
785 }
786
787 return(v);
788 }
789
790
791 /* ARGSUSED */
792 static int
793 roff_Dd(ROFFCALL_ARGS)
794 {
795
796 if (ROFF_BODY & tree->state) {
797 assert( ! (ROFF_PRELUDE & tree->state));
798 assert(ROFF_PRELUDE_Dd & tree->state);
799 return(roff_text(tok, tree, argv, type));
800 }
801
802 assert(ROFF_PRELUDE & tree->state);
803 assert( ! (ROFF_BODY & tree->state));
804
805 if (ROFF_PRELUDE_Dd & tree->state) {
806 roff_err(tree, *argv, "repeated `Dd' in prelude");
807 return(0);
808 } else if (ROFF_PRELUDE_Dt & tree->state) {
809 roff_err(tree, *argv, "out-of-order `Dd' in prelude");
810 return(0);
811 }
812
813 /* TODO: parse date. */
814
815 assert(NULL == tree->last);
816 tree->state |= ROFF_PRELUDE_Dd;
817
818 return(1);
819 }
820
821
822 /* ARGSUSED */
823 static int
824 roff_Dt(ROFFCALL_ARGS)
825 {
826
827 if (ROFF_BODY & tree->state) {
828 assert( ! (ROFF_PRELUDE & tree->state));
829 assert(ROFF_PRELUDE_Dt & tree->state);
830 return(roff_text(tok, tree, argv, type));
831 }
832
833 assert(ROFF_PRELUDE & tree->state);
834 assert( ! (ROFF_BODY & tree->state));
835
836 if ( ! (ROFF_PRELUDE_Dd & tree->state)) {
837 roff_err(tree, *argv, "out-of-order `Dt' in prelude");
838 return(0);
839 } else if (ROFF_PRELUDE_Dt & tree->state) {
840 roff_err(tree, *argv, "repeated `Dt' in prelude");
841 return(0);
842 }
843
844 /* TODO: parse date. */
845
846 assert(NULL == tree->last);
847 tree->state |= ROFF_PRELUDE_Dt;
848
849 return(1);
850 }
851
852
853 /* ARGSUSED */
854 static int
855 roff_Os(ROFFCALL_ARGS)
856 {
857
858 if (ROFF_EXIT == type) {
859 assert(ROFF_PRELUDE_Os & tree->state);
860 return(roff_layout(tok, tree, argv, type));
861 } else if (ROFF_BODY & tree->state) {
862 assert( ! (ROFF_PRELUDE & tree->state));
863 assert(ROFF_PRELUDE_Os & tree->state);
864 return(roff_text(tok, tree, argv, type));
865 }
866
867 assert(ROFF_PRELUDE & tree->state);
868 if ( ! (ROFF_PRELUDE_Dt & tree->state) ||
869 ! (ROFF_PRELUDE_Dd & tree->state)) {
870 roff_err(tree, *argv, "out-of-order `Os' in prelude");
871 return(0);
872 }
873
874 /* TODO: extract OS. */
875
876 tree->state |= ROFF_PRELUDE_Os;
877 tree->state &= ~ROFF_PRELUDE;
878 tree->state |= ROFF_BODY;
879
880 assert(NULL == tree->last);
881
882 return(roff_layout(tok, tree, argv, type));
883 }
884
885
886 /* ARGSUSED */
887 static int
888 roff_layout(ROFFCALL_ARGS)
889 {
890 int i, c, argcp[ROFF_MAXARG];
891 char *v, *argvp[ROFF_MAXARG];
892
893 if (ROFF_PRELUDE & tree->state) {
894 roff_err(tree, *argv, "`%s' disallowed in prelude",
895 toknames[tok]);
896 return(0);
897 }
898
899 if (ROFF_EXIT == type) {
900 roffnode_free(tok, tree);
901 return((*tree->cb->roffblkout)(tree->args, tok));
902 }
903
904 i = 0;
905 argv++;
906
907 while (-1 != (c = roffnextopt(tree, tok, &argv, &v))) {
908 if (ROFF_ARGMAX == c)
909 return(0);
910
911 argcp[i] = c;
912 argvp[i] = v;
913 i++;
914 argv++;
915 }
916
917 argcp[i] = ROFF_ARGMAX;
918 argvp[i] = NULL;
919
920 if (NULL == roffnode_new(tok, tree))
921 return(0);
922
923 if ( ! (*tree->cb->roffin)(tree->args, tok, argcp, argvp))
924 return(0);
925
926 if ( ! (ROFF_PARSED & tokens[tok].flags)) {
927
928 /* TODO: print all tokens. */
929
930 if ( ! ((*tree->cb->roffout)(tree->args, tok)))
931 return(0);
932 return((*tree->cb->roffblkin)(tree->args, tok));
933 }
934
935 while (*argv) {
936 if (ROFF_MAX != (c = rofffindcallable(*argv))) {
937 if (NULL == tokens[c].cb) {
938 roff_err(tree, *argv, "unsupported "
939 "macro `%s'",
940 toknames[c]);
941 return(0);
942 }
943 if ( ! (*tokens[c].cb)(c, tree,
944 argv, ROFF_ENTER))
945 return(0);
946 }
947
948 /* TODO: print token. */
949
950 argv++;
951 }
952
953 if ( ! ((*tree->cb->roffout)(tree->args, tok)))
954 return(0);
955
956 return((*tree->cb->roffblkin)(tree->args, tok));
957 }
958
959
960 /* ARGSUSED */
961 static int
962 roff_text(ROFFCALL_ARGS)
963 {
964 int i, c, argcp[ROFF_MAXARG];
965 char *v, *argvp[ROFF_MAXARG];
966
967 if (ROFF_PRELUDE & tree->state) {
968 roff_err(tree, *argv, "`%s' disallowed in prelude",
969 toknames[tok]);
970 return(0);
971 }
972
973 i = 0;
974 argv++;
975
976 while (-1 != (c = roffnextopt(tree, tok, &argv, &v))) {
977 if (ROFF_ARGMAX == c)
978 return(0);
979
980 argcp[i] = c;
981 argvp[i] = v;
982 i++;
983 argv++;
984 }
985
986 argcp[i] = ROFF_ARGMAX;
987 argvp[i] = NULL;
988
989 if ( ! (*tree->cb->roffin)(tree->args, tok, argcp, argvp))
990 return(0);
991
992 if ( ! (ROFF_PARSED & tokens[tok].flags)) {
993
994 /* TODO: print all tokens. */
995
996 return((*tree->cb->roffout)(tree->args, tok));
997 }
998
999 while (*argv) {
1000 if (ROFF_MAX != (c = rofffindcallable(*argv))) {
1001 if (NULL == tokens[c].cb) {
1002 roff_err(tree, *argv, "unsupported "
1003 "macro `%s'",
1004 toknames[c]);
1005 return(0);
1006 }
1007 if ( ! (*tokens[c].cb)(c, tree,
1008 argv, ROFF_ENTER))
1009 return(0);
1010 }
1011
1012 /* TODO: print token. */
1013
1014 argv++;
1015 }
1016
1017 return((*tree->cb->roffout)(tree->args, tok));
1018 }
1019
1020
1021 /* ARGSUSED */
1022 static int
1023 roff_comment(ROFFCALL_ARGS)
1024 {
1025
1026 return(1);
1027 }
1028
1029
1030 /* ARGSUSED */
1031 static int
1032 roff_close(ROFFCALL_ARGS)
1033 {
1034
1035 return(1);
1036 }
1037
1038
1039 /* ARGSUSED */
1040 static int
1041 roff_special(ROFFCALL_ARGS)
1042 {
1043
1044 return((*tree->cb->roffspecial)(tok));
1045 }
1046
1047
1048 static void
1049 roff_warn(const struct rofftree *tree, const char *pos, char *fmt, ...)
1050 {
1051 va_list ap;
1052 char buf[128];
1053
1054 va_start(ap, fmt);
1055 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
1056 va_end(ap);
1057
1058 (*tree->cb->roffmsg)(tree->args, ROFF_WARN, tree->cur, pos,
1059 tree->rbuf->name, tree->rbuf->line, buf);
1060 }
1061
1062
1063 static void
1064 roff_err(const struct rofftree *tree, const char *pos, char *fmt, ...)
1065 {
1066 va_list ap;
1067 char buf[128];
1068
1069 va_start(ap, fmt);
1070 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
1071 va_end(ap);
1072
1073 (*tree->cb->roffmsg)(tree->args, ROFF_ERROR, tree->cur, pos,
1074 tree->rbuf->name, tree->rbuf->line, buf);
1075 }