]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
Initial pushing to mbuf.
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.14 2008/11/28 11:21:12 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the
7 * above copyright notice and this permission notice appear in all
8 * copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12 * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13 * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
16 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 * PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include <assert.h>
20 #include <ctype.h>
21 #include <err.h>
22 #include <stdarg.h>
23 #include <stdlib.h>
24 #include <stdio.h>
25 #include <string.h>
26 #include <time.h>
27
28 #include "libmdocml.h"
29 #include "private.h"
30
31 /* FIXME: warn if Pp occurs before/after Sh etc. (see mdoc.samples). */
32
33 /* FIXME: warn about "X section only" macros. */
34
35 /* FIXME: warn about empty lists. */
36
37 /* FIXME: ; : } ) (etc.) after text macros? */
38
39 /* FIXME: NAME section needs specific elements. */
40
/*
 * Capacity of the per-line argument vectors: sizes argv[] in
 * roffparse() and argcp[]/argvp[] in roff_layout() and roff_text(),
 * and bounds the tokenising loop in roffargs().
 */
#define	ROFF_MAXARG	  32
42
/*
 * Direction in which a macro callback is invoked: ROFF_ENTER when the
 * macro is encountered on a line, ROFF_EXIT when an open scope is
 * being closed (see roffparse() and roff_free()).
 */
enum	roffd {
	ROFF_ENTER = 0,
	ROFF_EXIT
};
47
/*
 * Broad class of a macro, stored in struct rofftok.  roffparse()
 * silently accepts ROFF_COMMENT lines and only applies its scope
 * rules to ROFF_LAYOUT tokens; every other type is dispatched
 * straight to its callback.
 */
enum	rofftype {
	ROFF_COMMENT,
	ROFF_TEXT,
	ROFF_LAYOUT,
	ROFF_SPECIAL
};
54
/*
 * Parameter list shared by every macro callback:
 *   tok   token id being entered or exited (index into tokens[]);
 *   tree  parser state;
 *   argv  NULL-terminated words of the line, argv[0] being the macro
 *         itself; NULL when the callback is invoked with ROFF_EXIT
 *         from roffparse() or roff_free();
 *   type  ROFF_ENTER or ROFF_EXIT.
 */
#define	ROFFCALL_ARGS \
	int tok, struct rofftree *tree, \
	const char *argv[], enum roffd type

/* Forward declaration: the callback type below refers to it. */
struct	rofftree;
60
/*
 * Static description of one macro; tokens[] holds one per token id.
 *
 * `parents' and `children' are ROFF_MAX-terminated lists consulted by
 * roffscan(); a NULL list places no restriction.  `args' is a
 * ROFF_ARGMAX-terminated list consulted by roffargok(); a NULL list
 * means no option is accepted.
 *
 * `ctx' drives the scope rules in roffparse(): 0 means no scoping, a
 * value equal to the token's own id means the token implicitly closes
 * an earlier open instance of itself, and any other value names the
 * token whose scope this one explicitly closes.
 */
struct	rofftok {
	int		(*cb)(ROFFCALL_ARGS);	/* Callback. */
	const int	 *args;			/* Args (or NULL). */
	const int	 *parents;		/* Allowed parents (or NULL). */
	const int	 *children;		/* Allowed children (or NULL). */
	int		  ctx;			/* Scope rule, see above. */
	enum rofftype	  type;			/* Type of macro. */
	int		  flags;
#define	ROFF_PARSED	 (1 << 0)		/* "Parsed". */
#define	ROFF_CALLABLE	 (1 << 1)		/* "Callable". */
#define	ROFF_QUOTES	 (1 << 2)		/* Quoted args. */
#define	ROFF_SHALLOW	 (1 << 3)		/* Nesting block. */
};
74
/*
 * Per-argument flags.  The ROFF_VALUE bit is what tokenargs[] stores
 * and roffnextopt() tests; the struct itself is not referenced in the
 * code visible here.
 */
struct	roffarg {
	int		  flags;
#define	ROFF_VALUE	 (1 << 0)		/* Has a value. */
};
79
/*
 * One open scope.  Nodes form a singly-linked stack through `parent':
 * roffnode_new() pushes onto tree->last and roffnode_free() pops it.
 */
struct	roffnode {
	int		  tok;			/* Token id. */
	struct roffnode	 *parent;		/* Parent (or NULL). */
	size_t		  line;			/* Parsed at line. */
};
85
/*
 * Parser state for one input file, created by roff_alloc() and
 * released by roff_free().
 */
struct	rofftree {
	struct roffnode	 *last;			/* Last parsed node. */
	char		 *cur;			/* Reset to NULL per line by
						   roff_engine(); handed to
						   the roffmsg callback. */

	time_t		  date;			/* `Dd' results. */
	char		  os[64];		/* `Os' results. */
	char		  title[64];		/* `Dt' results. */
	char		  section[64];		/* `Dt' results. */
	char		  volume[64];		/* `Dt' results. */

	/* Bitmask of the ROFF_PRELUDE and ROFF_BODY flags below. */
	int		  state;
#define	ROFF_PRELUDE	 (1 << 1)		/* In roff prelude. */
#define	ROFF_PRELUDE_Os	 (1 << 2)		/* `Os' is parsed. */
#define	ROFF_PRELUDE_Dt	 (1 << 3)		/* `Dt' is parsed. */
#define	ROFF_PRELUDE_Dd	 (1 << 4)		/* `Dd' is parsed. */
#define	ROFF_BODY	 (1 << 5)		/* In roff body. */

	struct md_mbuf	 *mbuf;			/* Output (or NULL). */
	const struct md_args *args;		/* Global args. */
	const struct md_rbuf *rbuf;		/* Input. */
	const struct roffcb *cb;		/* Front-end callbacks
						   (roffin, roffout,
						   roffblkin, roffblkout,
						   roffspecial, roffmsg). */
};
108
/* Prelude macro callbacks. */
static	int		  roff_Dd(ROFFCALL_ARGS);
static	int		  roff_Dt(ROFFCALL_ARGS);
static	int		  roff_Os(ROFFCALL_ARGS);

/* Generic macro callbacks referenced from tokens[]. */
static	int		  roff_layout(ROFFCALL_ARGS);
static	int		  roff_text(ROFFCALL_ARGS);
static	int		  roff_comment(ROFFCALL_ARGS);
static	int		  roff_close(ROFFCALL_ARGS);
static	int		  roff_special(ROFFCALL_ARGS);

/* Scope-stack push and pop. */
static	struct roffnode	 *roffnode_new(int, struct rofftree *);
static	void		  roffnode_free(int, struct rofftree *);

/* Diagnostics, routed through the roffmsg callback. */
static	void		  roff_warn(const struct rofftree *,
				const char *, char *, ...);
static	void		  roff_err(const struct rofftree *,
				const char *, char *, ...);

/* Lookup, tokenising and parsing helpers. */
static	int		  roffscan(int, const int *);
static	int		  rofffindtok(const char *);
static	int		  rofffindarg(const char *);
static	int		  rofffindcallable(const char *);
static	int		  roffargs(const struct rofftree *,
				int, char *, char **);
static	int		  roffargok(int, int);
static	int		  roffnextopt(const struct rofftree *,
				int, const char ***, char **);
static	int		  roffparse(struct rofftree *, char *, size_t);
static	int		  textparse(const struct rofftree *,
				const char *, size_t);

139
140
/*
 * Per-macro lists of accepted `-option' ids.  Each list ends with
 * ROFF_ARGMAX, the sentinel roffargok() stops at; the tokens[] table
 * points at these through its `args' column.
 */
static	const int roffarg_An[] = { ROFF_Split, ROFF_Nosplit,
	ROFF_ARGMAX };
static	const int roffarg_Bd[] = { ROFF_Ragged, ROFF_Unfilled,
	ROFF_Literal, ROFF_File, ROFF_Offset, ROFF_Filled,
	ROFF_Compact, ROFF_ARGMAX };
static	const int roffarg_Bk[] = { ROFF_Words, ROFF_ARGMAX };
static	const int roffarg_Ex[] = { ROFF_Std, ROFF_ARGMAX };
static	const int roffarg_Rv[] = { ROFF_Std, ROFF_ARGMAX };
static	const int roffarg_Bl[] = { ROFF_Bullet, ROFF_Dash,
	ROFF_Hyphen, ROFF_Item, ROFF_Enum, ROFF_Tag, ROFF_Diag,
	ROFF_Hang, ROFF_Ohang, ROFF_Inset, ROFF_Column, ROFF_Offset,
	ROFF_Width, ROFF_Compact, ROFF_ARGMAX };
static	const int roffarg_St[] = {
	ROFF_p1003_1_88, ROFF_p1003_1_90, ROFF_p1003_1_96,
	ROFF_p1003_1_2001, ROFF_p1003_1_2004, ROFF_p1003_1,
	ROFF_p1003_1b, ROFF_p1003_1b_93, ROFF_p1003_1c_95,
	ROFF_p1003_1g_2000, ROFF_p1003_2_92, ROFF_p1387_2_95,
	ROFF_p1003_2, ROFF_p1387_2, ROFF_isoC_90, ROFF_isoC_amd1,
	ROFF_isoC_tcor1, ROFF_isoC_tcor2, ROFF_isoC_99, ROFF_ansiC,
	ROFF_ansiC_89, ROFF_ansiC_99, ROFF_ieee754, ROFF_iso8802_3,
	ROFF_xpg3, ROFF_xpg4, ROFF_xpg4_2, ROFF_xpg4_3, ROFF_xbd5,
	ROFF_xcu5, ROFF_xsh5, ROFF_xns5, ROFF_xns5_2d2_0,
	ROFF_xcurses4_2, ROFF_susv2, ROFF_susv3, ROFF_svid4,
	ROFF_ARGMAX };
165
/*
 * Per-macro lists of the only tokens allowed directly inside the
 * named block.  Each list ends with ROFF_MAX, the sentinel roffscan()
 * stops at.
 */
static	const int roffchild_Bl[] = { ROFF_It, ROFF_El, ROFF_MAX };
static	const int roffchild_Fo[] = { ROFF_Fa, ROFF_Fc, ROFF_MAX };
static	const int roffchild_Oo[] = { ROFF_Op, ROFF_Oc, ROFF_MAX };
static	const int roffchild_Rs[] = { ROFF_Re, ROFF__A, ROFF__B,
	ROFF__D, ROFF__I, ROFF__J, ROFF__N, ROFF__O, ROFF__P,
	ROFF__R, ROFF__T, ROFF__V, ROFF_MAX };
172
/*
 * Per-macro lists of the only tokens under which the named macro may
 * appear (checked against the innermost open scope by roffparse()).
 * Each list ends with ROFF_MAX, the sentinel roffscan() stops at.
 *
 * NOTE(review): roffparent_Oc lists ROFF_Oc itself, whereas the El
 * and Fc lists pair the opener with its item macro (Bl/It, Fo/Fa);
 * ROFF_Op may have been intended -- confirm.
 */
static	const int roffparent_El[] = { ROFF_Bl, ROFF_It, ROFF_MAX };
static	const int roffparent_Fc[] = { ROFF_Fo, ROFF_Fa, ROFF_MAX };
static	const int roffparent_Oc[] = { ROFF_Oo, ROFF_Oc, ROFF_MAX };
static	const int roffparent_It[] = { ROFF_Bl, ROFF_It, ROFF_MAX };
static	const int roffparent_Re[] = { ROFF_Rs, ROFF_MAX };
178
/*
 * Table of all known tokens, indexed by token id; the trailing comment
 * on each row gives the macro name and matches toknamesp[].
 *
 * Columns follow struct rofftok:
 *   callback, args, parents, children, ctx, type, flags
 *
 * A NULL callback marks a macro that roffparse(), roff_layout() and
 * roff_text() reject as "unsupported".
 *
 * NOTE(review): the `Po' row pairs roff_text with ROFF_LAYOUT and the
 * `Qo' row pairs roff_layout with ROFF_TEXT, unlike the neighbouring
 * rows -- confirm these are intended.
 */
static	const struct rofftok tokens[ROFF_MAX] = {
	{roff_comment, NULL, NULL, NULL, 0, ROFF_COMMENT, 0 }, /* \" */
	{ roff_Dd, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* Dd */
	{ roff_Dt, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* Dt */
	{ roff_Os, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_QUOTES }, /* Os */
	{ roff_layout, NULL, NULL, NULL, ROFF_Sh, ROFF_LAYOUT, ROFF_PARSED }, /* Sh */
	{ roff_layout, NULL, NULL, NULL, ROFF_Ss, ROFF_LAYOUT, ROFF_PARSED }, /* Ss */
	{ roff_text, NULL, NULL, NULL, ROFF_Pp, ROFF_TEXT, 0 }, /* Pp */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* D1 */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* Dl */
	{ roff_layout, roffarg_Bd, NULL, NULL, 0, ROFF_LAYOUT, 0 }, /* Bd */
	{ roff_close, NULL, NULL, NULL, ROFF_Bd, ROFF_LAYOUT, 0 }, /* Ed */
	{ roff_layout, roffarg_Bl, NULL, roffchild_Bl, 0, ROFF_LAYOUT, 0 }, /* Bl */
	{ roff_close, NULL, roffparent_El, NULL, ROFF_Bl, ROFF_LAYOUT, 0 }, /* El */
	{ roff_layout, NULL, roffparent_It, NULL, ROFF_It, ROFF_LAYOUT, ROFF_SHALLOW }, /* It */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Ad */
	{ roff_text, roffarg_An, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* An */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Ar */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_QUOTES }, /* Cd */ /* XXX man.4 only */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Cm */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Dv */ /* XXX needs arg */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Er */ /* XXX needs arg */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Ev */ /* XXX needs arg */
	{ roff_text, roffarg_Ex, NULL, NULL, 0, ROFF_TEXT, 0 }, /* Ex */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Fa */ /* XXX needs arg */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* Fd */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Fl */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Fn */ /* XXX needs arg */ /* FIXME */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* Ft */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Ic */ /* XXX needs arg */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* In */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Li */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_QUOTES }, /* Nd */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Nm */ /* FIXME */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Op */
	{ NULL, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* Ot */ /* XXX deprecated */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Pa */
	{ roff_text, roffarg_Rv, NULL, NULL, 0, ROFF_TEXT, 0 }, /* Rv */
	{ roff_text, roffarg_St, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* St */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Va */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Vt */ /* XXX needs arg */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Xr */ /* XXX needs arg */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* %A */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE}, /* %B */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* %D */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE}, /* %I */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE}, /* %J */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* %N */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* %O */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* %P */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* %R */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* %T */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* %V */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Ac */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Ao */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Aq */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* At */ /* XXX at most 2 args */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Bc */
	{ NULL, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* Bf */ /* FIXME */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Bo */
	{ NULL, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Bq */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* Bsx */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* Bx */
	{roff_special, NULL, NULL, NULL, 0, ROFF_SPECIAL, 0 }, /* Db */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Dc */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Do */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Dq */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Ec */
	{ NULL, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* Ef */ /* FIXME */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Em */ /* XXX needs arg */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Eo */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* Fx */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* Ms */
	{ NULL, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* No */
	{ NULL, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Ns */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* Nx */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* Ox */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Pc */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* Pf */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_LAYOUT, ROFF_PARSED | ROFF_CALLABLE }, /* Po */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Pq */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Qc */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Ql */
	{ roff_layout, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Qo */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Qq */
	{ roff_close, NULL, roffparent_Re, NULL, ROFF_Rs, ROFF_LAYOUT, 0 }, /* Re */
	{ roff_layout, NULL, NULL, roffchild_Rs, 0, ROFF_LAYOUT, 0 }, /* Rs */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Sc */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* So */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Sq */
	{roff_special, NULL, NULL, NULL, 0, ROFF_SPECIAL, 0 }, /* Sm */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Sx */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Sy */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Tn */
	{ roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* Ux */
	{ NULL, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Xc */
	{ NULL, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Xo */
	{ roff_layout, NULL, NULL, roffchild_Fo, 0, ROFF_LAYOUT, 0 }, /* Fo */
	{ roff_close, NULL, roffparent_Fc, NULL, ROFF_Fo, ROFF_LAYOUT, 0 }, /* Fc */
	{ roff_layout, NULL, NULL, roffchild_Oo, 0, ROFF_LAYOUT, 0 }, /* Oo */
	{ roff_close, NULL, roffparent_Oc, NULL, ROFF_Oo, ROFF_LAYOUT, 0 }, /* Oc */
	{ roff_layout, roffarg_Bk, NULL, NULL, 0, ROFF_LAYOUT, 0 }, /* Bk */
	{ roff_close, NULL, NULL, NULL, ROFF_Bk, ROFF_LAYOUT, 0 }, /* Ek */
};
284
/*
 * Table of all known token arguments: one flag word per argument id,
 * in the same order as tokargnamesp[].  roffnextopt() consumes the
 * following word as the option's value when ROFF_VALUE is set; by
 * position the two flagged entries line up with "file" and "offset"
 * in tokargnamesp[].
 */
static	const int tokenargs[ROFF_ARGMAX] = {
	0, 0, 0, 0,
	0, ROFF_VALUE, ROFF_VALUE, 0,
	0, 0, 0, 0,
	0, 0, 0, 0,
	0, 0, 0, 0,
	0, 0, 0, 0,
	0, 0, 0, 0,
	0, 0, 0, 0,
	0, 0, 0, 0,
	0, 0, 0, 0,
	0, 0, 0, 0,
	0, 0, 0, 0,
	0, 0, 0, 0,
	0, 0, 0, 0,
	0, 0, 0, 0,
};
303
304 const char *const toknamesp[ROFF_MAX] = {
305 "\\\"", "Dd", "Dt", "Os",
306 "Sh", "Ss", "Pp", "D1",
307 "Dl", "Bd", "Ed", "Bl",
308 "El", "It", "Ad", "An",
309 "Ar", "Cd", "Cm", "Dv",
310 "Er", "Ev", "Ex", "Fa",
311 "Fd", "Fl", "Fn", "Ft",
312 "Ic", "In", "Li", "Nd",
313 "Nm", "Op", "Ot", "Pa",
314 "Rv", "St", "Va", "Vt",
315 "Xr", "\%A", "\%B", "\%D",
316 "\%I", "\%J", "\%N", "\%O",
317 "\%P", "\%R", "\%T", "\%V",
318 "Ac", "Ao", "Aq", "At",
319 "Bc", "Bf", "Bo", "Bq",
320 "Bsx", "Bx", "Db", "Dc",
321 "Do", "Dq", "Ec", "Ef",
322 "Em", "Eo", "Fx", "Ms",
323 "No", "Ns", "Nx", "Ox",
324 "Pc", "Pf", "Po", "Pq",
325 "Qc", "Ql", "Qo", "Qq",
326 "Re", "Rs", "Sc", "So",
327 "Sq", "Sm", "Sx", "Sy",
328 "Tn", "Ux", "Xc", "Xo",
329 "Fo", "Fc", "Oo", "Oc",
330 "Bk", "Ek",
331 };
332
/*
 * Option names indexed by argument id, without the leading `-'.
 * rofffindarg() matches input against these strings, and tokenargs[]
 * is laid out in the same order.
 */
const	char *const tokargnamesp[ROFF_ARGMAX] = {
	"split", "nosplit", "ragged",
	"unfilled", "literal", "file",
	"offset", "bullet", "dash",
	"hyphen", "item", "enum",
	"tag", "diag", "hang",
	"ohang", "inset", "column",
	"width", "compact", "std",
	"p1003.1-88", "p1003.1-90", "p1003.1-96",
	"p1003.1-2001", "p1003.1-2004", "p1003.1",
	"p1003.1b", "p1003.1b-93", "p1003.1c-95",
	"p1003.1g-2000", "p1003.2-92", "p1387.2-95",
	"p1003.2", "p1387.2", "isoC-90",
	"isoC-amd1", "isoC-tcor1", "isoC-tcor2",
	"isoC-99", "ansiC", "ansiC-89",
	"ansiC-99", "ieee754", "iso8802-3",
	"xpg3", "xpg4", "xpg4.2",
	"xpg4.3", "xbd5", "xcu5",
	"xsh5", "xns5", "xns5.2d2.0",
	"xcurses4.2", "susv2", "susv3",
	"svid4", "filled", "words",
};
355
/*
 * Externally visible pointers to the two name tables; the lookup and
 * diagnostic code in this file goes through these.
 */
const	char *const *toknames = toknamesp;
const	char *const *tokargnames = tokargnamesp;
358
359
360 int
361 roff_free(struct rofftree *tree, int flush)
362 {
363 int error, tok;
364
365 assert(tree->mbuf);
366 if ( ! flush)
367 tree->mbuf = NULL;
368
369 /* LINTED */
370 while (tree->last) {
371 if (tree->last->parent) {
372 tok = tree->last->tok;
373 if (tokens[tok].ctx == 0) {
374 warnx("%s: closing out explicit scope "
375 "of `%s' from line %zu",
376 tree->rbuf->name,
377 toknames[tok],
378 tree->last->line);
379 tree->mbuf = NULL;
380 }
381 }
382 if ( ! (*tokens[tree->last->tok].cb)
383 (tree->last->tok, tree, NULL, ROFF_EXIT))
384 /* Disallow flushing. */
385 tree->mbuf = NULL;
386 }
387
388 error = tree->mbuf ? 0 : 1;
389
390 if (tree->mbuf && (ROFF_PRELUDE & tree->state)) {
391 warnx("%s: prelude never finished",
392 tree->rbuf->name);
393 error = 1;
394 }
395
396 free(tree);
397 return(error ? 0 : 1);
398 }
399
400
401 struct rofftree *
402 roff_alloc(const struct md_args *args, struct md_mbuf *out,
403 const struct md_rbuf *in, const struct roffcb *cb)
404 {
405 struct rofftree *tree;
406
407 if (NULL == (tree = calloc(1, sizeof(struct rofftree))))
408 err(1, "calloc");
409
410 tree->state = ROFF_PRELUDE;
411 tree->args = args;
412 tree->mbuf = out;
413 tree->rbuf = in;
414 tree->cb = cb;
415
416 return(tree);
417 }
418
419
420 int
421 roff_engine(struct rofftree *tree, char *buf, size_t sz)
422 {
423
424 tree->cur = NULL;
425
426 if (0 == sz) {
427 roff_warn(tree, buf, "blank line");
428 return(0);
429 } else if ('.' != *buf)
430 return(textparse(tree, buf, sz));
431
432 return(roffparse(tree, buf, sz));
433 }
434
435
/*
 * Handle a free-form text line.  Currently a placeholder: nothing is
 * emitted and the line is always accepted.
 */
static int
textparse(const struct rofftree *tree, const char *buf, size_t sz)
{

	/* Print text. */
	return 1;
}
443
444
445 static int
446 roffargs(const struct rofftree *tree,
447 int tok, char *buf, char **argv)
448 {
449 int i;
450 char *p;
451
452 assert(tok >= 0 && tok < ROFF_MAX);
453 assert('.' == *buf);
454
455 p = buf;
456
457 /* LINTED */
458 for (i = 0; *buf && i < ROFF_MAXARG; i++) {
459 if ('\"' == *buf) {
460 argv[i] = ++buf;
461 while (*buf && '\"' != *buf)
462 buf++;
463 if (0 == *buf) {
464 roff_err(tree, argv[i], "unclosed "
465 "quote in argument "
466 "list for `%s'",
467 toknames[tok]);
468 return(0);
469 }
470 } else {
471 argv[i] = buf++;
472 while (*buf && ! isspace(*buf))
473 buf++;
474 if (0 == *buf)
475 continue;
476 }
477 *buf++ = 0;
478 while (*buf && isspace(*buf))
479 buf++;
480 }
481
482 assert(i > 0);
483 if (ROFF_MAXARG == i && *buf) {
484 roff_err(tree, p, "too many arguments for `%s'", toknames
485 [tok]);
486 return(0);
487 }
488
489 argv[i] = NULL;
490 return(1);
491 }
492
493
494 /* XXX */
495 static int
496 roffscan(int tok, const int *tokv)
497 {
498
499 if (NULL == tokv)
500 return(1);
501
502 for ( ; ROFF_MAX != *tokv; tokv++)
503 if (tok == *tokv)
504 return(1);
505
506 return(0);
507 }
508
509
510 static int
511 roffparse(struct rofftree *tree, char *buf, size_t sz)
512 {
513 int tok, t;
514 struct roffnode *n;
515 char *argv[ROFF_MAXARG];
516 const char **argvp;
517
518 assert(sz > 0);
519
520 if (ROFF_MAX == (tok = rofffindtok(buf + 1))) {
521 roff_err(tree, buf + 1, "bogus line macro");
522 return(0);
523 } else if (NULL == tokens[tok].cb) {
524 roff_err(tree, buf + 1, "unsupported macro `%s'",
525 toknames[tok]);
526 return(0);
527 } else if (ROFF_COMMENT == tokens[tok].type)
528 return(1);
529
530 if ( ! roffargs(tree, tok, buf, argv))
531 return(0);
532
533 argvp = (const char **)argv;
534
535 /*
536 * Prelude macros break some assumptions, so branch now.
537 */
538
539 if (ROFF_PRELUDE & tree->state) {
540 assert(NULL == tree->last);
541 return((*tokens[tok].cb)(tok, tree, argvp, ROFF_ENTER));
542 } else
543 assert(tree->last);
544
545 assert(ROFF_BODY & tree->state);
546
547 /*
548 * First check that our possible parents and parent's possible
549 * children are satisfied.
550 */
551
552 if ( ! roffscan(tree->last->tok, tokens[tok].parents)) {
553 roff_err(tree, *argvp, "`%s' has invalid parent `%s' "
554 "from line %zu", toknames[tok],
555 toknames[tree->rbuf->line],
556 tree->rbuf->line);
557 return(0);
558 }
559
560 if ( ! roffscan(tok, tokens[tree->last->tok].children)) {
561 roff_err(tree, *argvp, "`%s' is invalid child for "
562 "`%s' from line %zu", toknames[tok],
563 toknames[tree->rbuf->line],
564 tree->rbuf->line);
565 return(0);
566 }
567
568 /*
569 * Branch if we're not a layout token.
570 */
571
572 if (ROFF_LAYOUT != tokens[tok].type)
573 return((*tokens[tok].cb)(tok, tree, argvp, ROFF_ENTER));
574
575 /*
576 * Check our scope rules.
577 */
578
579 if (0 == tokens[tok].ctx)
580 return((*tokens[tok].cb)(tok, tree, argvp, ROFF_ENTER));
581
582 /*
583 * First consider implicit-end tags, like as follows:
584 * .Sh SECTION 1
585 * .Sh SECTION 2
586 * In this, we want to close the scope of the NAME section. If
587 * there's an intermediary implicit-end tag, such as
588 * .Sh SECTION 1
589 * .Ss Subsection 1
590 * .Sh SECTION 2
591 * then it must be closed as well.
592 */
593
594 if (tok == tokens[tok].ctx) {
595 /*
596 * First search up to the point where we must close.
597 * If one doesn't exist, then we can open a new scope.
598 */
599
600 for (n = tree->last; n; n = n->parent) {
601 assert(0 == tokens[n->tok].ctx ||
602 n->tok == tokens[n->tok].ctx);
603 if (n->tok == tok)
604 break;
605 if (ROFF_SHALLOW & tokens[tok].flags) {
606 n = NULL;
607 break;
608 }
609 }
610
611 /*
612 * Create a new scope, as no previous one exists to
613 * close out.
614 */
615
616 if (NULL == n)
617 return((*tokens[tok].cb)(tok, tree, argvp, ROFF_ENTER));
618
619 /*
620 * Close out all intermediary scoped blocks, then hang
621 * the current scope from our predecessor's parent.
622 */
623
624 do {
625 t = tree->last->tok;
626 if ( ! (*tokens[t].cb)(t, tree, NULL, ROFF_EXIT))
627 return(0);
628 } while (t != tok);
629
630 return((*tokens[tok].cb)(tok, tree, argvp, ROFF_ENTER));
631 }
632
633 /*
634 * Now consider explicit-end tags, where we want to close back
635 * to a specific tag. Example:
636 * .Bl
637 * .It Item.
638 * .El
639 * In this, the `El' tag closes out the scope of `Bl'.
640 */
641
642 assert(tree->last);
643 assert(tok != tokens[tok].ctx && 0 != tokens[tok].ctx);
644
645 /* LINTED */
646 do {
647 t = tree->last->tok;
648 if ( ! (*tokens[t].cb)(t, tree, NULL, ROFF_EXIT))
649 return(0);
650 } while (t != tokens[tok].ctx);
651
652 assert(tree->last);
653 return(1);
654 }
655
656
657 static int
658 rofffindarg(const char *name)
659 {
660 size_t i;
661
662 /* FIXME: use a table, this is slow but ok for now. */
663
664 /* LINTED */
665 for (i = 0; i < ROFF_ARGMAX; i++)
666 /* LINTED */
667 if (0 == strcmp(name, tokargnames[i]))
668 return((int)i);
669
670 return(ROFF_ARGMAX);
671 }
672
673
674 static int
675 rofffindtok(const char *buf)
676 {
677 char token[4];
678 size_t i;
679
680 for (i = 0; *buf && ! isspace(*buf) && i < 3; i++, buf++)
681 token[i] = *buf;
682
683 if (i == 3)
684 return(ROFF_MAX);
685
686 token[i] = 0;
687
688 /* FIXME: use a table, this is slow but ok for now. */
689
690 /* LINTED */
691 for (i = 0; i < ROFF_MAX; i++)
692 /* LINTED */
693 if (0 == strcmp(toknames[i], token))
694 return((int)i);
695
696 return(ROFF_MAX);
697 }
698
699
700 static int
701 rofffindcallable(const char *name)
702 {
703 int c;
704
705 if (ROFF_MAX == (c = rofffindtok(name)))
706 return(ROFF_MAX);
707 assert(c >= 0 && c < ROFF_MAX);
708 return(ROFF_CALLABLE & tokens[c].flags ? c : ROFF_MAX);
709 }
710
711
712 static struct roffnode *
713 roffnode_new(int tokid, struct rofftree *tree)
714 {
715 struct roffnode *p;
716
717 if (NULL == (p = malloc(sizeof(struct roffnode))))
718 err(1, "malloc");
719
720 p->line = tree->rbuf->line;
721 p->tok = tokid;
722 p->parent = tree->last;
723 tree->last = p;
724
725 return(p);
726 }
727
728
729 static int
730 roffargok(int tokid, int argid)
731 {
732 const int *c;
733
734 if (NULL == (c = tokens[tokid].args))
735 return(0);
736
737 for ( ; ROFF_ARGMAX != *c; c++)
738 if (argid == *c)
739 return(1);
740
741 return(0);
742 }
743
744
745 static void
746 roffnode_free(int tokid, struct rofftree *tree)
747 {
748 struct roffnode *p;
749
750 assert(tree->last);
751 assert(tree->last->tok == tokid);
752
753 p = tree->last;
754 tree->last = tree->last->parent;
755 free(p);
756 }
757
758
759 static int
760 roffnextopt(const struct rofftree *tree, int tok,
761 const char ***in, char **val)
762 {
763 const char *arg, **argv;
764 int v;
765
766 *val = NULL;
767 argv = *in;
768 assert(argv);
769
770 if (NULL == (arg = *argv))
771 return(-1);
772 if ('-' != *arg)
773 return(-1);
774
775 if (ROFF_ARGMAX == (v = rofffindarg(arg + 1))) {
776 roff_warn(tree, arg, "argument-like parameter `%s' to "
777 "`%s'", &arg[1], toknames[tok]);
778 return(-1);
779 }
780
781 if ( ! roffargok(tok, v)) {
782 roff_warn(tree, arg, "invalid argument parameter `%s' to "
783 "`%s'", tokargnames[v], toknames[tok]);
784 return(-1);
785 }
786
787 if ( ! (ROFF_VALUE & tokenargs[v]))
788 return(v);
789
790 *in = ++argv;
791
792 if (NULL == *argv) {
793 roff_err(tree, arg, "empty value of `%s' for `%s'",
794 tokargnames[v], toknames[tok]);
795 return(ROFF_ARGMAX);
796 }
797
798 return(v);
799 }
800
801
802 /* ARGSUSED */
803 static int
804 roff_Dd(ROFFCALL_ARGS)
805 {
806
807 if (ROFF_BODY & tree->state) {
808 assert( ! (ROFF_PRELUDE & tree->state));
809 assert(ROFF_PRELUDE_Dd & tree->state);
810 return(roff_text(tok, tree, argv, type));
811 }
812
813 assert(ROFF_PRELUDE & tree->state);
814 assert( ! (ROFF_BODY & tree->state));
815
816 if (ROFF_PRELUDE_Dd & tree->state) {
817 roff_err(tree, *argv, "repeated `Dd' in prelude");
818 return(0);
819 } else if (ROFF_PRELUDE_Dt & tree->state) {
820 roff_err(tree, *argv, "out-of-order `Dd' in prelude");
821 return(0);
822 }
823
824 /* TODO: parse date. */
825
826 assert(NULL == tree->last);
827 tree->state |= ROFF_PRELUDE_Dd;
828
829 return(1);
830 }
831
832
833 /* ARGSUSED */
834 static int
835 roff_Dt(ROFFCALL_ARGS)
836 {
837
838 if (ROFF_BODY & tree->state) {
839 assert( ! (ROFF_PRELUDE & tree->state));
840 assert(ROFF_PRELUDE_Dt & tree->state);
841 return(roff_text(tok, tree, argv, type));
842 }
843
844 assert(ROFF_PRELUDE & tree->state);
845 assert( ! (ROFF_BODY & tree->state));
846
847 if ( ! (ROFF_PRELUDE_Dd & tree->state)) {
848 roff_err(tree, *argv, "out-of-order `Dt' in prelude");
849 return(0);
850 } else if (ROFF_PRELUDE_Dt & tree->state) {
851 roff_err(tree, *argv, "repeated `Dt' in prelude");
852 return(0);
853 }
854
855 /* TODO: parse date. */
856
857 assert(NULL == tree->last);
858 tree->state |= ROFF_PRELUDE_Dt;
859
860 return(1);
861 }
862
863
864 /* ARGSUSED */
865 static int
866 roff_Os(ROFFCALL_ARGS)
867 {
868
869 if (ROFF_EXIT == type) {
870 assert(ROFF_PRELUDE_Os & tree->state);
871 return(roff_layout(tok, tree, argv, type));
872 } else if (ROFF_BODY & tree->state) {
873 assert( ! (ROFF_PRELUDE & tree->state));
874 assert(ROFF_PRELUDE_Os & tree->state);
875 return(roff_text(tok, tree, argv, type));
876 }
877
878 assert(ROFF_PRELUDE & tree->state);
879 if ( ! (ROFF_PRELUDE_Dt & tree->state) ||
880 ! (ROFF_PRELUDE_Dd & tree->state)) {
881 roff_err(tree, *argv, "out-of-order `Os' in prelude");
882 return(0);
883 }
884
885 /* TODO: extract OS. */
886
887 tree->state |= ROFF_PRELUDE_Os;
888 tree->state &= ~ROFF_PRELUDE;
889 tree->state |= ROFF_BODY;
890
891 assert(NULL == tree->last);
892
893 return(roff_layout(tok, tree, argv, type));
894 }
895
896
897 /* ARGSUSED */
898 static int
899 roff_layout(ROFFCALL_ARGS)
900 {
901 int i, c, argcp[ROFF_MAXARG];
902 char *v, *argvp[ROFF_MAXARG];
903
904 if (ROFF_PRELUDE & tree->state) {
905 roff_err(tree, *argv, "`%s' disallowed in prelude",
906 toknames[tok]);
907 return(0);
908 }
909
910 if (ROFF_EXIT == type) {
911 roffnode_free(tok, tree);
912 return((*tree->cb->roffblkout)(tree->args, tok));
913 }
914
915 assert(tree->mbuf);
916
917 i = 0;
918 argv++;
919
920 while (-1 != (c = roffnextopt(tree, tok, &argv, &v))) {
921 if (ROFF_ARGMAX == c)
922 return(0);
923
924 argcp[i] = c;
925 argvp[i] = v;
926 i++;
927 argv++;
928 }
929
930 argcp[i] = ROFF_ARGMAX;
931 argvp[i] = NULL;
932
933 if (NULL == roffnode_new(tok, tree))
934 return(0);
935
936 if ( ! (*tree->cb->roffin)(tree->args, tok, argcp, argvp))
937 return(0);
938
939 if ( ! (ROFF_PARSED & tokens[tok].flags)) {
940 while (*argv) {
941 if ( ! md_buf_putstring(tree->mbuf, *argv++))
942 return(0);
943 if ( ! md_buf_putchar(tree->mbuf, ' '))
944 return(0);
945 }
946
947 if ( ! md_buf_putchar(tree->mbuf, '\n'))
948 return(0);
949
950 if ( ! ((*tree->cb->roffout)(tree->args, tok)))
951 return(0);
952 return((*tree->cb->roffblkin)(tree->args, tok));
953 }
954
955 while (*argv) {
956 if (ROFF_MAX != (c = rofffindcallable(*argv))) {
957 if (NULL == tokens[c].cb) {
958 roff_err(tree, *argv, "unsupported "
959 "macro `%s'",
960 toknames[c]);
961 return(0);
962 }
963 if ( ! (*tokens[c].cb)(c, tree, argv, ROFF_ENTER))
964 return(0);
965 break;
966 }
967
968 if ( ! md_buf_putstring(tree->mbuf, *argv++))
969 return(0);
970 if ( ! md_buf_putchar(tree->mbuf, ' '))
971 return(0);
972 }
973
974 if (NULL == *argv && ! md_buf_putchar(tree->mbuf, '\n'))
975 return(0);
976
977 if ( ! ((*tree->cb->roffout)(tree->args, tok)))
978 return(0);
979 return((*tree->cb->roffblkin)(tree->args, tok));
980 }
981
982
983 /* ARGSUSED */
984 static int
985 roff_text(ROFFCALL_ARGS)
986 {
987 int i, c, argcp[ROFF_MAXARG];
988 char *v, *argvp[ROFF_MAXARG];
989
990 assert(tree->mbuf);
991
992 if (ROFF_PRELUDE & tree->state) {
993 roff_err(tree, *argv, "`%s' disallowed in prelude",
994 toknames[tok]);
995 return(0);
996 }
997
998 i = 0;
999 argv++;
1000
1001 while (-1 != (c = roffnextopt(tree, tok, &argv, &v))) {
1002 if (ROFF_ARGMAX == c)
1003 return(0);
1004
1005 argcp[i] = c;
1006 argvp[i] = v;
1007 i++;
1008 argv++;
1009 }
1010
1011 argcp[i] = ROFF_ARGMAX;
1012 argvp[i] = NULL;
1013
1014 if ( ! (*tree->cb->roffin)(tree->args, tok, argcp, argvp))
1015 return(0);
1016
1017 if ( ! (ROFF_PARSED & tokens[tok].flags)) {
1018 while (*argv) {
1019 if ( ! md_buf_putstring(tree->mbuf, *argv++))
1020 return(0);
1021 if ( ! md_buf_putchar(tree->mbuf, ' '))
1022 return(0);
1023 }
1024 if ( ! md_buf_putchar(tree->mbuf, '\n'))
1025 return(0);
1026 return((*tree->cb->roffout)(tree->args, tok));
1027 }
1028
1029 while (*argv) {
1030 if (ROFF_MAX != (c = rofffindcallable(*argv))) {
1031 if (NULL == tokens[c].cb) {
1032 roff_err(tree, *argv, "unsupported "
1033 "macro `%s'",
1034 toknames[c]);
1035 return(0);
1036 }
1037 if ( ! (*tokens[c].cb)(c, tree,
1038 argv, ROFF_ENTER))
1039 return(0);
1040 break;
1041 }
1042
1043 if ( ! md_buf_putstring(tree->mbuf, *argv++))
1044 return(0);
1045 if ( ! md_buf_putchar(tree->mbuf, ' '))
1046 return(0);
1047 }
1048
1049 if (NULL == *argv && ! md_buf_putchar(tree->mbuf, '\n'))
1050 return(0);
1051
1052 return((*tree->cb->roffout)(tree->args, tok));
1053 }
1054
1055
/*
 * Callback for comment lines: nothing to do, always succeeds.
 */
/* ARGSUSED */
static int
roff_comment(ROFFCALL_ARGS)
{

	return 1;
}
1063
1064
/*
 * Callback for explicit block-closing macros.  The scopes themselves
 * are unwound by roffparse(), so this does nothing and always
 * succeeds.
 */
/* ARGSUSED */
static int
roff_close(ROFFCALL_ARGS)
{

	return 1;
}
1072
1073
1074 /* ARGSUSED */
1075 static int
1076 roff_special(ROFFCALL_ARGS)
1077 {
1078
1079 return((*tree->cb->roffspecial)(tree->args, tok));
1080 }
1081
1082
1083 static void
1084 roff_warn(const struct rofftree *tree, const char *pos, char *fmt, ...)
1085 {
1086 va_list ap;
1087 char buf[128];
1088
1089 va_start(ap, fmt);
1090 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
1091 va_end(ap);
1092
1093 (*tree->cb->roffmsg)(tree->args, ROFF_WARN, tree->cur, pos,
1094 tree->rbuf->name, tree->rbuf->line, buf);
1095 }
1096
1097
1098 static void
1099 roff_err(const struct rofftree *tree, const char *pos, char *fmt, ...)
1100 {
1101 va_list ap;
1102 char buf[128];
1103
1104 va_start(ap, fmt);
1105 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
1106 va_end(ap);
1107
1108 (*tree->cb->roffmsg)(tree->args, ROFF_ERROR, tree->cur, pos,
1109 tree->rbuf->name, tree->rbuf->line, buf);
1110 }