]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
Annotated omit-punctuation macros.
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.19 2008/11/29 16:23:22 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the
7 * above copyright notice and this permission notice appear in all
8 * copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12 * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13 * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
16 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 * PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include <assert.h>
20 #include <ctype.h>
21 #include <err.h>
22 #include <stdarg.h>
23 #include <stdlib.h>
24 #include <stdio.h>
25 #include <string.h>
26 #include <time.h>
27
28 #include "libmdocml.h"
29 #include "private.h"
30
31 /* FIXME: warn if Pp occurs before/after Sh etc. (see mdoc.samples). */
32
33 /* FIXME: warn about "X section only" macros. */
34
35 /* FIXME: warn about empty lists. */
36
37 /* FIXME: ; : } ) (etc.) after text macros? */
38
39 /* FIXME: NAME section needs specific elements. */
40
41 /* FIXME: don't print Os, just do roffbegin. */
42
43 #define ROFF_MAXARG 32
44
/*
 * Direction of a macro callback invocation.  In this file roffparse()
 * passes ROFF_ENTER when a macro is encountered and ROFF_EXIT when an
 * open node is being closed.
 */
enum	roffd {
	ROFF_ENTER = 0,
	ROFF_EXIT
};
49
/* Broad class of a macro, stored in the `type' field of struct rofftok. */
enum	rofftype {
	ROFF_COMMENT,	/* roffparse() accepts the line and ignores it. */
	ROFF_TEXT,
	ROFF_LAYOUT,	/* Only this class goes through the scope rules. */
	ROFF_SPECIAL
};
56
/*
 * Parameter list shared by all macro callbacks: the token id, the parse
 * tree, the NULL-terminated argument vector (callers in this file pass
 * NULL together with ROFF_EXIT) and the direction of the call.
 */
#define	ROFFCALL_ARGS \
	int tok, struct rofftree *tree, \
	char *argv[], enum roffd type
60
61 struct rofftree;
62
/* Static description of one macro; tokens[] holds one entry per token id. */
struct	rofftok {
	int		(*cb)(ROFFCALL_ARGS);	/* Callback. */
	const int	 *args;			/* Args (or NULL). */
	/* Tokens allowed as the enclosing node; ROFF_MAX-terminated, NULL = any. */
	const int	 *parents;
	/* Tokens allowed inside this node; ROFF_MAX-terminated, NULL = any. */
	const int	 *children;
	/*
	 * Scope rule for layout macros (see roffparse()): 0 opens a
	 * scope unconditionally, the token's own id means the scope is
	 * closed implicitly by the next such token, any other id names
	 * the scope that this token closes.
	 */
	int		  ctx;
	enum rofftype	  type;			/* Type of macro. */
	int		  flags;
#define	ROFF_PARSED	 (1 << 0)		/* "Parsed". */
#define	ROFF_CALLABLE	 (1 << 1)		/* "Callable". */
#define	ROFF_QUOTES	 (1 << 2)		/* Quoted args. */
#define	ROFF_SHALLOW	 (1 << 3)		/* Nesting block. */
/* NOTE(review): not read in this file; presumably marks macros with special punctuation handling - confirm. */
#define	ROFF_PUNCT	 (1 << 4)
};
77
/*
 * Flags describing one macro argument.
 * NOTE(review): the struct itself is not referenced in this file;
 * tokenargs[] stores the ROFF_VALUE flag in plain ints.
 */
struct	roffarg {
	int		  flags;
#define	ROFF_VALUE	 (1 << 0)	/* Has a value. */
};
82
/*
 * One open scope.  Nodes form a stack linked through `parent', with the
 * innermost scope reachable from rofftree.last.
 */
struct	roffnode {
	int		  tok;			/* Token id. */
	struct roffnode	 *parent;		/* Parent (or NULL). */
};
87
/* Parser state for one document, created by roff_alloc(). */
struct	rofftree {
	struct roffnode	 *last;			/* Last parsed node. */
	/* Line being parsed; set by roff_engine(), handed to cb.roffmsg. */
	char		 *cur;

	time_t		  date;			/* `Dd' results. */
	char		  os[64];		/* `Os' results. */
	char		  title[64];		/* `Dt' results. */
	char		  section[64];		/* `Dt' results. */
	char		  volume[64];		/* `Dt' results. */

	/* Bitmask of the ROFF_PRELUDE* and ROFF_BODY flags below. */
	int		  state;
#define	ROFF_PRELUDE	 (1 << 1)		/* In roff prelude. */
#define	ROFF_PRELUDE_Os	 (1 << 2)		/* `Os' is parsed. */
#define	ROFF_PRELUDE_Dt	 (1 << 3)		/* `Dt' is parsed. */
#define	ROFF_PRELUDE_Dd	 (1 << 4)		/* `Dd' is parsed. */
#define	ROFF_BODY	 (1 << 5)		/* In roff body. */

	struct roffcb	  cb;			/* Front-end callbacks, copied in roff_alloc(). */
	void		 *arg;			/* Opaque pointer passed as first argument to every callback. */
};
108
109 static int roff_Dd(ROFFCALL_ARGS);
110 static int roff_Dt(ROFFCALL_ARGS);
111 static int roff_Os(ROFFCALL_ARGS);
112
113 static int roff_layout(ROFFCALL_ARGS);
114 static int roff_text(ROFFCALL_ARGS);
115 static int roff_comment(ROFFCALL_ARGS);
116 static int roff_close(ROFFCALL_ARGS);
117 static int roff_special(ROFFCALL_ARGS);
118
119 static struct roffnode *roffnode_new(int, struct rofftree *);
120 static void roffnode_free(struct rofftree *);
121
122 static void roff_warn(const struct rofftree *,
123 const char *, char *, ...);
124 static void roff_err(const struct rofftree *,
125 const char *, char *, ...);
126
127 static int roffscan(int, const int *);
128 static int rofffindtok(const char *);
129 static int rofffindarg(const char *);
130 static int rofffindcallable(const char *);
131 static int roffargs(const struct rofftree *,
132 int, char *, char **);
133 static int roffargok(int, int);
134 static int roffnextopt(const struct rofftree *,
135 int, char ***, char **);
136 static int roffparse(struct rofftree *, char *);
137 static int textparse(const struct rofftree *, char *);
138
139
/*
 * Per-macro lists of accepted `-argument' ids.  Each list ends with
 * ROFF_ARGMAX, which is the sentinel roffargok() stops at.
 */
static	const int roffarg_An[] = { ROFF_Split, ROFF_Nosplit,
	ROFF_ARGMAX };
static	const int roffarg_Bd[] = { ROFF_Ragged, ROFF_Unfilled,
	ROFF_Literal, ROFF_File, ROFF_Offset, ROFF_Filled,
	ROFF_Compact, ROFF_ARGMAX };
static	const int roffarg_Bk[] = { ROFF_Words, ROFF_ARGMAX };
static	const int roffarg_Ex[] = { ROFF_Std, ROFF_ARGMAX };
static	const int roffarg_Rv[] = { ROFF_Std, ROFF_ARGMAX };
static	const int roffarg_Bl[] = { ROFF_Bullet, ROFF_Dash,
	ROFF_Hyphen, ROFF_Item, ROFF_Enum, ROFF_Tag, ROFF_Diag,
	ROFF_Hang, ROFF_Ohang, ROFF_Inset, ROFF_Column, ROFF_Offset,
	ROFF_Width, ROFF_Compact, ROFF_ARGMAX };
static	const int roffarg_St[] = {
	ROFF_p1003_1_88, ROFF_p1003_1_90, ROFF_p1003_1_96,
	ROFF_p1003_1_2001, ROFF_p1003_1_2004, ROFF_p1003_1,
	ROFF_p1003_1b, ROFF_p1003_1b_93, ROFF_p1003_1c_95,
	ROFF_p1003_1g_2000, ROFF_p1003_2_92, ROFF_p1387_2_95,
	ROFF_p1003_2, ROFF_p1387_2, ROFF_isoC_90, ROFF_isoC_amd1,
	ROFF_isoC_tcor1, ROFF_isoC_tcor2, ROFF_isoC_99, ROFF_ansiC,
	ROFF_ansiC_89, ROFF_ansiC_99, ROFF_ieee754, ROFF_iso8802_3,
	ROFF_xpg3, ROFF_xpg4, ROFF_xpg4_2, ROFF_xpg4_3, ROFF_xbd5,
	ROFF_xcu5, ROFF_xsh5, ROFF_xns5, ROFF_xns5_2d2_0,
	ROFF_xcurses4_2, ROFF_susv2, ROFF_susv3, ROFF_svid4,
	ROFF_ARGMAX };
164
/*
 * Tokens permitted directly inside a given block.  Lists end with
 * ROFF_MAX and are checked by roffscan() from roffparse().
 */
static	const int roffchild_Bl[] = { ROFF_It, ROFF_El, ROFF_MAX };
static	const int roffchild_Fo[] = { ROFF_Fa, ROFF_Fc, ROFF_MAX };
static	const int roffchild_Oo[] = { ROFF_Op, ROFF_Oc, ROFF_MAX };
static	const int roffchild_Rs[] = { ROFF_Re, ROFF__A, ROFF__B,
	ROFF__D, ROFF__I, ROFF__J, ROFF__N, ROFF__O, ROFF__P,
	ROFF__R, ROFF__T, ROFF__V, ROFF_MAX };
171
/*
 * Tokens permitted as the innermost open node when a given macro is
 * seen.  Lists end with ROFF_MAX and are checked by roffscan() from
 * roffparse() against tree->last->tok.
 *
 * NOTE(review): only roff_layout() and roff_Os() push nodes in this
 * file, so the ROFF_Fa and ROFF_Oc entries below look unreachable;
 * ROFF_Oc in roffparent_Oc may have been meant as ROFF_Op - confirm.
 */
static	const int roffparent_El[] = { ROFF_Bl, ROFF_It, ROFF_MAX };
static	const int roffparent_Fc[] = { ROFF_Fo, ROFF_Fa, ROFF_MAX };
static	const int roffparent_Oc[] = { ROFF_Oo, ROFF_Oc, ROFF_MAX };
static	const int roffparent_It[] = { ROFF_Bl, ROFF_It, ROFF_MAX };
static	const int roffparent_Re[] = { ROFF_Rs, ROFF_MAX };
177
178 /* Table of all known tokens. */
179 static const struct rofftok tokens[ROFF_MAX] = {
180 {roff_comment, NULL, NULL, NULL, 0, ROFF_COMMENT, 0 }, /* \" */
181 { roff_Dd, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* Dd */
182 { roff_Dt, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* Dt */
183 { roff_Os, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_QUOTES }, /* Os */
184 { roff_layout, NULL, NULL, NULL, ROFF_Sh, ROFF_LAYOUT, ROFF_PARSED }, /* Sh */
185 { roff_layout, NULL, NULL, NULL, ROFF_Ss, ROFF_LAYOUT, ROFF_PARSED }, /* Ss */
186 { roff_text, NULL, NULL, NULL, ROFF_Pp, ROFF_TEXT, 0 }, /* Pp */
187 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* D1 */
188 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* Dl */
189 { roff_layout, roffarg_Bd, NULL, NULL, 0, ROFF_LAYOUT, 0 }, /* Bd */
190 { roff_close, NULL, NULL, NULL, ROFF_Bd, ROFF_LAYOUT, 0 }, /* Ed */
191 { roff_layout, roffarg_Bl, NULL, roffchild_Bl, 0, ROFF_LAYOUT, 0 }, /* Bl */
192 { roff_close, NULL, roffparent_El, NULL, ROFF_Bl, ROFF_LAYOUT, 0 }, /* El */
193 { roff_layout, NULL, roffparent_It, NULL, ROFF_It, ROFF_LAYOUT, ROFF_SHALLOW }, /* It */
194 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE | ROFF_PUNCT }, /* Ad */
195 { roff_text, roffarg_An, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_PUNCT }, /* An */
196 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE | ROFF_PUNCT }, /* Ar */
197 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_QUOTES }, /* Cd */ /* XXX man.4 only */
198 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Cm */
199 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE | ROFF_PUNCT }, /* Dv */ /* XXX needs arg */
200 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE | ROFF_PUNCT }, /* Er */ /* XXX needs arg */
201 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE | ROFF_PUNCT }, /* Ev */ /* XXX needs arg */
202 { roff_text, roffarg_Ex, NULL, NULL, 0, ROFF_TEXT, 0 }, /* Ex */
203 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE | ROFF_PUNCT }, /* Fa */ /* XXX needs arg */
204 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* Fd */
205 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE | ROFF_PUNCT }, /* Fl */
206 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE | ROFF_PUNCT }, /* Fn */ /* XXX needs arg */ /* FIXME */
207 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_PUNCT }, /* Ft */
208 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE | ROFF_PUNCT }, /* Ic */ /* XXX needs arg */
209 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* In */
210 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE | ROFF_PUNCT }, /* Li */
211 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_QUOTES }, /* Nd */
212 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE | ROFF_PUNCT }, /* Nm */ /* FIXME */
213 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE | ROFF_PUNCT }, /* Op */
214 { NULL, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* Ot */ /* XXX deprecated */
215 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE | ROFF_PUNCT }, /* Pa */
216 { roff_text, roffarg_Rv, NULL, NULL, 0, ROFF_TEXT, 0 }, /* Rv */
217 { roff_text, roffarg_St, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* St */
218 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE | ROFF_PUNCT }, /* Va */
219 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE | ROFF_PUNCT }, /* Vt */ /* XXX needs arg */
220 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE | ROFF_PUNCT }, /* Xr */ /* XXX needs arg */
221 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* %A */
222 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE}, /* %B */
223 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* %D */
224 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE}, /* %I */
225 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE}, /* %J */
226 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* %N */
227 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* %O */
228 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* %P */
229 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* %R */
230 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* %T */
231 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* %V */
232 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Ac */
233 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Ao */
234 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Aq */
235 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* At */ /* XXX at most 2 args */
236 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Bc */
237 { NULL, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* Bf */ /* FIXME */
238 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Bo */
239 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Bq */
240 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_PUNCT }, /* Bsx */
241 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_PUNCT }, /* Bx */
242 {roff_special, NULL, NULL, NULL, 0, ROFF_SPECIAL, 0 }, /* Db */
243 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Dc */
244 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Do */
245 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Dq */
246 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Ec */
247 { NULL, NULL, NULL, NULL, 0, ROFF_TEXT, 0 }, /* Ef */ /* FIXME */
248 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE | ROFF_PUNCT }, /* Em */ /* XXX needs arg */
249 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Eo */
250 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_PUNCT }, /* Fx */
251 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_PUNCT }, /* Ms */
252 { NULL, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* No */
253 { NULL, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Ns */
254 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_PUNCT }, /* Nx */
255 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_PUNCT }, /* Ox */
256 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Pc */
257 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED }, /* Pf */
258 { roff_text, NULL, NULL, NULL, 0, ROFF_LAYOUT, ROFF_PARSED | ROFF_CALLABLE }, /* Po */
259 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Pq */
260 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Qc */
261 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Ql */
262 { roff_layout, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Qo */
263 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Qq */
264 { roff_close, NULL, roffparent_Re, NULL, ROFF_Rs, ROFF_LAYOUT, 0 }, /* Re */
265 { roff_layout, NULL, NULL, roffchild_Rs, 0, ROFF_LAYOUT, 0 }, /* Rs */
266 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Sc */
267 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* So */
268 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Sq */
269 {roff_special, NULL, NULL, NULL, 0, ROFF_SPECIAL, 0 }, /* Sm */
270 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Sx */
271 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE | ROFF_PUNCT }, /* Sy */
272 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE | ROFF_PUNCT }, /* Tn */
273 { roff_text, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_PUNCT }, /* Ux */
274 { NULL, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Xc */
275 { NULL, NULL, NULL, NULL, 0, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Xo */
276 { roff_layout, NULL, NULL, roffchild_Fo, 0, ROFF_LAYOUT, 0 }, /* Fo */
277 { roff_close, NULL, roffparent_Fc, NULL, ROFF_Fo, ROFF_LAYOUT, 0 }, /* Fc */
278 { roff_layout, NULL, NULL, roffchild_Oo, 0, ROFF_LAYOUT, 0 }, /* Oo */
279 { roff_close, NULL, roffparent_Oc, NULL, ROFF_Oo, ROFF_LAYOUT, 0 }, /* Oc */
280 { roff_layout, roffarg_Bk, NULL, NULL, 0, ROFF_LAYOUT, 0 }, /* Bk */
281 { roff_close, NULL, NULL, NULL, ROFF_Bk, ROFF_LAYOUT, 0 }, /* Ek */
282 };
283
/*
 * Table of all known token arguments, indexed by argument id.  An entry
 * of ROFF_VALUE makes roffnextopt() consume the following word as the
 * argument's value.  Going by the order of tokargnamesp[], the three
 * flagged slots (5, 6 and 18) are "file", "offset" and "width" -
 * presumably the enum in the private header uses the same order.
 */
static	const int tokenargs[ROFF_ARGMAX] = {
	0,		0,		0,		0,
	0,		ROFF_VALUE,	ROFF_VALUE,	0,
	0,		0,		0,		0,
	0,		0,		0,		0,
	0,		0,		ROFF_VALUE,	0,
	0,		0,		0,		0,
	0,		0,		0,		0,
	0,		0,		0,		0,
	0,		0,		0,		0,
	0,		0,		0,		0,
	0,		0,		0,		0,
	0,		0,		0,		0,
	0,		0,		0,		0,
	0,		0,		0,		0,
	0,		0,		0,		0,
};
302
303 const char *const toknamesp[ROFF_MAX] = {
304 "\\\"", "Dd", "Dt", "Os",
305 "Sh", "Ss", "Pp", "D1",
306 "Dl", "Bd", "Ed", "Bl",
307 "El", "It", "Ad", "An",
308 "Ar", "Cd", "Cm", "Dv",
309 "Er", "Ev", "Ex", "Fa",
310 "Fd", "Fl", "Fn", "Ft",
311 "Ic", "In", "Li", "Nd",
312 "Nm", "Op", "Ot", "Pa",
313 "Rv", "St", "Va", "Vt",
314 "Xr", "\%A", "\%B", "\%D",
315 "\%I", "\%J", "\%N", "\%O",
316 "\%P", "\%R", "\%T", "\%V",
317 "Ac", "Ao", "Aq", "At",
318 "Bc", "Bf", "Bo", "Bq",
319 "Bsx", "Bx", "Db", "Dc",
320 "Do", "Dq", "Ec", "Ef",
321 "Em", "Eo", "Fx", "Ms",
322 "No", "Ns", "Nx", "Ox",
323 "Pc", "Pf", "Po", "Pq",
324 "Qc", "Ql", "Qo", "Qq",
325 "Re", "Rs", "Sc", "So",
326 "Sq", "Sm", "Sx", "Sy",
327 "Tn", "Ux", "Xc", "Xo",
328 "Fo", "Fc", "Oo", "Oc",
329 "Bk", "Ek",
330 };
331
/*
 * Printable argument names, indexed by argument id.  rofffindarg()
 * compares the text following a leading `-' against these strings.
 */
const	char *const tokargnamesp[ROFF_ARGMAX] = {
	"split",		"nosplit",		"ragged",
	"unfilled",		"literal",		"file",
	"offset",		"bullet",		"dash",
	"hyphen",		"item",			"enum",
	"tag",			"diag",			"hang",
	"ohang",		"inset",		"column",
	"width",		"compact",		"std",
	"p1003.1-88",		"p1003.1-90",		"p1003.1-96",
	"p1003.1-2001",		"p1003.1-2004",		"p1003.1",
	"p1003.1b",		"p1003.1b-93",		"p1003.1c-95",
	"p1003.1g-2000",	"p1003.2-92",		"p1387.2-95",
	"p1003.2",		"p1387.2",		"isoC-90",
	"isoC-amd1",		"isoC-tcor1",		"isoC-tcor2",
	"isoC-99",		"ansiC",		"ansiC-89",
	"ansiC-99",		"ieee754",		"iso8802-3",
	"xpg3",			"xpg4",			"xpg4.2",
	"xpg4.3",		"xbd5",			"xcu5",
	"xsh5",			"xns5",			"xns5.2d2.0",
	"xcurses4.2",		"susv2",		"susv3",
	"svid4",		"filled",		"words",
};
354
/*
 * Externally visible aliases for the two name tables above; the lookup
 * and diagnostic code in this file goes through these pointers.
 */
const	char *const *toknames = toknamesp;
const	char *const *tokargnames = tokargnamesp;
357
358
359 int
360 roff_free(struct rofftree *tree, int flush)
361 {
362 int error, t;
363 struct roffnode *n;
364
365 error = 0;
366
367 if ( ! flush)
368 goto end;
369
370 error = 1;
371
372 if (ROFF_PRELUDE & tree->state) {
373 roff_warn(tree, NULL, "prelude never finished");
374 goto end;
375 }
376
377 for (n = tree->last; n->parent; n = n->parent) {
378 if (0 != tokens[n->tok].ctx)
379 continue;
380 roff_warn(tree, NULL, "closing explicit scope `%s'",
381 toknames[n->tok]);
382 goto end;
383 }
384
385 while (tree->last) {
386 t = tree->last->tok;
387 if ( ! (*tokens[t].cb)(t, tree, NULL, ROFF_EXIT))
388 goto end;
389 }
390
391 error = 0;
392
393 end:
394
395 while (tree->last)
396 roffnode_free(tree);
397
398 free(tree);
399
400 return(error ? 0 : 1);
401 }
402
403
404 struct rofftree *
405 roff_alloc(const struct roffcb *cb, void *args)
406 {
407 struct rofftree *tree;
408
409 assert(args);
410 assert(cb);
411
412 if (NULL == (tree = calloc(1, sizeof(struct rofftree))))
413 err(1, "calloc");
414
415 tree->state = ROFF_PRELUDE;
416 tree->arg = args;
417
418 (void)memcpy(&tree->cb, cb, sizeof(struct roffcb));
419
420 return(tree);
421 }
422
423
424 int
425 roff_engine(struct rofftree *tree, char *buf)
426 {
427
428 tree->cur = buf;
429 assert(buf);
430
431 if (0 == *buf) {
432 roff_warn(tree, buf, "blank line");
433 return(0);
434 } else if ('.' != *buf)
435 return(textparse(tree, buf));
436
437 return(roffparse(tree, buf));
438 }
439
440
441 static int
442 textparse(const struct rofftree *tree, char *buf)
443 {
444
445 return((*tree->cb.roffdata)(tree->arg, buf));
446 }
447
448
449 static int
450 roffargs(const struct rofftree *tree,
451 int tok, char *buf, char **argv)
452 {
453 int i;
454 char *p;
455
456 assert(tok >= 0 && tok < ROFF_MAX);
457 assert('.' == *buf);
458
459 p = buf;
460
461 /* LINTED */
462 for (i = 0; *buf && i < ROFF_MAXARG; i++) {
463 if ('\"' == *buf) {
464 argv[i] = ++buf;
465 while (*buf && '\"' != *buf)
466 buf++;
467 if (0 == *buf) {
468 roff_err(tree, argv[i], "unclosed "
469 "quote in argument "
470 "list for `%s'",
471 toknames[tok]);
472 return(0);
473 }
474 } else {
475 argv[i] = buf++;
476 while (*buf && ! isspace(*buf))
477 buf++;
478 if (0 == *buf)
479 continue;
480 }
481 *buf++ = 0;
482 while (*buf && isspace(*buf))
483 buf++;
484 }
485
486 assert(i > 0);
487 if (ROFF_MAXARG == i && *buf) {
488 roff_err(tree, p, "too many arguments for `%s'", toknames
489 [tok]);
490 return(0);
491 }
492
493 argv[i] = NULL;
494 return(1);
495 }
496
497
498 /* XXX */
499 static int
500 roffscan(int tok, const int *tokv)
501 {
502
503 if (NULL == tokv)
504 return(1);
505
506 for ( ; ROFF_MAX != *tokv; tokv++)
507 if (tok == *tokv)
508 return(1);
509
510 return(0);
511 }
512
513
514 static int
515 roffparse(struct rofftree *tree, char *buf)
516 {
517 int tok, t;
518 struct roffnode *n;
519 char *argv[ROFF_MAXARG];
520 char **argvp;
521
522 if (ROFF_MAX == (tok = rofffindtok(buf + 1))) {
523 roff_err(tree, buf + 1, "bogus line macro");
524 return(0);
525 } else if (NULL == tokens[tok].cb) {
526 roff_err(tree, buf + 1, "unsupported macro `%s'",
527 toknames[tok]);
528 return(0);
529 } else if (ROFF_COMMENT == tokens[tok].type)
530 return(1);
531
532 if ( ! roffargs(tree, tok, buf, argv))
533 return(0);
534
535 argvp = (char **)argv;
536
537 /*
538 * Prelude macros break some assumptions, so branch now.
539 */
540
541 if (ROFF_PRELUDE & tree->state) {
542 assert(NULL == tree->last);
543 return((*tokens[tok].cb)(tok, tree, argvp, ROFF_ENTER));
544 } else
545 assert(tree->last);
546
547 assert(ROFF_BODY & tree->state);
548
549 /*
550 * First check that our possible parents and parent's possible
551 * children are satisfied.
552 */
553
554 if ( ! roffscan(tree->last->tok, tokens[tok].parents)) {
555 roff_err(tree, *argvp, "`%s' has invalid parent `%s'",
556 toknames[tok],
557 toknames[tree->last->tok]);
558 return(0);
559 }
560
561 if ( ! roffscan(tok, tokens[tree->last->tok].children)) {
562 roff_err(tree, *argvp, "`%s' is invalid child of `%s'",
563 toknames[tok],
564 toknames[tree->last->tok]);
565 return(0);
566 }
567
568 /*
569 * Branch if we're not a layout token.
570 */
571
572 if (ROFF_LAYOUT != tokens[tok].type)
573 return((*tokens[tok].cb)(tok, tree, argvp, ROFF_ENTER));
574
575 /*
576 * Check our scope rules.
577 */
578
579 if (0 == tokens[tok].ctx)
580 return((*tokens[tok].cb)(tok, tree, argvp, ROFF_ENTER));
581
582 /*
583 * First consider implicit-end tags, like as follows:
584 * .Sh SECTION 1
585 * .Sh SECTION 2
586 * In this, we want to close the scope of the NAME section. If
587 * there's an intermediary implicit-end tag, such as
588 * .Sh SECTION 1
589 * .Ss Subsection 1
590 * .Sh SECTION 2
591 * then it must be closed as well.
592 */
593
594 if (tok == tokens[tok].ctx) {
595 /*
596 * First search up to the point where we must close.
597 * If one doesn't exist, then we can open a new scope.
598 */
599
600 for (n = tree->last; n; n = n->parent) {
601 assert(0 == tokens[n->tok].ctx ||
602 n->tok == tokens[n->tok].ctx);
603 if (n->tok == tok)
604 break;
605 if (ROFF_SHALLOW & tokens[tok].flags) {
606 n = NULL;
607 break;
608 }
609 }
610
611 /*
612 * Create a new scope, as no previous one exists to
613 * close out.
614 */
615
616 if (NULL == n)
617 return((*tokens[tok].cb)(tok, tree, argvp, ROFF_ENTER));
618
619 /*
620 * Close out all intermediary scoped blocks, then hang
621 * the current scope from our predecessor's parent.
622 */
623
624 do {
625 t = tree->last->tok;
626 if ( ! (*tokens[t].cb)(t, tree, NULL, ROFF_EXIT))
627 return(0);
628 } while (t != tok);
629
630 return((*tokens[tok].cb)(tok, tree, argvp, ROFF_ENTER));
631 }
632
633 /*
634 * Now consider explicit-end tags, where we want to close back
635 * to a specific tag. Example:
636 * .Bl
637 * .It Item.
638 * .El
639 * In this, the `El' tag closes out the scope of `Bl'.
640 */
641
642 assert(tree->last);
643 assert(tok != tokens[tok].ctx && 0 != tokens[tok].ctx);
644
645 /* LINTED */
646 do {
647 t = tree->last->tok;
648 if ( ! (*tokens[t].cb)(t, tree, NULL, ROFF_EXIT))
649 return(0);
650 } while (t != tokens[tok].ctx);
651
652 assert(tree->last);
653 return(1);
654 }
655
656
657 static int
658 rofffindarg(const char *name)
659 {
660 size_t i;
661
662 /* FIXME: use a table, this is slow but ok for now. */
663
664 /* LINTED */
665 for (i = 0; i < ROFF_ARGMAX; i++)
666 /* LINTED */
667 if (0 == strcmp(name, tokargnames[i]))
668 return((int)i);
669
670 return(ROFF_ARGMAX);
671 }
672
673
674 static int
675 rofffindtok(const char *buf)
676 {
677 char token[4];
678 size_t i;
679
680 for (i = 0; *buf && ! isspace(*buf) && i < 3; i++, buf++)
681 token[i] = *buf;
682
683 if (i == 3)
684 return(ROFF_MAX);
685
686 token[i] = 0;
687
688 /* FIXME: use a table, this is slow but ok for now. */
689
690 /* LINTED */
691 for (i = 0; i < ROFF_MAX; i++)
692 /* LINTED */
693 if (0 == strcmp(toknames[i], token))
694 return((int)i);
695
696 return(ROFF_MAX);
697 }
698
699
700 static int
701 rofffindcallable(const char *name)
702 {
703 int c;
704
705 if (ROFF_MAX == (c = rofffindtok(name)))
706 return(ROFF_MAX);
707 assert(c >= 0 && c < ROFF_MAX);
708 return(ROFF_CALLABLE & tokens[c].flags ? c : ROFF_MAX);
709 }
710
711
712 static struct roffnode *
713 roffnode_new(int tokid, struct rofftree *tree)
714 {
715 struct roffnode *p;
716
717 if (NULL == (p = malloc(sizeof(struct roffnode))))
718 err(1, "malloc");
719
720 p->tok = tokid;
721 p->parent = tree->last;
722 tree->last = p;
723
724 return(p);
725 }
726
727
728 static int
729 roffargok(int tokid, int argid)
730 {
731 const int *c;
732
733 if (NULL == (c = tokens[tokid].args))
734 return(0);
735
736 for ( ; ROFF_ARGMAX != *c; c++)
737 if (argid == *c)
738 return(1);
739
740 return(0);
741 }
742
743
744 static void
745 roffnode_free(struct rofftree *tree)
746 {
747 struct roffnode *p;
748
749 assert(tree->last);
750
751 p = tree->last;
752 tree->last = tree->last->parent;
753 free(p);
754 }
755
756
757 static int
758 roffnextopt(const struct rofftree *tree, int tok,
759 char ***in, char **val)
760 {
761 char *arg, **argv;
762 int v;
763
764 *val = NULL;
765 argv = *in;
766 assert(argv);
767
768 if (NULL == (arg = *argv))
769 return(-1);
770 if ('-' != *arg)
771 return(-1);
772
773 if (ROFF_ARGMAX == (v = rofffindarg(arg + 1))) {
774 roff_warn(tree, arg, "argument-like parameter `%s' to "
775 "`%s'", &arg[1], toknames[tok]);
776 return(-1);
777 }
778
779 if ( ! roffargok(tok, v)) {
780 roff_warn(tree, arg, "invalid argument parameter `%s' to "
781 "`%s'", tokargnames[v], toknames[tok]);
782 return(-1);
783 }
784
785 if ( ! (ROFF_VALUE & tokenargs[v]))
786 return(v);
787
788 *in = ++argv;
789
790 if (NULL == *argv) {
791 roff_err(tree, arg, "empty value of `%s' for `%s'",
792 tokargnames[v], toknames[tok]);
793 return(ROFF_ARGMAX);
794 }
795
796 return(v);
797 }
798
799
800 /* ARGSUSED */
801 static int
802 roff_Dd(ROFFCALL_ARGS)
803 {
804
805 if (ROFF_BODY & tree->state) {
806 assert( ! (ROFF_PRELUDE & tree->state));
807 assert(ROFF_PRELUDE_Dd & tree->state);
808 return(roff_text(tok, tree, argv, type));
809 }
810
811 assert(ROFF_PRELUDE & tree->state);
812 assert( ! (ROFF_BODY & tree->state));
813
814 if (ROFF_PRELUDE_Dd & tree->state) {
815 roff_err(tree, *argv, "repeated `Dd' in prelude");
816 return(0);
817 } else if (ROFF_PRELUDE_Dt & tree->state) {
818 roff_err(tree, *argv, "out-of-order `Dd' in prelude");
819 return(0);
820 }
821
822 /* TODO: parse date. */
823
824 assert(NULL == tree->last);
825 tree->state |= ROFF_PRELUDE_Dd;
826
827 return(1);
828 }
829
830
831 /* ARGSUSED */
832 static int
833 roff_Dt(ROFFCALL_ARGS)
834 {
835
836 if (ROFF_BODY & tree->state) {
837 assert( ! (ROFF_PRELUDE & tree->state));
838 assert(ROFF_PRELUDE_Dt & tree->state);
839 return(roff_text(tok, tree, argv, type));
840 }
841
842 assert(ROFF_PRELUDE & tree->state);
843 assert( ! (ROFF_BODY & tree->state));
844
845 if ( ! (ROFF_PRELUDE_Dd & tree->state)) {
846 roff_err(tree, *argv, "out-of-order `Dt' in prelude");
847 return(0);
848 } else if (ROFF_PRELUDE_Dt & tree->state) {
849 roff_err(tree, *argv, "repeated `Dt' in prelude");
850 return(0);
851 }
852
853 /* TODO: parse date. */
854
855 assert(NULL == tree->last);
856 tree->state |= ROFF_PRELUDE_Dt;
857
858 return(1);
859 }
860
861
862 /* ARGSUSED */
863 static int
864 roff_Os(ROFFCALL_ARGS)
865 {
866
867 if (ROFF_EXIT == type) {
868 roffnode_free(tree);
869 return((*tree->cb.rofftail)(tree->arg));
870 } else if (ROFF_BODY & tree->state) {
871 assert( ! (ROFF_PRELUDE & tree->state));
872 assert(ROFF_PRELUDE_Os & tree->state);
873 return(roff_text(tok, tree, argv, type));
874 }
875
876 assert(ROFF_PRELUDE & tree->state);
877 if ( ! (ROFF_PRELUDE_Dt & tree->state) ||
878 ! (ROFF_PRELUDE_Dd & tree->state)) {
879 roff_err(tree, *argv, "out-of-order `Os' in prelude");
880 return(0);
881 }
882
883 /* TODO: extract OS. */
884
885 tree->state |= ROFF_PRELUDE_Os;
886 tree->state &= ~ROFF_PRELUDE;
887 tree->state |= ROFF_BODY;
888
889 assert(NULL == tree->last);
890
891 if (NULL == roffnode_new(tok, tree))
892 return(0);
893
894 return((*tree->cb.roffhead)(tree->arg));
895 }
896
897
898 /* ARGSUSED */
899 static int
900 roff_layout(ROFFCALL_ARGS)
901 {
902 int i, c, argcp[ROFF_MAXARG];
903 char *v, *argvp[ROFF_MAXARG];
904
905 if (ROFF_PRELUDE & tree->state) {
906 roff_err(tree, *argv, "`%s' disallowed in prelude",
907 toknames[tok]);
908 return(0);
909 }
910
911 if (ROFF_EXIT == type) {
912 roffnode_free(tree);
913 return((*tree->cb.roffblkout)(tree->arg, tok));
914 }
915
916 i = 0;
917 argv++;
918
919 while (-1 != (c = roffnextopt(tree, tok, &argv, &v))) {
920 if (ROFF_ARGMAX == c)
921 return(0);
922
923 argcp[i] = c;
924 argvp[i] = v;
925 i++;
926 argv++;
927 }
928
929 argcp[i] = ROFF_ARGMAX;
930 argvp[i] = NULL;
931
932 if (NULL == roffnode_new(tok, tree))
933 return(0);
934
935 if ( ! (*tree->cb.roffblkin)(tree->arg, tok, argcp, argvp))
936 return(0);
937
938 if (NULL == *argv)
939 return(1);
940
941 if ( ! (*tree->cb.roffin)(tree->arg, tok, argcp, argvp))
942 return(0);
943
944 if ( ! (ROFF_PARSED & tokens[tok].flags)) {
945 while (*argv) {
946 if ( ! (*tree->cb.roffdata)(tree->arg, *argv++))
947 return(0);
948 }
949 return((*tree->cb.roffout)(tree->arg, tok));
950 }
951
952 while (*argv) {
953 if (ROFF_MAX != (c = rofffindcallable(*argv))) {
954 if (NULL == tokens[c].cb) {
955 roff_err(tree, *argv, "unsupported "
956 "macro `%s'",
957 toknames[c]);
958 return(0);
959 }
960 if ( ! (*tokens[c].cb)(c, tree, argv, ROFF_ENTER))
961 return(0);
962 break;
963 }
964
965 assert(tree->arg);
966 if ( ! (*tree->cb.roffdata)(tree->arg, *argv++))
967 return(0);
968 }
969
970 return((*tree->cb.roffout)(tree->arg, tok));
971 }
972
973
974 /* ARGSUSED */
975 static int
976 roff_text(ROFFCALL_ARGS)
977 {
978 int i, c, argcp[ROFF_MAXARG];
979 char *v, *argvp[ROFF_MAXARG];
980
981 if (ROFF_PRELUDE & tree->state) {
982 roff_err(tree, *argv, "`%s' disallowed in prelude",
983 toknames[tok]);
984 return(0);
985 }
986
987 i = 0;
988 argv++;
989
990 while (-1 != (c = roffnextopt(tree, tok, &argv, &v))) {
991 if (ROFF_ARGMAX == c)
992 return(0);
993
994 argcp[i] = c;
995 argvp[i] = v;
996 i++;
997 argv++;
998 }
999
1000 argcp[i] = ROFF_ARGMAX;
1001 argvp[i] = NULL;
1002
1003 if ( ! (*tree->cb.roffin)(tree->arg, tok, argcp, argvp))
1004 return(0);
1005
1006 if ( ! (ROFF_PARSED & tokens[tok].flags)) {
1007 while (*argv) {
1008 if ( ! (*tree->cb.roffdata)(tree->arg, *argv++))
1009 return(0);
1010 }
1011 return((*tree->cb.roffout)(tree->arg, tok));
1012 }
1013
1014 while (*argv) {
1015 if (ROFF_MAX != (c = rofffindcallable(*argv))) {
1016 if (NULL == tokens[c].cb) {
1017 roff_err(tree, *argv, "unsupported "
1018 "macro `%s'",
1019 toknames[c]);
1020 return(0);
1021 }
1022 if ( ! (*tokens[c].cb)(c, tree,
1023 argv, ROFF_ENTER))
1024 return(0);
1025 break;
1026 }
1027 if ( ! (*tree->cb.roffdata)(tree->arg, *argv++))
1028 return(0);
1029 }
1030
1031 return((*tree->cb.roffout)(tree->arg, tok));
1032 }
1033
1034
/*
 * Callback for comment lines.  It does nothing and reports success;
 * roffparse() returns for comment tokens before any callback is used.
 */
/* ARGSUSED */
static int
roff_comment(ROFFCALL_ARGS)
{
	return 1;
}
1042
1043
/*
 * Callback slot for explicit block closers (`Ed', `El', `Re', `Fc',
 * `Oc', `Ek').  The closing itself is done by roffparse(); this
 * function only keeps the table entry non-NULL and reports success.
 */
/* ARGSUSED */
static int
roff_close(ROFFCALL_ARGS)
{
	return 1;
}
1051
1052
1053 /* ARGSUSED */
1054 static int
1055 roff_special(ROFFCALL_ARGS)
1056 {
1057
1058 return((*tree->cb.roffspecial)(tree->arg, tok));
1059 }
1060
1061
1062 static void
1063 roff_warn(const struct rofftree *tree, const char *pos, char *fmt, ...)
1064 {
1065 va_list ap;
1066 char buf[128];
1067
1068 va_start(ap, fmt);
1069 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
1070 va_end(ap);
1071
1072 (*tree->cb.roffmsg)(tree->arg,
1073 ROFF_WARN, tree->cur, pos, buf);
1074 }
1075
1076
1077 static void
1078 roff_err(const struct rofftree *tree, const char *pos, char *fmt, ...)
1079 {
1080 va_list ap;
1081 char buf[128];
1082
1083 va_start(ap, fmt);
1084 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
1085 va_end(ap);
1086
1087 (*tree->cb.roffmsg)(tree->arg,
1088 ROFF_ERROR, tree->cur, pos, buf);
1089 }