]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
Move .sp to the roff modules. Enough infrastructure is in place
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.299 2017/05/05 15:17:32 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31 #include "mandoc.h"
32 #include "mandoc_aux.h"
33 #include "mandoc_ohash.h"
34 #include "roff.h"
35 #include "libmandoc.h"
36 #include "roff_int.h"
37 #include "libroff.h"
38
39 /* Maximum number of string expansions per line, to break infinite loops. */
40 #define EXPAND_LIMIT 1000
41
42 /* --- data types --------------------------------------------------------- */
43
44 /*
45 * A simple string buffer: a pointer plus its cached length.
46 */
47 struct roffstr {
48 char *p; /* NUL-terminated buffer */
49 size_t sz; /* saved strlen(p) */
50 };
51
52 /*
53 * A key-value pair of roffstr buffers, linked into a singly-linked list.
54 */
55 struct roffkv {
56 struct roffstr key; /* lookup key */
57 struct roffstr val; /* associated value */
58 struct roffkv *next; /* next in list */
59 };
60
61 /*
62 * A single number register as part of a singly-linked list.
63 */
64 struct roffreg {
65 struct roffstr key; /* register name */
66 int val; /* register value */
67 struct roffreg *next; /* next in list */
68 };
69
70 /*
71 * Association of request and macro names with token IDs.
72 * These are the entries stored in the table built by roffhash_alloc().
73 */
73 struct roffreq {
74 enum roff_tok tok; /* token ID for this name */
75 char name[]; /* NUL-terminated name, allocated together with the struct */
76 };
77
/*
 * State of one roff parser instance, created by roff_alloc() and
 * destroyed by roff_free().
 */
78 struct roff {
79 struct mparse *parse; /* parse point */
80 struct roff_man *man; /* mdoc or man parser */
81 struct roffnode *last; /* leaf of stack */
82 int *rstack; /* stack of inverted `ie' values */
83 struct ohash *reqtab; /* request lookup table */
84 struct roffreg *regtab; /* number registers */
85 struct roffkv *strtab; /* user-defined strings & macros */
86 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */
87 struct roffstr *xtab; /* single-byte trans table (`tr'); 128 entries when allocated */
88 const char *current_string; /* value of last called user macro */
89 struct tbl_node *first_tbl; /* first table parsed */
90 struct tbl_node *last_tbl; /* last table parsed */
91 struct tbl_node *tbl; /* current table being parsed */
92 struct eqn_node *last_eqn; /* last equation parsed */
93 struct eqn_node *first_eqn; /* first equation parsed */
94 struct eqn_node *eqn; /* current equation being parsed */
95 int eqn_inline; /* current equation is inline */
96 int options; /* parse options */
97 int rstacksz; /* current size limit of rstack */
98 int rstackpos; /* position in rstack; -1 after reset or allocation */
99 int format; /* current file in mdoc or man format */
100 int argc; /* number of args of the last macro */
101 char control; /* control character; reset to 0 by roff_reset() */
102 };
103
/*
 * One entry on the stack of pending roff instructions; pushed by
 * roffnode_push() and removed by roffnode_pop().
 */
104 struct roffnode {
105 enum roff_tok tok; /* type of node */
106 struct roffnode *parent; /* up one in stack */
107 int line; /* parse line */
108 int col; /* parse col */
109 char *name; /* node name, e.g. macro name; owned, may be NULL */
110 char *end; /* end-rules: custom token; freed on pop */
111 int endspan; /* end-rules: next-line or infty */
112 int rule; /* current evaluation rule; inherited from parent on push */
113 };
114
/*
 * Common parameter list of all request handlers, and the matching
 * function pointer type stored in struct roffmac.
 */
115 #define ROFF_ARGS struct roff *r, /* parse ctx */ \
116 enum roff_tok tok, /* tok of macro */ \
117 struct buf *buf, /* input buffer */ \
118 int ln, /* parse line */ \
119 int ppos, /* original pos in buffer */ \
120 int pos, /* current pos in buffer */ \
121 int *offs /* reset offset of buffer data */
122
123 typedef enum rofferr (*roffproc)(ROFF_ARGS);
124
/*
 * Handlers for one request or macro; one element of the roffs[] table.
 * Unused handlers are NULL.
 */
125 struct roffmac {
126 roffproc proc; /* process new macro */
127 roffproc text; /* process as child text of macro */
128 roffproc sub; /* process as child of macro */
129 int flags; /* 0 or ROFFMAC_STRUCT */
130 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
131 };
132
/*
 * One predefined string: a name and the text that replaces it.
 */
133 struct predef {
134 const char *name; /* predefined input name */
135 const char *str; /* replacement symbol */
136 };
137
/* Expands to one struct predef initializer; presumably invoked by the entries of predefs.in. */
138 #define PREDEF(__name, __str) \
139 { (__name), (__str) },
140
141 /* --- function prototypes ------------------------------------------------ */
142
/*
 * Forward declarations of file-local helpers.  Every function declared
 * with ROFF_ARGS is referenced from the roffs[] dispatch table below.
 */
143 static void roffnode_cleanscope(struct roff *);
144 static void roffnode_pop(struct roff *);
145 static void roffnode_push(struct roff *, enum roff_tok,
146 const char *, int, int);
147 static enum rofferr roff_block(ROFF_ARGS);
148 static enum rofferr roff_block_text(ROFF_ARGS);
149 static enum rofferr roff_block_sub(ROFF_ARGS);
150 static enum rofferr roff_br(ROFF_ARGS);
151 static enum rofferr roff_cblock(ROFF_ARGS);
152 static enum rofferr roff_cc(ROFF_ARGS);
153 static void roff_ccond(struct roff *, int, int);
154 static enum rofferr roff_cond(ROFF_ARGS);
155 static enum rofferr roff_cond_text(ROFF_ARGS);
156 static enum rofferr roff_cond_sub(ROFF_ARGS);
157 static enum rofferr roff_ds(ROFF_ARGS);
158 static enum rofferr roff_eqndelim(struct roff *, struct buf *, int);
159 static int roff_evalcond(struct roff *r, int, char *, int *);
160 static int roff_evalnum(struct roff *, int,
161 const char *, int *, int *, int);
162 static int roff_evalpar(struct roff *, int,
163 const char *, int *, int *, int);
164 static int roff_evalstrcond(const char *, int *);
165 static void roff_free1(struct roff *);
166 static void roff_freereg(struct roffreg *);
167 static void roff_freestr(struct roffkv *);
168 static size_t roff_getname(struct roff *, char **, int, int);
169 static int roff_getnum(const char *, int *, int *, int);
170 static int roff_getop(const char *, int *, char *);
171 static int roff_getregn(const struct roff *,
172 const char *, size_t);
173 static int roff_getregro(const struct roff *,
174 const char *name);
175 static const char *roff_getstrn(const struct roff *,
176 const char *, size_t);
177 static int roff_hasregn(const struct roff *,
178 const char *, size_t);
179 static enum rofferr roff_insec(ROFF_ARGS);
180 static enum rofferr roff_it(ROFF_ARGS);
181 static enum rofferr roff_line_ignore(ROFF_ARGS);
182 static void roff_man_alloc1(struct roff_man *);
183 static void roff_man_free1(struct roff_man *);
184 static enum rofferr roff_nr(ROFF_ARGS);
185 static enum rofferr roff_onearg(ROFF_ARGS);
186 static enum roff_tok roff_parse(struct roff *, char *, int *,
187 int, int);
188 static enum rofferr roff_parsetext(struct buf *, int, int *);
189 static enum rofferr roff_res(struct roff *, struct buf *, int, int);
190 static enum rofferr roff_rm(ROFF_ARGS);
191 static enum rofferr roff_rr(ROFF_ARGS);
192 static void roff_setstr(struct roff *,
193 const char *, const char *, int);
194 static void roff_setstrn(struct roffkv **, const char *,
195 size_t, const char *, size_t, int);
196 static enum rofferr roff_so(ROFF_ARGS);
197 static enum rofferr roff_tr(ROFF_ARGS);
198 static enum rofferr roff_Dd(ROFF_ARGS);
199 static enum rofferr roff_TH(ROFF_ARGS);
200 static enum rofferr roff_TE(ROFF_ARGS);
201 static enum rofferr roff_TS(ROFF_ARGS);
202 static enum rofferr roff_EQ(ROFF_ARGS);
203 static enum rofferr roff_EN(ROFF_ARGS);
204 static enum rofferr roff_T_(ROFF_ARGS);
205 static enum rofferr roff_unsupp(ROFF_ARGS);
206 static enum rofferr roff_userdef(ROFF_ARGS);
207
208 /* --- constant data ------------------------------------------------------ */
209
/* Flag bits for the numeric parsing helpers named in the comments below. */
210 #define ROFFNUM_SCALE (1 << 0) /* Honour scaling in roff_getnum(). */
211 #define ROFFNUM_WHITE (1 << 1) /* Skip whitespace in roff_evalnum(). */
212
/*
 * Names of all requests and macros, indexed by token ID; exported
 * read-only through roff_name.  The order must match the token
 * enumeration and, for the leading part, the roffs[] table below.
 * NULL entries are skipped by roffhash_alloc().
 */
213 const char *__roff_name[MAN_MAX + 1] = {
214 "br", "ft", "ll", "sp",
215 NULL, /* same index as the ROFF_MAX entry of roffs[] */
216 "ab", "ad", "af", "aln",
217 "als", "am", "am1", "ami",
218 "ami1", "as", "as1", "asciify",
219 "backtrace", "bd", "bleedat", "blm",
220 "box", "boxa", "bp", "BP",
221 "break", "breakchar", "brnl", "brp",
222 "brpnl", "c2", "cc", "ce",
223 "cf", "cflags", "ch", "char",
224 "chop", "class", "close", "CL",
225 "color", "composite", "continue", "cp",
226 "cropat", "cs", "cu", "da",
227 "dch", "Dd", "de", "de1",
228 "defcolor", "dei", "dei1", "device",
229 "devicem", "di", "do", "ds",
230 "ds1", "dwh", "dt", "ec",
231 "ecr", "ecs", "el", "em",
232 "EN", "eo", "EP", "EQ",
233 "errprint", "ev", "evc", "ex",
234 "fallback", "fam", "fc", "fchar",
235 "fcolor", "fdeferlig", "feature", "fkern",
236 "fl", "flig", "fp", "fps",
237 "fschar", "fspacewidth", "fspecial", "ftr",
238 "fzoom", "gcolor", "hc", "hcode",
239 "hidechar", "hla", "hlm", "hpf",
240 "hpfa", "hpfcode", "hw", "hy",
241 "hylang", "hylen", "hym", "hypp",
242 "hys", "ie", "if", "ig",
243 "index", "it", "itc", "IX",
244 "kern", "kernafter", "kernbefore", "kernpair",
245 "lc", "lc_ctype", "lds", "length",
246 "letadj", "lf", "lg", "lhang",
247 "linetabs", "lnr", "lnrf", "lpfx",
248 "ls", "lsm", "lt", "mc",
249 "mediasize", "minss", "mk", "mso",
250 "na", "ne", "nh", "nhychar",
251 "nm", "nn", "nop", "nr",
252 "nrf", "nroff", "ns", "nx",
253 "open", "opena", "os", "output",
254 "padj", "papersize", "pc", "pev",
255 "pi", "PI", "pl", "pm",
256 "pn", "pnr", "po", "ps",
257 "psbb", "pshape", "pso", "ptr",
258 "pvs", "rchar", "rd", "recursionlimit",
259 "return", "rfschar", "rhang", "rj",
260 "rm", "rn", "rnn", "rr",
261 "rs", "rt", "schar", "sentchar",
262 "shc", "shift", "sizes", "so",
263 "spacewidth", "special", "spreadwarn", "ss",
264 "sty", "substring", "sv", "sy",
265 "T&", "ta", "tc", "TE",
266 "TH", "ti", "tkf", "tl",
267 "tm", "tm1", "tmc", "tr",
268 "track", "transchar", "trf", "trimat",
269 "trin", "trnt", "troff", "TS",
270 "uf", "ul", "unformat", "unwatch",
271 "unwatchn", "vpt", "vs", "warn",
272 "warnscale", "watch", "watchlength", "watchn",
273 "wh", "while", "write", "writec",
274 "writem", "xflag", ".", NULL, /* NULL: same index as the roff_userdef entry of roffs[] */
275 "text", /* NOTE(review): presumably index TOKEN_NONE -- confirm against roff.h */
276 "Dd", "Dt", "Os", "Sh",
277 "Ss", "Pp", "D1", "Dl",
278 "Bd", "Ed", "Bl", "El",
279 "It", "Ad", "An", "Ap",
280 "Ar", "Cd", "Cm", "Dv",
281 "Er", "Ev", "Ex", "Fa",
282 "Fd", "Fl", "Fn", "Ft",
283 "Ic", "In", "Li", "Nd",
284 "Nm", "Op", "Ot", "Pa",
285 "Rv", "St", "Va", "Vt",
286 "Xr", "%A", "%B", "%D",
287 "%I", "%J", "%N", "%O",
288 "%P", "%R", "%T", "%V",
289 "Ac", "Ao", "Aq", "At",
290 "Bc", "Bf", "Bo", "Bq",
291 "Bsx", "Bx", "Db", "Dc",
292 "Do", "Dq", "Ec", "Ef",
293 "Em", "Eo", "Fx", "Ms",
294 "No", "Ns", "Nx", "Ox",
295 "Pc", "Pf", "Po", "Pq",
296 "Qc", "Ql", "Qo", "Qq",
297 "Re", "Rs", "Sc", "So",
298 "Sq", "Sm", "Sx", "Sy",
299 "Tn", "Ux", "Xc", "Xo",
300 "Fo", "Fc", "Oo", "Oc",
301 "Bk", "Ek", "Bt", "Hf",
302 "Fr", "Ud", "Lb", "Lp",
303 "Lk", "Mt", "Brq", "Bro",
304 "Brc", "%C", "Es", "En",
305 "Dx", "%Q", "%U", "Ta",
306 NULL, /* NOTE(review): presumably index MDOC_MAX -- confirm against roff.h */
307 "TH", "SH", "SS", "TP",
308 "LP", "PP", "P", "IP",
309 "HP", "SM", "SB", "BI",
310 "IB", "BR", "RB", "R",
311 "B", "I", "IR", "RI",
312 "nf", "fi",
313 "RE", "RS", "DT", "UC",
314 "PD", "AT", "in",
315 "OP", "EX", "EE", "UR",
316 "UE", NULL /* final slot of the MAN_MAX + 1 sized array */
317 };
318 const char *const *roff_name = __roff_name;
319
/*
 * Dispatch table of request handlers, indexed by token ID.  The entry
 * order must stay in sync with the leading part of __roff_name[]; the
 * trailing comment on each line gives the request name.
 */
320 static struct roffmac roffs[TOKEN_NONE] = {
321 { roff_br, NULL, NULL, 0 }, /* br */
322 { roff_onearg, NULL, NULL, 0 }, /* ft */
323 { roff_onearg, NULL, NULL, 0 }, /* ll */
324 { roff_onearg, NULL, NULL, 0 }, /* sp */
325 { NULL, NULL, NULL, 0 }, /* ROFF_MAX */
326 { roff_unsupp, NULL, NULL, 0 }, /* ab */
327 { roff_line_ignore, NULL, NULL, 0 }, /* ad */
328 { roff_line_ignore, NULL, NULL, 0 }, /* af */
329 { roff_unsupp, NULL, NULL, 0 }, /* aln */
330 { roff_unsupp, NULL, NULL, 0 }, /* als */
331 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am */
332 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am1 */
333 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami */
334 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami1 */
335 { roff_ds, NULL, NULL, 0 }, /* as */
336 { roff_ds, NULL, NULL, 0 }, /* as1 */
337 { roff_unsupp, NULL, NULL, 0 }, /* asciify */
338 { roff_line_ignore, NULL, NULL, 0 }, /* backtrace */
339 { roff_line_ignore, NULL, NULL, 0 }, /* bd */
340 { roff_line_ignore, NULL, NULL, 0 }, /* bleedat */
341 { roff_unsupp, NULL, NULL, 0 }, /* blm */
342 { roff_unsupp, NULL, NULL, 0 }, /* box */
343 { roff_unsupp, NULL, NULL, 0 }, /* boxa */
344 { roff_line_ignore, NULL, NULL, 0 }, /* bp */
345 { roff_unsupp, NULL, NULL, 0 }, /* BP */
346 { roff_unsupp, NULL, NULL, 0 }, /* break */
347 { roff_line_ignore, NULL, NULL, 0 }, /* breakchar */
348 { roff_line_ignore, NULL, NULL, 0 }, /* brnl */
349 { roff_br, NULL, NULL, 0 }, /* brp */
350 { roff_line_ignore, NULL, NULL, 0 }, /* brpnl */
351 { roff_unsupp, NULL, NULL, 0 }, /* c2 */
352 { roff_cc, NULL, NULL, 0 }, /* cc */
353 { roff_line_ignore, NULL, NULL, 0 }, /* ce */
354 { roff_insec, NULL, NULL, 0 }, /* cf */
355 { roff_line_ignore, NULL, NULL, 0 }, /* cflags */
356 { roff_line_ignore, NULL, NULL, 0 }, /* ch */
357 { roff_unsupp, NULL, NULL, 0 }, /* char */
358 { roff_unsupp, NULL, NULL, 0 }, /* chop */
359 { roff_line_ignore, NULL, NULL, 0 }, /* class */
360 { roff_insec, NULL, NULL, 0 }, /* close */
361 { roff_unsupp, NULL, NULL, 0 }, /* CL */
362 { roff_line_ignore, NULL, NULL, 0 }, /* color */
363 { roff_unsupp, NULL, NULL, 0 }, /* composite */
364 { roff_unsupp, NULL, NULL, 0 }, /* continue */
365 { roff_line_ignore, NULL, NULL, 0 }, /* cp */
366 { roff_line_ignore, NULL, NULL, 0 }, /* cropat */
367 { roff_line_ignore, NULL, NULL, 0 }, /* cs */
368 { roff_line_ignore, NULL, NULL, 0 }, /* cu */
369 { roff_unsupp, NULL, NULL, 0 }, /* da */
370 { roff_unsupp, NULL, NULL, 0 }, /* dch */
371 { roff_Dd, NULL, NULL, 0 }, /* Dd */
372 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de */
373 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de1 */
374 { roff_line_ignore, NULL, NULL, 0 }, /* defcolor */
375 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei */
376 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei1 */
377 { roff_unsupp, NULL, NULL, 0 }, /* device */
378 { roff_unsupp, NULL, NULL, 0 }, /* devicem */
379 { roff_unsupp, NULL, NULL, 0 }, /* di */
380 { roff_unsupp, NULL, NULL, 0 }, /* do */
381 { roff_ds, NULL, NULL, 0 }, /* ds */
382 { roff_ds, NULL, NULL, 0 }, /* ds1 */
383 { roff_unsupp, NULL, NULL, 0 }, /* dwh */
384 { roff_unsupp, NULL, NULL, 0 }, /* dt */
385 { roff_unsupp, NULL, NULL, 0 }, /* ec */
386 { roff_unsupp, NULL, NULL, 0 }, /* ecr */
387 { roff_unsupp, NULL, NULL, 0 }, /* ecs */
388 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* el */
389 { roff_unsupp, NULL, NULL, 0 }, /* em */
390 { roff_EN, NULL, NULL, 0 }, /* EN */
391 { roff_unsupp, NULL, NULL, 0 }, /* eo */
392 { roff_unsupp, NULL, NULL, 0 }, /* EP */
393 { roff_EQ, NULL, NULL, 0 }, /* EQ */
394 { roff_line_ignore, NULL, NULL, 0 }, /* errprint */
395 { roff_unsupp, NULL, NULL, 0 }, /* ev */
396 { roff_unsupp, NULL, NULL, 0 }, /* evc */
397 { roff_unsupp, NULL, NULL, 0 }, /* ex */
398 { roff_line_ignore, NULL, NULL, 0 }, /* fallback */
399 { roff_line_ignore, NULL, NULL, 0 }, /* fam */
400 { roff_unsupp, NULL, NULL, 0 }, /* fc */
401 { roff_unsupp, NULL, NULL, 0 }, /* fchar */
402 { roff_line_ignore, NULL, NULL, 0 }, /* fcolor */
403 { roff_line_ignore, NULL, NULL, 0 }, /* fdeferlig */
404 { roff_line_ignore, NULL, NULL, 0 }, /* feature */
405 { roff_line_ignore, NULL, NULL, 0 }, /* fkern */
406 { roff_line_ignore, NULL, NULL, 0 }, /* fl */
407 { roff_line_ignore, NULL, NULL, 0 }, /* flig */
408 { roff_line_ignore, NULL, NULL, 0 }, /* fp */
409 { roff_line_ignore, NULL, NULL, 0 }, /* fps */
410 { roff_unsupp, NULL, NULL, 0 }, /* fschar */
411 { roff_line_ignore, NULL, NULL, 0 }, /* fspacewidth */
412 { roff_line_ignore, NULL, NULL, 0 }, /* fspecial */
413 { roff_line_ignore, NULL, NULL, 0 }, /* ftr */
414 { roff_line_ignore, NULL, NULL, 0 }, /* fzoom */
415 { roff_line_ignore, NULL, NULL, 0 }, /* gcolor */
416 { roff_line_ignore, NULL, NULL, 0 }, /* hc */
417 { roff_line_ignore, NULL, NULL, 0 }, /* hcode */
418 { roff_line_ignore, NULL, NULL, 0 }, /* hidechar */
419 { roff_line_ignore, NULL, NULL, 0 }, /* hla */
420 { roff_line_ignore, NULL, NULL, 0 }, /* hlm */
421 { roff_line_ignore, NULL, NULL, 0 }, /* hpf */
422 { roff_line_ignore, NULL, NULL, 0 }, /* hpfa */
423 { roff_line_ignore, NULL, NULL, 0 }, /* hpfcode */
424 { roff_line_ignore, NULL, NULL, 0 }, /* hw */
425 { roff_line_ignore, NULL, NULL, 0 }, /* hy */
426 { roff_line_ignore, NULL, NULL, 0 }, /* hylang */
427 { roff_line_ignore, NULL, NULL, 0 }, /* hylen */
428 { roff_line_ignore, NULL, NULL, 0 }, /* hym */
429 { roff_line_ignore, NULL, NULL, 0 }, /* hypp */
430 { roff_line_ignore, NULL, NULL, 0 }, /* hys */
431 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* ie */
432 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* if */
433 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ig */
434 { roff_unsupp, NULL, NULL, 0 }, /* index */
435 { roff_it, NULL, NULL, 0 }, /* it */
436 { roff_unsupp, NULL, NULL, 0 }, /* itc */
437 { roff_line_ignore, NULL, NULL, 0 }, /* IX */
438 { roff_line_ignore, NULL, NULL, 0 }, /* kern */
439 { roff_line_ignore, NULL, NULL, 0 }, /* kernafter */
440 { roff_line_ignore, NULL, NULL, 0 }, /* kernbefore */
441 { roff_line_ignore, NULL, NULL, 0 }, /* kernpair */
442 { roff_unsupp, NULL, NULL, 0 }, /* lc */
443 { roff_unsupp, NULL, NULL, 0 }, /* lc_ctype */
444 { roff_unsupp, NULL, NULL, 0 }, /* lds */
445 { roff_unsupp, NULL, NULL, 0 }, /* length */
446 { roff_line_ignore, NULL, NULL, 0 }, /* letadj */
447 { roff_insec, NULL, NULL, 0 }, /* lf */
448 { roff_line_ignore, NULL, NULL, 0 }, /* lg */
449 { roff_line_ignore, NULL, NULL, 0 }, /* lhang */
450 { roff_unsupp, NULL, NULL, 0 }, /* linetabs */
451 { roff_unsupp, NULL, NULL, 0 }, /* lnr */
452 { roff_unsupp, NULL, NULL, 0 }, /* lnrf */
453 { roff_unsupp, NULL, NULL, 0 }, /* lpfx */
454 { roff_line_ignore, NULL, NULL, 0 }, /* ls */
455 { roff_unsupp, NULL, NULL, 0 }, /* lsm */
456 { roff_line_ignore, NULL, NULL, 0 }, /* lt */
457 { roff_line_ignore, NULL, NULL, 0 }, /* mc */
458 { roff_line_ignore, NULL, NULL, 0 }, /* mediasize */
459 { roff_line_ignore, NULL, NULL, 0 }, /* minss */
460 { roff_line_ignore, NULL, NULL, 0 }, /* mk */
461 { roff_insec, NULL, NULL, 0 }, /* mso */
462 { roff_line_ignore, NULL, NULL, 0 }, /* na */
463 { roff_line_ignore, NULL, NULL, 0 }, /* ne */
464 { roff_line_ignore, NULL, NULL, 0 }, /* nh */
465 { roff_line_ignore, NULL, NULL, 0 }, /* nhychar */
466 { roff_unsupp, NULL, NULL, 0 }, /* nm */
467 { roff_unsupp, NULL, NULL, 0 }, /* nn */
468 { roff_unsupp, NULL, NULL, 0 }, /* nop */
469 { roff_nr, NULL, NULL, 0 }, /* nr */
470 { roff_unsupp, NULL, NULL, 0 }, /* nrf */
471 { roff_line_ignore, NULL, NULL, 0 }, /* nroff */
472 { roff_line_ignore, NULL, NULL, 0 }, /* ns */
473 { roff_insec, NULL, NULL, 0 }, /* nx */
474 { roff_insec, NULL, NULL, 0 }, /* open */
475 { roff_insec, NULL, NULL, 0 }, /* opena */
476 { roff_line_ignore, NULL, NULL, 0 }, /* os */
477 { roff_unsupp, NULL, NULL, 0 }, /* output */
478 { roff_line_ignore, NULL, NULL, 0 }, /* padj */
479 { roff_line_ignore, NULL, NULL, 0 }, /* papersize */
480 { roff_line_ignore, NULL, NULL, 0 }, /* pc */
481 { roff_line_ignore, NULL, NULL, 0 }, /* pev */
482 { roff_insec, NULL, NULL, 0 }, /* pi */
483 { roff_unsupp, NULL, NULL, 0 }, /* PI */
484 { roff_line_ignore, NULL, NULL, 0 }, /* pl */
485 { roff_line_ignore, NULL, NULL, 0 }, /* pm */
486 { roff_line_ignore, NULL, NULL, 0 }, /* pn */
487 { roff_line_ignore, NULL, NULL, 0 }, /* pnr */
488 { roff_line_ignore, NULL, NULL, 0 }, /* po */
489 { roff_line_ignore, NULL, NULL, 0 }, /* ps */
490 { roff_unsupp, NULL, NULL, 0 }, /* psbb */
491 { roff_unsupp, NULL, NULL, 0 }, /* pshape */
492 { roff_insec, NULL, NULL, 0 }, /* pso */
493 { roff_line_ignore, NULL, NULL, 0 }, /* ptr */
494 { roff_line_ignore, NULL, NULL, 0 }, /* pvs */
495 { roff_unsupp, NULL, NULL, 0 }, /* rchar */
496 { roff_line_ignore, NULL, NULL, 0 }, /* rd */
497 { roff_line_ignore, NULL, NULL, 0 }, /* recursionlimit */
498 { roff_unsupp, NULL, NULL, 0 }, /* return */
499 { roff_unsupp, NULL, NULL, 0 }, /* rfschar */
500 { roff_line_ignore, NULL, NULL, 0 }, /* rhang */
501 { roff_line_ignore, NULL, NULL, 0 }, /* rj */
502 { roff_rm, NULL, NULL, 0 }, /* rm */
503 { roff_unsupp, NULL, NULL, 0 }, /* rn */
504 { roff_unsupp, NULL, NULL, 0 }, /* rnn */
505 { roff_rr, NULL, NULL, 0 }, /* rr */
506 { roff_line_ignore, NULL, NULL, 0 }, /* rs */
507 { roff_line_ignore, NULL, NULL, 0 }, /* rt */
508 { roff_unsupp, NULL, NULL, 0 }, /* schar */
509 { roff_line_ignore, NULL, NULL, 0 }, /* sentchar */
510 { roff_line_ignore, NULL, NULL, 0 }, /* shc */
511 { roff_unsupp, NULL, NULL, 0 }, /* shift */
512 { roff_line_ignore, NULL, NULL, 0 }, /* sizes */
513 { roff_so, NULL, NULL, 0 }, /* so */
514 { roff_line_ignore, NULL, NULL, 0 }, /* spacewidth */
515 { roff_line_ignore, NULL, NULL, 0 }, /* special */
516 { roff_line_ignore, NULL, NULL, 0 }, /* spreadwarn */
517 { roff_line_ignore, NULL, NULL, 0 }, /* ss */
518 { roff_line_ignore, NULL, NULL, 0 }, /* sty */
519 { roff_unsupp, NULL, NULL, 0 }, /* substring */
520 { roff_line_ignore, NULL, NULL, 0 }, /* sv */
521 { roff_insec, NULL, NULL, 0 }, /* sy */
522 { roff_T_, NULL, NULL, 0 }, /* T& */
523 { roff_unsupp, NULL, NULL, 0 }, /* ta */
524 { roff_unsupp, NULL, NULL, 0 }, /* tc */
525 { roff_TE, NULL, NULL, 0 }, /* TE */
526 { roff_TH, NULL, NULL, 0 }, /* TH */
527 { roff_unsupp, NULL, NULL, 0 }, /* ti */
528 { roff_line_ignore, NULL, NULL, 0 }, /* tkf */
529 { roff_unsupp, NULL, NULL, 0 }, /* tl */
530 { roff_line_ignore, NULL, NULL, 0 }, /* tm */
531 { roff_line_ignore, NULL, NULL, 0 }, /* tm1 */
532 { roff_line_ignore, NULL, NULL, 0 }, /* tmc */
533 { roff_tr, NULL, NULL, 0 }, /* tr */
534 { roff_line_ignore, NULL, NULL, 0 }, /* track */
535 { roff_line_ignore, NULL, NULL, 0 }, /* transchar */
536 { roff_insec, NULL, NULL, 0 }, /* trf */
537 { roff_line_ignore, NULL, NULL, 0 }, /* trimat */
538 { roff_unsupp, NULL, NULL, 0 }, /* trin */
539 { roff_unsupp, NULL, NULL, 0 }, /* trnt */
540 { roff_line_ignore, NULL, NULL, 0 }, /* troff */
541 { roff_TS, NULL, NULL, 0 }, /* TS */
542 { roff_line_ignore, NULL, NULL, 0 }, /* uf */
543 { roff_line_ignore, NULL, NULL, 0 }, /* ul */
544 { roff_unsupp, NULL, NULL, 0 }, /* unformat */
545 { roff_line_ignore, NULL, NULL, 0 }, /* unwatch */
546 { roff_line_ignore, NULL, NULL, 0 }, /* unwatchn */
547 { roff_line_ignore, NULL, NULL, 0 }, /* vpt */
548 { roff_line_ignore, NULL, NULL, 0 }, /* vs */
549 { roff_line_ignore, NULL, NULL, 0 }, /* warn */
550 { roff_line_ignore, NULL, NULL, 0 }, /* warnscale */
551 { roff_line_ignore, NULL, NULL, 0 }, /* watch */
552 { roff_line_ignore, NULL, NULL, 0 }, /* watchlength */
553 { roff_line_ignore, NULL, NULL, 0 }, /* watchn */
554 { roff_unsupp, NULL, NULL, 0 }, /* wh */
555 { roff_unsupp, NULL, NULL, 0 }, /* while */
556 { roff_insec, NULL, NULL, 0 }, /* write */
557 { roff_insec, NULL, NULL, 0 }, /* writec */
558 { roff_insec, NULL, NULL, 0 }, /* writem */
559 { roff_line_ignore, NULL, NULL, 0 }, /* xflag */
560 { roff_cblock, NULL, NULL, 0 }, /* . */
561 { roff_userdef, NULL, NULL, 0 } /* user-defined macro; presumably index ROFF_USERDEF -- confirm */
562 };
563
/*
 * NULL-terminated list of mdoc macro names.
 * NOTE(review): the consumer of this list is not visible here; confirm
 * how the "reserved" names are used before changing the list.
 */
564 /* not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH */
565 const char *const __mdoc_reserved[] = {
566 "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
567 "Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
568 "Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
569 "Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
570 "Dt", "Dv", "Dx", "D1",
571 "Ec", "Ed", "Ef", "Ek", "El", "Em",
572 "En", "Eo", "Er", "Es", "Ev", "Ex",
573 "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
574 "Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp",
575 "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
576 "Oc", "Oo", "Op", "Os", "Ot", "Ox",
577 "Pa", "Pc", "Pf", "Po", "Pp", "Pq",
578 "Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv",
579 "Sc", "Sh", "Sm", "So", "Sq",
580 "Ss", "St", "Sx", "Sy",
581 "Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
582 "%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
583 "%P", "%Q", "%R", "%T", "%U", "%V",
584 NULL
585 };
586
/*
 * NULL-terminated list of man macro names.
 * NOTE(review): the consumer of this list is not visible here; confirm
 * how the "reserved" names are used before changing the list.
 */
587 /* not currently implemented: BT DE DS ME MT PT SY TQ YS */
588 const char *const __man_reserved[] = {
589 "AT", "B", "BI", "BR", "DT",
590 "EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR",
591 "LP", "OP", "P", "PD", "PP",
592 "R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS",
593 "TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR",
594 NULL
595 };
596
597 /* Array of injected predefined strings; the initializers come from predefs.in. */
598 #define PREDEFS_MAX 38
599 static const struct predef predefs[PREDEFS_MAX] = {
600 #include "predefs.in"
601 };
602
/* File-scope state; NOTE(review): presumably maintained by roff_it() -- confirm. */
603 static int roffit_lines; /* number of lines to delay */
604 static char *roffit_macro; /* NUL-terminated macro line */
605
606
607 /* --- request table ------------------------------------------------------ */
608
609 struct ohash *
610 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
611 {
612 struct ohash *htab;
613 struct roffreq *req;
614 enum roff_tok tok;
615 size_t sz;
616 unsigned int slot;
617
618 htab = mandoc_malloc(sizeof(*htab));
619 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
620
621 for (tok = mintok; tok < maxtok; tok++) {
622 if (roff_name[tok] == NULL)
623 continue;
624 sz = strlen(roff_name[tok]);
625 req = mandoc_malloc(sizeof(*req) + sz + 1);
626 req->tok = tok;
627 memcpy(req->name, roff_name[tok], sz + 1);
628 slot = ohash_qlookup(htab, req->name);
629 ohash_insert(htab, slot, req);
630 }
631 return htab;
632 }
633
/*
 * Release a table created by roffhash_alloc(): every entry, the
 * table's internal storage, and the table object itself.
 * A NULL argument is accepted and ignored.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 idx;

	if (htab != NULL) {
		entry = ohash_first(htab, &idx);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &idx);
		}
		ohash_delete(htab);
		free(htab);
	}
}
648
649 enum roff_tok
650 roffhash_find(struct ohash *htab, const char *name, size_t sz)
651 {
652 struct roffreq *req;
653 const char *end;
654
655 if (sz) {
656 end = name + sz;
657 req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
658 } else
659 req = ohash_find(htab, ohash_qlookup(htab, name));
660 return req == NULL ? TOKEN_NONE : req->tok;
661 }
662
663 /* --- stack of request blocks -------------------------------------------- */
664
665 /*
666 * Pop the current node off of the stack of roff instructions currently
667 * pending.
668 */
669 static void
670 roffnode_pop(struct roff *r)
671 {
672 struct roffnode *p;
673
674 assert(r->last);
675 p = r->last;
676
677 r->last = r->last->parent;
678 free(p->name);
679 free(p->end);
680 free(p);
681 }
682
683 /*
684 * Push a roff node onto the instruction stack. This must later be
685 * removed with roffnode_pop().
686 */
687 static void
688 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
689 int line, int col)
690 {
691 struct roffnode *p;
692
693 p = mandoc_calloc(1, sizeof(struct roffnode));
694 p->tok = tok;
695 if (name)
696 p->name = mandoc_strdup(name);
697 p->parent = r->last;
698 p->line = line;
699 p->col = col;
700 p->rule = p->parent ? p->parent->rule : 0;
701
702 r->last = p;
703 }
704
705 /* --- roff parser state data management ---------------------------------- */
706
707 static void
708 roff_free1(struct roff *r)
709 {
710 struct tbl_node *tbl;
711 struct eqn_node *e;
712 int i;
713
714 while (NULL != (tbl = r->first_tbl)) {
715 r->first_tbl = tbl->next;
716 tbl_free(tbl);
717 }
718 r->first_tbl = r->last_tbl = r->tbl = NULL;
719
720 while (NULL != (e = r->first_eqn)) {
721 r->first_eqn = e->next;
722 eqn_free(e);
723 }
724 r->first_eqn = r->last_eqn = r->eqn = NULL;
725
726 while (r->last)
727 roffnode_pop(r);
728
729 free (r->rstack);
730 r->rstack = NULL;
731 r->rstacksz = 0;
732 r->rstackpos = -1;
733
734 roff_freereg(r->regtab);
735 r->regtab = NULL;
736
737 roff_freestr(r->strtab);
738 roff_freestr(r->xmbtab);
739 r->strtab = r->xmbtab = NULL;
740
741 if (r->xtab)
742 for (i = 0; i < 128; i++)
743 free(r->xtab[i].p);
744 free(r->xtab);
745 r->xtab = NULL;
746 }
747
748 void
749 roff_reset(struct roff *r)
750 {
751 roff_free1(r);
752 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
753 r->control = 0;
754 }
755
756 void
757 roff_free(struct roff *r)
758 {
759 roff_free1(r);
760 roffhash_free(r->reqtab);
761 free(r);
762 }
763
764 struct roff *
765 roff_alloc(struct mparse *parse, int options)
766 {
767 struct roff *r;
768
769 r = mandoc_calloc(1, sizeof(struct roff));
770 r->parse = parse;
771 r->reqtab = roffhash_alloc(0, ROFF_USERDEF);
772 r->options = options;
773 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
774 r->rstackpos = -1;
775 return r;
776 }
777
778 /* --- syntax tree state data management ---------------------------------- */
779
780 static void
781 roff_man_free1(struct roff_man *man)
782 {
783
784 if (man->first != NULL)
785 roff_node_delete(man, man->first);
786 free(man->meta.msec);
787 free(man->meta.vol);
788 free(man->meta.os);
789 free(man->meta.arch);
790 free(man->meta.title);
791 free(man->meta.name);
792 free(man->meta.date);
793 }
794
795 static void
796 roff_man_alloc1(struct roff_man *man)
797 {
798
799 memset(&man->meta, 0, sizeof(man->meta));
800 man->first = mandoc_calloc(1, sizeof(*man->first));
801 man->first->type = ROFFT_ROOT;
802 man->last = man->first;
803 man->last_es = NULL;
804 man->flags = 0;
805 man->macroset = MACROSET_NONE;
806 man->lastsec = man->lastnamed = SEC_NONE;
807 man->next = ROFF_NEXT_CHILD;
808 }
809
/*
 * Discard the current parse result and return the roff_man object
 * to its initial state, ready for another file.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);	/* drop the old tree and meta data */
	roff_man_alloc1(man);	/* start over with an empty root */
}
817
/*
 * Destroy a roff_man object: its syntax tree, its meta data, and
 * the object itself.
 * A NULL argument is accepted and ignored, matching free(3) and
 * roffhash_free().
 */
void
roff_man_free(struct roff_man *man)
{
	if (man == NULL)
		return;
	roff_man_free1(man);
	free(man);
}
825
826 struct roff_man *
827 roff_man_alloc(struct roff *roff, struct mparse *parse,
828 const char *defos, int quick)
829 {
830 struct roff_man *man;
831
832 man = mandoc_calloc(1, sizeof(*man));
833 man->parse = parse;
834 man->roff = roff;
835 man->defos = defos;
836 man->quick = quick;
837 roff_man_alloc1(man);
838 roff->man = man;
839 return man;
840 }
841
842 /* --- syntax tree handling ----------------------------------------------- */
843
844 struct roff_node *
845 roff_node_alloc(struct roff_man *man, int line, int pos,
846 enum roff_type type, int tok)
847 {
848 struct roff_node *n;
849
850 n = mandoc_calloc(1, sizeof(*n));
851 n->line = line;
852 n->pos = pos;
853 n->tok = tok;
854 n->type = type;
855 n->sec = man->lastsec;
856
857 if (man->flags & MDOC_SYNOPSIS)
858 n->flags |= NODE_SYNPRETTY;
859 else
860 n->flags &= ~NODE_SYNPRETTY;
861 if (man->flags & MDOC_NEWLINE)
862 n->flags |= NODE_LINE;
863 man->flags &= ~MDOC_NEWLINE;
864
865 return n;
866 }
867
868 void
869 roff_node_append(struct roff_man *man, struct roff_node *n)
870 {
871
872 switch (man->next) {
873 case ROFF_NEXT_SIBLING:
874 if (man->last->next != NULL) {
875 n->next = man->last->next;
876 man->last->next->prev = n;
877 } else
878 man->last->parent->last = n;
879 man->last->next = n;
880 n->prev = man->last;
881 n->parent = man->last->parent;
882 break;
883 case ROFF_NEXT_CHILD:
884 if (man->last->child != NULL) {
885 n->next = man->last->child;
886 man->last->child->prev = n;
887 } else
888 man->last->last = n;
889 man->last->child = n;
890 n->parent = man->last;
891 break;
892 default:
893 abort();
894 }
895 man->last = n;
896
897 switch (n->type) {
898 case ROFFT_HEAD:
899 n->parent->head = n;
900 break;
901 case ROFFT_BODY:
902 if (n->end != ENDBODY_NOT)
903 return;
904 n->parent->body = n;
905 break;
906 case ROFFT_TAIL:
907 n->parent->tail = n;
908 break;
909 default:
910 return;
911 }
912
913 /*
914 * Copy over the normalised-data pointer of our parent. Not
915 * everybody has one, but copying a null pointer is fine.
916 */
917
918 n->norm = n->parent->norm;
919 assert(n->parent->type == ROFFT_BLOCK);
920 }
921
922 void
923 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
924 {
925 struct roff_node *n;
926
927 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
928 n->string = roff_strdup(man->roff, word);
929 roff_node_append(man, n);
930 n->flags |= NODE_VALID | NODE_ENDED;
931 man->next = ROFF_NEXT_SIBLING;
932 }
933
934 void
935 roff_word_append(struct roff_man *man, const char *word)
936 {
937 struct roff_node *n;
938 char *addstr, *newstr;
939
940 n = man->last;
941 addstr = roff_strdup(man->roff, word);
942 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
943 free(addstr);
944 free(n->string);
945 n->string = newstr;
946 man->next = ROFF_NEXT_SIBLING;
947 }
948
949 void
950 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
951 {
952 struct roff_node *n;
953
954 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
955 roff_node_append(man, n);
956 man->next = ROFF_NEXT_CHILD;
957 }
958
959 struct roff_node *
960 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
961 {
962 struct roff_node *n;
963
964 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
965 roff_node_append(man, n);
966 man->next = ROFF_NEXT_CHILD;
967 return n;
968 }
969
970 struct roff_node *
971 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
972 {
973 struct roff_node *n;
974
975 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
976 roff_node_append(man, n);
977 man->next = ROFF_NEXT_CHILD;
978 return n;
979 }
980
981 struct roff_node *
982 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
983 {
984 struct roff_node *n;
985
986 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
987 roff_node_append(man, n);
988 man->next = ROFF_NEXT_CHILD;
989 return n;
990 }
991
992 void
993 roff_addeqn(struct roff_man *man, const struct eqn *eqn)
994 {
995 struct roff_node *n;
996
997 n = roff_node_alloc(man, eqn->ln, eqn->pos, ROFFT_EQN, TOKEN_NONE);
998 n->eqn = eqn;
999 if (eqn->ln > man->last->line)
1000 n->flags |= NODE_LINE;
1001 roff_node_append(man, n);
1002 man->next = ROFF_NEXT_SIBLING;
1003 }
1004
1005 void
1006 roff_addtbl(struct roff_man *man, const struct tbl_span *tbl)
1007 {
1008 struct roff_node *n;
1009
1010 if (man->macroset == MACROSET_MAN)
1011 man_breakscope(man, TOKEN_NONE);
1012 n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE);
1013 n->span = tbl;
1014 roff_node_append(man, n);
1015 n->flags |= NODE_VALID | NODE_ENDED;
1016 man->next = ROFF_NEXT_SIBLING;
1017 }
1018
1019 void
1020 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1021 {
1022
1023 /* Adjust siblings. */
1024
1025 if (n->prev)
1026 n->prev->next = n->next;
1027 if (n->next)
1028 n->next->prev = n->prev;
1029
1030 /* Adjust parent. */
1031
1032 if (n->parent != NULL) {
1033 if (n->parent->child == n)
1034 n->parent->child = n->next;
1035 if (n->parent->last == n)
1036 n->parent->last = n->prev;
1037 }
1038
1039 /* Adjust parse point. */
1040
1041 if (man == NULL)
1042 return;
1043 if (man->last == n) {
1044 if (n->prev == NULL) {
1045 man->last = n->parent;
1046 man->next = ROFF_NEXT_CHILD;
1047 } else {
1048 man->last = n->prev;
1049 man->next = ROFF_NEXT_SIBLING;
1050 }
1051 }
1052 if (man->first == n)
1053 man->first = NULL;
1054 }
1055
1056 void
1057 roff_node_free(struct roff_node *n)
1058 {
1059
1060 if (n->args != NULL)
1061 mdoc_argv_free(n->args);
1062 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1063 free(n->norm);
1064 free(n->string);
1065 free(n);
1066 }
1067
1068 void
1069 roff_node_delete(struct roff_man *man, struct roff_node *n)
1070 {
1071
1072 while (n->child != NULL)
1073 roff_node_delete(man, n->child);
1074 roff_node_unlink(man, n);
1075 roff_node_free(n);
1076 }
1077
1078 void
1079 deroff(char **dest, const struct roff_node *n)
1080 {
1081 char *cp;
1082 size_t sz;
1083
1084 if (n->type != ROFFT_TEXT) {
1085 for (n = n->child; n != NULL; n = n->next)
1086 deroff(dest, n);
1087 return;
1088 }
1089
1090 /* Skip leading whitespace. */
1091
1092 for (cp = n->string; *cp != '\0'; cp++) {
1093 if (cp[0] == '\\' && cp[1] != '\0' &&
1094 strchr(" %&0^|~", cp[1]) != NULL)
1095 cp++;
1096 else if ( ! isspace((unsigned char)*cp))
1097 break;
1098 }
1099
1100 /* Skip trailing backslash. */
1101
1102 sz = strlen(cp);
1103 if (sz > 0 && cp[sz - 1] == '\\')
1104 sz--;
1105
1106 /* Skip trailing whitespace. */
1107
1108 for (; sz; sz--)
1109 if ( ! isspace((unsigned char)cp[sz-1]))
1110 break;
1111
1112 /* Skip empty strings. */
1113
1114 if (sz == 0)
1115 return;
1116
1117 if (*dest == NULL) {
1118 *dest = mandoc_strndup(cp, sz);
1119 return;
1120 }
1121
1122 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1123 free(*dest);
1124 *dest = cp;
1125 }
1126
1127 /* --- main functions of the roff parser ---------------------------------- */
1128
1129 /*
1130 * In the current line, expand escape sequences that tend to get
1131 * used in numerical expressions and conditional requests.
1132 * Also check the syntax of the remaining escape sequences.
 *
 * The line is scanned backwards from its end down to buf->buf + pos.
 * \* is replaced by the value of the named string, \n by the value
 * of the named register, \B by '1' or '0' depending on whether its
 * argument validates as a numeric expression extending up to the
 * closing delimiter, and \w by 24 times the counted length of its
 * argument.  Each replacement allocates a new buffer that is stored
 * in buf->buf, with buf->sz updated accordingly.
 *
 * Returns ROFF_IGN after more than EXPAND_LIMIT expansions or when
 * buf->sz plus the length of a replacement exceeds SHRT_MAX,
 * and ROFF_CONT otherwise.
1133 */
1134 static enum rofferr
1135 roff_res(struct roff *r, struct buf *buf, int ln, int pos)
1136 {
1137 char ubuf[24]; /* buffer to print the number */
1138 const char *start; /* start of the string to process */
1139 char *stesc; /* start of an escape sequence ('\\') */
1140 const char *stnam; /* start of the name, after "[(*" */
1141 const char *cp; /* end of the name, e.g. before ']' */
1142 const char *res; /* the string to be substituted */
1143 char *nbuf; /* new buffer to copy buf->buf to */
1144 size_t maxl; /* expected length of the escape name */
1145 size_t naml; /* actual length of the escape name */
1146 enum mandoc_esc esc; /* type of the escape sequence */
1147 int inaml; /* length returned from mandoc_escape() */
1148 int expand_count; /* to avoid infinite loops */
1149 int npos; /* position in numeric expression */
1150 int arg_complete; /* argument not interrupted by eol */
1151 char term; /* character terminating the escape */
1152
1153 expand_count = 0;
1154 start = buf->buf + pos;
/* Begin at the last character; the loop condition steps backwards. */
1155 stesc = strchr(start, '\0') - 1;
1156 while (stesc-- > start) {
1157
1158 /* Search backwards for the next backslash. */
1159
1160 if (*stesc != '\\')
1161 continue;
1162
1163 /* If it is escaped, skip it. */
1164
1165 for (cp = stesc - 1; cp >= start; cp--)
1166 if (*cp != '\\')
1167 break;
1168
/*
 * stesc - cp is one more than the number of backslashes directly
 * preceding stesc, so an even distance means that an odd number
 * of backslashes precedes this one.
 */
1169 if ((stesc - cp) % 2 == 0) {
1170 stesc = (char *)cp;
1171 continue;
1172 }
1173
1174 /* Decide whether to expand or to check only. */
1175
1176 term = '\0';
1177 cp = stesc + 1;
1178 switch (*cp) {
1179 case '*':
1180 res = NULL;
1181 break;
1182 case 'B':
1183 case 'w':
/* The character following the escape letter delimits the argument. */
1184 term = cp[1];
1185 /* FALLTHROUGH */
1186 case 'n':
1187 res = ubuf;
1188 break;
1189 default:
1190 esc = mandoc_escape(&cp, &stnam, &inaml);
1191 if (esc == ESCAPE_ERROR ||
1192 (esc == ESCAPE_SPECIAL &&
1193 mchars_spec2cp(stnam, inaml) < 0))
1194 mandoc_vmsg(MANDOCERR_ESC_BAD,
1195 r->parse, ln, (int)(stesc - buf->buf),
1196 "%.*s", (int)(cp - stesc), stesc);
1197 continue;
1198 }
1199
1200 if (EXPAND_LIMIT < ++expand_count) {
1201 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1202 ln, (int)(stesc - buf->buf), NULL);
1203 return ROFF_IGN;
1204 }
1205
1206 /*
1207 * The third character decides the length
1208 * of the name of the string or register.
1209 * Save a pointer to the name.
1210 */
1211
1212 if (term == '\0') {
1213 switch (*++cp) {
1214 case '\0':
1215 maxl = 0;
1216 break;
1217 case '(':
1218 cp++;
1219 maxl = 2;
1220 break;
1221 case '[':
1222 cp++;
1223 term = ']';
1224 maxl = 0;
1225 break;
1226 default:
1227 maxl = 1;
1228 break;
1229 }
1230 } else {
1231 cp += 2;
1232 maxl = 0;
1233 }
1234 stnam = cp;
1235
1236 /* Advance to the end of the name. */
1237
/* With maxl == 0, the name runs up to term or to the end of the line. */
1238 naml = 0;
1239 arg_complete = 1;
1240 while (maxl == 0 || naml < maxl) {
1241 if (*cp == '\0') {
1242 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
1243 ln, (int)(stesc - buf->buf), stesc);
1244 arg_complete = 0;
1245 break;
1246 }
1247 if (maxl == 0 && *cp == term) {
1248 cp++;
1249 break;
1250 }
/* Only \w looks for escape sequences nested inside its argument. */
1251 if (*cp++ != '\\' || stesc[1] != 'w') {
1252 naml++;
1253 continue;
1254 }
1255 switch (mandoc_escape(&cp, NULL, NULL)) {
1256 case ESCAPE_SPECIAL:
1257 case ESCAPE_UNICODE:
1258 case ESCAPE_NUMBERED:
1259 case ESCAPE_OVERSTRIKE:
1260 naml++;
1261 break;
1262 default:
1263 break;
1264 }
1265 }
1266
1267 /*
1268 * Retrieve the replacement string; if it is
1269 * undefined, resume searching for escapes.
1270 */
1271
1272 switch (stesc[1]) {
1273 case '*':
1274 if (arg_complete)
1275 res = roff_getstrn(r, stnam, naml);
1276 break;
1277 case 'B':
1278 npos = 0;
1279 ubuf[0] = arg_complete &&
1280 roff_evalnum(r, ln, stnam, &npos,
1281 NULL, ROFFNUM_SCALE) &&
1282 stnam + npos + 1 == cp ? '1' : '0';
1283 ubuf[1] = '\0';
1284 break;
1285 case 'n':
1286 if (arg_complete)
1287 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1288 roff_getregn(r, stnam, naml));
1289 else
1290 ubuf[0] = '\0';
1291 break;
1292 case 'w':
1293 /* use even incomplete args */
1294 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1295 24 * (int)naml);
1296 break;
1297 }
1298
/* res is only NULL here for \* with an undefined or incomplete name. */
1299 if (res == NULL) {
1300 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1301 r->parse, ln, (int)(stesc - buf->buf),
1302 "%.*s", (int)naml, stnam);
1303 res = "";
1304 } else if (buf->sz + strlen(res) > SHRT_MAX) {
1305 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1306 ln, (int)(stesc - buf->buf), NULL);
1307 return ROFF_IGN;
1308 }
1309
1310 /* Replace the escape sequence by the string. */
1311
/* Cutting the old buffer at the backslash lets "%s" copy the prefix. */
1312 *stesc = '\0';
1313 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
1314 buf->buf, res, cp) + 1;
1315
1316 /* Prepare for the next replacement. */
1317
/*
 * Continue the backward scan from the end of the inserted text,
 * such that the replacement itself is scanned for escapes, too.
 */
1318 start = nbuf + pos;
1319 stesc = nbuf + (stesc - buf->buf) + strlen(res);
1320 free(buf->buf);
1321 buf->buf = nbuf;
1322 }
1323 return ROFF_CONT;
1324 }
1325
1326 /*
1327 * Process text streams.
1328 */
1329 static enum rofferr
1330 roff_parsetext(struct buf *buf, int pos, int *offs)
1331 {
1332 size_t sz;
1333 const char *start;
1334 char *p;
1335 int isz;
1336 enum mandoc_esc esc;
1337
1338 /* Spring the input line trap. */
1339
1340 if (roffit_lines == 1) {
1341 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1342 free(buf->buf);
1343 buf->buf = p;
1344 buf->sz = isz + 1;
1345 *offs = 0;
1346 free(roffit_macro);
1347 roffit_lines = 0;
1348 return ROFF_REPARSE;
1349 } else if (roffit_lines > 1)
1350 --roffit_lines;
1351
1352 /* Convert all breakable hyphens into ASCII_HYPH. */
1353
1354 start = p = buf->buf + pos;
1355
1356 while (*p != '\0') {
1357 sz = strcspn(p, "-\\");
1358 p += sz;
1359
1360 if (*p == '\0')
1361 break;
1362
1363 if (*p == '\\') {
1364 /* Skip over escapes. */
1365 p++;
1366 esc = mandoc_escape((const char **)&p, NULL, NULL);
1367 if (esc == ESCAPE_ERROR)
1368 break;
1369 while (*p == '-')
1370 p++;
1371 continue;
1372 } else if (p == start) {
1373 p++;
1374 continue;
1375 }
1376
1377 if (isalpha((unsigned char)p[-1]) &&
1378 isalpha((unsigned char)p[1]))
1379 *p = ASCII_HYPH;
1380 p++;
1381 }
1382 return ROFF_CONT;
1383 }
1384
/*
 * Process one input line held in buf, starting at offset *offs.
 * In this order: handle in-line equation delimiters, expand escape
 * sequences with roff_res(), detect a control line with
 * roff_getcontrol(), give text lines to the text handler of the
 * innermost open roff block, hand the line to an open equation
 * or table, run remaining text lines through roff_parsetext(),
 * and dispatch request lines either to the sub handler of the open
 * block or to the proc handler of the request found by roff_parse().
 * Returns the code of whichever step finished the line; ROFF_CONT is
 * returned when roff_parse() reports TOKEN_NONE outside of a table.
 */
1385 enum rofferr
1386 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
1387 {
1388 enum roff_tok t;
1389 enum rofferr e;
1390 int pos; /* parse point */
1391 int spos; /* saved parse point for messages */
1392 int ppos; /* original offset in buf->buf */
1393 int ctl; /* macro line (boolean) */
1394
1395 ppos = pos = *offs;
1396
1397 /* Handle in-line equation delimiters. */
1398
1399 if (r->tbl == NULL &&
1400 r->last_eqn != NULL && r->last_eqn->delim &&
1401 (r->eqn == NULL || r->eqn_inline)) {
1402 e = roff_eqndelim(r, buf, pos);
1403 if (e == ROFF_REPARSE)
1404 return e;
1405 assert(e == ROFF_CONT);
1406 }
1407
1408 /* Expand some escape sequences. */
1409
1410 e = roff_res(r, buf, ln, pos);
1411 if (e == ROFF_IGN)
1412 return e;
1413 assert(e == ROFF_CONT);
1414
/* roff_getcontrol() receives &pos and may move the parse point. */
1415 ctl = roff_getcontrol(r, buf->buf, &pos);
1416
1417 /*
1418 * First, if a scope is open and we're not a macro, pass the
1419 * text through the macro's filter.
1420 * Equations process all content themselves.
1421 * Tables process almost all content themselves, but we want
1422 * to warn about macros before passing it there.
1423 */
1424
1425 if (r->last != NULL && ! ctl) {
1426 t = r->last->tok;
1427 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1428 if (e == ROFF_IGN)
1429 return e;
1430 assert(e == ROFF_CONT);
1431 }
1432 if (r->eqn != NULL)
1433 return eqn_read(&r->eqn, ln, buf->buf, ppos, offs);
1434 if (r->tbl != NULL && ( ! ctl || buf->buf[pos] == '\0'))
1435 return tbl_read(r->tbl, ln, buf->buf, ppos);
1436 if ( ! ctl)
1437 return roff_parsetext(buf, pos, offs);
1438
1439 /* Skip empty request lines. */
1440
1441 if (buf->buf[pos] == '"') {
1442 mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
1443 ln, pos, NULL);
1444 return ROFF_IGN;
1445 } else if (buf->buf[pos] == '\0')
1446 return ROFF_IGN;
1447
1448 /*
1449 * If a scope is open, go to the child handler for that macro,
1450 * as it may want to preprocess before doing anything with it.
1451 * Don't do so if an equation is open.
1452 */
1453
1454 if (r->last) {
1455 t = r->last->tok;
1456 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1457 }
1458
1459 /* No scope is open. This is a new request or macro. */
1460
/* Remember where the name starts; roff_parse() may advance pos. */
1461 spos = pos;
1462 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1463
1464 /* Tables ignore most macros. */
1465
1466 if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS)) {
1467 mandoc_msg(MANDOCERR_TBLMACRO, r->parse,
1468 ln, pos, buf->buf + spos);
1469 if (t == ROFF_TS)
1470 return ROFF_IGN;
/* Skip the first word and the blanks after it before tbl_read(). */
1471 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1472 pos++;
1473 while (buf->buf[pos] == ' ')
1474 pos++;
1475 return tbl_read(r->tbl, ln, buf->buf, pos);
1476 }
1477
1478 /*
1479 * This is neither a roff request nor a user-defined macro.
1480 * Let the standard macro set parsers handle it.
1481 */
1482
1483 if (t == TOKEN_NONE)
1484 return ROFF_CONT;
1485
1486 /* Execute a roff request or a user defined macro. */
1487
1488 return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
1489 }
1490
1491 void
1492 roff_endparse(struct roff *r)
1493 {
1494
1495 if (r->last)
1496 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1497 r->last->line, r->last->col,
1498 roff_name[r->last->tok]);
1499
1500 if (r->eqn) {
1501 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1502 r->eqn->eqn.ln, r->eqn->eqn.pos, "EQ");
1503 eqn_end(&r->eqn);
1504 }
1505
1506 if (r->tbl) {
1507 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1508 r->tbl->line, r->tbl->pos, "TS");
1509 tbl_end(&r->tbl);
1510 }
1511 }
1512
1513 /*
1514 * Parse a roff node's type from the input buffer. This must be in the
1515 * form of ".foo xxx" in the usual way.
1516 */
1517 static enum roff_tok
1518 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1519 {
1520 char *cp;
1521 const char *mac;
1522 size_t maclen;
1523 enum roff_tok t;
1524
1525 cp = buf + *pos;
1526
1527 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1528 return TOKEN_NONE;
1529
1530 mac = cp;
1531 maclen = roff_getname(r, &cp, ln, ppos);
1532
1533 t = (r->current_string = roff_getstrn(r, mac, maclen))
1534 ? ROFF_USERDEF : roffhash_find(r->reqtab, mac, maclen);
1535
1536 if (t != TOKEN_NONE)
1537 *pos = cp - buf;
1538
1539 return t;
1540 }
1541
1542 /* --- handling of request blocks ----------------------------------------- */
1543
1544 static enum rofferr
1545 roff_cblock(ROFF_ARGS)
1546 {
1547
1548 /*
1549 * A block-close `..' should only be invoked as a child of an
1550 * ignore macro, otherwise raise a warning and just ignore it.
1551 */
1552
1553 if (r->last == NULL) {
1554 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1555 ln, ppos, "..");
1556 return ROFF_IGN;
1557 }
1558
1559 switch (r->last->tok) {
1560 case ROFF_am:
1561 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1562 case ROFF_ami:
1563 case ROFF_de:
1564 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1565 case ROFF_dei:
1566 case ROFF_ig:
1567 break;
1568 default:
1569 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1570 ln, ppos, "..");
1571 return ROFF_IGN;
1572 }
1573
1574 if (buf->buf[pos] != '\0')
1575 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1576 ".. %s", buf->buf + pos);
1577
1578 roffnode_pop(r);
1579 roffnode_cleanscope(r);
1580 return ROFF_IGN;
1581
1582 }
1583
1584 static void
1585 roffnode_cleanscope(struct roff *r)
1586 {
1587
1588 while (r->last) {
1589 if (--r->last->endspan != 0)
1590 break;
1591 roffnode_pop(r);
1592 }
1593 }
1594
1595 static void
1596 roff_ccond(struct roff *r, int ln, int ppos)
1597 {
1598
1599 if (NULL == r->last) {
1600 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1601 ln, ppos, "\\}");
1602 return;
1603 }
1604
1605 switch (r->last->tok) {
1606 case ROFF_el:
1607 case ROFF_ie:
1608 case ROFF_if:
1609 break;
1610 default:
1611 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1612 ln, ppos, "\\}");
1613 return;
1614 }
1615
1616 if (r->last->endspan > -1) {
1617 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1618 ln, ppos, "\\}");
1619 return;
1620 }
1621
1622 roffnode_pop(r);
1623 roffnode_cleanscope(r);
1624 return;
1625 }
1626
1627 static enum rofferr
1628 roff_block(ROFF_ARGS)
1629 {
1630 const char *name;
1631 char *iname, *cp;
1632 size_t namesz;
1633
1634 /* Ignore groff compatibility mode for now. */
1635
1636 if (tok == ROFF_de1)
1637 tok = ROFF_de;
1638 else if (tok == ROFF_dei1)
1639 tok = ROFF_dei;
1640 else if (tok == ROFF_am1)
1641 tok = ROFF_am;
1642 else if (tok == ROFF_ami1)
1643 tok = ROFF_ami;
1644
1645 /* Parse the macro name argument. */
1646
1647 cp = buf->buf + pos;
1648 if (tok == ROFF_ig) {
1649 iname = NULL;
1650 namesz = 0;
1651 } else {
1652 iname = cp;
1653 namesz = roff_getname(r, &cp, ln, ppos);
1654 iname[namesz] = '\0';
1655 }
1656
1657 /* Resolve the macro name argument if it is indirect. */
1658
1659 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1660 if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1661 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1662 r->parse, ln, (int)(iname - buf->buf),
1663 "%.*s", (int)namesz, iname);
1664 namesz = 0;
1665 } else
1666 namesz = strlen(name);
1667 } else
1668 name = iname;
1669
1670 if (namesz == 0 && tok != ROFF_ig) {
1671 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
1672 ln, ppos, roff_name[tok]);
1673 return ROFF_IGN;
1674 }
1675
1676 roffnode_push(r, tok, name, ln, ppos);
1677
1678 /*
1679 * At the beginning of a `de' macro, clear the existing string
1680 * with the same name, if there is one. New content will be
1681 * appended from roff_block_text() in multiline mode.
1682 */
1683
1684 if (tok == ROFF_de || tok == ROFF_dei)
1685 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1686
1687 if (*cp == '\0')
1688 return ROFF_IGN;
1689
1690 /* Get the custom end marker. */
1691
1692 iname = cp;
1693 namesz = roff_getname(r, &cp, ln, ppos);
1694
1695 /* Resolve the end marker if it is indirect. */
1696
1697 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1698 if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1699 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1700 r->parse, ln, (int)(iname - buf->buf),
1701 "%.*s", (int)namesz, iname);
1702 namesz = 0;
1703 } else
1704 namesz = strlen(name);
1705 } else
1706 name = iname;
1707
1708 if (namesz)
1709 r->last->end = mandoc_strndup(name, namesz);
1710
1711 if (*cp != '\0')
1712 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
1713 ln, pos, ".%s ... %s", roff_name[tok], cp);
1714
1715 return ROFF_IGN;
1716 }
1717
1718 static enum rofferr
1719 roff_block_sub(ROFF_ARGS)
1720 {
1721 enum roff_tok t;
1722 int i, j;
1723
1724 /*
1725 * First check whether a custom macro exists at this level. If
1726 * it does, then check against it. This is some of groff's
1727 * stranger behaviours. If we encountered a custom end-scope
1728 * tag and that tag also happens to be a "real" macro, then we
1729 * need to try interpreting it again as a real macro. If it's
1730 * not, then return ignore. Else continue.
1731 */
1732
1733 if (r->last->end) {
1734 for (i = pos, j = 0; r->last->end[j]; j++, i++)
1735 if (buf->buf[i] != r->last->end[j])
1736 break;
1737
1738 if (r->last->end[j] == '\0' &&
1739 (buf->buf[i] == '\0' ||
1740 buf->buf[i] == ' ' ||
1741 buf->buf[i] == '\t')) {
1742 roffnode_pop(r);
1743 roffnode_cleanscope(r);
1744
1745 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
1746 i++;
1747
1748 pos = i;
1749 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
1750 TOKEN_NONE)
1751 return ROFF_RERUN;
1752 return ROFF_IGN;
1753 }
1754 }
1755
1756 /*
1757 * If we have no custom end-query or lookup failed, then try
1758 * pulling it out of the hashtable.
1759 */
1760
1761 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1762
1763 if (t != ROFF_cblock) {
1764 if (tok != ROFF_ig)
1765 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
1766 return ROFF_IGN;
1767 }
1768
1769 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
1770 }
1771
1772 static enum rofferr
1773 roff_block_text(ROFF_ARGS)
1774 {
1775
1776 if (tok != ROFF_ig)
1777 roff_setstr(r, r->last->name, buf->buf + pos, 2);
1778
1779 return ROFF_IGN;
1780 }
1781
1782 static enum rofferr
1783 roff_cond_sub(ROFF_ARGS)
1784 {
1785 enum roff_tok t;
1786 char *ep;
1787 int rr;
1788
1789 rr = r->last->rule;
1790 roffnode_cleanscope(r);
1791 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1792
1793 /*
1794 * Fully handle known macros when they are structurally
1795 * required or when the conditional evaluated to true.
1796 */
1797
1798 if (t != TOKEN_NONE && (rr || roffs[t].flags & ROFFMAC_STRUCT))
1799 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
1800
1801 /*
1802 * If `\}' occurs on a macro line without a preceding macro,
1803 * drop the line completely.
1804 */
1805
1806 ep = buf->buf + pos;
1807 if (ep[0] == '\\' && ep[1] == '}')
1808 rr = 0;
1809
1810 /* Always check for the closing delimiter `\}'. */
1811
1812 while ((ep = strchr(ep, '\\')) != NULL) {
1813 if (*(++ep) == '}') {
1814 *ep = '&';
1815 roff_ccond(r, ln, ep - buf->buf - 1);
1816 }
1817 if (*ep != '\0')
1818 ++ep;
1819 }
1820 return rr ? ROFF_CONT : ROFF_IGN;
1821 }
1822
1823 static enum rofferr
1824 roff_cond_text(ROFF_ARGS)
1825 {
1826 char *ep;
1827 int rr;
1828
1829 rr = r->last->rule;
1830 roffnode_cleanscope(r);
1831
1832 ep = buf->buf + pos;
1833 while ((ep = strchr(ep, '\\')) != NULL) {
1834 if (*(++ep) == '}') {
1835 *ep = '&';
1836 roff_ccond(r, ln, ep - buf->buf - 1);
1837 }
1838 if (*ep != '\0')
1839 ++ep;
1840 }
1841 return rr ? ROFF_CONT : ROFF_IGN;
1842 }
1843
1844 /* --- handling of numeric and conditional expressions -------------------- */
1845
1846 /*
1847 * Parse a single signed integer number. Stop at the first non-digit.
1848 * If there is at least one digit, return success and advance the
1849 * parse point, else return failure and let the parse point unchanged.
1850 * Ignore overflows, treat them just like the C language.
1851 */
1852 static int
1853 roff_getnum(const char *v, int *pos, int *res, int flags)
1854 {
1855 int myres, scaled, n, p;
1856
1857 if (NULL == res)
1858 res = &myres;
1859
1860 p = *pos;
1861 n = v[p] == '-';
1862 if (n || v[p] == '+')
1863 p++;
1864
1865 if (flags & ROFFNUM_WHITE)
1866 while (isspace((unsigned char)v[p]))
1867 p++;
1868
1869 for (*res = 0; isdigit((unsigned char)v[p]); p++)
1870 *res = 10 * *res + v[p] - '0';
1871 if (p == *pos + n)
1872 return 0;
1873
1874 if (n)
1875 *res = -*res;
1876
1877 /* Each number may be followed by one optional scaling unit. */
1878
1879 switch (v[p]) {
1880 case 'f':
1881 scaled = *res * 65536;
1882 break;
1883 case 'i':
1884 scaled = *res * 240;
1885 break;
1886 case 'c':
1887 scaled = *res * 240 / 2.54;
1888 break;
1889 case 'v':
1890 case 'P':
1891 scaled = *res * 40;
1892 break;
1893 case 'm':
1894 case 'n':
1895 scaled = *res * 24;
1896 break;
1897 case 'p':
1898 scaled = *res * 10 / 3;
1899 break;
1900 case 'u':
1901 scaled = *res;
1902 break;
1903 case 'M':
1904 scaled = *res * 6 / 25;
1905 break;
1906 default:
1907 scaled = *res;
1908 p--;
1909 break;
1910 }
1911 if (flags & ROFFNUM_SCALE)
1912 *res = scaled;
1913
1914 *pos = p + 1;
1915 return 1;
1916 }
1917
/*
 * Evaluate a string comparison condition.
 * The character at v[*pos] serves as the delimiter.
 * Return 1 if the string between its first and second occurrence
 * equals the string between its second and third occurrence,
 * and 0 otherwise.
 * The cursor is advanced past the third occurrence
 * or, lacking that, to the end of the line.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim, *lhs, *rhs;
	int		 equal;

	equal = 0;
	delim = v + *pos;		/* initial delimiter */
	lhs = delim + 1;		/* scans the first string */
	rhs = strchr(lhs, *delim);	/* scans the second string */

	/* Without a middle delimiter, there is nothing to compare. */

	if (rhs != NULL) {
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: look for the final delimiter. */
				rhs = strchr(rhs, *delim);
				break;
			}
			if (*rhs == *delim) {
				/* Both strings ended simultaneously. */
				equal = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return equal;
}
1960
1961 /*
1962 * Evaluate an optionally negated single character, numerical,
1963 * or string condition.
1964 */
1965 static int
1966 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
1967 {
1968 char *cp, *name;
1969 size_t sz;
1970 int number, savepos, wanttrue;
1971
1972 if ('!' == v[*pos]) {
1973 wanttrue = 0;
1974 (*pos)++;
1975 } else
1976 wanttrue = 1;
1977
1978 switch (v[*pos]) {
1979 case '\0':
1980 return 0;
1981 case 'n':
1982 case 'o':
1983 (*pos)++;
1984 return wanttrue;
1985 case 'c':
1986 case 'd':
1987 case 'e':
1988 case 't':
1989 case 'v':
1990 (*pos)++;
1991 return !wanttrue;
1992 case 'r':
1993 cp = name = v + ++*pos;
1994 sz = roff_getname(r, &cp, ln, *pos);
1995 *pos = cp - v;
1996 return (sz && roff_hasregn(r, name, sz)) == wanttrue;
1997 default:
1998 break;
1999 }
2000
2001 savepos = *pos;
2002 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2003 return (number > 0) == wanttrue;
2004 else if (*pos == savepos)
2005 return roff_evalstrcond(v, pos) == wanttrue;
2006 else
2007 return 0;
2008 }
2009
2010 static enum rofferr
2011 roff_line_ignore(ROFF_ARGS)
2012 {
2013
2014 return ROFF_IGN;
2015 }
2016
2017 static enum rofferr
2018 roff_insec(ROFF_ARGS)
2019 {
2020
2021 mandoc_msg(MANDOCERR_REQ_INSEC, r->parse,
2022 ln, ppos, roff_name[tok]);
2023 return ROFF_IGN;
2024 }
2025
2026 static enum rofferr
2027 roff_unsupp(ROFF_ARGS)
2028 {
2029
2030 mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse,
2031 ln, ppos, roff_name[tok]);
2032 return ROFF_IGN;
2033 }
2034
2035 static enum rofferr
2036 roff_cond(ROFF_ARGS)
2037 {
2038
2039 roffnode_push(r, tok, NULL, ln, ppos);
2040
2041 /*
2042 * An `.el' has no conditional body: it will consume the value
2043 * of the current rstack entry set in prior `ie' calls or
2044 * defaults to DENY.
2045 *
2046 * If we're not an `el', however, then evaluate the conditional.
2047 */
2048
2049 r->last->rule = tok == ROFF_el ?
2050 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2051 roff_evalcond(r, ln, buf->buf, &pos);
2052
2053 /*
2054 * An if-else will put the NEGATION of the current evaluated
2055 * conditional into the stack of rules.
2056 */
2057
2058 if (tok == ROFF_ie) {
2059 if (r->rstackpos + 1 == r->rstacksz) {
2060 r->rstacksz += 16;
2061 r->rstack = mandoc_reallocarray(r->rstack,
2062 r->rstacksz, sizeof(int));
2063 }
2064 r->rstack[++r->rstackpos] = !r->last->rule;
2065 }
2066
2067 /* If the parent has false as its rule, then so do we. */
2068
2069 if (r->last->parent && !r->last->parent->rule)
2070 r->last->rule = 0;
2071
2072 /*
2073 * Determine scope.
2074 * If there is nothing on the line after the conditional,
2075 * not even whitespace, use next-line scope.
2076 */
2077
2078 if (buf->buf[pos] == '\0') {
2079 r->last->endspan = 2;
2080 goto out;
2081 }
2082
2083 while (buf->buf[pos] == ' ')
2084 pos++;
2085
2086 /* An opening brace requests multiline scope. */
2087
2088 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2089 r->last->endspan = -1;
2090 pos += 2;
2091 while (buf->buf[pos] == ' ')
2092 pos++;
2093 goto out;
2094 }
2095
2096 /*
2097 * Anything else following the conditional causes
2098 * single-line scope. Warn if the scope contains
2099 * nothing but trailing whitespace.
2100 */
2101
2102 if (buf->buf[pos] == '\0')
2103 mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
2104 ln, ppos, roff_name[tok]);
2105
2106 r->last->endspan = 1;
2107
2108 out:
2109 *offs = pos;
2110 return ROFF_RERUN;
2111 }
2112
2113 static enum rofferr
2114 roff_ds(ROFF_ARGS)
2115 {
2116 char *string;
2117 const char *name;
2118 size_t namesz;
2119
2120 /* Ignore groff compatibility mode for now. */
2121
2122 if (tok == ROFF_ds1)
2123 tok = ROFF_ds;
2124 else if (tok == ROFF_as1)
2125 tok = ROFF_as;
2126
2127 /*
2128 * The first word is the name of the string.
2129 * If it is empty or terminated by an escape sequence,
2130 * abort the `ds' request without defining anything.
2131 */
2132
2133 name = string = buf->buf + pos;
2134 if (*name == '\0')
2135 return ROFF_IGN;
2136
2137 namesz = roff_getname(r, &string, ln, pos);
2138 if (name[namesz] == '\\')
2139 return ROFF_IGN;
2140
2141 /* Read past the initial double-quote, if any. */
2142 if (*string == '"')
2143 string++;
2144
2145 /* The rest is the value. */
2146 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2147 ROFF_as == tok);
2148 return ROFF_IGN;
2149 }
2150
/*
 * Parse a single operator, one or two characters long.
 * If the operator is recognized, advance the parse point and
 * return the operator code, which is also stored in *res:
 * the operator character itself for one-character operators
 * and for `==', or 'l' for `<=', '!' for `<>', 'i' for `<?',
 * 'g' for `>=', and 'a' for `>?'.
 * Otherwise, return 0 and leave the parse point unchanged;
 * *res then holds the unrecognized character.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	char	 next;
	int	 len;

	*res = v[*pos];
	len = 1;

	if (*res == '<' || *res == '>' || *res == '=') {
		/* These may combine with the following character. */
		next = v[*pos + 1];
		if (*res == '<') {
			if (next == '=') {
				*res = 'l';
				len = 2;
			} else if (next == '>') {
				*res = '!';
				len = 2;
			} else if (next == '?') {
				*res = 'i';
				len = 2;
			}
		} else if (*res == '>') {
			if (next == '=') {
				*res = 'g';
				len = 2;
			} else if (next == '?') {
				*res = 'a';
				len = 2;
			}
		} else if (next == '=')
			len = 2;
	} else if (*res == '\0' || strchr("+-*/%&:", *res) == NULL)
		return 0;

	*pos += len;
	return *res;
}
2214
2215 /*
2216 * Evaluate either a parenthesized numeric expression
2217 * or a single signed integer number.
2218 */
2219 static int
2220 roff_evalpar(struct roff *r, int ln,
2221 const char *v, int *pos, int *res, int flags)
2222 {
2223
2224 if ('(' != v[*pos])
2225 return roff_getnum(v, pos, res, flags);
2226
2227 (*pos)++;
2228 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2229 return 0;
2230
2231 /*
2232 * Omission of the closing parenthesis
2233 * is an error in validation mode,
2234 * but ignored in evaluation mode.
2235 */
2236
2237 if (')' == v[*pos])
2238 (*pos)++;
2239 else if (NULL == res)
2240 return 0;
2241
2242 return 1;
2243 }
2244
2245 /*
2246 * Evaluate a complete numeric expression.
2247 * Proceed left to right, there is no concept of precedence.
2248 */
2249 static int
2250 roff_evalnum(struct roff *r, int ln, const char *v,
2251 int *pos, int *res, int flags)
2252 {
2253 int mypos, operand2;
2254 char operator;
2255
2256 if (NULL == pos) {
2257 mypos = 0;
2258 pos = &mypos;
2259 }
2260
2261 if (flags & ROFFNUM_WHITE)
2262 while (isspace((unsigned char)v[*pos]))
2263 (*pos)++;
2264
2265 if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2266 return 0;
2267
2268 while (1) {
2269 if (flags & ROFFNUM_WHITE)
2270 while (isspace((unsigned char)v[*pos]))
2271 (*pos)++;
2272
2273 if ( ! roff_getop(v, pos, &operator))
2274 break;
2275
2276 if (flags & ROFFNUM_WHITE)
2277 while (isspace((unsigned char)v[*pos]))
2278 (*pos)++;
2279
2280 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2281 return 0;
2282
2283 if (flags & ROFFNUM_WHITE)
2284 while (isspace((unsigned char)v[*pos]))
2285 (*pos)++;
2286
2287 if (NULL == res)
2288 continue;
2289
2290 switch (operator) {
2291 case '+':
2292 *res += operand2;
2293 break;
2294 case '-':
2295 *res -= operand2;
2296 break;
2297 case '*':
2298 *res *= operand2;
2299 break;
2300 case '/':
2301 if (operand2 == 0) {
2302 mandoc_msg(MANDOCERR_DIVZERO,
2303 r->parse, ln, *pos, v);
2304 *res = 0;
2305 break;
2306 }
2307 *res /= operand2;
2308 break;
2309 case '%':
2310 if (operand2 == 0) {
2311 mandoc_msg(MANDOCERR_DIVZERO,
2312 r->parse, ln, *pos, v);
2313 *res = 0;
2314 break;
2315 }
2316 *res %= operand2;
2317 break;
2318 case '<':
2319 *res = *res < operand2;
2320 break;
2321 case '>':
2322 *res = *res > operand2;
2323 break;
2324 case 'l':
2325 *res = *res <= operand2;
2326 break;
2327 case 'g':
2328 *res = *res >= operand2;
2329 break;
2330 case '=':
2331 *res = *res == operand2;
2332 break;
2333 case '!':
2334 *res = *res != operand2;
2335 break;
2336 case '&':
2337 *res = *res && operand2;
2338 break;
2339 case ':':
2340 *res = *res || operand2;
2341 break;
2342 case 'i':
2343 if (operand2 < *res)
2344 *res = operand2;
2345 break;
2346 case 'a':
2347 if (operand2 > *res)
2348 *res = operand2;
2349 break;
2350 default:
2351 abort();
2352 }
2353 }
2354 return 1;
2355 }
2356
2357 /* --- register management ------------------------------------------------ */
2358
2359 void
2360 roff_setreg(struct roff *r, const char *name, int val, char sign)
2361 {
2362 struct roffreg *reg;
2363
2364 /* Search for an existing register with the same name. */
2365 reg = r->regtab;
2366
2367 while (reg && strcmp(name, reg->key.p))
2368 reg = reg->next;
2369
2370 if (NULL == reg) {
2371 /* Create a new register. */
2372 reg = mandoc_malloc(sizeof(struct roffreg));
2373 reg->key.p = mandoc_strdup(name);
2374 reg->key.sz = strlen(name);
2375 reg->val = 0;
2376 reg->next = r->regtab;
2377 r->regtab = reg;
2378 }
2379
2380 if ('+' == sign)
2381 reg->val += val;
2382 else if ('-' == sign)
2383 reg->val -= val;
2384 else
2385 reg->val = val;
2386 }
2387
2388 /*
2389 * Handle some predefined read-only number registers.
2390 * For now, return -1 if the requested register is not predefined;
2391 * in case a predefined read-only register having the value -1
2392 * were to turn up, another special value would have to be chosen.
2393 */
2394 static int
2395 roff_getregro(const struct roff *r, const char *name)
2396 {
2397
2398 switch (*name) {
2399 case '$': /* Number of arguments of the last macro evaluated. */
2400 return r->argc;
2401 case 'A': /* ASCII approximation mode is always off. */
2402 return 0;
2403 case 'g': /* Groff compatibility mode is always on. */
2404 return 1;
2405 case 'H': /* Fixed horizontal resolution. */
2406 return 24;
2407 case 'j': /* Always adjust left margin only. */
2408 return 0;
2409 case 'T': /* Some output device is always defined. */
2410 return 1;
2411 case 'V': /* Fixed vertical resolution. */
2412 return 40;
2413 default:
2414 return -1;
2415 }
2416 }
2417
2418 int
2419 roff_getreg(const struct roff *r, const char *name)
2420 {
2421 struct roffreg *reg;
2422 int val;
2423
2424 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
2425 val = roff_getregro(r, name + 1);
2426 if (-1 != val)
2427 return val;
2428 }
2429
2430 for (reg = r->regtab; reg; reg = reg->next)
2431 if (0 == strcmp(name, reg->key.p))
2432 return reg->val;
2433
2434 return 0;
2435 }
2436
2437 static int
2438 roff_getregn(const struct roff *r, const char *name, size_t len)
2439 {
2440 struct roffreg *reg;
2441 int val;
2442
2443 if ('.' == name[0] && 2 == len) {
2444 val = roff_getregro(r, name + 1);
2445 if (-1 != val)
2446 return val;
2447 }
2448
2449 for (reg = r->regtab; reg; reg = reg->next)
2450 if (len == reg->key.sz &&
2451 0 == strncmp(name, reg->key.p, len))
2452 return reg->val;
2453
2454 return 0;
2455 }
2456
2457 static int
2458 roff_hasregn(const struct roff *r, const char *name, size_t len)
2459 {
2460 struct roffreg *reg;
2461 int val;
2462
2463 if ('.' == name[0] && 2 == len) {
2464 val = roff_getregro(r, name + 1);
2465 if (-1 != val)
2466 return 1;
2467 }
2468
2469 for (reg = r->regtab; reg; reg = reg->next)
2470 if (len == reg->key.sz &&
2471 0 == strncmp(name, reg->key.p, len))
2472 return 1;
2473
2474 return 0;
2475 }
2476
2477 static void
2478 roff_freereg(struct roffreg *reg)
2479 {
2480 struct roffreg *old_reg;
2481
2482 while (NULL != reg) {
2483 free(reg->key.p);
2484 old_reg = reg;
2485 reg = reg->next;
2486 free(old_reg);
2487 }
2488 }
2489
2490 static enum rofferr
2491 roff_nr(ROFF_ARGS)
2492 {
2493 char *key, *val;
2494 size_t keysz;
2495 int iv;
2496 char sign;
2497
2498 key = val = buf->buf + pos;
2499 if (*key == '\0')
2500 return ROFF_IGN;
2501
2502 keysz = roff_getname(r, &val, ln, pos);
2503 if (key[keysz] == '\\')
2504 return ROFF_IGN;
2505 key[keysz] = '\0';
2506
2507 sign = *val;
2508 if (sign == '+' || sign == '-')
2509 val++;
2510
2511 if (roff_evalnum(r, ln, val, NULL, &iv, ROFFNUM_SCALE))
2512 roff_setreg(r, key, iv, sign);
2513
2514 return ROFF_IGN;
2515 }
2516
2517 static enum rofferr
2518 roff_rr(ROFF_ARGS)
2519 {
2520 struct roffreg *reg, **prev;
2521 char *name, *cp;
2522 size_t namesz;
2523
2524 name = cp = buf->buf + pos;
2525 if (*name == '\0')
2526 return ROFF_IGN;
2527 namesz = roff_getname(r, &cp, ln, pos);
2528 name[namesz] = '\0';
2529
2530 prev = &r->regtab;
2531 while (1) {
2532 reg = *prev;
2533 if (reg == NULL || !strcmp(name, reg->key.p))
2534 break;
2535 prev = &reg->next;
2536 }
2537 if (reg != NULL) {
2538 *prev = reg->next;
2539 free(reg->key.p);
2540 free(reg);
2541 }
2542 return ROFF_IGN;
2543 }
2544
2545 /* --- handler functions for roff requests -------------------------------- */
2546
2547 static enum rofferr
2548 roff_rm(ROFF_ARGS)
2549 {
2550 const char *name;
2551 char *cp;
2552 size_t namesz;
2553
2554 cp = buf->buf + pos;
2555 while (*cp != '\0') {
2556 name = cp;
2557 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
2558 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
2559 if (name[namesz] == '\\')
2560 break;
2561 }
2562 return ROFF_IGN;
2563 }
2564
2565 static enum rofferr
2566 roff_it(ROFF_ARGS)
2567 {
2568 int iv;
2569
2570 /* Parse the number of lines. */
2571
2572 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
2573 mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
2574 ln, ppos, buf->buf + 1);
2575 return ROFF_IGN;
2576 }
2577
2578 while (isspace((unsigned char)buf->buf[pos]))
2579 pos++;
2580
2581 /*
2582 * Arm the input line trap.
2583 * Special-casing "an-trap" is an ugly workaround to cope
2584 * with DocBook stupidly fiddling with man(7) internals.
2585 */
2586
2587 roffit_lines = iv;
2588 roffit_macro = mandoc_strdup(iv != 1 ||
2589 strcmp(buf->buf + pos, "an-trap") ?
2590 buf->buf + pos : "br");
2591 return ROFF_IGN;
2592 }
2593
2594 static enum rofferr
2595 roff_Dd(ROFF_ARGS)
2596 {
2597 const char *const *cp;
2598
2599 if ((r->options & (MPARSE_MDOC | MPARSE_QUICK)) == 0)
2600 for (cp = __mdoc_reserved; *cp; cp++)
2601 roff_setstr(r, *cp, NULL, 0);
2602
2603 if (r->format == 0)
2604 r->format = MPARSE_MDOC;
2605
2606 return ROFF_CONT;
2607 }
2608
2609 static enum rofferr
2610 roff_TH(ROFF_ARGS)
2611 {
2612 const char *const *cp;
2613
2614 if ((r->options & MPARSE_QUICK) == 0)
2615 for (cp = __man_reserved; *cp; cp++)
2616 roff_setstr(r, *cp, NULL, 0);
2617
2618 if (r->format == 0)
2619 r->format = MPARSE_MAN;
2620
2621 return ROFF_CONT;
2622 }
2623
2624 static enum rofferr
2625 roff_TE(ROFF_ARGS)
2626 {
2627
2628 if (NULL == r->tbl)
2629 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2630 ln, ppos, "TE");
2631 else if ( ! tbl_end(&r->tbl)) {
2632 free(buf->buf);
2633 buf->buf = mandoc_strdup(".sp");
2634 buf->sz = 4;
2635 return ROFF_REPARSE;
2636 }
2637 return ROFF_IGN;
2638 }
2639
2640 static enum rofferr
2641 roff_T_(ROFF_ARGS)
2642 {
2643
2644 if (NULL == r->tbl)
2645 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2646 ln, ppos, "T&");
2647 else
2648 tbl_restart(ln, ppos, r->tbl);
2649
2650 return ROFF_IGN;
2651 }
2652
2653 /*
2654 * Handle in-line equation delimiters.
2655 */
2656 static enum rofferr
2657 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
2658 {
2659 char *cp1, *cp2;
2660 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
2661
2662 /*
2663 * Outside equations, look for an opening delimiter.
2664 * If we are inside an equation, we already know it is
2665 * in-line, or this function wouldn't have been called;
2666 * so look for a closing delimiter.
2667 */
2668
2669 cp1 = buf->buf + pos;
2670 cp2 = strchr(cp1, r->eqn == NULL ?
2671 r->last_eqn->odelim : r->last_eqn->cdelim);
2672 if (cp2 == NULL)
2673 return ROFF_CONT;
2674
2675 *cp2++ = '\0';
2676 bef_pr = bef_nl = aft_nl = aft_pr = "";
2677
2678 /* Handle preceding text, protecting whitespace. */
2679
2680 if (*buf->buf != '\0') {
2681 if (r->eqn == NULL)
2682 bef_pr = "\\&";
2683 bef_nl = "\n";
2684 }
2685
2686 /*
2687 * Prepare replacing the delimiter with an equation macro
2688 * and drop leading white space from the equation.
2689 */
2690
2691 if (r->eqn == NULL) {
2692 while (*cp2 == ' ')
2693 cp2++;
2694 mac = ".EQ";
2695 } else
2696 mac = ".EN";
2697
2698 /* Handle following text, protecting whitespace. */
2699
2700 if (*cp2 != '\0') {
2701 aft_nl = "\n";
2702 if (r->eqn != NULL)
2703 aft_pr = "\\&";
2704 }
2705
2706 /* Do the actual replacement. */
2707
2708 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
2709 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
2710 free(buf->buf);
2711 buf->buf = cp1;
2712
2713 /* Toggle the in-line state of the eqn subsystem. */
2714
2715 r->eqn_inline = r->eqn == NULL;
2716 return ROFF_REPARSE;
2717 }
2718
2719 static enum rofferr
2720 roff_EQ(ROFF_ARGS)
2721 {
2722 struct eqn_node *e;
2723
2724 assert(r->eqn == NULL);
2725 e = eqn_alloc(ppos, ln, r->parse);
2726
2727 if (r->last_eqn) {
2728 r->last_eqn->next = e;
2729 e->delim = r->last_eqn->delim;
2730 e->odelim = r->last_eqn->odelim;
2731 e->cdelim = r->last_eqn->cdelim;
2732 } else
2733 r->first_eqn = r->last_eqn = e;
2734
2735 r->eqn = r->last_eqn = e;
2736
2737 if (buf->buf[pos] != '\0')
2738 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2739 ".EQ %s", buf->buf + pos);
2740
2741 return ROFF_IGN;
2742 }
2743
2744 static enum rofferr
2745 roff_EN(ROFF_ARGS)
2746 {
2747
2748 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
2749 return ROFF_IGN;
2750 }
2751
2752 static enum rofferr
2753 roff_TS(ROFF_ARGS)
2754 {
2755 struct tbl_node *tbl;
2756
2757 if (r->tbl) {
2758 mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
2759 ln, ppos, "TS breaks TS");
2760 tbl_end(&r->tbl);
2761 }
2762
2763 tbl = tbl_alloc(ppos, ln, r->parse);
2764
2765 if (r->last_tbl)
2766 r->last_tbl->next = tbl;
2767 else
2768 r->first_tbl = r->last_tbl = tbl;
2769
2770 r->tbl = r->last_tbl = tbl;
2771 return ROFF_IGN;
2772 }
2773
2774 static enum rofferr
2775 roff_onearg(ROFF_ARGS)
2776 {
2777 struct roff_node *n;
2778 char *cp;
2779
2780 roff_elem_alloc(r->man, ln, ppos, tok);
2781 n = r->man->last;
2782
2783 cp = buf->buf + pos;
2784 if (*cp != '\0') {
2785 while (*cp != '\0' && *cp != ' ')
2786 cp++;
2787 while (*cp == ' ')
2788 *cp++ = '\0';
2789 if (*cp != '\0')
2790 mandoc_vmsg(MANDOCERR_ARG_EXCESS,
2791 r->parse, ln, cp - buf->buf,
2792 "%s ... %s", roff_name[tok], cp);
2793 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
2794 }
2795
2796 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
2797 r->man->last = n;
2798 r->man->next = ROFF_NEXT_SIBLING;
2799 return ROFF_IGN;
2800 }
2801
2802 static enum rofferr
2803 roff_br(ROFF_ARGS)
2804 {
2805 roff_elem_alloc(r->man, ln, ppos, ROFF_br);
2806 if (buf->buf[pos] != '\0')
2807 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2808 "%s %s", roff_name[tok], buf->buf + pos);
2809 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
2810 r->man->next = ROFF_NEXT_SIBLING;
2811 return ROFF_IGN;
2812 }
2813
2814 static enum rofferr
2815 roff_cc(ROFF_ARGS)
2816 {
2817 const char *p;
2818
2819 p = buf->buf + pos;
2820
2821 if (*p == '\0' || (r->control = *p++) == '.')
2822 r->control = 0;
2823
2824 if (*p != '\0')
2825 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
2826 ln, p - buf->buf, "cc ... %s", p);
2827
2828 return ROFF_IGN;
2829 }
2830
2831 static enum rofferr
2832 roff_tr(ROFF_ARGS)
2833 {
2834 const char *p, *first, *second;
2835 size_t fsz, ssz;
2836 enum mandoc_esc esc;
2837
2838 p = buf->buf + pos;
2839
2840 if (*p == '\0') {
2841 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr");
2842 return ROFF_IGN;
2843 }
2844
2845 while (*p != '\0') {
2846 fsz = ssz = 1;
2847
2848 first = p++;
2849 if (*first == '\\') {
2850 esc = mandoc_escape(&p, NULL, NULL);
2851 if (esc == ESCAPE_ERROR) {
2852 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2853 ln, (int)(p - buf->buf), first);
2854 return ROFF_IGN;
2855 }
2856 fsz = (size_t)(p - first);
2857 }
2858
2859 second = p++;
2860 if (*second == '\\') {
2861 esc = mandoc_escape(&p, NULL, NULL);
2862 if (esc == ESCAPE_ERROR) {
2863 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2864 ln, (int)(p - buf->buf), second);
2865 return ROFF_IGN;
2866 }
2867 ssz = (size_t)(p - second);
2868 } else if (*second == '\0') {
2869 mandoc_vmsg(MANDOCERR_TR_ODD, r->parse,
2870 ln, first - buf->buf, "tr %s", first);
2871 second = " ";
2872 p--;
2873 }
2874
2875 if (fsz > 1) {
2876 roff_setstrn(&r->xmbtab, first, fsz,
2877 second, ssz, 0);
2878 continue;
2879 }
2880
2881 if (r->xtab == NULL)
2882 r->xtab = mandoc_calloc(128,
2883 sizeof(struct roffstr));
2884
2885 free(r->xtab[(int)*first].p);
2886 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
2887 r->xtab[(int)*first].sz = ssz;
2888 }
2889
2890 return ROFF_IGN;
2891 }
2892
2893 static enum rofferr
2894 roff_so(ROFF_ARGS)
2895 {
2896 char *name, *cp;
2897
2898 name = buf->buf + pos;
2899 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
2900
2901 /*
2902 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
2903 * opening anything that's not in our cwd or anything beneath
2904 * it. Thus, explicitly disallow traversing up the file-system
2905 * or using absolute paths.
2906 */
2907
2908 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
2909 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
2910 ".so %s", name);
2911 buf->sz = mandoc_asprintf(&cp,
2912 ".sp\nSee the file %s.\n.sp", name) + 1;
2913 free(buf->buf);
2914 buf->buf = cp;
2915 *offs = 0;
2916 return ROFF_REPARSE;
2917 }
2918
2919 *offs = pos;
2920 return ROFF_SO;
2921 }
2922
2923 /* --- user defined strings and macros ------------------------------------ */
2924
2925 static enum rofferr
2926 roff_userdef(ROFF_ARGS)
2927 {
2928 const char *arg[9], *ap;
2929 char *cp, *n1, *n2;
2930 int expand_count, i, ib, ie;
2931 size_t asz, rsz;
2932
2933 /*
2934 * Collect pointers to macro argument strings
2935 * and NUL-terminate them.
2936 */
2937
2938 r->argc = 0;
2939 cp = buf->buf + pos;
2940 for (i = 0; i < 9; i++) {
2941 if (*cp == '\0')
2942 arg[i] = "";
2943 else {
2944 arg[i] = mandoc_getarg(r->parse, &cp, ln, &pos);
2945 r->argc = i + 1;
2946 }
2947 }
2948
2949 /*
2950 * Expand macro arguments.
2951 */
2952
2953 buf->sz = strlen(r->current_string) + 1;
2954 n1 = n2 = cp = mandoc_malloc(buf->sz);
2955 memcpy(n1, r->current_string, buf->sz);
2956 expand_count = 0;
2957 while (*cp != '\0') {
2958
2959 /* Scan ahead for the next argument invocation. */
2960
2961 if (*cp++ != '\\')
2962 continue;
2963 if (*cp++ != '$')
2964 continue;
2965 if (*cp == '*') { /* \\$* inserts all arguments */
2966 ib = 0;
2967 ie = r->argc - 1;
2968 } else { /* \\$1 .. \\$9 insert one argument */
2969 ib = ie = *cp - '1';
2970 if (ib < 0 || ib > 8)
2971 continue;
2972 }
2973 cp -= 2;
2974
2975 /*
2976 * Prevent infinite recursion.
2977 */
2978
2979 if (cp >= n2)
2980 expand_count = 1;
2981 else if (++expand_count > EXPAND_LIMIT) {
2982 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
2983 ln, (int)(cp - n1), NULL);
2984 free(buf->buf);
2985 buf->buf = n1;
2986 return ROFF_IGN;
2987 }
2988
2989 /*
2990 * Determine the size of the expanded argument,
2991 * taking escaping of quotes into account.
2992 */
2993
2994 asz = ie > ib ? ie - ib : 0; /* for blanks */
2995 for (i = ib; i <= ie; i++) {
2996 for (ap = arg[i]; *ap != '\0'; ap++) {
2997 asz++;
2998 if (*ap == '"')
2999 asz += 3;
3000 }
3001 }
3002 if (asz != 3) {
3003
3004 /*
3005 * Determine the size of the rest of the
3006 * unexpanded macro, including the NUL.
3007 */
3008
3009 rsz = buf->sz - (cp - n1) - 3;
3010
3011 /*
3012 * When shrinking, move before
3013 * releasing the storage.
3014 */
3015
3016 if (asz < 3)
3017 memmove(cp + asz, cp + 3, rsz);
3018
3019 /*
3020 * Resize the storage for the macro
3021 * and readjust the parse pointer.
3022 */
3023
3024 buf->sz += asz - 3;
3025 n2 = mandoc_realloc(n1, buf->sz);
3026 cp = n2 + (cp - n1);
3027 n1 = n2;
3028
3029 /*
3030 * When growing, make room
3031 * for the expanded argument.
3032 */
3033
3034 if (asz > 3)
3035 memmove(cp + asz, cp + 3, rsz);
3036 }
3037
3038 /* Copy the expanded argument, escaping quotes. */
3039
3040 n2 = cp;
3041 for (i = ib; i <= ie; i++) {
3042 for (ap = arg[i]; *ap != '\0'; ap++) {
3043 if (*ap == '"') {
3044 memcpy(n2, "\\(dq", 4);
3045 n2 += 4;
3046 } else
3047 *n2++ = *ap;
3048 }
3049 if (i < ie)
3050 *n2++ = ' ';
3051 }
3052 }
3053
3054 /*
3055 * Replace the macro invocation
3056 * by the expanded macro.
3057 */
3058
3059 free(buf->buf);
3060 buf->buf = n1;
3061 *offs = 0;
3062
3063 return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
3064 ROFF_REPARSE : ROFF_APPEND;
3065 }
3066
3067 static size_t
3068 roff_getname(struct roff *r, char **cpp, int ln, int pos)
3069 {
3070 char *name, *cp;
3071 size_t namesz;
3072
3073 name = *cpp;
3074 if ('\0' == *name)
3075 return 0;
3076
3077 /* Read until end of name and terminate it with NUL. */
3078 for (cp = name; 1; cp++) {
3079 if ('\0' == *cp || ' ' == *cp) {
3080 namesz = cp - name;
3081 break;
3082 }
3083 if ('\\' != *cp)
3084 continue;
3085 namesz = cp - name;
3086 if ('{' == cp[1] || '}' == cp[1])
3087 break;
3088 cp++;
3089 if ('\\' == *cp)
3090 continue;
3091 mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
3092 "%.*s", (int)(cp - name + 1), name);
3093 mandoc_escape((const char **)&cp, NULL, NULL);
3094 break;
3095 }
3096
3097 /* Read past spaces. */
3098 while (' ' == *cp)
3099 cp++;
3100
3101 *cpp = cp;
3102 return namesz;
3103 }
3104
3105 /*
3106 * Store *string into the user-defined string called *name.
3107 * To clear an existing entry, call with (*r, *name, NULL, 0).
3108 * append == 0: replace mode
3109 * append == 1: single-line append mode
3110 * append == 2: multiline append mode, append '\n' after each call
3111 */
3112 static void
3113 roff_setstr(struct roff *r, const char *name, const char *string,
3114 int append)
3115 {
3116
3117 roff_setstrn(&r->strtab, name, strlen(name), string,
3118 string ? strlen(string) : 0, append);
3119 }
3120
3121 static void
3122 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
3123 const char *string, size_t stringsz, int append)
3124 {
3125 struct roffkv *n;
3126 char *c;
3127 int i;
3128 size_t oldch, newch;
3129
3130 /* Search for an existing string with the same name. */
3131 n = *r;
3132
3133 while (n && (namesz != n->key.sz ||
3134 strncmp(n->key.p, name, namesz)))
3135 n = n->next;
3136
3137 if (NULL == n) {
3138 /* Create a new string table entry. */
3139 n = mandoc_malloc(sizeof(struct roffkv));
3140 n->key.p = mandoc_strndup(name, namesz);
3141 n->key.sz = namesz;
3142 n->val.p = NULL;
3143 n->val.sz = 0;
3144 n->next = *r;
3145 *r = n;
3146 } else if (0 == append) {
3147 free(n->val.p);
3148 n->val.p = NULL;
3149 n->val.sz = 0;
3150 }
3151
3152 if (NULL == string)
3153 return;
3154
3155 /*
3156 * One additional byte for the '\n' in multiline mode,
3157 * and one for the terminating '\0'.
3158 */
3159 newch = stringsz + (1 < append ? 2u : 1u);
3160
3161 if (NULL == n->val.p) {
3162 n->val.p = mandoc_malloc(newch);
3163 *n->val.p = '\0';
3164 oldch = 0;
3165 } else {
3166 oldch = n->val.sz;
3167 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
3168 }
3169
3170 /* Skip existing content in the destination buffer. */
3171 c = n->val.p + (int)oldch;
3172
3173 /* Append new content to the destination buffer. */
3174 i = 0;
3175 while (i < (int)stringsz) {
3176 /*
3177 * Rudimentary roff copy mode:
3178 * Handle escaped backslashes.
3179 */
3180 if ('\\' == string[i] && '\\' == string[i + 1])
3181 i++;
3182 *c++ = string[i++];
3183 }
3184
3185 /* Append terminating bytes. */
3186 if (1 < append)
3187 *c++ = '\n';
3188
3189 *c = '\0';
3190 n->val.sz = (int)(c - n->val.p);
3191 }
3192
3193 static const char *
3194 roff_getstrn(const struct roff *r, const char *name, size_t len)
3195 {
3196 const struct roffkv *n;
3197 int i;
3198
3199 for (n = r->strtab; n; n = n->next)
3200 if (0 == strncmp(name, n->key.p, len) &&
3201 '\0' == n->key.p[(int)len])
3202 return n->val.p;
3203
3204 for (i = 0; i < PREDEFS_MAX; i++)
3205 if (0 == strncmp(name, predefs[i].name, len) &&
3206 '\0' == predefs[i].name[(int)len])
3207 return predefs[i].str;
3208
3209 return NULL;
3210 }
3211
3212 static void
3213 roff_freestr(struct roffkv *r)
3214 {
3215 struct roffkv *n, *nn;
3216
3217 for (n = r; n; n = nn) {
3218 free(n->key.p);
3219 free(n->val.p);
3220 nn = n->next;
3221 free(n);
3222 }
3223 }
3224
3225 /* --- accessors and utility functions ------------------------------------ */
3226
3227 const struct tbl_span *
3228 roff_span(const struct roff *r)
3229 {
3230
3231 return r->tbl ? tbl_span(r->tbl) : NULL;
3232 }
3233
3234 const struct eqn *
3235 roff_eqn(const struct roff *r)
3236 {
3237
3238 return r->last_eqn ? &r->last_eqn->eqn : NULL;
3239 }
3240
3241 /*
3242 * Duplicate an input string, making the appropriate character
3243 * conversations (as stipulated by `tr') along the way.
3244 * Returns a heap-allocated string with all the replacements made.
3245 */
3246 char *
3247 roff_strdup(const struct roff *r, const char *p)
3248 {
3249 const struct roffkv *cp;
3250 char *res;
3251 const char *pp;
3252 size_t ssz, sz;
3253 enum mandoc_esc esc;
3254
3255 if (NULL == r->xmbtab && NULL == r->xtab)
3256 return mandoc_strdup(p);
3257 else if ('\0' == *p)
3258 return mandoc_strdup("");
3259
3260 /*
3261 * Step through each character looking for term matches
3262 * (remember that a `tr' can be invoked with an escape, which is
3263 * a glyph but the escape is multi-character).
3264 * We only do this if the character hash has been initialised
3265 * and the string is >0 length.
3266 */
3267
3268 res = NULL;
3269 ssz = 0;
3270
3271 while ('\0' != *p) {
3272 assert((unsigned int)*p < 128);
3273 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
3274 sz = r->xtab[(int)*p].sz;
3275 res = mandoc_realloc(res, ssz + sz + 1);
3276 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
3277 ssz += sz;
3278 p++;
3279 continue;
3280 } else if ('\\' != *p) {
3281 res = mandoc_realloc(res, ssz + 2);
3282 res[ssz++] = *p++;
3283 continue;
3284 }
3285
3286 /* Search for term matches. */
3287 for (cp = r->xmbtab; cp; cp = cp->next)
3288 if (0 == strncmp(p, cp->key.p, cp->key.sz))
3289 break;
3290
3291 if (NULL != cp) {
3292 /*
3293 * A match has been found.
3294 * Append the match to the array and move
3295 * forward by its keysize.
3296 */
3297 res = mandoc_realloc(res,
3298 ssz + cp->val.sz + 1);
3299 memcpy(res + ssz, cp->val.p, cp->val.sz);
3300 ssz += cp->val.sz;
3301 p += (int)cp->key.sz;
3302 continue;
3303 }
3304
3305 /*
3306 * Handle escapes carefully: we need to copy
3307 * over just the escape itself, or else we might
3308 * do replacements within the escape itself.
3309 * Make sure to pass along the bogus string.
3310 */
3311 pp = p++;
3312 esc = mandoc_escape(&p, NULL, NULL);
3313 if (ESCAPE_ERROR == esc) {
3314 sz = strlen(pp);
3315 res = mandoc_realloc(res, ssz + sz + 1);
3316 memcpy(res + ssz, pp, sz);
3317 break;
3318 }
3319 /*
3320 * We bail out on bad escapes.
3321 * No need to warn: we already did so when
3322 * roff_res() was called.
3323 */
3324 sz = (int)(p - pp);
3325 res = mandoc_realloc(res, ssz + sz + 1);
3326 memcpy(res + ssz, pp, sz);
3327 ssz += sz;
3328 }
3329
3330 res[(int)ssz] = '\0';
3331 return res;
3332 }
3333
3334 int
3335 roff_getformat(const struct roff *r)
3336 {
3337
3338 return r->format;
3339 }
3340
3341 /*
3342 * Find out whether a line is a macro line or not.
3343 * If it is, adjust the current position and return one; if it isn't,
3344 * return zero and don't change the current position.
3345 * If the control character has been set with `.cc', then let that grain
3346 * precedence.
3347 * This is slighly contrary to groff, where using the non-breaking
3348 * control character when `cc' has been invoked will cause the
3349 * non-breaking macro contents to be printed verbatim.
3350 */
3351 int
3352 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
3353 {
3354 int pos;
3355
3356 pos = *ppos;
3357
3358 if (0 != r->control && cp[pos] == r->control)
3359 pos++;
3360 else if (0 != r->control)
3361 return 0;
3362 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
3363 pos += 2;
3364 else if ('.' == cp[pos] || '\'' == cp[pos])
3365 pos++;
3366 else
3367 return 0;
3368
3369 while (' ' == cp[pos] || '\t' == cp[pos])
3370 pos++;
3371
3372 *ppos = pos;
3373 return 1;
3374 }