]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
Cleanup, no functional change:
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.345 2018/12/12 21:54:35 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31 #include "mandoc_aux.h"
32 #include "mandoc_ohash.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "tbl.h"
36 #include "libmandoc.h"
37 #include "roff_int.h"
38 #include "libroff.h"
39
/*
 * Upper bound on the number of string expansions performed on a
 * single input line; reaching it breaks otherwise infinite loops.
 */
#define	EXPAND_LIMIT	1000
42
/*
 * Bit flags classifying how a macro or string name is defined.
 * ROFFDEF_ANY is the union of all "defined" kinds; ROFFDEF_UNDEF
 * is a separate bit that is not part of ROFFDEF_ANY.
 */
#define	ROFFDEF_USER	(1 << 1)  /* Defined by the user. */
#define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
#define	ROFFDEF_REN	(1 << 3)  /* Standard macro that was renamed. */
#define	ROFFDEF_STD	(1 << 4)  /* Macro of mdoc(7) or man(7). */
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
			 ROFFDEF_REN | ROFFDEF_STD)
#define	ROFFDEF_UNDEF	(1 << 5)  /* Not defined in any way. */
51
52 /* --- data types --------------------------------------------------------- */
53
/*
 * Minimal string buffer: a pointer plus its cached length.
 */
struct	roffstr {
	char		*p;   /* NUL-terminated buffer */
	size_t		 sz;  /* cached strlen(p) */
};
61
62 /*
63 * A key-value roffstr pair as part of a singly-linked list.
64 */
65 struct roffkv {
66 struct roffstr key;
67 struct roffstr val;
68 struct roffkv *next; /* next in list */
69 };
70
71 /*
72 * A single number register as part of a singly-linked list.
73 */
74 struct roffreg {
75 struct roffstr key;
76 int val;
77 int step;
78 struct roffreg *next;
79 };
80
81 /*
82 * Association of request and macro names with token IDs.
83 */
84 struct roffreq {
85 enum roff_tok tok;
86 char name[];
87 };
88
/*
 * Context of one macro invocation.
 * Nested macro calls need one context per nesting level.
 */
struct	mctx {
	char		**argv;
	int		 argc;
	int		 argsz;
};
98
/*
 * Complete state of the roff parser.
 */
struct	roff {
	struct mparse	*parse;      /* parse point */
	struct roff_man	*man;        /* mdoc or man parser */
	struct roffnode	*last;       /* leaf of the block stack */
	struct mctx	*mstack;     /* stack of macro contexts */
	int		*rstack;     /* stack of inverted `ie' values */
	struct ohash	*reqtab;     /* lookup table for requests */
	struct roffreg	*regtab;     /* list of number registers */
	struct roffkv	*strtab;     /* user-defined strings and macros */
	struct roffkv	*rentab;     /* renamed strings and macros */
	struct roffkv	*xmbtab;     /* `tr' table, multi-byte */
	struct roffstr	*xtab;       /* `tr' table, single-byte */
	const char	*current_string;  /* value of last called user macro */
	struct tbl_node	*first_tbl;  /* first table parsed */
	struct tbl_node	*last_tbl;   /* last table parsed */
	struct tbl_node	*tbl;        /* table currently being parsed */
	struct eqn_node	*last_eqn;   /* equation parser */
	struct eqn_node	*eqn;        /* active equation parser */
	int		 eqn_inline; /* current equation is inline */
	int		 options;    /* parse options */
	int		 mstacksz;   /* current size of mstack */
	int		 mstackpos;  /* position in mstack */
	int		 rstacksz;   /* current size limit of rstack */
	int		 rstackpos;  /* position in rstack */
	int		 format;     /* current file is mdoc or man */
	char		 control;    /* control character */
	char		 escape;     /* escape character */
};
127
128 struct roffnode {
129 enum roff_tok tok; /* type of node */
130 struct roffnode *parent; /* up one in stack */
131 int line; /* parse line */
132 int col; /* parse col */
133 char *name; /* node name, e.g. macro name */
134 char *end; /* end-rules: custom token */
135 int endspan; /* end-rules: next-line or infty */
136 int rule; /* current evaluation rule */
137 };
138
/*
 * Parameter list shared by all request handler functions,
 * and the function pointer type for such handlers.
 */
#define	ROFF_ARGS \
	struct roff *r,     /* parser context */ \
	enum roff_tok tok,  /* token of the macro */ \
	struct buf *buf,    /* input buffer */ \
	int ln,             /* input line number */ \
	int ppos,           /* original position in the buffer */ \
	int pos,            /* current position in the buffer */ \
	int *offs           /* reset offset of the buffer data */

typedef	int (*roffproc)(ROFF_ARGS);
148
149 struct roffmac {
150 roffproc proc; /* process new macro */
151 roffproc text; /* process as child text of macro */
152 roffproc sub; /* process as child of macro */
153 int flags;
154 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
155 };
156
/*
 * One predefined string: an input name and the symbol replacing it.
 */
struct	predef {
	const char	*name;  /* name of the predefined input */
	const char	*str;   /* symbol substituted for it */
};

/* Expand to one struct predef initializer followed by a comma. */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
164
165 /* --- function prototypes ------------------------------------------------ */
166
/*
 * Forward declarations of functions local to this file.
 * Functions declared with ROFF_ARGS have the roffproc signature,
 * so their addresses can be stored in struct roffmac entries.
 */
167 static int roffnode_cleanscope(struct roff *);
168 static int roffnode_pop(struct roff *);
169 static void roffnode_push(struct roff *, enum roff_tok,
170 const char *, int, int);
171 static void roff_addtbl(struct roff_man *, struct tbl_node *);
172 static int roff_als(ROFF_ARGS);
173 static int roff_block(ROFF_ARGS);
174 static int roff_block_text(ROFF_ARGS);
175 static int roff_block_sub(ROFF_ARGS);
176 static int roff_br(ROFF_ARGS);
177 static int roff_cblock(ROFF_ARGS);
178 static int roff_cc(ROFF_ARGS);
179 static int roff_ccond(struct roff *, int, int);
180 static int roff_char(ROFF_ARGS);
181 static int roff_cond(ROFF_ARGS);
182 static int roff_cond_text(ROFF_ARGS);
183 static int roff_cond_sub(ROFF_ARGS);
184 static int roff_ds(ROFF_ARGS);
185 static int roff_ec(ROFF_ARGS);
186 static int roff_eo(ROFF_ARGS);
187 static int roff_eqndelim(struct roff *, struct buf *, int);
188 static int roff_evalcond(struct roff *r, int, char *, int *);
189 static int roff_evalnum(struct roff *, int,
190 const char *, int *, int *, int);
191 static int roff_evalpar(struct roff *, int,
192 const char *, int *, int *, int);
193 static int roff_evalstrcond(const char *, int *);
194 static void roff_free1(struct roff *);
195 static void roff_freereg(struct roffreg *);
196 static void roff_freestr(struct roffkv *);
197 static size_t roff_getname(struct roff *, char **, int, int);
198 static int roff_getnum(const char *, int *, int *, int);
199 static int roff_getop(const char *, int *, char *);
200 static int roff_getregn(struct roff *,
201 const char *, size_t, char);
202 static int roff_getregro(const struct roff *,
203 const char *name);
204 static const char *roff_getstrn(struct roff *,
205 const char *, size_t, int *);
206 static int roff_hasregn(const struct roff *,
207 const char *, size_t);
208 static int roff_insec(ROFF_ARGS);
209 static int roff_it(ROFF_ARGS);
210 static int roff_line_ignore(ROFF_ARGS);
211 static void roff_man_alloc1(struct roff_man *);
212 static void roff_man_free1(struct roff_man *);
213 static int roff_manyarg(ROFF_ARGS);
214 static int roff_nop(ROFF_ARGS);
215 static int roff_nr(ROFF_ARGS);
216 static int roff_onearg(ROFF_ARGS);
217 static enum roff_tok roff_parse(struct roff *, char *, int *,
218 int, int);
219 static int roff_parsetext(struct roff *, struct buf *,
220 int, int *);
221 static int roff_renamed(ROFF_ARGS);
222 static int roff_res(struct roff *, struct buf *, int, int);
223 static int roff_return(ROFF_ARGS);
224 static int roff_rm(ROFF_ARGS);
225 static int roff_rn(ROFF_ARGS);
226 static int roff_rr(ROFF_ARGS);
227 static void roff_setregn(struct roff *, const char *,
228 size_t, int, char, int);
229 static void roff_setstr(struct roff *,
230 const char *, const char *, int);
231 static void roff_setstrn(struct roffkv **, const char *,
232 size_t, const char *, size_t, int);
233 static int roff_shift(ROFF_ARGS);
234 static int roff_so(ROFF_ARGS);
235 static int roff_tr(ROFF_ARGS);
236 static int roff_Dd(ROFF_ARGS);
237 static int roff_TE(ROFF_ARGS);
238 static int roff_TS(ROFF_ARGS);
239 static int roff_EQ(ROFF_ARGS);
240 static int roff_EN(ROFF_ARGS);
241 static int roff_T_(ROFF_ARGS);
242 static int roff_unsupp(ROFF_ARGS);
243 static int roff_userdef(ROFF_ARGS);
244
245 /* --- constant data ------------------------------------------------------ */
246
/* Flag bits controlling the parsing of numeric expressions. */
#define	ROFFNUM_SCALE	(1 << 0)  /* roff_getnum() honours scaling. */
#define	ROFFNUM_WHITE	(1 << 1)  /* roff_evalnum() skips whitespace. */
249
/*
 * Names of all requests and macros, indexed by token ID;
 * roffhash_alloc() looks entries up as roff_name[tok].
 * NULL entries mark token values that have no name and are
 * skipped when the request hash table is built.
 * The order of the entries is significant: it must agree with
 * the token numbering, so never reorder or insert entries here
 * without adjusting the token definitions accordingly.
 * NOTE(review): the three groups look like roff requests,
 * mdoc(7) macros (starting at "Dd"), and man(7) macros
 * (starting at "TH") - confirm against enum roff_tok.
 */
250 const char *__roff_name[MAN_MAX + 1] = {
251 "br", "ce", "ft", "ll",
252 "mc", "po", "rj", "sp",
253 "ta", "ti", NULL,
254 "ab", "ad", "af", "aln",
255 "als", "am", "am1", "ami",
256 "ami1", "as", "as1", "asciify",
257 "backtrace", "bd", "bleedat", "blm",
258 "box", "boxa", "bp", "BP",
259 "break", "breakchar", "brnl", "brp",
260 "brpnl", "c2", "cc",
261 "cf", "cflags", "ch", "char",
262 "chop", "class", "close", "CL",
263 "color", "composite", "continue", "cp",
264 "cropat", "cs", "cu", "da",
265 "dch", "Dd", "de", "de1",
266 "defcolor", "dei", "dei1", "device",
267 "devicem", "di", "do", "ds",
268 "ds1", "dwh", "dt", "ec",
269 "ecr", "ecs", "el", "em",
270 "EN", "eo", "EP", "EQ",
271 "errprint", "ev", "evc", "ex",
272 "fallback", "fam", "fc", "fchar",
273 "fcolor", "fdeferlig", "feature", "fkern",
274 "fl", "flig", "fp", "fps",
275 "fschar", "fspacewidth", "fspecial", "ftr",
276 "fzoom", "gcolor", "hc", "hcode",
277 "hidechar", "hla", "hlm", "hpf",
278 "hpfa", "hpfcode", "hw", "hy",
279 "hylang", "hylen", "hym", "hypp",
280 "hys", "ie", "if", "ig",
281 "index", "it", "itc", "IX",
282 "kern", "kernafter", "kernbefore", "kernpair",
283 "lc", "lc_ctype", "lds", "length",
284 "letadj", "lf", "lg", "lhang",
285 "linetabs", "lnr", "lnrf", "lpfx",
286 "ls", "lsm", "lt",
287 "mediasize", "minss", "mk", "mso",
288 "na", "ne", "nh", "nhychar",
289 "nm", "nn", "nop", "nr",
290 "nrf", "nroff", "ns", "nx",
291 "open", "opena", "os", "output",
292 "padj", "papersize", "pc", "pev",
293 "pi", "PI", "pl", "pm",
294 "pn", "pnr", "ps",
295 "psbb", "pshape", "pso", "ptr",
296 "pvs", "rchar", "rd", "recursionlimit",
297 "return", "rfschar", "rhang",
298 "rm", "rn", "rnn", "rr",
299 "rs", "rt", "schar", "sentchar",
300 "shc", "shift", "sizes", "so",
301 "spacewidth", "special", "spreadwarn", "ss",
302 "sty", "substring", "sv", "sy",
303 "T&", "tc", "TE",
304 "TH", "tkf", "tl",
305 "tm", "tm1", "tmc", "tr",
306 "track", "transchar", "trf", "trimat",
307 "trin", "trnt", "troff", "TS",
308 "uf", "ul", "unformat", "unwatch",
309 "unwatchn", "vpt", "vs", "warn",
310 "warnscale", "watch", "watchlength", "watchn",
311 "wh", "while", "write", "writec",
312 "writem", "xflag", ".", NULL,
313 NULL, "text",
314 "Dd", "Dt", "Os", "Sh",
315 "Ss", "Pp", "D1", "Dl",
316 "Bd", "Ed", "Bl", "El",
317 "It", "Ad", "An", "Ap",
318 "Ar", "Cd", "Cm", "Dv",
319 "Er", "Ev", "Ex", "Fa",
320 "Fd", "Fl", "Fn", "Ft",
321 "Ic", "In", "Li", "Nd",
322 "Nm", "Op", "Ot", "Pa",
323 "Rv", "St", "Va", "Vt",
324 "Xr", "%A", "%B", "%D",
325 "%I", "%J", "%N", "%O",
326 "%P", "%R", "%T", "%V",
327 "Ac", "Ao", "Aq", "At",
328 "Bc", "Bf", "Bo", "Bq",
329 "Bsx", "Bx", "Db", "Dc",
330 "Do", "Dq", "Ec", "Ef",
331 "Em", "Eo", "Fx", "Ms",
332 "No", "Ns", "Nx", "Ox",
333 "Pc", "Pf", "Po", "Pq",
334 "Qc", "Ql", "Qo", "Qq",
335 "Re", "Rs", "Sc", "So",
336 "Sq", "Sm", "Sx", "Sy",
337 "Tn", "Ux", "Xc", "Xo",
338 "Fo", "Fc", "Oo", "Oc",
339 "Bk", "Ek", "Bt", "Hf",
340 "Fr", "Ud", "Lb", "Lp",
341 "Lk", "Mt", "Brq", "Bro",
342 "Brc", "%C", "Es", "En",
343 "Dx", "%Q", "%U", "Ta",
344 NULL,
345 "TH", "SH", "SS", "TP",
346 "TQ",
347 "LP", "PP", "P", "IP",
348 "HP", "SM", "SB", "BI",
349 "IB", "BR", "RB", "R",
350 "B", "I", "IR", "RI",
351 "nf", "fi",
352 "RE", "RS", "DT", "UC",
353 "PD", "AT", "in",
354 "SY", "YS", "OP",
355 "EX", "EE", "UR",
356 "UE", "MT", "ME", NULL
357 };
/* Read-only view of the name table, used for lookups by token ID. */
358 const char *const *roff_name = __roff_name;
359
/*
 * Handler table for roff requests, one struct roffmac per token.
 * Each entry lists the handler for the request itself (proc),
 * for text lines inside it (text), for requests inside it (sub),
 * and its flags.  The trailing comment on each entry names the
 * request it belongs to; entries must stay in token order.
 * Judging by the handler names: roff_line_ignore entries are
 * requests that are parsed but have no effect, roff_unsupp
 * entries are unsupported requests, and roff_insec entries are
 * requests rejected as insecure - NOTE(review): confirm against
 * the handler implementations, which are outside this excerpt.
 */
360 static struct roffmac roffs[TOKEN_NONE] = {
361 { roff_br, NULL, NULL, 0 }, /* br */
362 { roff_onearg, NULL, NULL, 0 }, /* ce */
363 { roff_onearg, NULL, NULL, 0 }, /* ft */
364 { roff_onearg, NULL, NULL, 0 }, /* ll */
365 { roff_onearg, NULL, NULL, 0 }, /* mc */
366 { roff_onearg, NULL, NULL, 0 }, /* po */
367 { roff_onearg, NULL, NULL, 0 }, /* rj */
368 { roff_onearg, NULL, NULL, 0 }, /* sp */
369 { roff_manyarg, NULL, NULL, 0 }, /* ta */
370 { roff_onearg, NULL, NULL, 0 }, /* ti */
371 { NULL, NULL, NULL, 0 }, /* ROFF_MAX */
372 { roff_unsupp, NULL, NULL, 0 }, /* ab */
373 { roff_line_ignore, NULL, NULL, 0 }, /* ad */
374 { roff_line_ignore, NULL, NULL, 0 }, /* af */
375 { roff_unsupp, NULL, NULL, 0 }, /* aln */
376 { roff_als, NULL, NULL, 0 }, /* als */
377 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am */
378 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am1 */
379 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami */
380 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami1 */
381 { roff_ds, NULL, NULL, 0 }, /* as */
382 { roff_ds, NULL, NULL, 0 }, /* as1 */
383 { roff_unsupp, NULL, NULL, 0 }, /* asciify */
384 { roff_line_ignore, NULL, NULL, 0 }, /* backtrace */
385 { roff_line_ignore, NULL, NULL, 0 }, /* bd */
386 { roff_line_ignore, NULL, NULL, 0 }, /* bleedat */
387 { roff_unsupp, NULL, NULL, 0 }, /* blm */
388 { roff_unsupp, NULL, NULL, 0 }, /* box */
389 { roff_unsupp, NULL, NULL, 0 }, /* boxa */
390 { roff_line_ignore, NULL, NULL, 0 }, /* bp */
391 { roff_unsupp, NULL, NULL, 0 }, /* BP */
392 { roff_unsupp, NULL, NULL, 0 }, /* break */
393 { roff_line_ignore, NULL, NULL, 0 }, /* breakchar */
394 { roff_line_ignore, NULL, NULL, 0 }, /* brnl */
395 { roff_br, NULL, NULL, 0 }, /* brp */
396 { roff_line_ignore, NULL, NULL, 0 }, /* brpnl */
397 { roff_unsupp, NULL, NULL, 0 }, /* c2 */
398 { roff_cc, NULL, NULL, 0 }, /* cc */
399 { roff_insec, NULL, NULL, 0 }, /* cf */
400 { roff_line_ignore, NULL, NULL, 0 }, /* cflags */
401 { roff_line_ignore, NULL, NULL, 0 }, /* ch */
402 { roff_char, NULL, NULL, 0 }, /* char */
403 { roff_unsupp, NULL, NULL, 0 }, /* chop */
404 { roff_line_ignore, NULL, NULL, 0 }, /* class */
405 { roff_insec, NULL, NULL, 0 }, /* close */
406 { roff_unsupp, NULL, NULL, 0 }, /* CL */
407 { roff_line_ignore, NULL, NULL, 0 }, /* color */
408 { roff_unsupp, NULL, NULL, 0 }, /* composite */
409 { roff_unsupp, NULL, NULL, 0 }, /* continue */
410 { roff_line_ignore, NULL, NULL, 0 }, /* cp */
411 { roff_line_ignore, NULL, NULL, 0 }, /* cropat */
412 { roff_line_ignore, NULL, NULL, 0 }, /* cs */
413 { roff_line_ignore, NULL, NULL, 0 }, /* cu */
414 { roff_unsupp, NULL, NULL, 0 }, /* da */
415 { roff_unsupp, NULL, NULL, 0 }, /* dch */
416 { roff_Dd, NULL, NULL, 0 }, /* Dd */
417 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de */
418 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de1 */
419 { roff_line_ignore, NULL, NULL, 0 }, /* defcolor */
420 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei */
421 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei1 */
422 { roff_unsupp, NULL, NULL, 0 }, /* device */
423 { roff_unsupp, NULL, NULL, 0 }, /* devicem */
424 { roff_unsupp, NULL, NULL, 0 }, /* di */
425 { roff_unsupp, NULL, NULL, 0 }, /* do */
426 { roff_ds, NULL, NULL, 0 }, /* ds */
427 { roff_ds, NULL, NULL, 0 }, /* ds1 */
428 { roff_unsupp, NULL, NULL, 0 }, /* dwh */
429 { roff_unsupp, NULL, NULL, 0 }, /* dt */
430 { roff_ec, NULL, NULL, 0 }, /* ec */
431 { roff_unsupp, NULL, NULL, 0 }, /* ecr */
432 { roff_unsupp, NULL, NULL, 0 }, /* ecs */
433 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* el */
434 { roff_unsupp, NULL, NULL, 0 }, /* em */
435 { roff_EN, NULL, NULL, 0 }, /* EN */
436 { roff_eo, NULL, NULL, 0 }, /* eo */
437 { roff_unsupp, NULL, NULL, 0 }, /* EP */
438 { roff_EQ, NULL, NULL, 0 }, /* EQ */
439 { roff_line_ignore, NULL, NULL, 0 }, /* errprint */
440 { roff_unsupp, NULL, NULL, 0 }, /* ev */
441 { roff_unsupp, NULL, NULL, 0 }, /* evc */
442 { roff_unsupp, NULL, NULL, 0 }, /* ex */
443 { roff_line_ignore, NULL, NULL, 0 }, /* fallback */
444 { roff_line_ignore, NULL, NULL, 0 }, /* fam */
445 { roff_unsupp, NULL, NULL, 0 }, /* fc */
446 { roff_unsupp, NULL, NULL, 0 }, /* fchar */
447 { roff_line_ignore, NULL, NULL, 0 }, /* fcolor */
448 { roff_line_ignore, NULL, NULL, 0 }, /* fdeferlig */
449 { roff_line_ignore, NULL, NULL, 0 }, /* feature */
450 { roff_line_ignore, NULL, NULL, 0 }, /* fkern */
451 { roff_line_ignore, NULL, NULL, 0 }, /* fl */
452 { roff_line_ignore, NULL, NULL, 0 }, /* flig */
453 { roff_line_ignore, NULL, NULL, 0 }, /* fp */
454 { roff_line_ignore, NULL, NULL, 0 }, /* fps */
455 { roff_unsupp, NULL, NULL, 0 }, /* fschar */
456 { roff_line_ignore, NULL, NULL, 0 }, /* fspacewidth */
457 { roff_line_ignore, NULL, NULL, 0 }, /* fspecial */
458 { roff_line_ignore, NULL, NULL, 0 }, /* ftr */
459 { roff_line_ignore, NULL, NULL, 0 }, /* fzoom */
460 { roff_line_ignore, NULL, NULL, 0 }, /* gcolor */
461 { roff_line_ignore, NULL, NULL, 0 }, /* hc */
462 { roff_line_ignore, NULL, NULL, 0 }, /* hcode */
463 { roff_line_ignore, NULL, NULL, 0 }, /* hidechar */
464 { roff_line_ignore, NULL, NULL, 0 }, /* hla */
465 { roff_line_ignore, NULL, NULL, 0 }, /* hlm */
466 { roff_line_ignore, NULL, NULL, 0 }, /* hpf */
467 { roff_line_ignore, NULL, NULL, 0 }, /* hpfa */
468 { roff_line_ignore, NULL, NULL, 0 }, /* hpfcode */
469 { roff_line_ignore, NULL, NULL, 0 }, /* hw */
470 { roff_line_ignore, NULL, NULL, 0 }, /* hy */
471 { roff_line_ignore, NULL, NULL, 0 }, /* hylang */
472 { roff_line_ignore, NULL, NULL, 0 }, /* hylen */
473 { roff_line_ignore, NULL, NULL, 0 }, /* hym */
474 { roff_line_ignore, NULL, NULL, 0 }, /* hypp */
475 { roff_line_ignore, NULL, NULL, 0 }, /* hys */
476 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* ie */
477 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* if */
478 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ig */
479 { roff_unsupp, NULL, NULL, 0 }, /* index */
480 { roff_it, NULL, NULL, 0 }, /* it */
481 { roff_unsupp, NULL, NULL, 0 }, /* itc */
482 { roff_line_ignore, NULL, NULL, 0 }, /* IX */
483 { roff_line_ignore, NULL, NULL, 0 }, /* kern */
484 { roff_line_ignore, NULL, NULL, 0 }, /* kernafter */
485 { roff_line_ignore, NULL, NULL, 0 }, /* kernbefore */
486 { roff_line_ignore, NULL, NULL, 0 }, /* kernpair */
487 { roff_unsupp, NULL, NULL, 0 }, /* lc */
488 { roff_unsupp, NULL, NULL, 0 }, /* lc_ctype */
489 { roff_unsupp, NULL, NULL, 0 }, /* lds */
490 { roff_unsupp, NULL, NULL, 0 }, /* length */
491 { roff_line_ignore, NULL, NULL, 0 }, /* letadj */
492 { roff_insec, NULL, NULL, 0 }, /* lf */
493 { roff_line_ignore, NULL, NULL, 0 }, /* lg */
494 { roff_line_ignore, NULL, NULL, 0 }, /* lhang */
495 { roff_unsupp, NULL, NULL, 0 }, /* linetabs */
496 { roff_unsupp, NULL, NULL, 0 }, /* lnr */
497 { roff_unsupp, NULL, NULL, 0 }, /* lnrf */
498 { roff_unsupp, NULL, NULL, 0 }, /* lpfx */
499 { roff_line_ignore, NULL, NULL, 0 }, /* ls */
500 { roff_unsupp, NULL, NULL, 0 }, /* lsm */
501 { roff_line_ignore, NULL, NULL, 0 }, /* lt */
502 { roff_line_ignore, NULL, NULL, 0 }, /* mediasize */
503 { roff_line_ignore, NULL, NULL, 0 }, /* minss */
504 { roff_line_ignore, NULL, NULL, 0 }, /* mk */
505 { roff_insec, NULL, NULL, 0 }, /* mso */
506 { roff_line_ignore, NULL, NULL, 0 }, /* na */
507 { roff_line_ignore, NULL, NULL, 0 }, /* ne */
508 { roff_line_ignore, NULL, NULL, 0 }, /* nh */
509 { roff_line_ignore, NULL, NULL, 0 }, /* nhychar */
510 { roff_unsupp, NULL, NULL, 0 }, /* nm */
511 { roff_unsupp, NULL, NULL, 0 }, /* nn */
512 { roff_nop, NULL, NULL, 0 }, /* nop */
513 { roff_nr, NULL, NULL, 0 }, /* nr */
514 { roff_unsupp, NULL, NULL, 0 }, /* nrf */
515 { roff_line_ignore, NULL, NULL, 0 }, /* nroff */
516 { roff_line_ignore, NULL, NULL, 0 }, /* ns */
517 { roff_insec, NULL, NULL, 0 }, /* nx */
518 { roff_insec, NULL, NULL, 0 }, /* open */
519 { roff_insec, NULL, NULL, 0 }, /* opena */
520 { roff_line_ignore, NULL, NULL, 0 }, /* os */
521 { roff_unsupp, NULL, NULL, 0 }, /* output */
522 { roff_line_ignore, NULL, NULL, 0 }, /* padj */
523 { roff_line_ignore, NULL, NULL, 0 }, /* papersize */
524 { roff_line_ignore, NULL, NULL, 0 }, /* pc */
525 { roff_line_ignore, NULL, NULL, 0 }, /* pev */
526 { roff_insec, NULL, NULL, 0 }, /* pi */
527 { roff_unsupp, NULL, NULL, 0 }, /* PI */
528 { roff_line_ignore, NULL, NULL, 0 }, /* pl */
529 { roff_line_ignore, NULL, NULL, 0 }, /* pm */
530 { roff_line_ignore, NULL, NULL, 0 }, /* pn */
531 { roff_line_ignore, NULL, NULL, 0 }, /* pnr */
532 { roff_line_ignore, NULL, NULL, 0 }, /* ps */
533 { roff_unsupp, NULL, NULL, 0 }, /* psbb */
534 { roff_unsupp, NULL, NULL, 0 }, /* pshape */
535 { roff_insec, NULL, NULL, 0 }, /* pso */
536 { roff_line_ignore, NULL, NULL, 0 }, /* ptr */
537 { roff_line_ignore, NULL, NULL, 0 }, /* pvs */
538 { roff_unsupp, NULL, NULL, 0 }, /* rchar */
539 { roff_line_ignore, NULL, NULL, 0 }, /* rd */
540 { roff_line_ignore, NULL, NULL, 0 }, /* recursionlimit */
541 { roff_return, NULL, NULL, 0 }, /* return */
542 { roff_unsupp, NULL, NULL, 0 }, /* rfschar */
543 { roff_line_ignore, NULL, NULL, 0 }, /* rhang */
544 { roff_rm, NULL, NULL, 0 }, /* rm */
545 { roff_rn, NULL, NULL, 0 }, /* rn */
546 { roff_unsupp, NULL, NULL, 0 }, /* rnn */
547 { roff_rr, NULL, NULL, 0 }, /* rr */
548 { roff_line_ignore, NULL, NULL, 0 }, /* rs */
549 { roff_line_ignore, NULL, NULL, 0 }, /* rt */
550 { roff_unsupp, NULL, NULL, 0 }, /* schar */
551 { roff_line_ignore, NULL, NULL, 0 }, /* sentchar */
552 { roff_line_ignore, NULL, NULL, 0 }, /* shc */
553 { roff_shift, NULL, NULL, 0 }, /* shift */
554 { roff_line_ignore, NULL, NULL, 0 }, /* sizes */
555 { roff_so, NULL, NULL, 0 }, /* so */
556 { roff_line_ignore, NULL, NULL, 0 }, /* spacewidth */
557 { roff_line_ignore, NULL, NULL, 0 }, /* special */
558 { roff_line_ignore, NULL, NULL, 0 }, /* spreadwarn */
559 { roff_line_ignore, NULL, NULL, 0 }, /* ss */
560 { roff_line_ignore, NULL, NULL, 0 }, /* sty */
561 { roff_unsupp, NULL, NULL, 0 }, /* substring */
562 { roff_line_ignore, NULL, NULL, 0 }, /* sv */
563 { roff_insec, NULL, NULL, 0 }, /* sy */
564 { roff_T_, NULL, NULL, 0 }, /* T& */
565 { roff_unsupp, NULL, NULL, 0 }, /* tc */
566 { roff_TE, NULL, NULL, 0 }, /* TE */
567 { roff_Dd, NULL, NULL, 0 }, /* TH */
568 { roff_line_ignore, NULL, NULL, 0 }, /* tkf */
569 { roff_unsupp, NULL, NULL, 0 }, /* tl */
570 { roff_line_ignore, NULL, NULL, 0 }, /* tm */
571 { roff_line_ignore, NULL, NULL, 0 }, /* tm1 */
572 { roff_line_ignore, NULL, NULL, 0 }, /* tmc */
573 { roff_tr, NULL, NULL, 0 }, /* tr */
574 { roff_line_ignore, NULL, NULL, 0 }, /* track */
575 { roff_line_ignore, NULL, NULL, 0 }, /* transchar */
576 { roff_insec, NULL, NULL, 0 }, /* trf */
577 { roff_line_ignore, NULL, NULL, 0 }, /* trimat */
578 { roff_unsupp, NULL, NULL, 0 }, /* trin */
579 { roff_unsupp, NULL, NULL, 0 }, /* trnt */
580 { roff_line_ignore, NULL, NULL, 0 }, /* troff */
581 { roff_TS, NULL, NULL, 0 }, /* TS */
582 { roff_line_ignore, NULL, NULL, 0 }, /* uf */
583 { roff_line_ignore, NULL, NULL, 0 }, /* ul */
584 { roff_unsupp, NULL, NULL, 0 }, /* unformat */
585 { roff_line_ignore, NULL, NULL, 0 }, /* unwatch */
586 { roff_line_ignore, NULL, NULL, 0 }, /* unwatchn */
587 { roff_line_ignore, NULL, NULL, 0 }, /* vpt */
588 { roff_line_ignore, NULL, NULL, 0 }, /* vs */
589 { roff_line_ignore, NULL, NULL, 0 }, /* warn */
590 { roff_line_ignore, NULL, NULL, 0 }, /* warnscale */
591 { roff_line_ignore, NULL, NULL, 0 }, /* watch */
592 { roff_line_ignore, NULL, NULL, 0 }, /* watchlength */
593 { roff_line_ignore, NULL, NULL, 0 }, /* watchn */
594 { roff_unsupp, NULL, NULL, 0 }, /* wh */
595 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* while */
596 { roff_insec, NULL, NULL, 0 }, /* write */
597 { roff_insec, NULL, NULL, 0 }, /* writec */
598 { roff_insec, NULL, NULL, 0 }, /* writem */
599 { roff_line_ignore, NULL, NULL, 0 }, /* xflag */
600 { roff_cblock, NULL, NULL, 0 }, /* . */
601 { roff_renamed, NULL, NULL, 0 }, /* renamed macro; presumably ROFF_RENAMED - TODO confirm */
602 { roff_userdef, NULL, NULL, 0 } /* user-defined macro; presumably ROFF_USERDEF - TODO confirm */
603 };
604
605 /* Array of injected predefined strings. */
606 #define PREDEFS_MAX 38
607 static const struct predef predefs[PREDEFS_MAX] = {
608 #include "predefs.in"
609 };
610
/* State of an active centering request. */
static	int		 roffce_lines;  /* input lines still to center */
static	struct roff_node *roffce_node;  /* the active request */

/* State of a pending input line trap. */
static	int		 roffit_lines;  /* lines to wait before firing */
static	char		*roffit_macro;  /* NUL-terminated macro line */
615
616
617 /* --- request table ------------------------------------------------------ */
618
619 struct ohash *
620 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
621 {
622 struct ohash *htab;
623 struct roffreq *req;
624 enum roff_tok tok;
625 size_t sz;
626 unsigned int slot;
627
628 htab = mandoc_malloc(sizeof(*htab));
629 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
630
631 for (tok = mintok; tok < maxtok; tok++) {
632 if (roff_name[tok] == NULL)
633 continue;
634 sz = strlen(roff_name[tok]);
635 req = mandoc_malloc(sizeof(*req) + sz + 1);
636 req->tok = tok;
637 memcpy(req->name, roff_name[tok], sz + 1);
638 slot = ohash_qlookup(htab, req->name);
639 ohash_insert(htab, slot, req);
640 }
641 return htab;
642 }
643
/*
 * Release a request hash table including all of its entries.
 * Passing NULL is allowed and does nothing.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 pos;

	if (htab != NULL) {
		entry = ohash_first(htab, &pos);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &pos);
		}
		ohash_delete(htab);
		free(htab);
	}
}
658
659 enum roff_tok
660 roffhash_find(struct ohash *htab, const char *name, size_t sz)
661 {
662 struct roffreq *req;
663 const char *end;
664
665 if (sz) {
666 end = name + sz;
667 req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
668 } else
669 req = ohash_find(htab, ohash_qlookup(htab, name));
670 return req == NULL ? TOKEN_NONE : req->tok;
671 }
672
673 /* --- stack of request blocks -------------------------------------------- */
674
675 /*
676 * Pop the current node off of the stack of roff instructions currently
677 * pending.
678 */
679 static int
680 roffnode_pop(struct roff *r)
681 {
682 struct roffnode *p;
683 int inloop;
684
685 p = r->last;
686 inloop = p->tok == ROFF_while;
687 r->last = p->parent;
688 free(p->name);
689 free(p->end);
690 free(p);
691 return inloop;
692 }
693
694 /*
695 * Push a roff node onto the instruction stack. This must later be
696 * removed with roffnode_pop().
697 */
698 static void
699 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
700 int line, int col)
701 {
702 struct roffnode *p;
703
704 p = mandoc_calloc(1, sizeof(struct roffnode));
705 p->tok = tok;
706 if (name)
707 p->name = mandoc_strdup(name);
708 p->parent = r->last;
709 p->line = line;
710 p->col = col;
711 p->rule = p->parent ? p->parent->rule : 0;
712
713 r->last = p;
714 }
715
716 /* --- roff parser state data management ---------------------------------- */
717
718 static void
719 roff_free1(struct roff *r)
720 {
721 struct tbl_node *tbl;
722 int i;
723
724 while (NULL != (tbl = r->first_tbl)) {
725 r->first_tbl = tbl->next;
726 tbl_free(tbl);
727 }
728 r->first_tbl = r->last_tbl = r->tbl = NULL;
729
730 if (r->last_eqn != NULL)
731 eqn_free(r->last_eqn);
732 r->last_eqn = r->eqn = NULL;
733
734 while (r->mstackpos >= 0)
735 roff_userret(r);
736
737 while (r->last)
738 roffnode_pop(r);
739
740 free (r->rstack);
741 r->rstack = NULL;
742 r->rstacksz = 0;
743 r->rstackpos = -1;
744
745 roff_freereg(r->regtab);
746 r->regtab = NULL;
747
748 roff_freestr(r->strtab);
749 roff_freestr(r->rentab);
750 roff_freestr(r->xmbtab);
751 r->strtab = r->rentab = r->xmbtab = NULL;
752
753 if (r->xtab)
754 for (i = 0; i < 128; i++)
755 free(r->xtab[i].p);
756 free(r->xtab);
757 r->xtab = NULL;
758 }
759
760 void
761 roff_reset(struct roff *r)
762 {
763 roff_free1(r);
764 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
765 r->control = '\0';
766 r->escape = '\\';
767 roffce_lines = 0;
768 roffce_node = NULL;
769 roffit_lines = 0;
770 roffit_macro = NULL;
771 }
772
773 void
774 roff_free(struct roff *r)
775 {
776 int i;
777
778 roff_free1(r);
779 for (i = 0; i < r->mstacksz; i++)
780 free(r->mstack[i].argv);
781 free(r->mstack);
782 roffhash_free(r->reqtab);
783 free(r);
784 }
785
786 struct roff *
787 roff_alloc(struct mparse *parse, int options)
788 {
789 struct roff *r;
790
791 r = mandoc_calloc(1, sizeof(struct roff));
792 r->parse = parse;
793 r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
794 r->options = options;
795 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
796 r->mstackpos = -1;
797 r->rstackpos = -1;
798 r->escape = '\\';
799 return r;
800 }
801
802 /* --- syntax tree state data management ---------------------------------- */
803
804 static void
805 roff_man_free1(struct roff_man *man)
806 {
807
808 if (man->first != NULL)
809 roff_node_delete(man, man->first);
810 free(man->meta.msec);
811 free(man->meta.vol);
812 free(man->meta.os);
813 free(man->meta.arch);
814 free(man->meta.title);
815 free(man->meta.name);
816 free(man->meta.date);
817 }
818
819 static void
820 roff_man_alloc1(struct roff_man *man)
821 {
822
823 memset(&man->meta, 0, sizeof(man->meta));
824 man->first = mandoc_calloc(1, sizeof(*man->first));
825 man->first->type = ROFFT_ROOT;
826 man->last = man->first;
827 man->last_es = NULL;
828 man->flags = 0;
829 man->macroset = MACROSET_NONE;
830 man->lastsec = man->lastnamed = SEC_NONE;
831 man->next = ROFF_NEXT_CHILD;
832 }
833
/*
 * Discard the tree and metadata held by a roff_man object
 * and reinitialize it, keeping the object itself allocated.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);
	roff_man_alloc1(man);
}
841
/*
 * Destroy a roff_man object together with its tree and metadata.
 */
void
roff_man_free(struct roff_man *man)
{
	roff_man_free1(man);
	free(man);
}
849
850 struct roff_man *
851 roff_man_alloc(struct roff *roff, struct mparse *parse,
852 const char *os_s, int quick)
853 {
854 struct roff_man *man;
855
856 man = mandoc_calloc(1, sizeof(*man));
857 man->parse = parse;
858 man->roff = roff;
859 man->os_s = os_s;
860 man->quick = quick;
861 roff_man_alloc1(man);
862 roff->man = man;
863 return man;
864 }
865
866 /* --- syntax tree handling ----------------------------------------------- */
867
868 struct roff_node *
869 roff_node_alloc(struct roff_man *man, int line, int pos,
870 enum roff_type type, int tok)
871 {
872 struct roff_node *n;
873
874 n = mandoc_calloc(1, sizeof(*n));
875 n->line = line;
876 n->pos = pos;
877 n->tok = tok;
878 n->type = type;
879 n->sec = man->lastsec;
880
881 if (man->flags & MDOC_SYNOPSIS)
882 n->flags |= NODE_SYNPRETTY;
883 else
884 n->flags &= ~NODE_SYNPRETTY;
885 if (man->flags & MDOC_NEWLINE)
886 n->flags |= NODE_LINE;
887 man->flags &= ~MDOC_NEWLINE;
888
889 return n;
890 }
891
892 void
893 roff_node_append(struct roff_man *man, struct roff_node *n)
894 {
895
896 switch (man->next) {
897 case ROFF_NEXT_SIBLING:
898 if (man->last->next != NULL) {
899 n->next = man->last->next;
900 man->last->next->prev = n;
901 } else
902 man->last->parent->last = n;
903 man->last->next = n;
904 n->prev = man->last;
905 n->parent = man->last->parent;
906 break;
907 case ROFF_NEXT_CHILD:
908 if (man->last->child != NULL) {
909 n->next = man->last->child;
910 man->last->child->prev = n;
911 } else
912 man->last->last = n;
913 man->last->child = n;
914 n->parent = man->last;
915 break;
916 default:
917 abort();
918 }
919 man->last = n;
920
921 switch (n->type) {
922 case ROFFT_HEAD:
923 n->parent->head = n;
924 break;
925 case ROFFT_BODY:
926 if (n->end != ENDBODY_NOT)
927 return;
928 n->parent->body = n;
929 break;
930 case ROFFT_TAIL:
931 n->parent->tail = n;
932 break;
933 default:
934 return;
935 }
936
937 /*
938 * Copy over the normalised-data pointer of our parent. Not
939 * everybody has one, but copying a null pointer is fine.
940 */
941
942 n->norm = n->parent->norm;
943 assert(n->parent->type == ROFFT_BLOCK);
944 }
945
946 void
947 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
948 {
949 struct roff_node *n;
950
951 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
952 n->string = roff_strdup(man->roff, word);
953 roff_node_append(man, n);
954 n->flags |= NODE_VALID | NODE_ENDED;
955 man->next = ROFF_NEXT_SIBLING;
956 }
957
958 void
959 roff_word_append(struct roff_man *man, const char *word)
960 {
961 struct roff_node *n;
962 char *addstr, *newstr;
963
964 n = man->last;
965 addstr = roff_strdup(man->roff, word);
966 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
967 free(addstr);
968 free(n->string);
969 n->string = newstr;
970 man->next = ROFF_NEXT_SIBLING;
971 }
972
973 void
974 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
975 {
976 struct roff_node *n;
977
978 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
979 roff_node_append(man, n);
980 man->next = ROFF_NEXT_CHILD;
981 }
982
983 struct roff_node *
984 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
985 {
986 struct roff_node *n;
987
988 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
989 roff_node_append(man, n);
990 man->next = ROFF_NEXT_CHILD;
991 return n;
992 }
993
994 struct roff_node *
995 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
996 {
997 struct roff_node *n;
998
999 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1000 roff_node_append(man, n);
1001 man->next = ROFF_NEXT_CHILD;
1002 return n;
1003 }
1004
1005 struct roff_node *
1006 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1007 {
1008 struct roff_node *n;
1009
1010 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1011 roff_node_append(man, n);
1012 man->next = ROFF_NEXT_CHILD;
1013 return n;
1014 }
1015
1016 static void
1017 roff_addtbl(struct roff_man *man, struct tbl_node *tbl)
1018 {
1019 struct roff_node *n;
1020 const struct tbl_span *span;
1021
1022 if (man->macroset == MACROSET_MAN)
1023 man_breakscope(man, ROFF_TS);
1024 while ((span = tbl_span(tbl)) != NULL) {
1025 n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE);
1026 n->span = span;
1027 roff_node_append(man, n);
1028 n->flags |= NODE_VALID | NODE_ENDED;
1029 man->next = ROFF_NEXT_SIBLING;
1030 }
1031 }
1032
1033 void
1034 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1035 {
1036
1037 /* Adjust siblings. */
1038
1039 if (n->prev)
1040 n->prev->next = n->next;
1041 if (n->next)
1042 n->next->prev = n->prev;
1043
1044 /* Adjust parent. */
1045
1046 if (n->parent != NULL) {
1047 if (n->parent->child == n)
1048 n->parent->child = n->next;
1049 if (n->parent->last == n)
1050 n->parent->last = n->prev;
1051 }
1052
1053 /* Adjust parse point. */
1054
1055 if (man == NULL)
1056 return;
1057 if (man->last == n) {
1058 if (n->prev == NULL) {
1059 man->last = n->parent;
1060 man->next = ROFF_NEXT_CHILD;
1061 } else {
1062 man->last = n->prev;
1063 man->next = ROFF_NEXT_SIBLING;
1064 }
1065 }
1066 if (man->first == n)
1067 man->first = NULL;
1068 }
1069
1070 void
1071 roff_node_relink(struct roff_man *man, struct roff_node *n)
1072 {
1073 roff_node_unlink(man, n);
1074 n->prev = n->next = NULL;
1075 roff_node_append(man, n);
1076 }
1077
1078 void
1079 roff_node_free(struct roff_node *n)
1080 {
1081
1082 if (n->args != NULL)
1083 mdoc_argv_free(n->args);
1084 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1085 free(n->norm);
1086 if (n->eqn != NULL)
1087 eqn_box_free(n->eqn);
1088 free(n->string);
1089 free(n);
1090 }
1091
1092 void
1093 roff_node_delete(struct roff_man *man, struct roff_node *n)
1094 {
1095
1096 while (n->child != NULL)
1097 roff_node_delete(man, n->child);
1098 roff_node_unlink(man, n);
1099 roff_node_free(n);
1100 }
1101
1102 void
1103 deroff(char **dest, const struct roff_node *n)
1104 {
1105 char *cp;
1106 size_t sz;
1107
1108 if (n->type != ROFFT_TEXT) {
1109 for (n = n->child; n != NULL; n = n->next)
1110 deroff(dest, n);
1111 return;
1112 }
1113
1114 /* Skip leading whitespace. */
1115
1116 for (cp = n->string; *cp != '\0'; cp++) {
1117 if (cp[0] == '\\' && cp[1] != '\0' &&
1118 strchr(" %&0^|~", cp[1]) != NULL)
1119 cp++;
1120 else if ( ! isspace((unsigned char)*cp))
1121 break;
1122 }
1123
1124 /* Skip trailing backslash. */
1125
1126 sz = strlen(cp);
1127 if (sz > 0 && cp[sz - 1] == '\\')
1128 sz--;
1129
1130 /* Skip trailing whitespace. */
1131
1132 for (; sz; sz--)
1133 if ( ! isspace((unsigned char)cp[sz-1]))
1134 break;
1135
1136 /* Skip empty strings. */
1137
1138 if (sz == 0)
1139 return;
1140
1141 if (*dest == NULL) {
1142 *dest = mandoc_strndup(cp, sz);
1143 return;
1144 }
1145
1146 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1147 free(*dest);
1148 *dest = cp;
1149 }
1150
1151 /* --- main functions of the roff parser ---------------------------------- */
1152
1153 /*
1154 * In the current line, expand escape sequences that tend to get
1155 * used in numerical expressions and conditional requests.
1156 * Also check the syntax of the remaining escape sequences.
 *
 * r supplies the escape character, the macro call stack, and the
 * string and register lookups.  buf is the line buffer; it may be
 * replaced by a newly allocated one, in which case buf->buf and
 * buf->sz are updated.  ln is the input line number used in
 * messages and pos is the offset in buf->buf where processing starts.
 *
 * Returns ROFF_CONT when the line is ready for further parsing,
 * ROFF_IGN | ROFF_APPEND when the line ends with a \# comment or with
 * an unescaped escape character that is not at the end of the input,
 * and ROFF_IGN when the expansion limit or the size limit is exceeded.
1157 */
1158 static int
1159 roff_res(struct roff *r, struct buf *buf, int ln, int pos)
1160 {
1161 struct mctx *ctx; /* current macro call context */
1162 char ubuf[24]; /* buffer to print the number */
1163 struct roff_node *n; /* used for header comments */
1164 const char *start; /* start of the string to process */
1165 char *stesc; /* start of an escape sequence ('\\') */
1166 char *ep; /* end of comment string */
1167 const char *stnam; /* start of the name, after "[(*" */
1168 const char *cp; /* end of the name, e.g. before ']' */
1169 const char *res; /* the string to be substituted */
1170 char *nbuf; /* new buffer to copy buf->buf to */
1171 size_t maxl; /* expected length of the escape name */
1172 size_t naml; /* actual length of the escape name */
1173 size_t asz; /* length of the replacement */
1174 size_t rsz; /* length of the rest of the string */
1175 enum mandoc_esc esc; /* type of the escape sequence */
1176 int inaml; /* length returned from mandoc_escape() */
1177 int expand_count; /* to avoid infinite loops */
1178 int npos; /* position in numeric expression */
1179 int arg_complete; /* argument not interrupted by eol */
1180 int quote_args; /* true for \\$@, false for \\$* */
1181 int done; /* no more input available */
1182 int deftype; /* type of definition to paste */
1183 int rcsid; /* kind of RCS id seen */
1184 char sign; /* increment number register */
1185 char term; /* character terminating the escape */
1186
1187 /* Search forward for comments. */
1188
1189 done = 0;
1190 start = buf->buf + pos;
1191 for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
1192 if (stesc[0] != r->escape || stesc[1] == '\0')
1193 continue;
1194 stesc++;
1195 if (*stesc != '"' && *stesc != '#')
1196 continue;
1197
1198 /* Comment found, look for RCS id. */
1199
1200 rcsid = 0;
1201 if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
1202 rcsid = 1 << MANDOC_OS_OPENBSD;
1203 cp += 8;
1204 } else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
1205 rcsid = 1 << MANDOC_OS_NETBSD;
1206 cp += 7;
1207 }
1208 if (cp != NULL &&
1209 isalnum((unsigned char)*cp) == 0 &&
1210 strchr(cp, '$') != NULL) {
1211 if (r->man->meta.rcsids & rcsid)
1212 mandoc_msg(MANDOCERR_RCS_REP, r->parse,
1213 ln, stesc + 1 - buf->buf, stesc + 1);
1214 r->man->meta.rcsids |= rcsid;
1215 }
1216
1217 /* Handle trailing whitespace. */
1218
/* From here on, stesc points to the escape character again. */
1219 ep = strchr(stesc--, '\0') - 1;
1220 if (*ep == '\n') {
1221 done = 1;
1222 ep--;
1223 }
1224 if (*ep == ' ' || *ep == '\t')
1225 mandoc_msg(MANDOCERR_SPACE_EOL, r->parse,
1226 ln, ep - buf->buf, NULL);
1227
1228 /*
1229 * Save comments preceding the title macro
1230 * in the syntax tree.
1231 */
1232
1233 if (r->format == 0) {
1234 while (*ep == ' ' || *ep == '\t')
1235 ep--;
1236 ep[1] = '\0';
1237 n = roff_node_alloc(r->man,
1238 ln, stesc + 1 - buf->buf,
1239 ROFFT_COMMENT, TOKEN_NONE);
1240 n->string = mandoc_strdup(stesc + 2);
1241 roff_node_append(r->man, n);
1242 n->flags |= NODE_VALID | NODE_ENDED;
1243 r->man->next = ROFF_NEXT_SIBLING;
1244 }
1245
1246 /* Line continuation with comment. */
1247
1248 if (stesc[1] == '#') {
1249 *stesc = '\0';
1250 return ROFF_IGN | ROFF_APPEND;
1251 }
1252
1253 /* Discard normal comments. */
1254
1255 while (stesc > start && stesc[-1] == ' ' &&
1256 (stesc == start + 1 || stesc[-2] != '\\'))
1257 stesc--;
1258 *stesc = '\0';
1259 break;
1260 }
1261 if (stesc == start)
1262 return ROFF_CONT;
1263 stesc--;
1264
1265 /* Notice the end of the input. */
1266
1267 if (*stesc == '\n') {
1268 *stesc-- = '\0';
1269 done = 1;
1270 }
1271
/*
 * Process the line from right to left.  Whenever the buffer is
 * replaced, start and stesc are rebased onto the new buffer.
 */
1272 expand_count = 0;
1273 while (stesc >= start) {
1274
1275 /* Search backwards for the next backslash. */
1276
1277 if (*stesc != r->escape) {
/* With a non-default escape character, a literal backslash becomes \e. */
1278 if (*stesc == '\\') {
1279 *stesc = '\0';
1280 buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
1281 buf->buf, stesc + 1) + 1;
1282 start = nbuf + pos;
1283 stesc = nbuf + (stesc - buf->buf);
1284 free(buf->buf);
1285 buf->buf = nbuf;
1286 }
1287 stesc--;
1288 continue;
1289 }
1290
1291 /* If it is escaped, skip it. */
1292
1293 for (cp = stesc - 1; cp >= start; cp--)
1294 if (*cp != r->escape)
1295 break;
1296
/*
 * stesc - cp is the length of the run of escape characters ending
 * at stesc.  If it is even, the last one is itself escaped: turn
 * the whole run into backslashes and continue to the left of it.
 * If a lone escape character ends the line, delete it and, unless
 * the input is exhausted, ask for the next line to be appended.
 */
1297 if ((stesc - cp) % 2 == 0) {
1298 while (stesc > cp)
1299 *stesc-- = '\\';
1300 continue;
1301 } else if (stesc[1] != '\0') {
1302 *stesc = '\\';
1303 } else {
1304 *stesc-- = '\0';
1305 if (done)
1306 continue;
1307 else
1308 return ROFF_IGN | ROFF_APPEND;
1309 }
1310
1311 /* Decide whether to expand or to check only. */
1312
1313 term = '\0';
1314 cp = stesc + 1;
1315 switch (*cp) {
1316 case '*':
1317 case '$':
1318 res = NULL;
1319 break;
1320 case 'B':
1321 case 'w':
1322 term = cp[1];
1323 /* FALLTHROUGH */
1324 case 'n':
1325 sign = cp[1];
1326 if (sign == '+' || sign == '-')
1327 cp++;
1328 res = ubuf;
1329 break;
1330 default:
1331 esc = mandoc_escape(&cp, &stnam, &inaml);
1332 if (esc == ESCAPE_ERROR ||
1333 (esc == ESCAPE_SPECIAL &&
1334 mchars_spec2cp(stnam, inaml) < 0))
1335 mandoc_vmsg(MANDOCERR_ESC_BAD,
1336 r->parse, ln, (int)(stesc - buf->buf),
1337 "%.*s", (int)(cp - stesc), stesc);
1338 stesc--;
1339 continue;
1340 }
1341
1342 if (EXPAND_LIMIT < ++expand_count) {
1343 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1344 ln, (int)(stesc - buf->buf), NULL);
1345 return ROFF_IGN;
1346 }
1347
1348 /*
1349 * The third character decides the length
1350 * of the name of the string or register.
1351 * Save a pointer to the name.
1352 */
1353
1354 if (term == '\0') {
1355 switch (*++cp) {
1356 case '\0':
1357 maxl = 0;
1358 break;
1359 case '(':
1360 cp++;
1361 maxl = 2;
1362 break;
1363 case '[':
1364 cp++;
1365 term = ']';
1366 maxl = 0;
1367 break;
1368 default:
1369 maxl = 1;
1370 break;
1371 }
1372 } else {
1373 cp += 2;
1374 maxl = 0;
1375 }
1376 stnam = cp;
1377
1378 /* Advance to the end of the name. */
1379
1380 naml = 0;
1381 arg_complete = 1;
1382 while (maxl == 0 || naml < maxl) {
1383 if (*cp == '\0') {
1384 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
1385 ln, (int)(stesc - buf->buf), stesc);
1386 arg_complete = 0;
1387 break;
1388 }
1389 if (maxl == 0 && *cp == term) {
1390 cp++;
1391 break;
1392 }
/* Only inside \w are nested escape sequences parsed as units. */
1393 if (*cp++ != '\\' || stesc[1] != 'w') {
1394 naml++;
1395 continue;
1396 }
1397 switch (mandoc_escape(&cp, NULL, NULL)) {
1398 case ESCAPE_SPECIAL:
1399 case ESCAPE_UNICODE:
1400 case ESCAPE_NUMBERED:
1401 case ESCAPE_OVERSTRIKE:
1402 naml++;
1403 break;
1404 default:
1405 break;
1406 }
1407 }
1408
1409 /*
1410 * Retrieve the replacement string; if it is
1411 * undefined, resume searching for escapes.
1412 */
1413
1414 switch (stesc[1]) {
1415 case '*':
1416 if (arg_complete) {
1417 deftype = ROFFDEF_USER | ROFFDEF_PRE;
1418 res = roff_getstrn(r, stnam, naml, &deftype);
1419
1420 /*
1421 * If not overridden, let \*(.T
1422 * through to the formatters.
1423 */
1424
1425 if (res == NULL && naml == 2 &&
1426 stnam[0] == '.' && stnam[1] == 'T') {
1427 roff_setstrn(&r->strtab,
1428 ".T", 2, NULL, 0, 0);
1429 stesc--;
1430 continue;
1431 }
1432 }
1433 break;
1434 case '$':
1435 if (r->mstackpos < 0) {
1436 mandoc_vmsg(MANDOCERR_ARG_UNDEF,
1437 r->parse, ln, (int)(stesc - buf->buf),
1438 "%.3s", stesc);
1439 break;
1440 }
1441 ctx = r->mstack + r->mstackpos;
1442 npos = stesc[2] - '1';
1443 if (npos >= 0 && npos <= 8) {
1444 res = npos < ctx->argc ?
1445 ctx->argv[npos] : "";
1446 break;
1447 }
1448 if (stesc[2] == '*')
1449 quote_args = 0;
1450 else if (stesc[2] == '@')
1451 quote_args = 1;
1452 else {
1453 mandoc_vmsg(MANDOCERR_ARG_NONUM,
1454 r->parse, ln, (int)(stesc - buf->buf),
1455 "%.3s", stesc);
1456 break;
1457 }
1458 asz = 0;
1459 for (npos = 0; npos < ctx->argc; npos++) {
1460 if (npos)
1461 asz++; /* blank */
1462 if (quote_args)
1463 asz += 2; /* quotes */
1464 asz += strlen(ctx->argv[npos]);
1465 }
/*
 * The three bytes of the escape sequence are overwritten in place
 * by asz bytes of arguments.  When shrinking, the rest of the
 * buffer is moved before the reallocation; when growing, after it.
 */
1466 if (asz != 3) {
1467 rsz = buf->sz - (stesc - buf->buf) - 3;
1468 if (asz < 3)
1469 memmove(stesc + asz, stesc + 3, rsz);
1470 buf->sz += asz - 3;
1471 nbuf = mandoc_realloc(buf->buf, buf->sz);
1472 start = nbuf + pos;
1473 stesc = nbuf + (stesc - buf->buf);
1474 buf->buf = nbuf;
1475 if (asz > 3)
1476 memmove(stesc + asz, stesc + 3, rsz);
1477 }
1478 for (npos = 0; npos < ctx->argc; npos++) {
1479 if (npos)
1480 *stesc++ = ' ';
1481 if (quote_args)
1482 *stesc++ = '"';
1483 cp = ctx->argv[npos];
1484 while (*cp != '\0')
1485 *stesc++ = *cp++;
1486 if (quote_args)
1487 *stesc++ = '"';
1488 }
1489 continue;
1490 case 'B':
/*
 * Yield '1' only if the argument is complete, roff_evalnum()
 * succeeds, and it stops right before the closing delimiter.
 */
1491 npos = 0;
1492 ubuf[0] = arg_complete &&
1493 roff_evalnum(r, ln, stnam, &npos,
1494 NULL, ROFFNUM_SCALE) &&
1495 stnam + npos + 1 == cp ? '1' : '0';
1496 ubuf[1] = '\0';
1497 break;
1498 case 'n':
1499 if (arg_complete)
1500 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1501 roff_getregn(r, stnam, naml, sign));
1502 else
1503 ubuf[0] = '\0';
1504 break;
1505 case 'w':
1506 /* use even incomplete args */
1507 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1508 24 * (int)naml);
1509 break;
1510 }
1511
1512 if (res == NULL) {
1513 if (stesc[1] == '*')
1514 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1515 r->parse, ln, (int)(stesc - buf->buf),
1516 "%.*s", (int)naml, stnam);
1517 res = "";
1518 } else if (buf->sz + strlen(res) > SHRT_MAX) {
1519 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1520 ln, (int)(stesc - buf->buf), NULL);
1521 return ROFF_IGN;
1522 }
1523
1524 /* Replace the escape sequence by the string. */
1525
1526 *stesc = '\0';
1527 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
1528 buf->buf, res, cp) + 1;
1529
1530 /* Prepare for the next replacement. */
1531
/* Resume right after the inserted text, so that it gets scanned, too. */
1532 start = nbuf + pos;
1533 stesc = nbuf + (stesc - buf->buf) + strlen(res);
1534 free(buf->buf);
1535 buf->buf = nbuf;
1536 }
1537 return ROFF_CONT;
1538 }
1539
1540 /*
1541 * Process text streams.
1542 */
1543 static int
1544 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1545 {
1546 size_t sz;
1547 const char *start;
1548 char *p;
1549 int isz;
1550 enum mandoc_esc esc;
1551
1552 /* Spring the input line trap. */
1553
1554 if (roffit_lines == 1) {
1555 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1556 free(buf->buf);
1557 buf->buf = p;
1558 buf->sz = isz + 1;
1559 *offs = 0;
1560 free(roffit_macro);
1561 roffit_lines = 0;
1562 return ROFF_REPARSE;
1563 } else if (roffit_lines > 1)
1564 --roffit_lines;
1565
1566 if (roffce_node != NULL && buf->buf[pos] != '\0') {
1567 if (roffce_lines < 1) {
1568 r->man->last = roffce_node;
1569 r->man->next = ROFF_NEXT_SIBLING;
1570 roffce_lines = 0;
1571 roffce_node = NULL;
1572 } else
1573 roffce_lines--;
1574 }
1575
1576 /* Convert all breakable hyphens into ASCII_HYPH. */
1577
1578 start = p = buf->buf + pos;
1579
1580 while (*p != '\0') {
1581 sz = strcspn(p, "-\\");
1582 p += sz;
1583
1584 if (*p == '\0')
1585 break;
1586
1587 if (*p == '\\') {
1588 /* Skip over escapes. */
1589 p++;
1590 esc = mandoc_escape((const char **)&p, NULL, NULL);
1591 if (esc == ESCAPE_ERROR)
1592 break;
1593 while (*p == '-')
1594 p++;
1595 continue;
1596 } else if (p == start) {
1597 p++;
1598 continue;
1599 }
1600
1601 if (isalpha((unsigned char)p[-1]) &&
1602 isalpha((unsigned char)p[1]))
1603 *p = ASCII_HYPH;
1604 p++;
1605 }
1606 return ROFF_CONT;
1607 }
1608
1609 int
1610 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
1611 {
1612 enum roff_tok t;
1613 int e;
1614 int pos; /* parse point */
1615 int spos; /* saved parse point for messages */
1616 int ppos; /* original offset in buf->buf */
1617 int ctl; /* macro line (boolean) */
1618
1619 ppos = pos = *offs;
1620
1621 /* Handle in-line equation delimiters. */
1622
1623 if (r->tbl == NULL &&
1624 r->last_eqn != NULL && r->last_eqn->delim &&
1625 (r->eqn == NULL || r->eqn_inline)) {
1626 e = roff_eqndelim(r, buf, pos);
1627 if (e == ROFF_REPARSE)
1628 return e;
1629 assert(e == ROFF_CONT);
1630 }
1631
1632 /* Expand some escape sequences. */
1633
1634 e = roff_res(r, buf, ln, pos);
1635 if ((e & ROFF_MASK) == ROFF_IGN)
1636 return e;
1637 assert(e == ROFF_CONT);
1638
1639 ctl = roff_getcontrol(r, buf->buf, &pos);
1640
1641 /*
1642 * First, if a scope is open and we're not a macro, pass the
1643 * text through the macro's filter.
1644 * Equations process all content themselves.
1645 * Tables process almost all content themselves, but we want
1646 * to warn about macros before passing it there.
1647 */
1648
1649 if (r->last != NULL && ! ctl) {
1650 t = r->last->tok;
1651 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1652 if ((e & ROFF_MASK) == ROFF_IGN)
1653 return e;
1654 e &= ~ROFF_MASK;
1655 } else
1656 e = ROFF_IGN;
1657 if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
1658 eqn_read(r->eqn, buf->buf + ppos);
1659 return e;
1660 }
1661 if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
1662 tbl_read(r->tbl, ln, buf->buf, ppos);
1663 roff_addtbl(r->man, r->tbl);
1664 return e;
1665 }
1666 if ( ! ctl)
1667 return roff_parsetext(r, buf, pos, offs) | e;
1668
1669 /* Skip empty request lines. */
1670
1671 if (buf->buf[pos] == '"') {
1672 mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
1673 ln, pos, NULL);
1674 return ROFF_IGN;
1675 } else if (buf->buf[pos] == '\0')
1676 return ROFF_IGN;
1677
1678 /*
1679 * If a scope is open, go to the child handler for that macro,
1680 * as it may want to preprocess before doing anything with it.
1681 * Don't do so if an equation is open.
1682 */
1683
1684 if (r->last) {
1685 t = r->last->tok;
1686 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1687 }
1688
1689 /* No scope is open. This is a new request or macro. */
1690
1691 spos = pos;
1692 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1693
1694 /* Tables ignore most macros. */
1695
1696 if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
1697 t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
1698 mandoc_msg(MANDOCERR_TBLMACRO, r->parse,
1699 ln, pos, buf->buf + spos);
1700 if (t != TOKEN_NONE)
1701 return ROFF_IGN;
1702 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1703 pos++;
1704 while (buf->buf[pos] == ' ')
1705 pos++;
1706 tbl_read(r->tbl, ln, buf->buf, pos);
1707 roff_addtbl(r->man, r->tbl);
1708 return ROFF_IGN;
1709 }
1710
1711 /* For now, let high level macros abort .ce mode. */
1712
1713 if (ctl && roffce_node != NULL &&
1714 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
1715 t == ROFF_TH || t == ROFF_TS)) {
1716 r->man->last = roffce_node;
1717 r->man->next = ROFF_NEXT_SIBLING;
1718 roffce_lines = 0;
1719 roffce_node = NULL;
1720 }
1721
1722 /*
1723 * This is neither a roff request nor a user-defined macro.
1724 * Let the standard macro set parsers handle it.
1725 */
1726
1727 if (t == TOKEN_NONE)
1728 return ROFF_CONT;
1729
1730 /* Execute a roff request or a user defined macro. */
1731
1732 return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
1733 }
1734
1735 /*
1736 * Internal interface function to tell the roff parser that execution
1737 * of the current macro ended. This is required because macro
1738 * definitions usually do not end with a .return request.
1739 */
1740 void
1741 roff_userret(struct roff *r)
1742 {
1743 struct mctx *ctx;
1744 int i;
1745
1746 assert(r->mstackpos >= 0);
1747 ctx = r->mstack + r->mstackpos;
1748 for (i = 0; i < ctx->argc; i++)
1749 free(ctx->argv[i]);
1750 ctx->argc = 0;
1751 r->mstackpos--;
1752 }
1753
1754 void
1755 roff_endparse(struct roff *r)
1756 {
1757 if (r->last != NULL)
1758 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1759 r->last->line, r->last->col,
1760 roff_name[r->last->tok]);
1761
1762 if (r->eqn != NULL) {
1763 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1764 r->eqn->node->line, r->eqn->node->pos, "EQ");
1765 eqn_parse(r->eqn);
1766 r->eqn = NULL;
1767 }
1768
1769 if (r->tbl != NULL) {
1770 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1771 r->tbl->line, r->tbl->pos, "TS");
1772 tbl_end(r->tbl);
1773 r->tbl = NULL;
1774 }
1775 }
1776
1777 /*
1778 * Parse a roff node's type from the input buffer. This must be in the
1779 * form of ".foo xxx" in the usual way.
1780 */
1781 static enum roff_tok
1782 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1783 {
1784 char *cp;
1785 const char *mac;
1786 size_t maclen;
1787 int deftype;
1788 enum roff_tok t;
1789
1790 cp = buf + *pos;
1791
1792 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1793 return TOKEN_NONE;
1794
1795 mac = cp;
1796 maclen = roff_getname(r, &cp, ln, ppos);
1797
1798 deftype = ROFFDEF_USER | ROFFDEF_REN;
1799 r->current_string = roff_getstrn(r, mac, maclen, &deftype);
1800 switch (deftype) {
1801 case ROFFDEF_USER:
1802 t = ROFF_USERDEF;
1803 break;
1804 case ROFFDEF_REN:
1805 t = ROFF_RENAMED;
1806 break;
1807 default:
1808 t = roffhash_find(r->reqtab, mac, maclen);
1809 break;
1810 }
1811 if (t != TOKEN_NONE)
1812 *pos = cp - buf;
1813 else if (deftype == ROFFDEF_UNDEF) {
1814 /* Using an undefined macro defines it to be empty. */
1815 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
1816 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
1817 }
1818 return t;
1819 }
1820
1821 /* --- handling of request blocks ----------------------------------------- */
1822
1823 static int
1824 roff_cblock(ROFF_ARGS)
1825 {
1826
1827 /*
1828 * A block-close `..' should only be invoked as a child of an
1829 * ignore macro, otherwise raise a warning and just ignore it.
1830 */
1831
1832 if (r->last == NULL) {
1833 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1834 ln, ppos, "..");
1835 return ROFF_IGN;
1836 }
1837
1838 switch (r->last->tok) {
1839 case ROFF_am:
1840 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1841 case ROFF_ami:
1842 case ROFF_de:
1843 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1844 case ROFF_dei:
1845 case ROFF_ig:
1846 break;
1847 default:
1848 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1849 ln, ppos, "..");
1850 return ROFF_IGN;
1851 }
1852
1853 if (buf->buf[pos] != '\0')
1854 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1855 ".. %s", buf->buf + pos);
1856
1857 roffnode_pop(r);
1858 roffnode_cleanscope(r);
1859 return ROFF_IGN;
1860
1861 }
1862
1863 static int
1864 roffnode_cleanscope(struct roff *r)
1865 {
1866 int inloop;
1867
1868 inloop = 0;
1869 while (r->last != NULL) {
1870 if (--r->last->endspan != 0)
1871 break;
1872 inloop += roffnode_pop(r);
1873 }
1874 return inloop;
1875 }
1876
1877 static int
1878 roff_ccond(struct roff *r, int ln, int ppos)
1879 {
1880 if (NULL == r->last) {
1881 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1882 ln, ppos, "\\}");
1883 return 0;
1884 }
1885
1886 switch (r->last->tok) {
1887 case ROFF_el:
1888 case ROFF_ie:
1889 case ROFF_if:
1890 case ROFF_while:
1891 break;
1892 default:
1893 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1894 ln, ppos, "\\}");
1895 return 0;
1896 }
1897
1898 if (r->last->endspan > -1) {
1899 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1900 ln, ppos, "\\}");
1901 return 0;
1902 }
1903
1904 return roffnode_pop(r) + roffnode_cleanscope(r);
1905 }
1906
1907 static int
1908 roff_block(ROFF_ARGS)
1909 {
1910 const char *name, *value;
1911 char *call, *cp, *iname, *rname;
1912 size_t csz, namesz, rsz;
1913 int deftype;
1914
1915 /* Ignore groff compatibility mode for now. */
1916
1917 if (tok == ROFF_de1)
1918 tok = ROFF_de;
1919 else if (tok == ROFF_dei1)
1920 tok = ROFF_dei;
1921 else if (tok == ROFF_am1)
1922 tok = ROFF_am;
1923 else if (tok == ROFF_ami1)
1924 tok = ROFF_ami;
1925
1926 /* Parse the macro name argument. */
1927
1928 cp = buf->buf + pos;
1929 if (tok == ROFF_ig) {
1930 iname = NULL;
1931 namesz = 0;
1932 } else {
1933 iname = cp;
1934 namesz = roff_getname(r, &cp, ln, ppos);
1935 iname[namesz] = '\0';
1936 }
1937
1938 /* Resolve the macro name argument if it is indirect. */
1939
1940 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1941 deftype = ROFFDEF_USER;
1942 name = roff_getstrn(r, iname, namesz, &deftype);
1943 if (name == NULL) {
1944 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1945 r->parse, ln, (int)(iname - buf->buf),
1946 "%.*s", (int)namesz, iname);
1947 namesz = 0;
1948 } else
1949 namesz = strlen(name);
1950 } else
1951 name = iname;
1952
1953 if (namesz == 0 && tok != ROFF_ig) {
1954 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
1955 ln, ppos, roff_name[tok]);
1956 return ROFF_IGN;
1957 }
1958
1959 roffnode_push(r, tok, name, ln, ppos);
1960
1961 /*
1962 * At the beginning of a `de' macro, clear the existing string
1963 * with the same name, if there is one. New content will be
1964 * appended from roff_block_text() in multiline mode.
1965 */
1966
1967 if (tok == ROFF_de || tok == ROFF_dei) {
1968 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1969 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
1970 } else if (tok == ROFF_am || tok == ROFF_ami) {
1971 deftype = ROFFDEF_ANY;
1972 value = roff_getstrn(r, iname, namesz, &deftype);
1973 switch (deftype) { /* Before appending, ... */
1974 case ROFFDEF_PRE: /* copy predefined to user-defined. */
1975 roff_setstrn(&r->strtab, name, namesz,
1976 value, strlen(value), 0);
1977 break;
1978 case ROFFDEF_REN: /* call original standard macro. */
1979 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
1980 (int)strlen(value), value);
1981 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
1982 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
1983 free(call);
1984 break;
1985 case ROFFDEF_STD: /* rename and call standard macro. */
1986 rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
1987 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
1988 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
1989 (int)rsz, rname);
1990 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
1991 free(call);
1992 free(rname);
1993 break;
1994 default:
1995 break;
1996 }
1997 }
1998
1999 if (*cp == '\0')
2000 return ROFF_IGN;
2001
2002 /* Get the custom end marker. */
2003
2004 iname = cp;
2005 namesz = roff_getname(r, &cp, ln, ppos);
2006
2007 /* Resolve the end marker if it is indirect. */
2008
2009 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2010 deftype = ROFFDEF_USER;
2011 name = roff_getstrn(r, iname, namesz, &deftype);
2012 if (name == NULL) {
2013 mandoc_vmsg(MANDOCERR_STR_UNDEF,
2014 r->parse, ln, (int)(iname - buf->buf),
2015 "%.*s", (int)namesz, iname);
2016 namesz = 0;
2017 } else
2018 namesz = strlen(name);
2019 } else
2020 name = iname;
2021
2022 if (namesz)
2023 r->last->end = mandoc_strndup(name, namesz);
2024
2025 if (*cp != '\0')
2026 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
2027 ln, pos, ".%s ... %s", roff_name[tok], cp);
2028
2029 return ROFF_IGN;
2030 }
2031
2032 static int
2033 roff_block_sub(ROFF_ARGS)
2034 {
2035 enum roff_tok t;
2036 int i, j;
2037
2038 /*
2039 * First check whether a custom macro exists at this level. If
2040 * it does, then check against it. This is some of groff's
2041 * stranger behaviours. If we encountered a custom end-scope
2042 * tag and that tag also happens to be a "real" macro, then we
2043 * need to try interpreting it again as a real macro. If it's
2044 * not, then return ignore. Else continue.
2045 */
2046
2047 if (r->last->end) {
2048 for (i = pos, j = 0; r->last->end[j]; j++, i++)
2049 if (buf->buf[i] != r->last->end[j])
2050 break;
2051
2052 if (r->last->end[j] == '\0' &&
2053 (buf->buf[i] == '\0' ||
2054 buf->buf[i] == ' ' ||
2055 buf->buf[i] == '\t')) {
2056 roffnode_pop(r);
2057 roffnode_cleanscope(r);
2058
2059 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2060 i++;
2061
2062 pos = i;
2063 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2064 TOKEN_NONE)
2065 return ROFF_RERUN;
2066 return ROFF_IGN;
2067 }
2068 }
2069
2070 /*
2071 * If we have no custom end-query or lookup failed, then try
2072 * pulling it out of the hashtable.
2073 */
2074
2075 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2076
2077 if (t != ROFF_cblock) {
2078 if (tok != ROFF_ig)
2079 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2080 return ROFF_IGN;
2081 }
2082
2083 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2084 }
2085
2086 static int
2087 roff_block_text(ROFF_ARGS)
2088 {
2089
2090 if (tok != ROFF_ig)
2091 roff_setstr(r, r->last->name, buf->buf + pos, 2);
2092
2093 return ROFF_IGN;
2094 }
2095
2096 static int
2097 roff_cond_sub(ROFF_ARGS)
2098 {
2099 char *ep;
2100 int endloop, irc, rr;
2101 enum roff_tok t;
2102
2103 irc = ROFF_IGN;
2104 rr = r->last->rule;
2105 endloop = tok != ROFF_while ? ROFF_IGN :
2106 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2107 if (roffnode_cleanscope(r))
2108 irc |= endloop;
2109
2110 /*
2111 * If `\}' occurs on a macro line without a preceding macro,
2112 * drop the line completely.
2113 */
2114
2115 ep = buf->buf + pos;
2116 if (ep[0] == '\\' && ep[1] == '}')
2117 rr = 0;
2118
2119 /*
2120 * The closing delimiter `\}' rewinds the conditional scope
2121 * but is otherwise ignored when interpreting the line.
2122 */
2123
2124 while ((ep = strchr(ep, '\\')) != NULL) {
2125 switch (ep[1]) {
2126 case '}':
2127 memmove(ep, ep + 2, strlen(ep + 2) + 1);
2128 if (roff_ccond(r, ln, ep - buf->buf))
2129 irc |= endloop;
2130 break;
2131 case '\0':
2132 ++ep;
2133 break;
2134 default:
2135 ep += 2;
2136 break;
2137 }
2138 }
2139
2140 /*
2141 * Fully handle known macros when they are structurally
2142 * required or when the conditional evaluated to true.
2143 */
2144
2145 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2146 irc |= t != TOKEN_NONE && (rr || roffs[t].flags & ROFFMAC_STRUCT) ?
2147 (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs) :
2148 rr ? ROFF_CONT : ROFF_IGN;
2149 return irc;
2150 }
2151
2152 static int
2153 roff_cond_text(ROFF_ARGS)
2154 {
2155 char *ep;
2156 int endloop, irc, rr;
2157
2158 irc = ROFF_IGN;
2159 rr = r->last->rule;
2160 endloop = tok != ROFF_while ? ROFF_IGN :
2161 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2162 if (roffnode_cleanscope(r))
2163 irc |= endloop;
2164
2165 /*
2166 * If `\}' occurs on a text line with neither preceding
2167 * nor following characters, drop the line completely.
2168 */
2169
2170 ep = buf->buf + pos;
2171 if (strcmp(ep, "\\}") == 0)
2172 rr = 0;
2173
2174 /*
2175 * The closing delimiter `\}' rewinds the conditional scope
2176 * but is otherwise ignored when interpreting the line.
2177 */
2178
2179 while ((ep = strchr(ep, '\\')) != NULL) {
2180 switch (ep[1]) {
2181 case '}':
2182 memmove(ep, ep + 2, strlen(ep + 2) + 1);
2183 if (roff_ccond(r, ln, ep - buf->buf))
2184 irc |= endloop;
2185 break;
2186 case '\0':
2187 ++ep;
2188 break;
2189 default:
2190 ep += 2;
2191 break;
2192 }
2193 }
2194 if (rr)
2195 irc |= ROFF_CONT;
2196 return irc;
2197 }
2198
2199 /* --- handling of numeric and conditional expressions -------------------- */
2200
2201 /*
2202 * Parse a single signed integer number. Stop at the first non-digit.
2203 * If there is at least one digit, return success and advance the
2204 * parse point, else return failure and let the parse point unchanged.
2205 * Ignore overflows, treat them just like the C language.
2206 */
2207 static int
2208 roff_getnum(const char *v, int *pos, int *res, int flags)
2209 {
2210 int myres, scaled, n, p;
2211
2212 if (NULL == res)
2213 res = &myres;
2214
2215 p = *pos;
2216 n = v[p] == '-';
2217 if (n || v[p] == '+')
2218 p++;
2219
2220 if (flags & ROFFNUM_WHITE)
2221 while (isspace((unsigned char)v[p]))
2222 p++;
2223
2224 for (*res = 0; isdigit((unsigned char)v[p]); p++)
2225 *res = 10 * *res + v[p] - '0';
2226 if (p == *pos + n)
2227 return 0;
2228
2229 if (n)
2230 *res = -*res;
2231
2232 /* Each number may be followed by one optional scaling unit. */
2233
2234 switch (v[p]) {
2235 case 'f':
2236 scaled = *res * 65536;
2237 break;
2238 case 'i':
2239 scaled = *res * 240;
2240 break;
2241 case 'c':
2242 scaled = *res * 240 / 2.54;
2243 break;
2244 case 'v':
2245 case 'P':
2246 scaled = *res * 40;
2247 break;
2248 case 'm':
2249 case 'n':
2250 scaled = *res * 24;
2251 break;
2252 case 'p':
2253 scaled = *res * 10 / 3;
2254 break;
2255 case 'u':
2256 scaled = *res;
2257 break;
2258 case 'M':
2259 scaled = *res * 6 / 25;
2260 break;
2261 default:
2262 scaled = *res;
2263 p--;
2264 break;
2265 }
2266 if (flags & ROFFNUM_SCALE)
2267 *res = scaled;
2268
2269 *pos = p + 1;
2270 return 1;
2271 }
2272
/*
 * Evaluate a string comparison condition.
 * The character at v[*pos] is the delimiter.
 * Return 1 if the string between its first and second occurrence
 * equals the string between its second and third occurrence,
 * or 0 otherwise, including when a delimiter is missing.
 * Advance *pos to just after the third occurrence
 * or, lacking that, to the end of the line.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim, *lhs, *rhs;
	int		 equal;

	equal = 0;
	delim = v + *pos;		/* initial delimiter */
	lhs = delim + 1;		/* walks the first string */
	rhs = strchr(lhs, *delim);	/* middle delimiter, if any */

	if (rhs != NULL) {
		/* Walk both strings in lockstep. */
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: just look for the end. */
				rhs = strchr(rhs, *delim);
				break;
			}
			if (*rhs == *delim) {
				/* Both strings ended simultaneously. */
				equal = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = lhs + strlen(lhs);
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return equal;
}
2315
2316 /*
2317 * Evaluate an optionally negated single character, numerical,
2318 * or string condition.
2319 */
2320 static int
2321 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2322 {
2323 const char *start, *end;
2324 char *cp, *name;
2325 size_t sz;
2326 int deftype, len, number, savepos, istrue, wanttrue;
2327
2328 if ('!' == v[*pos]) {
2329 wanttrue = 0;
2330 (*pos)++;
2331 } else
2332 wanttrue = 1;
2333
2334 switch (v[*pos]) {
2335 case '\0':
2336 return 0;
2337 case 'n':
2338 case 'o':
2339 (*pos)++;
2340 return wanttrue;
2341 case 'e':
2342 case 't':
2343 case 'v':
2344 (*pos)++;
2345 return !wanttrue;
2346 case 'c':
2347 do {
2348 (*pos)++;
2349 } while (v[*pos] == ' ');
2350
2351 /*
2352 * Quirk for groff compatibility:
2353 * The horizontal tab is neither available nor unavailable.
2354 */
2355
2356 if (v[*pos] == '\t') {
2357 (*pos)++;
2358 return 0;
2359 }
2360
2361 /* Printable ASCII characters are available. */
2362
2363 if (v[*pos] != '\\') {
2364 (*pos)++;
2365 return wanttrue;
2366 }
2367
2368 end = v + ++*pos;
2369 switch (mandoc_escape(&end, &start, &len)) {
2370 case ESCAPE_SPECIAL:
2371 istrue = mchars_spec2cp(start, len) != -1;
2372 break;
2373 case ESCAPE_UNICODE:
2374 istrue = 1;
2375 break;
2376 case ESCAPE_NUMBERED:
2377 istrue = mchars_num2char(start, len) != -1;
2378 break;
2379 default:
2380 istrue = !wanttrue;
2381 break;
2382 }
2383 *pos = end - v;
2384 return istrue == wanttrue;
2385 case 'd':
2386 case 'r':
2387 cp = v + *pos + 1;
2388 while (*cp == ' ')
2389 cp++;
2390 name = cp;
2391 sz = roff_getname(r, &cp, ln, cp - v);
2392 if (sz == 0)
2393 istrue = 0;
2394 else if (v[*pos] == 'r')
2395 istrue = roff_hasregn(r, name, sz);
2396 else {
2397 deftype = ROFFDEF_ANY;
2398 roff_getstrn(r, name, sz, &deftype);
2399 istrue = !!deftype;
2400 }
2401 *pos = cp - v;
2402 return istrue == wanttrue;
2403 default:
2404 break;
2405 }
2406
2407 savepos = *pos;
2408 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2409 return (number > 0) == wanttrue;
2410 else if (*pos == savepos)
2411 return roff_evalstrcond(v, pos) == wanttrue;
2412 else
2413 return 0;
2414 }
2415
2416 static int
2417 roff_line_ignore(ROFF_ARGS)
2418 {
2419
2420 return ROFF_IGN;
2421 }
2422
2423 static int
2424 roff_insec(ROFF_ARGS)
2425 {
2426
2427 mandoc_msg(MANDOCERR_REQ_INSEC, r->parse,
2428 ln, ppos, roff_name[tok]);
2429 return ROFF_IGN;
2430 }
2431
2432 static int
2433 roff_unsupp(ROFF_ARGS)
2434 {
2435
2436 mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse,
2437 ln, ppos, roff_name[tok]);
2438 return ROFF_IGN;
2439 }
2440
2441 static int
2442 roff_cond(ROFF_ARGS)
2443 {
2444 int irc;
2445
2446 roffnode_push(r, tok, NULL, ln, ppos);
2447
2448 /*
2449 * An `.el' has no conditional body: it will consume the value
2450 * of the current rstack entry set in prior `ie' calls or
2451 * defaults to DENY.
2452 *
2453 * If we're not an `el', however, then evaluate the conditional.
2454 */
2455
2456 r->last->rule = tok == ROFF_el ?
2457 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2458 roff_evalcond(r, ln, buf->buf, &pos);
2459
2460 /*
2461 * An if-else will put the NEGATION of the current evaluated
2462 * conditional into the stack of rules.
2463 */
2464
2465 if (tok == ROFF_ie) {
2466 if (r->rstackpos + 1 == r->rstacksz) {
2467 r->rstacksz += 16;
2468 r->rstack = mandoc_reallocarray(r->rstack,
2469 r->rstacksz, sizeof(int));
2470 }
2471 r->rstack[++r->rstackpos] = !r->last->rule;
2472 }
2473
2474 /* If the parent has false as its rule, then so do we. */
2475
2476 if (r->last->parent && !r->last->parent->rule)
2477 r->last->rule = 0;
2478
2479 /*
2480 * Determine scope.
2481 * If there is nothing on the line after the conditional,
2482 * not even whitespace, use next-line scope.
2483 * Except that .while does not support next-line scope.
2484 */
2485
2486 if (buf->buf[pos] == '\0' && tok != ROFF_while) {
2487 r->last->endspan = 2;
2488 goto out;
2489 }
2490
2491 while (buf->buf[pos] == ' ')
2492 pos++;
2493
2494 /* An opening brace requests multiline scope. */
2495
2496 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2497 r->last->endspan = -1;
2498 pos += 2;
2499 while (buf->buf[pos] == ' ')
2500 pos++;
2501 goto out;
2502 }
2503
2504 /*
2505 * Anything else following the conditional causes
2506 * single-line scope. Warn if the scope contains
2507 * nothing but trailing whitespace.
2508 */
2509
2510 if (buf->buf[pos] == '\0')
2511 mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
2512 ln, ppos, roff_name[tok]);
2513
2514 r->last->endspan = 1;
2515
2516 out:
2517 *offs = pos;
2518 irc = ROFF_RERUN;
2519 if (tok == ROFF_while)
2520 irc |= ROFF_WHILE;
2521 return irc;
2522 }
2523
2524 static int
2525 roff_ds(ROFF_ARGS)
2526 {
2527 char *string;
2528 const char *name;
2529 size_t namesz;
2530
2531 /* Ignore groff compatibility mode for now. */
2532
2533 if (tok == ROFF_ds1)
2534 tok = ROFF_ds;
2535 else if (tok == ROFF_as1)
2536 tok = ROFF_as;
2537
2538 /*
2539 * The first word is the name of the string.
2540 * If it is empty or terminated by an escape sequence,
2541 * abort the `ds' request without defining anything.
2542 */
2543
2544 name = string = buf->buf + pos;
2545 if (*name == '\0')
2546 return ROFF_IGN;
2547
2548 namesz = roff_getname(r, &string, ln, pos);
2549 if (name[namesz] == '\\')
2550 return ROFF_IGN;
2551
2552 /* Read past the initial double-quote, if any. */
2553 if (*string == '"')
2554 string++;
2555
2556 /* The rest is the value. */
2557 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2558 ROFF_as == tok);
2559 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2560 return ROFF_IGN;
2561 }
2562
/*
 * Parse a single operator, one or two characters long.
 * If the operator is recognized, store its one-character code
 * in *res, advance *pos past it, and return that code.
 * Two-character operators are encoded as follows:
 * <= as 'l', <> as '!', <? as 'i', >= as 'g', >? as 'a', == as '='.
 * Otherwise, return 0 and leave *pos unchanged;
 * *res then holds the unrecognized character.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	char	 next;
	int	 width;

	*res = v[*pos];
	width = 1;

	switch (*res) {
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case ':':
		break;
	case '<':
		next = v[*pos + 1];
		if (next == '=') {
			*res = 'l';
			width = 2;
		} else if (next == '>') {
			*res = '!';
			width = 2;
		} else if (next == '?') {
			*res = 'i';
			width = 2;
		}
		break;
	case '>':
		next = v[*pos + 1];
		if (next == '=') {
			*res = 'g';
			width = 2;
		} else if (next == '?') {
			*res = 'a';
			width = 2;
		}
		break;
	case '=':
		if (v[*pos + 1] == '=')
			width = 2;
		break;
	default:
		return 0;
	}

	*pos += width;
	return *res;
}
2626
2627 /*
2628 * Evaluate either a parenthesized numeric expression
2629 * or a single signed integer number.
2630 */
2631 static int
2632 roff_evalpar(struct roff *r, int ln,
2633 const char *v, int *pos, int *res, int flags)
2634 {
2635
2636 if ('(' != v[*pos])
2637 return roff_getnum(v, pos, res, flags);
2638
2639 (*pos)++;
2640 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2641 return 0;
2642
2643 /*
2644 * Omission of the closing parenthesis
2645 * is an error in validation mode,
2646 * but ignored in evaluation mode.
2647 */
2648
2649 if (')' == v[*pos])
2650 (*pos)++;
2651 else if (NULL == res)
2652 return 0;
2653
2654 return 1;
2655 }
2656
2657 /*
2658 * Evaluate a complete numeric expression.
2659 * Proceed left to right, there is no concept of precedence.
2660 */
2661 static int
2662 roff_evalnum(struct roff *r, int ln, const char *v,
2663 int *pos, int *res, int flags)
2664 {
2665 int mypos, operand2;
2666 char operator;
2667
2668 if (NULL == pos) {
2669 mypos = 0;
2670 pos = &mypos;
2671 }
2672
2673 if (flags & ROFFNUM_WHITE)
2674 while (isspace((unsigned char)v[*pos]))
2675 (*pos)++;
2676
2677 if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2678 return 0;
2679
2680 while (1) {
2681 if (flags & ROFFNUM_WHITE)
2682 while (isspace((unsigned char)v[*pos]))
2683 (*pos)++;
2684
2685 if ( ! roff_getop(v, pos, &operator))
2686 break;
2687
2688 if (flags & ROFFNUM_WHITE)
2689 while (isspace((unsigned char)v[*pos]))
2690 (*pos)++;
2691
2692 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2693 return 0;
2694
2695 if (flags & ROFFNUM_WHITE)
2696 while (isspace((unsigned char)v[*pos]))
2697 (*pos)++;
2698
2699 if (NULL == res)
2700 continue;
2701
2702 switch (operator) {
2703 case '+':
2704 *res += operand2;
2705 break;
2706 case '-':
2707 *res -= operand2;
2708 break;
2709 case '*':
2710 *res *= operand2;
2711 break;
2712 case '/':
2713 if (operand2 == 0) {
2714 mandoc_msg(MANDOCERR_DIVZERO,
2715 r->parse, ln, *pos, v);
2716 *res = 0;
2717 break;
2718 }
2719 *res /= operand2;
2720 break;
2721 case '%':
2722 if (operand2 == 0) {
2723 mandoc_msg(MANDOCERR_DIVZERO,
2724 r->parse, ln, *pos, v);
2725 *res = 0;
2726 break;
2727 }
2728 *res %= operand2;
2729 break;
2730 case '<':
2731 *res = *res < operand2;
2732 break;
2733 case '>':
2734 *res = *res > operand2;
2735 break;
2736 case 'l':
2737 *res = *res <= operand2;
2738 break;
2739 case 'g':
2740 *res = *res >= operand2;
2741 break;
2742 case '=':
2743 *res = *res == operand2;
2744 break;
2745 case '!':
2746 *res = *res != operand2;
2747 break;
2748 case '&':
2749 *res = *res && operand2;
2750 break;
2751 case ':':
2752 *res = *res || operand2;
2753 break;
2754 case 'i':
2755 if (operand2 < *res)
2756 *res = operand2;
2757 break;
2758 case 'a':
2759 if (operand2 > *res)
2760 *res = operand2;
2761 break;
2762 default:
2763 abort();
2764 }
2765 }
2766 return 1;
2767 }
2768
2769 /* --- register management ------------------------------------------------ */
2770
/*
 * Set, increment, or decrement the number register with the
 * NUL-terminated name, leaving its step size untouched.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	size_t	 namelen;

	namelen = strlen(name);
	roff_setregn(r, name, namelen, val, sign, INT_MIN);
}
2776
2777 static void
2778 roff_setregn(struct roff *r, const char *name, size_t len,
2779 int val, char sign, int step)
2780 {
2781 struct roffreg *reg;
2782
2783 /* Search for an existing register with the same name. */
2784 reg = r->regtab;
2785
2786 while (reg != NULL && (reg->key.sz != len ||
2787 strncmp(reg->key.p, name, len) != 0))
2788 reg = reg->next;
2789
2790 if (NULL == reg) {
2791 /* Create a new register. */
2792 reg = mandoc_malloc(sizeof(struct roffreg));
2793 reg->key.p = mandoc_strndup(name, len);
2794 reg->key.sz = len;
2795 reg->val = 0;
2796 reg->step = 0;
2797 reg->next = r->regtab;
2798 r->regtab = reg;
2799 }
2800
2801 if ('+' == sign)
2802 reg->val += val;
2803 else if ('-' == sign)
2804 reg->val -= val;
2805 else
2806 reg->val = val;
2807 if (step != INT_MIN)
2808 reg->step = step;
2809 }
2810
2811 /*
2812 * Handle some predefined read-only number registers.
2813 * For now, return -1 if the requested register is not predefined;
2814 * in case a predefined read-only register having the value -1
2815 * were to turn up, another special value would have to be chosen.
2816 */
2817 static int
2818 roff_getregro(const struct roff *r, const char *name)
2819 {
2820
2821 switch (*name) {
2822 case '$': /* Number of arguments of the last macro evaluated. */
2823 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
2824 case 'A': /* ASCII approximation mode is always off. */
2825 return 0;
2826 case 'g': /* Groff compatibility mode is always on. */
2827 return 1;
2828 case 'H': /* Fixed horizontal resolution. */
2829 return 24;
2830 case 'j': /* Always adjust left margin only. */
2831 return 0;
2832 case 'T': /* Some output device is always defined. */
2833 return 1;
2834 case 'V': /* Fixed vertical resolution. */
2835 return 40;
2836 default:
2837 return -1;
2838 }
2839 }
2840
/*
 * Return the value of the number register with the
 * NUL-terminated name, without auto-increment or -decrement.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	size_t	 namelen;

	namelen = strlen(name);
	return roff_getregn(r, name, namelen, '\0');
}
2846
2847 static int
2848 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
2849 {
2850 struct roffreg *reg;
2851 int val;
2852
2853 if ('.' == name[0] && 2 == len) {
2854 val = roff_getregro(r, name + 1);
2855 if (-1 != val)
2856 return val;
2857 }
2858
2859 for (reg = r->regtab; reg; reg = reg->next) {
2860 if (len == reg->key.sz &&
2861 0 == strncmp(name, reg->key.p, len)) {
2862 switch (sign) {
2863 case '+':
2864 reg->val += reg->step;
2865 break;
2866 case '-':
2867 reg->val -= reg->step;
2868 break;
2869 default:
2870 break;
2871 }
2872 return reg->val;
2873 }
2874 }
2875
2876 roff_setregn(r, name, len, 0, '\0', INT_MIN);
2877 return 0;
2878 }
2879
2880 static int
2881 roff_hasregn(const struct roff *r, const char *name, size_t len)
2882 {
2883 struct roffreg *reg;
2884 int val;
2885
2886 if ('.' == name[0] && 2 == len) {
2887 val = roff_getregro(r, name + 1);
2888 if (-1 != val)
2889 return 1;
2890 }
2891
2892 for (reg = r->regtab; reg; reg = reg->next)
2893 if (len == reg->key.sz &&
2894 0 == strncmp(name, reg->key.p, len))
2895 return 1;
2896
2897 return 0;
2898 }
2899
2900 static void
2901 roff_freereg(struct roffreg *reg)
2902 {
2903 struct roffreg *old_reg;
2904
2905 while (NULL != reg) {
2906 free(reg->key.p);
2907 old_reg = reg;
2908 reg = reg->next;
2909 free(old_reg);
2910 }
2911 }
2912
2913 static int
2914 roff_nr(ROFF_ARGS)
2915 {
2916 char *key, *val, *step;
2917 size_t keysz;
2918 int iv, is, len;
2919 char sign;
2920
2921 key = val = buf->buf + pos;
2922 if (*key == '\0')
2923 return ROFF_IGN;
2924
2925 keysz = roff_getname(r, &val, ln, pos);
2926 if (key[keysz] == '\\')
2927 return ROFF_IGN;
2928
2929 sign = *val;
2930 if (sign == '+' || sign == '-')
2931 val++;
2932
2933 len = 0;
2934 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
2935 return ROFF_IGN;
2936
2937 step = val + len;
2938 while (isspace((unsigned char)*step))
2939 step++;
2940 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
2941 is = INT_MIN;
2942
2943 roff_setregn(r, key, keysz, iv, sign, is);
2944 return ROFF_IGN;
2945 }
2946
2947 static int
2948 roff_rr(ROFF_ARGS)
2949 {
2950 struct roffreg *reg, **prev;
2951 char *name, *cp;
2952 size_t namesz;
2953
2954 name = cp = buf->buf + pos;
2955 if (*name == '\0')
2956 return ROFF_IGN;
2957 namesz = roff_getname(r, &cp, ln, pos);
2958 name[namesz] = '\0';
2959
2960 prev = &r->regtab;
2961 while (1) {
2962 reg = *prev;
2963 if (reg == NULL || !strcmp(name, reg->key.p))
2964 break;
2965 prev = &reg->next;
2966 }
2967 if (reg != NULL) {
2968 *prev = reg->next;
2969 free(reg->key.p);
2970 free(reg);
2971 }
2972 return ROFF_IGN;
2973 }
2974
2975 /* --- handler functions for roff requests -------------------------------- */
2976
2977 static int
2978 roff_rm(ROFF_ARGS)
2979 {
2980 const char *name;
2981 char *cp;
2982 size_t namesz;
2983
2984 cp = buf->buf + pos;
2985 while (*cp != '\0') {
2986 name = cp;
2987 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
2988 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
2989 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2990 if (name[namesz] == '\\')
2991 break;
2992 }
2993 return ROFF_IGN;
2994 }
2995
2996 static int
2997 roff_it(ROFF_ARGS)
2998 {
2999 int iv;
3000
3001 /* Parse the number of lines. */
3002
3003 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
3004 mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
3005 ln, ppos, buf->buf + 1);
3006 return ROFF_IGN;
3007 }
3008
3009 while (isspace((unsigned char)buf->buf[pos]))
3010 pos++;
3011
3012 /*
3013 * Arm the input line trap.
3014 * Special-casing "an-trap" is an ugly workaround to cope
3015 * with DocBook stupidly fiddling with man(7) internals.
3016 */
3017
3018 roffit_lines = iv;
3019 roffit_macro = mandoc_strdup(iv != 1 ||
3020 strcmp(buf->buf + pos, "an-trap") ?
3021 buf->buf + pos : "br");
3022 return ROFF_IGN;
3023 }
3024
3025 static int
3026 roff_Dd(ROFF_ARGS)
3027 {
3028 int mask;
3029 enum roff_tok t, te;
3030
3031 switch (tok) {
3032 case ROFF_Dd:
3033 tok = MDOC_Dd;
3034 te = MDOC_MAX;
3035 if (r->format == 0)
3036 r->format = MPARSE_MDOC;
3037 mask = MPARSE_MDOC | MPARSE_QUICK;
3038 break;
3039 case ROFF_TH:
3040 tok = MAN_TH;
3041 te = MAN_MAX;
3042 if (r->format == 0)
3043 r->format = MPARSE_MAN;
3044 mask = MPARSE_QUICK;
3045 break;
3046 default:
3047 abort();
3048 }
3049 if ((r->options & mask) == 0)
3050 for (t = tok; t < te; t++)
3051 roff_setstr(r, roff_name[t], NULL, 0);
3052 return ROFF_CONT;
3053 }
3054
3055 static int
3056 roff_TE(ROFF_ARGS)
3057 {
3058 if (r->tbl == NULL) {
3059 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
3060 ln, ppos, "TE");
3061 return ROFF_IGN;
3062 }
3063 if (tbl_end(r->tbl) == 0) {
3064 r->tbl = NULL;
3065 free(buf->buf);
3066 buf->buf = mandoc_strdup(".sp");
3067 buf->sz = 4;
3068 *offs = 0;
3069 return ROFF_REPARSE;
3070 }
3071 r->tbl = NULL;
3072 return ROFF_IGN;
3073 }
3074
3075 static int
3076 roff_T_(ROFF_ARGS)
3077 {
3078
3079 if (NULL == r->tbl)
3080 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
3081 ln, ppos, "T&");
3082 else
3083 tbl_restart(ln, ppos, r->tbl);
3084
3085 return ROFF_IGN;
3086 }
3087
3088 /*
3089 * Handle in-line equation delimiters.
3090 */
3091 static int
3092 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
3093 {
3094 char *cp1, *cp2;
3095 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
3096
3097 /*
3098 * Outside equations, look for an opening delimiter.
3099 * If we are inside an equation, we already know it is
3100 * in-line, or this function wouldn't have been called;
3101 * so look for a closing delimiter.
3102 */
3103
3104 cp1 = buf->buf + pos;
3105 cp2 = strchr(cp1, r->eqn == NULL ?
3106 r->last_eqn->odelim : r->last_eqn->cdelim);
3107 if (cp2 == NULL)
3108 return ROFF_CONT;
3109
3110 *cp2++ = '\0';
3111 bef_pr = bef_nl = aft_nl = aft_pr = "";
3112
3113 /* Handle preceding text, protecting whitespace. */
3114
3115 if (*buf->buf != '\0') {
3116 if (r->eqn == NULL)
3117 bef_pr = "\\&";
3118 bef_nl = "\n";
3119 }
3120
3121 /*
3122 * Prepare replacing the delimiter with an equation macro
3123 * and drop leading white space from the equation.
3124 */
3125
3126 if (r->eqn == NULL) {
3127 while (*cp2 == ' ')
3128 cp2++;
3129 mac = ".EQ";
3130 } else
3131 mac = ".EN";
3132
3133 /* Handle following text, protecting whitespace. */
3134
3135 if (*cp2 != '\0') {
3136 aft_nl = "\n";
3137 if (r->eqn != NULL)
3138 aft_pr = "\\&";
3139 }
3140
3141 /* Do the actual replacement. */
3142
3143 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
3144 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
3145 free(buf->buf);
3146 buf->buf = cp1;
3147
3148 /* Toggle the in-line state of the eqn subsystem. */
3149
3150 r->eqn_inline = r->eqn == NULL;
3151 return ROFF_REPARSE;
3152 }
3153
3154 static int
3155 roff_EQ(ROFF_ARGS)
3156 {
3157 struct roff_node *n;
3158
3159 if (r->man->macroset == MACROSET_MAN)
3160 man_breakscope(r->man, ROFF_EQ);
3161 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3162 if (ln > r->man->last->line)
3163 n->flags |= NODE_LINE;
3164 n->eqn = mandoc_calloc(1, sizeof(*n->eqn));
3165 n->eqn->expectargs = UINT_MAX;
3166 roff_node_append(r->man, n);
3167 r->man->next = ROFF_NEXT_SIBLING;
3168
3169 assert(r->eqn == NULL);
3170 if (r->last_eqn == NULL)
3171 r->last_eqn = eqn_alloc(r->parse);
3172 else
3173 eqn_reset(r->last_eqn);
3174 r->eqn = r->last_eqn;
3175 r->eqn->node = n;
3176
3177 if (buf->buf[pos] != '\0')
3178 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
3179 ".EQ %s", buf->buf + pos);
3180
3181 return ROFF_IGN;
3182 }
3183
3184 static int
3185 roff_EN(ROFF_ARGS)
3186 {
3187 if (r->eqn != NULL) {
3188 eqn_parse(r->eqn);
3189 r->eqn = NULL;
3190 } else
3191 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
3192 if (buf->buf[pos] != '\0')
3193 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
3194 "EN %s", buf->buf + pos);
3195 return ROFF_IGN;
3196 }
3197
3198 static int
3199 roff_TS(ROFF_ARGS)
3200 {
3201 if (r->tbl != NULL) {
3202 mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
3203 ln, ppos, "TS breaks TS");
3204 tbl_end(r->tbl);
3205 }
3206 r->tbl = tbl_alloc(ppos, ln, r->parse);
3207 if (r->last_tbl)
3208 r->last_tbl->next = r->tbl;
3209 else
3210 r->first_tbl = r->tbl;
3211 r->last_tbl = r->tbl;
3212 return ROFF_IGN;
3213 }
3214
3215 static int
3216 roff_onearg(ROFF_ARGS)
3217 {
3218 struct roff_node *n;
3219 char *cp;
3220 int npos;
3221
3222 if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3223 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3224 tok == ROFF_ti))
3225 man_breakscope(r->man, tok);
3226
3227 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3228 r->man->last = roffce_node;
3229 r->man->next = ROFF_NEXT_SIBLING;
3230 }
3231
3232 roff_elem_alloc(r->man, ln, ppos, tok);
3233 n = r->man->last;
3234
3235 cp = buf->buf + pos;
3236 if (*cp != '\0') {
3237 while (*cp != '\0' && *cp != ' ')
3238 cp++;
3239 while (*cp == ' ')
3240 *cp++ = '\0';
3241 if (*cp != '\0')
3242 mandoc_vmsg(MANDOCERR_ARG_EXCESS,
3243 r->parse, ln, cp - buf->buf,
3244 "%s ... %s", roff_name[tok], cp);
3245 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3246 }
3247
3248 if (tok == ROFF_ce || tok == ROFF_rj) {
3249 if (r->man->last->type == ROFFT_ELEM) {
3250 roff_word_alloc(r->man, ln, pos, "1");
3251 r->man->last->flags |= NODE_NOSRC;
3252 }
3253 npos = 0;
3254 if (roff_evalnum(r, ln, r->man->last->string, &npos,
3255 &roffce_lines, 0) == 0) {
3256 mandoc_vmsg(MANDOCERR_CE_NONUM,
3257 r->parse, ln, pos, "ce %s", buf->buf + pos);
3258 roffce_lines = 1;
3259 }
3260 if (roffce_lines < 1) {
3261 r->man->last = r->man->last->parent;
3262 roffce_node = NULL;
3263 roffce_lines = 0;
3264 } else
3265 roffce_node = r->man->last->parent;
3266 } else {
3267 n->flags |= NODE_VALID | NODE_ENDED;
3268 r->man->last = n;
3269 }
3270 n->flags |= NODE_LINE;
3271 r->man->next = ROFF_NEXT_SIBLING;
3272 return ROFF_IGN;
3273 }
3274
3275 static int
3276 roff_manyarg(ROFF_ARGS)
3277 {
3278 struct roff_node *n;
3279 char *sp, *ep;
3280
3281 roff_elem_alloc(r->man, ln, ppos, tok);
3282 n = r->man->last;
3283
3284 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3285 while (*ep != '\0' && *ep != ' ')
3286 ep++;
3287 while (*ep == ' ')
3288 *ep++ = '\0';
3289 roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3290 }
3291
3292 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3293 r->man->last = n;
3294 r->man->next = ROFF_NEXT_SIBLING;
3295 return ROFF_IGN;
3296 }
3297
3298 static int
3299 roff_als(ROFF_ARGS)
3300 {
3301 char *oldn, *newn, *end, *value;
3302 size_t oldsz, newsz, valsz;
3303
3304 newn = oldn = buf->buf + pos;
3305 if (*newn == '\0')
3306 return ROFF_IGN;
3307
3308 newsz = roff_getname(r, &oldn, ln, pos);
3309 if (newn[newsz] == '\\' || *oldn == '\0')
3310 return ROFF_IGN;
3311
3312 end = oldn;
3313 oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3314 if (oldsz == 0)
3315 return ROFF_IGN;
3316
3317 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3318 (int)oldsz, oldn);
3319 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3320 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3321 free(value);
3322 return ROFF_IGN;
3323 }
3324
3325 static int
3326 roff_br(ROFF_ARGS)
3327 {
3328 if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3329 man_breakscope(r->man, ROFF_br);
3330 roff_elem_alloc(r->man, ln, ppos, ROFF_br);
3331 if (buf->buf[pos] != '\0')
3332 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
3333 "%s %s", roff_name[tok], buf->buf + pos);
3334 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3335 r->man->next = ROFF_NEXT_SIBLING;
3336 return ROFF_IGN;
3337 }
3338
3339 static int
3340 roff_cc(ROFF_ARGS)
3341 {
3342 const char *p;
3343
3344 p = buf->buf + pos;
3345
3346 if (*p == '\0' || (r->control = *p++) == '.')
3347 r->control = '\0';
3348
3349 if (*p != '\0')
3350 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
3351 ln, p - buf->buf, "cc ... %s", p);
3352
3353 return ROFF_IGN;
3354 }
3355
3356 static int
3357 roff_char(ROFF_ARGS)
3358 {
3359 const char *p, *kp, *vp;
3360 size_t ksz, vsz;
3361 int font;
3362
3363 /* Parse the character to be replaced. */
3364
3365 kp = buf->buf + pos;
3366 p = kp + 1;
3367 if (*kp == '\0' || (*kp == '\\' &&
3368 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3369 (*p != ' ' && *p != '\0')) {
3370 mandoc_vmsg(MANDOCERR_CHAR_ARG, r->parse,
3371 ln, pos, "char %s", kp);
3372 return ROFF_IGN;
3373 }
3374 ksz = p - kp;
3375 while (*p == ' ')
3376 p++;
3377
3378 /*
3379 * If the replacement string contains a font escape sequence,
3380 * we have to restore the font at the end.
3381 */
3382
3383 vp = p;
3384 vsz = strlen(p);
3385 font = 0;
3386 while (*p != '\0') {
3387 if (*p++ != '\\')
3388 continue;
3389 switch (mandoc_escape(&p, NULL, NULL)) {
3390 case ESCAPE_FONT:
3391 case ESCAPE_FONTROMAN:
3392 case ESCAPE_FONTITALIC:
3393 case ESCAPE_FONTBOLD:
3394 case ESCAPE_FONTBI:
3395 case ESCAPE_FONTCW:
3396 case ESCAPE_FONTPREV:
3397 font++;
3398 break;
3399 default:
3400 break;
3401 }
3402 }
3403 if (font > 1)
3404 mandoc_msg(MANDOCERR_CHAR_FONT, r->parse,
3405 ln, vp - buf->buf, vp);
3406
3407 /*
3408 * Approximate the effect of .char using the .tr tables.
3409 * XXX In groff, .char and .tr interact differently.
3410 */
3411
3412 if (ksz == 1) {
3413 if (r->xtab == NULL)
3414 r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3415 assert((unsigned int)*kp < 128);
3416 free(r->xtab[(int)*kp].p);
3417 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3418 "%s%s", vp, font ? "\fP" : "");
3419 } else {
3420 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3421 if (font)
3422 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3423 }
3424 return ROFF_IGN;
3425 }
3426
3427 static int
3428 roff_ec(ROFF_ARGS)
3429 {
3430 const char *p;
3431
3432 p = buf->buf + pos;
3433 if (*p == '\0')
3434 r->escape = '\\';
3435 else {
3436 r->escape = *p;
3437 if (*++p != '\0')
3438 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
3439 ln, p - buf->buf, "ec ... %s", p);
3440 }
3441 return ROFF_IGN;
3442 }
3443
3444 static int
3445 roff_eo(ROFF_ARGS)
3446 {
3447 r->escape = '\0';
3448 if (buf->buf[pos] != '\0')
3449 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse,
3450 ln, pos, "eo %s", buf->buf + pos);
3451 return ROFF_IGN;
3452 }
3453
3454 static int
3455 roff_nop(ROFF_ARGS)
3456 {
3457 while (buf->buf[pos] == ' ')
3458 pos++;
3459 *offs = pos;
3460 return ROFF_RERUN;
3461 }
3462
3463 static int
3464 roff_tr(ROFF_ARGS)
3465 {
3466 const char *p, *first, *second;
3467 size_t fsz, ssz;
3468 enum mandoc_esc esc;
3469
3470 p = buf->buf + pos;
3471
3472 if (*p == '\0') {
3473 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr");
3474 return ROFF_IGN;
3475 }
3476
3477 while (*p != '\0') {
3478 fsz = ssz = 1;
3479
3480 first = p++;
3481 if (*first == '\\') {
3482 esc = mandoc_escape(&p, NULL, NULL);
3483 if (esc == ESCAPE_ERROR) {
3484 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
3485 ln, (int)(p - buf->buf), first);
3486 return ROFF_IGN;
3487 }
3488 fsz = (size_t)(p - first);
3489 }
3490
3491 second = p++;
3492 if (*second == '\\') {
3493 esc = mandoc_escape(&p, NULL, NULL);
3494 if (esc == ESCAPE_ERROR) {
3495 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
3496 ln, (int)(p - buf->buf), second);
3497 return ROFF_IGN;
3498 }
3499 ssz = (size_t)(p - second);
3500 } else if (*second == '\0') {
3501 mandoc_vmsg(MANDOCERR_TR_ODD, r->parse,
3502 ln, first - buf->buf, "tr %s", first);
3503 second = " ";
3504 p--;
3505 }
3506
3507 if (fsz > 1) {
3508 roff_setstrn(&r->xmbtab, first, fsz,
3509 second, ssz, 0);
3510 continue;
3511 }
3512
3513 if (r->xtab == NULL)
3514 r->xtab = mandoc_calloc(128,
3515 sizeof(struct roffstr));
3516
3517 free(r->xtab[(int)*first].p);
3518 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3519 r->xtab[(int)*first].sz = ssz;
3520 }
3521
3522 return ROFF_IGN;
3523 }
3524
3525 /*
3526 * Implementation of the .return request.
3527 * There is no need to call roff_userret() from here.
3528 * The read module will call that after rewinding the reader stack
3529 * to the place from where the current macro was called.
3530 */
3531 static int
3532 roff_return(ROFF_ARGS)
3533 {
3534 if (r->mstackpos >= 0)
3535 return ROFF_IGN | ROFF_USERRET;
3536
3537 mandoc_msg(MANDOCERR_REQ_NOMAC, r->parse, ln, ppos, "return");
3538 return ROFF_IGN;
3539 }
3540
3541 static int
3542 roff_rn(ROFF_ARGS)
3543 {
3544 const char *value;
3545 char *oldn, *newn, *end;
3546 size_t oldsz, newsz;
3547 int deftype;
3548
3549 oldn = newn = buf->buf + pos;
3550 if (*oldn == '\0')
3551 return ROFF_IGN;
3552
3553 oldsz = roff_getname(r, &newn, ln, pos);
3554 if (oldn[oldsz] == '\\' || *newn == '\0')
3555 return ROFF_IGN;
3556
3557 end = newn;
3558 newsz = roff_getname(r, &end, ln, newn - buf->buf);
3559 if (newsz == 0)
3560 return ROFF_IGN;
3561
3562 deftype = ROFFDEF_ANY;
3563 value = roff_getstrn(r, oldn, oldsz, &deftype);
3564 switch (deftype) {
3565 case ROFFDEF_USER:
3566 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3567 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3568 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3569 break;
3570 case ROFFDEF_PRE:
3571 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3572 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3573 break;
3574 case ROFFDEF_REN:
3575 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3576 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3577 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3578 break;
3579 case ROFFDEF_STD:
3580 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3581 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3582 break;
3583 default:
3584 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3585 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3586 break;
3587 }
3588 return ROFF_IGN;
3589 }
3590
3591 static int
3592 roff_shift(ROFF_ARGS)
3593 {
3594 struct mctx *ctx;
3595 int levels, i;
3596
3597 levels = 1;
3598 if (buf->buf[pos] != '\0' &&
3599 roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
3600 mandoc_vmsg(MANDOCERR_CE_NONUM, r->parse,
3601 ln, pos, "shift %s", buf->buf + pos);
3602 levels = 1;
3603 }
3604 if (r->mstackpos < 0) {
3605 mandoc_msg(MANDOCERR_REQ_NOMAC, r->parse, ln, ppos, "shift");
3606 return ROFF_IGN;
3607 }
3608 ctx = r->mstack + r->mstackpos;
3609 if (levels > ctx->argc) {
3610 mandoc_vmsg(MANDOCERR_SHIFT, r->parse,
3611 ln, pos, "%d, but max is %d", levels, ctx->argc);
3612 levels = ctx->argc;
3613 }
3614 if (levels == 0)
3615 return ROFF_IGN;
3616 for (i = 0; i < levels; i++)
3617 free(ctx->argv[i]);
3618 ctx->argc -= levels;
3619 for (i = 0; i < ctx->argc; i++)
3620 ctx->argv[i] = ctx->argv[i + levels];
3621 return ROFF_IGN;
3622 }
3623
3624 static int
3625 roff_so(ROFF_ARGS)
3626 {
3627 char *name, *cp;
3628
3629 name = buf->buf + pos;
3630 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
3631
3632 /*
3633 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3634 * opening anything that's not in our cwd or anything beneath
3635 * it. Thus, explicitly disallow traversing up the file-system
3636 * or using absolute paths.
3637 */
3638
3639 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3640 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
3641 ".so %s", name);
3642 buf->sz = mandoc_asprintf(&cp,
3643 ".sp\nSee the file %s.\n.sp", name) + 1;
3644 free(buf->buf);
3645 buf->buf = cp;
3646 *offs = 0;
3647 return ROFF_REPARSE;
3648 }
3649
3650 *offs = pos;
3651 return ROFF_SO;
3652 }
3653
3654 /* --- user defined strings and macros ------------------------------------ */
3655
3656 static int
3657 roff_userdef(ROFF_ARGS)
3658 {
3659 struct mctx *ctx;
3660 char *arg, *ap, *dst, *src;
3661 size_t sz;
3662
3663 /* Initialize a new macro stack context. */
3664
3665 if (++r->mstackpos == r->mstacksz) {
3666 r->mstack = mandoc_recallocarray(r->mstack,
3667 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
3668 r->mstacksz += 8;
3669 }
3670 ctx = r->mstack + r->mstackpos;
3671 ctx->argsz = 0;
3672 ctx->argc = 0;
3673 ctx->argv = NULL;
3674
3675 /*
3676 * Collect pointers to macro argument strings,
3677 * NUL-terminating them and escaping quotes.
3678 */
3679
3680 src = buf->buf + pos;
3681 while (*src != '\0') {
3682 if (ctx->argc == ctx->argsz) {
3683 ctx->argsz += 8;
3684 ctx->argv = mandoc_reallocarray(ctx->argv,
3685 ctx->argsz, sizeof(*ctx->argv));
3686 }
3687 arg = mandoc_getarg(r->parse, &src, ln, &pos);
3688 sz = 1; /* For the terminating NUL. */
3689 for (ap = arg; *ap != '\0'; ap++)
3690 sz += *ap == '"' ? 4 : 1;
3691 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
3692 for (ap = arg; *ap != '\0'; ap++) {
3693 if (*ap == '"') {
3694 memcpy(dst, "\\(dq", 4);
3695 dst += 4;
3696 } else
3697 *dst++ = *ap;
3698 }
3699 *dst = '\0';
3700 }
3701
3702 /* Replace the macro invocation by the macro definition. */
3703
3704 free(buf->buf);
3705 buf->buf = mandoc_strdup(r->current_string);
3706 buf->sz = strlen(buf->buf) + 1;
3707 *offs = 0;
3708
3709 return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
3710 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
3711 }
3712
3713 /*
3714 * Calling a high-level macro that was renamed with .rn.
3715 * r->current_string has already been set up by roff_parse().
3716 */
3717 static int
3718 roff_renamed(ROFF_ARGS)
3719 {
3720 char *nbuf;
3721
3722 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
3723 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
3724 free(buf->buf);
3725 buf->buf = nbuf;
3726 *offs = 0;
3727 return ROFF_CONT;
3728 }
3729
3730 static size_t
3731 roff_getname(struct roff *r, char **cpp, int ln, int pos)
3732 {
3733 char *name, *cp;
3734 size_t namesz;
3735
3736 name = *cpp;
3737 if ('\0' == *name)
3738 return 0;
3739
3740 /* Read until end of name and terminate it with NUL. */
3741 for (cp = name; 1; cp++) {
3742 if ('\0' == *cp || ' ' == *cp) {
3743 namesz = cp - name;
3744 break;
3745 }
3746 if ('\\' != *cp)
3747 continue;
3748 namesz = cp - name;
3749 if ('{' == cp[1] || '}' == cp[1])
3750 break;
3751 cp++;
3752 if ('\\' == *cp)
3753 continue;
3754 mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
3755 "%.*s", (int)(cp - name + 1), name);
3756 mandoc_escape((const char **)&cp, NULL, NULL);
3757 break;
3758 }
3759
3760 /* Read past spaces. */
3761 while (' ' == *cp)
3762 cp++;
3763
3764 *cpp = cp;
3765 return namesz;
3766 }
3767
3768 /*
3769 * Store *string into the user-defined string called *name.
3770 * To clear an existing entry, call with (*r, *name, NULL, 0).
3771 * append == 0: replace mode
3772 * append == 1: single-line append mode
3773 * append == 2: multiline append mode, append '\n' after each call
3774 */
3775 static void
3776 roff_setstr(struct roff *r, const char *name, const char *string,
3777 int append)
3778 {
3779 size_t namesz;
3780
3781 namesz = strlen(name);
3782 roff_setstrn(&r->strtab, name, namesz, string,
3783 string ? strlen(string) : 0, append);
3784 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3785 }
3786
3787 static void
3788 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
3789 const char *string, size_t stringsz, int append)
3790 {
3791 struct roffkv *n;
3792 char *c;
3793 int i;
3794 size_t oldch, newch;
3795
3796 /* Search for an existing string with the same name. */
3797 n = *r;
3798
3799 while (n && (namesz != n->key.sz ||
3800 strncmp(n->key.p, name, namesz)))
3801 n = n->next;
3802
3803 if (NULL == n) {
3804 /* Create a new string table entry. */
3805 n = mandoc_malloc(sizeof(struct roffkv));
3806 n->key.p = mandoc_strndup(name, namesz);
3807 n->key.sz = namesz;
3808 n->val.p = NULL;
3809 n->val.sz = 0;
3810 n->next = *r;
3811 *r = n;
3812 } else if (0 == append) {
3813 free(n->val.p);
3814 n->val.p = NULL;
3815 n->val.sz = 0;
3816 }
3817
3818 if (NULL == string)
3819 return;
3820
3821 /*
3822 * One additional byte for the '\n' in multiline mode,
3823 * and one for the terminating '\0'.
3824 */
3825 newch = stringsz + (1 < append ? 2u : 1u);
3826
3827 if (NULL == n->val.p) {
3828 n->val.p = mandoc_malloc(newch);
3829 *n->val.p = '\0';
3830 oldch = 0;
3831 } else {
3832 oldch = n->val.sz;
3833 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
3834 }
3835
3836 /* Skip existing content in the destination buffer. */
3837 c = n->val.p + (int)oldch;
3838
3839 /* Append new content to the destination buffer. */
3840 i = 0;
3841 while (i < (int)stringsz) {
3842 /*
3843 * Rudimentary roff copy mode:
3844 * Handle escaped backslashes.
3845 */
3846 if ('\\' == string[i] && '\\' == string[i + 1])
3847 i++;
3848 *c++ = string[i++];
3849 }
3850
3851 /* Append terminating bytes. */
3852 if (1 < append)
3853 *c++ = '\n';
3854
3855 *c = '\0';
3856 n->val.sz = (int)(c - n->val.p);
3857 }
3858
3859 static const char *
3860 roff_getstrn(struct roff *r, const char *name, size_t len,
3861 int *deftype)
3862 {
3863 const struct roffkv *n;
3864 int found, i;
3865 enum roff_tok tok;
3866
3867 found = 0;
3868 for (n = r->strtab; n != NULL; n = n->next) {
3869 if (strncmp(name, n->key.p, len) != 0 ||
3870 n->key.p[len] != '\0' || n->val.p == NULL)
3871 continue;
3872 if (*deftype & ROFFDEF_USER) {
3873 *deftype = ROFFDEF_USER;
3874 return n->val.p;
3875 } else {
3876 found = 1;
3877 break;
3878 }
3879 }
3880 for (n = r->rentab; n != NULL; n = n->next) {
3881 if (strncmp(name, n->key.p, len) != 0 ||
3882 n->key.p[len] != '\0' || n->val.p == NULL)
3883 continue;
3884 if (*deftype & ROFFDEF_REN) {
3885 *deftype = ROFFDEF_REN;
3886 return n->val.p;
3887 } else {
3888 found = 1;
3889 break;
3890 }
3891 }
3892 for (i = 0; i < PREDEFS_MAX; i++) {
3893 if (strncmp(name, predefs[i].name, len) != 0 ||
3894 predefs[i].name[len] != '\0')
3895 continue;
3896 if (*deftype & ROFFDEF_PRE) {
3897 *deftype = ROFFDEF_PRE;
3898 return predefs[i].str;
3899 } else {
3900 found = 1;
3901 break;
3902 }
3903 }
3904 if (r->man->macroset != MACROSET_MAN) {
3905 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
3906 if (strncmp(name, roff_name[tok], len) != 0 ||
3907 roff_name[tok][len] != '\0')
3908 continue;
3909 if (*deftype & ROFFDEF_STD) {
3910 *deftype = ROFFDEF_STD;
3911 return NULL;
3912 } else {
3913 found = 1;
3914 break;
3915 }
3916 }
3917 }
3918 if (r->man->macroset != MACROSET_MDOC) {
3919 for (tok = MAN_TH; tok < MAN_MAX; tok++) {
3920 if (strncmp(name, roff_name[tok], len) != 0 ||
3921 roff_name[tok][len] != '\0')
3922 continue;
3923 if (*deftype & ROFFDEF_STD) {
3924 *deftype = ROFFDEF_STD;
3925 return NULL;
3926 } else {
3927 found = 1;
3928 break;
3929 }
3930 }
3931 }
3932
3933 if (found == 0 && *deftype != ROFFDEF_ANY) {
3934 if (*deftype & ROFFDEF_REN) {
3935 /*
3936 * This might still be a request,
3937 * so do not treat it as undefined yet.
3938 */
3939 *deftype = ROFFDEF_UNDEF;
3940 return NULL;
3941 }
3942
3943 /* Using an undefined string defines it to be empty. */
3944
3945 roff_setstrn(&r->strtab, name, len, "", 0, 0);
3946 roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
3947 }
3948
3949 *deftype = 0;
3950 return NULL;
3951 }
3952
3953 static void
3954 roff_freestr(struct roffkv *r)
3955 {
3956 struct roffkv *n, *nn;
3957
3958 for (n = r; n; n = nn) {
3959 free(n->key.p);
3960 free(n->val.p);
3961 nn = n->next;
3962 free(n);
3963 }
3964 }
3965
3966 /* --- accessors and utility functions ------------------------------------ */
3967
3968 /*
3969 * Duplicate an input string, making the appropriate character
3970 * conversations (as stipulated by `tr') along the way.
3971 * Returns a heap-allocated string with all the replacements made.
3972 */
3973 char *
3974 roff_strdup(const struct roff *r, const char *p)
3975 {
3976 const struct roffkv *cp;
3977 char *res;
3978 const char *pp;
3979 size_t ssz, sz;
3980 enum mandoc_esc esc;
3981
3982 if (NULL == r->xmbtab && NULL == r->xtab)
3983 return mandoc_strdup(p);
3984 else if ('\0' == *p)
3985 return mandoc_strdup("");
3986
3987 /*
3988 * Step through each character looking for term matches
3989 * (remember that a `tr' can be invoked with an escape, which is
3990 * a glyph but the escape is multi-character).
3991 * We only do this if the character hash has been initialised
3992 * and the string is >0 length.
3993 */
3994
3995 res = NULL;
3996 ssz = 0;
3997
3998 while ('\0' != *p) {
3999 assert((unsigned int)*p < 128);
4000 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4001 sz = r->xtab[(int)*p].sz;
4002 res = mandoc_realloc(res, ssz + sz + 1);
4003 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4004 ssz += sz;
4005 p++;
4006 continue;
4007 } else if ('\\' != *p) {
4008 res = mandoc_realloc(res, ssz + 2);
4009 res[ssz++] = *p++;
4010 continue;
4011 }
4012
4013 /* Search for term matches. */
4014 for (cp = r->xmbtab; cp; cp = cp->next)
4015 if (0 == strncmp(p, cp->key.p, cp->key.sz))
4016 break;
4017
4018 if (NULL != cp) {
4019 /*
4020 * A match has been found.
4021 * Append the match to the array and move
4022 * forward by its keysize.
4023 */
4024 res = mandoc_realloc(res,
4025 ssz + cp->val.sz + 1);
4026 memcpy(res + ssz, cp->val.p, cp->val.sz);
4027 ssz += cp->val.sz;
4028 p += (int)cp->key.sz;
4029 continue;
4030 }
4031
4032 /*
4033 * Handle escapes carefully: we need to copy
4034 * over just the escape itself, or else we might
4035 * do replacements within the escape itself.
4036 * Make sure to pass along the bogus string.
4037 */
4038 pp = p++;
4039 esc = mandoc_escape(&p, NULL, NULL);
4040 if (ESCAPE_ERROR == esc) {
4041 sz = strlen(pp);
4042 res = mandoc_realloc(res, ssz + sz + 1);
4043 memcpy(res + ssz, pp, sz);
4044 break;
4045 }
4046 /*
4047 * We bail out on bad escapes.
4048 * No need to warn: we already did so when
4049 * roff_res() was called.
4050 */
4051 sz = (int)(p - pp);
4052 res = mandoc_realloc(res, ssz + sz + 1);
4053 memcpy(res + ssz, pp, sz);
4054 ssz += sz;
4055 }
4056
4057 res[(int)ssz] = '\0';
4058 return res;
4059 }
4060
4061 int
4062 roff_getformat(const struct roff *r)
4063 {
4064
4065 return r->format;
4066 }
4067
4068 /*
4069 * Find out whether a line is a macro line or not.
4070 * If it is, adjust the current position and return one; if it isn't,
4071 * return zero and don't change the current position.
4072 * If the control character has been set with `.cc', then let that grain
4073 * precedence.
4074 * This is slighly contrary to groff, where using the non-breaking
4075 * control character when `cc' has been invoked will cause the
4076 * non-breaking macro contents to be printed verbatim.
4077 */
4078 int
4079 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4080 {
4081 int pos;
4082
4083 pos = *ppos;
4084
4085 if (r->control != '\0' && cp[pos] == r->control)
4086 pos++;
4087 else if (r->control != '\0')
4088 return 0;
4089 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4090 pos += 2;
4091 else if ('.' == cp[pos] || '\'' == cp[pos])
4092 pos++;
4093 else
4094 return 0;
4095
4096 while (' ' == cp[pos] || '\t' == cp[pos])
4097 pos++;
4098
4099 *ppos = pos;
4100 return 1;
4101 }