]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
Cleanup, no functional change:
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.349 2018/12/13 11:55:47 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31 #include "mandoc_aux.h"
32 #include "mandoc_ohash.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "mandoc_parse.h"
36 #include "libmandoc.h"
37 #include "roff_int.h"
38 #include "tbl_parse.h"
39 #include "eqn_parse.h"
40
/*
 * Maximum number of string expansions per line, to break infinite loops.
 */
#define	EXPAND_LIMIT	1000
43
/*
 * Types of definitions of macros and strings.
 * Every type occupies its own bit, so several types can be
 * combined into one mask; ROFFDEF_ANY is the union of the four
 * "defined" types and deliberately excludes ROFFDEF_UNDEF.
 */
#define	ROFFDEF_USER	(1 << 1)  /* User-defined. */
#define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
#define	ROFFDEF_REN	(1 << 3)  /* Renamed standard macro. */
#define	ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
			 ROFFDEF_REN | ROFFDEF_STD)
#define	ROFFDEF_UNDEF	(1 << 5)  /* Completely undefined. */
52
53 /* --- data types --------------------------------------------------------- */
54
/*
 * An incredibly-simple string buffer:
 * a pointer to the bytes together with their cached length.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
62
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * Used below for the string, rename and multi-byte translation
 * tables of struct roff.
 */
struct	roffkv {
	struct roffstr	 key; /* name under which the entry is stored */
	struct roffstr	 val; /* content associated with the key */
	struct roffkv	*next; /* next in list */
};
71
/*
 * A single number register as part of a singly-linked list.
 */
struct	roffreg {
	struct roffstr	 key; /* register name */
	int		 val; /* current value */
	int		 step; /* NOTE(review): presumably the auto-increment
			       * step; confirm in roff_setregn() */
	struct roffreg	*next; /* next in list */
};
81
/*
 * Association of request and macro names with token IDs.
 * Entries are allocated with room for the name right behind the
 * structure, see roffhash_alloc().
 */
struct	roffreq {
	enum roff_tok	 tok; /* token ID the name maps to */
	char		 name[]; /* nil-terminated name (flexible array) */
};
89
/*
 * A macro processing context.
 * More than one is needed when macro calls are nested.
 */
struct	mctx {
	char		**argv; /* argument vector; freed in roff_free() */
	int		 argc; /* presumably the number of arguments in
				* use - TODO confirm in roff_userdef() */
	int		 argsz; /* presumably the allocated size of argv -
				 * TODO confirm in roff_userdef() */
};
99
/*
 * The state of one roff parser.
 * Allocated by roff_alloc(), brought back to its initial state
 * by roff_reset(), and destroyed by roff_free().
 */
struct	roff {
	struct mparse	*parse; /* parse point */
	struct roff_man	*man; /* mdoc or man parser */
	struct roffnode	*last; /* leaf of stack */
	struct mctx	*mstack; /* stack of macro contexts */
	int		*rstack; /* stack of inverted `ie' values */
	struct ohash	*reqtab; /* request lookup table */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*rentab; /* renamed strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* equation parser */
	struct eqn_node	*eqn; /* active equation parser */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 mstacksz; /* current size of mstack */
	int		 mstackpos; /* position in mstack */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack */
	int		 format; /* current file in mdoc or man format */
	char		 control; /* control character */
	char		 escape; /* escape character */
};
128
/*
 * One entry on the stack of pending roff block requests.
 * Entries are created by roffnode_push() and destroyed by
 * roffnode_pop(), which also releases name and end.
 */
struct	roffnode {
	enum roff_tok	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* end-rules: custom token */
	int		 endspan; /* end-rules: next-line or infty */
	int		 rule; /* current evaluation rule */
};
139
/*
 * The parameter list shared by all request handler functions.
 * Keeping it in a macro guarantees that every handler declared
 * with it matches the roffproc function pointer type.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum roff_tok tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Pointer to a request handler taking the ROFF_ARGS parameters. */
typedef	int (*roffproc)(ROFF_ARGS);
149
/*
 * The set of handlers associated with one request.
 * Handlers that are not needed are left as NULL pointers.
 */
struct	roffmac {
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags; /* bitwise OR of the ROFFMAC_* values */
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
};
157
/*
 * One predefined string: an input name and its replacement.
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Expands to one struct predef initializer including the trailing
 * comma, such that a list of PREDEF() invocations can be used as
 * the body of an array initializer.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
165
166 /* --- function prototypes ------------------------------------------------ */
167
/*
 * Forward declarations of all file-local functions.
 * Those declared with ROFF_ARGS are request handlers
 * compatible with the roffproc type.
 */
static	int		 roffnode_cleanscope(struct roff *);
static	int		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum roff_tok,
				const char *, int, int);
static	void		 roff_addtbl(struct roff_man *, int, struct tbl_node *);
static	int		 roff_als(ROFF_ARGS);
static	int		 roff_block(ROFF_ARGS);
static	int		 roff_block_text(ROFF_ARGS);
static	int		 roff_block_sub(ROFF_ARGS);
static	int		 roff_br(ROFF_ARGS);
static	int		 roff_cblock(ROFF_ARGS);
static	int		 roff_cc(ROFF_ARGS);
static	int		 roff_ccond(struct roff *, int, int);
static	int		 roff_char(ROFF_ARGS);
static	int		 roff_cond(ROFF_ARGS);
static	int		 roff_cond_text(ROFF_ARGS);
static	int		 roff_cond_sub(ROFF_ARGS);
static	int		 roff_ds(ROFF_ARGS);
static	int		 roff_ec(ROFF_ARGS);
static	int		 roff_eo(ROFF_ARGS);
static	int		 roff_eqndelim(struct roff *, struct buf *, int);
static	int		 roff_evalcond(struct roff *r, int, char *, int *);
static	int		 roff_evalnum(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalpar(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalstrcond(const char *, int *);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	size_t		 roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *, int);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(struct roff *,
				const char *, size_t, char);
static	int		 roff_getregro(const struct roff *,
				const char *name);
static	const char	*roff_getstrn(struct roff *,
				const char *, size_t, int *);
static	int		 roff_hasregn(const struct roff *,
				const char *, size_t);
static	int		 roff_insec(ROFF_ARGS);
static	int		 roff_it(ROFF_ARGS);
static	int		 roff_line_ignore(ROFF_ARGS);
static	void		 roff_man_alloc1(struct roff_man *);
static	void		 roff_man_free1(struct roff_man *);
static	int		 roff_manyarg(ROFF_ARGS);
static	int		 roff_nop(ROFF_ARGS);
static	int		 roff_nr(ROFF_ARGS);
static	int		 roff_onearg(ROFF_ARGS);
static	enum roff_tok	 roff_parse(struct roff *, char *, int *,
				int, int);
static	int		 roff_parsetext(struct roff *, struct buf *,
				int, int *);
static	int		 roff_renamed(ROFF_ARGS);
static	int		 roff_res(struct roff *, struct buf *, int, int);
static	int		 roff_return(ROFF_ARGS);
static	int		 roff_rm(ROFF_ARGS);
static	int		 roff_rn(ROFF_ARGS);
static	int		 roff_rr(ROFF_ARGS);
static	void		 roff_setregn(struct roff *, const char *,
				size_t, int, char, int);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	int		 roff_shift(ROFF_ARGS);
static	int		 roff_so(ROFF_ARGS);
static	int		 roff_tr(ROFF_ARGS);
static	int		 roff_Dd(ROFF_ARGS);
static	int		 roff_TE(ROFF_ARGS);
static	int		 roff_TS(ROFF_ARGS);
static	int		 roff_EQ(ROFF_ARGS);
static	int		 roff_EN(ROFF_ARGS);
static	int		 roff_T_(ROFF_ARGS);
static	int		 roff_unsupp(ROFF_ARGS);
static	int		 roff_userdef(ROFF_ARGS);
245
246 /* --- constant data ------------------------------------------------------ */
247
/* Flag bits for the numeric parsing and evaluation functions. */
#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */
250
/*
 * Names of all requests and macros, indexed by token ID.
 * The order of the entries is significant and has to agree with
 * the token enumeration; slots without a name contain NULL.
 * The first two groups parallel the roffs[] handler table.
 */
const char *__roff_name[MAN_MAX + 1] = {
	/* Requests up to the NULL slot labelled ROFF_MAX in roffs[]. */
	"br",		"ce",		"ft",		"ll",
	"mc",		"po",		"rj",		"sp",
	"ta",		"ti",		NULL,
	/* The remaining roff requests. */
	"ab",		"ad",		"af",		"aln",
	"als",		"am",		"am1",		"ami",
	"ami1",		"as",		"as1",		"asciify",
	"backtrace",	"bd",		"bleedat",	"blm",
	"box",		"boxa",		"bp",		"BP",
	"break",	"breakchar",	"brnl",		"brp",
	"brpnl",	"c2",		"cc",
	"cf",		"cflags",	"ch",		"char",
	"chop",		"class",	"close",	"CL",
	"color",	"composite",	"continue",	"cp",
	"cropat",	"cs",		"cu",		"da",
	"dch",		"Dd",		"de",		"de1",
	"defcolor",	"dei",		"dei1",		"device",
	"devicem",	"di",		"do",		"ds",
	"ds1",		"dwh",		"dt",		"ec",
	"ecr",		"ecs",		"el",		"em",
	"EN",		"eo",		"EP",		"EQ",
	"errprint",	"ev",		"evc",		"ex",
	"fallback",	"fam",		"fc",		"fchar",
	"fcolor",	"fdeferlig",	"feature",	"fkern",
	"fl",		"flig",		"fp",		"fps",
	"fschar",	"fspacewidth",	"fspecial",	"ftr",
	"fzoom",	"gcolor",	"hc",		"hcode",
	"hidechar",	"hla",		"hlm",		"hpf",
	"hpfa",		"hpfcode",	"hw",		"hy",
	"hylang",	"hylen",	"hym",		"hypp",
	"hys",		"ie",		"if",		"ig",
	"index",	"it",		"itc",		"IX",
	"kern",		"kernafter",	"kernbefore",	"kernpair",
	"lc",		"lc_ctype",	"lds",		"length",
	"letadj",	"lf",		"lg",		"lhang",
	"linetabs",	"lnr",		"lnrf",		"lpfx",
	"ls",		"lsm",		"lt",
	"mediasize",	"minss",	"mk",		"mso",
	"na",		"ne",		"nh",		"nhychar",
	"nm",		"nn",		"nop",		"nr",
	"nrf",		"nroff",	"ns",		"nx",
	"open",		"opena",	"os",		"output",
	"padj",		"papersize",	"pc",		"pev",
	"pi",		"PI",		"pl",		"pm",
	"pn",		"pnr",		"ps",
	"psbb",		"pshape",	"pso",		"ptr",
	"pvs",		"rchar",	"rd",		"recursionlimit",
	"return",	"rfschar",	"rhang",
	"rm",		"rn",		"rnn",		"rr",
	"rs",		"rt",		"schar",	"sentchar",
	"shc",		"shift",	"sizes",	"so",
	"spacewidth",	"special",	"spreadwarn",	"ss",
	"sty",		"substring",	"sv",		"sy",
	"T&",		"tc",		"TE",
	"TH",		"tkf",		"tl",
	"tm",		"tm1",		"tmc",		"tr",
	"track",	"transchar",	"trf",		"trimat",
	"trin",		"trnt",		"troff",	"TS",
	"uf",		"ul",		"unformat",	"unwatch",
	"unwatchn",	"vpt",		"vs",		"warn",
	"warnscale",	"watch",	"watchlength",	"watchn",
	"wh",		"while",	"write",	"writec",
	/*
	 * NOTE(review): the two NULL slots appear to line up with the
	 * roff_renamed and roff_userdef entries of roffs[], and "text"
	 * with TOKEN_NONE - confirm against enum roff_tok.
	 */
	"writem",	"xflag",	".",		NULL,
	NULL,		"text",
	/* NOTE(review): presumably the mdoc(7) macros - confirm. */
	"Dd",		"Dt",		"Os",		"Sh",
	"Ss",		"Pp",		"D1",		"Dl",
	"Bd",		"Ed",		"Bl",		"El",
	"It",		"Ad",		"An",		"Ap",
	"Ar",		"Cd",		"Cm",		"Dv",
	"Er",		"Ev",		"Ex",		"Fa",
	"Fd",		"Fl",		"Fn",		"Ft",
	"Ic",		"In",		"Li",		"Nd",
	"Nm",		"Op",		"Ot",		"Pa",
	"Rv",		"St",		"Va",		"Vt",
	"Xr",		"%A",		"%B",		"%D",
	"%I",		"%J",		"%N",		"%O",
	"%P",		"%R",		"%T",		"%V",
	"Ac",		"Ao",		"Aq",		"At",
	"Bc",		"Bf",		"Bo",		"Bq",
	"Bsx",		"Bx",		"Db",		"Dc",
	"Do",		"Dq",		"Ec",		"Ef",
	"Em",		"Eo",		"Fx",		"Ms",
	"No",		"Ns",		"Nx",		"Ox",
	"Pc",		"Pf",		"Po",		"Pq",
	"Qc",		"Ql",		"Qo",		"Qq",
	"Re",		"Rs",		"Sc",		"So",
	"Sq",		"Sm",		"Sx",		"Sy",
	"Tn",		"Ux",		"Xc",		"Xo",
	"Fo",		"Fc",		"Oo",		"Oc",
	"Bk",		"Ek",		"Bt",		"Hf",
	"Fr",		"Ud",		"Lb",		"Lp",
	"Lk",		"Mt",		"Brq",		"Bro",
	"Brc",		"%C",		"Es",		"En",
	"Dx",		"%Q",		"%U",		"Ta",
	NULL,
	/* NOTE(review): presumably the man(7) macros - confirm. */
	"TH",		"SH",		"SS",		"TP",
	"TQ",
	"LP",		"PP",		"P",		"IP",
	"HP",		"SM",		"SB",		"BI",
	"IB",		"BR",		"RB",		"R",
	"B",		"I",		"IR",		"RI",
	"nf",		"fi",
	"RE",		"RS",		"DT",		"UC",
	"PD",		"AT",		"in",
	"SY",		"YS",		"OP",
	"EX",		"EE",		"UR",
	"UE",		"MT",		"ME",		NULL
};
/* Read-only view of the name table for use by other code. */
const	char *const *roff_name = __roff_name;
360
/*
 * Handlers for all roff requests.
 * Each entry is labelled with the request name it belongs to;
 * the order of the entries is significant.
 * NOTE(review): presumably indexed by enum roff_tok - confirm.
 */
static	struct roffmac	 roffs[TOKEN_NONE] = {
	{ roff_br, NULL, NULL, 0 },  /* br */
	{ roff_onearg, NULL, NULL, 0 },  /* ce */
	{ roff_onearg, NULL, NULL, 0 },  /* ft */
	{ roff_onearg, NULL, NULL, 0 },  /* ll */
	{ roff_onearg, NULL, NULL, 0 },  /* mc */
	{ roff_onearg, NULL, NULL, 0 },  /* po */
	{ roff_onearg, NULL, NULL, 0 },  /* rj */
	{ roff_onearg, NULL, NULL, 0 },  /* sp */
	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
	{ roff_onearg, NULL, NULL, 0 },  /* ti */
	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
	{ roff_als, NULL, NULL, 0 },  /* als */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
	{ roff_ds, NULL, NULL, 0 },  /* as */
	{ roff_ds, NULL, NULL, 0 },  /* as1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
	{ roff_unsupp, NULL, NULL, 0 },  /* box */
	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
	{ roff_unsupp, NULL, NULL, 0 },  /* break */
	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
	{ roff_br, NULL, NULL, 0 },  /* brp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
	{ roff_cc, NULL, NULL, 0 },  /* cc */
	{ roff_insec, NULL, NULL, 0 },  /* cf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
	{ roff_char, NULL, NULL, 0 },  /* char */
	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
	{ roff_insec, NULL, NULL, 0 },  /* close */
	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
	{ roff_unsupp, NULL, NULL, 0 },  /* da */
	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* device */
	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
	{ roff_unsupp, NULL, NULL, 0 },  /* di */
	{ roff_unsupp, NULL, NULL, 0 },  /* do */
	{ roff_ds, NULL, NULL, 0 },  /* ds */
	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
	{ roff_ec, NULL, NULL, 0 },  /* ec */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
	{ roff_unsupp, NULL, NULL, 0 },  /* em */
	{ roff_EN, NULL, NULL, 0 },  /* EN */
	{ roff_eo, NULL, NULL, 0 },  /* eo */
	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
	{ roff_unsupp, NULL, NULL, 0 },  /* index */
	{ roff_it, NULL, NULL, 0 },  /* it */
	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
	{ roff_unsupp, NULL, NULL, 0 },  /* length */
	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
	{ roff_insec, NULL, NULL, 0 },  /* lf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
	{ roff_insec, NULL, NULL, 0 },  /* mso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
	{ roff_nop, NULL, NULL, 0 },  /* nop */
	{ roff_nr, NULL, NULL, 0 },  /* nr */
	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
	{ roff_insec, NULL, NULL, 0 },  /* nx */
	{ roff_insec, NULL, NULL, 0 },  /* open */
	{ roff_insec, NULL, NULL, 0 },  /* opena */
	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
	{ roff_unsupp, NULL, NULL, 0 },  /* output */
	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
	{ roff_insec, NULL, NULL, 0 },  /* pi */
	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
	{ roff_insec, NULL, NULL, 0 },  /* pso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
	{ roff_return, NULL, NULL, 0 },  /* return */
	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
	{ roff_rm, NULL, NULL, 0 },  /* rm */
	{ roff_rn, NULL, NULL, 0 },  /* rn */
	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
	{ roff_rr, NULL, NULL, 0 },  /* rr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
	{ roff_shift, NULL, NULL, 0 },  /* shift */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
	{ roff_so, NULL, NULL, 0 },  /* so */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
	{ roff_insec, NULL, NULL, 0 },  /* sy */
	{ roff_T_, NULL, NULL, 0 },  /* T& */
	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
	{ roff_TE, NULL, NULL, 0 },  /* TE */
	{ roff_Dd, NULL, NULL, 0 },  /* TH */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
	{ roff_tr, NULL, NULL, 0 },  /* tr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
	{ roff_insec, NULL, NULL, 0 },  /* trf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
	{ roff_TS, NULL, NULL, 0 },  /* TS */
	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
	{ roff_unsupp, NULL, NULL, 0 },  /* wh */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* while */
	{ roff_insec, NULL, NULL, 0 },  /* write */
	{ roff_insec, NULL, NULL, 0 },  /* writec */
	{ roff_insec, NULL, NULL, 0 },  /* writem */
	{ roff_line_ignore, NULL, NULL, 0 },  /* xflag */
	{ roff_cblock, NULL, NULL, 0 },  /* . */
	{ roff_renamed, NULL, NULL, 0 },  /* presumably ROFF_RENAMED - confirm */
	{ roff_userdef, NULL, NULL, 0 }  /* presumably ROFF_USERDEF - confirm */
};
605
/*
 * Array of injected predefined strings.
 * The initializers come from predefs.in, which is expected to
 * consist of PREDEF() invocations; PREDEFS_MAX fixes the array size.
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
611
/*
 * File-scope parser state that is not part of struct roff.
 * All four variables are reinitialized by roff_reset().
 */
static	int	 roffce_lines;	/* number of input lines to center */
static	struct roff_node *roffce_node;  /* active request */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
616
617
618 /* --- request table ------------------------------------------------------ */
619
620 struct ohash *
621 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
622 {
623 struct ohash *htab;
624 struct roffreq *req;
625 enum roff_tok tok;
626 size_t sz;
627 unsigned int slot;
628
629 htab = mandoc_malloc(sizeof(*htab));
630 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
631
632 for (tok = mintok; tok < maxtok; tok++) {
633 if (roff_name[tok] == NULL)
634 continue;
635 sz = strlen(roff_name[tok]);
636 req = mandoc_malloc(sizeof(*req) + sz + 1);
637 req->tok = tok;
638 memcpy(req->name, roff_name[tok], sz + 1);
639 slot = ohash_qlookup(htab, req->name);
640 ohash_insert(htab, slot, req);
641 }
642 return htab;
643 }
644
/*
 * Release a table built by roffhash_alloc(), including all its
 * entries.  Passing a NULL pointer is allowed and does nothing.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 idx;

	if (htab != NULL) {
		entry = ohash_first(htab, &idx);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &idx);
		}
		ohash_delete(htab);
		free(htab);
	}
}
659
660 enum roff_tok
661 roffhash_find(struct ohash *htab, const char *name, size_t sz)
662 {
663 struct roffreq *req;
664 const char *end;
665
666 if (sz) {
667 end = name + sz;
668 req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
669 } else
670 req = ohash_find(htab, ohash_qlookup(htab, name));
671 return req == NULL ? TOKEN_NONE : req->tok;
672 }
673
674 /* --- stack of request blocks -------------------------------------------- */
675
676 /*
677 * Pop the current node off of the stack of roff instructions currently
678 * pending.
679 */
680 static int
681 roffnode_pop(struct roff *r)
682 {
683 struct roffnode *p;
684 int inloop;
685
686 p = r->last;
687 inloop = p->tok == ROFF_while;
688 r->last = p->parent;
689 free(p->name);
690 free(p->end);
691 free(p);
692 return inloop;
693 }
694
695 /*
696 * Push a roff node onto the instruction stack. This must later be
697 * removed with roffnode_pop().
698 */
699 static void
700 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
701 int line, int col)
702 {
703 struct roffnode *p;
704
705 p = mandoc_calloc(1, sizeof(struct roffnode));
706 p->tok = tok;
707 if (name)
708 p->name = mandoc_strdup(name);
709 p->parent = r->last;
710 p->line = line;
711 p->col = col;
712 p->rule = p->parent ? p->parent->rule : 0;
713
714 r->last = p;
715 }
716
717 /* --- roff parser state data management ---------------------------------- */
718
719 static void
720 roff_free1(struct roff *r)
721 {
722 int i;
723
724 tbl_free(r->first_tbl);
725 r->first_tbl = r->last_tbl = r->tbl = NULL;
726
727 eqn_free(r->last_eqn);
728 r->last_eqn = r->eqn = NULL;
729
730 while (r->mstackpos >= 0)
731 roff_userret(r);
732
733 while (r->last)
734 roffnode_pop(r);
735
736 free (r->rstack);
737 r->rstack = NULL;
738 r->rstacksz = 0;
739 r->rstackpos = -1;
740
741 roff_freereg(r->regtab);
742 r->regtab = NULL;
743
744 roff_freestr(r->strtab);
745 roff_freestr(r->rentab);
746 roff_freestr(r->xmbtab);
747 r->strtab = r->rentab = r->xmbtab = NULL;
748
749 if (r->xtab)
750 for (i = 0; i < 128; i++)
751 free(r->xtab[i].p);
752 free(r->xtab);
753 r->xtab = NULL;
754 }
755
756 void
757 roff_reset(struct roff *r)
758 {
759 roff_free1(r);
760 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
761 r->control = '\0';
762 r->escape = '\\';
763 roffce_lines = 0;
764 roffce_node = NULL;
765 roffit_lines = 0;
766 roffit_macro = NULL;
767 }
768
769 void
770 roff_free(struct roff *r)
771 {
772 int i;
773
774 roff_free1(r);
775 for (i = 0; i < r->mstacksz; i++)
776 free(r->mstack[i].argv);
777 free(r->mstack);
778 roffhash_free(r->reqtab);
779 free(r);
780 }
781
782 struct roff *
783 roff_alloc(struct mparse *parse, int options)
784 {
785 struct roff *r;
786
787 r = mandoc_calloc(1, sizeof(struct roff));
788 r->parse = parse;
789 r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
790 r->options = options;
791 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
792 r->mstackpos = -1;
793 r->rstackpos = -1;
794 r->escape = '\\';
795 return r;
796 }
797
798 /* --- syntax tree state data management ---------------------------------- */
799
800 static void
801 roff_man_free1(struct roff_man *man)
802 {
803
804 if (man->first != NULL)
805 roff_node_delete(man, man->first);
806 free(man->meta.msec);
807 free(man->meta.vol);
808 free(man->meta.os);
809 free(man->meta.arch);
810 free(man->meta.title);
811 free(man->meta.name);
812 free(man->meta.date);
813 }
814
815 static void
816 roff_man_alloc1(struct roff_man *man)
817 {
818
819 memset(&man->meta, 0, sizeof(man->meta));
820 man->first = mandoc_calloc(1, sizeof(*man->first));
821 man->first->type = ROFFT_ROOT;
822 man->last = man->first;
823 man->last_es = NULL;
824 man->flags = 0;
825 man->macroset = MACROSET_NONE;
826 man->lastsec = man->lastnamed = SEC_NONE;
827 man->next = ROFF_NEXT_CHILD;
828 }
829
/*
 * Discard the current syntax tree and metadata
 * and start over with an empty tree.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);
	roff_man_alloc1(man);
}
837
/*
 * Destroy a struct roff_man including its syntax tree and metadata.
 */
void
roff_man_free(struct roff_man *man)
{
	roff_man_free1(man);
	free(man);
}
845
846 struct roff_man *
847 roff_man_alloc(struct roff *roff, struct mparse *parse,
848 const char *os_s, int quick)
849 {
850 struct roff_man *man;
851
852 man = mandoc_calloc(1, sizeof(*man));
853 man->parse = parse;
854 man->roff = roff;
855 man->os_s = os_s;
856 man->quick = quick;
857 roff_man_alloc1(man);
858 roff->man = man;
859 return man;
860 }
861
862 /* --- syntax tree handling ----------------------------------------------- */
863
864 struct roff_node *
865 roff_node_alloc(struct roff_man *man, int line, int pos,
866 enum roff_type type, int tok)
867 {
868 struct roff_node *n;
869
870 n = mandoc_calloc(1, sizeof(*n));
871 n->line = line;
872 n->pos = pos;
873 n->tok = tok;
874 n->type = type;
875 n->sec = man->lastsec;
876
877 if (man->flags & MDOC_SYNOPSIS)
878 n->flags |= NODE_SYNPRETTY;
879 else
880 n->flags &= ~NODE_SYNPRETTY;
881 if (man->flags & MDOC_NEWLINE)
882 n->flags |= NODE_LINE;
883 man->flags &= ~MDOC_NEWLINE;
884
885 return n;
886 }
887
888 void
889 roff_node_append(struct roff_man *man, struct roff_node *n)
890 {
891
892 switch (man->next) {
893 case ROFF_NEXT_SIBLING:
894 if (man->last->next != NULL) {
895 n->next = man->last->next;
896 man->last->next->prev = n;
897 } else
898 man->last->parent->last = n;
899 man->last->next = n;
900 n->prev = man->last;
901 n->parent = man->last->parent;
902 break;
903 case ROFF_NEXT_CHILD:
904 if (man->last->child != NULL) {
905 n->next = man->last->child;
906 man->last->child->prev = n;
907 } else
908 man->last->last = n;
909 man->last->child = n;
910 n->parent = man->last;
911 break;
912 default:
913 abort();
914 }
915 man->last = n;
916
917 switch (n->type) {
918 case ROFFT_HEAD:
919 n->parent->head = n;
920 break;
921 case ROFFT_BODY:
922 if (n->end != ENDBODY_NOT)
923 return;
924 n->parent->body = n;
925 break;
926 case ROFFT_TAIL:
927 n->parent->tail = n;
928 break;
929 default:
930 return;
931 }
932
933 /*
934 * Copy over the normalised-data pointer of our parent. Not
935 * everybody has one, but copying a null pointer is fine.
936 */
937
938 n->norm = n->parent->norm;
939 assert(n->parent->type == ROFFT_BLOCK);
940 }
941
942 void
943 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
944 {
945 struct roff_node *n;
946
947 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
948 n->string = roff_strdup(man->roff, word);
949 roff_node_append(man, n);
950 n->flags |= NODE_VALID | NODE_ENDED;
951 man->next = ROFF_NEXT_SIBLING;
952 }
953
954 void
955 roff_word_append(struct roff_man *man, const char *word)
956 {
957 struct roff_node *n;
958 char *addstr, *newstr;
959
960 n = man->last;
961 addstr = roff_strdup(man->roff, word);
962 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
963 free(addstr);
964 free(n->string);
965 n->string = newstr;
966 man->next = ROFF_NEXT_SIBLING;
967 }
968
969 void
970 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
971 {
972 struct roff_node *n;
973
974 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
975 roff_node_append(man, n);
976 man->next = ROFF_NEXT_CHILD;
977 }
978
979 struct roff_node *
980 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
981 {
982 struct roff_node *n;
983
984 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
985 roff_node_append(man, n);
986 man->next = ROFF_NEXT_CHILD;
987 return n;
988 }
989
990 struct roff_node *
991 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
992 {
993 struct roff_node *n;
994
995 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
996 roff_node_append(man, n);
997 man->next = ROFF_NEXT_CHILD;
998 return n;
999 }
1000
1001 struct roff_node *
1002 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1003 {
1004 struct roff_node *n;
1005
1006 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1007 roff_node_append(man, n);
1008 man->next = ROFF_NEXT_CHILD;
1009 return n;
1010 }
1011
1012 static void
1013 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1014 {
1015 struct roff_node *n;
1016 struct tbl_span *span;
1017
1018 if (man->macroset == MACROSET_MAN)
1019 man_breakscope(man, ROFF_TS);
1020 while ((span = tbl_span(tbl)) != NULL) {
1021 n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1022 n->span = span;
1023 roff_node_append(man, n);
1024 n->flags |= NODE_VALID | NODE_ENDED;
1025 man->next = ROFF_NEXT_SIBLING;
1026 }
1027 }
1028
1029 void
1030 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1031 {
1032
1033 /* Adjust siblings. */
1034
1035 if (n->prev)
1036 n->prev->next = n->next;
1037 if (n->next)
1038 n->next->prev = n->prev;
1039
1040 /* Adjust parent. */
1041
1042 if (n->parent != NULL) {
1043 if (n->parent->child == n)
1044 n->parent->child = n->next;
1045 if (n->parent->last == n)
1046 n->parent->last = n->prev;
1047 }
1048
1049 /* Adjust parse point. */
1050
1051 if (man == NULL)
1052 return;
1053 if (man->last == n) {
1054 if (n->prev == NULL) {
1055 man->last = n->parent;
1056 man->next = ROFF_NEXT_CHILD;
1057 } else {
1058 man->last = n->prev;
1059 man->next = ROFF_NEXT_SIBLING;
1060 }
1061 }
1062 if (man->first == n)
1063 man->first = NULL;
1064 }
1065
1066 void
1067 roff_node_relink(struct roff_man *man, struct roff_node *n)
1068 {
1069 roff_node_unlink(man, n);
1070 n->prev = n->next = NULL;
1071 roff_node_append(man, n);
1072 }
1073
1074 void
1075 roff_node_free(struct roff_node *n)
1076 {
1077
1078 if (n->args != NULL)
1079 mdoc_argv_free(n->args);
1080 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1081 free(n->norm);
1082 eqn_box_free(n->eqn);
1083 free(n->string);
1084 free(n);
1085 }
1086
1087 void
1088 roff_node_delete(struct roff_man *man, struct roff_node *n)
1089 {
1090
1091 while (n->child != NULL)
1092 roff_node_delete(man, n->child);
1093 roff_node_unlink(man, n);
1094 roff_node_free(n);
1095 }
1096
1097 void
1098 deroff(char **dest, const struct roff_node *n)
1099 {
1100 char *cp;
1101 size_t sz;
1102
1103 if (n->type != ROFFT_TEXT) {
1104 for (n = n->child; n != NULL; n = n->next)
1105 deroff(dest, n);
1106 return;
1107 }
1108
1109 /* Skip leading whitespace. */
1110
1111 for (cp = n->string; *cp != '\0'; cp++) {
1112 if (cp[0] == '\\' && cp[1] != '\0' &&
1113 strchr(" %&0^|~", cp[1]) != NULL)
1114 cp++;
1115 else if ( ! isspace((unsigned char)*cp))
1116 break;
1117 }
1118
1119 /* Skip trailing backslash. */
1120
1121 sz = strlen(cp);
1122 if (sz > 0 && cp[sz - 1] == '\\')
1123 sz--;
1124
1125 /* Skip trailing whitespace. */
1126
1127 for (; sz; sz--)
1128 if ( ! isspace((unsigned char)cp[sz-1]))
1129 break;
1130
1131 /* Skip empty strings. */
1132
1133 if (sz == 0)
1134 return;
1135
1136 if (*dest == NULL) {
1137 *dest = mandoc_strndup(cp, sz);
1138 return;
1139 }
1140
1141 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1142 free(*dest);
1143 *dest = cp;
1144 }
1145
1146 /* --- main functions of the roff parser ---------------------------------- */
1147
1148 /*
1149 * In the current line, expand escape sequences that tend to get
1150 * used in numerical expressions and conditional requests.
1151 * Also check the syntax of the remaining escape sequences.
1152 */
1153 static int
1154 roff_res(struct roff *r, struct buf *buf, int ln, int pos)
1155 {
1156 struct mctx *ctx; /* current macro call context */
1157 char ubuf[24]; /* buffer to print the number */
1158 struct roff_node *n; /* used for header comments */
1159 const char *start; /* start of the string to process */
1160 char *stesc; /* start of an escape sequence ('\\') */
1161 char *ep; /* end of comment string */
1162 const char *stnam; /* start of the name, after "[(*" */
1163 const char *cp; /* end of the name, e.g. before ']' */
1164 const char *res; /* the string to be substituted */
1165 char *nbuf; /* new buffer to copy buf->buf to */
1166 size_t maxl; /* expected length of the escape name */
1167 size_t naml; /* actual length of the escape name */
1168 size_t asz; /* length of the replacement */
1169 size_t rsz; /* length of the rest of the string */
1170 enum mandoc_esc esc; /* type of the escape sequence */
1171 int inaml; /* length returned from mandoc_escape() */
1172 int expand_count; /* to avoid infinite loops */
1173 int npos; /* position in numeric expression */
1174 int arg_complete; /* argument not interrupted by eol */
1175 int quote_args; /* true for \\$@, false for \\$* */
1176 int done; /* no more input available */
1177 int deftype; /* type of definition to paste */
1178 int rcsid; /* kind of RCS id seen */
1179 char sign; /* increment number register */
1180 char term; /* character terminating the escape */
1181
1182 /* Search forward for comments. */
1183
1184 done = 0;
1185 start = buf->buf + pos;
1186 for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
1187 if (stesc[0] != r->escape || stesc[1] == '\0')
1188 continue;
1189 stesc++;
1190 if (*stesc != '"' && *stesc != '#')
1191 continue;
1192
1193 /* Comment found, look for RCS id. */
1194
1195 rcsid = 0;
1196 if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
1197 rcsid = 1 << MANDOC_OS_OPENBSD;
1198 cp += 8;
1199 } else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
1200 rcsid = 1 << MANDOC_OS_NETBSD;
1201 cp += 7;
1202 }
1203 if (cp != NULL &&
1204 isalnum((unsigned char)*cp) == 0 &&
1205 strchr(cp, '$') != NULL) {
1206 if (r->man->meta.rcsids & rcsid)
1207 mandoc_msg(MANDOCERR_RCS_REP, r->parse,
1208 ln, stesc + 1 - buf->buf, stesc + 1);
1209 r->man->meta.rcsids |= rcsid;
1210 }
1211
1212 /* Handle trailing whitespace. */
1213
1214 ep = strchr(stesc--, '\0') - 1;
1215 if (*ep == '\n') {
1216 done = 1;
1217 ep--;
1218 }
1219 if (*ep == ' ' || *ep == '\t')
1220 mandoc_msg(MANDOCERR_SPACE_EOL, r->parse,
1221 ln, ep - buf->buf, NULL);
1222
1223 /*
1224 * Save comments preceding the title macro
1225 * in the syntax tree.
1226 */
1227
1228 if (r->format == 0) {
1229 while (*ep == ' ' || *ep == '\t')
1230 ep--;
1231 ep[1] = '\0';
1232 n = roff_node_alloc(r->man,
1233 ln, stesc + 1 - buf->buf,
1234 ROFFT_COMMENT, TOKEN_NONE);
1235 n->string = mandoc_strdup(stesc + 2);
1236 roff_node_append(r->man, n);
1237 n->flags |= NODE_VALID | NODE_ENDED;
1238 r->man->next = ROFF_NEXT_SIBLING;
1239 }
1240
1241 /* Line continuation with comment. */
1242
1243 if (stesc[1] == '#') {
1244 *stesc = '\0';
1245 return ROFF_IGN | ROFF_APPEND;
1246 }
1247
1248 /* Discard normal comments. */
1249
1250 while (stesc > start && stesc[-1] == ' ' &&
1251 (stesc == start + 1 || stesc[-2] != '\\'))
1252 stesc--;
1253 *stesc = '\0';
1254 break;
1255 }
1256 if (stesc == start)
1257 return ROFF_CONT;
1258 stesc--;
1259
1260 /* Notice the end of the input. */
1261
1262 if (*stesc == '\n') {
1263 *stesc-- = '\0';
1264 done = 1;
1265 }
1266
1267 expand_count = 0;
1268 while (stesc >= start) {
1269
1270 /* Search backwards for the next backslash. */
1271
1272 if (*stesc != r->escape) {
1273 if (*stesc == '\\') {
1274 *stesc = '\0';
1275 buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
1276 buf->buf, stesc + 1) + 1;
1277 start = nbuf + pos;
1278 stesc = nbuf + (stesc - buf->buf);
1279 free(buf->buf);
1280 buf->buf = nbuf;
1281 }
1282 stesc--;
1283 continue;
1284 }
1285
1286 /* If it is escaped, skip it. */
1287
1288 for (cp = stesc - 1; cp >= start; cp--)
1289 if (*cp != r->escape)
1290 break;
1291
1292 if ((stesc - cp) % 2 == 0) {
1293 while (stesc > cp)
1294 *stesc-- = '\\';
1295 continue;
1296 } else if (stesc[1] != '\0') {
1297 *stesc = '\\';
1298 } else {
1299 *stesc-- = '\0';
1300 if (done)
1301 continue;
1302 else
1303 return ROFF_IGN | ROFF_APPEND;
1304 }
1305
1306 /* Decide whether to expand or to check only. */
1307
1308 term = '\0';
1309 cp = stesc + 1;
1310 switch (*cp) {
1311 case '*':
1312 case '$':
1313 res = NULL;
1314 break;
1315 case 'B':
1316 case 'w':
1317 term = cp[1];
1318 /* FALLTHROUGH */
1319 case 'n':
1320 sign = cp[1];
1321 if (sign == '+' || sign == '-')
1322 cp++;
1323 res = ubuf;
1324 break;
1325 default:
1326 esc = mandoc_escape(&cp, &stnam, &inaml);
1327 if (esc == ESCAPE_ERROR ||
1328 (esc == ESCAPE_SPECIAL &&
1329 mchars_spec2cp(stnam, inaml) < 0))
1330 mandoc_vmsg(MANDOCERR_ESC_BAD,
1331 r->parse, ln, (int)(stesc - buf->buf),
1332 "%.*s", (int)(cp - stesc), stesc);
1333 stesc--;
1334 continue;
1335 }
1336
1337 if (EXPAND_LIMIT < ++expand_count) {
1338 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1339 ln, (int)(stesc - buf->buf), NULL);
1340 return ROFF_IGN;
1341 }
1342
1343 /*
1344 * The third character decides the length
1345 * of the name of the string or register.
1346 * Save a pointer to the name.
1347 */
1348
1349 if (term == '\0') {
1350 switch (*++cp) {
1351 case '\0':
1352 maxl = 0;
1353 break;
1354 case '(':
1355 cp++;
1356 maxl = 2;
1357 break;
1358 case '[':
1359 cp++;
1360 term = ']';
1361 maxl = 0;
1362 break;
1363 default:
1364 maxl = 1;
1365 break;
1366 }
1367 } else {
1368 cp += 2;
1369 maxl = 0;
1370 }
1371 stnam = cp;
1372
1373 /* Advance to the end of the name. */
1374
1375 naml = 0;
1376 arg_complete = 1;
1377 while (maxl == 0 || naml < maxl) {
1378 if (*cp == '\0') {
1379 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
1380 ln, (int)(stesc - buf->buf), stesc);
1381 arg_complete = 0;
1382 break;
1383 }
1384 if (maxl == 0 && *cp == term) {
1385 cp++;
1386 break;
1387 }
1388 if (*cp++ != '\\' || stesc[1] != 'w') {
1389 naml++;
1390 continue;
1391 }
1392 switch (mandoc_escape(&cp, NULL, NULL)) {
1393 case ESCAPE_SPECIAL:
1394 case ESCAPE_UNICODE:
1395 case ESCAPE_NUMBERED:
1396 case ESCAPE_OVERSTRIKE:
1397 naml++;
1398 break;
1399 default:
1400 break;
1401 }
1402 }
1403
1404 /*
1405 * Retrieve the replacement string; if it is
1406 * undefined, resume searching for escapes.
1407 */
1408
1409 switch (stesc[1]) {
1410 case '*':
1411 if (arg_complete) {
1412 deftype = ROFFDEF_USER | ROFFDEF_PRE;
1413 res = roff_getstrn(r, stnam, naml, &deftype);
1414
1415 /*
1416 * If not overriden, let \*(.T
1417 * through to the formatters.
1418 */
1419
1420 if (res == NULL && naml == 2 &&
1421 stnam[0] == '.' && stnam[1] == 'T') {
1422 roff_setstrn(&r->strtab,
1423 ".T", 2, NULL, 0, 0);
1424 stesc--;
1425 continue;
1426 }
1427 }
1428 break;
1429 case '$':
1430 if (r->mstackpos < 0) {
1431 mandoc_vmsg(MANDOCERR_ARG_UNDEF,
1432 r->parse, ln, (int)(stesc - buf->buf),
1433 "%.3s", stesc);
1434 break;
1435 }
1436 ctx = r->mstack + r->mstackpos;
1437 npos = stesc[2] - '1';
1438 if (npos >= 0 && npos <= 8) {
1439 res = npos < ctx->argc ?
1440 ctx->argv[npos] : "";
1441 break;
1442 }
1443 if (stesc[2] == '*')
1444 quote_args = 0;
1445 else if (stesc[2] == '@')
1446 quote_args = 1;
1447 else {
1448 mandoc_vmsg(MANDOCERR_ARG_NONUM,
1449 r->parse, ln, (int)(stesc - buf->buf),
1450 "%.3s", stesc);
1451 break;
1452 }
1453 asz = 0;
1454 for (npos = 0; npos < ctx->argc; npos++) {
1455 if (npos)
1456 asz++; /* blank */
1457 if (quote_args)
1458 asz += 2; /* quotes */
1459 asz += strlen(ctx->argv[npos]);
1460 }
1461 if (asz != 3) {
1462 rsz = buf->sz - (stesc - buf->buf) - 3;
1463 if (asz < 3)
1464 memmove(stesc + asz, stesc + 3, rsz);
1465 buf->sz += asz - 3;
1466 nbuf = mandoc_realloc(buf->buf, buf->sz);
1467 start = nbuf + pos;
1468 stesc = nbuf + (stesc - buf->buf);
1469 buf->buf = nbuf;
1470 if (asz > 3)
1471 memmove(stesc + asz, stesc + 3, rsz);
1472 }
1473 for (npos = 0; npos < ctx->argc; npos++) {
1474 if (npos)
1475 *stesc++ = ' ';
1476 if (quote_args)
1477 *stesc++ = '"';
1478 cp = ctx->argv[npos];
1479 while (*cp != '\0')
1480 *stesc++ = *cp++;
1481 if (quote_args)
1482 *stesc++ = '"';
1483 }
1484 continue;
1485 case 'B':
1486 npos = 0;
1487 ubuf[0] = arg_complete &&
1488 roff_evalnum(r, ln, stnam, &npos,
1489 NULL, ROFFNUM_SCALE) &&
1490 stnam + npos + 1 == cp ? '1' : '0';
1491 ubuf[1] = '\0';
1492 break;
1493 case 'n':
1494 if (arg_complete)
1495 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1496 roff_getregn(r, stnam, naml, sign));
1497 else
1498 ubuf[0] = '\0';
1499 break;
1500 case 'w':
1501 /* use even incomplete args */
1502 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1503 24 * (int)naml);
1504 break;
1505 }
1506
1507 if (res == NULL) {
1508 if (stesc[1] == '*')
1509 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1510 r->parse, ln, (int)(stesc - buf->buf),
1511 "%.*s", (int)naml, stnam);
1512 res = "";
1513 } else if (buf->sz + strlen(res) > SHRT_MAX) {
1514 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1515 ln, (int)(stesc - buf->buf), NULL);
1516 return ROFF_IGN;
1517 }
1518
1519 /* Replace the escape sequence by the string. */
1520
1521 *stesc = '\0';
1522 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
1523 buf->buf, res, cp) + 1;
1524
1525 /* Prepare for the next replacement. */
1526
1527 start = nbuf + pos;
1528 stesc = nbuf + (stesc - buf->buf) + strlen(res);
1529 free(buf->buf);
1530 buf->buf = nbuf;
1531 }
1532 return ROFF_CONT;
1533 }
1534
1535 /*
1536 * Process text streams.
1537 */
1538 static int
1539 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1540 {
1541 size_t sz;
1542 const char *start;
1543 char *p;
1544 int isz;
1545 enum mandoc_esc esc;
1546
1547 /* Spring the input line trap. */
1548
1549 if (roffit_lines == 1) {
1550 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1551 free(buf->buf);
1552 buf->buf = p;
1553 buf->sz = isz + 1;
1554 *offs = 0;
1555 free(roffit_macro);
1556 roffit_lines = 0;
1557 return ROFF_REPARSE;
1558 } else if (roffit_lines > 1)
1559 --roffit_lines;
1560
1561 if (roffce_node != NULL && buf->buf[pos] != '\0') {
1562 if (roffce_lines < 1) {
1563 r->man->last = roffce_node;
1564 r->man->next = ROFF_NEXT_SIBLING;
1565 roffce_lines = 0;
1566 roffce_node = NULL;
1567 } else
1568 roffce_lines--;
1569 }
1570
1571 /* Convert all breakable hyphens into ASCII_HYPH. */
1572
1573 start = p = buf->buf + pos;
1574
1575 while (*p != '\0') {
1576 sz = strcspn(p, "-\\");
1577 p += sz;
1578
1579 if (*p == '\0')
1580 break;
1581
1582 if (*p == '\\') {
1583 /* Skip over escapes. */
1584 p++;
1585 esc = mandoc_escape((const char **)&p, NULL, NULL);
1586 if (esc == ESCAPE_ERROR)
1587 break;
1588 while (*p == '-')
1589 p++;
1590 continue;
1591 } else if (p == start) {
1592 p++;
1593 continue;
1594 }
1595
1596 if (isalpha((unsigned char)p[-1]) &&
1597 isalpha((unsigned char)p[1]))
1598 *p = ASCII_HYPH;
1599 p++;
1600 }
1601 return ROFF_CONT;
1602 }
1603
1604 int
1605 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
1606 {
1607 enum roff_tok t;
1608 int e;
1609 int pos; /* parse point */
1610 int spos; /* saved parse point for messages */
1611 int ppos; /* original offset in buf->buf */
1612 int ctl; /* macro line (boolean) */
1613
1614 ppos = pos = *offs;
1615
1616 /* Handle in-line equation delimiters. */
1617
1618 if (r->tbl == NULL &&
1619 r->last_eqn != NULL && r->last_eqn->delim &&
1620 (r->eqn == NULL || r->eqn_inline)) {
1621 e = roff_eqndelim(r, buf, pos);
1622 if (e == ROFF_REPARSE)
1623 return e;
1624 assert(e == ROFF_CONT);
1625 }
1626
1627 /* Expand some escape sequences. */
1628
1629 e = roff_res(r, buf, ln, pos);
1630 if ((e & ROFF_MASK) == ROFF_IGN)
1631 return e;
1632 assert(e == ROFF_CONT);
1633
1634 ctl = roff_getcontrol(r, buf->buf, &pos);
1635
1636 /*
1637 * First, if a scope is open and we're not a macro, pass the
1638 * text through the macro's filter.
1639 * Equations process all content themselves.
1640 * Tables process almost all content themselves, but we want
1641 * to warn about macros before passing it there.
1642 */
1643
1644 if (r->last != NULL && ! ctl) {
1645 t = r->last->tok;
1646 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1647 if ((e & ROFF_MASK) == ROFF_IGN)
1648 return e;
1649 e &= ~ROFF_MASK;
1650 } else
1651 e = ROFF_IGN;
1652 if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
1653 eqn_read(r->eqn, buf->buf + ppos);
1654 return e;
1655 }
1656 if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
1657 tbl_read(r->tbl, ln, buf->buf, ppos);
1658 roff_addtbl(r->man, ln, r->tbl);
1659 return e;
1660 }
1661 if ( ! ctl)
1662 return roff_parsetext(r, buf, pos, offs) | e;
1663
1664 /* Skip empty request lines. */
1665
1666 if (buf->buf[pos] == '"') {
1667 mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
1668 ln, pos, NULL);
1669 return ROFF_IGN;
1670 } else if (buf->buf[pos] == '\0')
1671 return ROFF_IGN;
1672
1673 /*
1674 * If a scope is open, go to the child handler for that macro,
1675 * as it may want to preprocess before doing anything with it.
1676 * Don't do so if an equation is open.
1677 */
1678
1679 if (r->last) {
1680 t = r->last->tok;
1681 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1682 }
1683
1684 /* No scope is open. This is a new request or macro. */
1685
1686 spos = pos;
1687 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1688
1689 /* Tables ignore most macros. */
1690
1691 if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
1692 t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
1693 mandoc_msg(MANDOCERR_TBLMACRO, r->parse,
1694 ln, pos, buf->buf + spos);
1695 if (t != TOKEN_NONE)
1696 return ROFF_IGN;
1697 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1698 pos++;
1699 while (buf->buf[pos] == ' ')
1700 pos++;
1701 tbl_read(r->tbl, ln, buf->buf, pos);
1702 roff_addtbl(r->man, ln, r->tbl);
1703 return ROFF_IGN;
1704 }
1705
1706 /* For now, let high level macros abort .ce mode. */
1707
1708 if (ctl && roffce_node != NULL &&
1709 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
1710 t == ROFF_TH || t == ROFF_TS)) {
1711 r->man->last = roffce_node;
1712 r->man->next = ROFF_NEXT_SIBLING;
1713 roffce_lines = 0;
1714 roffce_node = NULL;
1715 }
1716
1717 /*
1718 * This is neither a roff request nor a user-defined macro.
1719 * Let the standard macro set parsers handle it.
1720 */
1721
1722 if (t == TOKEN_NONE)
1723 return ROFF_CONT;
1724
1725 /* Execute a roff request or a user defined macro. */
1726
1727 return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
1728 }
1729
1730 /*
1731 * Internal interface function to tell the roff parser that execution
1732 * of the current macro ended. This is required because macro
1733 * definitions usually do not end with a .return request.
1734 */
1735 void
1736 roff_userret(struct roff *r)
1737 {
1738 struct mctx *ctx;
1739 int i;
1740
1741 assert(r->mstackpos >= 0);
1742 ctx = r->mstack + r->mstackpos;
1743 for (i = 0; i < ctx->argc; i++)
1744 free(ctx->argv[i]);
1745 ctx->argc = 0;
1746 r->mstackpos--;
1747 }
1748
1749 void
1750 roff_endparse(struct roff *r)
1751 {
1752 if (r->last != NULL)
1753 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1754 r->last->line, r->last->col,
1755 roff_name[r->last->tok]);
1756
1757 if (r->eqn != NULL) {
1758 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1759 r->eqn->node->line, r->eqn->node->pos, "EQ");
1760 eqn_parse(r->eqn);
1761 r->eqn = NULL;
1762 }
1763
1764 if (r->tbl != NULL) {
1765 tbl_end(r->tbl, 1);
1766 r->tbl = NULL;
1767 }
1768 }
1769
1770 /*
1771 * Parse a roff node's type from the input buffer. This must be in the
1772 * form of ".foo xxx" in the usual way.
1773 */
1774 static enum roff_tok
1775 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1776 {
1777 char *cp;
1778 const char *mac;
1779 size_t maclen;
1780 int deftype;
1781 enum roff_tok t;
1782
1783 cp = buf + *pos;
1784
1785 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1786 return TOKEN_NONE;
1787
1788 mac = cp;
1789 maclen = roff_getname(r, &cp, ln, ppos);
1790
1791 deftype = ROFFDEF_USER | ROFFDEF_REN;
1792 r->current_string = roff_getstrn(r, mac, maclen, &deftype);
1793 switch (deftype) {
1794 case ROFFDEF_USER:
1795 t = ROFF_USERDEF;
1796 break;
1797 case ROFFDEF_REN:
1798 t = ROFF_RENAMED;
1799 break;
1800 default:
1801 t = roffhash_find(r->reqtab, mac, maclen);
1802 break;
1803 }
1804 if (t != TOKEN_NONE)
1805 *pos = cp - buf;
1806 else if (deftype == ROFFDEF_UNDEF) {
1807 /* Using an undefined macro defines it to be empty. */
1808 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
1809 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
1810 }
1811 return t;
1812 }
1813
1814 /* --- handling of request blocks ----------------------------------------- */
1815
1816 static int
1817 roff_cblock(ROFF_ARGS)
1818 {
1819
1820 /*
1821 * A block-close `..' should only be invoked as a child of an
1822 * ignore macro, otherwise raise a warning and just ignore it.
1823 */
1824
1825 if (r->last == NULL) {
1826 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1827 ln, ppos, "..");
1828 return ROFF_IGN;
1829 }
1830
1831 switch (r->last->tok) {
1832 case ROFF_am:
1833 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1834 case ROFF_ami:
1835 case ROFF_de:
1836 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1837 case ROFF_dei:
1838 case ROFF_ig:
1839 break;
1840 default:
1841 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1842 ln, ppos, "..");
1843 return ROFF_IGN;
1844 }
1845
1846 if (buf->buf[pos] != '\0')
1847 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1848 ".. %s", buf->buf + pos);
1849
1850 roffnode_pop(r);
1851 roffnode_cleanscope(r);
1852 return ROFF_IGN;
1853
1854 }
1855
1856 static int
1857 roffnode_cleanscope(struct roff *r)
1858 {
1859 int inloop;
1860
1861 inloop = 0;
1862 while (r->last != NULL) {
1863 if (--r->last->endspan != 0)
1864 break;
1865 inloop += roffnode_pop(r);
1866 }
1867 return inloop;
1868 }
1869
1870 static int
1871 roff_ccond(struct roff *r, int ln, int ppos)
1872 {
1873 if (NULL == r->last) {
1874 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1875 ln, ppos, "\\}");
1876 return 0;
1877 }
1878
1879 switch (r->last->tok) {
1880 case ROFF_el:
1881 case ROFF_ie:
1882 case ROFF_if:
1883 case ROFF_while:
1884 break;
1885 default:
1886 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1887 ln, ppos, "\\}");
1888 return 0;
1889 }
1890
1891 if (r->last->endspan > -1) {
1892 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1893 ln, ppos, "\\}");
1894 return 0;
1895 }
1896
1897 return roffnode_pop(r) + roffnode_cleanscope(r);
1898 }
1899
1900 static int
1901 roff_block(ROFF_ARGS)
1902 {
1903 const char *name, *value;
1904 char *call, *cp, *iname, *rname;
1905 size_t csz, namesz, rsz;
1906 int deftype;
1907
1908 /* Ignore groff compatibility mode for now. */
1909
1910 if (tok == ROFF_de1)
1911 tok = ROFF_de;
1912 else if (tok == ROFF_dei1)
1913 tok = ROFF_dei;
1914 else if (tok == ROFF_am1)
1915 tok = ROFF_am;
1916 else if (tok == ROFF_ami1)
1917 tok = ROFF_ami;
1918
1919 /* Parse the macro name argument. */
1920
1921 cp = buf->buf + pos;
1922 if (tok == ROFF_ig) {
1923 iname = NULL;
1924 namesz = 0;
1925 } else {
1926 iname = cp;
1927 namesz = roff_getname(r, &cp, ln, ppos);
1928 iname[namesz] = '\0';
1929 }
1930
1931 /* Resolve the macro name argument if it is indirect. */
1932
1933 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1934 deftype = ROFFDEF_USER;
1935 name = roff_getstrn(r, iname, namesz, &deftype);
1936 if (name == NULL) {
1937 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1938 r->parse, ln, (int)(iname - buf->buf),
1939 "%.*s", (int)namesz, iname);
1940 namesz = 0;
1941 } else
1942 namesz = strlen(name);
1943 } else
1944 name = iname;
1945
1946 if (namesz == 0 && tok != ROFF_ig) {
1947 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
1948 ln, ppos, roff_name[tok]);
1949 return ROFF_IGN;
1950 }
1951
1952 roffnode_push(r, tok, name, ln, ppos);
1953
1954 /*
1955 * At the beginning of a `de' macro, clear the existing string
1956 * with the same name, if there is one. New content will be
1957 * appended from roff_block_text() in multiline mode.
1958 */
1959
1960 if (tok == ROFF_de || tok == ROFF_dei) {
1961 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1962 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
1963 } else if (tok == ROFF_am || tok == ROFF_ami) {
1964 deftype = ROFFDEF_ANY;
1965 value = roff_getstrn(r, iname, namesz, &deftype);
1966 switch (deftype) { /* Before appending, ... */
1967 case ROFFDEF_PRE: /* copy predefined to user-defined. */
1968 roff_setstrn(&r->strtab, name, namesz,
1969 value, strlen(value), 0);
1970 break;
1971 case ROFFDEF_REN: /* call original standard macro. */
1972 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
1973 (int)strlen(value), value);
1974 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
1975 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
1976 free(call);
1977 break;
1978 case ROFFDEF_STD: /* rename and call standard macro. */
1979 rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
1980 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
1981 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
1982 (int)rsz, rname);
1983 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
1984 free(call);
1985 free(rname);
1986 break;
1987 default:
1988 break;
1989 }
1990 }
1991
1992 if (*cp == '\0')
1993 return ROFF_IGN;
1994
1995 /* Get the custom end marker. */
1996
1997 iname = cp;
1998 namesz = roff_getname(r, &cp, ln, ppos);
1999
2000 /* Resolve the end marker if it is indirect. */
2001
2002 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2003 deftype = ROFFDEF_USER;
2004 name = roff_getstrn(r, iname, namesz, &deftype);
2005 if (name == NULL) {
2006 mandoc_vmsg(MANDOCERR_STR_UNDEF,
2007 r->parse, ln, (int)(iname - buf->buf),
2008 "%.*s", (int)namesz, iname);
2009 namesz = 0;
2010 } else
2011 namesz = strlen(name);
2012 } else
2013 name = iname;
2014
2015 if (namesz)
2016 r->last->end = mandoc_strndup(name, namesz);
2017
2018 if (*cp != '\0')
2019 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
2020 ln, pos, ".%s ... %s", roff_name[tok], cp);
2021
2022 return ROFF_IGN;
2023 }
2024
2025 static int
2026 roff_block_sub(ROFF_ARGS)
2027 {
2028 enum roff_tok t;
2029 int i, j;
2030
2031 /*
2032 * First check whether a custom macro exists at this level. If
2033 * it does, then check against it. This is some of groff's
2034 * stranger behaviours. If we encountered a custom end-scope
2035 * tag and that tag also happens to be a "real" macro, then we
2036 * need to try interpreting it again as a real macro. If it's
2037 * not, then return ignore. Else continue.
2038 */
2039
2040 if (r->last->end) {
2041 for (i = pos, j = 0; r->last->end[j]; j++, i++)
2042 if (buf->buf[i] != r->last->end[j])
2043 break;
2044
2045 if (r->last->end[j] == '\0' &&
2046 (buf->buf[i] == '\0' ||
2047 buf->buf[i] == ' ' ||
2048 buf->buf[i] == '\t')) {
2049 roffnode_pop(r);
2050 roffnode_cleanscope(r);
2051
2052 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2053 i++;
2054
2055 pos = i;
2056 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2057 TOKEN_NONE)
2058 return ROFF_RERUN;
2059 return ROFF_IGN;
2060 }
2061 }
2062
2063 /*
2064 * If we have no custom end-query or lookup failed, then try
2065 * pulling it out of the hashtable.
2066 */
2067
2068 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2069
2070 if (t != ROFF_cblock) {
2071 if (tok != ROFF_ig)
2072 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2073 return ROFF_IGN;
2074 }
2075
2076 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2077 }
2078
2079 static int
2080 roff_block_text(ROFF_ARGS)
2081 {
2082
2083 if (tok != ROFF_ig)
2084 roff_setstr(r, r->last->name, buf->buf + pos, 2);
2085
2086 return ROFF_IGN;
2087 }
2088
2089 static int
2090 roff_cond_sub(ROFF_ARGS)
2091 {
2092 char *ep;
2093 int endloop, irc, rr;
2094 enum roff_tok t;
2095
2096 irc = ROFF_IGN;
2097 rr = r->last->rule;
2098 endloop = tok != ROFF_while ? ROFF_IGN :
2099 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2100 if (roffnode_cleanscope(r))
2101 irc |= endloop;
2102
2103 /*
2104 * If `\}' occurs on a macro line without a preceding macro,
2105 * drop the line completely.
2106 */
2107
2108 ep = buf->buf + pos;
2109 if (ep[0] == '\\' && ep[1] == '}')
2110 rr = 0;
2111
2112 /*
2113 * The closing delimiter `\}' rewinds the conditional scope
2114 * but is otherwise ignored when interpreting the line.
2115 */
2116
2117 while ((ep = strchr(ep, '\\')) != NULL) {
2118 switch (ep[1]) {
2119 case '}':
2120 memmove(ep, ep + 2, strlen(ep + 2) + 1);
2121 if (roff_ccond(r, ln, ep - buf->buf))
2122 irc |= endloop;
2123 break;
2124 case '\0':
2125 ++ep;
2126 break;
2127 default:
2128 ep += 2;
2129 break;
2130 }
2131 }
2132
2133 /*
2134 * Fully handle known macros when they are structurally
2135 * required or when the conditional evaluated to true.
2136 */
2137
2138 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2139 irc |= t != TOKEN_NONE && (rr || roffs[t].flags & ROFFMAC_STRUCT) ?
2140 (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs) :
2141 rr ? ROFF_CONT : ROFF_IGN;
2142 return irc;
2143 }
2144
2145 static int
2146 roff_cond_text(ROFF_ARGS)
2147 {
2148 char *ep;
2149 int endloop, irc, rr;
2150
2151 irc = ROFF_IGN;
2152 rr = r->last->rule;
2153 endloop = tok != ROFF_while ? ROFF_IGN :
2154 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2155 if (roffnode_cleanscope(r))
2156 irc |= endloop;
2157
2158 /*
2159 * If `\}' occurs on a text line with neither preceding
2160 * nor following characters, drop the line completely.
2161 */
2162
2163 ep = buf->buf + pos;
2164 if (strcmp(ep, "\\}") == 0)
2165 rr = 0;
2166
2167 /*
2168 * The closing delimiter `\}' rewinds the conditional scope
2169 * but is otherwise ignored when interpreting the line.
2170 */
2171
2172 while ((ep = strchr(ep, '\\')) != NULL) {
2173 switch (ep[1]) {
2174 case '}':
2175 memmove(ep, ep + 2, strlen(ep + 2) + 1);
2176 if (roff_ccond(r, ln, ep - buf->buf))
2177 irc |= endloop;
2178 break;
2179 case '\0':
2180 ++ep;
2181 break;
2182 default:
2183 ep += 2;
2184 break;
2185 }
2186 }
2187 if (rr)
2188 irc |= ROFF_CONT;
2189 return irc;
2190 }
2191
2192 /* --- handling of numeric and conditional expressions -------------------- */
2193
2194 /*
2195 * Parse a single signed integer number. Stop at the first non-digit.
2196 * If there is at least one digit, return success and advance the
2197 * parse point, else return failure and let the parse point unchanged.
2198 * Ignore overflows, treat them just like the C language.
2199 */
2200 static int
2201 roff_getnum(const char *v, int *pos, int *res, int flags)
2202 {
2203 int myres, scaled, n, p;
2204
2205 if (NULL == res)
2206 res = &myres;
2207
2208 p = *pos;
2209 n = v[p] == '-';
2210 if (n || v[p] == '+')
2211 p++;
2212
2213 if (flags & ROFFNUM_WHITE)
2214 while (isspace((unsigned char)v[p]))
2215 p++;
2216
2217 for (*res = 0; isdigit((unsigned char)v[p]); p++)
2218 *res = 10 * *res + v[p] - '0';
2219 if (p == *pos + n)
2220 return 0;
2221
2222 if (n)
2223 *res = -*res;
2224
2225 /* Each number may be followed by one optional scaling unit. */
2226
2227 switch (v[p]) {
2228 case 'f':
2229 scaled = *res * 65536;
2230 break;
2231 case 'i':
2232 scaled = *res * 240;
2233 break;
2234 case 'c':
2235 scaled = *res * 240 / 2.54;
2236 break;
2237 case 'v':
2238 case 'P':
2239 scaled = *res * 40;
2240 break;
2241 case 'm':
2242 case 'n':
2243 scaled = *res * 24;
2244 break;
2245 case 'p':
2246 scaled = *res * 10 / 3;
2247 break;
2248 case 'u':
2249 scaled = *res;
2250 break;
2251 case 'M':
2252 scaled = *res * 6 / 25;
2253 break;
2254 default:
2255 scaled = *res;
2256 p--;
2257 break;
2258 }
2259 if (flags & ROFFNUM_SCALE)
2260 *res = scaled;
2261
2262 *pos = p + 1;
2263 return 1;
2264 }
2265
/*
 * Evaluate a string comparison condition.
 * The character at v[*pos] serves as the delimiter.
 * Return 1 if the text between its first and second occurrence
 * equals the text between its second and third occurrence,
 * or 0 otherwise.
 * Move the cursor behind the third occurrence,
 * or to the end of the line if there is none.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim, *lhs, *rhs;
	int		 equal;

	equal = 0;
	delim = v + *pos;		/* opening delimiter */
	lhs = delim + 1;		/* cursor in the first string */
	rhs = strchr(lhs, *delim);	/* cursor in the second string */

	if (rhs != NULL) {
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: look for the last delimiter. */
				rhs = strchr(rhs, *delim);
				break;
			}
			if (*rhs == *delim) {
				/* Both strings ended simultaneously. */
				equal = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = lhs + strlen(lhs);
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return equal;
}
2308
2309 /*
2310 * Evaluate an optionally negated single character, numerical,
2311 * or string condition.
2312 */
2313 static int
2314 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2315 {
2316 const char *start, *end;
2317 char *cp, *name;
2318 size_t sz;
2319 int deftype, len, number, savepos, istrue, wanttrue;
2320
2321 if ('!' == v[*pos]) {
2322 wanttrue = 0;
2323 (*pos)++;
2324 } else
2325 wanttrue = 1;
2326
2327 switch (v[*pos]) {
2328 case '\0':
2329 return 0;
2330 case 'n':
2331 case 'o':
2332 (*pos)++;
2333 return wanttrue;
2334 case 'e':
2335 case 't':
2336 case 'v':
2337 (*pos)++;
2338 return !wanttrue;
2339 case 'c':
2340 do {
2341 (*pos)++;
2342 } while (v[*pos] == ' ');
2343
2344 /*
2345 * Quirk for groff compatibility:
2346 * The horizontal tab is neither available nor unavailable.
2347 */
2348
2349 if (v[*pos] == '\t') {
2350 (*pos)++;
2351 return 0;
2352 }
2353
2354 /* Printable ASCII characters are available. */
2355
2356 if (v[*pos] != '\\') {
2357 (*pos)++;
2358 return wanttrue;
2359 }
2360
2361 end = v + ++*pos;
2362 switch (mandoc_escape(&end, &start, &len)) {
2363 case ESCAPE_SPECIAL:
2364 istrue = mchars_spec2cp(start, len) != -1;
2365 break;
2366 case ESCAPE_UNICODE:
2367 istrue = 1;
2368 break;
2369 case ESCAPE_NUMBERED:
2370 istrue = mchars_num2char(start, len) != -1;
2371 break;
2372 default:
2373 istrue = !wanttrue;
2374 break;
2375 }
2376 *pos = end - v;
2377 return istrue == wanttrue;
2378 case 'd':
2379 case 'r':
2380 cp = v + *pos + 1;
2381 while (*cp == ' ')
2382 cp++;
2383 name = cp;
2384 sz = roff_getname(r, &cp, ln, cp - v);
2385 if (sz == 0)
2386 istrue = 0;
2387 else if (v[*pos] == 'r')
2388 istrue = roff_hasregn(r, name, sz);
2389 else {
2390 deftype = ROFFDEF_ANY;
2391 roff_getstrn(r, name, sz, &deftype);
2392 istrue = !!deftype;
2393 }
2394 *pos = cp - v;
2395 return istrue == wanttrue;
2396 default:
2397 break;
2398 }
2399
2400 savepos = *pos;
2401 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2402 return (number > 0) == wanttrue;
2403 else if (*pos == savepos)
2404 return roff_evalstrcond(v, pos) == wanttrue;
2405 else
2406 return 0;
2407 }
2408
2409 static int
2410 roff_line_ignore(ROFF_ARGS)
2411 {
2412
2413 return ROFF_IGN;
2414 }
2415
2416 static int
2417 roff_insec(ROFF_ARGS)
2418 {
2419
2420 mandoc_msg(MANDOCERR_REQ_INSEC, r->parse,
2421 ln, ppos, roff_name[tok]);
2422 return ROFF_IGN;
2423 }
2424
2425 static int
2426 roff_unsupp(ROFF_ARGS)
2427 {
2428
2429 mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse,
2430 ln, ppos, roff_name[tok]);
2431 return ROFF_IGN;
2432 }
2433
2434 static int
2435 roff_cond(ROFF_ARGS)
2436 {
2437 int irc;
2438
2439 roffnode_push(r, tok, NULL, ln, ppos);
2440
2441 /*
2442 * An `.el' has no conditional body: it will consume the value
2443 * of the current rstack entry set in prior `ie' calls or
2444 * defaults to DENY.
2445 *
2446 * If we're not an `el', however, then evaluate the conditional.
2447 */
2448
2449 r->last->rule = tok == ROFF_el ?
2450 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2451 roff_evalcond(r, ln, buf->buf, &pos);
2452
2453 /*
2454 * An if-else will put the NEGATION of the current evaluated
2455 * conditional into the stack of rules.
2456 */
2457
2458 if (tok == ROFF_ie) {
2459 if (r->rstackpos + 1 == r->rstacksz) {
2460 r->rstacksz += 16;
2461 r->rstack = mandoc_reallocarray(r->rstack,
2462 r->rstacksz, sizeof(int));
2463 }
2464 r->rstack[++r->rstackpos] = !r->last->rule;
2465 }
2466
2467 /* If the parent has false as its rule, then so do we. */
2468
2469 if (r->last->parent && !r->last->parent->rule)
2470 r->last->rule = 0;
2471
2472 /*
2473 * Determine scope.
2474 * If there is nothing on the line after the conditional,
2475 * not even whitespace, use next-line scope.
2476 * Except that .while does not support next-line scope.
2477 */
2478
2479 if (buf->buf[pos] == '\0' && tok != ROFF_while) {
2480 r->last->endspan = 2;
2481 goto out;
2482 }
2483
2484 while (buf->buf[pos] == ' ')
2485 pos++;
2486
2487 /* An opening brace requests multiline scope. */
2488
2489 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2490 r->last->endspan = -1;
2491 pos += 2;
2492 while (buf->buf[pos] == ' ')
2493 pos++;
2494 goto out;
2495 }
2496
2497 /*
2498 * Anything else following the conditional causes
2499 * single-line scope. Warn if the scope contains
2500 * nothing but trailing whitespace.
2501 */
2502
2503 if (buf->buf[pos] == '\0')
2504 mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
2505 ln, ppos, roff_name[tok]);
2506
2507 r->last->endspan = 1;
2508
2509 out:
2510 *offs = pos;
2511 irc = ROFF_RERUN;
2512 if (tok == ROFF_while)
2513 irc |= ROFF_WHILE;
2514 return irc;
2515 }
2516
2517 static int
2518 roff_ds(ROFF_ARGS)
2519 {
2520 char *string;
2521 const char *name;
2522 size_t namesz;
2523
2524 /* Ignore groff compatibility mode for now. */
2525
2526 if (tok == ROFF_ds1)
2527 tok = ROFF_ds;
2528 else if (tok == ROFF_as1)
2529 tok = ROFF_as;
2530
2531 /*
2532 * The first word is the name of the string.
2533 * If it is empty or terminated by an escape sequence,
2534 * abort the `ds' request without defining anything.
2535 */
2536
2537 name = string = buf->buf + pos;
2538 if (*name == '\0')
2539 return ROFF_IGN;
2540
2541 namesz = roff_getname(r, &string, ln, pos);
2542 if (name[namesz] == '\\')
2543 return ROFF_IGN;
2544
2545 /* Read past the initial double-quote, if any. */
2546 if (*string == '"')
2547 string++;
2548
2549 /* The rest is the value. */
2550 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2551 ROFF_as == tok);
2552 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2553 return ROFF_IGN;
2554 }
2555
/*
 * Parse a single operator, one or two characters long.
 * On success, store a one-character code for the operator in *res,
 * move the parse point behind the operator, and return that code.
 * On failure, store the offending character in *res, leave the
 * parse point unchanged, and return 0.
 * Two-character operators are encoded as follows:
 * <= as 'l', >= as 'g', <> as '!', <? as 'i', >? as 'a', == as '='.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	char	 op, two;

	op = *res = v[*pos];
	two = '\0';

	/* Only peek at the next character if it can matter. */

	if (op == '<') {
		switch (v[*pos + 1]) {
		case '=':
			two = 'l';
			break;
		case '>':
			two = '!';
			break;
		case '?':
			two = 'i';
			break;
		default:
			break;
		}
	} else if (op == '>') {
		switch (v[*pos + 1]) {
		case '=':
			two = 'g';
			break;
		case '?':
			two = 'a';
			break;
		default:
			break;
		}
	} else if (op == '=') {
		if (v[*pos + 1] == '=')
			two = '=';
	} else if (op == '\0' || strchr("+-*/%&:", op) == NULL)
		return 0;

	if (two != '\0') {
		*res = two;
		*pos += 2;
	} else
		(*pos)++;

	return *res;
}
2619
2620 /*
2621 * Evaluate either a parenthesized numeric expression
2622 * or a single signed integer number.
2623 */
2624 static int
2625 roff_evalpar(struct roff *r, int ln,
2626 const char *v, int *pos, int *res, int flags)
2627 {
2628
2629 if ('(' != v[*pos])
2630 return roff_getnum(v, pos, res, flags);
2631
2632 (*pos)++;
2633 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2634 return 0;
2635
2636 /*
2637 * Omission of the closing parenthesis
2638 * is an error in validation mode,
2639 * but ignored in evaluation mode.
2640 */
2641
2642 if (')' == v[*pos])
2643 (*pos)++;
2644 else if (NULL == res)
2645 return 0;
2646
2647 return 1;
2648 }
2649
2650 /*
2651 * Evaluate a complete numeric expression.
2652 * Proceed left to right, there is no concept of precedence.
2653 */
2654 static int
2655 roff_evalnum(struct roff *r, int ln, const char *v,
2656 int *pos, int *res, int flags)
2657 {
2658 int mypos, operand2;
2659 char operator;
2660
2661 if (NULL == pos) {
2662 mypos = 0;
2663 pos = &mypos;
2664 }
2665
2666 if (flags & ROFFNUM_WHITE)
2667 while (isspace((unsigned char)v[*pos]))
2668 (*pos)++;
2669
2670 if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2671 return 0;
2672
2673 while (1) {
2674 if (flags & ROFFNUM_WHITE)
2675 while (isspace((unsigned char)v[*pos]))
2676 (*pos)++;
2677
2678 if ( ! roff_getop(v, pos, &operator))
2679 break;
2680
2681 if (flags & ROFFNUM_WHITE)
2682 while (isspace((unsigned char)v[*pos]))
2683 (*pos)++;
2684
2685 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2686 return 0;
2687
2688 if (flags & ROFFNUM_WHITE)
2689 while (isspace((unsigned char)v[*pos]))
2690 (*pos)++;
2691
2692 if (NULL == res)
2693 continue;
2694
2695 switch (operator) {
2696 case '+':
2697 *res += operand2;
2698 break;
2699 case '-':
2700 *res -= operand2;
2701 break;
2702 case '*':
2703 *res *= operand2;
2704 break;
2705 case '/':
2706 if (operand2 == 0) {
2707 mandoc_msg(MANDOCERR_DIVZERO,
2708 r->parse, ln, *pos, v);
2709 *res = 0;
2710 break;
2711 }
2712 *res /= operand2;
2713 break;
2714 case '%':
2715 if (operand2 == 0) {
2716 mandoc_msg(MANDOCERR_DIVZERO,
2717 r->parse, ln, *pos, v);
2718 *res = 0;
2719 break;
2720 }
2721 *res %= operand2;
2722 break;
2723 case '<':
2724 *res = *res < operand2;
2725 break;
2726 case '>':
2727 *res = *res > operand2;
2728 break;
2729 case 'l':
2730 *res = *res <= operand2;
2731 break;
2732 case 'g':
2733 *res = *res >= operand2;
2734 break;
2735 case '=':
2736 *res = *res == operand2;
2737 break;
2738 case '!':
2739 *res = *res != operand2;
2740 break;
2741 case '&':
2742 *res = *res && operand2;
2743 break;
2744 case ':':
2745 *res = *res || operand2;
2746 break;
2747 case 'i':
2748 if (operand2 < *res)
2749 *res = operand2;
2750 break;
2751 case 'a':
2752 if (operand2 > *res)
2753 *res = operand2;
2754 break;
2755 default:
2756 abort();
2757 }
2758 }
2759 return 1;
2760 }
2761
2762 /* --- register management ------------------------------------------------ */
2763
/*
 * Set a number register given by a NUL-terminated name.
 * The step of the register is left unchanged.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	const size_t	 len = strlen(name);

	roff_setregn(r, name, len, val, sign, INT_MIN);
}
2769
2770 static void
2771 roff_setregn(struct roff *r, const char *name, size_t len,
2772 int val, char sign, int step)
2773 {
2774 struct roffreg *reg;
2775
2776 /* Search for an existing register with the same name. */
2777 reg = r->regtab;
2778
2779 while (reg != NULL && (reg->key.sz != len ||
2780 strncmp(reg->key.p, name, len) != 0))
2781 reg = reg->next;
2782
2783 if (NULL == reg) {
2784 /* Create a new register. */
2785 reg = mandoc_malloc(sizeof(struct roffreg));
2786 reg->key.p = mandoc_strndup(name, len);
2787 reg->key.sz = len;
2788 reg->val = 0;
2789 reg->step = 0;
2790 reg->next = r->regtab;
2791 r->regtab = reg;
2792 }
2793
2794 if ('+' == sign)
2795 reg->val += val;
2796 else if ('-' == sign)
2797 reg->val -= val;
2798 else
2799 reg->val = val;
2800 if (step != INT_MIN)
2801 reg->step = step;
2802 }
2803
2804 /*
2805 * Handle some predefined read-only number registers.
2806 * For now, return -1 if the requested register is not predefined;
2807 * in case a predefined read-only register having the value -1
2808 * were to turn up, another special value would have to be chosen.
2809 */
2810 static int
2811 roff_getregro(const struct roff *r, const char *name)
2812 {
2813
2814 switch (*name) {
2815 case '$': /* Number of arguments of the last macro evaluated. */
2816 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
2817 case 'A': /* ASCII approximation mode is always off. */
2818 return 0;
2819 case 'g': /* Groff compatibility mode is always on. */
2820 return 1;
2821 case 'H': /* Fixed horizontal resolution. */
2822 return 24;
2823 case 'j': /* Always adjust left margin only. */
2824 return 0;
2825 case 'T': /* Some output device is always defined. */
2826 return 1;
2827 case 'V': /* Fixed vertical resolution. */
2828 return 40;
2829 default:
2830 return -1;
2831 }
2832 }
2833
/*
 * Return the value of the number register given by
 * a NUL-terminated name, without stepping it.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	const size_t	 len = strlen(name);

	return roff_getregn(r, name, len, '\0');
}
2839
2840 static int
2841 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
2842 {
2843 struct roffreg *reg;
2844 int val;
2845
2846 if ('.' == name[0] && 2 == len) {
2847 val = roff_getregro(r, name + 1);
2848 if (-1 != val)
2849 return val;
2850 }
2851
2852 for (reg = r->regtab; reg; reg = reg->next) {
2853 if (len == reg->key.sz &&
2854 0 == strncmp(name, reg->key.p, len)) {
2855 switch (sign) {
2856 case '+':
2857 reg->val += reg->step;
2858 break;
2859 case '-':
2860 reg->val -= reg->step;
2861 break;
2862 default:
2863 break;
2864 }
2865 return reg->val;
2866 }
2867 }
2868
2869 roff_setregn(r, name, len, 0, '\0', INT_MIN);
2870 return 0;
2871 }
2872
2873 static int
2874 roff_hasregn(const struct roff *r, const char *name, size_t len)
2875 {
2876 struct roffreg *reg;
2877 int val;
2878
2879 if ('.' == name[0] && 2 == len) {
2880 val = roff_getregro(r, name + 1);
2881 if (-1 != val)
2882 return 1;
2883 }
2884
2885 for (reg = r->regtab; reg; reg = reg->next)
2886 if (len == reg->key.sz &&
2887 0 == strncmp(name, reg->key.p, len))
2888 return 1;
2889
2890 return 0;
2891 }
2892
2893 static void
2894 roff_freereg(struct roffreg *reg)
2895 {
2896 struct roffreg *old_reg;
2897
2898 while (NULL != reg) {
2899 free(reg->key.p);
2900 old_reg = reg;
2901 reg = reg->next;
2902 free(old_reg);
2903 }
2904 }
2905
2906 static int
2907 roff_nr(ROFF_ARGS)
2908 {
2909 char *key, *val, *step;
2910 size_t keysz;
2911 int iv, is, len;
2912 char sign;
2913
2914 key = val = buf->buf + pos;
2915 if (*key == '\0')
2916 return ROFF_IGN;
2917
2918 keysz = roff_getname(r, &val, ln, pos);
2919 if (key[keysz] == '\\')
2920 return ROFF_IGN;
2921
2922 sign = *val;
2923 if (sign == '+' || sign == '-')
2924 val++;
2925
2926 len = 0;
2927 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
2928 return ROFF_IGN;
2929
2930 step = val + len;
2931 while (isspace((unsigned char)*step))
2932 step++;
2933 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
2934 is = INT_MIN;
2935
2936 roff_setregn(r, key, keysz, iv, sign, is);
2937 return ROFF_IGN;
2938 }
2939
2940 static int
2941 roff_rr(ROFF_ARGS)
2942 {
2943 struct roffreg *reg, **prev;
2944 char *name, *cp;
2945 size_t namesz;
2946
2947 name = cp = buf->buf + pos;
2948 if (*name == '\0')
2949 return ROFF_IGN;
2950 namesz = roff_getname(r, &cp, ln, pos);
2951 name[namesz] = '\0';
2952
2953 prev = &r->regtab;
2954 while (1) {
2955 reg = *prev;
2956 if (reg == NULL || !strcmp(name, reg->key.p))
2957 break;
2958 prev = &reg->next;
2959 }
2960 if (reg != NULL) {
2961 *prev = reg->next;
2962 free(reg->key.p);
2963 free(reg);
2964 }
2965 return ROFF_IGN;
2966 }
2967
2968 /* --- handler functions for roff requests -------------------------------- */
2969
2970 static int
2971 roff_rm(ROFF_ARGS)
2972 {
2973 const char *name;
2974 char *cp;
2975 size_t namesz;
2976
2977 cp = buf->buf + pos;
2978 while (*cp != '\0') {
2979 name = cp;
2980 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
2981 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
2982 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2983 if (name[namesz] == '\\')
2984 break;
2985 }
2986 return ROFF_IGN;
2987 }
2988
2989 static int
2990 roff_it(ROFF_ARGS)
2991 {
2992 int iv;
2993
2994 /* Parse the number of lines. */
2995
2996 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
2997 mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
2998 ln, ppos, buf->buf + 1);
2999 return ROFF_IGN;
3000 }
3001
3002 while (isspace((unsigned char)buf->buf[pos]))
3003 pos++;
3004
3005 /*
3006 * Arm the input line trap.
3007 * Special-casing "an-trap" is an ugly workaround to cope
3008 * with DocBook stupidly fiddling with man(7) internals.
3009 */
3010
3011 roffit_lines = iv;
3012 roffit_macro = mandoc_strdup(iv != 1 ||
3013 strcmp(buf->buf + pos, "an-trap") ?
3014 buf->buf + pos : "br");
3015 return ROFF_IGN;
3016 }
3017
3018 static int
3019 roff_Dd(ROFF_ARGS)
3020 {
3021 int mask;
3022 enum roff_tok t, te;
3023
3024 switch (tok) {
3025 case ROFF_Dd:
3026 tok = MDOC_Dd;
3027 te = MDOC_MAX;
3028 if (r->format == 0)
3029 r->format = MPARSE_MDOC;
3030 mask = MPARSE_MDOC | MPARSE_QUICK;
3031 break;
3032 case ROFF_TH:
3033 tok = MAN_TH;
3034 te = MAN_MAX;
3035 if (r->format == 0)
3036 r->format = MPARSE_MAN;
3037 mask = MPARSE_QUICK;
3038 break;
3039 default:
3040 abort();
3041 }
3042 if ((r->options & mask) == 0)
3043 for (t = tok; t < te; t++)
3044 roff_setstr(r, roff_name[t], NULL, 0);
3045 return ROFF_CONT;
3046 }
3047
3048 static int
3049 roff_TE(ROFF_ARGS)
3050 {
3051 if (r->tbl == NULL) {
3052 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
3053 ln, ppos, "TE");
3054 return ROFF_IGN;
3055 }
3056 if (tbl_end(r->tbl, 0) == 0) {
3057 r->tbl = NULL;
3058 free(buf->buf);
3059 buf->buf = mandoc_strdup(".sp");
3060 buf->sz = 4;
3061 *offs = 0;
3062 return ROFF_REPARSE;
3063 }
3064 r->tbl = NULL;
3065 return ROFF_IGN;
3066 }
3067
3068 static int
3069 roff_T_(ROFF_ARGS)
3070 {
3071
3072 if (NULL == r->tbl)
3073 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
3074 ln, ppos, "T&");
3075 else
3076 tbl_restart(ln, ppos, r->tbl);
3077
3078 return ROFF_IGN;
3079 }
3080
3081 /*
3082 * Handle in-line equation delimiters.
3083 */
3084 static int
3085 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
3086 {
3087 char *cp1, *cp2;
3088 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
3089
3090 /*
3091 * Outside equations, look for an opening delimiter.
3092 * If we are inside an equation, we already know it is
3093 * in-line, or this function wouldn't have been called;
3094 * so look for a closing delimiter.
3095 */
3096
3097 cp1 = buf->buf + pos;
3098 cp2 = strchr(cp1, r->eqn == NULL ?
3099 r->last_eqn->odelim : r->last_eqn->cdelim);
3100 if (cp2 == NULL)
3101 return ROFF_CONT;
3102
3103 *cp2++ = '\0';
3104 bef_pr = bef_nl = aft_nl = aft_pr = "";
3105
3106 /* Handle preceding text, protecting whitespace. */
3107
3108 if (*buf->buf != '\0') {
3109 if (r->eqn == NULL)
3110 bef_pr = "\\&";
3111 bef_nl = "\n";
3112 }
3113
3114 /*
3115 * Prepare replacing the delimiter with an equation macro
3116 * and drop leading white space from the equation.
3117 */
3118
3119 if (r->eqn == NULL) {
3120 while (*cp2 == ' ')
3121 cp2++;
3122 mac = ".EQ";
3123 } else
3124 mac = ".EN";
3125
3126 /* Handle following text, protecting whitespace. */
3127
3128 if (*cp2 != '\0') {
3129 aft_nl = "\n";
3130 if (r->eqn != NULL)
3131 aft_pr = "\\&";
3132 }
3133
3134 /* Do the actual replacement. */
3135
3136 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
3137 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
3138 free(buf->buf);
3139 buf->buf = cp1;
3140
3141 /* Toggle the in-line state of the eqn subsystem. */
3142
3143 r->eqn_inline = r->eqn == NULL;
3144 return ROFF_REPARSE;
3145 }
3146
3147 static int
3148 roff_EQ(ROFF_ARGS)
3149 {
3150 struct roff_node *n;
3151
3152 if (r->man->macroset == MACROSET_MAN)
3153 man_breakscope(r->man, ROFF_EQ);
3154 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3155 if (ln > r->man->last->line)
3156 n->flags |= NODE_LINE;
3157 n->eqn = eqn_box_new();
3158 roff_node_append(r->man, n);
3159 r->man->next = ROFF_NEXT_SIBLING;
3160
3161 assert(r->eqn == NULL);
3162 if (r->last_eqn == NULL)
3163 r->last_eqn = eqn_alloc(r->parse);
3164 else
3165 eqn_reset(r->last_eqn);
3166 r->eqn = r->last_eqn;
3167 r->eqn->node = n;
3168
3169 if (buf->buf[pos] != '\0')
3170 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
3171 ".EQ %s", buf->buf + pos);
3172
3173 return ROFF_IGN;
3174 }
3175
3176 static int
3177 roff_EN(ROFF_ARGS)
3178 {
3179 if (r->eqn != NULL) {
3180 eqn_parse(r->eqn);
3181 r->eqn = NULL;
3182 } else
3183 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
3184 if (buf->buf[pos] != '\0')
3185 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
3186 "EN %s", buf->buf + pos);
3187 return ROFF_IGN;
3188 }
3189
3190 static int
3191 roff_TS(ROFF_ARGS)
3192 {
3193 if (r->tbl != NULL) {
3194 mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
3195 ln, ppos, "TS breaks TS");
3196 tbl_end(r->tbl, 0);
3197 }
3198 r->tbl = tbl_alloc(ppos, ln, r->parse, r->last_tbl);
3199 if (r->last_tbl == NULL)
3200 r->first_tbl = r->tbl;
3201 r->last_tbl = r->tbl;
3202 return ROFF_IGN;
3203 }
3204
3205 static int
3206 roff_onearg(ROFF_ARGS)
3207 {
3208 struct roff_node *n;
3209 char *cp;
3210 int npos;
3211
3212 if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3213 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3214 tok == ROFF_ti))
3215 man_breakscope(r->man, tok);
3216
3217 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3218 r->man->last = roffce_node;
3219 r->man->next = ROFF_NEXT_SIBLING;
3220 }
3221
3222 roff_elem_alloc(r->man, ln, ppos, tok);
3223 n = r->man->last;
3224
3225 cp = buf->buf + pos;
3226 if (*cp != '\0') {
3227 while (*cp != '\0' && *cp != ' ')
3228 cp++;
3229 while (*cp == ' ')
3230 *cp++ = '\0';
3231 if (*cp != '\0')
3232 mandoc_vmsg(MANDOCERR_ARG_EXCESS,
3233 r->parse, ln, cp - buf->buf,
3234 "%s ... %s", roff_name[tok], cp);
3235 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3236 }
3237
3238 if (tok == ROFF_ce || tok == ROFF_rj) {
3239 if (r->man->last->type == ROFFT_ELEM) {
3240 roff_word_alloc(r->man, ln, pos, "1");
3241 r->man->last->flags |= NODE_NOSRC;
3242 }
3243 npos = 0;
3244 if (roff_evalnum(r, ln, r->man->last->string, &npos,
3245 &roffce_lines, 0) == 0) {
3246 mandoc_vmsg(MANDOCERR_CE_NONUM,
3247 r->parse, ln, pos, "ce %s", buf->buf + pos);
3248 roffce_lines = 1;
3249 }
3250 if (roffce_lines < 1) {
3251 r->man->last = r->man->last->parent;
3252 roffce_node = NULL;
3253 roffce_lines = 0;
3254 } else
3255 roffce_node = r->man->last->parent;
3256 } else {
3257 n->flags |= NODE_VALID | NODE_ENDED;
3258 r->man->last = n;
3259 }
3260 n->flags |= NODE_LINE;
3261 r->man->next = ROFF_NEXT_SIBLING;
3262 return ROFF_IGN;
3263 }
3264
3265 static int
3266 roff_manyarg(ROFF_ARGS)
3267 {
3268 struct roff_node *n;
3269 char *sp, *ep;
3270
3271 roff_elem_alloc(r->man, ln, ppos, tok);
3272 n = r->man->last;
3273
3274 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3275 while (*ep != '\0' && *ep != ' ')
3276 ep++;
3277 while (*ep == ' ')
3278 *ep++ = '\0';
3279 roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3280 }
3281
3282 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3283 r->man->last = n;
3284 r->man->next = ROFF_NEXT_SIBLING;
3285 return ROFF_IGN;
3286 }
3287
3288 static int
3289 roff_als(ROFF_ARGS)
3290 {
3291 char *oldn, *newn, *end, *value;
3292 size_t oldsz, newsz, valsz;
3293
3294 newn = oldn = buf->buf + pos;
3295 if (*newn == '\0')
3296 return ROFF_IGN;
3297
3298 newsz = roff_getname(r, &oldn, ln, pos);
3299 if (newn[newsz] == '\\' || *oldn == '\0')
3300 return ROFF_IGN;
3301
3302 end = oldn;
3303 oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3304 if (oldsz == 0)
3305 return ROFF_IGN;
3306
3307 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3308 (int)oldsz, oldn);
3309 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3310 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3311 free(value);
3312 return ROFF_IGN;
3313 }
3314
3315 static int
3316 roff_br(ROFF_ARGS)
3317 {
3318 if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3319 man_breakscope(r->man, ROFF_br);
3320 roff_elem_alloc(r->man, ln, ppos, ROFF_br);
3321 if (buf->buf[pos] != '\0')
3322 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
3323 "%s %s", roff_name[tok], buf->buf + pos);
3324 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3325 r->man->next = ROFF_NEXT_SIBLING;
3326 return ROFF_IGN;
3327 }
3328
3329 static int
3330 roff_cc(ROFF_ARGS)
3331 {
3332 const char *p;
3333
3334 p = buf->buf + pos;
3335
3336 if (*p == '\0' || (r->control = *p++) == '.')
3337 r->control = '\0';
3338
3339 if (*p != '\0')
3340 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
3341 ln, p - buf->buf, "cc ... %s", p);
3342
3343 return ROFF_IGN;
3344 }
3345
3346 static int
3347 roff_char(ROFF_ARGS)
3348 {
3349 const char *p, *kp, *vp;
3350 size_t ksz, vsz;
3351 int font;
3352
3353 /* Parse the character to be replaced. */
3354
3355 kp = buf->buf + pos;
3356 p = kp + 1;
3357 if (*kp == '\0' || (*kp == '\\' &&
3358 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3359 (*p != ' ' && *p != '\0')) {
3360 mandoc_vmsg(MANDOCERR_CHAR_ARG, r->parse,
3361 ln, pos, "char %s", kp);
3362 return ROFF_IGN;
3363 }
3364 ksz = p - kp;
3365 while (*p == ' ')
3366 p++;
3367
3368 /*
3369 * If the replacement string contains a font escape sequence,
3370 * we have to restore the font at the end.
3371 */
3372
3373 vp = p;
3374 vsz = strlen(p);
3375 font = 0;
3376 while (*p != '\0') {
3377 if (*p++ != '\\')
3378 continue;
3379 switch (mandoc_escape(&p, NULL, NULL)) {
3380 case ESCAPE_FONT:
3381 case ESCAPE_FONTROMAN:
3382 case ESCAPE_FONTITALIC:
3383 case ESCAPE_FONTBOLD:
3384 case ESCAPE_FONTBI:
3385 case ESCAPE_FONTCW:
3386 case ESCAPE_FONTPREV:
3387 font++;
3388 break;
3389 default:
3390 break;
3391 }
3392 }
3393 if (font > 1)
3394 mandoc_msg(MANDOCERR_CHAR_FONT, r->parse,
3395 ln, vp - buf->buf, vp);
3396
3397 /*
3398 * Approximate the effect of .char using the .tr tables.
3399 * XXX In groff, .char and .tr interact differently.
3400 */
3401
3402 if (ksz == 1) {
3403 if (r->xtab == NULL)
3404 r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3405 assert((unsigned int)*kp < 128);
3406 free(r->xtab[(int)*kp].p);
3407 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3408 "%s%s", vp, font ? "\fP" : "");
3409 } else {
3410 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3411 if (font)
3412 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3413 }
3414 return ROFF_IGN;
3415 }
3416
3417 static int
3418 roff_ec(ROFF_ARGS)
3419 {
3420 const char *p;
3421
3422 p = buf->buf + pos;
3423 if (*p == '\0')
3424 r->escape = '\\';
3425 else {
3426 r->escape = *p;
3427 if (*++p != '\0')
3428 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
3429 ln, p - buf->buf, "ec ... %s", p);
3430 }
3431 return ROFF_IGN;
3432 }
3433
3434 static int
3435 roff_eo(ROFF_ARGS)
3436 {
3437 r->escape = '\0';
3438 if (buf->buf[pos] != '\0')
3439 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse,
3440 ln, pos, "eo %s", buf->buf + pos);
3441 return ROFF_IGN;
3442 }
3443
3444 static int
3445 roff_nop(ROFF_ARGS)
3446 {
3447 while (buf->buf[pos] == ' ')
3448 pos++;
3449 *offs = pos;
3450 return ROFF_RERUN;
3451 }
3452
3453 static int
3454 roff_tr(ROFF_ARGS)
3455 {
3456 const char *p, *first, *second;
3457 size_t fsz, ssz;
3458 enum mandoc_esc esc;
3459
3460 p = buf->buf + pos;
3461
3462 if (*p == '\0') {
3463 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr");
3464 return ROFF_IGN;
3465 }
3466
3467 while (*p != '\0') {
3468 fsz = ssz = 1;
3469
3470 first = p++;
3471 if (*first == '\\') {
3472 esc = mandoc_escape(&p, NULL, NULL);
3473 if (esc == ESCAPE_ERROR) {
3474 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
3475 ln, (int)(p - buf->buf), first);
3476 return ROFF_IGN;
3477 }
3478 fsz = (size_t)(p - first);
3479 }
3480
3481 second = p++;
3482 if (*second == '\\') {
3483 esc = mandoc_escape(&p, NULL, NULL);
3484 if (esc == ESCAPE_ERROR) {
3485 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
3486 ln, (int)(p - buf->buf), second);
3487 return ROFF_IGN;
3488 }
3489 ssz = (size_t)(p - second);
3490 } else if (*second == '\0') {
3491 mandoc_vmsg(MANDOCERR_TR_ODD, r->parse,
3492 ln, first - buf->buf, "tr %s", first);
3493 second = " ";
3494 p--;
3495 }
3496
3497 if (fsz > 1) {
3498 roff_setstrn(&r->xmbtab, first, fsz,
3499 second, ssz, 0);
3500 continue;
3501 }
3502
3503 if (r->xtab == NULL)
3504 r->xtab = mandoc_calloc(128,
3505 sizeof(struct roffstr));
3506
3507 free(r->xtab[(int)*first].p);
3508 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3509 r->xtab[(int)*first].sz = ssz;
3510 }
3511
3512 return ROFF_IGN;
3513 }
3514
3515 /*
3516 * Implementation of the .return request.
3517 * There is no need to call roff_userret() from here.
3518 * The read module will call that after rewinding the reader stack
3519 * to the place from where the current macro was called.
3520 */
3521 static int
3522 roff_return(ROFF_ARGS)
3523 {
3524 if (r->mstackpos >= 0)
3525 return ROFF_IGN | ROFF_USERRET;
3526
3527 mandoc_msg(MANDOCERR_REQ_NOMAC, r->parse, ln, ppos, "return");
3528 return ROFF_IGN;
3529 }
3530
3531 static int
3532 roff_rn(ROFF_ARGS)
3533 {
3534 const char *value;
3535 char *oldn, *newn, *end;
3536 size_t oldsz, newsz;
3537 int deftype;
3538
3539 oldn = newn = buf->buf + pos;
3540 if (*oldn == '\0')
3541 return ROFF_IGN;
3542
3543 oldsz = roff_getname(r, &newn, ln, pos);
3544 if (oldn[oldsz] == '\\' || *newn == '\0')
3545 return ROFF_IGN;
3546
3547 end = newn;
3548 newsz = roff_getname(r, &end, ln, newn - buf->buf);
3549 if (newsz == 0)
3550 return ROFF_IGN;
3551
3552 deftype = ROFFDEF_ANY;
3553 value = roff_getstrn(r, oldn, oldsz, &deftype);
3554 switch (deftype) {
3555 case ROFFDEF_USER:
3556 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3557 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3558 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3559 break;
3560 case ROFFDEF_PRE:
3561 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3562 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3563 break;
3564 case ROFFDEF_REN:
3565 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3566 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3567 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3568 break;
3569 case ROFFDEF_STD:
3570 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3571 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3572 break;
3573 default:
3574 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3575 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3576 break;
3577 }
3578 return ROFF_IGN;
3579 }
3580
3581 static int
3582 roff_shift(ROFF_ARGS)
3583 {
3584 struct mctx *ctx;
3585 int levels, i;
3586
3587 levels = 1;
3588 if (buf->buf[pos] != '\0' &&
3589 roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
3590 mandoc_vmsg(MANDOCERR_CE_NONUM, r->parse,
3591 ln, pos, "shift %s", buf->buf + pos);
3592 levels = 1;
3593 }
3594 if (r->mstackpos < 0) {
3595 mandoc_msg(MANDOCERR_REQ_NOMAC, r->parse, ln, ppos, "shift");
3596 return ROFF_IGN;
3597 }
3598 ctx = r->mstack + r->mstackpos;
3599 if (levels > ctx->argc) {
3600 mandoc_vmsg(MANDOCERR_SHIFT, r->parse,
3601 ln, pos, "%d, but max is %d", levels, ctx->argc);
3602 levels = ctx->argc;
3603 }
3604 if (levels == 0)
3605 return ROFF_IGN;
3606 for (i = 0; i < levels; i++)
3607 free(ctx->argv[i]);
3608 ctx->argc -= levels;
3609 for (i = 0; i < ctx->argc; i++)
3610 ctx->argv[i] = ctx->argv[i + levels];
3611 return ROFF_IGN;
3612 }
3613
3614 static int
3615 roff_so(ROFF_ARGS)
3616 {
3617 char *name, *cp;
3618
3619 name = buf->buf + pos;
3620 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
3621
3622 /*
3623 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3624 * opening anything that's not in our cwd or anything beneath
3625 * it. Thus, explicitly disallow traversing up the file-system
3626 * or using absolute paths.
3627 */
3628
3629 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3630 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
3631 ".so %s", name);
3632 buf->sz = mandoc_asprintf(&cp,
3633 ".sp\nSee the file %s.\n.sp", name) + 1;
3634 free(buf->buf);
3635 buf->buf = cp;
3636 *offs = 0;
3637 return ROFF_REPARSE;
3638 }
3639
3640 *offs = pos;
3641 return ROFF_SO;
3642 }
3643
3644 /* --- user defined strings and macros ------------------------------------ */
3645
3646 static int
3647 roff_userdef(ROFF_ARGS)
3648 {
3649 struct mctx *ctx;
3650 char *arg, *ap, *dst, *src;
3651 size_t sz;
3652
3653 /* Initialize a new macro stack context. */
3654
3655 if (++r->mstackpos == r->mstacksz) {
3656 r->mstack = mandoc_recallocarray(r->mstack,
3657 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
3658 r->mstacksz += 8;
3659 }
3660 ctx = r->mstack + r->mstackpos;
3661 ctx->argsz = 0;
3662 ctx->argc = 0;
3663 ctx->argv = NULL;
3664
3665 /*
3666 * Collect pointers to macro argument strings,
3667 * NUL-terminating them and escaping quotes.
3668 */
3669
3670 src = buf->buf + pos;
3671 while (*src != '\0') {
3672 if (ctx->argc == ctx->argsz) {
3673 ctx->argsz += 8;
3674 ctx->argv = mandoc_reallocarray(ctx->argv,
3675 ctx->argsz, sizeof(*ctx->argv));
3676 }
3677 arg = mandoc_getarg(r->parse, &src, ln, &pos);
3678 sz = 1; /* For the terminating NUL. */
3679 for (ap = arg; *ap != '\0'; ap++)
3680 sz += *ap == '"' ? 4 : 1;
3681 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
3682 for (ap = arg; *ap != '\0'; ap++) {
3683 if (*ap == '"') {
3684 memcpy(dst, "\\(dq", 4);
3685 dst += 4;
3686 } else
3687 *dst++ = *ap;
3688 }
3689 *dst = '\0';
3690 }
3691
3692 /* Replace the macro invocation by the macro definition. */
3693
3694 free(buf->buf);
3695 buf->buf = mandoc_strdup(r->current_string);
3696 buf->sz = strlen(buf->buf) + 1;
3697 *offs = 0;
3698
3699 return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
3700 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
3701 }
3702
3703 /*
3704 * Calling a high-level macro that was renamed with .rn.
3705 * r->current_string has already been set up by roff_parse().
3706 */
3707 static int
3708 roff_renamed(ROFF_ARGS)
3709 {
3710 char *nbuf;
3711
3712 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
3713 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
3714 free(buf->buf);
3715 buf->buf = nbuf;
3716 *offs = 0;
3717 return ROFF_CONT;
3718 }
3719
3720 static size_t
3721 roff_getname(struct roff *r, char **cpp, int ln, int pos)
3722 {
3723 char *name, *cp;
3724 size_t namesz;
3725
3726 name = *cpp;
3727 if ('\0' == *name)
3728 return 0;
3729
3730 /* Read until end of name and terminate it with NUL. */
3731 for (cp = name; 1; cp++) {
3732 if ('\0' == *cp || ' ' == *cp) {
3733 namesz = cp - name;
3734 break;
3735 }
3736 if ('\\' != *cp)
3737 continue;
3738 namesz = cp - name;
3739 if ('{' == cp[1] || '}' == cp[1])
3740 break;
3741 cp++;
3742 if ('\\' == *cp)
3743 continue;
3744 mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
3745 "%.*s", (int)(cp - name + 1), name);
3746 mandoc_escape((const char **)&cp, NULL, NULL);
3747 break;
3748 }
3749
3750 /* Read past spaces. */
3751 while (' ' == *cp)
3752 cp++;
3753
3754 *cpp = cp;
3755 return namesz;
3756 }
3757
3758 /*
3759 * Store *string into the user-defined string called *name.
3760 * To clear an existing entry, call with (*r, *name, NULL, 0).
3761 * append == 0: replace mode
3762 * append == 1: single-line append mode
3763 * append == 2: multiline append mode, append '\n' after each call
3764 */
3765 static void
3766 roff_setstr(struct roff *r, const char *name, const char *string,
3767 int append)
3768 {
3769 size_t namesz;
3770
3771 namesz = strlen(name);
3772 roff_setstrn(&r->strtab, name, namesz, string,
3773 string ? strlen(string) : 0, append);
3774 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3775 }
3776
3777 static void
3778 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
3779 const char *string, size_t stringsz, int append)
3780 {
3781 struct roffkv *n;
3782 char *c;
3783 int i;
3784 size_t oldch, newch;
3785
3786 /* Search for an existing string with the same name. */
3787 n = *r;
3788
3789 while (n && (namesz != n->key.sz ||
3790 strncmp(n->key.p, name, namesz)))
3791 n = n->next;
3792
3793 if (NULL == n) {
3794 /* Create a new string table entry. */
3795 n = mandoc_malloc(sizeof(struct roffkv));
3796 n->key.p = mandoc_strndup(name, namesz);
3797 n->key.sz = namesz;
3798 n->val.p = NULL;
3799 n->val.sz = 0;
3800 n->next = *r;
3801 *r = n;
3802 } else if (0 == append) {
3803 free(n->val.p);
3804 n->val.p = NULL;
3805 n->val.sz = 0;
3806 }
3807
3808 if (NULL == string)
3809 return;
3810
3811 /*
3812 * One additional byte for the '\n' in multiline mode,
3813 * and one for the terminating '\0'.
3814 */
3815 newch = stringsz + (1 < append ? 2u : 1u);
3816
3817 if (NULL == n->val.p) {
3818 n->val.p = mandoc_malloc(newch);
3819 *n->val.p = '\0';
3820 oldch = 0;
3821 } else {
3822 oldch = n->val.sz;
3823 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
3824 }
3825
3826 /* Skip existing content in the destination buffer. */
3827 c = n->val.p + (int)oldch;
3828
3829 /* Append new content to the destination buffer. */
3830 i = 0;
3831 while (i < (int)stringsz) {
3832 /*
3833 * Rudimentary roff copy mode:
3834 * Handle escaped backslashes.
3835 */
3836 if ('\\' == string[i] && '\\' == string[i + 1])
3837 i++;
3838 *c++ = string[i++];
3839 }
3840
3841 /* Append terminating bytes. */
3842 if (1 < append)
3843 *c++ = '\n';
3844
3845 *c = '\0';
3846 n->val.sz = (int)(c - n->val.p);
3847 }
3848
3849 static const char *
3850 roff_getstrn(struct roff *r, const char *name, size_t len,
3851 int *deftype)
3852 {
3853 const struct roffkv *n;
3854 int found, i;
3855 enum roff_tok tok;
3856
3857 found = 0;
3858 for (n = r->strtab; n != NULL; n = n->next) {
3859 if (strncmp(name, n->key.p, len) != 0 ||
3860 n->key.p[len] != '\0' || n->val.p == NULL)
3861 continue;
3862 if (*deftype & ROFFDEF_USER) {
3863 *deftype = ROFFDEF_USER;
3864 return n->val.p;
3865 } else {
3866 found = 1;
3867 break;
3868 }
3869 }
3870 for (n = r->rentab; n != NULL; n = n->next) {
3871 if (strncmp(name, n->key.p, len) != 0 ||
3872 n->key.p[len] != '\0' || n->val.p == NULL)
3873 continue;
3874 if (*deftype & ROFFDEF_REN) {
3875 *deftype = ROFFDEF_REN;
3876 return n->val.p;
3877 } else {
3878 found = 1;
3879 break;
3880 }
3881 }
3882 for (i = 0; i < PREDEFS_MAX; i++) {
3883 if (strncmp(name, predefs[i].name, len) != 0 ||
3884 predefs[i].name[len] != '\0')
3885 continue;
3886 if (*deftype & ROFFDEF_PRE) {
3887 *deftype = ROFFDEF_PRE;
3888 return predefs[i].str;
3889 } else {
3890 found = 1;
3891 break;
3892 }
3893 }
3894 if (r->man->macroset != MACROSET_MAN) {
3895 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
3896 if (strncmp(name, roff_name[tok], len) != 0 ||
3897 roff_name[tok][len] != '\0')
3898 continue;
3899 if (*deftype & ROFFDEF_STD) {
3900 *deftype = ROFFDEF_STD;
3901 return NULL;
3902 } else {
3903 found = 1;
3904 break;
3905 }
3906 }
3907 }
3908 if (r->man->macroset != MACROSET_MDOC) {
3909 for (tok = MAN_TH; tok < MAN_MAX; tok++) {
3910 if (strncmp(name, roff_name[tok], len) != 0 ||
3911 roff_name[tok][len] != '\0')
3912 continue;
3913 if (*deftype & ROFFDEF_STD) {
3914 *deftype = ROFFDEF_STD;
3915 return NULL;
3916 } else {
3917 found = 1;
3918 break;
3919 }
3920 }
3921 }
3922
3923 if (found == 0 && *deftype != ROFFDEF_ANY) {
3924 if (*deftype & ROFFDEF_REN) {
3925 /*
3926 * This might still be a request,
3927 * so do not treat it as undefined yet.
3928 */
3929 *deftype = ROFFDEF_UNDEF;
3930 return NULL;
3931 }
3932
3933 /* Using an undefined string defines it to be empty. */
3934
3935 roff_setstrn(&r->strtab, name, len, "", 0, 0);
3936 roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
3937 }
3938
3939 *deftype = 0;
3940 return NULL;
3941 }
3942
3943 static void
3944 roff_freestr(struct roffkv *r)
3945 {
3946 struct roffkv *n, *nn;
3947
3948 for (n = r; n; n = nn) {
3949 free(n->key.p);
3950 free(n->val.p);
3951 nn = n->next;
3952 free(n);
3953 }
3954 }
3955
3956 /* --- accessors and utility functions ------------------------------------ */
3957
3958 /*
3959 * Duplicate an input string, making the appropriate character
3960 * conversations (as stipulated by `tr') along the way.
3961 * Returns a heap-allocated string with all the replacements made.
3962 */
3963 char *
3964 roff_strdup(const struct roff *r, const char *p)
3965 {
3966 const struct roffkv *cp;
3967 char *res;
3968 const char *pp;
3969 size_t ssz, sz;
3970 enum mandoc_esc esc;
3971
3972 if (NULL == r->xmbtab && NULL == r->xtab)
3973 return mandoc_strdup(p);
3974 else if ('\0' == *p)
3975 return mandoc_strdup("");
3976
3977 /*
3978 * Step through each character looking for term matches
3979 * (remember that a `tr' can be invoked with an escape, which is
3980 * a glyph but the escape is multi-character).
3981 * We only do this if the character hash has been initialised
3982 * and the string is >0 length.
3983 */
3984
3985 res = NULL;
3986 ssz = 0;
3987
3988 while ('\0' != *p) {
3989 assert((unsigned int)*p < 128);
3990 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
3991 sz = r->xtab[(int)*p].sz;
3992 res = mandoc_realloc(res, ssz + sz + 1);
3993 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
3994 ssz += sz;
3995 p++;
3996 continue;
3997 } else if ('\\' != *p) {
3998 res = mandoc_realloc(res, ssz + 2);
3999 res[ssz++] = *p++;
4000 continue;
4001 }
4002
4003 /* Search for term matches. */
4004 for (cp = r->xmbtab; cp; cp = cp->next)
4005 if (0 == strncmp(p, cp->key.p, cp->key.sz))
4006 break;
4007
4008 if (NULL != cp) {
4009 /*
4010 * A match has been found.
4011 * Append the match to the array and move
4012 * forward by its keysize.
4013 */
4014 res = mandoc_realloc(res,
4015 ssz + cp->val.sz + 1);
4016 memcpy(res + ssz, cp->val.p, cp->val.sz);
4017 ssz += cp->val.sz;
4018 p += (int)cp->key.sz;
4019 continue;
4020 }
4021
4022 /*
4023 * Handle escapes carefully: we need to copy
4024 * over just the escape itself, or else we might
4025 * do replacements within the escape itself.
4026 * Make sure to pass along the bogus string.
4027 */
4028 pp = p++;
4029 esc = mandoc_escape(&p, NULL, NULL);
4030 if (ESCAPE_ERROR == esc) {
4031 sz = strlen(pp);
4032 res = mandoc_realloc(res, ssz + sz + 1);
4033 memcpy(res + ssz, pp, sz);
4034 break;
4035 }
4036 /*
4037 * We bail out on bad escapes.
4038 * No need to warn: we already did so when
4039 * roff_res() was called.
4040 */
4041 sz = (int)(p - pp);
4042 res = mandoc_realloc(res, ssz + sz + 1);
4043 memcpy(res + ssz, pp, sz);
4044 ssz += sz;
4045 }
4046
4047 res[(int)ssz] = '\0';
4048 return res;
4049 }
4050
4051 int
4052 roff_getformat(const struct roff *r)
4053 {
4054
4055 return r->format;
4056 }
4057
4058 /*
4059 * Find out whether a line is a macro line or not.
4060 * If it is, adjust the current position and return one; if it isn't,
4061 * return zero and don't change the current position.
4062 * If the control character has been set with `.cc', then let that grain
4063 * precedence.
4064 * This is slighly contrary to groff, where using the non-breaking
4065 * control character when `cc' has been invoked will cause the
4066 * non-breaking macro contents to be printed verbatim.
4067 */
4068 int
4069 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4070 {
4071 int pos;
4072
4073 pos = *ppos;
4074
4075 if (r->control != '\0' && cp[pos] == r->control)
4076 pos++;
4077 else if (r->control != '\0')
4078 return 0;
4079 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4080 pos += 2;
4081 else if ('.' == cp[pos] || '\'' == cp[pos])
4082 pos++;
4083 else
4084 return 0;
4085
4086 while (' ' == cp[pos] || '\t' == cp[pos])
4087 pos++;
4088
4089 *ppos = pos;
4090 return 1;
4091 }