]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
If man(7) next-line scope is open and the line ends with \c,
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.340 2018/08/24 23:12:33 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31 #include "mandoc.h"
32 #include "mandoc_aux.h"
33 #include "mandoc_ohash.h"
34 #include "roff.h"
35 #include "libmandoc.h"
36 #include "roff_int.h"
37 #include "libroff.h"
38
39 /* Maximum number of string expansions per line, to break infinite loops. */
40 #define EXPAND_LIMIT 1000
41
42 /* Types of definitions of macros and strings. */
/* Every type occupies its own bit, so types can be OR-ed into a mask. */
43 #define ROFFDEF_USER (1 << 1) /* User-defined. */
44 #define ROFFDEF_PRE (1 << 2) /* Predefined. */
45 #define ROFFDEF_REN (1 << 3) /* Renamed standard macro. */
46 #define ROFFDEF_STD (1 << 4) /* mdoc(7) or man(7) macro. */
/* Mask combining the four kinds of definitions listed above. */
47 #define ROFFDEF_ANY (ROFFDEF_USER | ROFFDEF_PRE | \
48 ROFFDEF_REN | ROFFDEF_STD)
49 #define ROFFDEF_UNDEF (1 << 5) /* Completely undefined. */
50
51 /* --- data types --------------------------------------------------------- */
52
/*
 * Minimal string buffer: a character buffer plus its cached length.
 */
struct roffstr {
	char		*p;	/* NUL-terminated buffer */
	size_t		 sz;	/* cached strlen(p) */
};
60
61 /*
62 * A key-value roffstr pair as part of a singly-linked list.
63 */
64 struct roffkv {
65 struct roffstr key;
66 struct roffstr val;
67 struct roffkv *next; /* next in list */
68 };
69
70 /*
71 * A single number register as part of a singly-linked list.
72 */
73 struct roffreg {
74 struct roffstr key;
75 int val;
76 int step;
77 struct roffreg *next;
78 };
79
80 /*
81 * Association of request and macro names with token IDs.
82 */
83 struct roffreq {
84 enum roff_tok tok;
85 char name[];
86 };
87
/*
 * Processing context of one macro call.
 * Nested macro calls need one context each.
 */
struct mctx {
	char		**argv;		/* argument vector */
	int		  argc;		/* argument count */
	int		  argsz;	/* NOTE(review): presumably the
					 * allocated size of argv; confirm */
};
97
/*
 * State of one roff parser instance.
 */
struct roff {
	/* Owners and stacks. */
	struct mparse	*parse;		/* parse point */
	struct roff_man	*man;		/* mdoc or man parser */
	struct roffnode	*last;		/* leaf of the node stack */
	struct mctx	*mstack;	/* stack of macro contexts */
	int		*rstack;	/* stack of inverted `ie' values */

	/* Lookup tables. */
	struct ohash	*reqtab;	/* request lookup table */
	struct roffreg	*regtab;	/* number registers */
	struct roffkv	*strtab;	/* user-defined strings and macros */
	struct roffkv	*rentab;	/* renamed strings and macros */
	struct roffkv	*xmbtab;	/* multi-byte `tr' table */
	struct roffstr	*xtab;		/* single-byte `tr' table */
	const char	*current_string; /* value of last called user macro */

	/* Tables and equations. */
	struct tbl_node	*first_tbl;	/* first table parsed */
	struct tbl_node	*last_tbl;	/* last table parsed */
	struct tbl_node	*tbl;		/* table currently being parsed */
	struct eqn_node	*last_eqn;	/* equation parser */
	struct eqn_node	*eqn;		/* active equation parser */
	int		 eqn_inline;	/* current equation is inline */

	/* Options, stack bookkeeping and special characters. */
	int		 options;	/* parse options */
	int		 mstacksz;	/* current size of mstack */
	int		 mstackpos;	/* position in mstack */
	int		 rstacksz;	/* current size limit of rstack */
	int		 rstackpos;	/* position in rstack */
	int		 format;	/* current file is mdoc or man */
	char		 control;	/* control character */
	char		 escape;	/* escape character */
};
126
127 struct roffnode {
128 enum roff_tok tok; /* type of node */
129 struct roffnode *parent; /* up one in stack */
130 int line; /* parse line */
131 int col; /* parse col */
132 char *name; /* node name, e.g. macro name */
133 char *end; /* end-rules: custom token */
134 int endspan; /* end-rules: next-line or infty */
135 int rule; /* current evaluation rule */
136 };
137
/*
 * Parameter list shared by the request handler functions.
 * It is spelled as a macro so that prototypes, definitions and the
 * roffproc pointer type below all use exactly the same parameters.
 */
138 #define ROFF_ARGS struct roff *r, /* parse ctx */ \
139 enum roff_tok tok, /* tok of macro */ \
140 struct buf *buf, /* input buffer */ \
141 int ln, /* parse line */ \
142 int ppos, /* original pos in buffer */ \
143 int pos, /* current pos in buffer */ \
144 int *offs /* reset offset of buffer data */
145
/* Pointer to a handler function taking the ROFF_ARGS parameters. */
146 typedef int (*roffproc)(ROFF_ARGS);
147
148 struct roffmac {
149 roffproc proc; /* process new macro */
150 roffproc text; /* process as child text of macro */
151 roffproc sub; /* process as child of macro */
152 int flags;
153 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
154 };
155
/*
 * A predefined string: the input name and the symbol replacing it.
 */
struct predef {
	const char	*name;	/* predefined input name */
	const char	*str;	/* replacement symbol */
};

/* Expands to one struct predef initializer, including the comma. */
#define	PREDEF(name_, str_) \
	{ (name_), (str_) },
163
164 /* --- function prototypes ------------------------------------------------ */
165
/*
 * Forward declarations of the file-local functions.
 * Functions declared with ROFF_ARGS have the parameter list of the
 * roffproc function pointer type.
 */
166 static int roffnode_cleanscope(struct roff *);
167 static int roffnode_pop(struct roff *);
168 static void roffnode_push(struct roff *, enum roff_tok,
169 const char *, int, int);
170 static void roff_addtbl(struct roff_man *, struct tbl_node *);
171 static int roff_als(ROFF_ARGS);
172 static int roff_block(ROFF_ARGS);
173 static int roff_block_text(ROFF_ARGS);
174 static int roff_block_sub(ROFF_ARGS);
175 static int roff_br(ROFF_ARGS);
176 static int roff_cblock(ROFF_ARGS);
177 static int roff_cc(ROFF_ARGS);
178 static int roff_ccond(struct roff *, int, int);
179 static int roff_cond(ROFF_ARGS);
180 static int roff_cond_text(ROFF_ARGS);
181 static int roff_cond_sub(ROFF_ARGS);
182 static int roff_ds(ROFF_ARGS);
183 static int roff_ec(ROFF_ARGS);
184 static int roff_eo(ROFF_ARGS);
185 static int roff_eqndelim(struct roff *, struct buf *, int);
186 static int roff_evalcond(struct roff *r, int, char *, int *);
187 static int roff_evalnum(struct roff *, int,
188 const char *, int *, int *, int);
189 static int roff_evalpar(struct roff *, int,
190 const char *, int *, int *, int);
191 static int roff_evalstrcond(const char *, int *);
192 static void roff_free1(struct roff *);
193 static void roff_freereg(struct roffreg *);
194 static void roff_freestr(struct roffkv *);
195 static size_t roff_getname(struct roff *, char **, int, int);
196 static int roff_getnum(const char *, int *, int *, int);
197 static int roff_getop(const char *, int *, char *);
198 static int roff_getregn(struct roff *,
199 const char *, size_t, char);
200 static int roff_getregro(const struct roff *,
201 const char *name);
202 static const char *roff_getstrn(struct roff *,
203 const char *, size_t, int *);
204 static int roff_hasregn(const struct roff *,
205 const char *, size_t);
206 static int roff_insec(ROFF_ARGS);
207 static int roff_it(ROFF_ARGS);
208 static int roff_line_ignore(ROFF_ARGS);
209 static void roff_man_alloc1(struct roff_man *);
210 static void roff_man_free1(struct roff_man *);
211 static int roff_manyarg(ROFF_ARGS);
212 static int roff_nop(ROFF_ARGS);
213 static int roff_nr(ROFF_ARGS);
214 static int roff_onearg(ROFF_ARGS);
215 static enum roff_tok roff_parse(struct roff *, char *, int *,
216 int, int);
217 static int roff_parsetext(struct roff *, struct buf *,
218 int, int *);
219 static int roff_renamed(ROFF_ARGS);
220 static int roff_res(struct roff *, struct buf *, int, int);
221 static int roff_return(ROFF_ARGS);
222 static int roff_rm(ROFF_ARGS);
223 static int roff_rn(ROFF_ARGS);
224 static int roff_rr(ROFF_ARGS);
225 static void roff_setregn(struct roff *, const char *,
226 size_t, int, char, int);
227 static void roff_setstr(struct roff *,
228 const char *, const char *, int);
229 static void roff_setstrn(struct roffkv **, const char *,
230 size_t, const char *, size_t, int);
231 static int roff_shift(ROFF_ARGS);
232 static int roff_so(ROFF_ARGS);
233 static int roff_tr(ROFF_ARGS);
234 static int roff_Dd(ROFF_ARGS);
235 static int roff_TE(ROFF_ARGS);
236 static int roff_TS(ROFF_ARGS);
237 static int roff_EQ(ROFF_ARGS);
238 static int roff_EN(ROFF_ARGS);
239 static int roff_T_(ROFF_ARGS);
240 static int roff_unsupp(ROFF_ARGS);
241 static int roff_userdef(ROFF_ARGS);
242
243 /* --- constant data ------------------------------------------------------ */
244
/* Flag bits for number parsing; each flag names the function honouring it. */
245 #define ROFFNUM_SCALE (1 << 0) /* Honour scaling in roff_getnum(). */
246 #define ROFFNUM_WHITE (1 << 1) /* Skip whitespace in roff_evalnum(). */
247
/*
 * Names of requests and macros.  roffhash_alloc() indexes this table
 * (through roff_name) with enum roff_tok values and skips NULL entries,
 * so the position of every string matters: do not reorder, insert or
 * delete entries without adjusting the token enumeration accordingly.
 * NOTE(review): the NULL entries presumably correspond to boundary
 * tokens that have no name of their own -- confirm against roff.h.
 */
248 const char *__roff_name[MAN_MAX + 1] = {
249 "br", "ce", "ft", "ll",
250 "mc", "po", "rj", "sp",
251 "ta", "ti", NULL,
252 "ab", "ad", "af", "aln",
253 "als", "am", "am1", "ami",
254 "ami1", "as", "as1", "asciify",
255 "backtrace", "bd", "bleedat", "blm",
256 "box", "boxa", "bp", "BP",
257 "break", "breakchar", "brnl", "brp",
258 "brpnl", "c2", "cc",
259 "cf", "cflags", "ch", "char",
260 "chop", "class", "close", "CL",
261 "color", "composite", "continue", "cp",
262 "cropat", "cs", "cu", "da",
263 "dch", "Dd", "de", "de1",
264 "defcolor", "dei", "dei1", "device",
265 "devicem", "di", "do", "ds",
266 "ds1", "dwh", "dt", "ec",
267 "ecr", "ecs", "el", "em",
268 "EN", "eo", "EP", "EQ",
269 "errprint", "ev", "evc", "ex",
270 "fallback", "fam", "fc", "fchar",
271 "fcolor", "fdeferlig", "feature", "fkern",
272 "fl", "flig", "fp", "fps",
273 "fschar", "fspacewidth", "fspecial", "ftr",
274 "fzoom", "gcolor", "hc", "hcode",
275 "hidechar", "hla", "hlm", "hpf",
276 "hpfa", "hpfcode", "hw", "hy",
277 "hylang", "hylen", "hym", "hypp",
278 "hys", "ie", "if", "ig",
279 "index", "it", "itc", "IX",
280 "kern", "kernafter", "kernbefore", "kernpair",
281 "lc", "lc_ctype", "lds", "length",
282 "letadj", "lf", "lg", "lhang",
283 "linetabs", "lnr", "lnrf", "lpfx",
284 "ls", "lsm", "lt",
285 "mediasize", "minss", "mk", "mso",
286 "na", "ne", "nh", "nhychar",
287 "nm", "nn", "nop", "nr",
288 "nrf", "nroff", "ns", "nx",
289 "open", "opena", "os", "output",
290 "padj", "papersize", "pc", "pev",
291 "pi", "PI", "pl", "pm",
292 "pn", "pnr", "ps",
293 "psbb", "pshape", "pso", "ptr",
294 "pvs", "rchar", "rd", "recursionlimit",
295 "return", "rfschar", "rhang",
296 "rm", "rn", "rnn", "rr",
297 "rs", "rt", "schar", "sentchar",
298 "shc", "shift", "sizes", "so",
299 "spacewidth", "special", "spreadwarn", "ss",
300 "sty", "substring", "sv", "sy",
301 "T&", "tc", "TE",
302 "TH", "tkf", "tl",
303 "tm", "tm1", "tmc", "tr",
304 "track", "transchar", "trf", "trimat",
305 "trin", "trnt", "troff", "TS",
306 "uf", "ul", "unformat", "unwatch",
307 "unwatchn", "vpt", "vs", "warn",
308 "warnscale", "watch", "watchlength", "watchn",
309 "wh", "while", "write", "writec",
310 "writem", "xflag", ".", NULL,
311 NULL, "text",
312 "Dd", "Dt", "Os", "Sh",
313 "Ss", "Pp", "D1", "Dl",
314 "Bd", "Ed", "Bl", "El",
315 "It", "Ad", "An", "Ap",
316 "Ar", "Cd", "Cm", "Dv",
317 "Er", "Ev", "Ex", "Fa",
318 "Fd", "Fl", "Fn", "Ft",
319 "Ic", "In", "Li", "Nd",
320 "Nm", "Op", "Ot", "Pa",
321 "Rv", "St", "Va", "Vt",
322 "Xr", "%A", "%B", "%D",
323 "%I", "%J", "%N", "%O",
324 "%P", "%R", "%T", "%V",
325 "Ac", "Ao", "Aq", "At",
326 "Bc", "Bf", "Bo", "Bq",
327 "Bsx", "Bx", "Db", "Dc",
328 "Do", "Dq", "Ec", "Ef",
329 "Em", "Eo", "Fx", "Ms",
330 "No", "Ns", "Nx", "Ox",
331 "Pc", "Pf", "Po", "Pq",
332 "Qc", "Ql", "Qo", "Qq",
333 "Re", "Rs", "Sc", "So",
334 "Sq", "Sm", "Sx", "Sy",
335 "Tn", "Ux", "Xc", "Xo",
336 "Fo", "Fc", "Oo", "Oc",
337 "Bk", "Ek", "Bt", "Hf",
338 "Fr", "Ud", "Lb", "Lp",
339 "Lk", "Mt", "Brq", "Bro",
340 "Brc", "%C", "Es", "En",
341 "Dx", "%Q", "%U", "Ta",
342 NULL,
343 "TH", "SH", "SS", "TP",
344 "TQ",
345 "LP", "PP", "P", "IP",
346 "HP", "SM", "SB", "BI",
347 "IB", "BR", "RB", "R",
348 "B", "I", "IR", "RI",
349 "nf", "fi",
350 "RE", "RS", "DT", "UC",
351 "PD", "AT", "in",
352 "SY", "YS", "OP",
353 "EX", "EE", "UR",
354 "UE", "MT", "ME", NULL
355 };
/* Read-only view of the name table; roffhash_alloc() reads names via it. */
356 const char *const *roff_name = __roff_name;
357
/*
 * Handler table with TOKEN_NONE entries.  Each initializer follows the
 * field order of struct roffmac: the handler for a new macro (proc),
 * for child text (text), for child macros (sub), and the flags.
 * The trailing comment on each entry names the request it belongs to.
 * NOTE(review): the entries appear in the same order as the request
 * names at the start of __roff_name[], presumably so that both tables
 * can be indexed by enum roff_tok -- confirm before changing the order.
 */
358 static struct roffmac roffs[TOKEN_NONE] = {
359 { roff_br, NULL, NULL, 0 }, /* br */
360 { roff_onearg, NULL, NULL, 0 }, /* ce */
361 { roff_onearg, NULL, NULL, 0 }, /* ft */
362 { roff_onearg, NULL, NULL, 0 }, /* ll */
363 { roff_onearg, NULL, NULL, 0 }, /* mc */
364 { roff_onearg, NULL, NULL, 0 }, /* po */
365 { roff_onearg, NULL, NULL, 0 }, /* rj */
366 { roff_onearg, NULL, NULL, 0 }, /* sp */
367 { roff_manyarg, NULL, NULL, 0 }, /* ta */
368 { roff_onearg, NULL, NULL, 0 }, /* ti */
369 { NULL, NULL, NULL, 0 }, /* ROFF_MAX */
370 { roff_unsupp, NULL, NULL, 0 }, /* ab */
371 { roff_line_ignore, NULL, NULL, 0 }, /* ad */
372 { roff_line_ignore, NULL, NULL, 0 }, /* af */
373 { roff_unsupp, NULL, NULL, 0 }, /* aln */
374 { roff_als, NULL, NULL, 0 }, /* als */
375 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am */
376 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am1 */
377 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami */
378 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami1 */
379 { roff_ds, NULL, NULL, 0 }, /* as */
380 { roff_ds, NULL, NULL, 0 }, /* as1 */
381 { roff_unsupp, NULL, NULL, 0 }, /* asciify */
382 { roff_line_ignore, NULL, NULL, 0 }, /* backtrace */
383 { roff_line_ignore, NULL, NULL, 0 }, /* bd */
384 { roff_line_ignore, NULL, NULL, 0 }, /* bleedat */
385 { roff_unsupp, NULL, NULL, 0 }, /* blm */
386 { roff_unsupp, NULL, NULL, 0 }, /* box */
387 { roff_unsupp, NULL, NULL, 0 }, /* boxa */
388 { roff_line_ignore, NULL, NULL, 0 }, /* bp */
389 { roff_unsupp, NULL, NULL, 0 }, /* BP */
390 { roff_unsupp, NULL, NULL, 0 }, /* break */
391 { roff_line_ignore, NULL, NULL, 0 }, /* breakchar */
392 { roff_line_ignore, NULL, NULL, 0 }, /* brnl */
393 { roff_br, NULL, NULL, 0 }, /* brp */
394 { roff_line_ignore, NULL, NULL, 0 }, /* brpnl */
395 { roff_unsupp, NULL, NULL, 0 }, /* c2 */
396 { roff_cc, NULL, NULL, 0 }, /* cc */
397 { roff_insec, NULL, NULL, 0 }, /* cf */
398 { roff_line_ignore, NULL, NULL, 0 }, /* cflags */
399 { roff_line_ignore, NULL, NULL, 0 }, /* ch */
400 { roff_unsupp, NULL, NULL, 0 }, /* char */
401 { roff_unsupp, NULL, NULL, 0 }, /* chop */
402 { roff_line_ignore, NULL, NULL, 0 }, /* class */
403 { roff_insec, NULL, NULL, 0 }, /* close */
404 { roff_unsupp, NULL, NULL, 0 }, /* CL */
405 { roff_line_ignore, NULL, NULL, 0 }, /* color */
406 { roff_unsupp, NULL, NULL, 0 }, /* composite */
407 { roff_unsupp, NULL, NULL, 0 }, /* continue */
408 { roff_line_ignore, NULL, NULL, 0 }, /* cp */
409 { roff_line_ignore, NULL, NULL, 0 }, /* cropat */
410 { roff_line_ignore, NULL, NULL, 0 }, /* cs */
411 { roff_line_ignore, NULL, NULL, 0 }, /* cu */
412 { roff_unsupp, NULL, NULL, 0 }, /* da */
413 { roff_unsupp, NULL, NULL, 0 }, /* dch */
414 { roff_Dd, NULL, NULL, 0 }, /* Dd */
415 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de */
416 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de1 */
417 { roff_line_ignore, NULL, NULL, 0 }, /* defcolor */
418 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei */
419 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei1 */
420 { roff_unsupp, NULL, NULL, 0 }, /* device */
421 { roff_unsupp, NULL, NULL, 0 }, /* devicem */
422 { roff_unsupp, NULL, NULL, 0 }, /* di */
423 { roff_unsupp, NULL, NULL, 0 }, /* do */
424 { roff_ds, NULL, NULL, 0 }, /* ds */
425 { roff_ds, NULL, NULL, 0 }, /* ds1 */
426 { roff_unsupp, NULL, NULL, 0 }, /* dwh */
427 { roff_unsupp, NULL, NULL, 0 }, /* dt */
428 { roff_ec, NULL, NULL, 0 }, /* ec */
429 { roff_unsupp, NULL, NULL, 0 }, /* ecr */
430 { roff_unsupp, NULL, NULL, 0 }, /* ecs */
431 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* el */
432 { roff_unsupp, NULL, NULL, 0 }, /* em */
433 { roff_EN, NULL, NULL, 0 }, /* EN */
434 { roff_eo, NULL, NULL, 0 }, /* eo */
435 { roff_unsupp, NULL, NULL, 0 }, /* EP */
436 { roff_EQ, NULL, NULL, 0 }, /* EQ */
437 { roff_line_ignore, NULL, NULL, 0 }, /* errprint */
438 { roff_unsupp, NULL, NULL, 0 }, /* ev */
439 { roff_unsupp, NULL, NULL, 0 }, /* evc */
440 { roff_unsupp, NULL, NULL, 0 }, /* ex */
441 { roff_line_ignore, NULL, NULL, 0 }, /* fallback */
442 { roff_line_ignore, NULL, NULL, 0 }, /* fam */
443 { roff_unsupp, NULL, NULL, 0 }, /* fc */
444 { roff_unsupp, NULL, NULL, 0 }, /* fchar */
445 { roff_line_ignore, NULL, NULL, 0 }, /* fcolor */
446 { roff_line_ignore, NULL, NULL, 0 }, /* fdeferlig */
447 { roff_line_ignore, NULL, NULL, 0 }, /* feature */
448 { roff_line_ignore, NULL, NULL, 0 }, /* fkern */
449 { roff_line_ignore, NULL, NULL, 0 }, /* fl */
450 { roff_line_ignore, NULL, NULL, 0 }, /* flig */
451 { roff_line_ignore, NULL, NULL, 0 }, /* fp */
452 { roff_line_ignore, NULL, NULL, 0 }, /* fps */
453 { roff_unsupp, NULL, NULL, 0 }, /* fschar */
454 { roff_line_ignore, NULL, NULL, 0 }, /* fspacewidth */
455 { roff_line_ignore, NULL, NULL, 0 }, /* fspecial */
456 { roff_line_ignore, NULL, NULL, 0 }, /* ftr */
457 { roff_line_ignore, NULL, NULL, 0 }, /* fzoom */
458 { roff_line_ignore, NULL, NULL, 0 }, /* gcolor */
459 { roff_line_ignore, NULL, NULL, 0 }, /* hc */
460 { roff_line_ignore, NULL, NULL, 0 }, /* hcode */
461 { roff_line_ignore, NULL, NULL, 0 }, /* hidechar */
462 { roff_line_ignore, NULL, NULL, 0 }, /* hla */
463 { roff_line_ignore, NULL, NULL, 0 }, /* hlm */
464 { roff_line_ignore, NULL, NULL, 0 }, /* hpf */
465 { roff_line_ignore, NULL, NULL, 0 }, /* hpfa */
466 { roff_line_ignore, NULL, NULL, 0 }, /* hpfcode */
467 { roff_line_ignore, NULL, NULL, 0 }, /* hw */
468 { roff_line_ignore, NULL, NULL, 0 }, /* hy */
469 { roff_line_ignore, NULL, NULL, 0 }, /* hylang */
470 { roff_line_ignore, NULL, NULL, 0 }, /* hylen */
471 { roff_line_ignore, NULL, NULL, 0 }, /* hym */
472 { roff_line_ignore, NULL, NULL, 0 }, /* hypp */
473 { roff_line_ignore, NULL, NULL, 0 }, /* hys */
474 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* ie */
475 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* if */
476 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ig */
477 { roff_unsupp, NULL, NULL, 0 }, /* index */
478 { roff_it, NULL, NULL, 0 }, /* it */
479 { roff_unsupp, NULL, NULL, 0 }, /* itc */
480 { roff_line_ignore, NULL, NULL, 0 }, /* IX */
481 { roff_line_ignore, NULL, NULL, 0 }, /* kern */
482 { roff_line_ignore, NULL, NULL, 0 }, /* kernafter */
483 { roff_line_ignore, NULL, NULL, 0 }, /* kernbefore */
484 { roff_line_ignore, NULL, NULL, 0 }, /* kernpair */
485 { roff_unsupp, NULL, NULL, 0 }, /* lc */
486 { roff_unsupp, NULL, NULL, 0 }, /* lc_ctype */
487 { roff_unsupp, NULL, NULL, 0 }, /* lds */
488 { roff_unsupp, NULL, NULL, 0 }, /* length */
489 { roff_line_ignore, NULL, NULL, 0 }, /* letadj */
490 { roff_insec, NULL, NULL, 0 }, /* lf */
491 { roff_line_ignore, NULL, NULL, 0 }, /* lg */
492 { roff_line_ignore, NULL, NULL, 0 }, /* lhang */
493 { roff_unsupp, NULL, NULL, 0 }, /* linetabs */
494 { roff_unsupp, NULL, NULL, 0 }, /* lnr */
495 { roff_unsupp, NULL, NULL, 0 }, /* lnrf */
496 { roff_unsupp, NULL, NULL, 0 }, /* lpfx */
497 { roff_line_ignore, NULL, NULL, 0 }, /* ls */
498 { roff_unsupp, NULL, NULL, 0 }, /* lsm */
499 { roff_line_ignore, NULL, NULL, 0 }, /* lt */
500 { roff_line_ignore, NULL, NULL, 0 }, /* mediasize */
501 { roff_line_ignore, NULL, NULL, 0 }, /* minss */
502 { roff_line_ignore, NULL, NULL, 0 }, /* mk */
503 { roff_insec, NULL, NULL, 0 }, /* mso */
504 { roff_line_ignore, NULL, NULL, 0 }, /* na */
505 { roff_line_ignore, NULL, NULL, 0 }, /* ne */
506 { roff_line_ignore, NULL, NULL, 0 }, /* nh */
507 { roff_line_ignore, NULL, NULL, 0 }, /* nhychar */
508 { roff_unsupp, NULL, NULL, 0 }, /* nm */
509 { roff_unsupp, NULL, NULL, 0 }, /* nn */
510 { roff_nop, NULL, NULL, 0 }, /* nop */
511 { roff_nr, NULL, NULL, 0 }, /* nr */
512 { roff_unsupp, NULL, NULL, 0 }, /* nrf */
513 { roff_line_ignore, NULL, NULL, 0 }, /* nroff */
514 { roff_line_ignore, NULL, NULL, 0 }, /* ns */
515 { roff_insec, NULL, NULL, 0 }, /* nx */
516 { roff_insec, NULL, NULL, 0 }, /* open */
517 { roff_insec, NULL, NULL, 0 }, /* opena */
518 { roff_line_ignore, NULL, NULL, 0 }, /* os */
519 { roff_unsupp, NULL, NULL, 0 }, /* output */
520 { roff_line_ignore, NULL, NULL, 0 }, /* padj */
521 { roff_line_ignore, NULL, NULL, 0 }, /* papersize */
522 { roff_line_ignore, NULL, NULL, 0 }, /* pc */
523 { roff_line_ignore, NULL, NULL, 0 }, /* pev */
524 { roff_insec, NULL, NULL, 0 }, /* pi */
525 { roff_unsupp, NULL, NULL, 0 }, /* PI */
526 { roff_line_ignore, NULL, NULL, 0 }, /* pl */
527 { roff_line_ignore, NULL, NULL, 0 }, /* pm */
528 { roff_line_ignore, NULL, NULL, 0 }, /* pn */
529 { roff_line_ignore, NULL, NULL, 0 }, /* pnr */
530 { roff_line_ignore, NULL, NULL, 0 }, /* ps */
531 { roff_unsupp, NULL, NULL, 0 }, /* psbb */
532 { roff_unsupp, NULL, NULL, 0 }, /* pshape */
533 { roff_insec, NULL, NULL, 0 }, /* pso */
534 { roff_line_ignore, NULL, NULL, 0 }, /* ptr */
535 { roff_line_ignore, NULL, NULL, 0 }, /* pvs */
536 { roff_unsupp, NULL, NULL, 0 }, /* rchar */
537 { roff_line_ignore, NULL, NULL, 0 }, /* rd */
538 { roff_line_ignore, NULL, NULL, 0 }, /* recursionlimit */
539 { roff_return, NULL, NULL, 0 }, /* return */
540 { roff_unsupp, NULL, NULL, 0 }, /* rfschar */
541 { roff_line_ignore, NULL, NULL, 0 }, /* rhang */
542 { roff_rm, NULL, NULL, 0 }, /* rm */
543 { roff_rn, NULL, NULL, 0 }, /* rn */
544 { roff_unsupp, NULL, NULL, 0 }, /* rnn */
545 { roff_rr, NULL, NULL, 0 }, /* rr */
546 { roff_line_ignore, NULL, NULL, 0 }, /* rs */
547 { roff_line_ignore, NULL, NULL, 0 }, /* rt */
548 { roff_unsupp, NULL, NULL, 0 }, /* schar */
549 { roff_line_ignore, NULL, NULL, 0 }, /* sentchar */
550 { roff_line_ignore, NULL, NULL, 0 }, /* shc */
551 { roff_shift, NULL, NULL, 0 }, /* shift */
552 { roff_line_ignore, NULL, NULL, 0 }, /* sizes */
553 { roff_so, NULL, NULL, 0 }, /* so */
554 { roff_line_ignore, NULL, NULL, 0 }, /* spacewidth */
555 { roff_line_ignore, NULL, NULL, 0 }, /* special */
556 { roff_line_ignore, NULL, NULL, 0 }, /* spreadwarn */
557 { roff_line_ignore, NULL, NULL, 0 }, /* ss */
558 { roff_line_ignore, NULL, NULL, 0 }, /* sty */
559 { roff_unsupp, NULL, NULL, 0 }, /* substring */
560 { roff_line_ignore, NULL, NULL, 0 }, /* sv */
561 { roff_insec, NULL, NULL, 0 }, /* sy */
562 { roff_T_, NULL, NULL, 0 }, /* T& */
563 { roff_unsupp, NULL, NULL, 0 }, /* tc */
564 { roff_TE, NULL, NULL, 0 }, /* TE */
565 { roff_Dd, NULL, NULL, 0 }, /* TH */
566 { roff_line_ignore, NULL, NULL, 0 }, /* tkf */
567 { roff_unsupp, NULL, NULL, 0 }, /* tl */
568 { roff_line_ignore, NULL, NULL, 0 }, /* tm */
569 { roff_line_ignore, NULL, NULL, 0 }, /* tm1 */
570 { roff_line_ignore, NULL, NULL, 0 }, /* tmc */
571 { roff_tr, NULL, NULL, 0 }, /* tr */
572 { roff_line_ignore, NULL, NULL, 0 }, /* track */
573 { roff_line_ignore, NULL, NULL, 0 }, /* transchar */
574 { roff_insec, NULL, NULL, 0 }, /* trf */
575 { roff_line_ignore, NULL, NULL, 0 }, /* trimat */
576 { roff_unsupp, NULL, NULL, 0 }, /* trin */
577 { roff_unsupp, NULL, NULL, 0 }, /* trnt */
578 { roff_line_ignore, NULL, NULL, 0 }, /* troff */
579 { roff_TS, NULL, NULL, 0 }, /* TS */
580 { roff_line_ignore, NULL, NULL, 0 }, /* uf */
581 { roff_line_ignore, NULL, NULL, 0 }, /* ul */
582 { roff_unsupp, NULL, NULL, 0 }, /* unformat */
583 { roff_line_ignore, NULL, NULL, 0 }, /* unwatch */
584 { roff_line_ignore, NULL, NULL, 0 }, /* unwatchn */
585 { roff_line_ignore, NULL, NULL, 0 }, /* vpt */
586 { roff_line_ignore, NULL, NULL, 0 }, /* vs */
587 { roff_line_ignore, NULL, NULL, 0 }, /* warn */
588 { roff_line_ignore, NULL, NULL, 0 }, /* warnscale */
589 { roff_line_ignore, NULL, NULL, 0 }, /* watch */
590 { roff_line_ignore, NULL, NULL, 0 }, /* watchlength */
591 { roff_line_ignore, NULL, NULL, 0 }, /* watchn */
592 { roff_unsupp, NULL, NULL, 0 }, /* wh */
593 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/
594 { roff_insec, NULL, NULL, 0 }, /* write */
595 { roff_insec, NULL, NULL, 0 }, /* writec */
596 { roff_insec, NULL, NULL, 0 }, /* writem */
597 { roff_line_ignore, NULL, NULL, 0 }, /* xflag */
598 { roff_cblock, NULL, NULL, 0 }, /* . */
/*
 * NOTE(review): the last two entries carry no request name; judging
 * by the handler names they presumably serve renamed and user-defined
 * macros -- confirm against the token enumeration.
 */
599 { roff_renamed, NULL, NULL, 0 },
600 { roff_userdef, NULL, NULL, 0 }
601 };
602
603 /* Array of injected predefined strings. */
/*
 * The initializers come from predefs.in.  NOTE(review): PREDEFS_MAX
 * presumably has to match the number of PREDEF() lines in that file;
 * confirm whenever predefs.in changes.
 */
604 #define PREDEFS_MAX 38
605 static const struct predef predefs[PREDEFS_MAX] = {
606 #include "predefs.in"
607 };
608
/*
 * File-scope parser state shared by all roff parser instances;
 * roff_reset() clears all four variables.
 */
609 static int roffce_lines; /* number of input lines to center */
610 static struct roff_node *roffce_node; /* active request */
611 static int roffit_lines; /* number of lines to delay */
612 static char *roffit_macro; /* nil-terminated macro line */
613
614
615 /* --- request table ------------------------------------------------------ */
616
617 struct ohash *
618 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
619 {
620 struct ohash *htab;
621 struct roffreq *req;
622 enum roff_tok tok;
623 size_t sz;
624 unsigned int slot;
625
626 htab = mandoc_malloc(sizeof(*htab));
627 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
628
629 for (tok = mintok; tok < maxtok; tok++) {
630 if (roff_name[tok] == NULL)
631 continue;
632 sz = strlen(roff_name[tok]);
633 req = mandoc_malloc(sizeof(*req) + sz + 1);
634 req->tok = tok;
635 memcpy(req->name, roff_name[tok], sz + 1);
636 slot = ohash_qlookup(htab, req->name);
637 ohash_insert(htab, slot, req);
638 }
639 return htab;
640 }
641
/*
 * Release a table built by roffhash_alloc(): every entry,
 * the hash bookkeeping, and the table object itself.
 * A NULL table is accepted and ignored.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 idx;

	if (htab != NULL) {
		entry = ohash_first(htab, &idx);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &idx);
		}
		ohash_delete(htab);
		free(htab);
	}
}
656
657 enum roff_tok
658 roffhash_find(struct ohash *htab, const char *name, size_t sz)
659 {
660 struct roffreq *req;
661 const char *end;
662
663 if (sz) {
664 end = name + sz;
665 req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
666 } else
667 req = ohash_find(htab, ohash_qlookup(htab, name));
668 return req == NULL ? TOKEN_NONE : req->tok;
669 }
670
671 /* --- stack of request blocks -------------------------------------------- */
672
673 /*
674 * Pop the current node off of the stack of roff instructions currently
675 * pending.
676 */
677 static int
678 roffnode_pop(struct roff *r)
679 {
680 struct roffnode *p;
681 int inloop;
682
683 p = r->last;
684 inloop = p->tok == ROFF_while;
685 r->last = p->parent;
686 free(p->name);
687 free(p->end);
688 free(p);
689 return inloop;
690 }
691
692 /*
693 * Push a roff node onto the instruction stack. This must later be
694 * removed with roffnode_pop().
695 */
696 static void
697 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
698 int line, int col)
699 {
700 struct roffnode *p;
701
702 p = mandoc_calloc(1, sizeof(struct roffnode));
703 p->tok = tok;
704 if (name)
705 p->name = mandoc_strdup(name);
706 p->parent = r->last;
707 p->line = line;
708 p->col = col;
709 p->rule = p->parent ? p->parent->rule : 0;
710
711 r->last = p;
712 }
713
714 /* --- roff parser state data management ---------------------------------- */
715
716 static void
717 roff_free1(struct roff *r)
718 {
719 struct tbl_node *tbl;
720 int i;
721
722 while (NULL != (tbl = r->first_tbl)) {
723 r->first_tbl = tbl->next;
724 tbl_free(tbl);
725 }
726 r->first_tbl = r->last_tbl = r->tbl = NULL;
727
728 if (r->last_eqn != NULL)
729 eqn_free(r->last_eqn);
730 r->last_eqn = r->eqn = NULL;
731
732 while (r->mstackpos >= 0)
733 roff_userret(r);
734
735 while (r->last)
736 roffnode_pop(r);
737
738 free (r->rstack);
739 r->rstack = NULL;
740 r->rstacksz = 0;
741 r->rstackpos = -1;
742
743 roff_freereg(r->regtab);
744 r->regtab = NULL;
745
746 roff_freestr(r->strtab);
747 roff_freestr(r->rentab);
748 roff_freestr(r->xmbtab);
749 r->strtab = r->rentab = r->xmbtab = NULL;
750
751 if (r->xtab)
752 for (i = 0; i < 128; i++)
753 free(r->xtab[i].p);
754 free(r->xtab);
755 r->xtab = NULL;
756 }
757
758 void
759 roff_reset(struct roff *r)
760 {
761 roff_free1(r);
762 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
763 r->control = '\0';
764 r->escape = '\\';
765 roffce_lines = 0;
766 roffce_node = NULL;
767 roffit_lines = 0;
768 roffit_macro = NULL;
769 }
770
771 void
772 roff_free(struct roff *r)
773 {
774 int i;
775
776 roff_free1(r);
777 for (i = 0; i < r->mstacksz; i++)
778 free(r->mstack[i].argv);
779 free(r->mstack);
780 roffhash_free(r->reqtab);
781 free(r);
782 }
783
784 struct roff *
785 roff_alloc(struct mparse *parse, int options)
786 {
787 struct roff *r;
788
789 r = mandoc_calloc(1, sizeof(struct roff));
790 r->parse = parse;
791 r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
792 r->options = options;
793 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
794 r->mstackpos = -1;
795 r->rstackpos = -1;
796 r->escape = '\\';
797 return r;
798 }
799
800 /* --- syntax tree state data management ---------------------------------- */
801
802 static void
803 roff_man_free1(struct roff_man *man)
804 {
805
806 if (man->first != NULL)
807 roff_node_delete(man, man->first);
808 free(man->meta.msec);
809 free(man->meta.vol);
810 free(man->meta.os);
811 free(man->meta.arch);
812 free(man->meta.title);
813 free(man->meta.name);
814 free(man->meta.date);
815 }
816
817 static void
818 roff_man_alloc1(struct roff_man *man)
819 {
820
821 memset(&man->meta, 0, sizeof(man->meta));
822 man->first = mandoc_calloc(1, sizeof(*man->first));
823 man->first->type = ROFFT_ROOT;
824 man->last = man->first;
825 man->last_es = NULL;
826 man->flags = 0;
827 man->macroset = MACROSET_NONE;
828 man->lastsec = man->lastnamed = SEC_NONE;
829 man->next = ROFF_NEXT_CHILD;
830 }
831
/*
 * Discard the current contents of a syntax tree object
 * and return it to its initial state for reuse.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);	/* drop the old tree and meta data */
	roff_man_alloc1(man);	/* start over with a fresh root node */
}
839
/*
 * Destroy a syntax tree object: first its contents, then the object.
 */
void
roff_man_free(struct roff_man *man)
{
	roff_man_free1(man);
	free(man);
}
847
848 struct roff_man *
849 roff_man_alloc(struct roff *roff, struct mparse *parse,
850 const char *os_s, int quick)
851 {
852 struct roff_man *man;
853
854 man = mandoc_calloc(1, sizeof(*man));
855 man->parse = parse;
856 man->roff = roff;
857 man->os_s = os_s;
858 man->quick = quick;
859 roff_man_alloc1(man);
860 roff->man = man;
861 return man;
862 }
863
864 /* --- syntax tree handling ----------------------------------------------- */
865
866 struct roff_node *
867 roff_node_alloc(struct roff_man *man, int line, int pos,
868 enum roff_type type, int tok)
869 {
870 struct roff_node *n;
871
872 n = mandoc_calloc(1, sizeof(*n));
873 n->line = line;
874 n->pos = pos;
875 n->tok = tok;
876 n->type = type;
877 n->sec = man->lastsec;
878
879 if (man->flags & MDOC_SYNOPSIS)
880 n->flags |= NODE_SYNPRETTY;
881 else
882 n->flags &= ~NODE_SYNPRETTY;
883 if (man->flags & MDOC_NEWLINE)
884 n->flags |= NODE_LINE;
885 man->flags &= ~MDOC_NEWLINE;
886
887 return n;
888 }
889
890 void
891 roff_node_append(struct roff_man *man, struct roff_node *n)
892 {
893
894 switch (man->next) {
895 case ROFF_NEXT_SIBLING:
896 if (man->last->next != NULL) {
897 n->next = man->last->next;
898 man->last->next->prev = n;
899 } else
900 man->last->parent->last = n;
901 man->last->next = n;
902 n->prev = man->last;
903 n->parent = man->last->parent;
904 break;
905 case ROFF_NEXT_CHILD:
906 if (man->last->child != NULL) {
907 n->next = man->last->child;
908 man->last->child->prev = n;
909 } else
910 man->last->last = n;
911 man->last->child = n;
912 n->parent = man->last;
913 break;
914 default:
915 abort();
916 }
917 man->last = n;
918
919 switch (n->type) {
920 case ROFFT_HEAD:
921 n->parent->head = n;
922 break;
923 case ROFFT_BODY:
924 if (n->end != ENDBODY_NOT)
925 return;
926 n->parent->body = n;
927 break;
928 case ROFFT_TAIL:
929 n->parent->tail = n;
930 break;
931 default:
932 return;
933 }
934
935 /*
936 * Copy over the normalised-data pointer of our parent. Not
937 * everybody has one, but copying a null pointer is fine.
938 */
939
940 n->norm = n->parent->norm;
941 assert(n->parent->type == ROFFT_BLOCK);
942 }
943
944 void
945 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
946 {
947 struct roff_node *n;
948
949 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
950 n->string = roff_strdup(man->roff, word);
951 roff_node_append(man, n);
952 n->flags |= NODE_VALID | NODE_ENDED;
953 man->next = ROFF_NEXT_SIBLING;
954 }
955
956 void
957 roff_word_append(struct roff_man *man, const char *word)
958 {
959 struct roff_node *n;
960 char *addstr, *newstr;
961
962 n = man->last;
963 addstr = roff_strdup(man->roff, word);
964 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
965 free(addstr);
966 free(n->string);
967 n->string = newstr;
968 man->next = ROFF_NEXT_SIBLING;
969 }
970
971 void
972 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
973 {
974 struct roff_node *n;
975
976 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
977 roff_node_append(man, n);
978 man->next = ROFF_NEXT_CHILD;
979 }
980
981 struct roff_node *
982 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
983 {
984 struct roff_node *n;
985
986 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
987 roff_node_append(man, n);
988 man->next = ROFF_NEXT_CHILD;
989 return n;
990 }
991
992 struct roff_node *
993 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
994 {
995 struct roff_node *n;
996
997 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
998 roff_node_append(man, n);
999 man->next = ROFF_NEXT_CHILD;
1000 return n;
1001 }
1002
1003 struct roff_node *
1004 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1005 {
1006 struct roff_node *n;
1007
1008 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1009 roff_node_append(man, n);
1010 man->next = ROFF_NEXT_CHILD;
1011 return n;
1012 }
1013
1014 static void
1015 roff_addtbl(struct roff_man *man, struct tbl_node *tbl)
1016 {
1017 struct roff_node *n;
1018 const struct tbl_span *span;
1019
1020 if (man->macroset == MACROSET_MAN)
1021 man_breakscope(man, ROFF_TS);
1022 while ((span = tbl_span(tbl)) != NULL) {
1023 n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE);
1024 n->span = span;
1025 roff_node_append(man, n);
1026 n->flags |= NODE_VALID | NODE_ENDED;
1027 man->next = ROFF_NEXT_SIBLING;
1028 }
1029 }
1030
1031 void
1032 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1033 {
1034
1035 /* Adjust siblings. */
1036
1037 if (n->prev)
1038 n->prev->next = n->next;
1039 if (n->next)
1040 n->next->prev = n->prev;
1041
1042 /* Adjust parent. */
1043
1044 if (n->parent != NULL) {
1045 if (n->parent->child == n)
1046 n->parent->child = n->next;
1047 if (n->parent->last == n)
1048 n->parent->last = n->prev;
1049 }
1050
1051 /* Adjust parse point. */
1052
1053 if (man == NULL)
1054 return;
1055 if (man->last == n) {
1056 if (n->prev == NULL) {
1057 man->last = n->parent;
1058 man->next = ROFF_NEXT_CHILD;
1059 } else {
1060 man->last = n->prev;
1061 man->next = ROFF_NEXT_SIBLING;
1062 }
1063 }
1064 if (man->first == n)
1065 man->first = NULL;
1066 }
1067
1068 void
1069 roff_node_free(struct roff_node *n)
1070 {
1071
1072 if (n->args != NULL)
1073 mdoc_argv_free(n->args);
1074 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1075 free(n->norm);
1076 if (n->eqn != NULL)
1077 eqn_box_free(n->eqn);
1078 free(n->string);
1079 free(n);
1080 }
1081
1082 void
1083 roff_node_delete(struct roff_man *man, struct roff_node *n)
1084 {
1085
1086 while (n->child != NULL)
1087 roff_node_delete(man, n->child);
1088 roff_node_unlink(man, n);
1089 roff_node_free(n);
1090 }
1091
1092 void
1093 deroff(char **dest, const struct roff_node *n)
1094 {
1095 char *cp;
1096 size_t sz;
1097
1098 if (n->type != ROFFT_TEXT) {
1099 for (n = n->child; n != NULL; n = n->next)
1100 deroff(dest, n);
1101 return;
1102 }
1103
1104 /* Skip leading whitespace. */
1105
1106 for (cp = n->string; *cp != '\0'; cp++) {
1107 if (cp[0] == '\\' && cp[1] != '\0' &&
1108 strchr(" %&0^|~", cp[1]) != NULL)
1109 cp++;
1110 else if ( ! isspace((unsigned char)*cp))
1111 break;
1112 }
1113
1114 /* Skip trailing backslash. */
1115
1116 sz = strlen(cp);
1117 if (sz > 0 && cp[sz - 1] == '\\')
1118 sz--;
1119
1120 /* Skip trailing whitespace. */
1121
1122 for (; sz; sz--)
1123 if ( ! isspace((unsigned char)cp[sz-1]))
1124 break;
1125
1126 /* Skip empty strings. */
1127
1128 if (sz == 0)
1129 return;
1130
1131 if (*dest == NULL) {
1132 *dest = mandoc_strndup(cp, sz);
1133 return;
1134 }
1135
1136 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1137 free(*dest);
1138 *dest = cp;
1139 }
1140
1141 /* --- main functions of the roff parser ---------------------------------- */
1142
/*
 * In the current line, expand escape sequences that tend to get
 * used in numerical expressions and conditional requests.
 * Also check the syntax of the remaining escape sequences.
 *
 * The line in buf->buf is processed from offset pos in two passes:
 * a forward scan that finds and strips a comment (\" or \#),
 * and a backward scan that replaces \*, \$, \B, \n, and \w
 * sequences and passes all other escapes to mandoc_escape() for
 * checking.  Replacements reallocate buf->buf and update buf->sz.
 *
 * Returns ROFF_CONT when the line is ready for further parsing,
 * ROFF_IGN | ROFF_APPEND after a \# comment or after a trailing
 * escape character when no terminating newline was seen, and
 * ROFF_IGN when EXPAND_LIMIT or the SHRT_MAX size limit is hit.
 */
static int
roff_res(struct roff *r, struct buf *buf, int ln, int pos)
{
	struct mctx	*ctx;	/* current macro call context */
	char		 ubuf[24]; /* buffer to print the number */
	struct roff_node *n;	/* used for header comments */
	const char	*start;	/* start of the string to process */
	char		*stesc;	/* start of an escape sequence ('\\') */
	char		*ep;	/* end of comment string */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy buf->buf to */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	size_t		 asz;	/* length of the replacement */
	size_t		 rsz;	/* length of the rest of the string */
	enum mandoc_esc	 esc;	/* type of the escape sequence */
	int		 inaml;	/* length returned from mandoc_escape() */
	int		 expand_count;	/* to avoid infinite loops */
	int		 npos;	/* position in numeric expression */
	int		 arg_complete; /* argument not interrupted by eol */
	int		 quote_args; /* true for \\$@, false for \\$* */
	int		 done;	/* no more input available */
	int		 deftype; /* type of definition to paste */
	int		 rcsid;	/* kind of RCS id seen */
	char		 sign;	/* increment number register */
	char		 term;	/* character terminating the escape */

	/* Search forward for comments. */

	done = 0;
	start = buf->buf + pos;
	for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
		/* Only an escape character followed by '"' or '#' counts. */
		if (stesc[0] != r->escape || stesc[1] == '\0')
			continue;
		stesc++;
		if (*stesc != '"' && *stesc != '#')
			continue;

		/* Comment found, look for RCS id. */

		rcsid = 0;
		if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_OPENBSD;
			cp += 8;
		} else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_NETBSD;
			cp += 7;
		}
		/* Accept the id only if a closing '$' follows the keyword. */
		if (cp != NULL &&
		    isalnum((unsigned char)*cp) == 0 &&
		    strchr(cp, '$') != NULL) {
			if (r->man->meta.rcsids & rcsid)
				mandoc_msg(MANDOCERR_RCS_REP, r->parse,
				    ln, stesc + 1 - buf->buf, stesc + 1);
			r->man->meta.rcsids |= rcsid;
		}

		/* Handle trailing whitespace. */

		/*
		 * ep is set to the last character of the line, and
		 * stesc steps back onto the escape character.
		 */
		ep = strchr(stesc--, '\0') - 1;
		if (*ep == '\n') {
			done = 1;
			ep--;
		}
		if (*ep == ' ' || *ep == '\t')
			mandoc_msg(MANDOCERR_SPACE_EOL, r->parse,
			    ln, ep - buf->buf, NULL);

		/*
		 * Save comments preceding the title macro
		 * in the syntax tree.
		 */

		if (r->format == 0) {
			while (*ep == ' ' || *ep == '\t')
				ep--;
			ep[1] = '\0';
			n = roff_node_alloc(r->man,
			    ln, stesc + 1 - buf->buf,
			    ROFFT_COMMENT, TOKEN_NONE);
			n->string = mandoc_strdup(stesc + 2);
			roff_node_append(r->man, n);
			n->flags |= NODE_VALID | NODE_ENDED;
			r->man->next = ROFF_NEXT_SIBLING;
		}

		/* Line continuation with comment. */

		if (stesc[1] == '#') {
			*stesc = '\0';
			return ROFF_IGN | ROFF_APPEND;
		}

		/* Discard normal comments. */

		/* Also drop blanks before the comment, unless escaped. */
		while (stesc > start && stesc[-1] == ' ' &&
		    (stesc == start + 1 || stesc[-2] != '\\'))
			stesc--;
		*stesc = '\0';
		break;
	}
	/* Nothing is left to process on this line. */
	if (stesc == start)
		return ROFF_CONT;
	stesc--;

	/* Notice the end of the input. */

	if (*stesc == '\n') {
		*stesc-- = '\0';
		done = 1;
	}

	expand_count = 0;
	while (stesc >= start) {

		/* Search backwards for the next backslash. */

		if (*stesc != r->escape) {
			/*
			 * This branch sees a backslash only when
			 * r->escape is some other character; the
			 * backslash is then rewritten as "\e".
			 */
			if (*stesc == '\\') {
				*stesc = '\0';
				buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
				    buf->buf, stesc + 1) + 1;
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				free(buf->buf);
				buf->buf = nbuf;
			}
			stesc--;
			continue;
		}

		/* If it is escaped, skip it. */

		for (cp = stesc - 1; cp >= start; cp--)
			if (*cp != r->escape)
				break;

		/*
		 * stesc - cp is the length of the run of escape
		 * characters ending at stesc.  An even run is
		 * rewritten as backslashes and skipped as a whole.
		 */
		if ((stesc - cp) % 2 == 0) {
			while (stesc > cp)
				*stesc-- = '\\';
			continue;
		} else if (stesc[1] != '\0') {
			*stesc = '\\';
		} else {
			/* A lone escape character ends the line. */
			*stesc-- = '\0';
			if (done)
				continue;
			else
				return ROFF_IGN | ROFF_APPEND;
		}

		/* Decide whether to expand or to check only. */

		term = '\0';
		cp = stesc + 1;
		switch (*cp) {
		case '*':
		case '$':
			res = NULL;
			break;
		case 'B':
		case 'w':
			/* The character after the letter is the delimiter. */
			term = cp[1];
			/* FALLTHROUGH */
		case 'n':
			sign = cp[1];
			if (sign == '+' || sign == '-')
				cp++;
			res = ubuf;
			break;
		default:
			/* Not expanded here, only checked for syntax. */
			esc = mandoc_escape(&cp, &stnam, &inaml);
			if (esc == ESCAPE_ERROR ||
			    (esc == ESCAPE_SPECIAL &&
			     mchars_spec2cp(stnam, inaml) < 0))
				mandoc_vmsg(MANDOCERR_ESC_BAD,
				    r->parse, ln, (int)(stesc - buf->buf),
				    "%.*s", (int)(cp - stesc), stesc);
			stesc--;
			continue;
		}

		if (EXPAND_LIMIT < ++expand_count) {
			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 */

		if (term == '\0') {
			switch (*++cp) {
			case '\0':
				maxl = 0;
				break;
			case '(':
				cp++;
				maxl = 2;
				break;
			case '[':
				cp++;
				term = ']';
				maxl = 0;
				break;
			default:
				maxl = 1;
				break;
			}
		} else {
			/* Skip the letter and the opening delimiter. */
			cp += 2;
			maxl = 0;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		naml = 0;
		arg_complete = 1;
		/* maxl == 0 means: read up to the terminator. */
		while (maxl == 0 || naml < maxl) {
			if (*cp == '\0') {
				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
				    ln, (int)(stesc - buf->buf), stesc);
				arg_complete = 0;
				break;
			}
			if (maxl == 0 && *cp == term) {
				cp++;
				break;
			}
			/*
			 * Only inside \w, a nested escape sequence is
			 * parsed as a unit; it counts as one character
			 * for the kinds listed below, else as none.
			 */
			if (*cp++ != '\\' || stesc[1] != 'w') {
				naml++;
				continue;
			}
			switch (mandoc_escape(&cp, NULL, NULL)) {
			case ESCAPE_SPECIAL:
			case ESCAPE_UNICODE:
			case ESCAPE_NUMBERED:
			case ESCAPE_OVERSTRIKE:
				naml++;
				break;
			default:
				break;
			}
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		switch (stesc[1]) {
		case '*':
			if (arg_complete) {
				deftype = ROFFDEF_USER | ROFFDEF_PRE;
				res = roff_getstrn(r, stnam, naml, &deftype);

				/*
				 * If not overridden, let \*(.T
				 * through to the formatters.
				 */

				if (res == NULL && naml == 2 &&
				    stnam[0] == '.' && stnam[1] == 'T') {
					roff_setstrn(&r->strtab,
					    ".T", 2, NULL, 0, 0);
					stesc--;
					continue;
				}
			}
			break;
		case '$':
			/* Macro arguments only exist inside a macro call. */
			if (r->mstackpos < 0) {
				mandoc_vmsg(MANDOCERR_ARG_UNDEF,
				    r->parse, ln, (int)(stesc - buf->buf),
				    "%.3s", stesc);
				break;
			}
			ctx = r->mstack + r->mstackpos;
			/* \$1 to \$9 select a single argument. */
			npos = stesc[2] - '1';
			if (npos >= 0 && npos <= 8) {
				res = npos < ctx->argc ?
				    ctx->argv[npos] : "";
				break;
			}
			if (stesc[2] == '*')
				quote_args = 0;
			else if (stesc[2] == '@')
				quote_args = 1;
			else {
				mandoc_vmsg(MANDOCERR_ARG_NONUM,
				    r->parse, ln, (int)(stesc - buf->buf),
				    "%.3s", stesc);
				break;
			}
			/* Compute the length of all arguments joined. */
			asz = 0;
			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					asz++;  /* blank */
				if (quote_args)
					asz += 2;  /* quotes */
				asz += strlen(ctx->argv[npos]);
			}
			/*
			 * The escape sequence itself is 3 bytes long.
			 * Resize the buffer in place: when shrinking,
			 * move the tail before the reallocation, when
			 * growing, move it afterwards.
			 */
			if (asz != 3) {
				rsz = buf->sz - (stesc - buf->buf) - 3;
				if (asz < 3)
					memmove(stesc + asz, stesc + 3, rsz);
				buf->sz += asz - 3;
				nbuf = mandoc_realloc(buf->buf, buf->sz);
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				buf->buf = nbuf;
				if (asz > 3)
					memmove(stesc + asz, stesc + 3, rsz);
			}
			/* Write the arguments into the gap. */
			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					*stesc++ = ' ';
				if (quote_args)
					*stesc++ = '"';
				cp = ctx->argv[npos];
				while (*cp != '\0')
					*stesc++ = *cp++;
				if (quote_args)
					*stesc++ = '"';
			}
			continue;
		case 'B':
			/*
			 * Yield '1' if the complete argument parses
			 * as a numeric expression, else '0'.
			 */
			npos = 0;
			ubuf[0] = arg_complete &&
			    roff_evalnum(r, ln, stnam, &npos,
			      NULL, ROFFNUM_SCALE) &&
			    stnam + npos + 1 == cp ? '1' : '0';
			ubuf[1] = '\0';
			break;
		case 'n':
			if (arg_complete)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, stnam, naml, sign));
			else
				ubuf[0] = '\0';
			break;
		case 'w':
			/* use even incomplete args */
			/* The result is 24 times the character count. */
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
			    24 * (int)naml);
			break;
		}

		if (res == NULL) {
			if (stesc[1] == '*')
				mandoc_vmsg(MANDOCERR_STR_UNDEF,
				    r->parse, ln, (int)(stesc - buf->buf),
				    "%.*s", (int)naml, stnam);
			res = "";
		} else if (buf->sz + strlen(res) > SHRT_MAX) {
			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/* Replace the escape sequence by the string. */

		*stesc = '\0';
		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
		    buf->buf, res, cp) + 1;

		/* Prepare for the next replacement. */

		/*
		 * Continue at the end of the inserted text, such that
		 * escape sequences inside it get processed as well.
		 */
		start = nbuf + pos;
		stesc = nbuf + (stesc - buf->buf) + strlen(res);
		free(buf->buf);
		buf->buf = nbuf;
	}
	return ROFF_CONT;
}
1529
1530 /*
1531 * Process text streams.
1532 */
1533 static int
1534 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1535 {
1536 size_t sz;
1537 const char *start;
1538 char *p;
1539 int isz;
1540 enum mandoc_esc esc;
1541
1542 /* Spring the input line trap. */
1543
1544 if (roffit_lines == 1) {
1545 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1546 free(buf->buf);
1547 buf->buf = p;
1548 buf->sz = isz + 1;
1549 *offs = 0;
1550 free(roffit_macro);
1551 roffit_lines = 0;
1552 return ROFF_REPARSE;
1553 } else if (roffit_lines > 1)
1554 --roffit_lines;
1555
1556 if (roffce_node != NULL && buf->buf[pos] != '\0') {
1557 if (roffce_lines < 1) {
1558 r->man->last = roffce_node;
1559 r->man->next = ROFF_NEXT_SIBLING;
1560 roffce_lines = 0;
1561 roffce_node = NULL;
1562 } else
1563 roffce_lines--;
1564 }
1565
1566 /* Convert all breakable hyphens into ASCII_HYPH. */
1567
1568 start = p = buf->buf + pos;
1569
1570 while (*p != '\0') {
1571 sz = strcspn(p, "-\\");
1572 p += sz;
1573
1574 if (*p == '\0')
1575 break;
1576
1577 if (*p == '\\') {
1578 /* Skip over escapes. */
1579 p++;
1580 esc = mandoc_escape((const char **)&p, NULL, NULL);
1581 if (esc == ESCAPE_ERROR)
1582 break;
1583 while (*p == '-')
1584 p++;
1585 continue;
1586 } else if (p == start) {
1587 p++;
1588 continue;
1589 }
1590
1591 if (isalpha((unsigned char)p[-1]) &&
1592 isalpha((unsigned char)p[1]))
1593 *p = ASCII_HYPH;
1594 p++;
1595 }
1596 return ROFF_CONT;
1597 }
1598
1599 int
1600 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
1601 {
1602 enum roff_tok t;
1603 int e;
1604 int pos; /* parse point */
1605 int spos; /* saved parse point for messages */
1606 int ppos; /* original offset in buf->buf */
1607 int ctl; /* macro line (boolean) */
1608
1609 ppos = pos = *offs;
1610
1611 /* Handle in-line equation delimiters. */
1612
1613 if (r->tbl == NULL &&
1614 r->last_eqn != NULL && r->last_eqn->delim &&
1615 (r->eqn == NULL || r->eqn_inline)) {
1616 e = roff_eqndelim(r, buf, pos);
1617 if (e == ROFF_REPARSE)
1618 return e;
1619 assert(e == ROFF_CONT);
1620 }
1621
1622 /* Expand some escape sequences. */
1623
1624 e = roff_res(r, buf, ln, pos);
1625 if ((e & ROFF_MASK) == ROFF_IGN)
1626 return e;
1627 assert(e == ROFF_CONT);
1628
1629 ctl = roff_getcontrol(r, buf->buf, &pos);
1630
1631 /*
1632 * First, if a scope is open and we're not a macro, pass the
1633 * text through the macro's filter.
1634 * Equations process all content themselves.
1635 * Tables process almost all content themselves, but we want
1636 * to warn about macros before passing it there.
1637 */
1638
1639 if (r->last != NULL && ! ctl) {
1640 t = r->last->tok;
1641 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1642 if ((e & ROFF_MASK) == ROFF_IGN)
1643 return e;
1644 e &= ~ROFF_MASK;
1645 } else
1646 e = ROFF_IGN;
1647 if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
1648 eqn_read(r->eqn, buf->buf + ppos);
1649 return e;
1650 }
1651 if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
1652 tbl_read(r->tbl, ln, buf->buf, ppos);
1653 roff_addtbl(r->man, r->tbl);
1654 return e;
1655 }
1656 if ( ! ctl)
1657 return roff_parsetext(r, buf, pos, offs) | e;
1658
1659 /* Skip empty request lines. */
1660
1661 if (buf->buf[pos] == '"') {
1662 mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
1663 ln, pos, NULL);
1664 return ROFF_IGN;
1665 } else if (buf->buf[pos] == '\0')
1666 return ROFF_IGN;
1667
1668 /*
1669 * If a scope is open, go to the child handler for that macro,
1670 * as it may want to preprocess before doing anything with it.
1671 * Don't do so if an equation is open.
1672 */
1673
1674 if (r->last) {
1675 t = r->last->tok;
1676 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1677 }
1678
1679 /* No scope is open. This is a new request or macro. */
1680
1681 spos = pos;
1682 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1683
1684 /* Tables ignore most macros. */
1685
1686 if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
1687 t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
1688 mandoc_msg(MANDOCERR_TBLMACRO, r->parse,
1689 ln, pos, buf->buf + spos);
1690 if (t != TOKEN_NONE)
1691 return ROFF_IGN;
1692 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1693 pos++;
1694 while (buf->buf[pos] == ' ')
1695 pos++;
1696 tbl_read(r->tbl, ln, buf->buf, pos);
1697 roff_addtbl(r->man, r->tbl);
1698 return ROFF_IGN;
1699 }
1700
1701 /* For now, let high level macros abort .ce mode. */
1702
1703 if (ctl && roffce_node != NULL &&
1704 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
1705 t == ROFF_TH || t == ROFF_TS)) {
1706 r->man->last = roffce_node;
1707 r->man->next = ROFF_NEXT_SIBLING;
1708 roffce_lines = 0;
1709 roffce_node = NULL;
1710 }
1711
1712 /*
1713 * This is neither a roff request nor a user-defined macro.
1714 * Let the standard macro set parsers handle it.
1715 */
1716
1717 if (t == TOKEN_NONE)
1718 return ROFF_CONT;
1719
1720 /* Execute a roff request or a user defined macro. */
1721
1722 return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
1723 }
1724
1725 /*
1726 * Internal interface function to tell the roff parser that execution
1727 * of the current macro ended. This is required because macro
1728 * definitions usually do not end with a .return request.
1729 */
1730 void
1731 roff_userret(struct roff *r)
1732 {
1733 struct mctx *ctx;
1734 int i;
1735
1736 assert(r->mstackpos >= 0);
1737 ctx = r->mstack + r->mstackpos;
1738 for (i = 0; i < ctx->argc; i++)
1739 free(ctx->argv[i]);
1740 ctx->argc = 0;
1741 r->mstackpos--;
1742 }
1743
1744 void
1745 roff_endparse(struct roff *r)
1746 {
1747 if (r->last != NULL)
1748 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1749 r->last->line, r->last->col,
1750 roff_name[r->last->tok]);
1751
1752 if (r->eqn != NULL) {
1753 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1754 r->eqn->node->line, r->eqn->node->pos, "EQ");
1755 eqn_parse(r->eqn);
1756 r->eqn = NULL;
1757 }
1758
1759 if (r->tbl != NULL) {
1760 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1761 r->tbl->line, r->tbl->pos, "TS");
1762 tbl_end(r->tbl);
1763 r->tbl = NULL;
1764 }
1765 }
1766
1767 /*
1768 * Parse a roff node's type from the input buffer. This must be in the
1769 * form of ".foo xxx" in the usual way.
1770 */
1771 static enum roff_tok
1772 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1773 {
1774 char *cp;
1775 const char *mac;
1776 size_t maclen;
1777 int deftype;
1778 enum roff_tok t;
1779
1780 cp = buf + *pos;
1781
1782 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1783 return TOKEN_NONE;
1784
1785 mac = cp;
1786 maclen = roff_getname(r, &cp, ln, ppos);
1787
1788 deftype = ROFFDEF_USER | ROFFDEF_REN;
1789 r->current_string = roff_getstrn(r, mac, maclen, &deftype);
1790 switch (deftype) {
1791 case ROFFDEF_USER:
1792 t = ROFF_USERDEF;
1793 break;
1794 case ROFFDEF_REN:
1795 t = ROFF_RENAMED;
1796 break;
1797 default:
1798 t = roffhash_find(r->reqtab, mac, maclen);
1799 break;
1800 }
1801 if (t != TOKEN_NONE)
1802 *pos = cp - buf;
1803 else if (deftype == ROFFDEF_UNDEF) {
1804 /* Using an undefined macro defines it to be empty. */
1805 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
1806 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
1807 }
1808 return t;
1809 }
1810
1811 /* --- handling of request blocks ----------------------------------------- */
1812
1813 static int
1814 roff_cblock(ROFF_ARGS)
1815 {
1816
1817 /*
1818 * A block-close `..' should only be invoked as a child of an
1819 * ignore macro, otherwise raise a warning and just ignore it.
1820 */
1821
1822 if (r->last == NULL) {
1823 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1824 ln, ppos, "..");
1825 return ROFF_IGN;
1826 }
1827
1828 switch (r->last->tok) {
1829 case ROFF_am:
1830 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1831 case ROFF_ami:
1832 case ROFF_de:
1833 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1834 case ROFF_dei:
1835 case ROFF_ig:
1836 break;
1837 default:
1838 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1839 ln, ppos, "..");
1840 return ROFF_IGN;
1841 }
1842
1843 if (buf->buf[pos] != '\0')
1844 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1845 ".. %s", buf->buf + pos);
1846
1847 roffnode_pop(r);
1848 roffnode_cleanscope(r);
1849 return ROFF_IGN;
1850
1851 }
1852
1853 static int
1854 roffnode_cleanscope(struct roff *r)
1855 {
1856 int inloop;
1857
1858 inloop = 0;
1859 while (r->last != NULL) {
1860 if (--r->last->endspan != 0)
1861 break;
1862 inloop += roffnode_pop(r);
1863 }
1864 return inloop;
1865 }
1866
1867 static int
1868 roff_ccond(struct roff *r, int ln, int ppos)
1869 {
1870 if (NULL == r->last) {
1871 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1872 ln, ppos, "\\}");
1873 return 0;
1874 }
1875
1876 switch (r->last->tok) {
1877 case ROFF_el:
1878 case ROFF_ie:
1879 case ROFF_if:
1880 case ROFF_while:
1881 break;
1882 default:
1883 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1884 ln, ppos, "\\}");
1885 return 0;
1886 }
1887
1888 if (r->last->endspan > -1) {
1889 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1890 ln, ppos, "\\}");
1891 return 0;
1892 }
1893
1894 return roffnode_pop(r) + roffnode_cleanscope(r);
1895 }
1896
1897 static int
1898 roff_block(ROFF_ARGS)
1899 {
1900 const char *name, *value;
1901 char *call, *cp, *iname, *rname;
1902 size_t csz, namesz, rsz;
1903 int deftype;
1904
1905 /* Ignore groff compatibility mode for now. */
1906
1907 if (tok == ROFF_de1)
1908 tok = ROFF_de;
1909 else if (tok == ROFF_dei1)
1910 tok = ROFF_dei;
1911 else if (tok == ROFF_am1)
1912 tok = ROFF_am;
1913 else if (tok == ROFF_ami1)
1914 tok = ROFF_ami;
1915
1916 /* Parse the macro name argument. */
1917
1918 cp = buf->buf + pos;
1919 if (tok == ROFF_ig) {
1920 iname = NULL;
1921 namesz = 0;
1922 } else {
1923 iname = cp;
1924 namesz = roff_getname(r, &cp, ln, ppos);
1925 iname[namesz] = '\0';
1926 }
1927
1928 /* Resolve the macro name argument if it is indirect. */
1929
1930 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1931 deftype = ROFFDEF_USER;
1932 name = roff_getstrn(r, iname, namesz, &deftype);
1933 if (name == NULL) {
1934 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1935 r->parse, ln, (int)(iname - buf->buf),
1936 "%.*s", (int)namesz, iname);
1937 namesz = 0;
1938 } else
1939 namesz = strlen(name);
1940 } else
1941 name = iname;
1942
1943 if (namesz == 0 && tok != ROFF_ig) {
1944 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
1945 ln, ppos, roff_name[tok]);
1946 return ROFF_IGN;
1947 }
1948
1949 roffnode_push(r, tok, name, ln, ppos);
1950
1951 /*
1952 * At the beginning of a `de' macro, clear the existing string
1953 * with the same name, if there is one. New content will be
1954 * appended from roff_block_text() in multiline mode.
1955 */
1956
1957 if (tok == ROFF_de || tok == ROFF_dei) {
1958 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1959 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
1960 } else if (tok == ROFF_am || tok == ROFF_ami) {
1961 deftype = ROFFDEF_ANY;
1962 value = roff_getstrn(r, iname, namesz, &deftype);
1963 switch (deftype) { /* Before appending, ... */
1964 case ROFFDEF_PRE: /* copy predefined to user-defined. */
1965 roff_setstrn(&r->strtab, name, namesz,
1966 value, strlen(value), 0);
1967 break;
1968 case ROFFDEF_REN: /* call original standard macro. */
1969 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
1970 (int)strlen(value), value);
1971 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
1972 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
1973 free(call);
1974 break;
1975 case ROFFDEF_STD: /* rename and call standard macro. */
1976 rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
1977 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
1978 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
1979 (int)rsz, rname);
1980 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
1981 free(call);
1982 free(rname);
1983 break;
1984 default:
1985 break;
1986 }
1987 }
1988
1989 if (*cp == '\0')
1990 return ROFF_IGN;
1991
1992 /* Get the custom end marker. */
1993
1994 iname = cp;
1995 namesz = roff_getname(r, &cp, ln, ppos);
1996
1997 /* Resolve the end marker if it is indirect. */
1998
1999 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2000 deftype = ROFFDEF_USER;
2001 name = roff_getstrn(r, iname, namesz, &deftype);
2002 if (name == NULL) {
2003 mandoc_vmsg(MANDOCERR_STR_UNDEF,
2004 r->parse, ln, (int)(iname - buf->buf),
2005 "%.*s", (int)namesz, iname);
2006 namesz = 0;
2007 } else
2008 namesz = strlen(name);
2009 } else
2010 name = iname;
2011
2012 if (namesz)
2013 r->last->end = mandoc_strndup(name, namesz);
2014
2015 if (*cp != '\0')
2016 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
2017 ln, pos, ".%s ... %s", roff_name[tok], cp);
2018
2019 return ROFF_IGN;
2020 }
2021
2022 static int
2023 roff_block_sub(ROFF_ARGS)
2024 {
2025 enum roff_tok t;
2026 int i, j;
2027
2028 /*
2029 * First check whether a custom macro exists at this level. If
2030 * it does, then check against it. This is some of groff's
2031 * stranger behaviours. If we encountered a custom end-scope
2032 * tag and that tag also happens to be a "real" macro, then we
2033 * need to try interpreting it again as a real macro. If it's
2034 * not, then return ignore. Else continue.
2035 */
2036
2037 if (r->last->end) {
2038 for (i = pos, j = 0; r->last->end[j]; j++, i++)
2039 if (buf->buf[i] != r->last->end[j])
2040 break;
2041
2042 if (r->last->end[j] == '\0' &&
2043 (buf->buf[i] == '\0' ||
2044 buf->buf[i] == ' ' ||
2045 buf->buf[i] == '\t')) {
2046 roffnode_pop(r);
2047 roffnode_cleanscope(r);
2048
2049 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2050 i++;
2051
2052 pos = i;
2053 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2054 TOKEN_NONE)
2055 return ROFF_RERUN;
2056 return ROFF_IGN;
2057 }
2058 }
2059
2060 /*
2061 * If we have no custom end-query or lookup failed, then try
2062 * pulling it out of the hashtable.
2063 */
2064
2065 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2066
2067 if (t != ROFF_cblock) {
2068 if (tok != ROFF_ig)
2069 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2070 return ROFF_IGN;
2071 }
2072
2073 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2074 }
2075
2076 static int
2077 roff_block_text(ROFF_ARGS)
2078 {
2079
2080 if (tok != ROFF_ig)
2081 roff_setstr(r, r->last->name, buf->buf + pos, 2);
2082
2083 return ROFF_IGN;
2084 }
2085
2086 static int
2087 roff_cond_sub(ROFF_ARGS)
2088 {
2089 char *ep;
2090 int endloop, irc, rr;
2091 enum roff_tok t;
2092
2093 irc = ROFF_IGN;
2094 rr = r->last->rule;
2095 endloop = tok != ROFF_while ? ROFF_IGN :
2096 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2097 if (roffnode_cleanscope(r))
2098 irc |= endloop;
2099
2100 /*
2101 * If `\}' occurs on a macro line without a preceding macro,
2102 * drop the line completely.
2103 */
2104
2105 ep = buf->buf + pos;
2106 if (ep[0] == '\\' && ep[1] == '}')
2107 rr = 0;
2108
2109 /* Always check for the closing delimiter `\}'. */
2110
2111 while ((ep = strchr(ep, '\\')) != NULL) {
2112 switch (ep[1]) {
2113 case '}':
2114 memmove(ep, ep + 2, strlen(ep + 2) + 1);
2115 if (roff_ccond(r, ln, ep - buf->buf))
2116 irc |= endloop;
2117 break;
2118 case '\0':
2119 ++ep;
2120 break;
2121 default:
2122 ep += 2;
2123 break;
2124 }
2125 }
2126
2127 /*
2128 * Fully handle known macros when they are structurally
2129 * required or when the conditional evaluated to true.
2130 */
2131
2132 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2133 irc |= t != TOKEN_NONE && (rr || roffs[t].flags & ROFFMAC_STRUCT) ?
2134 (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs) :
2135 rr ? ROFF_CONT : ROFF_IGN;
2136 return irc;
2137 }
2138
2139 static int
2140 roff_cond_text(ROFF_ARGS)
2141 {
2142 char *ep;
2143 int endloop, irc, rr;
2144
2145 irc = ROFF_IGN;
2146 rr = r->last->rule;
2147 endloop = tok != ROFF_while ? ROFF_IGN :
2148 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2149 if (roffnode_cleanscope(r))
2150 irc |= endloop;
2151
2152 ep = buf->buf + pos;
2153 while ((ep = strchr(ep, '\\')) != NULL) {
2154 if (*(++ep) == '}') {
2155 *ep = '&';
2156 if (roff_ccond(r, ln, ep - buf->buf - 1))
2157 irc |= endloop;
2158 }
2159 if (*ep != '\0')
2160 ++ep;
2161 }
2162 if (rr)
2163 irc |= ROFF_CONT;
2164 return irc;
2165 }
2166
2167 /* --- handling of numeric and conditional expressions -------------------- */
2168
2169 /*
2170 * Parse a single signed integer number. Stop at the first non-digit.
2171 * If there is at least one digit, return success and advance the
2172 * parse point, else return failure and let the parse point unchanged.
2173 * Ignore overflows, treat them just like the C language.
2174 */
2175 static int
2176 roff_getnum(const char *v, int *pos, int *res, int flags)
2177 {
2178 int myres, scaled, n, p;
2179
2180 if (NULL == res)
2181 res = &myres;
2182
2183 p = *pos;
2184 n = v[p] == '-';
2185 if (n || v[p] == '+')
2186 p++;
2187
2188 if (flags & ROFFNUM_WHITE)
2189 while (isspace((unsigned char)v[p]))
2190 p++;
2191
2192 for (*res = 0; isdigit((unsigned char)v[p]); p++)
2193 *res = 10 * *res + v[p] - '0';
2194 if (p == *pos + n)
2195 return 0;
2196
2197 if (n)
2198 *res = -*res;
2199
2200 /* Each number may be followed by one optional scaling unit. */
2201
2202 switch (v[p]) {
2203 case 'f':
2204 scaled = *res * 65536;
2205 break;
2206 case 'i':
2207 scaled = *res * 240;
2208 break;
2209 case 'c':
2210 scaled = *res * 240 / 2.54;
2211 break;
2212 case 'v':
2213 case 'P':
2214 scaled = *res * 40;
2215 break;
2216 case 'm':
2217 case 'n':
2218 scaled = *res * 24;
2219 break;
2220 case 'p':
2221 scaled = *res * 10 / 3;
2222 break;
2223 case 'u':
2224 scaled = *res;
2225 break;
2226 case 'M':
2227 scaled = *res * 6 / 25;
2228 break;
2229 default:
2230 scaled = *res;
2231 p--;
2232 break;
2233 }
2234 if (flags & ROFFNUM_SCALE)
2235 *res = scaled;
2236
2237 *pos = p + 1;
2238 return 1;
2239 }
2240
/*
 * Evaluate a string comparison condition.
 * The character at the parse point is the delimiter.
 * Return 1 if the text between its first and second occurrence
 * equals the text between its second and third occurrence, else 0.
 * The parse point is advanced past the third occurrence or,
 * lacking that, to the end of the line.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim, *lhs, *rhs;
	int		 equal;

	equal = 0;
	delim = v + *pos;		/* opening delimiter */
	lhs = delim + 1;		/* cursor in the first string */
	rhs = strchr(lhs, *delim);	/* middle delimiter, if any */

	if (rhs != NULL) {
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: look for the final delimiter. */
				rhs = strchr(rhs, *delim);
				break;
			}
			if (*rhs == *delim) {
				/* Both strings ended simultaneously. */
				equal = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return equal;
}
2283
2284 /*
2285 * Evaluate an optionally negated single character, numerical,
2286 * or string condition.
2287 */
2288 static int
2289 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2290 {
2291 const char *start, *end;
2292 char *cp, *name;
2293 size_t sz;
2294 int deftype, len, number, savepos, istrue, wanttrue;
2295
2296 if ('!' == v[*pos]) {
2297 wanttrue = 0;
2298 (*pos)++;
2299 } else
2300 wanttrue = 1;
2301
2302 switch (v[*pos]) {
2303 case '\0':
2304 return 0;
2305 case 'n':
2306 case 'o':
2307 (*pos)++;
2308 return wanttrue;
2309 case 'e':
2310 case 't':
2311 case 'v':
2312 (*pos)++;
2313 return !wanttrue;
2314 case 'c':
2315 do {
2316 (*pos)++;
2317 } while (v[*pos] == ' ');
2318
2319 /*
2320 * Quirk for groff compatibility:
2321 * The horizontal tab is neither available nor unavailable.
2322 */
2323
2324 if (v[*pos] == '\t') {
2325 (*pos)++;
2326 return 0;
2327 }
2328
2329 /* Printable ASCII characters are available. */
2330
2331 if (v[*pos] != '\\') {
2332 (*pos)++;
2333 return wanttrue;
2334 }
2335
2336 end = v + ++*pos;
2337 switch (mandoc_escape(&end, &start, &len)) {
2338 case ESCAPE_SPECIAL:
2339 istrue = mchars_spec2cp(start, len) != -1;
2340 break;
2341 case ESCAPE_UNICODE:
2342 istrue = 1;
2343 break;
2344 case ESCAPE_NUMBERED:
2345 istrue = mchars_num2char(start, len) != -1;
2346 break;
2347 default:
2348 istrue = !wanttrue;
2349 break;
2350 }
2351 *pos = end - v;
2352 return istrue == wanttrue;
2353 case 'd':
2354 case 'r':
2355 cp = v + *pos + 1;
2356 while (*cp == ' ')
2357 cp++;
2358 name = cp;
2359 sz = roff_getname(r, &cp, ln, cp - v);
2360 if (sz == 0)
2361 istrue = 0;
2362 else if (v[*pos] == 'r')
2363 istrue = roff_hasregn(r, name, sz);
2364 else {
2365 deftype = ROFFDEF_ANY;
2366 roff_getstrn(r, name, sz, &deftype);
2367 istrue = !!deftype;
2368 }
2369 *pos = cp - v;
2370 return istrue == wanttrue;
2371 default:
2372 break;
2373 }
2374
2375 savepos = *pos;
2376 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2377 return (number > 0) == wanttrue;
2378 else if (*pos == savepos)
2379 return roff_evalstrcond(v, pos) == wanttrue;
2380 else
2381 return 0;
2382 }
2383
/* Request handler that does nothing except return ROFF_IGN. */
static int
roff_line_ignore(ROFF_ARGS)
{

	return ROFF_IGN;
}
2390
2391 static int
2392 roff_insec(ROFF_ARGS)
2393 {
2394
2395 mandoc_msg(MANDOCERR_REQ_INSEC, r->parse,
2396 ln, ppos, roff_name[tok]);
2397 return ROFF_IGN;
2398 }
2399
2400 static int
2401 roff_unsupp(ROFF_ARGS)
2402 {
2403
2404 mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse,
2405 ln, ppos, roff_name[tok]);
2406 return ROFF_IGN;
2407 }
2408
2409 static int
2410 roff_cond(ROFF_ARGS)
2411 {
2412 int irc;
2413
2414 roffnode_push(r, tok, NULL, ln, ppos);
2415
2416 /*
2417 * An `.el' has no conditional body: it will consume the value
2418 * of the current rstack entry set in prior `ie' calls or
2419 * defaults to DENY.
2420 *
2421 * If we're not an `el', however, then evaluate the conditional.
2422 */
2423
2424 r->last->rule = tok == ROFF_el ?
2425 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2426 roff_evalcond(r, ln, buf->buf, &pos);
2427
2428 /*
2429 * An if-else will put the NEGATION of the current evaluated
2430 * conditional into the stack of rules.
2431 */
2432
2433 if (tok == ROFF_ie) {
2434 if (r->rstackpos + 1 == r->rstacksz) {
2435 r->rstacksz += 16;
2436 r->rstack = mandoc_reallocarray(r->rstack,
2437 r->rstacksz, sizeof(int));
2438 }
2439 r->rstack[++r->rstackpos] = !r->last->rule;
2440 }
2441
2442 /* If the parent has false as its rule, then so do we. */
2443
2444 if (r->last->parent && !r->last->parent->rule)
2445 r->last->rule = 0;
2446
2447 /*
2448 * Determine scope.
2449 * If there is nothing on the line after the conditional,
2450 * not even whitespace, use next-line scope.
2451 * Except that .while does not support next-line scope.
2452 */
2453
2454 if (buf->buf[pos] == '\0' && tok != ROFF_while) {
2455 r->last->endspan = 2;
2456 goto out;
2457 }
2458
2459 while (buf->buf[pos] == ' ')
2460 pos++;
2461
2462 /* An opening brace requests multiline scope. */
2463
2464 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2465 r->last->endspan = -1;
2466 pos += 2;
2467 while (buf->buf[pos] == ' ')
2468 pos++;
2469 goto out;
2470 }
2471
2472 /*
2473 * Anything else following the conditional causes
2474 * single-line scope. Warn if the scope contains
2475 * nothing but trailing whitespace.
2476 */
2477
2478 if (buf->buf[pos] == '\0')
2479 mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
2480 ln, ppos, roff_name[tok]);
2481
2482 r->last->endspan = 1;
2483
2484 out:
2485 *offs = pos;
2486 irc = ROFF_RERUN;
2487 if (tok == ROFF_while)
2488 irc |= ROFF_WHILE;
2489 return irc;
2490 }
2491
2492 static int
2493 roff_ds(ROFF_ARGS)
2494 {
2495 char *string;
2496 const char *name;
2497 size_t namesz;
2498
2499 /* Ignore groff compatibility mode for now. */
2500
2501 if (tok == ROFF_ds1)
2502 tok = ROFF_ds;
2503 else if (tok == ROFF_as1)
2504 tok = ROFF_as;
2505
2506 /*
2507 * The first word is the name of the string.
2508 * If it is empty or terminated by an escape sequence,
2509 * abort the `ds' request without defining anything.
2510 */
2511
2512 name = string = buf->buf + pos;
2513 if (*name == '\0')
2514 return ROFF_IGN;
2515
2516 namesz = roff_getname(r, &string, ln, pos);
2517 if (name[namesz] == '\\')
2518 return ROFF_IGN;
2519
2520 /* Read past the initial double-quote, if any. */
2521 if (*string == '"')
2522 string++;
2523
2524 /* The rest is the value. */
2525 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2526 ROFF_as == tok);
2527 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2528 return ROFF_IGN;
2529 }
2530
/*
 * Parse a single operator, one or two characters long.
 * On success, store the operator code in *res, advance the parse
 * point past the operator, and return the code.  Two-character
 * operators use the codes 'l' (<=), 'g' (>=), '!' (<>), 'i' (<?)
 * and 'a' (>?), while "==" is reported as '='.
 * On failure, return 0 and leave the parse point unchanged;
 * *res then holds the unrecognized character.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	int	 width;

	width = 1;
	*res = v[*pos];

	switch (*res) {
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case ':':
		break;
	case '<':
		if (v[*pos + 1] == '=') {
			*res = 'l';
			width = 2;
		} else if (v[*pos + 1] == '>') {
			*res = '!';
			width = 2;
		} else if (v[*pos + 1] == '?') {
			*res = 'i';
			width = 2;
		}
		break;
	case '>':
		if (v[*pos + 1] == '=') {
			*res = 'g';
			width = 2;
		} else if (v[*pos + 1] == '?') {
			*res = 'a';
			width = 2;
		}
		break;
	case '=':
		if (v[*pos + 1] == '=')
			width = 2;
		break;
	default:
		return 0;
	}

	*pos += width;
	return *res;
}
2594
2595 /*
2596 * Evaluate either a parenthesized numeric expression
2597 * or a single signed integer number.
2598 */
2599 static int
2600 roff_evalpar(struct roff *r, int ln,
2601 const char *v, int *pos, int *res, int flags)
2602 {
2603
2604 if ('(' != v[*pos])
2605 return roff_getnum(v, pos, res, flags);
2606
2607 (*pos)++;
2608 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2609 return 0;
2610
2611 /*
2612 * Omission of the closing parenthesis
2613 * is an error in validation mode,
2614 * but ignored in evaluation mode.
2615 */
2616
2617 if (')' == v[*pos])
2618 (*pos)++;
2619 else if (NULL == res)
2620 return 0;
2621
2622 return 1;
2623 }
2624
2625 /*
2626 * Evaluate a complete numeric expression.
2627 * Proceed left to right, there is no concept of precedence.
2628 */
2629 static int
2630 roff_evalnum(struct roff *r, int ln, const char *v,
2631 int *pos, int *res, int flags)
2632 {
2633 int mypos, operand2;
2634 char operator;
2635
2636 if (NULL == pos) {
2637 mypos = 0;
2638 pos = &mypos;
2639 }
2640
2641 if (flags & ROFFNUM_WHITE)
2642 while (isspace((unsigned char)v[*pos]))
2643 (*pos)++;
2644
2645 if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2646 return 0;
2647
2648 while (1) {
2649 if (flags & ROFFNUM_WHITE)
2650 while (isspace((unsigned char)v[*pos]))
2651 (*pos)++;
2652
2653 if ( ! roff_getop(v, pos, &operator))
2654 break;
2655
2656 if (flags & ROFFNUM_WHITE)
2657 while (isspace((unsigned char)v[*pos]))
2658 (*pos)++;
2659
2660 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2661 return 0;
2662
2663 if (flags & ROFFNUM_WHITE)
2664 while (isspace((unsigned char)v[*pos]))
2665 (*pos)++;
2666
2667 if (NULL == res)
2668 continue;
2669
2670 switch (operator) {
2671 case '+':
2672 *res += operand2;
2673 break;
2674 case '-':
2675 *res -= operand2;
2676 break;
2677 case '*':
2678 *res *= operand2;
2679 break;
2680 case '/':
2681 if (operand2 == 0) {
2682 mandoc_msg(MANDOCERR_DIVZERO,
2683 r->parse, ln, *pos, v);
2684 *res = 0;
2685 break;
2686 }
2687 *res /= operand2;
2688 break;
2689 case '%':
2690 if (operand2 == 0) {
2691 mandoc_msg(MANDOCERR_DIVZERO,
2692 r->parse, ln, *pos, v);
2693 *res = 0;
2694 break;
2695 }
2696 *res %= operand2;
2697 break;
2698 case '<':
2699 *res = *res < operand2;
2700 break;
2701 case '>':
2702 *res = *res > operand2;
2703 break;
2704 case 'l':
2705 *res = *res <= operand2;
2706 break;
2707 case 'g':
2708 *res = *res >= operand2;
2709 break;
2710 case '=':
2711 *res = *res == operand2;
2712 break;
2713 case '!':
2714 *res = *res != operand2;
2715 break;
2716 case '&':
2717 *res = *res && operand2;
2718 break;
2719 case ':':
2720 *res = *res || operand2;
2721 break;
2722 case 'i':
2723 if (operand2 < *res)
2724 *res = operand2;
2725 break;
2726 case 'a':
2727 if (operand2 > *res)
2728 *res = operand2;
2729 break;
2730 default:
2731 abort();
2732 }
2733 }
2734 return 1;
2735 }
2736
2737 /* --- register management ------------------------------------------------ */
2738
/*
 * Set, increment ('+') or decrement ('-') the number register
 * with the NUL-terminated name.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	const size_t	 namesz = strlen(name);

	/* INT_MIN tells roff_setregn() not to modify the step. */

	roff_setregn(r, name, namesz, val, sign, INT_MIN);
}
2744
2745 static void
2746 roff_setregn(struct roff *r, const char *name, size_t len,
2747 int val, char sign, int step)
2748 {
2749 struct roffreg *reg;
2750
2751 /* Search for an existing register with the same name. */
2752 reg = r->regtab;
2753
2754 while (reg != NULL && (reg->key.sz != len ||
2755 strncmp(reg->key.p, name, len) != 0))
2756 reg = reg->next;
2757
2758 if (NULL == reg) {
2759 /* Create a new register. */
2760 reg = mandoc_malloc(sizeof(struct roffreg));
2761 reg->key.p = mandoc_strndup(name, len);
2762 reg->key.sz = len;
2763 reg->val = 0;
2764 reg->step = 0;
2765 reg->next = r->regtab;
2766 r->regtab = reg;
2767 }
2768
2769 if ('+' == sign)
2770 reg->val += val;
2771 else if ('-' == sign)
2772 reg->val -= val;
2773 else
2774 reg->val = val;
2775 if (step != INT_MIN)
2776 reg->step = step;
2777 }
2778
2779 /*
2780 * Handle some predefined read-only number registers.
2781 * For now, return -1 if the requested register is not predefined;
2782 * in case a predefined read-only register having the value -1
2783 * were to turn up, another special value would have to be chosen.
2784 */
2785 static int
2786 roff_getregro(const struct roff *r, const char *name)
2787 {
2788
2789 switch (*name) {
2790 case '$': /* Number of arguments of the last macro evaluated. */
2791 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
2792 case 'A': /* ASCII approximation mode is always off. */
2793 return 0;
2794 case 'g': /* Groff compatibility mode is always on. */
2795 return 1;
2796 case 'H': /* Fixed horizontal resolution. */
2797 return 24;
2798 case 'j': /* Always adjust left margin only. */
2799 return 0;
2800 case 'T': /* Some output device is always defined. */
2801 return 1;
2802 case 'V': /* Fixed vertical resolution. */
2803 return 40;
2804 default:
2805 return -1;
2806 }
2807 }
2808
/*
 * Return the value of the number register with the
 * NUL-terminated name, without auto-increment.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	const size_t	 namesz = strlen(name);

	return roff_getregn(r, name, namesz, '\0');
}
2814
2815 static int
2816 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
2817 {
2818 struct roffreg *reg;
2819 int val;
2820
2821 if ('.' == name[0] && 2 == len) {
2822 val = roff_getregro(r, name + 1);
2823 if (-1 != val)
2824 return val;
2825 }
2826
2827 for (reg = r->regtab; reg; reg = reg->next) {
2828 if (len == reg->key.sz &&
2829 0 == strncmp(name, reg->key.p, len)) {
2830 switch (sign) {
2831 case '+':
2832 reg->val += reg->step;
2833 break;
2834 case '-':
2835 reg->val -= reg->step;
2836 break;
2837 default:
2838 break;
2839 }
2840 return reg->val;
2841 }
2842 }
2843
2844 roff_setregn(r, name, len, 0, '\0', INT_MIN);
2845 return 0;
2846 }
2847
2848 static int
2849 roff_hasregn(const struct roff *r, const char *name, size_t len)
2850 {
2851 struct roffreg *reg;
2852 int val;
2853
2854 if ('.' == name[0] && 2 == len) {
2855 val = roff_getregro(r, name + 1);
2856 if (-1 != val)
2857 return 1;
2858 }
2859
2860 for (reg = r->regtab; reg; reg = reg->next)
2861 if (len == reg->key.sz &&
2862 0 == strncmp(name, reg->key.p, len))
2863 return 1;
2864
2865 return 0;
2866 }
2867
2868 static void
2869 roff_freereg(struct roffreg *reg)
2870 {
2871 struct roffreg *old_reg;
2872
2873 while (NULL != reg) {
2874 free(reg->key.p);
2875 old_reg = reg;
2876 reg = reg->next;
2877 free(old_reg);
2878 }
2879 }
2880
2881 static int
2882 roff_nr(ROFF_ARGS)
2883 {
2884 char *key, *val, *step;
2885 size_t keysz;
2886 int iv, is, len;
2887 char sign;
2888
2889 key = val = buf->buf + pos;
2890 if (*key == '\0')
2891 return ROFF_IGN;
2892
2893 keysz = roff_getname(r, &val, ln, pos);
2894 if (key[keysz] == '\\')
2895 return ROFF_IGN;
2896
2897 sign = *val;
2898 if (sign == '+' || sign == '-')
2899 val++;
2900
2901 len = 0;
2902 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
2903 return ROFF_IGN;
2904
2905 step = val + len;
2906 while (isspace((unsigned char)*step))
2907 step++;
2908 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
2909 is = INT_MIN;
2910
2911 roff_setregn(r, key, keysz, iv, sign, is);
2912 return ROFF_IGN;
2913 }
2914
2915 static int
2916 roff_rr(ROFF_ARGS)
2917 {
2918 struct roffreg *reg, **prev;
2919 char *name, *cp;
2920 size_t namesz;
2921
2922 name = cp = buf->buf + pos;
2923 if (*name == '\0')
2924 return ROFF_IGN;
2925 namesz = roff_getname(r, &cp, ln, pos);
2926 name[namesz] = '\0';
2927
2928 prev = &r->regtab;
2929 while (1) {
2930 reg = *prev;
2931 if (reg == NULL || !strcmp(name, reg->key.p))
2932 break;
2933 prev = &reg->next;
2934 }
2935 if (reg != NULL) {
2936 *prev = reg->next;
2937 free(reg->key.p);
2938 free(reg);
2939 }
2940 return ROFF_IGN;
2941 }
2942
2943 /* --- handler functions for roff requests -------------------------------- */
2944
2945 static int
2946 roff_rm(ROFF_ARGS)
2947 {
2948 const char *name;
2949 char *cp;
2950 size_t namesz;
2951
2952 cp = buf->buf + pos;
2953 while (*cp != '\0') {
2954 name = cp;
2955 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
2956 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
2957 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2958 if (name[namesz] == '\\')
2959 break;
2960 }
2961 return ROFF_IGN;
2962 }
2963
2964 static int
2965 roff_it(ROFF_ARGS)
2966 {
2967 int iv;
2968
2969 /* Parse the number of lines. */
2970
2971 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
2972 mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
2973 ln, ppos, buf->buf + 1);
2974 return ROFF_IGN;
2975 }
2976
2977 while (isspace((unsigned char)buf->buf[pos]))
2978 pos++;
2979
2980 /*
2981 * Arm the input line trap.
2982 * Special-casing "an-trap" is an ugly workaround to cope
2983 * with DocBook stupidly fiddling with man(7) internals.
2984 */
2985
2986 roffit_lines = iv;
2987 roffit_macro = mandoc_strdup(iv != 1 ||
2988 strcmp(buf->buf + pos, "an-trap") ?
2989 buf->buf + pos : "br");
2990 return ROFF_IGN;
2991 }
2992
2993 static int
2994 roff_Dd(ROFF_ARGS)
2995 {
2996 int mask;
2997 enum roff_tok t, te;
2998
2999 switch (tok) {
3000 case ROFF_Dd:
3001 tok = MDOC_Dd;
3002 te = MDOC_MAX;
3003 if (r->format == 0)
3004 r->format = MPARSE_MDOC;
3005 mask = MPARSE_MDOC | MPARSE_QUICK;
3006 break;
3007 case ROFF_TH:
3008 tok = MAN_TH;
3009 te = MAN_MAX;
3010 if (r->format == 0)
3011 r->format = MPARSE_MAN;
3012 mask = MPARSE_QUICK;
3013 break;
3014 default:
3015 abort();
3016 }
3017 if ((r->options & mask) == 0)
3018 for (t = tok; t < te; t++)
3019 roff_setstr(r, roff_name[t], NULL, 0);
3020 return ROFF_CONT;
3021 }
3022
3023 static int
3024 roff_TE(ROFF_ARGS)
3025 {
3026 if (r->tbl == NULL) {
3027 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
3028 ln, ppos, "TE");
3029 return ROFF_IGN;
3030 }
3031 if (tbl_end(r->tbl) == 0) {
3032 r->tbl = NULL;
3033 free(buf->buf);
3034 buf->buf = mandoc_strdup(".sp");
3035 buf->sz = 4;
3036 *offs = 0;
3037 return ROFF_REPARSE;
3038 }
3039 r->tbl = NULL;
3040 return ROFF_IGN;
3041 }
3042
3043 static int
3044 roff_T_(ROFF_ARGS)
3045 {
3046
3047 if (NULL == r->tbl)
3048 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
3049 ln, ppos, "T&");
3050 else
3051 tbl_restart(ln, ppos, r->tbl);
3052
3053 return ROFF_IGN;
3054 }
3055
3056 /*
3057 * Handle in-line equation delimiters.
3058 */
3059 static int
3060 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
3061 {
3062 char *cp1, *cp2;
3063 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
3064
3065 /*
3066 * Outside equations, look for an opening delimiter.
3067 * If we are inside an equation, we already know it is
3068 * in-line, or this function wouldn't have been called;
3069 * so look for a closing delimiter.
3070 */
3071
3072 cp1 = buf->buf + pos;
3073 cp2 = strchr(cp1, r->eqn == NULL ?
3074 r->last_eqn->odelim : r->last_eqn->cdelim);
3075 if (cp2 == NULL)
3076 return ROFF_CONT;
3077
3078 *cp2++ = '\0';
3079 bef_pr = bef_nl = aft_nl = aft_pr = "";
3080
3081 /* Handle preceding text, protecting whitespace. */
3082
3083 if (*buf->buf != '\0') {
3084 if (r->eqn == NULL)
3085 bef_pr = "\\&";
3086 bef_nl = "\n";
3087 }
3088
3089 /*
3090 * Prepare replacing the delimiter with an equation macro
3091 * and drop leading white space from the equation.
3092 */
3093
3094 if (r->eqn == NULL) {
3095 while (*cp2 == ' ')
3096 cp2++;
3097 mac = ".EQ";
3098 } else
3099 mac = ".EN";
3100
3101 /* Handle following text, protecting whitespace. */
3102
3103 if (*cp2 != '\0') {
3104 aft_nl = "\n";
3105 if (r->eqn != NULL)
3106 aft_pr = "\\&";
3107 }
3108
3109 /* Do the actual replacement. */
3110
3111 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
3112 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
3113 free(buf->buf);
3114 buf->buf = cp1;
3115
3116 /* Toggle the in-line state of the eqn subsystem. */
3117
3118 r->eqn_inline = r->eqn == NULL;
3119 return ROFF_REPARSE;
3120 }
3121
3122 static int
3123 roff_EQ(ROFF_ARGS)
3124 {
3125 struct roff_node *n;
3126
3127 if (r->man->macroset == MACROSET_MAN)
3128 man_breakscope(r->man, ROFF_EQ);
3129 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3130 if (ln > r->man->last->line)
3131 n->flags |= NODE_LINE;
3132 n->eqn = mandoc_calloc(1, sizeof(*n->eqn));
3133 n->eqn->expectargs = UINT_MAX;
3134 roff_node_append(r->man, n);
3135 r->man->next = ROFF_NEXT_SIBLING;
3136
3137 assert(r->eqn == NULL);
3138 if (r->last_eqn == NULL)
3139 r->last_eqn = eqn_alloc(r->parse);
3140 else
3141 eqn_reset(r->last_eqn);
3142 r->eqn = r->last_eqn;
3143 r->eqn->node = n;
3144
3145 if (buf->buf[pos] != '\0')
3146 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
3147 ".EQ %s", buf->buf + pos);
3148
3149 return ROFF_IGN;
3150 }
3151
3152 static int
3153 roff_EN(ROFF_ARGS)
3154 {
3155 if (r->eqn != NULL) {
3156 eqn_parse(r->eqn);
3157 r->eqn = NULL;
3158 } else
3159 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
3160 if (buf->buf[pos] != '\0')
3161 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
3162 "EN %s", buf->buf + pos);
3163 return ROFF_IGN;
3164 }
3165
3166 static int
3167 roff_TS(ROFF_ARGS)
3168 {
3169 if (r->tbl != NULL) {
3170 mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
3171 ln, ppos, "TS breaks TS");
3172 tbl_end(r->tbl);
3173 }
3174 r->tbl = tbl_alloc(ppos, ln, r->parse);
3175 if (r->last_tbl)
3176 r->last_tbl->next = r->tbl;
3177 else
3178 r->first_tbl = r->tbl;
3179 r->last_tbl = r->tbl;
3180 return ROFF_IGN;
3181 }
3182
3183 static int
3184 roff_onearg(ROFF_ARGS)
3185 {
3186 struct roff_node *n;
3187 char *cp;
3188 int npos;
3189
3190 if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3191 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3192 tok == ROFF_ti))
3193 man_breakscope(r->man, tok);
3194
3195 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3196 r->man->last = roffce_node;
3197 r->man->next = ROFF_NEXT_SIBLING;
3198 }
3199
3200 roff_elem_alloc(r->man, ln, ppos, tok);
3201 n = r->man->last;
3202
3203 cp = buf->buf + pos;
3204 if (*cp != '\0') {
3205 while (*cp != '\0' && *cp != ' ')
3206 cp++;
3207 while (*cp == ' ')
3208 *cp++ = '\0';
3209 if (*cp != '\0')
3210 mandoc_vmsg(MANDOCERR_ARG_EXCESS,
3211 r->parse, ln, cp - buf->buf,
3212 "%s ... %s", roff_name[tok], cp);
3213 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3214 }
3215
3216 if (tok == ROFF_ce || tok == ROFF_rj) {
3217 if (r->man->last->type == ROFFT_ELEM) {
3218 roff_word_alloc(r->man, ln, pos, "1");
3219 r->man->last->flags |= NODE_NOSRC;
3220 }
3221 npos = 0;
3222 if (roff_evalnum(r, ln, r->man->last->string, &npos,
3223 &roffce_lines, 0) == 0) {
3224 mandoc_vmsg(MANDOCERR_CE_NONUM,
3225 r->parse, ln, pos, "ce %s", buf->buf + pos);
3226 roffce_lines = 1;
3227 }
3228 if (roffce_lines < 1) {
3229 r->man->last = r->man->last->parent;
3230 roffce_node = NULL;
3231 roffce_lines = 0;
3232 } else
3233 roffce_node = r->man->last->parent;
3234 } else {
3235 n->flags |= NODE_VALID | NODE_ENDED;
3236 r->man->last = n;
3237 }
3238 n->flags |= NODE_LINE;
3239 r->man->next = ROFF_NEXT_SIBLING;
3240 return ROFF_IGN;
3241 }
3242
3243 static int
3244 roff_manyarg(ROFF_ARGS)
3245 {
3246 struct roff_node *n;
3247 char *sp, *ep;
3248
3249 roff_elem_alloc(r->man, ln, ppos, tok);
3250 n = r->man->last;
3251
3252 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3253 while (*ep != '\0' && *ep != ' ')
3254 ep++;
3255 while (*ep == ' ')
3256 *ep++ = '\0';
3257 roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3258 }
3259
3260 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3261 r->man->last = n;
3262 r->man->next = ROFF_NEXT_SIBLING;
3263 return ROFF_IGN;
3264 }
3265
3266 static int
3267 roff_als(ROFF_ARGS)
3268 {
3269 char *oldn, *newn, *end, *value;
3270 size_t oldsz, newsz, valsz;
3271
3272 newn = oldn = buf->buf + pos;
3273 if (*newn == '\0')
3274 return ROFF_IGN;
3275
3276 newsz = roff_getname(r, &oldn, ln, pos);
3277 if (newn[newsz] == '\\' || *oldn == '\0')
3278 return ROFF_IGN;
3279
3280 end = oldn;
3281 oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3282 if (oldsz == 0)
3283 return ROFF_IGN;
3284
3285 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3286 (int)oldsz, oldn);
3287 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3288 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3289 free(value);
3290 return ROFF_IGN;
3291 }
3292
3293 static int
3294 roff_br(ROFF_ARGS)
3295 {
3296 if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3297 man_breakscope(r->man, ROFF_br);
3298 roff_elem_alloc(r->man, ln, ppos, ROFF_br);
3299 if (buf->buf[pos] != '\0')
3300 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
3301 "%s %s", roff_name[tok], buf->buf + pos);
3302 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3303 r->man->next = ROFF_NEXT_SIBLING;
3304 return ROFF_IGN;
3305 }
3306
3307 static int
3308 roff_cc(ROFF_ARGS)
3309 {
3310 const char *p;
3311
3312 p = buf->buf + pos;
3313
3314 if (*p == '\0' || (r->control = *p++) == '.')
3315 r->control = '\0';
3316
3317 if (*p != '\0')
3318 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
3319 ln, p - buf->buf, "cc ... %s", p);
3320
3321 return ROFF_IGN;
3322 }
3323
3324 static int
3325 roff_ec(ROFF_ARGS)
3326 {
3327 const char *p;
3328
3329 p = buf->buf + pos;
3330 if (*p == '\0')
3331 r->escape = '\\';
3332 else {
3333 r->escape = *p;
3334 if (*++p != '\0')
3335 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
3336 ln, p - buf->buf, "ec ... %s", p);
3337 }
3338 return ROFF_IGN;
3339 }
3340
3341 static int
3342 roff_eo(ROFF_ARGS)
3343 {
3344 r->escape = '\0';
3345 if (buf->buf[pos] != '\0')
3346 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse,
3347 ln, pos, "eo %s", buf->buf + pos);
3348 return ROFF_IGN;
3349 }
3350
3351 static int
3352 roff_nop(ROFF_ARGS)
3353 {
3354 while (buf->buf[pos] == ' ')
3355 pos++;
3356 *offs = pos;
3357 return ROFF_RERUN;
3358 }
3359
3360 static int
3361 roff_tr(ROFF_ARGS)
3362 {
3363 const char *p, *first, *second;
3364 size_t fsz, ssz;
3365 enum mandoc_esc esc;
3366
3367 p = buf->buf + pos;
3368
3369 if (*p == '\0') {
3370 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr");
3371 return ROFF_IGN;
3372 }
3373
3374 while (*p != '\0') {
3375 fsz = ssz = 1;
3376
3377 first = p++;
3378 if (*first == '\\') {
3379 esc = mandoc_escape(&p, NULL, NULL);
3380 if (esc == ESCAPE_ERROR) {
3381 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
3382 ln, (int)(p - buf->buf), first);
3383 return ROFF_IGN;
3384 }
3385 fsz = (size_t)(p - first);
3386 }
3387
3388 second = p++;
3389 if (*second == '\\') {
3390 esc = mandoc_escape(&p, NULL, NULL);
3391 if (esc == ESCAPE_ERROR) {
3392 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
3393 ln, (int)(p - buf->buf), second);
3394 return ROFF_IGN;
3395 }
3396 ssz = (size_t)(p - second);
3397 } else if (*second == '\0') {
3398 mandoc_vmsg(MANDOCERR_TR_ODD, r->parse,
3399 ln, first - buf->buf, "tr %s", first);
3400 second = " ";
3401 p--;
3402 }
3403
3404 if (fsz > 1) {
3405 roff_setstrn(&r->xmbtab, first, fsz,
3406 second, ssz, 0);
3407 continue;
3408 }
3409
3410 if (r->xtab == NULL)
3411 r->xtab = mandoc_calloc(128,
3412 sizeof(struct roffstr));
3413
3414 free(r->xtab[(int)*first].p);
3415 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3416 r->xtab[(int)*first].sz = ssz;
3417 }
3418
3419 return ROFF_IGN;
3420 }
3421
3422 /*
3423 * Implementation of the .return request.
3424 * There is no need to call roff_userret() from here.
3425 * The read module will call that after rewinding the reader stack
3426 * to the place from where the current macro was called.
3427 */
3428 static int
3429 roff_return(ROFF_ARGS)
3430 {
3431 if (r->mstackpos >= 0)
3432 return ROFF_IGN | ROFF_USERRET;
3433
3434 mandoc_msg(MANDOCERR_REQ_NOMAC, r->parse, ln, ppos, "return");
3435 return ROFF_IGN;
3436 }
3437
3438 static int
3439 roff_rn(ROFF_ARGS)
3440 {
3441 const char *value;
3442 char *oldn, *newn, *end;
3443 size_t oldsz, newsz;
3444 int deftype;
3445
3446 oldn = newn = buf->buf + pos;
3447 if (*oldn == '\0')
3448 return ROFF_IGN;
3449
3450 oldsz = roff_getname(r, &newn, ln, pos);
3451 if (oldn[oldsz] == '\\' || *newn == '\0')
3452 return ROFF_IGN;
3453
3454 end = newn;
3455 newsz = roff_getname(r, &end, ln, newn - buf->buf);
3456 if (newsz == 0)
3457 return ROFF_IGN;
3458
3459 deftype = ROFFDEF_ANY;
3460 value = roff_getstrn(r, oldn, oldsz, &deftype);
3461 switch (deftype) {
3462 case ROFFDEF_USER:
3463 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3464 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3465 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3466 break;
3467 case ROFFDEF_PRE:
3468 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3469 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3470 break;
3471 case ROFFDEF_REN:
3472 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3473 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3474 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3475 break;
3476 case ROFFDEF_STD:
3477 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3478 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3479 break;
3480 default:
3481 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3482 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3483 break;
3484 }
3485 return ROFF_IGN;
3486 }
3487
3488 static int
3489 roff_shift(ROFF_ARGS)
3490 {
3491 struct mctx *ctx;
3492 int levels, i;
3493
3494 levels = 1;
3495 if (buf->buf[pos] != '\0' &&
3496 roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
3497 mandoc_vmsg(MANDOCERR_CE_NONUM, r->parse,
3498 ln, pos, "shift %s", buf->buf + pos);
3499 levels = 1;
3500 }
3501 if (r->mstackpos < 0) {
3502 mandoc_msg(MANDOCERR_REQ_NOMAC, r->parse, ln, ppos, "shift");
3503 return ROFF_IGN;
3504 }
3505 ctx = r->mstack + r->mstackpos;
3506 if (levels > ctx->argc) {
3507 mandoc_vmsg(MANDOCERR_SHIFT, r->parse,
3508 ln, pos, "%d, but max is %d", levels, ctx->argc);
3509 levels = ctx->argc;
3510 }
3511 if (levels == 0)
3512 return ROFF_IGN;
3513 for (i = 0; i < levels; i++)
3514 free(ctx->argv[i]);
3515 ctx->argc -= levels;
3516 for (i = 0; i < ctx->argc; i++)
3517 ctx->argv[i] = ctx->argv[i + levels];
3518 return ROFF_IGN;
3519 }
3520
3521 static int
3522 roff_so(ROFF_ARGS)
3523 {
3524 char *name, *cp;
3525
3526 name = buf->buf + pos;
3527 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
3528
3529 /*
3530 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3531 * opening anything that's not in our cwd or anything beneath
3532 * it. Thus, explicitly disallow traversing up the file-system
3533 * or using absolute paths.
3534 */
3535
3536 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3537 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
3538 ".so %s", name);
3539 buf->sz = mandoc_asprintf(&cp,
3540 ".sp\nSee the file %s.\n.sp", name) + 1;
3541 free(buf->buf);
3542 buf->buf = cp;
3543 *offs = 0;
3544 return ROFF_REPARSE;
3545 }
3546
3547 *offs = pos;
3548 return ROFF_SO;
3549 }
3550
3551 /* --- user defined strings and macros ------------------------------------ */
3552
3553 static int
3554 roff_userdef(ROFF_ARGS)
3555 {
3556 struct mctx *ctx;
3557 char *arg, *ap, *dst, *src;
3558 size_t sz;
3559
3560 /* Initialize a new macro stack context. */
3561
3562 if (++r->mstackpos == r->mstacksz) {
3563 r->mstack = mandoc_recallocarray(r->mstack,
3564 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
3565 r->mstacksz += 8;
3566 }
3567 ctx = r->mstack + r->mstackpos;
3568 ctx->argsz = 0;
3569 ctx->argc = 0;
3570 ctx->argv = NULL;
3571
3572 /*
3573 * Collect pointers to macro argument strings,
3574 * NUL-terminating them and escaping quotes.
3575 */
3576
3577 src = buf->buf + pos;
3578 while (*src != '\0') {
3579 if (ctx->argc == ctx->argsz) {
3580 ctx->argsz += 8;
3581 ctx->argv = mandoc_reallocarray(ctx->argv,
3582 ctx->argsz, sizeof(*ctx->argv));
3583 }
3584 arg = mandoc_getarg(r->parse, &src, ln, &pos);
3585 sz = 1; /* For the terminating NUL. */
3586 for (ap = arg; *ap != '\0'; ap++)
3587 sz += *ap == '"' ? 4 : 1;
3588 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
3589 for (ap = arg; *ap != '\0'; ap++) {
3590 if (*ap == '"') {
3591 memcpy(dst, "\\(dq", 4);
3592 dst += 4;
3593 } else
3594 *dst++ = *ap;
3595 }
3596 *dst = '\0';
3597 }
3598
3599 /* Replace the macro invocation by the macro definition. */
3600
3601 free(buf->buf);
3602 buf->buf = mandoc_strdup(r->current_string);
3603 buf->sz = strlen(buf->buf) + 1;
3604 *offs = 0;
3605
3606 return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
3607 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
3608 }
3609
3610 /*
3611 * Calling a high-level macro that was renamed with .rn.
3612 * r->current_string has already been set up by roff_parse().
3613 */
3614 static int
3615 roff_renamed(ROFF_ARGS)
3616 {
3617 char *nbuf;
3618
3619 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
3620 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
3621 free(buf->buf);
3622 buf->buf = nbuf;
3623 *offs = 0;
3624 return ROFF_CONT;
3625 }
3626
3627 static size_t
3628 roff_getname(struct roff *r, char **cpp, int ln, int pos)
3629 {
3630 char *name, *cp;
3631 size_t namesz;
3632
3633 name = *cpp;
3634 if ('\0' == *name)
3635 return 0;
3636
3637 /* Read until end of name and terminate it with NUL. */
3638 for (cp = name; 1; cp++) {
3639 if ('\0' == *cp || ' ' == *cp) {
3640 namesz = cp - name;
3641 break;
3642 }
3643 if ('\\' != *cp)
3644 continue;
3645 namesz = cp - name;
3646 if ('{' == cp[1] || '}' == cp[1])
3647 break;
3648 cp++;
3649 if ('\\' == *cp)
3650 continue;
3651 mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
3652 "%.*s", (int)(cp - name + 1), name);
3653 mandoc_escape((const char **)&cp, NULL, NULL);
3654 break;
3655 }
3656
3657 /* Read past spaces. */
3658 while (' ' == *cp)
3659 cp++;
3660
3661 *cpp = cp;
3662 return namesz;
3663 }
3664
3665 /*
3666 * Store *string into the user-defined string called *name.
3667 * To clear an existing entry, call with (*r, *name, NULL, 0).
3668 * append == 0: replace mode
3669 * append == 1: single-line append mode
3670 * append == 2: multiline append mode, append '\n' after each call
3671 */
3672 static void
3673 roff_setstr(struct roff *r, const char *name, const char *string,
3674 int append)
3675 {
3676 size_t namesz;
3677
3678 namesz = strlen(name);
3679 roff_setstrn(&r->strtab, name, namesz, string,
3680 string ? strlen(string) : 0, append);
3681 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3682 }
3683
3684 static void
3685 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
3686 const char *string, size_t stringsz, int append)
3687 {
3688 struct roffkv *n;
3689 char *c;
3690 int i;
3691 size_t oldch, newch;
3692
3693 /* Search for an existing string with the same name. */
3694 n = *r;
3695
3696 while (n && (namesz != n->key.sz ||
3697 strncmp(n->key.p, name, namesz)))
3698 n = n->next;
3699
3700 if (NULL == n) {
3701 /* Create a new string table entry. */
3702 n = mandoc_malloc(sizeof(struct roffkv));
3703 n->key.p = mandoc_strndup(name, namesz);
3704 n->key.sz = namesz;
3705 n->val.p = NULL;
3706 n->val.sz = 0;
3707 n->next = *r;
3708 *r = n;
3709 } else if (0 == append) {
3710 free(n->val.p);
3711 n->val.p = NULL;
3712 n->val.sz = 0;
3713 }
3714
3715 if (NULL == string)
3716 return;
3717
3718 /*
3719 * One additional byte for the '\n' in multiline mode,
3720 * and one for the terminating '\0'.
3721 */
3722 newch = stringsz + (1 < append ? 2u : 1u);
3723
3724 if (NULL == n->val.p) {
3725 n->val.p = mandoc_malloc(newch);
3726 *n->val.p = '\0';
3727 oldch = 0;
3728 } else {
3729 oldch = n->val.sz;
3730 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
3731 }
3732
3733 /* Skip existing content in the destination buffer. */
3734 c = n->val.p + (int)oldch;
3735
3736 /* Append new content to the destination buffer. */
3737 i = 0;
3738 while (i < (int)stringsz) {
3739 /*
3740 * Rudimentary roff copy mode:
3741 * Handle escaped backslashes.
3742 */
3743 if ('\\' == string[i] && '\\' == string[i + 1])
3744 i++;
3745 *c++ = string[i++];
3746 }
3747
3748 /* Append terminating bytes. */
3749 if (1 < append)
3750 *c++ = '\n';
3751
3752 *c = '\0';
3753 n->val.sz = (int)(c - n->val.p);
3754 }
3755
3756 static const char *
3757 roff_getstrn(struct roff *r, const char *name, size_t len,
3758 int *deftype)
3759 {
3760 const struct roffkv *n;
3761 int found, i;
3762 enum roff_tok tok;
3763
3764 found = 0;
3765 for (n = r->strtab; n != NULL; n = n->next) {
3766 if (strncmp(name, n->key.p, len) != 0 ||
3767 n->key.p[len] != '\0' || n->val.p == NULL)
3768 continue;
3769 if (*deftype & ROFFDEF_USER) {
3770 *deftype = ROFFDEF_USER;
3771 return n->val.p;
3772 } else {
3773 found = 1;
3774 break;
3775 }
3776 }
3777 for (n = r->rentab; n != NULL; n = n->next) {
3778 if (strncmp(name, n->key.p, len) != 0 ||
3779 n->key.p[len] != '\0' || n->val.p == NULL)
3780 continue;
3781 if (*deftype & ROFFDEF_REN) {
3782 *deftype = ROFFDEF_REN;
3783 return n->val.p;
3784 } else {
3785 found = 1;
3786 break;
3787 }
3788 }
3789 for (i = 0; i < PREDEFS_MAX; i++) {
3790 if (strncmp(name, predefs[i].name, len) != 0 ||
3791 predefs[i].name[len] != '\0')
3792 continue;
3793 if (*deftype & ROFFDEF_PRE) {
3794 *deftype = ROFFDEF_PRE;
3795 return predefs[i].str;
3796 } else {
3797 found = 1;
3798 break;
3799 }
3800 }
3801 if (r->man->macroset != MACROSET_MAN) {
3802 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
3803 if (strncmp(name, roff_name[tok], len) != 0 ||
3804 roff_name[tok][len] != '\0')
3805 continue;
3806 if (*deftype & ROFFDEF_STD) {
3807 *deftype = ROFFDEF_STD;
3808 return NULL;
3809 } else {
3810 found = 1;
3811 break;
3812 }
3813 }
3814 }
3815 if (r->man->macroset != MACROSET_MDOC) {
3816 for (tok = MAN_TH; tok < MAN_MAX; tok++) {
3817 if (strncmp(name, roff_name[tok], len) != 0 ||
3818 roff_name[tok][len] != '\0')
3819 continue;
3820 if (*deftype & ROFFDEF_STD) {
3821 *deftype = ROFFDEF_STD;
3822 return NULL;
3823 } else {
3824 found = 1;
3825 break;
3826 }
3827 }
3828 }
3829
3830 if (found == 0 && *deftype != ROFFDEF_ANY) {
3831 if (*deftype & ROFFDEF_REN) {
3832 /*
3833 * This might still be a request,
3834 * so do not treat it as undefined yet.
3835 */
3836 *deftype = ROFFDEF_UNDEF;
3837 return NULL;
3838 }
3839
3840 /* Using an undefined string defines it to be empty. */
3841
3842 roff_setstrn(&r->strtab, name, len, "", 0, 0);
3843 roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
3844 }
3845
3846 *deftype = 0;
3847 return NULL;
3848 }
3849
3850 static void
3851 roff_freestr(struct roffkv *r)
3852 {
3853 struct roffkv *n, *nn;
3854
3855 for (n = r; n; n = nn) {
3856 free(n->key.p);
3857 free(n->val.p);
3858 nn = n->next;
3859 free(n);
3860 }
3861 }
3862
3863 /* --- accessors and utility functions ------------------------------------ */
3864
3865 /*
3866 * Duplicate an input string, making the appropriate character
3867 * conversations (as stipulated by `tr') along the way.
3868 * Returns a heap-allocated string with all the replacements made.
3869 */
3870 char *
3871 roff_strdup(const struct roff *r, const char *p)
3872 {
3873 const struct roffkv *cp;
3874 char *res;
3875 const char *pp;
3876 size_t ssz, sz;
3877 enum mandoc_esc esc;
3878
3879 if (NULL == r->xmbtab && NULL == r->xtab)
3880 return mandoc_strdup(p);
3881 else if ('\0' == *p)
3882 return mandoc_strdup("");
3883
3884 /*
3885 * Step through each character looking for term matches
3886 * (remember that a `tr' can be invoked with an escape, which is
3887 * a glyph but the escape is multi-character).
3888 * We only do this if the character hash has been initialised
3889 * and the string is >0 length.
3890 */
3891
3892 res = NULL;
3893 ssz = 0;
3894
3895 while ('\0' != *p) {
3896 assert((unsigned int)*p < 128);
3897 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
3898 sz = r->xtab[(int)*p].sz;
3899 res = mandoc_realloc(res, ssz + sz + 1);
3900 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
3901 ssz += sz;
3902 p++;
3903 continue;
3904 } else if ('\\' != *p) {
3905 res = mandoc_realloc(res, ssz + 2);
3906 res[ssz++] = *p++;
3907 continue;
3908 }
3909
3910 /* Search for term matches. */
3911 for (cp = r->xmbtab; cp; cp = cp->next)
3912 if (0 == strncmp(p, cp->key.p, cp->key.sz))
3913 break;
3914
3915 if (NULL != cp) {
3916 /*
3917 * A match has been found.
3918 * Append the match to the array and move
3919 * forward by its keysize.
3920 */
3921 res = mandoc_realloc(res,
3922 ssz + cp->val.sz + 1);
3923 memcpy(res + ssz, cp->val.p, cp->val.sz);
3924 ssz += cp->val.sz;
3925 p += (int)cp->key.sz;
3926 continue;
3927 }
3928
3929 /*
3930 * Handle escapes carefully: we need to copy
3931 * over just the escape itself, or else we might
3932 * do replacements within the escape itself.
3933 * Make sure to pass along the bogus string.
3934 */
3935 pp = p++;
3936 esc = mandoc_escape(&p, NULL, NULL);
3937 if (ESCAPE_ERROR == esc) {
3938 sz = strlen(pp);
3939 res = mandoc_realloc(res, ssz + sz + 1);
3940 memcpy(res + ssz, pp, sz);
3941 break;
3942 }
3943 /*
3944 * We bail out on bad escapes.
3945 * No need to warn: we already did so when
3946 * roff_res() was called.
3947 */
3948 sz = (int)(p - pp);
3949 res = mandoc_realloc(res, ssz + sz + 1);
3950 memcpy(res + ssz, pp, sz);
3951 ssz += sz;
3952 }
3953
3954 res[(int)ssz] = '\0';
3955 return res;
3956 }
3957
3958 int
3959 roff_getformat(const struct roff *r)
3960 {
3961
3962 return r->format;
3963 }
3964
3965 /*
3966 * Find out whether a line is a macro line or not.
3967 * If it is, adjust the current position and return one; if it isn't,
3968 * return zero and don't change the current position.
3969 * If the control character has been set with `.cc', then let that grain
3970 * precedence.
3971 * This is slighly contrary to groff, where using the non-breaking
3972 * control character when `cc' has been invoked will cause the
3973 * non-breaking macro contents to be printed verbatim.
3974 */
3975 int
3976 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
3977 {
3978 int pos;
3979
3980 pos = *ppos;
3981
3982 if (r->control != '\0' && cp[pos] == r->control)
3983 pos++;
3984 else if (r->control != '\0')
3985 return 0;
3986 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
3987 pos += 2;
3988 else if ('.' == cp[pos] || '\'' == cp[pos])
3989 pos++;
3990 else
3991 return 0;
3992
3993 while (' ' == cp[pos] || '\t' == cp[pos])
3994 pos++;
3995
3996 *ppos = pos;
3997 return 1;
3998 }