]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
bd8c8250a99b33678efabc6b2d18ad1611c8c735
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.334 2018/08/18 21:37:01 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31 #include "mandoc.h"
32 #include "mandoc_aux.h"
33 #include "mandoc_ohash.h"
34 #include "roff.h"
35 #include "libmandoc.h"
36 #include "roff_int.h"
37 #include "libroff.h"
38
/*
 * Upper bound on the number of string expansions performed for a
 * single input line; it exists to break infinite expansion loops.
 */
#define	EXPAND_LIMIT	1000
41
/* Bit flags describing how a macro or string name is defined. */
#define	ROFFDEF_USER	(1 << 1)  /* Defined by the user. */
#define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
#define	ROFFDEF_REN	(1 << 3)  /* Standard macro that was renamed. */
#define	ROFFDEF_STD	(1 << 4)  /* Macro of mdoc(7) or man(7). */
#define	ROFFDEF_UNDEF	(1 << 5)  /* Not defined at all. */

/* Every kind of definition; ROFFDEF_UNDEF is deliberately not included. */
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
			 ROFFDEF_REN | ROFFDEF_STD)
50
51 /* --- data types --------------------------------------------------------- */
52
/*
 * Minimal string buffer: a pointer to the characters together
 * with their cached length.
 */
struct	roffstr {
	char		*p;	/* NUL-terminated character data */
	size_t		 sz;	/* cached strlen(p) */
};
60
61 /*
62 * A key-value roffstr pair as part of a singly-linked list.
63 */
64 struct roffkv {
65 struct roffstr key;
66 struct roffstr val;
67 struct roffkv *next; /* next in list */
68 };
69
70 /*
71 * A single number register as part of a singly-linked list.
72 */
73 struct roffreg {
74 struct roffstr key;
75 int val;
76 int step;
77 struct roffreg *next;
78 };
79
80 /*
81 * Association of request and macro names with token IDs.
82 */
83 struct roffreq {
84 enum roff_tok tok;
85 char name[];
86 };
87
/*
 * Complete state of one roff parser instance.
 */
struct	roff {
	struct mparse	*parse;		/* parse point */
	struct roff_man	*man;		/* mdoc or man parser */
	struct roffnode	*last;		/* leaf of the block stack */
	int		*rstack;	/* stack of inverted `ie' values */
	struct ohash	*reqtab;	/* request lookup table */
	struct roffreg	*regtab;	/* number registers */
	struct roffkv	*strtab;	/* user-defined strings & macros */
	struct roffkv	*rentab;	/* renamed strings & macros */
	struct roffkv	*xmbtab;	/* multi-byte trans table (`tr') */
	struct roffstr	*xtab;		/* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl;	/* first table parsed */
	struct tbl_node	*last_tbl;	/* last table parsed */
	struct tbl_node	*tbl;		/* table currently being parsed */
	struct eqn_node	*last_eqn;	/* equation parser */
	struct eqn_node	*eqn;		/* active equation parser */
	int		 eqn_inline;	/* current equation is inline */
	int		 options;	/* parse options */
	int		 rstacksz;	/* current size limit of rstack */
	int		 rstackpos;	/* position in rstack */
	int		 format;	/* current file in mdoc or man format */
	int		 argc;		/* number of args of the last macro */
	char		 control;	/* control character */
	char		 escape;	/* escape character */
};
114
115 struct roffnode {
116 enum roff_tok tok; /* type of node */
117 struct roffnode *parent; /* up one in stack */
118 int line; /* parse line */
119 int col; /* parse col */
120 char *name; /* node name, e.g. macro name */
121 char *end; /* end-rules: custom token */
122 int endspan; /* end-rules: next-line or infty */
123 int rule; /* current evaluation rule */
124 };
125
/*
 * Parameter list shared by all request handlers, and the
 * function pointer type of such a handler.
 */
#define	ROFF_ARGS	struct roff *r,		/* parser context */ \
			enum roff_tok tok,	/* token of the macro */ \
			struct buf *buf,	/* input buffer */ \
			int ln,			/* parse line */ \
			int ppos,		/* original position in buffer */ \
			int pos,		/* current position in buffer */ \
			int *offs		/* reset offset of buffer data */

typedef	enum rofferr (*roffproc)(ROFF_ARGS);
135
136 struct roffmac {
137 roffproc proc; /* process new macro */
138 roffproc text; /* process as child text of macro */
139 roffproc sub; /* process as child of macro */
140 int flags;
141 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
142 };
143
/*
 * A predefined string: its input name and what it is replaced with.
 */
struct	predef {
	const char	*name;	/* predefined input name */
	const char	*str;	/* replacement symbol */
};

/* Expands to one initializer of struct predef, with trailing comma. */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
151
152 /* --- function prototypes ------------------------------------------------ */
153
/* Forward declarations of all file-local functions. */
static void		 roffnode_cleanscope(struct roff *);
static void		 roffnode_pop(struct roff *);
static void		 roffnode_push(struct roff *, enum roff_tok,
				const char *, int, int);
static void		 roff_addtbl(struct roff_man *, struct tbl_node *);
static enum rofferr	 roff_als(ROFF_ARGS);
static enum rofferr	 roff_block(ROFF_ARGS);
static enum rofferr	 roff_block_text(ROFF_ARGS);
static enum rofferr	 roff_block_sub(ROFF_ARGS);
static enum rofferr	 roff_br(ROFF_ARGS);
static enum rofferr	 roff_cblock(ROFF_ARGS);
static enum rofferr	 roff_cc(ROFF_ARGS);
static void		 roff_ccond(struct roff *, int, int);
static enum rofferr	 roff_cond(ROFF_ARGS);
static enum rofferr	 roff_cond_text(ROFF_ARGS);
static enum rofferr	 roff_cond_sub(ROFF_ARGS);
static enum rofferr	 roff_ds(ROFF_ARGS);
static enum rofferr	 roff_ec(ROFF_ARGS);
static enum rofferr	 roff_eo(ROFF_ARGS);
static enum rofferr	 roff_eqndelim(struct roff *, struct buf *, int);
static int		 roff_evalcond(struct roff *r, int, char *, int *);
static int		 roff_evalnum(struct roff *, int,
				const char *, int *, int *, int);
static int		 roff_evalpar(struct roff *, int,
				const char *, int *, int *, int);
static int		 roff_evalstrcond(const char *, int *);
static void		 roff_free1(struct roff *);
static void		 roff_freereg(struct roffreg *);
static void		 roff_freestr(struct roffkv *);
static size_t		 roff_getname(struct roff *, char **, int, int);
static int		 roff_getnum(const char *, int *, int *, int);
static int		 roff_getop(const char *, int *, char *);
static int		 roff_getregn(struct roff *,
				const char *, size_t, char);
static int		 roff_getregro(const struct roff *,
				const char *name);
static const char	*roff_getstrn(struct roff *,
				const char *, size_t, int *);
static int		 roff_hasregn(const struct roff *,
				const char *, size_t);
static enum rofferr	 roff_insec(ROFF_ARGS);
static enum rofferr	 roff_it(ROFF_ARGS);
static enum rofferr	 roff_line_ignore(ROFF_ARGS);
static void		 roff_man_alloc1(struct roff_man *);
static void		 roff_man_free1(struct roff_man *);
static enum rofferr	 roff_manyarg(ROFF_ARGS);
static enum rofferr	 roff_nop(ROFF_ARGS);
static enum rofferr	 roff_nr(ROFF_ARGS);
static enum rofferr	 roff_onearg(ROFF_ARGS);
static enum roff_tok	 roff_parse(struct roff *, char *, int *,
				int, int);
static enum rofferr	 roff_parsetext(struct roff *, struct buf *,
				int, int *);
static enum rofferr	 roff_renamed(ROFF_ARGS);
static enum rofferr	 roff_res(struct roff *, struct buf *, int, int);
static enum rofferr	 roff_rm(ROFF_ARGS);
static enum rofferr	 roff_rn(ROFF_ARGS);
static enum rofferr	 roff_rr(ROFF_ARGS);
static void		 roff_setregn(struct roff *, const char *,
				size_t, int, char, int);
static void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static enum rofferr	 roff_so(ROFF_ARGS);
static enum rofferr	 roff_tr(ROFF_ARGS);
static enum rofferr	 roff_Dd(ROFF_ARGS);
static enum rofferr	 roff_TE(ROFF_ARGS);
static enum rofferr	 roff_TS(ROFF_ARGS);
static enum rofferr	 roff_EQ(ROFF_ARGS);
static enum rofferr	 roff_EN(ROFF_ARGS);
static enum rofferr	 roff_T_(ROFF_ARGS);
static enum rofferr	 roff_unsupp(ROFF_ARGS);
static enum rofferr	 roff_userdef(ROFF_ARGS);
228
229 /* --- constant data ------------------------------------------------------ */
230
/* Flag bits accepted by the numeric parsing functions. */
#define	ROFFNUM_SCALE	(1 << 0)  /* roff_getnum() honours scaling. */
#define	ROFFNUM_WHITE	(1 << 1)  /* roff_evalnum() skips whitespace. */
233
234 const char *__roff_name[MAN_MAX + 1] = {
235 "br", "ce", "ft", "ll",
236 "mc", "po", "rj", "sp",
237 "ta", "ti", NULL,
238 "ab", "ad", "af", "aln",
239 "als", "am", "am1", "ami",
240 "ami1", "as", "as1", "asciify",
241 "backtrace", "bd", "bleedat", "blm",
242 "box", "boxa", "bp", "BP",
243 "break", "breakchar", "brnl", "brp",
244 "brpnl", "c2", "cc",
245 "cf", "cflags", "ch", "char",
246 "chop", "class", "close", "CL",
247 "color", "composite", "continue", "cp",
248 "cropat", "cs", "cu", "da",
249 "dch", "Dd", "de", "de1",
250 "defcolor", "dei", "dei1", "device",
251 "devicem", "di", "do", "ds",
252 "ds1", "dwh", "dt", "ec",
253 "ecr", "ecs", "el", "em",
254 "EN", "eo", "EP", "EQ",
255 "errprint", "ev", "evc", "ex",
256 "fallback", "fam", "fc", "fchar",
257 "fcolor", "fdeferlig", "feature", "fkern",
258 "fl", "flig", "fp", "fps",
259 "fschar", "fspacewidth", "fspecial", "ftr",
260 "fzoom", "gcolor", "hc", "hcode",
261 "hidechar", "hla", "hlm", "hpf",
262 "hpfa", "hpfcode", "hw", "hy",
263 "hylang", "hylen", "hym", "hypp",
264 "hys", "ie", "if", "ig",
265 "index", "it", "itc", "IX",
266 "kern", "kernafter", "kernbefore", "kernpair",
267 "lc", "lc_ctype", "lds", "length",
268 "letadj", "lf", "lg", "lhang",
269 "linetabs", "lnr", "lnrf", "lpfx",
270 "ls", "lsm", "lt",
271 "mediasize", "minss", "mk", "mso",
272 "na", "ne", "nh", "nhychar",
273 "nm", "nn", "nop", "nr",
274 "nrf", "nroff", "ns", "nx",
275 "open", "opena", "os", "output",
276 "padj", "papersize", "pc", "pev",
277 "pi", "PI", "pl", "pm",
278 "pn", "pnr", "ps",
279 "psbb", "pshape", "pso", "ptr",
280 "pvs", "rchar", "rd", "recursionlimit",
281 "return", "rfschar", "rhang",
282 "rm", "rn", "rnn", "rr",
283 "rs", "rt", "schar", "sentchar",
284 "shc", "shift", "sizes", "so",
285 "spacewidth", "special", "spreadwarn", "ss",
286 "sty", "substring", "sv", "sy",
287 "T&", "tc", "TE",
288 "TH", "tkf", "tl",
289 "tm", "tm1", "tmc", "tr",
290 "track", "transchar", "trf", "trimat",
291 "trin", "trnt", "troff", "TS",
292 "uf", "ul", "unformat", "unwatch",
293 "unwatchn", "vpt", "vs", "warn",
294 "warnscale", "watch", "watchlength", "watchn",
295 "wh", "while", "write", "writec",
296 "writem", "xflag", ".", NULL,
297 NULL, "text",
298 "Dd", "Dt", "Os", "Sh",
299 "Ss", "Pp", "D1", "Dl",
300 "Bd", "Ed", "Bl", "El",
301 "It", "Ad", "An", "Ap",
302 "Ar", "Cd", "Cm", "Dv",
303 "Er", "Ev", "Ex", "Fa",
304 "Fd", "Fl", "Fn", "Ft",
305 "Ic", "In", "Li", "Nd",
306 "Nm", "Op", "Ot", "Pa",
307 "Rv", "St", "Va", "Vt",
308 "Xr", "%A", "%B", "%D",
309 "%I", "%J", "%N", "%O",
310 "%P", "%R", "%T", "%V",
311 "Ac", "Ao", "Aq", "At",
312 "Bc", "Bf", "Bo", "Bq",
313 "Bsx", "Bx", "Db", "Dc",
314 "Do", "Dq", "Ec", "Ef",
315 "Em", "Eo", "Fx", "Ms",
316 "No", "Ns", "Nx", "Ox",
317 "Pc", "Pf", "Po", "Pq",
318 "Qc", "Ql", "Qo", "Qq",
319 "Re", "Rs", "Sc", "So",
320 "Sq", "Sm", "Sx", "Sy",
321 "Tn", "Ux", "Xc", "Xo",
322 "Fo", "Fc", "Oo", "Oc",
323 "Bk", "Ek", "Bt", "Hf",
324 "Fr", "Ud", "Lb", "Lp",
325 "Lk", "Mt", "Brq", "Bro",
326 "Brc", "%C", "Es", "En",
327 "Dx", "%Q", "%U", "Ta",
328 NULL,
329 "TH", "SH", "SS", "TP",
330 "TQ",
331 "LP", "PP", "P", "IP",
332 "HP", "SM", "SB", "BI",
333 "IB", "BR", "RB", "R",
334 "B", "I", "IR", "RI",
335 "nf", "fi",
336 "RE", "RS", "DT", "UC",
337 "PD", "AT", "in",
338 "SY", "YS", "OP",
339 "EX", "EE", "UR",
340 "UE", "MT", "ME", NULL
341 };
342 const char *const *roff_name = __roff_name;
343
344 static struct roffmac roffs[TOKEN_NONE] = {
345 { roff_br, NULL, NULL, 0 }, /* br */
346 { roff_onearg, NULL, NULL, 0 }, /* ce */
347 { roff_onearg, NULL, NULL, 0 }, /* ft */
348 { roff_onearg, NULL, NULL, 0 }, /* ll */
349 { roff_onearg, NULL, NULL, 0 }, /* mc */
350 { roff_onearg, NULL, NULL, 0 }, /* po */
351 { roff_onearg, NULL, NULL, 0 }, /* rj */
352 { roff_onearg, NULL, NULL, 0 }, /* sp */
353 { roff_manyarg, NULL, NULL, 0 }, /* ta */
354 { roff_onearg, NULL, NULL, 0 }, /* ti */
355 { NULL, NULL, NULL, 0 }, /* ROFF_MAX */
356 { roff_unsupp, NULL, NULL, 0 }, /* ab */
357 { roff_line_ignore, NULL, NULL, 0 }, /* ad */
358 { roff_line_ignore, NULL, NULL, 0 }, /* af */
359 { roff_unsupp, NULL, NULL, 0 }, /* aln */
360 { roff_als, NULL, NULL, 0 }, /* als */
361 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am */
362 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am1 */
363 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami */
364 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami1 */
365 { roff_ds, NULL, NULL, 0 }, /* as */
366 { roff_ds, NULL, NULL, 0 }, /* as1 */
367 { roff_unsupp, NULL, NULL, 0 }, /* asciify */
368 { roff_line_ignore, NULL, NULL, 0 }, /* backtrace */
369 { roff_line_ignore, NULL, NULL, 0 }, /* bd */
370 { roff_line_ignore, NULL, NULL, 0 }, /* bleedat */
371 { roff_unsupp, NULL, NULL, 0 }, /* blm */
372 { roff_unsupp, NULL, NULL, 0 }, /* box */
373 { roff_unsupp, NULL, NULL, 0 }, /* boxa */
374 { roff_line_ignore, NULL, NULL, 0 }, /* bp */
375 { roff_unsupp, NULL, NULL, 0 }, /* BP */
376 { roff_unsupp, NULL, NULL, 0 }, /* break */
377 { roff_line_ignore, NULL, NULL, 0 }, /* breakchar */
378 { roff_line_ignore, NULL, NULL, 0 }, /* brnl */
379 { roff_br, NULL, NULL, 0 }, /* brp */
380 { roff_line_ignore, NULL, NULL, 0 }, /* brpnl */
381 { roff_unsupp, NULL, NULL, 0 }, /* c2 */
382 { roff_cc, NULL, NULL, 0 }, /* cc */
383 { roff_insec, NULL, NULL, 0 }, /* cf */
384 { roff_line_ignore, NULL, NULL, 0 }, /* cflags */
385 { roff_line_ignore, NULL, NULL, 0 }, /* ch */
386 { roff_unsupp, NULL, NULL, 0 }, /* char */
387 { roff_unsupp, NULL, NULL, 0 }, /* chop */
388 { roff_line_ignore, NULL, NULL, 0 }, /* class */
389 { roff_insec, NULL, NULL, 0 }, /* close */
390 { roff_unsupp, NULL, NULL, 0 }, /* CL */
391 { roff_line_ignore, NULL, NULL, 0 }, /* color */
392 { roff_unsupp, NULL, NULL, 0 }, /* composite */
393 { roff_unsupp, NULL, NULL, 0 }, /* continue */
394 { roff_line_ignore, NULL, NULL, 0 }, /* cp */
395 { roff_line_ignore, NULL, NULL, 0 }, /* cropat */
396 { roff_line_ignore, NULL, NULL, 0 }, /* cs */
397 { roff_line_ignore, NULL, NULL, 0 }, /* cu */
398 { roff_unsupp, NULL, NULL, 0 }, /* da */
399 { roff_unsupp, NULL, NULL, 0 }, /* dch */
400 { roff_Dd, NULL, NULL, 0 }, /* Dd */
401 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de */
402 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de1 */
403 { roff_line_ignore, NULL, NULL, 0 }, /* defcolor */
404 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei */
405 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei1 */
406 { roff_unsupp, NULL, NULL, 0 }, /* device */
407 { roff_unsupp, NULL, NULL, 0 }, /* devicem */
408 { roff_unsupp, NULL, NULL, 0 }, /* di */
409 { roff_unsupp, NULL, NULL, 0 }, /* do */
410 { roff_ds, NULL, NULL, 0 }, /* ds */
411 { roff_ds, NULL, NULL, 0 }, /* ds1 */
412 { roff_unsupp, NULL, NULL, 0 }, /* dwh */
413 { roff_unsupp, NULL, NULL, 0 }, /* dt */
414 { roff_ec, NULL, NULL, 0 }, /* ec */
415 { roff_unsupp, NULL, NULL, 0 }, /* ecr */
416 { roff_unsupp, NULL, NULL, 0 }, /* ecs */
417 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* el */
418 { roff_unsupp, NULL, NULL, 0 }, /* em */
419 { roff_EN, NULL, NULL, 0 }, /* EN */
420 { roff_eo, NULL, NULL, 0 }, /* eo */
421 { roff_unsupp, NULL, NULL, 0 }, /* EP */
422 { roff_EQ, NULL, NULL, 0 }, /* EQ */
423 { roff_line_ignore, NULL, NULL, 0 }, /* errprint */
424 { roff_unsupp, NULL, NULL, 0 }, /* ev */
425 { roff_unsupp, NULL, NULL, 0 }, /* evc */
426 { roff_unsupp, NULL, NULL, 0 }, /* ex */
427 { roff_line_ignore, NULL, NULL, 0 }, /* fallback */
428 { roff_line_ignore, NULL, NULL, 0 }, /* fam */
429 { roff_unsupp, NULL, NULL, 0 }, /* fc */
430 { roff_unsupp, NULL, NULL, 0 }, /* fchar */
431 { roff_line_ignore, NULL, NULL, 0 }, /* fcolor */
432 { roff_line_ignore, NULL, NULL, 0 }, /* fdeferlig */
433 { roff_line_ignore, NULL, NULL, 0 }, /* feature */
434 { roff_line_ignore, NULL, NULL, 0 }, /* fkern */
435 { roff_line_ignore, NULL, NULL, 0 }, /* fl */
436 { roff_line_ignore, NULL, NULL, 0 }, /* flig */
437 { roff_line_ignore, NULL, NULL, 0 }, /* fp */
438 { roff_line_ignore, NULL, NULL, 0 }, /* fps */
439 { roff_unsupp, NULL, NULL, 0 }, /* fschar */
440 { roff_line_ignore, NULL, NULL, 0 }, /* fspacewidth */
441 { roff_line_ignore, NULL, NULL, 0 }, /* fspecial */
442 { roff_line_ignore, NULL, NULL, 0 }, /* ftr */
443 { roff_line_ignore, NULL, NULL, 0 }, /* fzoom */
444 { roff_line_ignore, NULL, NULL, 0 }, /* gcolor */
445 { roff_line_ignore, NULL, NULL, 0 }, /* hc */
446 { roff_line_ignore, NULL, NULL, 0 }, /* hcode */
447 { roff_line_ignore, NULL, NULL, 0 }, /* hidechar */
448 { roff_line_ignore, NULL, NULL, 0 }, /* hla */
449 { roff_line_ignore, NULL, NULL, 0 }, /* hlm */
450 { roff_line_ignore, NULL, NULL, 0 }, /* hpf */
451 { roff_line_ignore, NULL, NULL, 0 }, /* hpfa */
452 { roff_line_ignore, NULL, NULL, 0 }, /* hpfcode */
453 { roff_line_ignore, NULL, NULL, 0 }, /* hw */
454 { roff_line_ignore, NULL, NULL, 0 }, /* hy */
455 { roff_line_ignore, NULL, NULL, 0 }, /* hylang */
456 { roff_line_ignore, NULL, NULL, 0 }, /* hylen */
457 { roff_line_ignore, NULL, NULL, 0 }, /* hym */
458 { roff_line_ignore, NULL, NULL, 0 }, /* hypp */
459 { roff_line_ignore, NULL, NULL, 0 }, /* hys */
460 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* ie */
461 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* if */
462 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ig */
463 { roff_unsupp, NULL, NULL, 0 }, /* index */
464 { roff_it, NULL, NULL, 0 }, /* it */
465 { roff_unsupp, NULL, NULL, 0 }, /* itc */
466 { roff_line_ignore, NULL, NULL, 0 }, /* IX */
467 { roff_line_ignore, NULL, NULL, 0 }, /* kern */
468 { roff_line_ignore, NULL, NULL, 0 }, /* kernafter */
469 { roff_line_ignore, NULL, NULL, 0 }, /* kernbefore */
470 { roff_line_ignore, NULL, NULL, 0 }, /* kernpair */
471 { roff_unsupp, NULL, NULL, 0 }, /* lc */
472 { roff_unsupp, NULL, NULL, 0 }, /* lc_ctype */
473 { roff_unsupp, NULL, NULL, 0 }, /* lds */
474 { roff_unsupp, NULL, NULL, 0 }, /* length */
475 { roff_line_ignore, NULL, NULL, 0 }, /* letadj */
476 { roff_insec, NULL, NULL, 0 }, /* lf */
477 { roff_line_ignore, NULL, NULL, 0 }, /* lg */
478 { roff_line_ignore, NULL, NULL, 0 }, /* lhang */
479 { roff_unsupp, NULL, NULL, 0 }, /* linetabs */
480 { roff_unsupp, NULL, NULL, 0 }, /* lnr */
481 { roff_unsupp, NULL, NULL, 0 }, /* lnrf */
482 { roff_unsupp, NULL, NULL, 0 }, /* lpfx */
483 { roff_line_ignore, NULL, NULL, 0 }, /* ls */
484 { roff_unsupp, NULL, NULL, 0 }, /* lsm */
485 { roff_line_ignore, NULL, NULL, 0 }, /* lt */
486 { roff_line_ignore, NULL, NULL, 0 }, /* mediasize */
487 { roff_line_ignore, NULL, NULL, 0 }, /* minss */
488 { roff_line_ignore, NULL, NULL, 0 }, /* mk */
489 { roff_insec, NULL, NULL, 0 }, /* mso */
490 { roff_line_ignore, NULL, NULL, 0 }, /* na */
491 { roff_line_ignore, NULL, NULL, 0 }, /* ne */
492 { roff_line_ignore, NULL, NULL, 0 }, /* nh */
493 { roff_line_ignore, NULL, NULL, 0 }, /* nhychar */
494 { roff_unsupp, NULL, NULL, 0 }, /* nm */
495 { roff_unsupp, NULL, NULL, 0 }, /* nn */
496 { roff_nop, NULL, NULL, 0 }, /* nop */
497 { roff_nr, NULL, NULL, 0 }, /* nr */
498 { roff_unsupp, NULL, NULL, 0 }, /* nrf */
499 { roff_line_ignore, NULL, NULL, 0 }, /* nroff */
500 { roff_line_ignore, NULL, NULL, 0 }, /* ns */
501 { roff_insec, NULL, NULL, 0 }, /* nx */
502 { roff_insec, NULL, NULL, 0 }, /* open */
503 { roff_insec, NULL, NULL, 0 }, /* opena */
504 { roff_line_ignore, NULL, NULL, 0 }, /* os */
505 { roff_unsupp, NULL, NULL, 0 }, /* output */
506 { roff_line_ignore, NULL, NULL, 0 }, /* padj */
507 { roff_line_ignore, NULL, NULL, 0 }, /* papersize */
508 { roff_line_ignore, NULL, NULL, 0 }, /* pc */
509 { roff_line_ignore, NULL, NULL, 0 }, /* pev */
510 { roff_insec, NULL, NULL, 0 }, /* pi */
511 { roff_unsupp, NULL, NULL, 0 }, /* PI */
512 { roff_line_ignore, NULL, NULL, 0 }, /* pl */
513 { roff_line_ignore, NULL, NULL, 0 }, /* pm */
514 { roff_line_ignore, NULL, NULL, 0 }, /* pn */
515 { roff_line_ignore, NULL, NULL, 0 }, /* pnr */
516 { roff_line_ignore, NULL, NULL, 0 }, /* ps */
517 { roff_unsupp, NULL, NULL, 0 }, /* psbb */
518 { roff_unsupp, NULL, NULL, 0 }, /* pshape */
519 { roff_insec, NULL, NULL, 0 }, /* pso */
520 { roff_line_ignore, NULL, NULL, 0 }, /* ptr */
521 { roff_line_ignore, NULL, NULL, 0 }, /* pvs */
522 { roff_unsupp, NULL, NULL, 0 }, /* rchar */
523 { roff_line_ignore, NULL, NULL, 0 }, /* rd */
524 { roff_line_ignore, NULL, NULL, 0 }, /* recursionlimit */
525 { roff_unsupp, NULL, NULL, 0 }, /* return */
526 { roff_unsupp, NULL, NULL, 0 }, /* rfschar */
527 { roff_line_ignore, NULL, NULL, 0 }, /* rhang */
528 { roff_rm, NULL, NULL, 0 }, /* rm */
529 { roff_rn, NULL, NULL, 0 }, /* rn */
530 { roff_unsupp, NULL, NULL, 0 }, /* rnn */
531 { roff_rr, NULL, NULL, 0 }, /* rr */
532 { roff_line_ignore, NULL, NULL, 0 }, /* rs */
533 { roff_line_ignore, NULL, NULL, 0 }, /* rt */
534 { roff_unsupp, NULL, NULL, 0 }, /* schar */
535 { roff_line_ignore, NULL, NULL, 0 }, /* sentchar */
536 { roff_line_ignore, NULL, NULL, 0 }, /* shc */
537 { roff_unsupp, NULL, NULL, 0 }, /* shift */
538 { roff_line_ignore, NULL, NULL, 0 }, /* sizes */
539 { roff_so, NULL, NULL, 0 }, /* so */
540 { roff_line_ignore, NULL, NULL, 0 }, /* spacewidth */
541 { roff_line_ignore, NULL, NULL, 0 }, /* special */
542 { roff_line_ignore, NULL, NULL, 0 }, /* spreadwarn */
543 { roff_line_ignore, NULL, NULL, 0 }, /* ss */
544 { roff_line_ignore, NULL, NULL, 0 }, /* sty */
545 { roff_unsupp, NULL, NULL, 0 }, /* substring */
546 { roff_line_ignore, NULL, NULL, 0 }, /* sv */
547 { roff_insec, NULL, NULL, 0 }, /* sy */
548 { roff_T_, NULL, NULL, 0 }, /* T& */
549 { roff_unsupp, NULL, NULL, 0 }, /* tc */
550 { roff_TE, NULL, NULL, 0 }, /* TE */
551 { roff_Dd, NULL, NULL, 0 }, /* TH */
552 { roff_line_ignore, NULL, NULL, 0 }, /* tkf */
553 { roff_unsupp, NULL, NULL, 0 }, /* tl */
554 { roff_line_ignore, NULL, NULL, 0 }, /* tm */
555 { roff_line_ignore, NULL, NULL, 0 }, /* tm1 */
556 { roff_line_ignore, NULL, NULL, 0 }, /* tmc */
557 { roff_tr, NULL, NULL, 0 }, /* tr */
558 { roff_line_ignore, NULL, NULL, 0 }, /* track */
559 { roff_line_ignore, NULL, NULL, 0 }, /* transchar */
560 { roff_insec, NULL, NULL, 0 }, /* trf */
561 { roff_line_ignore, NULL, NULL, 0 }, /* trimat */
562 { roff_unsupp, NULL, NULL, 0 }, /* trin */
563 { roff_unsupp, NULL, NULL, 0 }, /* trnt */
564 { roff_line_ignore, NULL, NULL, 0 }, /* troff */
565 { roff_TS, NULL, NULL, 0 }, /* TS */
566 { roff_line_ignore, NULL, NULL, 0 }, /* uf */
567 { roff_line_ignore, NULL, NULL, 0 }, /* ul */
568 { roff_unsupp, NULL, NULL, 0 }, /* unformat */
569 { roff_line_ignore, NULL, NULL, 0 }, /* unwatch */
570 { roff_line_ignore, NULL, NULL, 0 }, /* unwatchn */
571 { roff_line_ignore, NULL, NULL, 0 }, /* vpt */
572 { roff_line_ignore, NULL, NULL, 0 }, /* vs */
573 { roff_line_ignore, NULL, NULL, 0 }, /* warn */
574 { roff_line_ignore, NULL, NULL, 0 }, /* warnscale */
575 { roff_line_ignore, NULL, NULL, 0 }, /* watch */
576 { roff_line_ignore, NULL, NULL, 0 }, /* watchlength */
577 { roff_line_ignore, NULL, NULL, 0 }, /* watchn */
578 { roff_unsupp, NULL, NULL, 0 }, /* wh */
579 { roff_unsupp, NULL, NULL, 0 }, /* while */
580 { roff_insec, NULL, NULL, 0 }, /* write */
581 { roff_insec, NULL, NULL, 0 }, /* writec */
582 { roff_insec, NULL, NULL, 0 }, /* writem */
583 { roff_line_ignore, NULL, NULL, 0 }, /* xflag */
584 { roff_cblock, NULL, NULL, 0 }, /* . */
585 { roff_renamed, NULL, NULL, 0 },
586 { roff_userdef, NULL, NULL, 0 }
587 };
588
589 /* Array of injected predefined strings. */
590 #define PREDEFS_MAX 38
591 static const struct predef predefs[PREDEFS_MAX] = {
592 #include "predefs.in"
593 };
594
/* File-scope parser state; roff_reset() clears all four. */
static	int		 roffce_lines;	/* number of input lines to center */
static	struct roff_node *roffce_node;	/* active request */
static	int		 roffit_lines;	/* number of lines to delay */
static	char		*roffit_macro;	/* NUL-terminated macro line */
599
600
601 /* --- request table ------------------------------------------------------ */
602
603 struct ohash *
604 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
605 {
606 struct ohash *htab;
607 struct roffreq *req;
608 enum roff_tok tok;
609 size_t sz;
610 unsigned int slot;
611
612 htab = mandoc_malloc(sizeof(*htab));
613 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
614
615 for (tok = mintok; tok < maxtok; tok++) {
616 if (roff_name[tok] == NULL)
617 continue;
618 sz = strlen(roff_name[tok]);
619 req = mandoc_malloc(sizeof(*req) + sz + 1);
620 req->tok = tok;
621 memcpy(req->name, roff_name[tok], sz + 1);
622 slot = ohash_qlookup(htab, req->name);
623 ohash_insert(htab, slot, req);
624 }
625 return htab;
626 }
627
/*
 * Release a table built by roffhash_alloc(): every entry,
 * the table internals, and the table object itself.
 * Passing NULL is allowed and does nothing.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 idx;

	if (htab == NULL)
		return;

	entry = ohash_first(htab, &idx);
	while (entry != NULL) {
		free(entry);
		entry = ohash_next(htab, &idx);
	}
	ohash_delete(htab);
	free(htab);
}
642
643 enum roff_tok
644 roffhash_find(struct ohash *htab, const char *name, size_t sz)
645 {
646 struct roffreq *req;
647 const char *end;
648
649 if (sz) {
650 end = name + sz;
651 req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
652 } else
653 req = ohash_find(htab, ohash_qlookup(htab, name));
654 return req == NULL ? TOKEN_NONE : req->tok;
655 }
656
657 /* --- stack of request blocks -------------------------------------------- */
658
659 /*
660 * Pop the current node off of the stack of roff instructions currently
661 * pending.
662 */
663 static void
664 roffnode_pop(struct roff *r)
665 {
666 struct roffnode *p;
667
668 assert(r->last);
669 p = r->last;
670
671 r->last = r->last->parent;
672 free(p->name);
673 free(p->end);
674 free(p);
675 }
676
677 /*
678 * Push a roff node onto the instruction stack. This must later be
679 * removed with roffnode_pop().
680 */
681 static void
682 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
683 int line, int col)
684 {
685 struct roffnode *p;
686
687 p = mandoc_calloc(1, sizeof(struct roffnode));
688 p->tok = tok;
689 if (name)
690 p->name = mandoc_strdup(name);
691 p->parent = r->last;
692 p->line = line;
693 p->col = col;
694 p->rule = p->parent ? p->parent->rule : 0;
695
696 r->last = p;
697 }
698
699 /* --- roff parser state data management ---------------------------------- */
700
701 static void
702 roff_free1(struct roff *r)
703 {
704 struct tbl_node *tbl;
705 int i;
706
707 while (NULL != (tbl = r->first_tbl)) {
708 r->first_tbl = tbl->next;
709 tbl_free(tbl);
710 }
711 r->first_tbl = r->last_tbl = r->tbl = NULL;
712
713 if (r->last_eqn != NULL)
714 eqn_free(r->last_eqn);
715 r->last_eqn = r->eqn = NULL;
716
717 while (r->last)
718 roffnode_pop(r);
719
720 free (r->rstack);
721 r->rstack = NULL;
722 r->rstacksz = 0;
723 r->rstackpos = -1;
724
725 roff_freereg(r->regtab);
726 r->regtab = NULL;
727
728 roff_freestr(r->strtab);
729 roff_freestr(r->rentab);
730 roff_freestr(r->xmbtab);
731 r->strtab = r->rentab = r->xmbtab = NULL;
732
733 if (r->xtab)
734 for (i = 0; i < 128; i++)
735 free(r->xtab[i].p);
736 free(r->xtab);
737 r->xtab = NULL;
738 }
739
740 void
741 roff_reset(struct roff *r)
742 {
743 roff_free1(r);
744 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
745 r->control = '\0';
746 r->escape = '\\';
747 roffce_lines = 0;
748 roffce_node = NULL;
749 roffit_lines = 0;
750 roffit_macro = NULL;
751 }
752
753 void
754 roff_free(struct roff *r)
755 {
756 roff_free1(r);
757 roffhash_free(r->reqtab);
758 free(r);
759 }
760
761 struct roff *
762 roff_alloc(struct mparse *parse, int options)
763 {
764 struct roff *r;
765
766 r = mandoc_calloc(1, sizeof(struct roff));
767 r->parse = parse;
768 r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
769 r->options = options;
770 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
771 r->rstackpos = -1;
772 r->escape = '\\';
773 return r;
774 }
775
776 /* --- syntax tree state data management ---------------------------------- */
777
778 static void
779 roff_man_free1(struct roff_man *man)
780 {
781
782 if (man->first != NULL)
783 roff_node_delete(man, man->first);
784 free(man->meta.msec);
785 free(man->meta.vol);
786 free(man->meta.os);
787 free(man->meta.arch);
788 free(man->meta.title);
789 free(man->meta.name);
790 free(man->meta.date);
791 }
792
793 static void
794 roff_man_alloc1(struct roff_man *man)
795 {
796
797 memset(&man->meta, 0, sizeof(man->meta));
798 man->first = mandoc_calloc(1, sizeof(*man->first));
799 man->first->type = ROFFT_ROOT;
800 man->last = man->first;
801 man->last_es = NULL;
802 man->flags = 0;
803 man->macroset = MACROSET_NONE;
804 man->lastsec = man->lastnamed = SEC_NONE;
805 man->next = ROFF_NEXT_CHILD;
806 }
807
/*
 * Discard the current syntax tree and meta data
 * and start over with an empty tree.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);
	roff_man_alloc1(man);
}
815
/*
 * Destroy a roff_man object including its tree and meta data.
 */
void
roff_man_free(struct roff_man *man)
{
	roff_man_free1(man);
	free(man);
}
823
824 struct roff_man *
825 roff_man_alloc(struct roff *roff, struct mparse *parse,
826 const char *os_s, int quick)
827 {
828 struct roff_man *man;
829
830 man = mandoc_calloc(1, sizeof(*man));
831 man->parse = parse;
832 man->roff = roff;
833 man->os_s = os_s;
834 man->quick = quick;
835 roff_man_alloc1(man);
836 roff->man = man;
837 return man;
838 }
839
840 /* --- syntax tree handling ----------------------------------------------- */
841
842 struct roff_node *
843 roff_node_alloc(struct roff_man *man, int line, int pos,
844 enum roff_type type, int tok)
845 {
846 struct roff_node *n;
847
848 n = mandoc_calloc(1, sizeof(*n));
849 n->line = line;
850 n->pos = pos;
851 n->tok = tok;
852 n->type = type;
853 n->sec = man->lastsec;
854
855 if (man->flags & MDOC_SYNOPSIS)
856 n->flags |= NODE_SYNPRETTY;
857 else
858 n->flags &= ~NODE_SYNPRETTY;
859 if (man->flags & MDOC_NEWLINE)
860 n->flags |= NODE_LINE;
861 man->flags &= ~MDOC_NEWLINE;
862
863 return n;
864 }
865
866 void
867 roff_node_append(struct roff_man *man, struct roff_node *n)
868 {
869
870 switch (man->next) {
871 case ROFF_NEXT_SIBLING:
872 if (man->last->next != NULL) {
873 n->next = man->last->next;
874 man->last->next->prev = n;
875 } else
876 man->last->parent->last = n;
877 man->last->next = n;
878 n->prev = man->last;
879 n->parent = man->last->parent;
880 break;
881 case ROFF_NEXT_CHILD:
882 if (man->last->child != NULL) {
883 n->next = man->last->child;
884 man->last->child->prev = n;
885 } else
886 man->last->last = n;
887 man->last->child = n;
888 n->parent = man->last;
889 break;
890 default:
891 abort();
892 }
893 man->last = n;
894
895 switch (n->type) {
896 case ROFFT_HEAD:
897 n->parent->head = n;
898 break;
899 case ROFFT_BODY:
900 if (n->end != ENDBODY_NOT)
901 return;
902 n->parent->body = n;
903 break;
904 case ROFFT_TAIL:
905 n->parent->tail = n;
906 break;
907 default:
908 return;
909 }
910
911 /*
912 * Copy over the normalised-data pointer of our parent. Not
913 * everybody has one, but copying a null pointer is fine.
914 */
915
916 n->norm = n->parent->norm;
917 assert(n->parent->type == ROFFT_BLOCK);
918 }
919
920 void
921 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
922 {
923 struct roff_node *n;
924
925 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
926 n->string = roff_strdup(man->roff, word);
927 roff_node_append(man, n);
928 n->flags |= NODE_VALID | NODE_ENDED;
929 man->next = ROFF_NEXT_SIBLING;
930 }
931
932 void
933 roff_word_append(struct roff_man *man, const char *word)
934 {
935 struct roff_node *n;
936 char *addstr, *newstr;
937
938 n = man->last;
939 addstr = roff_strdup(man->roff, word);
940 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
941 free(addstr);
942 free(n->string);
943 n->string = newstr;
944 man->next = ROFF_NEXT_SIBLING;
945 }
946
947 void
948 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
949 {
950 struct roff_node *n;
951
952 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
953 roff_node_append(man, n);
954 man->next = ROFF_NEXT_CHILD;
955 }
956
957 struct roff_node *
958 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
959 {
960 struct roff_node *n;
961
962 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
963 roff_node_append(man, n);
964 man->next = ROFF_NEXT_CHILD;
965 return n;
966 }
967
968 struct roff_node *
969 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
970 {
971 struct roff_node *n;
972
973 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
974 roff_node_append(man, n);
975 man->next = ROFF_NEXT_CHILD;
976 return n;
977 }
978
979 struct roff_node *
980 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
981 {
982 struct roff_node *n;
983
984 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
985 roff_node_append(man, n);
986 man->next = ROFF_NEXT_CHILD;
987 return n;
988 }
989
990 static void
991 roff_addtbl(struct roff_man *man, struct tbl_node *tbl)
992 {
993 struct roff_node *n;
994 const struct tbl_span *span;
995
996 if (man->macroset == MACROSET_MAN)
997 man_breakscope(man, ROFF_TS);
998 while ((span = tbl_span(tbl)) != NULL) {
999 n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE);
1000 n->span = span;
1001 roff_node_append(man, n);
1002 n->flags |= NODE_VALID | NODE_ENDED;
1003 man->next = ROFF_NEXT_SIBLING;
1004 }
1005 }
1006
1007 void
1008 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1009 {
1010
1011 /* Adjust siblings. */
1012
1013 if (n->prev)
1014 n->prev->next = n->next;
1015 if (n->next)
1016 n->next->prev = n->prev;
1017
1018 /* Adjust parent. */
1019
1020 if (n->parent != NULL) {
1021 if (n->parent->child == n)
1022 n->parent->child = n->next;
1023 if (n->parent->last == n)
1024 n->parent->last = n->prev;
1025 }
1026
1027 /* Adjust parse point. */
1028
1029 if (man == NULL)
1030 return;
1031 if (man->last == n) {
1032 if (n->prev == NULL) {
1033 man->last = n->parent;
1034 man->next = ROFF_NEXT_CHILD;
1035 } else {
1036 man->last = n->prev;
1037 man->next = ROFF_NEXT_SIBLING;
1038 }
1039 }
1040 if (man->first == n)
1041 man->first = NULL;
1042 }
1043
1044 void
1045 roff_node_free(struct roff_node *n)
1046 {
1047
1048 if (n->args != NULL)
1049 mdoc_argv_free(n->args);
1050 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1051 free(n->norm);
1052 if (n->eqn != NULL)
1053 eqn_box_free(n->eqn);
1054 free(n->string);
1055 free(n);
1056 }
1057
1058 void
1059 roff_node_delete(struct roff_man *man, struct roff_node *n)
1060 {
1061
1062 while (n->child != NULL)
1063 roff_node_delete(man, n->child);
1064 roff_node_unlink(man, n);
1065 roff_node_free(n);
1066 }
1067
1068 void
1069 deroff(char **dest, const struct roff_node *n)
1070 {
1071 char *cp;
1072 size_t sz;
1073
1074 if (n->type != ROFFT_TEXT) {
1075 for (n = n->child; n != NULL; n = n->next)
1076 deroff(dest, n);
1077 return;
1078 }
1079
1080 /* Skip leading whitespace. */
1081
1082 for (cp = n->string; *cp != '\0'; cp++) {
1083 if (cp[0] == '\\' && cp[1] != '\0' &&
1084 strchr(" %&0^|~", cp[1]) != NULL)
1085 cp++;
1086 else if ( ! isspace((unsigned char)*cp))
1087 break;
1088 }
1089
1090 /* Skip trailing backslash. */
1091
1092 sz = strlen(cp);
1093 if (sz > 0 && cp[sz - 1] == '\\')
1094 sz--;
1095
1096 /* Skip trailing whitespace. */
1097
1098 for (; sz; sz--)
1099 if ( ! isspace((unsigned char)cp[sz-1]))
1100 break;
1101
1102 /* Skip empty strings. */
1103
1104 if (sz == 0)
1105 return;
1106
1107 if (*dest == NULL) {
1108 *dest = mandoc_strndup(cp, sz);
1109 return;
1110 }
1111
1112 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1113 free(*dest);
1114 *dest = cp;
1115 }
1116
1117 /* --- main functions of the roff parser ---------------------------------- */
1118
1119 /*
1120 * In the current line, expand escape sequences that tend to get
1121 * used in numerical expressions and conditional requests.
1122 * Also check the syntax of the remaining escape sequences.
1123 */
/*
 * The function works in two passes over buf->buf, starting at pos:
 *
 * 1. A forward scan looks for the escape character followed by '"'
 *    or '#', i.e. a comment.  RCS ids of OpenBSD and NetBSD found in
 *    the comment are recorded in r->man->meta.rcsids, trailing
 *    whitespace is reported, and while r->format is still 0 the
 *    comment text is saved as a ROFFT_COMMENT node.  The buffer is
 *    then cut off at the comment; for the '#' form, ROFF_APPEND is
 *    returned right away.
 *
 * 2. A backward scan from the end of the line visits each escape
 *    sequence.  The sequences \*, \n, \B and \w are replaced by
 *    their values, reallocating buf->buf and updating buf->sz each
 *    time; all other sequences are merely validated with
 *    mandoc_escape().
 *
 * Returns ROFF_CONT when the line is ready for further parsing,
 * ROFF_APPEND when the line ends in an unpaired escape character and
 * no newline was seen, and ROFF_IGN when more than EXPAND_LIMIT
 * expansions were attempted or the expanded line would grow beyond
 * SHRT_MAX bytes.
 */
1124 static enum rofferr
1125 roff_res(struct roff *r, struct buf *buf, int ln, int pos)
1126 {
1127 char ubuf[24]; /* buffer to print the number */
1128 struct roff_node *n; /* used for header comments */
1129 const char *start; /* start of the string to process */
1130 char *stesc; /* start of an escape sequence ('\\') */
1131 char *ep; /* end of comment string */
1132 const char *stnam; /* start of the name, after "[(*" */
1133 const char *cp; /* end of the name, e.g. before ']' */
1134 const char *res; /* the string to be substituted */
1135 char *nbuf; /* new buffer to copy buf->buf to */
1136 size_t maxl; /* expected length of the escape name */
1137 size_t naml; /* actual length of the escape name */
1138 enum mandoc_esc esc; /* type of the escape sequence */
1139 int inaml; /* length returned from mandoc_escape() */
1140 int expand_count; /* to avoid infinite loops */
1141 int npos; /* position in numeric expression */
1142 int arg_complete; /* argument not interrupted by eol */
1143 int done; /* no more input available */
1144 int deftype; /* type of definition to paste */
1145 int rcsid; /* kind of RCS id seen */
1146 char sign; /* increment number register */
1147 char term; /* character terminating the escape */
1148
1149 /* Search forward for comments. */
1150
1151 done = 0;
1152 start = buf->buf + pos;
1153 for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
1154 if (stesc[0] != r->escape || stesc[1] == '\0')
1155 continue;
1156 stesc++;
1157 if (*stesc != '"' && *stesc != '#')
1158 continue;
1159
1160 /* Comment found, look for RCS id. */
1161
1162 rcsid = 0;
1163 if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
1164 rcsid = 1 << MANDOC_OS_OPENBSD;
1165 cp += 8;
1166 } else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
1167 rcsid = 1 << MANDOC_OS_NETBSD;
1168 cp += 7;
1169 }
/*
 * Accept the id only if the keyword is not followed by an
 * alphanumeric character and a closing '$' exists later on.
 * Seeing the same kind of id twice is reported.
 */
1170 if (cp != NULL &&
1171 isalnum((unsigned char)*cp) == 0 &&
1172 strchr(cp, '$') != NULL) {
1173 if (r->man->meta.rcsids & rcsid)
1174 mandoc_msg(MANDOCERR_RCS_REP, r->parse,
1175 ln, stesc + 1 - buf->buf, stesc + 1);
1176 r->man->meta.rcsids |= rcsid;
1177 }
1178
1179 /* Handle trailing whitespace. */
1180
/*
 * ep is set to the last character of the line.  The post-decrement
 * moves stesc back onto the escape character that introduces the
 * comment, such that stesc[1] is '"' or '#' from here on.
 */
1181 ep = strchr(stesc--, '\0') - 1;
1182 if (*ep == '\n') {
1183 done = 1;
1184 ep--;
1185 }
1186 if (*ep == ' ' || *ep == '\t')
1187 mandoc_msg(MANDOCERR_SPACE_EOL, r->parse,
1188 ln, ep - buf->buf, NULL);
1189
1190 /*
1191 * Save comments preceding the title macro
1192 * in the syntax tree.
1193 */
1194
1195 if (r->format == 0) {
1196 while (*ep == ' ' || *ep == '\t')
1197 ep--;
1198 ep[1] = '\0';
1199 n = roff_node_alloc(r->man,
1200 ln, stesc + 1 - buf->buf,
1201 ROFFT_COMMENT, TOKEN_NONE);
1202 n->string = mandoc_strdup(stesc + 2);
1203 roff_node_append(r->man, n);
1204 n->flags |= NODE_VALID | NODE_ENDED;
1205 r->man->next = ROFF_NEXT_SIBLING;
1206 }
1207
1208 /* Line continuation with comment. */
1209
1210 if (stesc[1] == '#') {
1211 *stesc = '\0';
1212 return ROFF_APPEND;
1213 }
1214
1215 /* Discard normal comments. */
1216
1217 while (stesc > start && stesc[-1] == ' ')
1218 stesc--;
1219 *stesc = '\0';
1220 break;
1221 }
/*
 * At this point stesc addresses the terminating NUL of the text
 * to process; if that coincides with start, the text is empty.
 */
1222 if (stesc == start)
1223 return ROFF_CONT;
1224 stesc--;
1225
1226 /* Notice the end of the input. */
1227
1228 if (*stesc == '\n') {
1229 *stesc-- = '\0';
1230 done = 1;
1231 }
1232
1233 expand_count = 0;
1234 while (stesc >= start) {
1235
1236 /* Search backwards for the next backslash. */
1237
/*
 * A literal backslash that is not the current escape character
 * is replaced by the two bytes "\e" in a reallocated buffer;
 * start and stesc are rebased onto the new buffer.
 */
1238 if (*stesc != r->escape) {
1239 if (*stesc == '\\') {
1240 *stesc = '\0';
1241 buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
1242 buf->buf, stesc + 1) + 1;
1243 start = nbuf + pos;
1244 stesc = nbuf + (stesc - buf->buf);
1245 free(buf->buf);
1246 buf->buf = nbuf;
1247 }
1248 stesc--;
1249 continue;
1250 }
1251
1252 /* If it is escaped, skip it. */
1253
/*
 * cp is moved to the character before the run of consecutive
 * escape characters ending at stesc, so stesc - cp is the length
 * of that run.  An even length means all of them are paired up:
 * they are rewritten to backslashes and skipped.  An odd length
 * leaves one real escape at stesc, which is normalized to a
 * backslash, unless it is the very last character of the line.
 */
1254 for (cp = stesc - 1; cp >= start; cp--)
1255 if (*cp != r->escape)
1256 break;
1257
1258 if ((stesc - cp) % 2 == 0) {
1259 while (stesc > cp)
1260 *stesc-- = '\\';
1261 continue;
1262 } else if (stesc[1] != '\0') {
1263 *stesc = '\\';
1264 } else {
1265 *stesc-- = '\0';
1266 if (done)
1267 continue;
1268 else
1269 return ROFF_APPEND;
1270 }
1271
1272 /* Decide whether to expand or to check only. */
1273
/*
 * NOTE(review): the 'B' and 'w' cases fall through into the sign
 * check of 'n', where cp[1] is the delimiter just saved in term.
 * A '+' or '-' delimiter would consequently advance cp by one
 * before the "cp += 2" further down - confirm whether intended.
 */
1274 term = '\0';
1275 cp = stesc + 1;
1276 switch (*cp) {
1277 case '*':
1278 res = NULL;
1279 break;
1280 case 'B':
1281 case 'w':
1282 term = cp[1];
1283 /* FALLTHROUGH */
1284 case 'n':
1285 sign = cp[1];
1286 if (sign == '+' || sign == '-')
1287 cp++;
1288 res = ubuf;
1289 break;
1290 default:
1291 esc = mandoc_escape(&cp, &stnam, &inaml);
1292 if (esc == ESCAPE_ERROR ||
1293 (esc == ESCAPE_SPECIAL &&
1294 mchars_spec2cp(stnam, inaml) < 0))
1295 mandoc_vmsg(MANDOCERR_ESC_BAD,
1296 r->parse, ln, (int)(stesc - buf->buf),
1297 "%.*s", (int)(cp - stesc), stesc);
1298 stesc--;
1299 continue;
1300 }
1301
1302 if (EXPAND_LIMIT < ++expand_count) {
1303 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1304 ln, (int)(stesc - buf->buf), NULL);
1305 return ROFF_IGN;
1306 }
1307
1308 /*
1309 * The third character decides the length
1310 * of the name of the string or register.
1311 * Save a pointer to the name.
1312 */
1313
/*
 * maxl == 0 means "unlimited": the name then extends up to the
 * terminating character term, or to the end of the line.
 */
1314 if (term == '\0') {
1315 switch (*++cp) {
1316 case '\0':
1317 maxl = 0;
1318 break;
1319 case '(':
1320 cp++;
1321 maxl = 2;
1322 break;
1323 case '[':
1324 cp++;
1325 term = ']';
1326 maxl = 0;
1327 break;
1328 default:
1329 maxl = 1;
1330 break;
1331 }
1332 } else {
1333 cp += 2;
1334 maxl = 0;
1335 }
1336 stnam = cp;
1337
1338 /* Advance to the end of the name. */
1339
/*
 * Inside the argument of \w, a nested escape sequence is parsed
 * with mandoc_escape() and counts as one character if it is of
 * type SPECIAL, UNICODE, NUMBERED or OVERSTRIKE, and as none
 * otherwise.  Everywhere else, each byte counts as one.
 */
1340 naml = 0;
1341 arg_complete = 1;
1342 while (maxl == 0 || naml < maxl) {
1343 if (*cp == '\0') {
1344 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
1345 ln, (int)(stesc - buf->buf), stesc);
1346 arg_complete = 0;
1347 break;
1348 }
1349 if (maxl == 0 && *cp == term) {
1350 cp++;
1351 break;
1352 }
1353 if (*cp++ != '\\' || stesc[1] != 'w') {
1354 naml++;
1355 continue;
1356 }
1357 switch (mandoc_escape(&cp, NULL, NULL)) {
1358 case ESCAPE_SPECIAL:
1359 case ESCAPE_UNICODE:
1360 case ESCAPE_NUMBERED:
1361 case ESCAPE_OVERSTRIKE:
1362 naml++;
1363 break;
1364 default:
1365 break;
1366 }
1367 }
1368
1369 /*
1370 * Retrieve the replacement string; if it is
1371 * undefined, resume searching for escapes.
1372 */
1373
1374 switch (stesc[1]) {
1375 case '*':
1376 if (arg_complete) {
1377 deftype = ROFFDEF_USER | ROFFDEF_PRE;
1378 res = roff_getstrn(r, stnam, naml, &deftype);
1379
1380 /*
1381 * If not overridden, let \*(.T
1382 * through to the formatters.
1383 */
1384
1385 if (res == NULL && naml == 2 &&
1386 stnam[0] == '.' && stnam[1] == 'T') {
1387 roff_setstrn(&r->strtab,
1388 ".T", 2, NULL, 0, 0);
1389 stesc--;
1390 continue;
1391 }
1392 }
1393 break;
/*
 * \B yields "1" if the complete argument parses as a numeric
 * expression that ends right before the closing delimiter,
 * and "0" otherwise.
 */
1394 case 'B':
1395 npos = 0;
1396 ubuf[0] = arg_complete &&
1397 roff_evalnum(r, ln, stnam, &npos,
1398 NULL, ROFFNUM_SCALE) &&
1399 stnam + npos + 1 == cp ? '1' : '0';
1400 ubuf[1] = '\0';
1401 break;
1402 case 'n':
1403 if (arg_complete)
1404 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1405 roff_getregn(r, stnam, naml, sign));
1406 else
1407 ubuf[0] = '\0';
1408 break;
/* \w yields 24 times the number of characters counted above. */
1409 case 'w':
1410 /* use even incomplete args */
1411 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1412 24 * (int)naml);
1413 break;
1414 }
1415
/*
 * An undefined string is reported and expands to nothing.
 * A replacement that would let the line grow beyond SHRT_MAX
 * bytes is treated like an expansion loop.
 */
1416 if (res == NULL) {
1417 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1418 r->parse, ln, (int)(stesc - buf->buf),
1419 "%.*s", (int)naml, stnam);
1420 res = "";
1421 } else if (buf->sz + strlen(res) > SHRT_MAX) {
1422 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1423 ln, (int)(stesc - buf->buf), NULL);
1424 return ROFF_IGN;
1425 }
1426
1427 /* Replace the escape sequence by the string. */
1428
1429 *stesc = '\0';
1430 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
1431 buf->buf, res, cp) + 1;
1432
1433 /* Prepare for the next replacement. */
1434
/*
 * stesc is placed at the end of the inserted text, so the
 * backward scan visits the replacement itself as well.
 */
1435 start = nbuf + pos;
1436 stesc = nbuf + (stesc - buf->buf) + strlen(res);
1437 free(buf->buf);
1438 buf->buf = nbuf;
1439 }
1440 return ROFF_CONT;
1441 }
1442
1443 /*
1444 * Process text streams.
1445 */
1446 static enum rofferr
1447 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1448 {
1449 size_t sz;
1450 const char *start;
1451 char *p;
1452 int isz;
1453 enum mandoc_esc esc;
1454
1455 /* Spring the input line trap. */
1456
1457 if (roffit_lines == 1) {
1458 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1459 free(buf->buf);
1460 buf->buf = p;
1461 buf->sz = isz + 1;
1462 *offs = 0;
1463 free(roffit_macro);
1464 roffit_lines = 0;
1465 return ROFF_REPARSE;
1466 } else if (roffit_lines > 1)
1467 --roffit_lines;
1468
1469 if (roffce_node != NULL && buf->buf[pos] != '\0') {
1470 if (roffce_lines < 1) {
1471 r->man->last = roffce_node;
1472 r->man->next = ROFF_NEXT_SIBLING;
1473 roffce_lines = 0;
1474 roffce_node = NULL;
1475 } else
1476 roffce_lines--;
1477 }
1478
1479 /* Convert all breakable hyphens into ASCII_HYPH. */
1480
1481 start = p = buf->buf + pos;
1482
1483 while (*p != '\0') {
1484 sz = strcspn(p, "-\\");
1485 p += sz;
1486
1487 if (*p == '\0')
1488 break;
1489
1490 if (*p == '\\') {
1491 /* Skip over escapes. */
1492 p++;
1493 esc = mandoc_escape((const char **)&p, NULL, NULL);
1494 if (esc == ESCAPE_ERROR)
1495 break;
1496 while (*p == '-')
1497 p++;
1498 continue;
1499 } else if (p == start) {
1500 p++;
1501 continue;
1502 }
1503
1504 if (isalpha((unsigned char)p[-1]) &&
1505 isalpha((unsigned char)p[1]))
1506 *p = ASCII_HYPH;
1507 p++;
1508 }
1509 return ROFF_CONT;
1510 }
1511
/*
 * Process one input line held in buf, starting at offset *offs.
 *
 * The steps, in order:
 *  - If equation delimiters are active and no table is open,
 *    roff_eqndelim() gets a chance to rewrite the line.
 *  - roff_res() expands escape sequences.
 *  - roff_getcontrol() decides whether this is a control line.
 *  - For a text line inside an open roff scope, the text handler
 *    of the open request is run.
 *  - While an equation is open, everything except a line starting
 *    with ".EN" is handed to eqn_read().
 *  - While a table is open, text lines and lines containing nothing
 *    after the control character are handed to tbl_read().
 *  - Remaining text lines go to roff_parsetext().
 *  - On request lines, an open scope gets the line through its sub
 *    handler; otherwise roff_parse() identifies the request or
 *    user-defined macro and its proc handler is called.
 *
 * ROFF_CONT is returned for lines that the mdoc(7) or man(7)
 * parsers are supposed to handle, in particular when roff_parse()
 * yields TOKEN_NONE; other return values come from the handlers.
 */
1512 enum rofferr
1513 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
1514 {
1515 enum roff_tok t;
1516 enum rofferr e;
1517 int pos; /* parse point */
1518 int spos; /* saved parse point for messages */
1519 int ppos; /* original offset in buf->buf */
1520 int ctl; /* macro line (boolean) */
1521
1522 ppos = pos = *offs;
1523
1524 /* Handle in-line equation delimiters. */
1525
1526 if (r->tbl == NULL &&
1527 r->last_eqn != NULL && r->last_eqn->delim &&
1528 (r->eqn == NULL || r->eqn_inline)) {
1529 e = roff_eqndelim(r, buf, pos);
1530 if (e == ROFF_REPARSE)
1531 return e;
1532 assert(e == ROFF_CONT);
1533 }
1534
1535 /* Expand some escape sequences. */
1536
1537 e = roff_res(r, buf, ln, pos);
1538 if (e == ROFF_IGN || e == ROFF_APPEND)
1539 return e;
1540 assert(e == ROFF_CONT);
1541
/* pos may be advanced by roff_getcontrol(); ppos keeps the start. */
1542 ctl = roff_getcontrol(r, buf->buf, &pos);
1543
1544 /*
1545 * First, if a scope is open and we're not a macro, pass the
1546 * text through the macro's filter.
1547 * Equations process all content themselves.
1548 * Tables process almost all content themselves, but we want
1549 * to warn about macros before passing it there.
1550 */
1551
1552 if (r->last != NULL && ! ctl) {
1553 t = r->last->tok;
1554 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1555 if (e == ROFF_IGN)
1556 return e;
1557 assert(e == ROFF_CONT);
1558 }
/* strncmp() is non-zero here for every line not starting with ".EN". */
1559 if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
1560 eqn_read(r->eqn, buf->buf + ppos);
1561 return ROFF_IGN;
1562 }
1563 if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
1564 tbl_read(r->tbl, ln, buf->buf, ppos);
1565 roff_addtbl(r->man, r->tbl);
1566 return ROFF_IGN;
1567 }
1568 if ( ! ctl)
1569 return roff_parsetext(r, buf, pos, offs);
1570
1571 /* Skip empty request lines. */
1572
1573 if (buf->buf[pos] == '"') {
1574 mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
1575 ln, pos, NULL);
1576 return ROFF_IGN;
1577 } else if (buf->buf[pos] == '\0')
1578 return ROFF_IGN;
1579
1580 /*
1581 * If a scope is open, go to the child handler for that macro,
1582 * as it may want to preprocess before doing anything with it.
1583 * Don't do so if an equation is open.
1584 */
1585
1586 if (r->last) {
1587 t = r->last->tok;
1588 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1589 }
1590
1591 /* No scope is open. This is a new request or macro. */
1592
1593 spos = pos;
1594 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1595
1596 /* Tables ignore most macros. */
1597
/*
 * The listed requests are reported and dropped.  For an unknown
 * macro, the macro name and the blanks after it are skipped and
 * the rest of the line is fed to the table parser.
 */
1598 if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
1599 t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
1600 mandoc_msg(MANDOCERR_TBLMACRO, r->parse,
1601 ln, pos, buf->buf + spos);
1602 if (t != TOKEN_NONE)
1603 return ROFF_IGN;
1604 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1605 pos++;
1606 while (buf->buf[pos] == ' ')
1607 pos++;
1608 tbl_read(r->tbl, ln, buf->buf, pos);
1609 roff_addtbl(r->man, r->tbl);
1610 return ROFF_IGN;
1611 }
1612
1613 /* For now, let high level macros abort .ce mode. */
1614
1615 if (ctl && roffce_node != NULL &&
1616 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
1617 t == ROFF_TH || t == ROFF_TS)) {
1618 r->man->last = roffce_node;
1619 r->man->next = ROFF_NEXT_SIBLING;
1620 roffce_lines = 0;
1621 roffce_node = NULL;
1622 }
1623
1624 /*
1625 * This is neither a roff request nor a user-defined macro.
1626 * Let the standard macro set parsers handle it.
1627 */
1628
1629 if (t == TOKEN_NONE)
1630 return ROFF_CONT;
1631
1632 /* Execute a roff request or a user defined macro. */
1633
1634 return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
1635 }
1636
1637 void
1638 roff_endparse(struct roff *r)
1639 {
1640 if (r->last != NULL)
1641 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1642 r->last->line, r->last->col,
1643 roff_name[r->last->tok]);
1644
1645 if (r->eqn != NULL) {
1646 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1647 r->eqn->node->line, r->eqn->node->pos, "EQ");
1648 eqn_parse(r->eqn);
1649 r->eqn = NULL;
1650 }
1651
1652 if (r->tbl != NULL) {
1653 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1654 r->tbl->line, r->tbl->pos, "TS");
1655 tbl_end(r->tbl);
1656 r->tbl = NULL;
1657 }
1658 }
1659
1660 /*
1661 * Parse a roff node's type from the input buffer. This must be in the
1662 * form of ".foo xxx" in the usual way.
1663 */
1664 static enum roff_tok
1665 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1666 {
1667 char *cp;
1668 const char *mac;
1669 size_t maclen;
1670 int deftype;
1671 enum roff_tok t;
1672
1673 cp = buf + *pos;
1674
1675 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1676 return TOKEN_NONE;
1677
1678 mac = cp;
1679 maclen = roff_getname(r, &cp, ln, ppos);
1680
1681 deftype = ROFFDEF_USER | ROFFDEF_REN;
1682 r->current_string = roff_getstrn(r, mac, maclen, &deftype);
1683 switch (deftype) {
1684 case ROFFDEF_USER:
1685 t = ROFF_USERDEF;
1686 break;
1687 case ROFFDEF_REN:
1688 t = ROFF_RENAMED;
1689 break;
1690 default:
1691 t = roffhash_find(r->reqtab, mac, maclen);
1692 break;
1693 }
1694 if (t != TOKEN_NONE)
1695 *pos = cp - buf;
1696 else if (deftype == ROFFDEF_UNDEF) {
1697 /* Using an undefined macro defines it to be empty. */
1698 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
1699 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
1700 }
1701 return t;
1702 }
1703
1704 /* --- handling of request blocks ----------------------------------------- */
1705
1706 static enum rofferr
1707 roff_cblock(ROFF_ARGS)
1708 {
1709
1710 /*
1711 * A block-close `..' should only be invoked as a child of an
1712 * ignore macro, otherwise raise a warning and just ignore it.
1713 */
1714
1715 if (r->last == NULL) {
1716 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1717 ln, ppos, "..");
1718 return ROFF_IGN;
1719 }
1720
1721 switch (r->last->tok) {
1722 case ROFF_am:
1723 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1724 case ROFF_ami:
1725 case ROFF_de:
1726 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1727 case ROFF_dei:
1728 case ROFF_ig:
1729 break;
1730 default:
1731 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1732 ln, ppos, "..");
1733 return ROFF_IGN;
1734 }
1735
1736 if (buf->buf[pos] != '\0')
1737 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1738 ".. %s", buf->buf + pos);
1739
1740 roffnode_pop(r);
1741 roffnode_cleanscope(r);
1742 return ROFF_IGN;
1743
1744 }
1745
1746 static void
1747 roffnode_cleanscope(struct roff *r)
1748 {
1749
1750 while (r->last) {
1751 if (--r->last->endspan != 0)
1752 break;
1753 roffnode_pop(r);
1754 }
1755 }
1756
1757 static void
1758 roff_ccond(struct roff *r, int ln, int ppos)
1759 {
1760
1761 if (NULL == r->last) {
1762 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1763 ln, ppos, "\\}");
1764 return;
1765 }
1766
1767 switch (r->last->tok) {
1768 case ROFF_el:
1769 case ROFF_ie:
1770 case ROFF_if:
1771 break;
1772 default:
1773 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1774 ln, ppos, "\\}");
1775 return;
1776 }
1777
1778 if (r->last->endspan > -1) {
1779 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1780 ln, ppos, "\\}");
1781 return;
1782 }
1783
1784 roffnode_pop(r);
1785 roffnode_cleanscope(r);
1786 return;
1787 }
1788
1789 static enum rofferr
1790 roff_block(ROFF_ARGS)
1791 {
1792 const char *name, *value;
1793 char *call, *cp, *iname, *rname;
1794 size_t csz, namesz, rsz;
1795 int deftype;
1796
1797 /* Ignore groff compatibility mode for now. */
1798
1799 if (tok == ROFF_de1)
1800 tok = ROFF_de;
1801 else if (tok == ROFF_dei1)
1802 tok = ROFF_dei;
1803 else if (tok == ROFF_am1)
1804 tok = ROFF_am;
1805 else if (tok == ROFF_ami1)
1806 tok = ROFF_ami;
1807
1808 /* Parse the macro name argument. */
1809
1810 cp = buf->buf + pos;
1811 if (tok == ROFF_ig) {
1812 iname = NULL;
1813 namesz = 0;
1814 } else {
1815 iname = cp;
1816 namesz = roff_getname(r, &cp, ln, ppos);
1817 iname[namesz] = '\0';
1818 }
1819
1820 /* Resolve the macro name argument if it is indirect. */
1821
1822 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1823 deftype = ROFFDEF_USER;
1824 name = roff_getstrn(r, iname, namesz, &deftype);
1825 if (name == NULL) {
1826 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1827 r->parse, ln, (int)(iname - buf->buf),
1828 "%.*s", (int)namesz, iname);
1829 namesz = 0;
1830 } else
1831 namesz = strlen(name);
1832 } else
1833 name = iname;
1834
1835 if (namesz == 0 && tok != ROFF_ig) {
1836 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
1837 ln, ppos, roff_name[tok]);
1838 return ROFF_IGN;
1839 }
1840
1841 roffnode_push(r, tok, name, ln, ppos);
1842
1843 /*
1844 * At the beginning of a `de' macro, clear the existing string
1845 * with the same name, if there is one. New content will be
1846 * appended from roff_block_text() in multiline mode.
1847 */
1848
1849 if (tok == ROFF_de || tok == ROFF_dei) {
1850 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1851 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
1852 } else if (tok == ROFF_am || tok == ROFF_ami) {
1853 deftype = ROFFDEF_ANY;
1854 value = roff_getstrn(r, iname, namesz, &deftype);
1855 switch (deftype) { /* Before appending, ... */
1856 case ROFFDEF_PRE: /* copy predefined to user-defined. */
1857 roff_setstrn(&r->strtab, name, namesz,
1858 value, strlen(value), 0);
1859 break;
1860 case ROFFDEF_REN: /* call original standard macro. */
1861 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
1862 (int)strlen(value), value);
1863 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
1864 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
1865 free(call);
1866 break;
1867 case ROFFDEF_STD: /* rename and call standard macro. */
1868 rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
1869 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
1870 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
1871 (int)rsz, rname);
1872 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
1873 free(call);
1874 free(rname);
1875 break;
1876 default:
1877 break;
1878 }
1879 }
1880
1881 if (*cp == '\0')
1882 return ROFF_IGN;
1883
1884 /* Get the custom end marker. */
1885
1886 iname = cp;
1887 namesz = roff_getname(r, &cp, ln, ppos);
1888
1889 /* Resolve the end marker if it is indirect. */
1890
1891 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1892 deftype = ROFFDEF_USER;
1893 name = roff_getstrn(r, iname, namesz, &deftype);
1894 if (name == NULL) {
1895 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1896 r->parse, ln, (int)(iname - buf->buf),
1897 "%.*s", (int)namesz, iname);
1898 namesz = 0;
1899 } else
1900 namesz = strlen(name);
1901 } else
1902 name = iname;
1903
1904 if (namesz)
1905 r->last->end = mandoc_strndup(name, namesz);
1906
1907 if (*cp != '\0')
1908 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
1909 ln, pos, ".%s ... %s", roff_name[tok], cp);
1910
1911 return ROFF_IGN;
1912 }
1913
1914 static enum rofferr
1915 roff_block_sub(ROFF_ARGS)
1916 {
1917 enum roff_tok t;
1918 int i, j;
1919
1920 /*
1921 * First check whether a custom macro exists at this level. If
1922 * it does, then check against it. This is some of groff's
1923 * stranger behaviours. If we encountered a custom end-scope
1924 * tag and that tag also happens to be a "real" macro, then we
1925 * need to try interpreting it again as a real macro. If it's
1926 * not, then return ignore. Else continue.
1927 */
1928
1929 if (r->last->end) {
1930 for (i = pos, j = 0; r->last->end[j]; j++, i++)
1931 if (buf->buf[i] != r->last->end[j])
1932 break;
1933
1934 if (r->last->end[j] == '\0' &&
1935 (buf->buf[i] == '\0' ||
1936 buf->buf[i] == ' ' ||
1937 buf->buf[i] == '\t')) {
1938 roffnode_pop(r);
1939 roffnode_cleanscope(r);
1940
1941 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
1942 i++;
1943
1944 pos = i;
1945 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
1946 TOKEN_NONE)
1947 return ROFF_RERUN;
1948 return ROFF_IGN;
1949 }
1950 }
1951
1952 /*
1953 * If we have no custom end-query or lookup failed, then try
1954 * pulling it out of the hashtable.
1955 */
1956
1957 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1958
1959 if (t != ROFF_cblock) {
1960 if (tok != ROFF_ig)
1961 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
1962 return ROFF_IGN;
1963 }
1964
1965 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
1966 }
1967
1968 static enum rofferr
1969 roff_block_text(ROFF_ARGS)
1970 {
1971
1972 if (tok != ROFF_ig)
1973 roff_setstr(r, r->last->name, buf->buf + pos, 2);
1974
1975 return ROFF_IGN;
1976 }
1977
1978 static enum rofferr
1979 roff_cond_sub(ROFF_ARGS)
1980 {
1981 enum roff_tok t;
1982 char *ep;
1983 int rr;
1984
1985 rr = r->last->rule;
1986 roffnode_cleanscope(r);
1987
1988 /*
1989 * If `\}' occurs on a macro line without a preceding macro,
1990 * drop the line completely.
1991 */
1992
1993 ep = buf->buf + pos;
1994 if (ep[0] == '\\' && ep[1] == '}')
1995 rr = 0;
1996
1997 /* Always check for the closing delimiter `\}'. */
1998
1999 while ((ep = strchr(ep, '\\')) != NULL) {
2000 switch (ep[1]) {
2001 case '}':
2002 memmove(ep, ep + 2, strlen(ep + 2) + 1);
2003 roff_ccond(r, ln, ep - buf->buf);
2004 break;
2005 case '\0':
2006 ++ep;
2007 break;
2008 default:
2009 ep += 2;
2010 break;
2011 }
2012 }
2013
2014 /*
2015 * Fully handle known macros when they are structurally
2016 * required or when the conditional evaluated to true.
2017 */
2018
2019 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2020 return t != TOKEN_NONE && (rr || roffs[t].flags & ROFFMAC_STRUCT)
2021 ? (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs) : rr
2022 ? ROFF_CONT : ROFF_IGN;
2023 }
2024
2025 static enum rofferr
2026 roff_cond_text(ROFF_ARGS)
2027 {
2028 char *ep;
2029 int rr;
2030
2031 rr = r->last->rule;
2032 roffnode_cleanscope(r);
2033
2034 ep = buf->buf + pos;
2035 while ((ep = strchr(ep, '\\')) != NULL) {
2036 if (*(++ep) == '}') {
2037 *ep = '&';
2038 roff_ccond(r, ln, ep - buf->buf - 1);
2039 }
2040 if (*ep != '\0')
2041 ++ep;
2042 }
2043 return rr ? ROFF_CONT : ROFF_IGN;
2044 }
2045
2046 /* --- handling of numeric and conditional expressions -------------------- */
2047
2048 /*
2049 * Parse a single signed integer number. Stop at the first non-digit.
2050 * If there is at least one digit, return success and advance the
2051 * parse point, else return failure and let the parse point unchanged.
2052 * Ignore overflows, treat them just like the C language.
2053 */
2054 static int
2055 roff_getnum(const char *v, int *pos, int *res, int flags)
2056 {
2057 int myres, scaled, n, p;
2058
2059 if (NULL == res)
2060 res = &myres;
2061
2062 p = *pos;
2063 n = v[p] == '-';
2064 if (n || v[p] == '+')
2065 p++;
2066
2067 if (flags & ROFFNUM_WHITE)
2068 while (isspace((unsigned char)v[p]))
2069 p++;
2070
2071 for (*res = 0; isdigit((unsigned char)v[p]); p++)
2072 *res = 10 * *res + v[p] - '0';
2073 if (p == *pos + n)
2074 return 0;
2075
2076 if (n)
2077 *res = -*res;
2078
2079 /* Each number may be followed by one optional scaling unit. */
2080
2081 switch (v[p]) {
2082 case 'f':
2083 scaled = *res * 65536;
2084 break;
2085 case 'i':
2086 scaled = *res * 240;
2087 break;
2088 case 'c':
2089 scaled = *res * 240 / 2.54;
2090 break;
2091 case 'v':
2092 case 'P':
2093 scaled = *res * 40;
2094 break;
2095 case 'm':
2096 case 'n':
2097 scaled = *res * 24;
2098 break;
2099 case 'p':
2100 scaled = *res * 10 / 3;
2101 break;
2102 case 'u':
2103 scaled = *res;
2104 break;
2105 case 'M':
2106 scaled = *res * 6 / 25;
2107 break;
2108 default:
2109 scaled = *res;
2110 p--;
2111 break;
2112 }
2113 if (flags & ROFFNUM_SCALE)
2114 *res = scaled;
2115
2116 *pos = p + 1;
2117 return 1;
2118 }
2119
/*
 * Evaluate a string comparison condition.
 * The character at v[*pos] serves as the delimiter.
 * The comparison succeeds (returns 1) if the text between the first
 * and the second occurrence of the delimiter is identical to the
 * text between the second and the third occurrence; otherwise,
 * 0 is returned.
 * The cursor *pos is advanced to just after the third occurrence
 * of the delimiter or, lacking that, to the end of the string.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim, *lhs, *rhs;
	int		 equal;

	equal = 0;
	delim = v + *pos;		/* the initial delimiter */
	lhs = delim + 1;		/* cursor in the first string */
	rhs = strchr(lhs, *delim);	/* the middle delimiter, if any */

	if (rhs != NULL) {
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: find the final delimiter. */
				rhs = strchr(rhs, *delim);
				break;
			}
			if (*rhs == *delim) {
				/* Both strings ended simultaneously. */
				equal = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return equal;
}
2162
2163 /*
2164 * Evaluate an optionally negated single character, numerical,
2165 * or string condition.
2166 */
2167 static int
2168 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2169 {
2170 char *cp, *name;
2171 size_t sz;
2172 int deftype, number, savepos, istrue, wanttrue;
2173
2174 if ('!' == v[*pos]) {
2175 wanttrue = 0;
2176 (*pos)++;
2177 } else
2178 wanttrue = 1;
2179
2180 switch (v[*pos]) {
2181 case '\0':
2182 return 0;
2183 case 'n':
2184 case 'o':
2185 (*pos)++;
2186 return wanttrue;
2187 case 'c':
2188 case 'e':
2189 case 't':
2190 case 'v':
2191 (*pos)++;
2192 return !wanttrue;
2193 case 'd':
2194 case 'r':
2195 cp = v + *pos + 1;
2196 while (*cp == ' ')
2197 cp++;
2198 name = cp;
2199 sz = roff_getname(r, &cp, ln, cp - v);
2200 if (sz == 0)
2201 istrue = 0;
2202 else if (v[*pos] == 'r')
2203 istrue = roff_hasregn(r, name, sz);
2204 else {
2205 deftype = ROFFDEF_ANY;
2206 roff_getstrn(r, name, sz, &deftype);
2207 istrue = !!deftype;
2208 }
2209 *pos = cp - v;
2210 return istrue == wanttrue;
2211 default:
2212 break;
2213 }
2214
2215 savepos = *pos;
2216 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2217 return (number > 0) == wanttrue;
2218 else if (*pos == savepos)
2219 return roff_evalstrcond(v, pos) == wanttrue;
2220 else
2221 return 0;
2222 }
2223
2224 static enum rofferr
2225 roff_line_ignore(ROFF_ARGS)
2226 {
2227
2228 return ROFF_IGN;
2229 }
2230
2231 static enum rofferr
2232 roff_insec(ROFF_ARGS)
2233 {
2234
2235 mandoc_msg(MANDOCERR_REQ_INSEC, r->parse,
2236 ln, ppos, roff_name[tok]);
2237 return ROFF_IGN;
2238 }
2239
2240 static enum rofferr
2241 roff_unsupp(ROFF_ARGS)
2242 {
2243
2244 mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse,
2245 ln, ppos, roff_name[tok]);
2246 return ROFF_IGN;
2247 }
2248
2249 static enum rofferr
2250 roff_cond(ROFF_ARGS)
2251 {
2252
2253 roffnode_push(r, tok, NULL, ln, ppos);
2254
2255 /*
2256 * An `.el' has no conditional body: it will consume the value
2257 * of the current rstack entry set in prior `ie' calls or
2258 * defaults to DENY.
2259 *
2260 * If we're not an `el', however, then evaluate the conditional.
2261 */
2262
2263 r->last->rule = tok == ROFF_el ?
2264 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2265 roff_evalcond(r, ln, buf->buf, &pos);
2266
2267 /*
2268 * An if-else will put the NEGATION of the current evaluated
2269 * conditional into the stack of rules.
2270 */
2271
2272 if (tok == ROFF_ie) {
2273 if (r->rstackpos + 1 == r->rstacksz) {
2274 r->rstacksz += 16;
2275 r->rstack = mandoc_reallocarray(r->rstack,
2276 r->rstacksz, sizeof(int));
2277 }
2278 r->rstack[++r->rstackpos] = !r->last->rule;
2279 }
2280
2281 /* If the parent has false as its rule, then so do we. */
2282
2283 if (r->last->parent && !r->last->parent->rule)
2284 r->last->rule = 0;
2285
2286 /*
2287 * Determine scope.
2288 * If there is nothing on the line after the conditional,
2289 * not even whitespace, use next-line scope.
2290 */
2291
2292 if (buf->buf[pos] == '\0') {
2293 r->last->endspan = 2;
2294 goto out;
2295 }
2296
2297 while (buf->buf[pos] == ' ')
2298 pos++;
2299
2300 /* An opening brace requests multiline scope. */
2301
2302 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2303 r->last->endspan = -1;
2304 pos += 2;
2305 while (buf->buf[pos] == ' ')
2306 pos++;
2307 goto out;
2308 }
2309
2310 /*
2311 * Anything else following the conditional causes
2312 * single-line scope. Warn if the scope contains
2313 * nothing but trailing whitespace.
2314 */
2315
2316 if (buf->buf[pos] == '\0')
2317 mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
2318 ln, ppos, roff_name[tok]);
2319
2320 r->last->endspan = 1;
2321
2322 out:
2323 *offs = pos;
2324 return ROFF_RERUN;
2325 }
2326
2327 static enum rofferr
2328 roff_ds(ROFF_ARGS)
2329 {
2330 char *string;
2331 const char *name;
2332 size_t namesz;
2333
2334 /* Ignore groff compatibility mode for now. */
2335
2336 if (tok == ROFF_ds1)
2337 tok = ROFF_ds;
2338 else if (tok == ROFF_as1)
2339 tok = ROFF_as;
2340
2341 /*
2342 * The first word is the name of the string.
2343 * If it is empty or terminated by an escape sequence,
2344 * abort the `ds' request without defining anything.
2345 */
2346
2347 name = string = buf->buf + pos;
2348 if (*name == '\0')
2349 return ROFF_IGN;
2350
2351 namesz = roff_getname(r, &string, ln, pos);
2352 if (name[namesz] == '\\')
2353 return ROFF_IGN;
2354
2355 /* Read past the initial double-quote, if any. */
2356 if (*string == '"')
2357 string++;
2358
2359 /* The rest is the value. */
2360 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2361 ROFF_as == tok);
2362 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2363 return ROFF_IGN;
2364 }
2365
/*
 * Parse a single operator, one or two characters long, at v[*pos].
 * If the operator is recognized, store its one-character code
 * in *res, advance the parse point past it, and return the code.
 * Otherwise, return 0 and leave the parse point unchanged;
 * *res then holds the unrecognized character.
 * Two-character operators are encoded as follows:
 * "<=" as 'l', "<>" as '!', "<?" as 'i',
 * ">=" as 'g', ">?" as 'a', and "==" as '='.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	char	 op, next;
	int	 width;

	op = v[*pos];
	*res = op;
	width = 1;

	if (op == '<') {
		next = v[*pos + 1];
		if (next == '=') {
			*res = 'l';
			width = 2;
		} else if (next == '>') {
			*res = '!';
			width = 2;
		} else if (next == '?') {
			*res = 'i';
			width = 2;
		}
	} else if (op == '>') {
		next = v[*pos + 1];
		if (next == '=') {
			*res = 'g';
			width = 2;
		} else if (next == '?') {
			*res = 'a';
			width = 2;
		}
	} else if (op == '=') {
		/* A doubled equal sign means the same as a single one. */
		if (v[*pos + 1] == '=')
			width = 2;
	} else if (op != '+' && op != '-' && op != '*' && op != '/' &&
	    op != '%' && op != '&' && op != ':') {
		/* Not an operator at all. */
		return 0;
	}

	*pos += width;
	return *res;
}
2429
2430 /*
2431 * Evaluate either a parenthesized numeric expression
2432 * or a single signed integer number.
2433 */
2434 static int
2435 roff_evalpar(struct roff *r, int ln,
2436 const char *v, int *pos, int *res, int flags)
2437 {
2438
2439 if ('(' != v[*pos])
2440 return roff_getnum(v, pos, res, flags);
2441
2442 (*pos)++;
2443 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2444 return 0;
2445
2446 /*
2447 * Omission of the closing parenthesis
2448 * is an error in validation mode,
2449 * but ignored in evaluation mode.
2450 */
2451
2452 if (')' == v[*pos])
2453 (*pos)++;
2454 else if (NULL == res)
2455 return 0;
2456
2457 return 1;
2458 }
2459
2460 /*
2461 * Evaluate a complete numeric expression.
2462 * Proceed left to right, there is no concept of precedence.
2463 */
2464 static int
2465 roff_evalnum(struct roff *r, int ln, const char *v,
2466 int *pos, int *res, int flags)
2467 {
2468 int mypos, operand2;
2469 char operator;
2470
2471 if (NULL == pos) {
2472 mypos = 0;
2473 pos = &mypos;
2474 }
2475
2476 if (flags & ROFFNUM_WHITE)
2477 while (isspace((unsigned char)v[*pos]))
2478 (*pos)++;
2479
2480 if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2481 return 0;
2482
2483 while (1) {
2484 if (flags & ROFFNUM_WHITE)
2485 while (isspace((unsigned char)v[*pos]))
2486 (*pos)++;
2487
2488 if ( ! roff_getop(v, pos, &operator))
2489 break;
2490
2491 if (flags & ROFFNUM_WHITE)
2492 while (isspace((unsigned char)v[*pos]))
2493 (*pos)++;
2494
2495 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2496 return 0;
2497
2498 if (flags & ROFFNUM_WHITE)
2499 while (isspace((unsigned char)v[*pos]))
2500 (*pos)++;
2501
2502 if (NULL == res)
2503 continue;
2504
2505 switch (operator) {
2506 case '+':
2507 *res += operand2;
2508 break;
2509 case '-':
2510 *res -= operand2;
2511 break;
2512 case '*':
2513 *res *= operand2;
2514 break;
2515 case '/':
2516 if (operand2 == 0) {
2517 mandoc_msg(MANDOCERR_DIVZERO,
2518 r->parse, ln, *pos, v);
2519 *res = 0;
2520 break;
2521 }
2522 *res /= operand2;
2523 break;
2524 case '%':
2525 if (operand2 == 0) {
2526 mandoc_msg(MANDOCERR_DIVZERO,
2527 r->parse, ln, *pos, v);
2528 *res = 0;
2529 break;
2530 }
2531 *res %= operand2;
2532 break;
2533 case '<':
2534 *res = *res < operand2;
2535 break;
2536 case '>':
2537 *res = *res > operand2;
2538 break;
2539 case 'l':
2540 *res = *res <= operand2;
2541 break;
2542 case 'g':
2543 *res = *res >= operand2;
2544 break;
2545 case '=':
2546 *res = *res == operand2;
2547 break;
2548 case '!':
2549 *res = *res != operand2;
2550 break;
2551 case '&':
2552 *res = *res && operand2;
2553 break;
2554 case ':':
2555 *res = *res || operand2;
2556 break;
2557 case 'i':
2558 if (operand2 < *res)
2559 *res = operand2;
2560 break;
2561 case 'a':
2562 if (operand2 > *res)
2563 *res = operand2;
2564 break;
2565 default:
2566 abort();
2567 }
2568 }
2569 return 1;
2570 }
2571
2572 /* --- register management ------------------------------------------------ */
2573
/*
 * Set the register with the NUL-terminated name to val,
 * or adjust it by val if sign is '+' or '-'.
 * The auto-increment step is left alone (INT_MIN means "keep").
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	size_t	 namesz;

	namesz = strlen(name);
	roff_setregn(r, name, namesz, val, sign, INT_MIN);
}
2579
2580 static void
2581 roff_setregn(struct roff *r, const char *name, size_t len,
2582 int val, char sign, int step)
2583 {
2584 struct roffreg *reg;
2585
2586 /* Search for an existing register with the same name. */
2587 reg = r->regtab;
2588
2589 while (reg != NULL && (reg->key.sz != len ||
2590 strncmp(reg->key.p, name, len) != 0))
2591 reg = reg->next;
2592
2593 if (NULL == reg) {
2594 /* Create a new register. */
2595 reg = mandoc_malloc(sizeof(struct roffreg));
2596 reg->key.p = mandoc_strndup(name, len);
2597 reg->key.sz = len;
2598 reg->val = 0;
2599 reg->step = 0;
2600 reg->next = r->regtab;
2601 r->regtab = reg;
2602 }
2603
2604 if ('+' == sign)
2605 reg->val += val;
2606 else if ('-' == sign)
2607 reg->val -= val;
2608 else
2609 reg->val = val;
2610 if (step != INT_MIN)
2611 reg->step = step;
2612 }
2613
2614 /*
2615 * Handle some predefined read-only number registers.
2616 * For now, return -1 if the requested register is not predefined;
2617 * in case a predefined read-only register having the value -1
2618 * were to turn up, another special value would have to be chosen.
2619 */
2620 static int
2621 roff_getregro(const struct roff *r, const char *name)
2622 {
2623
2624 switch (*name) {
2625 case '$': /* Number of arguments of the last macro evaluated. */
2626 return r->argc;
2627 case 'A': /* ASCII approximation mode is always off. */
2628 return 0;
2629 case 'g': /* Groff compatibility mode is always on. */
2630 return 1;
2631 case 'H': /* Fixed horizontal resolution. */
2632 return 24;
2633 case 'j': /* Always adjust left margin only. */
2634 return 0;
2635 case 'T': /* Some output device is always defined. */
2636 return 1;
2637 case 'V': /* Fixed vertical resolution. */
2638 return 40;
2639 default:
2640 return -1;
2641 }
2642 }
2643
/*
 * Return the value of the register with the NUL-terminated name,
 * without applying any auto-increment.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	size_t	 namesz;

	namesz = strlen(name);
	return roff_getregn(r, name, namesz, '\0');
}
2649
2650 static int
2651 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
2652 {
2653 struct roffreg *reg;
2654 int val;
2655
2656 if ('.' == name[0] && 2 == len) {
2657 val = roff_getregro(r, name + 1);
2658 if (-1 != val)
2659 return val;
2660 }
2661
2662 for (reg = r->regtab; reg; reg = reg->next) {
2663 if (len == reg->key.sz &&
2664 0 == strncmp(name, reg->key.p, len)) {
2665 switch (sign) {
2666 case '+':
2667 reg->val += reg->step;
2668 break;
2669 case '-':
2670 reg->val -= reg->step;
2671 break;
2672 default:
2673 break;
2674 }
2675 return reg->val;
2676 }
2677 }
2678
2679 roff_setregn(r, name, len, 0, '\0', INT_MIN);
2680 return 0;
2681 }
2682
2683 static int
2684 roff_hasregn(const struct roff *r, const char *name, size_t len)
2685 {
2686 struct roffreg *reg;
2687 int val;
2688
2689 if ('.' == name[0] && 2 == len) {
2690 val = roff_getregro(r, name + 1);
2691 if (-1 != val)
2692 return 1;
2693 }
2694
2695 for (reg = r->regtab; reg; reg = reg->next)
2696 if (len == reg->key.sz &&
2697 0 == strncmp(name, reg->key.p, len))
2698 return 1;
2699
2700 return 0;
2701 }
2702
2703 static void
2704 roff_freereg(struct roffreg *reg)
2705 {
2706 struct roffreg *old_reg;
2707
2708 while (NULL != reg) {
2709 free(reg->key.p);
2710 old_reg = reg;
2711 reg = reg->next;
2712 free(old_reg);
2713 }
2714 }
2715
2716 static enum rofferr
2717 roff_nr(ROFF_ARGS)
2718 {
2719 char *key, *val, *step;
2720 size_t keysz;
2721 int iv, is, len;
2722 char sign;
2723
2724 key = val = buf->buf + pos;
2725 if (*key == '\0')
2726 return ROFF_IGN;
2727
2728 keysz = roff_getname(r, &val, ln, pos);
2729 if (key[keysz] == '\\')
2730 return ROFF_IGN;
2731
2732 sign = *val;
2733 if (sign == '+' || sign == '-')
2734 val++;
2735
2736 len = 0;
2737 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
2738 return ROFF_IGN;
2739
2740 step = val + len;
2741 while (isspace((unsigned char)*step))
2742 step++;
2743 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
2744 is = INT_MIN;
2745
2746 roff_setregn(r, key, keysz, iv, sign, is);
2747 return ROFF_IGN;
2748 }
2749
2750 static enum rofferr
2751 roff_rr(ROFF_ARGS)
2752 {
2753 struct roffreg *reg, **prev;
2754 char *name, *cp;
2755 size_t namesz;
2756
2757 name = cp = buf->buf + pos;
2758 if (*name == '\0')
2759 return ROFF_IGN;
2760 namesz = roff_getname(r, &cp, ln, pos);
2761 name[namesz] = '\0';
2762
2763 prev = &r->regtab;
2764 while (1) {
2765 reg = *prev;
2766 if (reg == NULL || !strcmp(name, reg->key.p))
2767 break;
2768 prev = &reg->next;
2769 }
2770 if (reg != NULL) {
2771 *prev = reg->next;
2772 free(reg->key.p);
2773 free(reg);
2774 }
2775 return ROFF_IGN;
2776 }
2777
2778 /* --- handler functions for roff requests -------------------------------- */
2779
2780 static enum rofferr
2781 roff_rm(ROFF_ARGS)
2782 {
2783 const char *name;
2784 char *cp;
2785 size_t namesz;
2786
2787 cp = buf->buf + pos;
2788 while (*cp != '\0') {
2789 name = cp;
2790 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
2791 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
2792 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2793 if (name[namesz] == '\\')
2794 break;
2795 }
2796 return ROFF_IGN;
2797 }
2798
2799 static enum rofferr
2800 roff_it(ROFF_ARGS)
2801 {
2802 int iv;
2803
2804 /* Parse the number of lines. */
2805
2806 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
2807 mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
2808 ln, ppos, buf->buf + 1);
2809 return ROFF_IGN;
2810 }
2811
2812 while (isspace((unsigned char)buf->buf[pos]))
2813 pos++;
2814
2815 /*
2816 * Arm the input line trap.
2817 * Special-casing "an-trap" is an ugly workaround to cope
2818 * with DocBook stupidly fiddling with man(7) internals.
2819 */
2820
2821 roffit_lines = iv;
2822 roffit_macro = mandoc_strdup(iv != 1 ||
2823 strcmp(buf->buf + pos, "an-trap") ?
2824 buf->buf + pos : "br");
2825 return ROFF_IGN;
2826 }
2827
2828 static enum rofferr
2829 roff_Dd(ROFF_ARGS)
2830 {
2831 int mask;
2832 enum roff_tok t, te;
2833
2834 switch (tok) {
2835 case ROFF_Dd:
2836 tok = MDOC_Dd;
2837 te = MDOC_MAX;
2838 if (r->format == 0)
2839 r->format = MPARSE_MDOC;
2840 mask = MPARSE_MDOC | MPARSE_QUICK;
2841 break;
2842 case ROFF_TH:
2843 tok = MAN_TH;
2844 te = MAN_MAX;
2845 if (r->format == 0)
2846 r->format = MPARSE_MAN;
2847 mask = MPARSE_QUICK;
2848 break;
2849 default:
2850 abort();
2851 }
2852 if ((r->options & mask) == 0)
2853 for (t = tok; t < te; t++)
2854 roff_setstr(r, roff_name[t], NULL, 0);
2855 return ROFF_CONT;
2856 }
2857
2858 static enum rofferr
2859 roff_TE(ROFF_ARGS)
2860 {
2861 if (r->tbl == NULL) {
2862 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2863 ln, ppos, "TE");
2864 return ROFF_IGN;
2865 }
2866 if (tbl_end(r->tbl) == 0) {
2867 r->tbl = NULL;
2868 free(buf->buf);
2869 buf->buf = mandoc_strdup(".sp");
2870 buf->sz = 4;
2871 *offs = 0;
2872 return ROFF_REPARSE;
2873 }
2874 r->tbl = NULL;
2875 return ROFF_IGN;
2876 }
2877
2878 static enum rofferr
2879 roff_T_(ROFF_ARGS)
2880 {
2881
2882 if (NULL == r->tbl)
2883 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2884 ln, ppos, "T&");
2885 else
2886 tbl_restart(ln, ppos, r->tbl);
2887
2888 return ROFF_IGN;
2889 }
2890
2891 /*
2892 * Handle in-line equation delimiters.
2893 */
2894 static enum rofferr
2895 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
2896 {
2897 char *cp1, *cp2;
2898 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
2899
2900 /*
2901 * Outside equations, look for an opening delimiter.
2902 * If we are inside an equation, we already know it is
2903 * in-line, or this function wouldn't have been called;
2904 * so look for a closing delimiter.
2905 */
2906
2907 cp1 = buf->buf + pos;
2908 cp2 = strchr(cp1, r->eqn == NULL ?
2909 r->last_eqn->odelim : r->last_eqn->cdelim);
2910 if (cp2 == NULL)
2911 return ROFF_CONT;
2912
2913 *cp2++ = '\0';
2914 bef_pr = bef_nl = aft_nl = aft_pr = "";
2915
2916 /* Handle preceding text, protecting whitespace. */
2917
2918 if (*buf->buf != '\0') {
2919 if (r->eqn == NULL)
2920 bef_pr = "\\&";
2921 bef_nl = "\n";
2922 }
2923
2924 /*
2925 * Prepare replacing the delimiter with an equation macro
2926 * and drop leading white space from the equation.
2927 */
2928
2929 if (r->eqn == NULL) {
2930 while (*cp2 == ' ')
2931 cp2++;
2932 mac = ".EQ";
2933 } else
2934 mac = ".EN";
2935
2936 /* Handle following text, protecting whitespace. */
2937
2938 if (*cp2 != '\0') {
2939 aft_nl = "\n";
2940 if (r->eqn != NULL)
2941 aft_pr = "\\&";
2942 }
2943
2944 /* Do the actual replacement. */
2945
2946 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
2947 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
2948 free(buf->buf);
2949 buf->buf = cp1;
2950
2951 /* Toggle the in-line state of the eqn subsystem. */
2952
2953 r->eqn_inline = r->eqn == NULL;
2954 return ROFF_REPARSE;
2955 }
2956
2957 static enum rofferr
2958 roff_EQ(ROFF_ARGS)
2959 {
2960 struct roff_node *n;
2961
2962 if (r->man->macroset == MACROSET_MAN)
2963 man_breakscope(r->man, ROFF_EQ);
2964 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
2965 if (ln > r->man->last->line)
2966 n->flags |= NODE_LINE;
2967 n->eqn = mandoc_calloc(1, sizeof(*n->eqn));
2968 n->eqn->expectargs = UINT_MAX;
2969 roff_node_append(r->man, n);
2970 r->man->next = ROFF_NEXT_SIBLING;
2971
2972 assert(r->eqn == NULL);
2973 if (r->last_eqn == NULL)
2974 r->last_eqn = eqn_alloc(r->parse);
2975 else
2976 eqn_reset(r->last_eqn);
2977 r->eqn = r->last_eqn;
2978 r->eqn->node = n;
2979
2980 if (buf->buf[pos] != '\0')
2981 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2982 ".EQ %s", buf->buf + pos);
2983
2984 return ROFF_IGN;
2985 }
2986
2987 static enum rofferr
2988 roff_EN(ROFF_ARGS)
2989 {
2990 if (r->eqn != NULL) {
2991 eqn_parse(r->eqn);
2992 r->eqn = NULL;
2993 } else
2994 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
2995 if (buf->buf[pos] != '\0')
2996 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2997 "EN %s", buf->buf + pos);
2998 return ROFF_IGN;
2999 }
3000
3001 static enum rofferr
3002 roff_TS(ROFF_ARGS)
3003 {
3004 if (r->tbl != NULL) {
3005 mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
3006 ln, ppos, "TS breaks TS");
3007 tbl_end(r->tbl);
3008 }
3009 r->tbl = tbl_alloc(ppos, ln, r->parse);
3010 if (r->last_tbl)
3011 r->last_tbl->next = r->tbl;
3012 else
3013 r->first_tbl = r->tbl;
3014 r->last_tbl = r->tbl;
3015 return ROFF_IGN;
3016 }
3017
/*
 * Handle requests taking at most one argument: allocate an element
 * node for the request and, if an argument is present, a word for
 * it.  Excess arguments are reported with MANDOCERR_ARG_EXCESS.
 * For `ce' and `rj', the argument is additionally evaluated as a
 * line count that is stored in roffce_lines.
 * Always returns ROFF_IGN.
 */
static enum rofferr
roff_onearg(ROFF_ARGS)
{
	struct roff_node	*n;
	char			*cp;
	int			 npos;

	/* Some requests break man(7) next-line scopes. */

	if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
	    (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
	     tok == ROFF_ti))
		man_breakscope(r->man, tok);

	/*
	 * A new `ce' or `rj' while a previous one is still remembered
	 * in roffce_node: continue the tree after that node.
	 */

	if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
		r->man->last = roffce_node;
		r->man->next = ROFF_NEXT_SIBLING;
	}

	roff_elem_alloc(r->man, ln, ppos, tok);
	n = r->man->last;

	/*
	 * NUL-terminate the first argument in place by overwriting
	 * the blanks following it; warn if anything comes after.
	 */

	cp = buf->buf + pos;
	if (*cp != '\0') {
		while (*cp != '\0' && *cp != ' ')
			cp++;
		while (*cp == ' ')
			*cp++ = '\0';
		if (*cp != '\0')
			mandoc_vmsg(MANDOCERR_ARG_EXCESS,
			    r->parse, ln, cp - buf->buf,
			    "%s ... %s", roff_name[tok], cp);
		roff_word_alloc(r->man, ln, pos, buf->buf + pos);
	}

	if (tok == ROFF_ce || tok == ROFF_rj) {
		/*
		 * If the last node is still the element, no argument
		 * word was added above (presumably roff_word_alloc()
		 * updates r->man->last); supply a default of "1"
		 * and flag it NODE_NOSRC.
		 */
		if (r->man->last->type == ROFFT_ELEM) {
			roff_word_alloc(r->man, ln, pos, "1");
			r->man->last->flags |= NODE_NOSRC;
		}

		/* Evaluate the line count; fall back to 1 on failure. */

		npos = 0;
		if (roff_evalnum(r, ln, r->man->last->string, &npos,
		    &roffce_lines, 0) == 0) {
			mandoc_vmsg(MANDOCERR_CE_NONUM,
			    r->parse, ln, pos, "ce %s", buf->buf + pos);
			roffce_lines = 1;
		}

		/*
		 * A count below one forgets the remembered node;
		 * otherwise remember the parent of the word node.
		 */

		if (roffce_lines < 1) {
			r->man->last = r->man->last->parent;
			roffce_node = NULL;
			roffce_lines = 0;
		} else
			roffce_node = r->man->last->parent;
	} else {
		/* All other requests are complete right away. */
		n->flags |= NODE_VALID | NODE_ENDED;
		r->man->last = n;
	}
	n->flags |= NODE_LINE;
	r->man->next = ROFF_NEXT_SIBLING;
	return ROFF_IGN;
}
3077
3078 static enum rofferr
3079 roff_manyarg(ROFF_ARGS)
3080 {
3081 struct roff_node *n;
3082 char *sp, *ep;
3083
3084 roff_elem_alloc(r->man, ln, ppos, tok);
3085 n = r->man->last;
3086
3087 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3088 while (*ep != '\0' && *ep != ' ')
3089 ep++;
3090 while (*ep == ' ')
3091 *ep++ = '\0';
3092 roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3093 }
3094
3095 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3096 r->man->last = n;
3097 r->man->next = ROFF_NEXT_SIBLING;
3098 return ROFF_IGN;
3099 }
3100
3101 static enum rofferr
3102 roff_als(ROFF_ARGS)
3103 {
3104 char *oldn, *newn, *end, *value;
3105 size_t oldsz, newsz, valsz;
3106
3107 newn = oldn = buf->buf + pos;
3108 if (*newn == '\0')
3109 return ROFF_IGN;
3110
3111 newsz = roff_getname(r, &oldn, ln, pos);
3112 if (newn[newsz] == '\\' || *oldn == '\0')
3113 return ROFF_IGN;
3114
3115 end = oldn;
3116 oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3117 if (oldsz == 0)
3118 return ROFF_IGN;
3119
3120 valsz = mandoc_asprintf(&value, ".%.*s \\$*\\\"\n",
3121 (int)oldsz, oldn);
3122 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3123 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3124 free(value);
3125 return ROFF_IGN;
3126 }
3127
3128 static enum rofferr
3129 roff_br(ROFF_ARGS)
3130 {
3131 if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3132 man_breakscope(r->man, ROFF_br);
3133 roff_elem_alloc(r->man, ln, ppos, ROFF_br);
3134 if (buf->buf[pos] != '\0')
3135 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
3136 "%s %s", roff_name[tok], buf->buf + pos);
3137 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3138 r->man->next = ROFF_NEXT_SIBLING;
3139 return ROFF_IGN;
3140 }
3141
3142 static enum rofferr
3143 roff_cc(ROFF_ARGS)
3144 {
3145 const char *p;
3146
3147 p = buf->buf + pos;
3148
3149 if (*p == '\0' || (r->control = *p++) == '.')
3150 r->control = '\0';
3151
3152 if (*p != '\0')
3153 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
3154 ln, p - buf->buf, "cc ... %s", p);
3155
3156 return ROFF_IGN;
3157 }
3158
3159 static enum rofferr
3160 roff_ec(ROFF_ARGS)
3161 {
3162 const char *p;
3163
3164 p = buf->buf + pos;
3165 if (*p == '\0')
3166 r->escape = '\\';
3167 else {
3168 r->escape = *p;
3169 if (*++p != '\0')
3170 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
3171 ln, p - buf->buf, "ec ... %s", p);
3172 }
3173 return ROFF_IGN;
3174 }
3175
3176 static enum rofferr
3177 roff_eo(ROFF_ARGS)
3178 {
3179 r->escape = '\0';
3180 if (buf->buf[pos] != '\0')
3181 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse,
3182 ln, pos, "eo %s", buf->buf + pos);
3183 return ROFF_IGN;
3184 }
3185
3186 static enum rofferr
3187 roff_nop(ROFF_ARGS)
3188 {
3189 while (buf->buf[pos] == ' ')
3190 pos++;
3191 *offs = pos;
3192 return ROFF_RERUN;
3193 }
3194
3195 static enum rofferr
3196 roff_tr(ROFF_ARGS)
3197 {
3198 const char *p, *first, *second;
3199 size_t fsz, ssz;
3200 enum mandoc_esc esc;
3201
3202 p = buf->buf + pos;
3203
3204 if (*p == '\0') {
3205 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr");
3206 return ROFF_IGN;
3207 }
3208
3209 while (*p != '\0') {
3210 fsz = ssz = 1;
3211
3212 first = p++;
3213 if (*first == '\\') {
3214 esc = mandoc_escape(&p, NULL, NULL);
3215 if (esc == ESCAPE_ERROR) {
3216 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
3217 ln, (int)(p - buf->buf), first);
3218 return ROFF_IGN;
3219 }
3220 fsz = (size_t)(p - first);
3221 }
3222
3223 second = p++;
3224 if (*second == '\\') {
3225 esc = mandoc_escape(&p, NULL, NULL);
3226 if (esc == ESCAPE_ERROR) {
3227 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
3228 ln, (int)(p - buf->buf), second);
3229 return ROFF_IGN;
3230 }
3231 ssz = (size_t)(p - second);
3232 } else if (*second == '\0') {
3233 mandoc_vmsg(MANDOCERR_TR_ODD, r->parse,
3234 ln, first - buf->buf, "tr %s", first);
3235 second = " ";
3236 p--;
3237 }
3238
3239 if (fsz > 1) {
3240 roff_setstrn(&r->xmbtab, first, fsz,
3241 second, ssz, 0);
3242 continue;
3243 }
3244
3245 if (r->xtab == NULL)
3246 r->xtab = mandoc_calloc(128,
3247 sizeof(struct roffstr));
3248
3249 free(r->xtab[(int)*first].p);
3250 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3251 r->xtab[(int)*first].sz = ssz;
3252 }
3253
3254 return ROFF_IGN;
3255 }
3256
3257 static enum rofferr
3258 roff_rn(ROFF_ARGS)
3259 {
3260 const char *value;
3261 char *oldn, *newn, *end;
3262 size_t oldsz, newsz;
3263 int deftype;
3264
3265 oldn = newn = buf->buf + pos;
3266 if (*oldn == '\0')
3267 return ROFF_IGN;
3268
3269 oldsz = roff_getname(r, &newn, ln, pos);
3270 if (oldn[oldsz] == '\\' || *newn == '\0')
3271 return ROFF_IGN;
3272
3273 end = newn;
3274 newsz = roff_getname(r, &end, ln, newn - buf->buf);
3275 if (newsz == 0)
3276 return ROFF_IGN;
3277
3278 deftype = ROFFDEF_ANY;
3279 value = roff_getstrn(r, oldn, oldsz, &deftype);
3280 switch (deftype) {
3281 case ROFFDEF_USER:
3282 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3283 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3284 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3285 break;
3286 case ROFFDEF_PRE:
3287 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3288 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3289 break;
3290 case ROFFDEF_REN:
3291 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3292 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3293 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3294 break;
3295 case ROFFDEF_STD:
3296 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3297 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3298 break;
3299 default:
3300 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3301 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3302 break;
3303 }
3304 return ROFF_IGN;
3305 }
3306
3307 static enum rofferr
3308 roff_so(ROFF_ARGS)
3309 {
3310 char *name, *cp;
3311
3312 name = buf->buf + pos;
3313 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
3314
3315 /*
3316 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3317 * opening anything that's not in our cwd or anything beneath
3318 * it. Thus, explicitly disallow traversing up the file-system
3319 * or using absolute paths.
3320 */
3321
3322 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3323 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
3324 ".so %s", name);
3325 buf->sz = mandoc_asprintf(&cp,
3326 ".sp\nSee the file %s.\n.sp", name) + 1;
3327 free(buf->buf);
3328 buf->buf = cp;
3329 *offs = 0;
3330 return ROFF_REPARSE;
3331 }
3332
3333 *offs = pos;
3334 return ROFF_SO;
3335 }
3336
3337 /* --- user defined strings and macros ------------------------------------ */
3338
/*
 * Call a user-defined macro: replace the input line by a copy of
 * r->current_string in which the argument references \$1 to \$9
 * and \$* have been replaced by the arguments found on the line.
 * Return ROFF_REPARSE if the result ends in a newline character,
 * ROFF_APPEND if it does not, or ROFF_IGN after reporting
 * MANDOCERR_ROFFLOOP when expansion exceeds EXPAND_LIMIT.
 */
static enum rofferr
roff_userdef(ROFF_ARGS)
{
	const char	 *arg[16], *ap;
	char		 *cp, *n1, *n2;
	int		  expand_count, i, ib, ie;
	size_t		  asz, rsz;

	/*
	 * Collect pointers to macro argument strings
	 * and NUL-terminate them.
	 * Unused slots point to an empty string, and r->argc
	 * counts the arguments actually found on the line.
	 */

	r->argc = 0;
	cp = buf->buf + pos;
	for (i = 0; i < 16; i++) {
		if (*cp == '\0')
			arg[i] = "";
		else {
			arg[i] = mandoc_getarg(r->parse, &cp, ln, &pos);
			r->argc = i + 1;
		}
	}

	/*
	 * Expand macro arguments.
	 * Work on a private copy of the macro body:
	 * n1 is the start of the copy, cp the parse pointer,
	 * and n2 the end of the most recently inserted argument text.
	 */

	buf->sz = strlen(r->current_string) + 1;
	n1 = n2 = cp = mandoc_malloc(buf->sz);
	memcpy(n1, r->current_string, buf->sz);
	expand_count = 0;
	while (*cp != '\0') {

		/* Scan ahead for the next argument invocation. */

		if (*cp++ != '\\')
			continue;
		if (*cp++ != '$')
			continue;
		if (*cp == '*') {  /* \\$* inserts all arguments */
			ib = 0;
			ie = r->argc - 1;
		} else {  /* \\$1 .. \\$9 insert one argument */
			ib = ie = *cp - '1';
			if (ib < 0 || ib > 8)
				continue;
		}

		/* Step back to the backslash starting the reference. */

		cp -= 2;

		/*
		 * Prevent infinite recursion.
		 * A reference at or beyond n2 is outside the text
		 * inserted last and restarts the count; a reference
		 * inside that text results from an expansion
		 * and is counted against EXPAND_LIMIT.
		 */

		if (cp >= n2)
			expand_count = 1;
		else if (++expand_count > EXPAND_LIMIT) {
			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
			    ln, (int)(cp - n1), NULL);
			free(buf->buf);
			buf->buf = n1;
			*offs = 0;
			return ROFF_IGN;
		}

		/*
		 * Determine the size of the expanded argument,
		 * taking escaping of quotes into account:
		 * each double quote becomes four bytes below.
		 */

		asz = ie > ib ? ie - ib : 0;  /* for blanks */
		for (i = ib; i <= ie; i++) {
			for (ap = arg[i]; *ap != '\0'; ap++) {
				asz++;
				if (*ap == '"')
					asz += 3;
			}
		}

		/*
		 * The reference itself is three bytes long, so the
		 * buffer only needs resizing if asz differs from 3.
		 */

		if (asz != 3) {

			/*
			 * Determine the size of the rest of the
			 * unexpanded macro, including the NUL.
			 */

			rsz = buf->sz - (cp - n1) - 3;

			/*
			 * When shrinking, move before
			 * releasing the storage.
			 */

			if (asz < 3)
				memmove(cp + asz, cp + 3, rsz);

			/*
			 * Resize the storage for the macro
			 * and readjust the parse pointer.
			 */

			buf->sz += asz - 3;
			n2 = mandoc_realloc(n1, buf->sz);
			cp = n2 + (cp - n1);
			n1 = n2;

			/*
			 * When growing, make room
			 * for the expanded argument.
			 */

			if (asz > 3)
				memmove(cp + asz, cp + 3, rsz);
		}

		/*
		 * Copy the expanded argument, escaping quotes.
		 * cp stays at the start of the inserted text, so that
		 * text is scanned for argument references again.
		 */

		n2 = cp;
		for (i = ib; i <= ie; i++) {
			for (ap = arg[i]; *ap != '\0'; ap++) {
				if (*ap == '"') {
					memcpy(n2, "\\(dq", 4);
					n2 += 4;
				} else
					*n2++ = *ap;
			}
			if (i < ie)
				*n2++ = ' ';
		}
	}

	/*
	 * Replace the macro invocation
	 * by the expanded macro.
	 */

	free(buf->buf);
	buf->buf = n1;
	*offs = 0;

	/* buf->sz includes the NUL, so the last byte is at sz - 2. */

	return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
	   ROFF_REPARSE : ROFF_APPEND;
}
3481
3482 /*
3483 * Calling a high-level macro that was renamed with .rn.
3484 * r->current_string has already been set up by roff_parse().
3485 */
3486 static enum rofferr
3487 roff_renamed(ROFF_ARGS)
3488 {
3489 char *nbuf;
3490
3491 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
3492 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
3493 free(buf->buf);
3494 buf->buf = nbuf;
3495 *offs = 0;
3496 return ROFF_CONT;
3497 }
3498
/*
 * Parse a name starting at *cpp.
 * Return the length of the name, which may be 0, and advance
 * *cpp past the name, past an escape sequence terminating it
 * (except for \{ and \}), and past the blanks that follow.
 * The buffer is not modified; callers that need a NUL-terminated
 * name have to terminate it themselves using the returned length.
 * If *cpp points to an empty string, 0 is returned
 * and *cpp is left unchanged.
 * ln and pos are only used for the MANDOCERR_NAMESC message.
 */
static size_t
roff_getname(struct roff *r, char **cpp, int ln, int pos)
{
	char	 *name, *cp;
	size_t	  namesz;

	name = *cpp;
	if ('\0' == *name)
		return 0;

	/* Read until end of name. */
	for (cp = name; 1; cp++) {
		/* The end of the input or a blank ends the name. */
		if ('\0' == *cp || ' ' == *cp) {
			namesz = cp - name;
			break;
		}
		if ('\\' != *cp)
			continue;

		/* A backslash: provisionally, the name ends here. */
		namesz = cp - name;

		/* Braces end the name, but are not consumed. */
		if ('{' == cp[1] || '}' == cp[1])
			break;

		/* A doubled backslash is skipped; scanning continues. */
		cp++;
		if ('\\' == *cp)
			continue;

		/* Any other escape ends the name and is consumed. */
		mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
		    "%.*s", (int)(cp - name + 1), name);
		mandoc_escape((const char **)&cp, NULL, NULL);
		break;
	}

	/* Read past spaces. */
	while (' ' == *cp)
		cp++;

	*cpp = cp;
	return namesz;
}
3536
3537 /*
3538 * Store *string into the user-defined string called *name.
3539 * To clear an existing entry, call with (*r, *name, NULL, 0).
3540 * append == 0: replace mode
3541 * append == 1: single-line append mode
3542 * append == 2: multiline append mode, append '\n' after each call
3543 */
3544 static void
3545 roff_setstr(struct roff *r, const char *name, const char *string,
3546 int append)
3547 {
3548 size_t namesz;
3549
3550 namesz = strlen(name);
3551 roff_setstrn(&r->strtab, name, namesz, string,
3552 string ? strlen(string) : 0, append);
3553 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3554 }
3555
3556 static void
3557 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
3558 const char *string, size_t stringsz, int append)
3559 {
3560 struct roffkv *n;
3561 char *c;
3562 int i;
3563 size_t oldch, newch;
3564
3565 /* Search for an existing string with the same name. */
3566 n = *r;
3567
3568 while (n && (namesz != n->key.sz ||
3569 strncmp(n->key.p, name, namesz)))
3570 n = n->next;
3571
3572 if (NULL == n) {
3573 /* Create a new string table entry. */
3574 n = mandoc_malloc(sizeof(struct roffkv));
3575 n->key.p = mandoc_strndup(name, namesz);
3576 n->key.sz = namesz;
3577 n->val.p = NULL;
3578 n->val.sz = 0;
3579 n->next = *r;
3580 *r = n;
3581 } else if (0 == append) {
3582 free(n->val.p);
3583 n->val.p = NULL;
3584 n->val.sz = 0;
3585 }
3586
3587 if (NULL == string)
3588 return;
3589
3590 /*
3591 * One additional byte for the '\n' in multiline mode,
3592 * and one for the terminating '\0'.
3593 */
3594 newch = stringsz + (1 < append ? 2u : 1u);
3595
3596 if (NULL == n->val.p) {
3597 n->val.p = mandoc_malloc(newch);
3598 *n->val.p = '\0';
3599 oldch = 0;
3600 } else {
3601 oldch = n->val.sz;
3602 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
3603 }
3604
3605 /* Skip existing content in the destination buffer. */
3606 c = n->val.p + (int)oldch;
3607
3608 /* Append new content to the destination buffer. */
3609 i = 0;
3610 while (i < (int)stringsz) {
3611 /*
3612 * Rudimentary roff copy mode:
3613 * Handle escaped backslashes.
3614 */
3615 if ('\\' == string[i] && '\\' == string[i + 1])
3616 i++;
3617 *c++ = string[i++];
3618 }
3619
3620 /* Append terminating bytes. */
3621 if (1 < append)
3622 *c++ = '\n';
3623
3624 *c = '\0';
3625 n->val.sz = (int)(c - n->val.p);
3626 }
3627
3628 static const char *
3629 roff_getstrn(struct roff *r, const char *name, size_t len,
3630 int *deftype)
3631 {
3632 const struct roffkv *n;
3633 int found, i;
3634 enum roff_tok tok;
3635
3636 found = 0;
3637 for (n = r->strtab; n != NULL; n = n->next) {
3638 if (strncmp(name, n->key.p, len) != 0 ||
3639 n->key.p[len] != '\0' || n->val.p == NULL)
3640 continue;
3641 if (*deftype & ROFFDEF_USER) {
3642 *deftype = ROFFDEF_USER;
3643 return n->val.p;
3644 } else {
3645 found = 1;
3646 break;
3647 }
3648 }
3649 for (n = r->rentab; n != NULL; n = n->next) {
3650 if (strncmp(name, n->key.p, len) != 0 ||
3651 n->key.p[len] != '\0' || n->val.p == NULL)
3652 continue;
3653 if (*deftype & ROFFDEF_REN) {
3654 *deftype = ROFFDEF_REN;
3655 return n->val.p;
3656 } else {
3657 found = 1;
3658 break;
3659 }
3660 }
3661 for (i = 0; i < PREDEFS_MAX; i++) {
3662 if (strncmp(name, predefs[i].name, len) != 0 ||
3663 predefs[i].name[len] != '\0')
3664 continue;
3665 if (*deftype & ROFFDEF_PRE) {
3666 *deftype = ROFFDEF_PRE;
3667 return predefs[i].str;
3668 } else {
3669 found = 1;
3670 break;
3671 }
3672 }
3673 if (r->man->macroset != MACROSET_MAN) {
3674 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
3675 if (strncmp(name, roff_name[tok], len) != 0 ||
3676 roff_name[tok][len] != '\0')
3677 continue;
3678 if (*deftype & ROFFDEF_STD) {
3679 *deftype = ROFFDEF_STD;
3680 return NULL;
3681 } else {
3682 found = 1;
3683 break;
3684 }
3685 }
3686 }
3687 if (r->man->macroset != MACROSET_MDOC) {
3688 for (tok = MAN_TH; tok < MAN_MAX; tok++) {
3689 if (strncmp(name, roff_name[tok], len) != 0 ||
3690 roff_name[tok][len] != '\0')
3691 continue;
3692 if (*deftype & ROFFDEF_STD) {
3693 *deftype = ROFFDEF_STD;
3694 return NULL;
3695 } else {
3696 found = 1;
3697 break;
3698 }
3699 }
3700 }
3701
3702 if (found == 0 && *deftype != ROFFDEF_ANY) {
3703 if (*deftype & ROFFDEF_REN) {
3704 /*
3705 * This might still be a request,
3706 * so do not treat it as undefined yet.
3707 */
3708 *deftype = ROFFDEF_UNDEF;
3709 return NULL;
3710 }
3711
3712 /* Using an undefined string defines it to be empty. */
3713
3714 roff_setstrn(&r->strtab, name, len, "", 0, 0);
3715 roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
3716 }
3717
3718 *deftype = 0;
3719 return NULL;
3720 }
3721
3722 static void
3723 roff_freestr(struct roffkv *r)
3724 {
3725 struct roffkv *n, *nn;
3726
3727 for (n = r; n; n = nn) {
3728 free(n->key.p);
3729 free(n->val.p);
3730 nn = n->next;
3731 free(n);
3732 }
3733 }
3734
3735 /* --- accessors and utility functions ------------------------------------ */
3736
3737 /*
3738 * Duplicate an input string, making the appropriate character
3739 * conversations (as stipulated by `tr') along the way.
3740 * Returns a heap-allocated string with all the replacements made.
3741 */
3742 char *
3743 roff_strdup(const struct roff *r, const char *p)
3744 {
3745 const struct roffkv *cp;
3746 char *res;
3747 const char *pp;
3748 size_t ssz, sz;
3749 enum mandoc_esc esc;
3750
3751 if (NULL == r->xmbtab && NULL == r->xtab)
3752 return mandoc_strdup(p);
3753 else if ('\0' == *p)
3754 return mandoc_strdup("");
3755
3756 /*
3757 * Step through each character looking for term matches
3758 * (remember that a `tr' can be invoked with an escape, which is
3759 * a glyph but the escape is multi-character).
3760 * We only do this if the character hash has been initialised
3761 * and the string is >0 length.
3762 */
3763
3764 res = NULL;
3765 ssz = 0;
3766
3767 while ('\0' != *p) {
3768 assert((unsigned int)*p < 128);
3769 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
3770 sz = r->xtab[(int)*p].sz;
3771 res = mandoc_realloc(res, ssz + sz + 1);
3772 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
3773 ssz += sz;
3774 p++;
3775 continue;
3776 } else if ('\\' != *p) {
3777 res = mandoc_realloc(res, ssz + 2);
3778 res[ssz++] = *p++;
3779 continue;
3780 }
3781
3782 /* Search for term matches. */
3783 for (cp = r->xmbtab; cp; cp = cp->next)
3784 if (0 == strncmp(p, cp->key.p, cp->key.sz))
3785 break;
3786
3787 if (NULL != cp) {
3788 /*
3789 * A match has been found.
3790 * Append the match to the array and move
3791 * forward by its keysize.
3792 */
3793 res = mandoc_realloc(res,
3794 ssz + cp->val.sz + 1);
3795 memcpy(res + ssz, cp->val.p, cp->val.sz);
3796 ssz += cp->val.sz;
3797 p += (int)cp->key.sz;
3798 continue;
3799 }
3800
3801 /*
3802 * Handle escapes carefully: we need to copy
3803 * over just the escape itself, or else we might
3804 * do replacements within the escape itself.
3805 * Make sure to pass along the bogus string.
3806 */
3807 pp = p++;
3808 esc = mandoc_escape(&p, NULL, NULL);
3809 if (ESCAPE_ERROR == esc) {
3810 sz = strlen(pp);
3811 res = mandoc_realloc(res, ssz + sz + 1);
3812 memcpy(res + ssz, pp, sz);
3813 break;
3814 }
3815 /*
3816 * We bail out on bad escapes.
3817 * No need to warn: we already did so when
3818 * roff_res() was called.
3819 */
3820 sz = (int)(p - pp);
3821 res = mandoc_realloc(res, ssz + sz + 1);
3822 memcpy(res + ssz, pp, sz);
3823 ssz += sz;
3824 }
3825
3826 res[(int)ssz] = '\0';
3827 return res;
3828 }
3829
3830 int
3831 roff_getformat(const struct roff *r)
3832 {
3833
3834 return r->format;
3835 }
3836
3837 /*
3838 * Find out whether a line is a macro line or not.
3839 * If it is, adjust the current position and return one; if it isn't,
3840 * return zero and don't change the current position.
3841 * If the control character has been set with `.cc', then let that grain
3842 * precedence.
3843 * This is slighly contrary to groff, where using the non-breaking
3844 * control character when `cc' has been invoked will cause the
3845 * non-breaking macro contents to be printed verbatim.
3846 */
3847 int
3848 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
3849 {
3850 int pos;
3851
3852 pos = *ppos;
3853
3854 if (r->control != '\0' && cp[pos] == r->control)
3855 pos++;
3856 else if (r->control != '\0')
3857 return 0;
3858 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
3859 pos += 2;
3860 else if ('.' == cp[pos] || '\'' == cp[pos])
3861 pos++;
3862 else
3863 return 0;
3864
3865 while (' ' == cp[pos] || '\t' == cp[pos])
3866 pos++;
3867
3868 *ppos = pos;
3869 return 1;
3870 }