]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
Massively reduce the amount of text, cutting it down to what is needed
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.333 2018/08/18 02:08:27 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31 #include "mandoc.h"
32 #include "mandoc_aux.h"
33 #include "mandoc_ohash.h"
34 #include "roff.h"
35 #include "libmandoc.h"
36 #include "roff_int.h"
37 #include "libroff.h"
38
/*
 * Upper bound on the number of string expansions performed on one
 * input line; hitting it breaks out of otherwise infinite loops.
 */
#define	EXPAND_LIMIT	1000
41
/*
 * Bit flags classifying how a macro or string name is defined.
 * ROFFDEF_ANY is the union of all "defined" classes;
 * ROFFDEF_UNDEF is a separate bit and not part of ROFFDEF_ANY.
 */
#define	ROFFDEF_USER	 0x02	/* Defined by the user. */
#define	ROFFDEF_PRE	 0x04	/* Predefined. */
#define	ROFFDEF_REN	 0x08	/* Standard macro that was renamed. */
#define	ROFFDEF_STD	 0x10	/* Macro of mdoc(7) or man(7). */
#define	ROFFDEF_ANY	 (ROFFDEF_USER | ROFFDEF_PRE | ROFFDEF_REN | \
			  ROFFDEF_STD)
#define	ROFFDEF_UNDEF	 0x20	/* Not defined at all. */
50
51 /* --- data types --------------------------------------------------------- */
52
/*
 * Minimal string buffer: a pointer together with its cached length.
 */
struct	roffstr {
	char		*p;	/* NUL-terminated character data */
	size_t		 sz;	/* cached result of strlen(p) */
};
60
61 /*
62 * A key-value roffstr pair as part of a singly-linked list.
63 */
64 struct roffkv {
65 struct roffstr key;
66 struct roffstr val;
67 struct roffkv *next; /* next in list */
68 };
69
70 /*
71 * A single number register as part of a singly-linked list.
72 */
73 struct roffreg {
74 struct roffstr key;
75 int val;
76 int step;
77 struct roffreg *next;
78 };
79
80 /*
81 * Association of request and macro names with token IDs.
82 */
83 struct roffreq {
84 enum roff_tok tok;
85 char name[];
86 };
87
/*
 * Complete state of one roff parser instance.
 */
struct	roff {
	struct mparse	*parse;		/* parse point */
	struct roff_man	*man;		/* attached mdoc or man parser */
	struct roffnode	*last;		/* innermost entry (leaf) of the block stack */
	int		*rstack;	/* stack holding inverted `ie' values */
	struct ohash	*reqtab;	/* lookup table for requests */
	struct roffreg	*regtab;	/* list of number registers */
	struct roffkv	*strtab;	/* strings & macros defined by the user */
	struct roffkv	*rentab;	/* strings & macros that were renamed */
	struct roffkv	*xmbtab;	/* `tr' translations, multi-byte */
	struct roffstr	*xtab;		/* `tr' translations, single-byte */
	const char	*current_string; /* value of the user macro called last */
	struct tbl_node	*first_tbl;	/* head of the list of parsed tables */
	struct tbl_node	*last_tbl;	/* most recently parsed table */
	struct tbl_node	*tbl;		/* table currently being parsed */
	struct eqn_node	*last_eqn;	/* equation parser */
	struct eqn_node	*eqn;		/* equation parser currently active */
	int		 eqn_inline;	/* whether the current equation is inline */
	int		 options;	/* parse options */
	int		 rstacksz;	/* current size limit of rstack */
	int		 rstackpos;	/* current position in rstack */
	int		 format;	/* mdoc or man format of the current file */
	int		 argc;		/* argument count of the last macro */
	char		 control;	/* control character */
	char		 escape;	/* escape character */
};
114
115 struct roffnode {
116 enum roff_tok tok; /* type of node */
117 struct roffnode *parent; /* up one in stack */
118 int line; /* parse line */
119 int col; /* parse col */
120 char *name; /* node name, e.g. macro name */
121 char *end; /* end-rules: custom token */
122 int endspan; /* end-rules: next-line or infty */
123 int rule; /* current evaluation rule */
124 };
125
/*
 * Parameter list shared by all request handler functions.
 */
#define	ROFF_ARGS	 struct roff *r,	/* parser context */ \
			 enum roff_tok tok,	/* token of the macro */ \
			 struct buf *buf,	/* buffer holding the input */ \
			 int ln,		/* line being parsed */ \
			 int ppos,		/* original position in buffer */ \
			 int pos,		/* current position in buffer */ \
			 int *offs		/* reset offset of buffer data */

/* Pointer to a request handler taking the ROFF_ARGS parameters. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
135
136 struct roffmac {
137 roffproc proc; /* process new macro */
138 roffproc text; /* process as child text of macro */
139 roffproc sub; /* process as child of macro */
140 int flags;
141 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
142 };
143
/*
 * A predefined string: its input name and the symbol replacing it.
 */
struct	predef {
	const char	*name;	/* name as written in the input */
	const char	*str;	/* symbol it is replaced with */
};

/*
 * Build one struct predef initializer.  The expansion ends in a
 * comma, so consecutive invocations form an initializer list.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
151
152 /* --- function prototypes ------------------------------------------------ */
153
/*
 * Forward declarations of the file-local functions.
 * Those declared with ROFF_ARGS have the roffproc signature
 * and can be stored in the roffs[] handler table.
 */
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum roff_tok,
				const char *, int, int);
static	void		 roff_addtbl(struct roff_man *, struct tbl_node *);
static	enum rofferr	 roff_als(ROFF_ARGS);
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_br(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_cc(ROFF_ARGS);
static	void		 roff_ccond(struct roff *, int, int);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum rofferr	 roff_ec(ROFF_ARGS);
static	enum rofferr	 roff_eo(ROFF_ARGS);
static	enum rofferr	 roff_eqndelim(struct roff *, struct buf *, int);
static	int		 roff_evalcond(struct roff *r, int, char *, int *);
static	int		 roff_evalnum(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalpar(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalstrcond(const char *, int *);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	size_t		 roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *, int);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(struct roff *,
				const char *, size_t, char);
static	int		 roff_getregro(const struct roff *,
				const char *name);
static	const char	*roff_getstrn(struct roff *,
				const char *, size_t, int *);
static	int		 roff_hasregn(const struct roff *,
				const char *, size_t);
static	enum rofferr	 roff_insec(ROFF_ARGS);
static	enum rofferr	 roff_it(ROFF_ARGS);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	void		 roff_man_alloc1(struct roff_man *);
static	void		 roff_man_free1(struct roff_man *);
static	enum rofferr	 roff_manyarg(ROFF_ARGS);
static	enum rofferr	 roff_nop(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	enum rofferr	 roff_onearg(ROFF_ARGS);
static	enum roff_tok	 roff_parse(struct roff *, char *, int *,
				int, int);
static	enum rofferr	 roff_parsetext(struct roff *, struct buf *,
				int, int *);
static	enum rofferr	 roff_renamed(ROFF_ARGS);
static	enum rofferr	 roff_res(struct roff *, struct buf *, int, int);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	enum rofferr	 roff_rn(ROFF_ARGS);
static	enum rofferr	 roff_rr(ROFF_ARGS);
static	void		 roff_setregn(struct roff *, const char *,
				size_t, int, char, int);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_tr(ROFF_ARGS);
static	enum rofferr	 roff_Dd(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_unsupp(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
228
229 /* --- constant data ------------------------------------------------------ */
230
/* Flag bits controlling number parsing. */
#define	ROFFNUM_SCALE	0x01	/* roff_getnum() honours scaling. */
#define	ROFFNUM_WHITE	0x02	/* roff_evalnum() skips whitespace. */
233
/*
 * Printable names of all tokens, indexed by token number:
 * roffhash_alloc() reads roff_name[tok] for each token in its range.
 * NULL entries carry no name and are skipped when hash tables are
 * built.  The order of the entries is significant.
 * NOTE(review): the groups appear to be roff requests, then mdoc(7)
 * macros starting at the second "Dd", then man(7) macros starting
 * at the second "TH" - confirm against the token enumeration.
 */
const char *__roff_name[MAN_MAX + 1] = {
	"br", "ce", "ft", "ll",
	"mc", "po", "rj", "sp",
	"ta", "ti", NULL,
	"ab", "ad", "af", "aln",
	"als", "am", "am1", "ami",
	"ami1", "as", "as1", "asciify",
	"backtrace", "bd", "bleedat", "blm",
	"box", "boxa", "bp", "BP",
	"break", "breakchar", "brnl", "brp",
	"brpnl", "c2", "cc",
	"cf", "cflags", "ch", "char",
	"chop", "class", "close", "CL",
	"color", "composite", "continue", "cp",
	"cropat", "cs", "cu", "da",
	"dch", "Dd", "de", "de1",
	"defcolor", "dei", "dei1", "device",
	"devicem", "di", "do", "ds",
	"ds1", "dwh", "dt", "ec",
	"ecr", "ecs", "el", "em",
	"EN", "eo", "EP", "EQ",
	"errprint", "ev", "evc", "ex",
	"fallback", "fam", "fc", "fchar",
	"fcolor", "fdeferlig", "feature", "fkern",
	"fl", "flig", "fp", "fps",
	"fschar", "fspacewidth", "fspecial", "ftr",
	"fzoom", "gcolor", "hc", "hcode",
	"hidechar", "hla", "hlm", "hpf",
	"hpfa", "hpfcode", "hw", "hy",
	"hylang", "hylen", "hym", "hypp",
	"hys", "ie", "if", "ig",
	"index", "it", "itc", "IX",
	"kern", "kernafter", "kernbefore", "kernpair",
	"lc", "lc_ctype", "lds", "length",
	"letadj", "lf", "lg", "lhang",
	"linetabs", "lnr", "lnrf", "lpfx",
	"ls", "lsm", "lt",
	"mediasize", "minss", "mk", "mso",
	"na", "ne", "nh", "nhychar",
	"nm", "nn", "nop", "nr",
	"nrf", "nroff", "ns", "nx",
	"open", "opena", "os", "output",
	"padj", "papersize", "pc", "pev",
	"pi", "PI", "pl", "pm",
	"pn", "pnr", "ps",
	"psbb", "pshape", "pso", "ptr",
	"pvs", "rchar", "rd", "recursionlimit",
	"return", "rfschar", "rhang",
	"rm", "rn", "rnn", "rr",
	"rs", "rt", "schar", "sentchar",
	"shc", "shift", "sizes", "so",
	"spacewidth", "special", "spreadwarn", "ss",
	"sty", "substring", "sv", "sy",
	"T&", "tc", "TE",
	"TH", "tkf", "tl",
	"tm", "tm1", "tmc", "tr",
	"track", "transchar", "trf", "trimat",
	"trin", "trnt", "troff", "TS",
	"uf", "ul", "unformat", "unwatch",
	"unwatchn", "vpt", "vs", "warn",
	"warnscale", "watch", "watchlength", "watchn",
	"wh", "while", "write", "writec",
	"writem", "xflag", ".", NULL,
	NULL, "text",
	"Dd", "Dt", "Os", "Sh",
	"Ss", "Pp", "D1", "Dl",
	"Bd", "Ed", "Bl", "El",
	"It", "Ad", "An", "Ap",
	"Ar", "Cd", "Cm", "Dv",
	"Er", "Ev", "Ex", "Fa",
	"Fd", "Fl", "Fn", "Ft",
	"Ic", "In", "Li", "Nd",
	"Nm", "Op", "Ot", "Pa",
	"Rv", "St", "Va", "Vt",
	"Xr", "%A", "%B", "%D",
	"%I", "%J", "%N", "%O",
	"%P", "%R", "%T", "%V",
	"Ac", "Ao", "Aq", "At",
	"Bc", "Bf", "Bo", "Bq",
	"Bsx", "Bx", "Db", "Dc",
	"Do", "Dq", "Ec", "Ef",
	"Em", "Eo", "Fx", "Ms",
	"No", "Ns", "Nx", "Ox",
	"Pc", "Pf", "Po", "Pq",
	"Qc", "Ql", "Qo", "Qq",
	"Re", "Rs", "Sc", "So",
	"Sq", "Sm", "Sx", "Sy",
	"Tn", "Ux", "Xc", "Xo",
	"Fo", "Fc", "Oo", "Oc",
	"Bk", "Ek", "Bt", "Hf",
	"Fr", "Ud", "Lb", "Lp",
	"Lk", "Mt", "Brq", "Bro",
	"Brc", "%C", "Es", "En",
	"Dx", "%Q", "%U", "Ta",
	NULL,
	"TH", "SH", "SS", "TP",
	"TQ",
	"LP", "PP", "P", "IP",
	"HP", "SM", "SB", "BI",
	"IB", "BR", "RB", "R",
	"B", "I", "IR", "RI",
	"nf", "fi",
	"RE", "RS", "DT", "UC",
	"PD", "AT", "in",
	"SY", "YS", "OP",
	"EX", "EE", "UR",
	"UE", "MT", "ME", NULL
};
/* Exported pointer through which the name table is read. */
const char *const *roff_name = __roff_name;
343
/*
 * Handler table for roff requests, with one row per token below
 * TOKEN_NONE.  Each row lists the proc, text, and sub handlers
 * and the flags as described in struct roffmac; the comment at
 * the end of a row names the request it belongs to.
 * NOTE(review): the rows presumably have to stay in the same order
 * as the names in __roff_name[] - confirm against enum roff_tok.
 */
static	struct roffmac	 roffs[TOKEN_NONE] = {
	{ roff_br, NULL, NULL, 0 },  /* br */
	{ roff_onearg, NULL, NULL, 0 },  /* ce */
	{ roff_onearg, NULL, NULL, 0 },  /* ft */
	{ roff_onearg, NULL, NULL, 0 },  /* ll */
	{ roff_onearg, NULL, NULL, 0 },  /* mc */
	{ roff_onearg, NULL, NULL, 0 },  /* po */
	{ roff_onearg, NULL, NULL, 0 },  /* rj */
	{ roff_onearg, NULL, NULL, 0 },  /* sp */
	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
	{ roff_onearg, NULL, NULL, 0 },  /* ti */
	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
	{ roff_als, NULL, NULL, 0 },  /* als */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
	{ roff_ds, NULL, NULL, 0 },  /* as */
	{ roff_ds, NULL, NULL, 0 },  /* as1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
	{ roff_unsupp, NULL, NULL, 0 },  /* box */
	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
	{ roff_unsupp, NULL, NULL, 0 },  /* break */
	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
	{ roff_br, NULL, NULL, 0 },  /* brp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
	{ roff_cc, NULL, NULL, 0 },  /* cc */
	{ roff_insec, NULL, NULL, 0 },  /* cf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
	{ roff_unsupp, NULL, NULL, 0 },  /* char */
	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
	{ roff_insec, NULL, NULL, 0 },  /* close */
	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
	{ roff_unsupp, NULL, NULL, 0 },  /* da */
	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* device */
	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
	{ roff_unsupp, NULL, NULL, 0 },  /* di */
	{ roff_unsupp, NULL, NULL, 0 },  /* do */
	{ roff_ds, NULL, NULL, 0 },  /* ds */
	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
	{ roff_ec, NULL, NULL, 0 },  /* ec */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
	{ roff_unsupp, NULL, NULL, 0 },  /* em */
	{ roff_EN, NULL, NULL, 0 },  /* EN */
	{ roff_eo, NULL, NULL, 0 },  /* eo */
	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
	{ roff_unsupp, NULL, NULL, 0 },  /* index */
	{ roff_it, NULL, NULL, 0 },  /* it */
	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
	{ roff_unsupp, NULL, NULL, 0 },  /* length */
	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
	{ roff_insec, NULL, NULL, 0 },  /* lf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
	{ roff_insec, NULL, NULL, 0 },  /* mso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
	{ roff_nop, NULL, NULL, 0 },  /* nop */
	{ roff_nr, NULL, NULL, 0 },  /* nr */
	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
	{ roff_insec, NULL, NULL, 0 },  /* nx */
	{ roff_insec, NULL, NULL, 0 },  /* open */
	{ roff_insec, NULL, NULL, 0 },  /* opena */
	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
	{ roff_unsupp, NULL, NULL, 0 },  /* output */
	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
	{ roff_insec, NULL, NULL, 0 },  /* pi */
	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
	{ roff_insec, NULL, NULL, 0 },  /* pso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
	{ roff_unsupp, NULL, NULL, 0 },  /* return */
	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
	{ roff_rm, NULL, NULL, 0 },  /* rm */
	{ roff_rn, NULL, NULL, 0 },  /* rn */
	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
	{ roff_rr, NULL, NULL, 0 },  /* rr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
	{ roff_unsupp, NULL, NULL, 0 },  /* shift */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
	{ roff_so, NULL, NULL, 0 },  /* so */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
	{ roff_insec, NULL, NULL, 0 },  /* sy */
	{ roff_T_, NULL, NULL, 0 },  /* T& */
	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
	{ roff_TE, NULL, NULL, 0 },  /* TE */
	{ roff_Dd, NULL, NULL, 0 },  /* TH */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
	{ roff_tr, NULL, NULL, 0 },  /* tr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
	{ roff_insec, NULL, NULL, 0 },  /* trf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
	{ roff_TS, NULL, NULL, 0 },  /* TS */
	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
	{ roff_unsupp, NULL, NULL, 0 },  /* wh */
	{ roff_unsupp, NULL, NULL, 0 },  /* while */
	{ roff_insec, NULL, NULL, 0 },  /* write */
	{ roff_insec, NULL, NULL, 0 },  /* writec */
	{ roff_insec, NULL, NULL, 0 },  /* writem */
	{ roff_line_ignore, NULL, NULL, 0 },  /* xflag */
	{ roff_cblock, NULL, NULL, 0 },  /* . */
	{ roff_renamed, NULL, NULL, 0 },
	{ roff_userdef, NULL, NULL, 0 }
};
588
/*
 * Table of predefined strings that get injected into the parser.
 * The rows come from "predefs.in", which is expected to consist of
 * PREDEF() invocations; PREDEFS_MAX is the capacity of the table.
 * NOTE(review): PREDEFS_MAX presumably equals the number of rows
 * in "predefs.in" - confirm when adding or removing entries.
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
594
/*
 * Parser state kept at file scope; roff_reset() reinitializes
 * all four variables.
 */
static	int		  roffce_lines;	/* count of input lines to center */
static	struct roff_node *roffce_node;	/* request that is active */
static	int		  roffit_lines;	/* count of lines to delay */
static	char		 *roffit_macro;	/* macro line, NUL-terminated */
599
600
601 /* --- request table ------------------------------------------------------ */
602
603 struct ohash *
604 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
605 {
606 struct ohash *htab;
607 struct roffreq *req;
608 enum roff_tok tok;
609 size_t sz;
610 unsigned int slot;
611
612 htab = mandoc_malloc(sizeof(*htab));
613 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
614
615 for (tok = mintok; tok < maxtok; tok++) {
616 if (roff_name[tok] == NULL)
617 continue;
618 sz = strlen(roff_name[tok]);
619 req = mandoc_malloc(sizeof(*req) + sz + 1);
620 req->tok = tok;
621 memcpy(req->name, roff_name[tok], sz + 1);
622 slot = ohash_qlookup(htab, req->name);
623 ohash_insert(htab, slot, req);
624 }
625 return htab;
626 }
627
/*
 * Release a table built by roffhash_alloc(), including all of its
 * entries.  Passing NULL is allowed and does nothing.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 iter;

	if (htab == NULL)
		return;

	entry = ohash_first(htab, &iter);
	while (entry != NULL) {
		free(entry);
		entry = ohash_next(htab, &iter);
	}
	ohash_delete(htab);
	free(htab);
}
642
643 enum roff_tok
644 roffhash_find(struct ohash *htab, const char *name, size_t sz)
645 {
646 struct roffreq *req;
647 const char *end;
648
649 if (sz) {
650 end = name + sz;
651 req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
652 } else
653 req = ohash_find(htab, ohash_qlookup(htab, name));
654 return req == NULL ? TOKEN_NONE : req->tok;
655 }
656
657 /* --- stack of request blocks -------------------------------------------- */
658
659 /*
660 * Pop the current node off of the stack of roff instructions currently
661 * pending.
662 */
663 static void
664 roffnode_pop(struct roff *r)
665 {
666 struct roffnode *p;
667
668 assert(r->last);
669 p = r->last;
670
671 r->last = r->last->parent;
672 free(p->name);
673 free(p->end);
674 free(p);
675 }
676
677 /*
678 * Push a roff node onto the instruction stack. This must later be
679 * removed with roffnode_pop().
680 */
681 static void
682 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
683 int line, int col)
684 {
685 struct roffnode *p;
686
687 p = mandoc_calloc(1, sizeof(struct roffnode));
688 p->tok = tok;
689 if (name)
690 p->name = mandoc_strdup(name);
691 p->parent = r->last;
692 p->line = line;
693 p->col = col;
694 p->rule = p->parent ? p->parent->rule : 0;
695
696 r->last = p;
697 }
698
699 /* --- roff parser state data management ---------------------------------- */
700
701 static void
702 roff_free1(struct roff *r)
703 {
704 struct tbl_node *tbl;
705 int i;
706
707 while (NULL != (tbl = r->first_tbl)) {
708 r->first_tbl = tbl->next;
709 tbl_free(tbl);
710 }
711 r->first_tbl = r->last_tbl = r->tbl = NULL;
712
713 if (r->last_eqn != NULL)
714 eqn_free(r->last_eqn);
715 r->last_eqn = r->eqn = NULL;
716
717 while (r->last)
718 roffnode_pop(r);
719
720 free (r->rstack);
721 r->rstack = NULL;
722 r->rstacksz = 0;
723 r->rstackpos = -1;
724
725 roff_freereg(r->regtab);
726 r->regtab = NULL;
727
728 roff_freestr(r->strtab);
729 roff_freestr(r->rentab);
730 roff_freestr(r->xmbtab);
731 r->strtab = r->rentab = r->xmbtab = NULL;
732
733 if (r->xtab)
734 for (i = 0; i < 128; i++)
735 free(r->xtab[i].p);
736 free(r->xtab);
737 r->xtab = NULL;
738 }
739
740 void
741 roff_reset(struct roff *r)
742 {
743 roff_free1(r);
744 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
745 r->control = '\0';
746 r->escape = '\\';
747 roffce_lines = 0;
748 roffce_node = NULL;
749 roffit_lines = 0;
750 roffit_macro = NULL;
751 }
752
753 void
754 roff_free(struct roff *r)
755 {
756 roff_free1(r);
757 roffhash_free(r->reqtab);
758 free(r);
759 }
760
761 struct roff *
762 roff_alloc(struct mparse *parse, int options)
763 {
764 struct roff *r;
765
766 r = mandoc_calloc(1, sizeof(struct roff));
767 r->parse = parse;
768 r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
769 r->options = options;
770 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
771 r->rstackpos = -1;
772 r->escape = '\\';
773 return r;
774 }
775
776 /* --- syntax tree state data management ---------------------------------- */
777
778 static void
779 roff_man_free1(struct roff_man *man)
780 {
781
782 if (man->first != NULL)
783 roff_node_delete(man, man->first);
784 free(man->meta.msec);
785 free(man->meta.vol);
786 free(man->meta.os);
787 free(man->meta.arch);
788 free(man->meta.title);
789 free(man->meta.name);
790 free(man->meta.date);
791 }
792
793 static void
794 roff_man_alloc1(struct roff_man *man)
795 {
796
797 memset(&man->meta, 0, sizeof(man->meta));
798 man->first = mandoc_calloc(1, sizeof(*man->first));
799 man->first->type = ROFFT_ROOT;
800 man->last = man->first;
801 man->last_es = NULL;
802 man->flags = 0;
803 man->macroset = MACROSET_NONE;
804 man->lastsec = man->lastnamed = SEC_NONE;
805 man->next = ROFF_NEXT_CHILD;
806 }
807
/*
 * Discard the current document content and start over with an
 * empty tree; the struct roff_man itself is kept.
 */
void
roff_man_reset(struct roff_man *man)
{
	/* Tear down the old tree and metadata first ... */
	roff_man_free1(man);
	/* ... then set up a new, empty document. */
	roff_man_alloc1(man);
}
815
/*
 * Destroy a document created with roff_man_alloc(): release the
 * syntax tree, the metadata, and the struct roff_man itself.
 * Passing NULL is allowed and does nothing, matching free(3)
 * and roffhash_free().
 */
void
roff_man_free(struct roff_man *man)
{
	if (man == NULL)
		return;
	roff_man_free1(man);
	free(man);
}
823
824 struct roff_man *
825 roff_man_alloc(struct roff *roff, struct mparse *parse,
826 const char *os_s, int quick)
827 {
828 struct roff_man *man;
829
830 man = mandoc_calloc(1, sizeof(*man));
831 man->parse = parse;
832 man->roff = roff;
833 man->os_s = os_s;
834 man->quick = quick;
835 roff_man_alloc1(man);
836 roff->man = man;
837 return man;
838 }
839
840 /* --- syntax tree handling ----------------------------------------------- */
841
842 struct roff_node *
843 roff_node_alloc(struct roff_man *man, int line, int pos,
844 enum roff_type type, int tok)
845 {
846 struct roff_node *n;
847
848 n = mandoc_calloc(1, sizeof(*n));
849 n->line = line;
850 n->pos = pos;
851 n->tok = tok;
852 n->type = type;
853 n->sec = man->lastsec;
854
855 if (man->flags & MDOC_SYNOPSIS)
856 n->flags |= NODE_SYNPRETTY;
857 else
858 n->flags &= ~NODE_SYNPRETTY;
859 if (man->flags & MDOC_NEWLINE)
860 n->flags |= NODE_LINE;
861 man->flags &= ~MDOC_NEWLINE;
862
863 return n;
864 }
865
866 void
867 roff_node_append(struct roff_man *man, struct roff_node *n)
868 {
869
870 switch (man->next) {
871 case ROFF_NEXT_SIBLING:
872 if (man->last->next != NULL) {
873 n->next = man->last->next;
874 man->last->next->prev = n;
875 } else
876 man->last->parent->last = n;
877 man->last->next = n;
878 n->prev = man->last;
879 n->parent = man->last->parent;
880 break;
881 case ROFF_NEXT_CHILD:
882 if (man->last->child != NULL) {
883 n->next = man->last->child;
884 man->last->child->prev = n;
885 } else
886 man->last->last = n;
887 man->last->child = n;
888 n->parent = man->last;
889 break;
890 default:
891 abort();
892 }
893 man->last = n;
894
895 switch (n->type) {
896 case ROFFT_HEAD:
897 n->parent->head = n;
898 break;
899 case ROFFT_BODY:
900 if (n->end != ENDBODY_NOT)
901 return;
902 n->parent->body = n;
903 break;
904 case ROFFT_TAIL:
905 n->parent->tail = n;
906 break;
907 default:
908 return;
909 }
910
911 /*
912 * Copy over the normalised-data pointer of our parent. Not
913 * everybody has one, but copying a null pointer is fine.
914 */
915
916 n->norm = n->parent->norm;
917 assert(n->parent->type == ROFFT_BLOCK);
918 }
919
920 void
921 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
922 {
923 struct roff_node *n;
924
925 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
926 n->string = roff_strdup(man->roff, word);
927 roff_node_append(man, n);
928 n->flags |= NODE_VALID | NODE_ENDED;
929 man->next = ROFF_NEXT_SIBLING;
930 }
931
932 void
933 roff_word_append(struct roff_man *man, const char *word)
934 {
935 struct roff_node *n;
936 char *addstr, *newstr;
937
938 n = man->last;
939 addstr = roff_strdup(man->roff, word);
940 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
941 free(addstr);
942 free(n->string);
943 n->string = newstr;
944 man->next = ROFF_NEXT_SIBLING;
945 }
946
947 void
948 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
949 {
950 struct roff_node *n;
951
952 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
953 roff_node_append(man, n);
954 man->next = ROFF_NEXT_CHILD;
955 }
956
957 struct roff_node *
958 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
959 {
960 struct roff_node *n;
961
962 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
963 roff_node_append(man, n);
964 man->next = ROFF_NEXT_CHILD;
965 return n;
966 }
967
968 struct roff_node *
969 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
970 {
971 struct roff_node *n;
972
973 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
974 roff_node_append(man, n);
975 man->next = ROFF_NEXT_CHILD;
976 return n;
977 }
978
979 struct roff_node *
980 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
981 {
982 struct roff_node *n;
983
984 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
985 roff_node_append(man, n);
986 man->next = ROFF_NEXT_CHILD;
987 return n;
988 }
989
990 static void
991 roff_addtbl(struct roff_man *man, struct tbl_node *tbl)
992 {
993 struct roff_node *n;
994 const struct tbl_span *span;
995
996 if (man->macroset == MACROSET_MAN)
997 man_breakscope(man, ROFF_TS);
998 while ((span = tbl_span(tbl)) != NULL) {
999 n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE);
1000 n->span = span;
1001 roff_node_append(man, n);
1002 n->flags |= NODE_VALID | NODE_ENDED;
1003 man->next = ROFF_NEXT_SIBLING;
1004 }
1005 }
1006
1007 void
1008 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1009 {
1010
1011 /* Adjust siblings. */
1012
1013 if (n->prev)
1014 n->prev->next = n->next;
1015 if (n->next)
1016 n->next->prev = n->prev;
1017
1018 /* Adjust parent. */
1019
1020 if (n->parent != NULL) {
1021 if (n->parent->child == n)
1022 n->parent->child = n->next;
1023 if (n->parent->last == n)
1024 n->parent->last = n->prev;
1025 }
1026
1027 /* Adjust parse point. */
1028
1029 if (man == NULL)
1030 return;
1031 if (man->last == n) {
1032 if (n->prev == NULL) {
1033 man->last = n->parent;
1034 man->next = ROFF_NEXT_CHILD;
1035 } else {
1036 man->last = n->prev;
1037 man->next = ROFF_NEXT_SIBLING;
1038 }
1039 }
1040 if (man->first == n)
1041 man->first = NULL;
1042 }
1043
1044 void
1045 roff_node_free(struct roff_node *n)
1046 {
1047
1048 if (n->args != NULL)
1049 mdoc_argv_free(n->args);
1050 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1051 free(n->norm);
1052 if (n->eqn != NULL)
1053 eqn_box_free(n->eqn);
1054 free(n->string);
1055 free(n);
1056 }
1057
1058 void
1059 roff_node_delete(struct roff_man *man, struct roff_node *n)
1060 {
1061
1062 while (n->child != NULL)
1063 roff_node_delete(man, n->child);
1064 roff_node_unlink(man, n);
1065 roff_node_free(n);
1066 }
1067
1068 void
1069 deroff(char **dest, const struct roff_node *n)
1070 {
1071 char *cp;
1072 size_t sz;
1073
1074 if (n->type != ROFFT_TEXT) {
1075 for (n = n->child; n != NULL; n = n->next)
1076 deroff(dest, n);
1077 return;
1078 }
1079
1080 /* Skip leading whitespace. */
1081
1082 for (cp = n->string; *cp != '\0'; cp++) {
1083 if (cp[0] == '\\' && cp[1] != '\0' &&
1084 strchr(" %&0^|~", cp[1]) != NULL)
1085 cp++;
1086 else if ( ! isspace((unsigned char)*cp))
1087 break;
1088 }
1089
1090 /* Skip trailing backslash. */
1091
1092 sz = strlen(cp);
1093 if (sz > 0 && cp[sz - 1] == '\\')
1094 sz--;
1095
1096 /* Skip trailing whitespace. */
1097
1098 for (; sz; sz--)
1099 if ( ! isspace((unsigned char)cp[sz-1]))
1100 break;
1101
1102 /* Skip empty strings. */
1103
1104 if (sz == 0)
1105 return;
1106
1107 if (*dest == NULL) {
1108 *dest = mandoc_strndup(cp, sz);
1109 return;
1110 }
1111
1112 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1113 free(*dest);
1114 *dest = cp;
1115 }
1116
1117 /* --- main functions of the roff parser ---------------------------------- */
1118
1119 /*
1120 * In the current line, expand escape sequences that tend to get
1121 * used in numerical expressions and conditional requests.
1122 * Also check the syntax of the remaining escape sequences.
1123 */
1124 static enum rofferr
1125 roff_res(struct roff *r, struct buf *buf, int ln, int pos)
1126 {
1127 char ubuf[24]; /* buffer to print the number */
1128 struct roff_node *n; /* used for header comments */
1129 const char *start; /* start of the string to process */
1130 char *stesc; /* start of an escape sequence ('\\') */
1131 char *ep; /* end of comment string */
1132 const char *stnam; /* start of the name, after "[(*" */
1133 const char *cp; /* end of the name, e.g. before ']' */
1134 const char *res; /* the string to be substituted */
1135 char *nbuf; /* new buffer to copy buf->buf to */
1136 size_t maxl; /* expected length of the escape name */
1137 size_t naml; /* actual length of the escape name */
1138 enum mandoc_esc esc; /* type of the escape sequence */
1139 int inaml; /* length returned from mandoc_escape() */
1140 int expand_count; /* to avoid infinite loops */
1141 int npos; /* position in numeric expression */
1142 int arg_complete; /* argument not interrupted by eol */
1143 int done; /* no more input available */
1144 int deftype; /* type of definition to paste */
1145 int rcsid; /* kind of RCS id seen */
1146 char sign; /* increment number register */
1147 char term; /* character terminating the escape */
1148
1149 /* Search forward for comments. */
1150
1151 done = 0;
1152 start = buf->buf + pos;
1153 for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
1154 if (stesc[0] != r->escape || stesc[1] == '\0')
1155 continue;
1156 stesc++;
1157 if (*stesc != '"' && *stesc != '#')
1158 continue;
1159
1160 /* Comment found, look for RCS id. */
1161
1162 rcsid = 0;
1163 if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
1164 rcsid = 1 << MANDOC_OS_OPENBSD;
1165 cp += 8;
1166 } else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
1167 rcsid = 1 << MANDOC_OS_NETBSD;
1168 cp += 7;
1169 }
1170 if (cp != NULL &&
1171 isalnum((unsigned char)*cp) == 0 &&
1172 strchr(cp, '$') != NULL) {
1173 if (r->man->meta.rcsids & rcsid)
1174 mandoc_msg(MANDOCERR_RCS_REP, r->parse,
1175 ln, stesc + 1 - buf->buf, stesc + 1);
1176 r->man->meta.rcsids |= rcsid;
1177 }
1178
1179 /* Handle trailing whitespace. */
1180
1181 ep = strchr(stesc--, '\0') - 1;
1182 if (*ep == '\n') {
1183 done = 1;
1184 ep--;
1185 }
1186 if (*ep == ' ' || *ep == '\t')
1187 mandoc_msg(MANDOCERR_SPACE_EOL, r->parse,
1188 ln, ep - buf->buf, NULL);
1189
1190 /*
1191 * Save comments preceding the title macro
1192 * in the syntax tree.
1193 */
1194
1195 if (r->format == 0) {
1196 while (*ep == ' ' || *ep == '\t')
1197 ep--;
1198 ep[1] = '\0';
1199 n = roff_node_alloc(r->man,
1200 ln, stesc + 1 - buf->buf,
1201 ROFFT_COMMENT, TOKEN_NONE);
1202 n->string = mandoc_strdup(stesc + 2);
1203 roff_node_append(r->man, n);
1204 n->flags |= NODE_VALID | NODE_ENDED;
1205 r->man->next = ROFF_NEXT_SIBLING;
1206 }
1207
1208 /* Discard comments. */
1209
1210 while (stesc > start && stesc[-1] == ' ')
1211 stesc--;
1212 *stesc = '\0';
1213 break;
1214 }
1215 if (stesc == start)
1216 return ROFF_CONT;
1217 stesc--;
1218
1219 /* Notice the end of the input. */
1220
1221 if (*stesc == '\n') {
1222 *stesc-- = '\0';
1223 done = 1;
1224 }
1225
1226 expand_count = 0;
1227 while (stesc >= start) {
1228
1229 /* Search backwards for the next backslash. */
1230
1231 if (*stesc != r->escape) {
1232 if (*stesc == '\\') {
1233 *stesc = '\0';
1234 buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
1235 buf->buf, stesc + 1) + 1;
1236 start = nbuf + pos;
1237 stesc = nbuf + (stesc - buf->buf);
1238 free(buf->buf);
1239 buf->buf = nbuf;
1240 }
1241 stesc--;
1242 continue;
1243 }
1244
1245 /* If it is escaped, skip it. */
1246
1247 for (cp = stesc - 1; cp >= start; cp--)
1248 if (*cp != r->escape)
1249 break;
1250
1251 if ((stesc - cp) % 2 == 0) {
1252 while (stesc > cp)
1253 *stesc-- = '\\';
1254 continue;
1255 } else if (stesc[1] != '\0') {
1256 *stesc = '\\';
1257 } else {
1258 *stesc-- = '\0';
1259 if (done)
1260 continue;
1261 else
1262 return ROFF_APPEND;
1263 }
1264
1265 /* Decide whether to expand or to check only. */
1266
1267 term = '\0';
1268 cp = stesc + 1;
1269 switch (*cp) {
1270 case '*':
1271 res = NULL;
1272 break;
1273 case 'B':
1274 case 'w':
1275 term = cp[1];
1276 /* FALLTHROUGH */
1277 case 'n':
1278 sign = cp[1];
1279 if (sign == '+' || sign == '-')
1280 cp++;
1281 res = ubuf;
1282 break;
1283 default:
1284 esc = mandoc_escape(&cp, &stnam, &inaml);
1285 if (esc == ESCAPE_ERROR ||
1286 (esc == ESCAPE_SPECIAL &&
1287 mchars_spec2cp(stnam, inaml) < 0))
1288 mandoc_vmsg(MANDOCERR_ESC_BAD,
1289 r->parse, ln, (int)(stesc - buf->buf),
1290 "%.*s", (int)(cp - stesc), stesc);
1291 stesc--;
1292 continue;
1293 }
1294
1295 if (EXPAND_LIMIT < ++expand_count) {
1296 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1297 ln, (int)(stesc - buf->buf), NULL);
1298 return ROFF_IGN;
1299 }
1300
1301 /*
1302 * The third character decides the length
1303 * of the name of the string or register.
1304 * Save a pointer to the name.
1305 */
1306
1307 if (term == '\0') {
1308 switch (*++cp) {
1309 case '\0':
1310 maxl = 0;
1311 break;
1312 case '(':
1313 cp++;
1314 maxl = 2;
1315 break;
1316 case '[':
1317 cp++;
1318 term = ']';
1319 maxl = 0;
1320 break;
1321 default:
1322 maxl = 1;
1323 break;
1324 }
1325 } else {
1326 cp += 2;
1327 maxl = 0;
1328 }
1329 stnam = cp;
1330
1331 /* Advance to the end of the name. */
1332
1333 naml = 0;
1334 arg_complete = 1;
1335 while (maxl == 0 || naml < maxl) {
1336 if (*cp == '\0') {
1337 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
1338 ln, (int)(stesc - buf->buf), stesc);
1339 arg_complete = 0;
1340 break;
1341 }
1342 if (maxl == 0 && *cp == term) {
1343 cp++;
1344 break;
1345 }
1346 if (*cp++ != '\\' || stesc[1] != 'w') {
1347 naml++;
1348 continue;
1349 }
1350 switch (mandoc_escape(&cp, NULL, NULL)) {
1351 case ESCAPE_SPECIAL:
1352 case ESCAPE_UNICODE:
1353 case ESCAPE_NUMBERED:
1354 case ESCAPE_OVERSTRIKE:
1355 naml++;
1356 break;
1357 default:
1358 break;
1359 }
1360 }
1361
1362 /*
1363 * Retrieve the replacement string; if it is
1364 * undefined, resume searching for escapes.
1365 */
1366
1367 switch (stesc[1]) {
1368 case '*':
1369 if (arg_complete) {
1370 deftype = ROFFDEF_USER | ROFFDEF_PRE;
1371 res = roff_getstrn(r, stnam, naml, &deftype);
1372
1373 /*
1374 * If not overriden, let \*(.T
1375 * through to the formatters.
1376 */
1377
1378 if (res == NULL && naml == 2 &&
1379 stnam[0] == '.' && stnam[1] == 'T') {
1380 roff_setstrn(&r->strtab,
1381 ".T", 2, NULL, 0, 0);
1382 stesc--;
1383 continue;
1384 }
1385 }
1386 break;
1387 case 'B':
1388 npos = 0;
1389 ubuf[0] = arg_complete &&
1390 roff_evalnum(r, ln, stnam, &npos,
1391 NULL, ROFFNUM_SCALE) &&
1392 stnam + npos + 1 == cp ? '1' : '0';
1393 ubuf[1] = '\0';
1394 break;
1395 case 'n':
1396 if (arg_complete)
1397 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1398 roff_getregn(r, stnam, naml, sign));
1399 else
1400 ubuf[0] = '\0';
1401 break;
1402 case 'w':
1403 /* use even incomplete args */
1404 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1405 24 * (int)naml);
1406 break;
1407 }
1408
1409 if (res == NULL) {
1410 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1411 r->parse, ln, (int)(stesc - buf->buf),
1412 "%.*s", (int)naml, stnam);
1413 res = "";
1414 } else if (buf->sz + strlen(res) > SHRT_MAX) {
1415 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1416 ln, (int)(stesc - buf->buf), NULL);
1417 return ROFF_IGN;
1418 }
1419
1420 /* Replace the escape sequence by the string. */
1421
1422 *stesc = '\0';
1423 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
1424 buf->buf, res, cp) + 1;
1425
1426 /* Prepare for the next replacement. */
1427
1428 start = nbuf + pos;
1429 stesc = nbuf + (stesc - buf->buf) + strlen(res);
1430 free(buf->buf);
1431 buf->buf = nbuf;
1432 }
1433 return ROFF_CONT;
1434 }
1435
1436 /*
1437 * Process text streams.
1438 */
1439 static enum rofferr
1440 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1441 {
1442 size_t sz;
1443 const char *start;
1444 char *p;
1445 int isz;
1446 enum mandoc_esc esc;
1447
1448 /* Spring the input line trap. */
1449
1450 if (roffit_lines == 1) {
1451 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1452 free(buf->buf);
1453 buf->buf = p;
1454 buf->sz = isz + 1;
1455 *offs = 0;
1456 free(roffit_macro);
1457 roffit_lines = 0;
1458 return ROFF_REPARSE;
1459 } else if (roffit_lines > 1)
1460 --roffit_lines;
1461
1462 if (roffce_node != NULL && buf->buf[pos] != '\0') {
1463 if (roffce_lines < 1) {
1464 r->man->last = roffce_node;
1465 r->man->next = ROFF_NEXT_SIBLING;
1466 roffce_lines = 0;
1467 roffce_node = NULL;
1468 } else
1469 roffce_lines--;
1470 }
1471
1472 /* Convert all breakable hyphens into ASCII_HYPH. */
1473
1474 start = p = buf->buf + pos;
1475
1476 while (*p != '\0') {
1477 sz = strcspn(p, "-\\");
1478 p += sz;
1479
1480 if (*p == '\0')
1481 break;
1482
1483 if (*p == '\\') {
1484 /* Skip over escapes. */
1485 p++;
1486 esc = mandoc_escape((const char **)&p, NULL, NULL);
1487 if (esc == ESCAPE_ERROR)
1488 break;
1489 while (*p == '-')
1490 p++;
1491 continue;
1492 } else if (p == start) {
1493 p++;
1494 continue;
1495 }
1496
1497 if (isalpha((unsigned char)p[-1]) &&
1498 isalpha((unsigned char)p[1]))
1499 *p = ASCII_HYPH;
1500 p++;
1501 }
1502 return ROFF_CONT;
1503 }
1504
1505 enum rofferr
1506 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
1507 {
1508 enum roff_tok t;
1509 enum rofferr e;
1510 int pos; /* parse point */
1511 int spos; /* saved parse point for messages */
1512 int ppos; /* original offset in buf->buf */
1513 int ctl; /* macro line (boolean) */
1514
1515 ppos = pos = *offs;
1516
1517 /* Handle in-line equation delimiters. */
1518
1519 if (r->tbl == NULL &&
1520 r->last_eqn != NULL && r->last_eqn->delim &&
1521 (r->eqn == NULL || r->eqn_inline)) {
1522 e = roff_eqndelim(r, buf, pos);
1523 if (e == ROFF_REPARSE)
1524 return e;
1525 assert(e == ROFF_CONT);
1526 }
1527
1528 /* Expand some escape sequences. */
1529
1530 e = roff_res(r, buf, ln, pos);
1531 if (e == ROFF_IGN || e == ROFF_APPEND)
1532 return e;
1533 assert(e == ROFF_CONT);
1534
1535 ctl = roff_getcontrol(r, buf->buf, &pos);
1536
1537 /*
1538 * First, if a scope is open and we're not a macro, pass the
1539 * text through the macro's filter.
1540 * Equations process all content themselves.
1541 * Tables process almost all content themselves, but we want
1542 * to warn about macros before passing it there.
1543 */
1544
1545 if (r->last != NULL && ! ctl) {
1546 t = r->last->tok;
1547 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1548 if (e == ROFF_IGN)
1549 return e;
1550 assert(e == ROFF_CONT);
1551 }
1552 if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
1553 eqn_read(r->eqn, buf->buf + ppos);
1554 return ROFF_IGN;
1555 }
1556 if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
1557 tbl_read(r->tbl, ln, buf->buf, ppos);
1558 roff_addtbl(r->man, r->tbl);
1559 return ROFF_IGN;
1560 }
1561 if ( ! ctl)
1562 return roff_parsetext(r, buf, pos, offs);
1563
1564 /* Skip empty request lines. */
1565
1566 if (buf->buf[pos] == '"') {
1567 mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
1568 ln, pos, NULL);
1569 return ROFF_IGN;
1570 } else if (buf->buf[pos] == '\0')
1571 return ROFF_IGN;
1572
1573 /*
1574 * If a scope is open, go to the child handler for that macro,
1575 * as it may want to preprocess before doing anything with it.
1576 * Don't do so if an equation is open.
1577 */
1578
1579 if (r->last) {
1580 t = r->last->tok;
1581 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1582 }
1583
1584 /* No scope is open. This is a new request or macro. */
1585
1586 spos = pos;
1587 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1588
1589 /* Tables ignore most macros. */
1590
1591 if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
1592 t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
1593 mandoc_msg(MANDOCERR_TBLMACRO, r->parse,
1594 ln, pos, buf->buf + spos);
1595 if (t != TOKEN_NONE)
1596 return ROFF_IGN;
1597 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1598 pos++;
1599 while (buf->buf[pos] == ' ')
1600 pos++;
1601 tbl_read(r->tbl, ln, buf->buf, pos);
1602 roff_addtbl(r->man, r->tbl);
1603 return ROFF_IGN;
1604 }
1605
1606 /* For now, let high level macros abort .ce mode. */
1607
1608 if (ctl && roffce_node != NULL &&
1609 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
1610 t == ROFF_TH || t == ROFF_TS)) {
1611 r->man->last = roffce_node;
1612 r->man->next = ROFF_NEXT_SIBLING;
1613 roffce_lines = 0;
1614 roffce_node = NULL;
1615 }
1616
1617 /*
1618 * This is neither a roff request nor a user-defined macro.
1619 * Let the standard macro set parsers handle it.
1620 */
1621
1622 if (t == TOKEN_NONE)
1623 return ROFF_CONT;
1624
1625 /* Execute a roff request or a user defined macro. */
1626
1627 return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
1628 }
1629
1630 void
1631 roff_endparse(struct roff *r)
1632 {
1633 if (r->last != NULL)
1634 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1635 r->last->line, r->last->col,
1636 roff_name[r->last->tok]);
1637
1638 if (r->eqn != NULL) {
1639 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1640 r->eqn->node->line, r->eqn->node->pos, "EQ");
1641 eqn_parse(r->eqn);
1642 r->eqn = NULL;
1643 }
1644
1645 if (r->tbl != NULL) {
1646 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1647 r->tbl->line, r->tbl->pos, "TS");
1648 tbl_end(r->tbl);
1649 r->tbl = NULL;
1650 }
1651 }
1652
1653 /*
1654 * Parse a roff node's type from the input buffer. This must be in the
1655 * form of ".foo xxx" in the usual way.
1656 */
1657 static enum roff_tok
1658 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1659 {
1660 char *cp;
1661 const char *mac;
1662 size_t maclen;
1663 int deftype;
1664 enum roff_tok t;
1665
1666 cp = buf + *pos;
1667
1668 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1669 return TOKEN_NONE;
1670
1671 mac = cp;
1672 maclen = roff_getname(r, &cp, ln, ppos);
1673
1674 deftype = ROFFDEF_USER | ROFFDEF_REN;
1675 r->current_string = roff_getstrn(r, mac, maclen, &deftype);
1676 switch (deftype) {
1677 case ROFFDEF_USER:
1678 t = ROFF_USERDEF;
1679 break;
1680 case ROFFDEF_REN:
1681 t = ROFF_RENAMED;
1682 break;
1683 default:
1684 t = roffhash_find(r->reqtab, mac, maclen);
1685 break;
1686 }
1687 if (t != TOKEN_NONE)
1688 *pos = cp - buf;
1689 else if (deftype == ROFFDEF_UNDEF) {
1690 /* Using an undefined macro defines it to be empty. */
1691 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
1692 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
1693 }
1694 return t;
1695 }
1696
1697 /* --- handling of request blocks ----------------------------------------- */
1698
1699 static enum rofferr
1700 roff_cblock(ROFF_ARGS)
1701 {
1702
1703 /*
1704 * A block-close `..' should only be invoked as a child of an
1705 * ignore macro, otherwise raise a warning and just ignore it.
1706 */
1707
1708 if (r->last == NULL) {
1709 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1710 ln, ppos, "..");
1711 return ROFF_IGN;
1712 }
1713
1714 switch (r->last->tok) {
1715 case ROFF_am:
1716 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1717 case ROFF_ami:
1718 case ROFF_de:
1719 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1720 case ROFF_dei:
1721 case ROFF_ig:
1722 break;
1723 default:
1724 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1725 ln, ppos, "..");
1726 return ROFF_IGN;
1727 }
1728
1729 if (buf->buf[pos] != '\0')
1730 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1731 ".. %s", buf->buf + pos);
1732
1733 roffnode_pop(r);
1734 roffnode_cleanscope(r);
1735 return ROFF_IGN;
1736
1737 }
1738
1739 static void
1740 roffnode_cleanscope(struct roff *r)
1741 {
1742
1743 while (r->last) {
1744 if (--r->last->endspan != 0)
1745 break;
1746 roffnode_pop(r);
1747 }
1748 }
1749
1750 static void
1751 roff_ccond(struct roff *r, int ln, int ppos)
1752 {
1753
1754 if (NULL == r->last) {
1755 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1756 ln, ppos, "\\}");
1757 return;
1758 }
1759
1760 switch (r->last->tok) {
1761 case ROFF_el:
1762 case ROFF_ie:
1763 case ROFF_if:
1764 break;
1765 default:
1766 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1767 ln, ppos, "\\}");
1768 return;
1769 }
1770
1771 if (r->last->endspan > -1) {
1772 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1773 ln, ppos, "\\}");
1774 return;
1775 }
1776
1777 roffnode_pop(r);
1778 roffnode_cleanscope(r);
1779 return;
1780 }
1781
1782 static enum rofferr
1783 roff_block(ROFF_ARGS)
1784 {
1785 const char *name, *value;
1786 char *call, *cp, *iname, *rname;
1787 size_t csz, namesz, rsz;
1788 int deftype;
1789
1790 /* Ignore groff compatibility mode for now. */
1791
1792 if (tok == ROFF_de1)
1793 tok = ROFF_de;
1794 else if (tok == ROFF_dei1)
1795 tok = ROFF_dei;
1796 else if (tok == ROFF_am1)
1797 tok = ROFF_am;
1798 else if (tok == ROFF_ami1)
1799 tok = ROFF_ami;
1800
1801 /* Parse the macro name argument. */
1802
1803 cp = buf->buf + pos;
1804 if (tok == ROFF_ig) {
1805 iname = NULL;
1806 namesz = 0;
1807 } else {
1808 iname = cp;
1809 namesz = roff_getname(r, &cp, ln, ppos);
1810 iname[namesz] = '\0';
1811 }
1812
1813 /* Resolve the macro name argument if it is indirect. */
1814
1815 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1816 deftype = ROFFDEF_USER;
1817 name = roff_getstrn(r, iname, namesz, &deftype);
1818 if (name == NULL) {
1819 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1820 r->parse, ln, (int)(iname - buf->buf),
1821 "%.*s", (int)namesz, iname);
1822 namesz = 0;
1823 } else
1824 namesz = strlen(name);
1825 } else
1826 name = iname;
1827
1828 if (namesz == 0 && tok != ROFF_ig) {
1829 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
1830 ln, ppos, roff_name[tok]);
1831 return ROFF_IGN;
1832 }
1833
1834 roffnode_push(r, tok, name, ln, ppos);
1835
1836 /*
1837 * At the beginning of a `de' macro, clear the existing string
1838 * with the same name, if there is one. New content will be
1839 * appended from roff_block_text() in multiline mode.
1840 */
1841
1842 if (tok == ROFF_de || tok == ROFF_dei) {
1843 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1844 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
1845 } else if (tok == ROFF_am || tok == ROFF_ami) {
1846 deftype = ROFFDEF_ANY;
1847 value = roff_getstrn(r, iname, namesz, &deftype);
1848 switch (deftype) { /* Before appending, ... */
1849 case ROFFDEF_PRE: /* copy predefined to user-defined. */
1850 roff_setstrn(&r->strtab, name, namesz,
1851 value, strlen(value), 0);
1852 break;
1853 case ROFFDEF_REN: /* call original standard macro. */
1854 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
1855 (int)strlen(value), value);
1856 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
1857 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
1858 free(call);
1859 break;
1860 case ROFFDEF_STD: /* rename and call standard macro. */
1861 rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
1862 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
1863 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
1864 (int)rsz, rname);
1865 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
1866 free(call);
1867 free(rname);
1868 break;
1869 default:
1870 break;
1871 }
1872 }
1873
1874 if (*cp == '\0')
1875 return ROFF_IGN;
1876
1877 /* Get the custom end marker. */
1878
1879 iname = cp;
1880 namesz = roff_getname(r, &cp, ln, ppos);
1881
1882 /* Resolve the end marker if it is indirect. */
1883
1884 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1885 deftype = ROFFDEF_USER;
1886 name = roff_getstrn(r, iname, namesz, &deftype);
1887 if (name == NULL) {
1888 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1889 r->parse, ln, (int)(iname - buf->buf),
1890 "%.*s", (int)namesz, iname);
1891 namesz = 0;
1892 } else
1893 namesz = strlen(name);
1894 } else
1895 name = iname;
1896
1897 if (namesz)
1898 r->last->end = mandoc_strndup(name, namesz);
1899
1900 if (*cp != '\0')
1901 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
1902 ln, pos, ".%s ... %s", roff_name[tok], cp);
1903
1904 return ROFF_IGN;
1905 }
1906
1907 static enum rofferr
1908 roff_block_sub(ROFF_ARGS)
1909 {
1910 enum roff_tok t;
1911 int i, j;
1912
1913 /*
1914 * First check whether a custom macro exists at this level. If
1915 * it does, then check against it. This is some of groff's
1916 * stranger behaviours. If we encountered a custom end-scope
1917 * tag and that tag also happens to be a "real" macro, then we
1918 * need to try interpreting it again as a real macro. If it's
1919 * not, then return ignore. Else continue.
1920 */
1921
1922 if (r->last->end) {
1923 for (i = pos, j = 0; r->last->end[j]; j++, i++)
1924 if (buf->buf[i] != r->last->end[j])
1925 break;
1926
1927 if (r->last->end[j] == '\0' &&
1928 (buf->buf[i] == '\0' ||
1929 buf->buf[i] == ' ' ||
1930 buf->buf[i] == '\t')) {
1931 roffnode_pop(r);
1932 roffnode_cleanscope(r);
1933
1934 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
1935 i++;
1936
1937 pos = i;
1938 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
1939 TOKEN_NONE)
1940 return ROFF_RERUN;
1941 return ROFF_IGN;
1942 }
1943 }
1944
1945 /*
1946 * If we have no custom end-query or lookup failed, then try
1947 * pulling it out of the hashtable.
1948 */
1949
1950 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1951
1952 if (t != ROFF_cblock) {
1953 if (tok != ROFF_ig)
1954 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
1955 return ROFF_IGN;
1956 }
1957
1958 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
1959 }
1960
1961 static enum rofferr
1962 roff_block_text(ROFF_ARGS)
1963 {
1964
1965 if (tok != ROFF_ig)
1966 roff_setstr(r, r->last->name, buf->buf + pos, 2);
1967
1968 return ROFF_IGN;
1969 }
1970
1971 static enum rofferr
1972 roff_cond_sub(ROFF_ARGS)
1973 {
1974 enum roff_tok t;
1975 char *ep;
1976 int rr;
1977
1978 rr = r->last->rule;
1979 roffnode_cleanscope(r);
1980
1981 /*
1982 * If `\}' occurs on a macro line without a preceding macro,
1983 * drop the line completely.
1984 */
1985
1986 ep = buf->buf + pos;
1987 if (ep[0] == '\\' && ep[1] == '}')
1988 rr = 0;
1989
1990 /* Always check for the closing delimiter `\}'. */
1991
1992 while ((ep = strchr(ep, '\\')) != NULL) {
1993 switch (ep[1]) {
1994 case '}':
1995 memmove(ep, ep + 2, strlen(ep + 2) + 1);
1996 roff_ccond(r, ln, ep - buf->buf);
1997 break;
1998 case '\0':
1999 ++ep;
2000 break;
2001 default:
2002 ep += 2;
2003 break;
2004 }
2005 }
2006
2007 /*
2008 * Fully handle known macros when they are structurally
2009 * required or when the conditional evaluated to true.
2010 */
2011
2012 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2013 return t != TOKEN_NONE && (rr || roffs[t].flags & ROFFMAC_STRUCT)
2014 ? (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs) : rr
2015 ? ROFF_CONT : ROFF_IGN;
2016 }
2017
2018 static enum rofferr
2019 roff_cond_text(ROFF_ARGS)
2020 {
2021 char *ep;
2022 int rr;
2023
2024 rr = r->last->rule;
2025 roffnode_cleanscope(r);
2026
2027 ep = buf->buf + pos;
2028 while ((ep = strchr(ep, '\\')) != NULL) {
2029 if (*(++ep) == '}') {
2030 *ep = '&';
2031 roff_ccond(r, ln, ep - buf->buf - 1);
2032 }
2033 if (*ep != '\0')
2034 ++ep;
2035 }
2036 return rr ? ROFF_CONT : ROFF_IGN;
2037 }
2038
2039 /* --- handling of numeric and conditional expressions -------------------- */
2040
2041 /*
2042 * Parse a single signed integer number. Stop at the first non-digit.
2043 * If there is at least one digit, return success and advance the
2044 * parse point, else return failure and let the parse point unchanged.
2045 * Ignore overflows, treat them just like the C language.
2046 */
2047 static int
2048 roff_getnum(const char *v, int *pos, int *res, int flags)
2049 {
2050 int myres, scaled, n, p;
2051
2052 if (NULL == res)
2053 res = &myres;
2054
2055 p = *pos;
2056 n = v[p] == '-';
2057 if (n || v[p] == '+')
2058 p++;
2059
2060 if (flags & ROFFNUM_WHITE)
2061 while (isspace((unsigned char)v[p]))
2062 p++;
2063
2064 for (*res = 0; isdigit((unsigned char)v[p]); p++)
2065 *res = 10 * *res + v[p] - '0';
2066 if (p == *pos + n)
2067 return 0;
2068
2069 if (n)
2070 *res = -*res;
2071
2072 /* Each number may be followed by one optional scaling unit. */
2073
2074 switch (v[p]) {
2075 case 'f':
2076 scaled = *res * 65536;
2077 break;
2078 case 'i':
2079 scaled = *res * 240;
2080 break;
2081 case 'c':
2082 scaled = *res * 240 / 2.54;
2083 break;
2084 case 'v':
2085 case 'P':
2086 scaled = *res * 40;
2087 break;
2088 case 'm':
2089 case 'n':
2090 scaled = *res * 24;
2091 break;
2092 case 'p':
2093 scaled = *res * 10 / 3;
2094 break;
2095 case 'u':
2096 scaled = *res;
2097 break;
2098 case 'M':
2099 scaled = *res * 6 / 25;
2100 break;
2101 default:
2102 scaled = *res;
2103 p--;
2104 break;
2105 }
2106 if (flags & ROFFNUM_SCALE)
2107 *res = scaled;
2108
2109 *pos = p + 1;
2110 return 1;
2111 }
2112
/*
 * Evaluate a string comparison condition.
 * The first character is the delimiter.
 * Succeed if the string up to its second occurrence
 * matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or lacking that, to the end of the line.
 * At the end of the line, fail without moving the cursor.
 * Returns 1 on a match and 0 otherwise.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*s1, *s2, *s3;
	int		 match;

	match = 0;
	s1 = v + *pos;		/* initial delimiter */

	/* Without a delimiter, do not look beyond the terminator. */

	if (*s1 == '\0')
		return 0;

	s2 = s1 + 1;		/* for scanning the first string */
	s3 = strchr(s2, *s1);	/* for scanning the second string */

	if (NULL == s3)		/* found no middle delimiter */
		goto out;

	while ('\0' != *++s3) {
		if (*s2 != *s3) {  /* mismatch */
			s3 = strchr(s3, *s1);
			break;
		}
		if (*s3 == *s1) {  /* found the final delimiter */
			match = 1;
			break;
		}
		s2++;
	}

out:
	if (NULL == s3)
		s3 = strchr(s2, '\0');
	else if (*s3 != '\0')
		s3++;
	*pos = s3 - v;
	return match;
}
2155
2156 /*
2157 * Evaluate an optionally negated single character, numerical,
2158 * or string condition.
2159 */
2160 static int
2161 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2162 {
2163 char *cp, *name;
2164 size_t sz;
2165 int deftype, number, savepos, istrue, wanttrue;
2166
2167 if ('!' == v[*pos]) {
2168 wanttrue = 0;
2169 (*pos)++;
2170 } else
2171 wanttrue = 1;
2172
2173 switch (v[*pos]) {
2174 case '\0':
2175 return 0;
2176 case 'n':
2177 case 'o':
2178 (*pos)++;
2179 return wanttrue;
2180 case 'c':
2181 case 'e':
2182 case 't':
2183 case 'v':
2184 (*pos)++;
2185 return !wanttrue;
2186 case 'd':
2187 case 'r':
2188 cp = v + *pos + 1;
2189 while (*cp == ' ')
2190 cp++;
2191 name = cp;
2192 sz = roff_getname(r, &cp, ln, cp - v);
2193 if (sz == 0)
2194 istrue = 0;
2195 else if (v[*pos] == 'r')
2196 istrue = roff_hasregn(r, name, sz);
2197 else {
2198 deftype = ROFFDEF_ANY;
2199 roff_getstrn(r, name, sz, &deftype);
2200 istrue = !!deftype;
2201 }
2202 *pos = cp - v;
2203 return istrue == wanttrue;
2204 default:
2205 break;
2206 }
2207
2208 savepos = *pos;
2209 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2210 return (number > 0) == wanttrue;
2211 else if (*pos == savepos)
2212 return roff_evalstrcond(v, pos) == wanttrue;
2213 else
2214 return 0;
2215 }
2216
2217 static enum rofferr
2218 roff_line_ignore(ROFF_ARGS)
2219 {
2220
2221 return ROFF_IGN;
2222 }
2223
2224 static enum rofferr
2225 roff_insec(ROFF_ARGS)
2226 {
2227
2228 mandoc_msg(MANDOCERR_REQ_INSEC, r->parse,
2229 ln, ppos, roff_name[tok]);
2230 return ROFF_IGN;
2231 }
2232
2233 static enum rofferr
2234 roff_unsupp(ROFF_ARGS)
2235 {
2236
2237 mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse,
2238 ln, ppos, roff_name[tok]);
2239 return ROFF_IGN;
2240 }
2241
2242 static enum rofferr
2243 roff_cond(ROFF_ARGS)
2244 {
2245
2246 roffnode_push(r, tok, NULL, ln, ppos);
2247
2248 /*
2249 * An `.el' has no conditional body: it will consume the value
2250 * of the current rstack entry set in prior `ie' calls or
2251 * defaults to DENY.
2252 *
2253 * If we're not an `el', however, then evaluate the conditional.
2254 */
2255
2256 r->last->rule = tok == ROFF_el ?
2257 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2258 roff_evalcond(r, ln, buf->buf, &pos);
2259
2260 /*
2261 * An if-else will put the NEGATION of the current evaluated
2262 * conditional into the stack of rules.
2263 */
2264
2265 if (tok == ROFF_ie) {
2266 if (r->rstackpos + 1 == r->rstacksz) {
2267 r->rstacksz += 16;
2268 r->rstack = mandoc_reallocarray(r->rstack,
2269 r->rstacksz, sizeof(int));
2270 }
2271 r->rstack[++r->rstackpos] = !r->last->rule;
2272 }
2273
2274 /* If the parent has false as its rule, then so do we. */
2275
2276 if (r->last->parent && !r->last->parent->rule)
2277 r->last->rule = 0;
2278
2279 /*
2280 * Determine scope.
2281 * If there is nothing on the line after the conditional,
2282 * not even whitespace, use next-line scope.
2283 */
2284
2285 if (buf->buf[pos] == '\0') {
2286 r->last->endspan = 2;
2287 goto out;
2288 }
2289
2290 while (buf->buf[pos] == ' ')
2291 pos++;
2292
2293 /* An opening brace requests multiline scope. */
2294
2295 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2296 r->last->endspan = -1;
2297 pos += 2;
2298 while (buf->buf[pos] == ' ')
2299 pos++;
2300 goto out;
2301 }
2302
2303 /*
2304 * Anything else following the conditional causes
2305 * single-line scope. Warn if the scope contains
2306 * nothing but trailing whitespace.
2307 */
2308
2309 if (buf->buf[pos] == '\0')
2310 mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
2311 ln, ppos, roff_name[tok]);
2312
2313 r->last->endspan = 1;
2314
2315 out:
2316 *offs = pos;
2317 return ROFF_RERUN;
2318 }
2319
2320 static enum rofferr
2321 roff_ds(ROFF_ARGS)
2322 {
2323 char *string;
2324 const char *name;
2325 size_t namesz;
2326
2327 /* Ignore groff compatibility mode for now. */
2328
2329 if (tok == ROFF_ds1)
2330 tok = ROFF_ds;
2331 else if (tok == ROFF_as1)
2332 tok = ROFF_as;
2333
2334 /*
2335 * The first word is the name of the string.
2336 * If it is empty or terminated by an escape sequence,
2337 * abort the `ds' request without defining anything.
2338 */
2339
2340 name = string = buf->buf + pos;
2341 if (*name == '\0')
2342 return ROFF_IGN;
2343
2344 namesz = roff_getname(r, &string, ln, pos);
2345 if (name[namesz] == '\\')
2346 return ROFF_IGN;
2347
2348 /* Read past the initial double-quote, if any. */
2349 if (*string == '"')
2350 string++;
2351
2352 /* The rest is the value. */
2353 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2354 ROFF_as == tok);
2355 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2356 return ROFF_IGN;
2357 }
2358
/*
 * Recognize one operator of a numerical expression at v[*pos].
 * Operators are one or two characters long.  On success, *pos is
 * moved behind the operator, a one-character code is stored in
 * *res and also returned:
 *   + - * / % & : < > =   stand for themselves ("==" yields '='),
 *   "<=" -> 'l',  ">=" -> 'g',  "<>" -> '!',
 *   "<?" -> 'i',  ">?" -> 'a'.
 * On failure, return 0 and leave *pos alone; *res then holds the
 * unrecognized character.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*op;
	char		 next;
	int		 twochars;

	op = v + *pos;
	*res = *op;

	switch (*op) {
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case ':':
		/* These never combine with a second character. */
		*pos += 1;
		return *res;
	case '<':
	case '>':
	case '=':
		break;
	default:
		return 0;
	}

	/* The remaining operators may take a second character. */

	next = op[1];
	twochars = 0;
	if (*op == '=') {
		twochars = next == '=';
	} else if (next == '=') {
		*res = *op == '<' ? 'l' : 'g';
		twochars = 1;
	} else if (next == '?') {
		*res = *op == '<' ? 'i' : 'a';
		twochars = 1;
	} else if (*op == '<' && next == '>') {
		*res = '!';
		twochars = 1;
	}

	*pos += twochars ? 2 : 1;
	return *res;
}
2422
2423 /*
2424 * Evaluate either a parenthesized numeric expression
2425 * or a single signed integer number.
2426 */
2427 static int
2428 roff_evalpar(struct roff *r, int ln,
2429 const char *v, int *pos, int *res, int flags)
2430 {
2431
2432 if ('(' != v[*pos])
2433 return roff_getnum(v, pos, res, flags);
2434
2435 (*pos)++;
2436 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2437 return 0;
2438
2439 /*
2440 * Omission of the closing parenthesis
2441 * is an error in validation mode,
2442 * but ignored in evaluation mode.
2443 */
2444
2445 if (')' == v[*pos])
2446 (*pos)++;
2447 else if (NULL == res)
2448 return 0;
2449
2450 return 1;
2451 }
2452
2453 /*
2454 * Evaluate a complete numeric expression.
2455 * Proceed left to right, there is no concept of precedence.
2456 */
2457 static int
2458 roff_evalnum(struct roff *r, int ln, const char *v,
2459 int *pos, int *res, int flags)
2460 {
2461 int mypos, operand2;
2462 char operator;
2463
2464 if (NULL == pos) {
2465 mypos = 0;
2466 pos = &mypos;
2467 }
2468
2469 if (flags & ROFFNUM_WHITE)
2470 while (isspace((unsigned char)v[*pos]))
2471 (*pos)++;
2472
2473 if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2474 return 0;
2475
2476 while (1) {
2477 if (flags & ROFFNUM_WHITE)
2478 while (isspace((unsigned char)v[*pos]))
2479 (*pos)++;
2480
2481 if ( ! roff_getop(v, pos, &operator))
2482 break;
2483
2484 if (flags & ROFFNUM_WHITE)
2485 while (isspace((unsigned char)v[*pos]))
2486 (*pos)++;
2487
2488 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2489 return 0;
2490
2491 if (flags & ROFFNUM_WHITE)
2492 while (isspace((unsigned char)v[*pos]))
2493 (*pos)++;
2494
2495 if (NULL == res)
2496 continue;
2497
2498 switch (operator) {
2499 case '+':
2500 *res += operand2;
2501 break;
2502 case '-':
2503 *res -= operand2;
2504 break;
2505 case '*':
2506 *res *= operand2;
2507 break;
2508 case '/':
2509 if (operand2 == 0) {
2510 mandoc_msg(MANDOCERR_DIVZERO,
2511 r->parse, ln, *pos, v);
2512 *res = 0;
2513 break;
2514 }
2515 *res /= operand2;
2516 break;
2517 case '%':
2518 if (operand2 == 0) {
2519 mandoc_msg(MANDOCERR_DIVZERO,
2520 r->parse, ln, *pos, v);
2521 *res = 0;
2522 break;
2523 }
2524 *res %= operand2;
2525 break;
2526 case '<':
2527 *res = *res < operand2;
2528 break;
2529 case '>':
2530 *res = *res > operand2;
2531 break;
2532 case 'l':
2533 *res = *res <= operand2;
2534 break;
2535 case 'g':
2536 *res = *res >= operand2;
2537 break;
2538 case '=':
2539 *res = *res == operand2;
2540 break;
2541 case '!':
2542 *res = *res != operand2;
2543 break;
2544 case '&':
2545 *res = *res && operand2;
2546 break;
2547 case ':':
2548 *res = *res || operand2;
2549 break;
2550 case 'i':
2551 if (operand2 < *res)
2552 *res = operand2;
2553 break;
2554 case 'a':
2555 if (operand2 > *res)
2556 *res = operand2;
2557 break;
2558 default:
2559 abort();
2560 }
2561 }
2562 return 1;
2563 }
2564
2565 /* --- register management ------------------------------------------------ */
2566
/*
 * Set the register called name, given as a NUL-terminated string.
 * Wrapper around roff_setregn() that passes INT_MIN as the step,
 * which roff_setregn() takes to mean "leave the step alone".
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	size_t	 namesz;

	namesz = strlen(name);
	roff_setregn(r, name, namesz, val, sign, INT_MIN);
}
2572
2573 static void
2574 roff_setregn(struct roff *r, const char *name, size_t len,
2575 int val, char sign, int step)
2576 {
2577 struct roffreg *reg;
2578
2579 /* Search for an existing register with the same name. */
2580 reg = r->regtab;
2581
2582 while (reg != NULL && (reg->key.sz != len ||
2583 strncmp(reg->key.p, name, len) != 0))
2584 reg = reg->next;
2585
2586 if (NULL == reg) {
2587 /* Create a new register. */
2588 reg = mandoc_malloc(sizeof(struct roffreg));
2589 reg->key.p = mandoc_strndup(name, len);
2590 reg->key.sz = len;
2591 reg->val = 0;
2592 reg->step = 0;
2593 reg->next = r->regtab;
2594 r->regtab = reg;
2595 }
2596
2597 if ('+' == sign)
2598 reg->val += val;
2599 else if ('-' == sign)
2600 reg->val -= val;
2601 else
2602 reg->val = val;
2603 if (step != INT_MIN)
2604 reg->step = step;
2605 }
2606
2607 /*
2608 * Handle some predefined read-only number registers.
2609 * For now, return -1 if the requested register is not predefined;
2610 * in case a predefined read-only register having the value -1
2611 * were to turn up, another special value would have to be chosen.
2612 */
2613 static int
2614 roff_getregro(const struct roff *r, const char *name)
2615 {
2616
2617 switch (*name) {
2618 case '$': /* Number of arguments of the last macro evaluated. */
2619 return r->argc;
2620 case 'A': /* ASCII approximation mode is always off. */
2621 return 0;
2622 case 'g': /* Groff compatibility mode is always on. */
2623 return 1;
2624 case 'H': /* Fixed horizontal resolution. */
2625 return 24;
2626 case 'j': /* Always adjust left margin only. */
2627 return 0;
2628 case 'T': /* Some output device is always defined. */
2629 return 1;
2630 case 'V': /* Fixed vertical resolution. */
2631 return 40;
2632 default:
2633 return -1;
2634 }
2635 }
2636
/*
 * Return the value of the register called name, given as a
 * NUL-terminated string, without auto-incrementing it.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	size_t	 namesz;

	namesz = strlen(name);
	return roff_getregn(r, name, namesz, '\0');
}
2642
2643 static int
2644 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
2645 {
2646 struct roffreg *reg;
2647 int val;
2648
2649 if ('.' == name[0] && 2 == len) {
2650 val = roff_getregro(r, name + 1);
2651 if (-1 != val)
2652 return val;
2653 }
2654
2655 for (reg = r->regtab; reg; reg = reg->next) {
2656 if (len == reg->key.sz &&
2657 0 == strncmp(name, reg->key.p, len)) {
2658 switch (sign) {
2659 case '+':
2660 reg->val += reg->step;
2661 break;
2662 case '-':
2663 reg->val -= reg->step;
2664 break;
2665 default:
2666 break;
2667 }
2668 return reg->val;
2669 }
2670 }
2671
2672 roff_setregn(r, name, len, 0, '\0', INT_MIN);
2673 return 0;
2674 }
2675
2676 static int
2677 roff_hasregn(const struct roff *r, const char *name, size_t len)
2678 {
2679 struct roffreg *reg;
2680 int val;
2681
2682 if ('.' == name[0] && 2 == len) {
2683 val = roff_getregro(r, name + 1);
2684 if (-1 != val)
2685 return 1;
2686 }
2687
2688 for (reg = r->regtab; reg; reg = reg->next)
2689 if (len == reg->key.sz &&
2690 0 == strncmp(name, reg->key.p, len))
2691 return 1;
2692
2693 return 0;
2694 }
2695
2696 static void
2697 roff_freereg(struct roffreg *reg)
2698 {
2699 struct roffreg *old_reg;
2700
2701 while (NULL != reg) {
2702 free(reg->key.p);
2703 old_reg = reg;
2704 reg = reg->next;
2705 free(old_reg);
2706 }
2707 }
2708
2709 static enum rofferr
2710 roff_nr(ROFF_ARGS)
2711 {
2712 char *key, *val, *step;
2713 size_t keysz;
2714 int iv, is, len;
2715 char sign;
2716
2717 key = val = buf->buf + pos;
2718 if (*key == '\0')
2719 return ROFF_IGN;
2720
2721 keysz = roff_getname(r, &val, ln, pos);
2722 if (key[keysz] == '\\')
2723 return ROFF_IGN;
2724
2725 sign = *val;
2726 if (sign == '+' || sign == '-')
2727 val++;
2728
2729 len = 0;
2730 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
2731 return ROFF_IGN;
2732
2733 step = val + len;
2734 while (isspace((unsigned char)*step))
2735 step++;
2736 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
2737 is = INT_MIN;
2738
2739 roff_setregn(r, key, keysz, iv, sign, is);
2740 return ROFF_IGN;
2741 }
2742
2743 static enum rofferr
2744 roff_rr(ROFF_ARGS)
2745 {
2746 struct roffreg *reg, **prev;
2747 char *name, *cp;
2748 size_t namesz;
2749
2750 name = cp = buf->buf + pos;
2751 if (*name == '\0')
2752 return ROFF_IGN;
2753 namesz = roff_getname(r, &cp, ln, pos);
2754 name[namesz] = '\0';
2755
2756 prev = &r->regtab;
2757 while (1) {
2758 reg = *prev;
2759 if (reg == NULL || !strcmp(name, reg->key.p))
2760 break;
2761 prev = &reg->next;
2762 }
2763 if (reg != NULL) {
2764 *prev = reg->next;
2765 free(reg->key.p);
2766 free(reg);
2767 }
2768 return ROFF_IGN;
2769 }
2770
2771 /* --- handler functions for roff requests -------------------------------- */
2772
2773 static enum rofferr
2774 roff_rm(ROFF_ARGS)
2775 {
2776 const char *name;
2777 char *cp;
2778 size_t namesz;
2779
2780 cp = buf->buf + pos;
2781 while (*cp != '\0') {
2782 name = cp;
2783 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
2784 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
2785 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2786 if (name[namesz] == '\\')
2787 break;
2788 }
2789 return ROFF_IGN;
2790 }
2791
2792 static enum rofferr
2793 roff_it(ROFF_ARGS)
2794 {
2795 int iv;
2796
2797 /* Parse the number of lines. */
2798
2799 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
2800 mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
2801 ln, ppos, buf->buf + 1);
2802 return ROFF_IGN;
2803 }
2804
2805 while (isspace((unsigned char)buf->buf[pos]))
2806 pos++;
2807
2808 /*
2809 * Arm the input line trap.
2810 * Special-casing "an-trap" is an ugly workaround to cope
2811 * with DocBook stupidly fiddling with man(7) internals.
2812 */
2813
2814 roffit_lines = iv;
2815 roffit_macro = mandoc_strdup(iv != 1 ||
2816 strcmp(buf->buf + pos, "an-trap") ?
2817 buf->buf + pos : "br");
2818 return ROFF_IGN;
2819 }
2820
2821 static enum rofferr
2822 roff_Dd(ROFF_ARGS)
2823 {
2824 int mask;
2825 enum roff_tok t, te;
2826
2827 switch (tok) {
2828 case ROFF_Dd:
2829 tok = MDOC_Dd;
2830 te = MDOC_MAX;
2831 if (r->format == 0)
2832 r->format = MPARSE_MDOC;
2833 mask = MPARSE_MDOC | MPARSE_QUICK;
2834 break;
2835 case ROFF_TH:
2836 tok = MAN_TH;
2837 te = MAN_MAX;
2838 if (r->format == 0)
2839 r->format = MPARSE_MAN;
2840 mask = MPARSE_QUICK;
2841 break;
2842 default:
2843 abort();
2844 }
2845 if ((r->options & mask) == 0)
2846 for (t = tok; t < te; t++)
2847 roff_setstr(r, roff_name[t], NULL, 0);
2848 return ROFF_CONT;
2849 }
2850
2851 static enum rofferr
2852 roff_TE(ROFF_ARGS)
2853 {
2854 if (r->tbl == NULL) {
2855 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2856 ln, ppos, "TE");
2857 return ROFF_IGN;
2858 }
2859 if (tbl_end(r->tbl) == 0) {
2860 r->tbl = NULL;
2861 free(buf->buf);
2862 buf->buf = mandoc_strdup(".sp");
2863 buf->sz = 4;
2864 *offs = 0;
2865 return ROFF_REPARSE;
2866 }
2867 r->tbl = NULL;
2868 return ROFF_IGN;
2869 }
2870
2871 static enum rofferr
2872 roff_T_(ROFF_ARGS)
2873 {
2874
2875 if (NULL == r->tbl)
2876 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2877 ln, ppos, "T&");
2878 else
2879 tbl_restart(ln, ppos, r->tbl);
2880
2881 return ROFF_IGN;
2882 }
2883
2884 /*
2885 * Handle in-line equation delimiters.
2886 */
2887 static enum rofferr
2888 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
2889 {
2890 char *cp1, *cp2;
2891 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
2892
2893 /*
2894 * Outside equations, look for an opening delimiter.
2895 * If we are inside an equation, we already know it is
2896 * in-line, or this function wouldn't have been called;
2897 * so look for a closing delimiter.
2898 */
2899
2900 cp1 = buf->buf + pos;
2901 cp2 = strchr(cp1, r->eqn == NULL ?
2902 r->last_eqn->odelim : r->last_eqn->cdelim);
2903 if (cp2 == NULL)
2904 return ROFF_CONT;
2905
2906 *cp2++ = '\0';
2907 bef_pr = bef_nl = aft_nl = aft_pr = "";
2908
2909 /* Handle preceding text, protecting whitespace. */
2910
2911 if (*buf->buf != '\0') {
2912 if (r->eqn == NULL)
2913 bef_pr = "\\&";
2914 bef_nl = "\n";
2915 }
2916
2917 /*
2918 * Prepare replacing the delimiter with an equation macro
2919 * and drop leading white space from the equation.
2920 */
2921
2922 if (r->eqn == NULL) {
2923 while (*cp2 == ' ')
2924 cp2++;
2925 mac = ".EQ";
2926 } else
2927 mac = ".EN";
2928
2929 /* Handle following text, protecting whitespace. */
2930
2931 if (*cp2 != '\0') {
2932 aft_nl = "\n";
2933 if (r->eqn != NULL)
2934 aft_pr = "\\&";
2935 }
2936
2937 /* Do the actual replacement. */
2938
2939 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
2940 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
2941 free(buf->buf);
2942 buf->buf = cp1;
2943
2944 /* Toggle the in-line state of the eqn subsystem. */
2945
2946 r->eqn_inline = r->eqn == NULL;
2947 return ROFF_REPARSE;
2948 }
2949
2950 static enum rofferr
2951 roff_EQ(ROFF_ARGS)
2952 {
2953 struct roff_node *n;
2954
2955 if (r->man->macroset == MACROSET_MAN)
2956 man_breakscope(r->man, ROFF_EQ);
2957 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
2958 if (ln > r->man->last->line)
2959 n->flags |= NODE_LINE;
2960 n->eqn = mandoc_calloc(1, sizeof(*n->eqn));
2961 n->eqn->expectargs = UINT_MAX;
2962 roff_node_append(r->man, n);
2963 r->man->next = ROFF_NEXT_SIBLING;
2964
2965 assert(r->eqn == NULL);
2966 if (r->last_eqn == NULL)
2967 r->last_eqn = eqn_alloc(r->parse);
2968 else
2969 eqn_reset(r->last_eqn);
2970 r->eqn = r->last_eqn;
2971 r->eqn->node = n;
2972
2973 if (buf->buf[pos] != '\0')
2974 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2975 ".EQ %s", buf->buf + pos);
2976
2977 return ROFF_IGN;
2978 }
2979
2980 static enum rofferr
2981 roff_EN(ROFF_ARGS)
2982 {
2983 if (r->eqn != NULL) {
2984 eqn_parse(r->eqn);
2985 r->eqn = NULL;
2986 } else
2987 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
2988 if (buf->buf[pos] != '\0')
2989 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2990 "EN %s", buf->buf + pos);
2991 return ROFF_IGN;
2992 }
2993
2994 static enum rofferr
2995 roff_TS(ROFF_ARGS)
2996 {
2997 if (r->tbl != NULL) {
2998 mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
2999 ln, ppos, "TS breaks TS");
3000 tbl_end(r->tbl);
3001 }
3002 r->tbl = tbl_alloc(ppos, ln, r->parse);
3003 if (r->last_tbl)
3004 r->last_tbl->next = r->tbl;
3005 else
3006 r->first_tbl = r->tbl;
3007 r->last_tbl = r->tbl;
3008 return ROFF_IGN;
3009 }
3010
3011 static enum rofferr
3012 roff_onearg(ROFF_ARGS)
3013 {
3014 struct roff_node *n;
3015 char *cp;
3016 int npos;
3017
3018 if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3019 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3020 tok == ROFF_ti))
3021 man_breakscope(r->man, tok);
3022
3023 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3024 r->man->last = roffce_node;
3025 r->man->next = ROFF_NEXT_SIBLING;
3026 }
3027
3028 roff_elem_alloc(r->man, ln, ppos, tok);
3029 n = r->man->last;
3030
3031 cp = buf->buf + pos;
3032 if (*cp != '\0') {
3033 while (*cp != '\0' && *cp != ' ')
3034 cp++;
3035 while (*cp == ' ')
3036 *cp++ = '\0';
3037 if (*cp != '\0')
3038 mandoc_vmsg(MANDOCERR_ARG_EXCESS,
3039 r->parse, ln, cp - buf->buf,
3040 "%s ... %s", roff_name[tok], cp);
3041 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3042 }
3043
3044 if (tok == ROFF_ce || tok == ROFF_rj) {
3045 if (r->man->last->type == ROFFT_ELEM) {
3046 roff_word_alloc(r->man, ln, pos, "1");
3047 r->man->last->flags |= NODE_NOSRC;
3048 }
3049 npos = 0;
3050 if (roff_evalnum(r, ln, r->man->last->string, &npos,
3051 &roffce_lines, 0) == 0) {
3052 mandoc_vmsg(MANDOCERR_CE_NONUM,
3053 r->parse, ln, pos, "ce %s", buf->buf + pos);
3054 roffce_lines = 1;
3055 }
3056 if (roffce_lines < 1) {
3057 r->man->last = r->man->last->parent;
3058 roffce_node = NULL;
3059 roffce_lines = 0;
3060 } else
3061 roffce_node = r->man->last->parent;
3062 } else {
3063 n->flags |= NODE_VALID | NODE_ENDED;
3064 r->man->last = n;
3065 }
3066 n->flags |= NODE_LINE;
3067 r->man->next = ROFF_NEXT_SIBLING;
3068 return ROFF_IGN;
3069 }
3070
3071 static enum rofferr
3072 roff_manyarg(ROFF_ARGS)
3073 {
3074 struct roff_node *n;
3075 char *sp, *ep;
3076
3077 roff_elem_alloc(r->man, ln, ppos, tok);
3078 n = r->man->last;
3079
3080 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3081 while (*ep != '\0' && *ep != ' ')
3082 ep++;
3083 while (*ep == ' ')
3084 *ep++ = '\0';
3085 roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3086 }
3087
3088 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3089 r->man->last = n;
3090 r->man->next = ROFF_NEXT_SIBLING;
3091 return ROFF_IGN;
3092 }
3093
3094 static enum rofferr
3095 roff_als(ROFF_ARGS)
3096 {
3097 char *oldn, *newn, *end, *value;
3098 size_t oldsz, newsz, valsz;
3099
3100 newn = oldn = buf->buf + pos;
3101 if (*newn == '\0')
3102 return ROFF_IGN;
3103
3104 newsz = roff_getname(r, &oldn, ln, pos);
3105 if (newn[newsz] == '\\' || *oldn == '\0')
3106 return ROFF_IGN;
3107
3108 end = oldn;
3109 oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3110 if (oldsz == 0)
3111 return ROFF_IGN;
3112
3113 valsz = mandoc_asprintf(&value, ".%.*s \\$*\\\"\n",
3114 (int)oldsz, oldn);
3115 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3116 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3117 free(value);
3118 return ROFF_IGN;
3119 }
3120
3121 static enum rofferr
3122 roff_br(ROFF_ARGS)
3123 {
3124 if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3125 man_breakscope(r->man, ROFF_br);
3126 roff_elem_alloc(r->man, ln, ppos, ROFF_br);
3127 if (buf->buf[pos] != '\0')
3128 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
3129 "%s %s", roff_name[tok], buf->buf + pos);
3130 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3131 r->man->next = ROFF_NEXT_SIBLING;
3132 return ROFF_IGN;
3133 }
3134
3135 static enum rofferr
3136 roff_cc(ROFF_ARGS)
3137 {
3138 const char *p;
3139
3140 p = buf->buf + pos;
3141
3142 if (*p == '\0' || (r->control = *p++) == '.')
3143 r->control = '\0';
3144
3145 if (*p != '\0')
3146 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
3147 ln, p - buf->buf, "cc ... %s", p);
3148
3149 return ROFF_IGN;
3150 }
3151
3152 static enum rofferr
3153 roff_ec(ROFF_ARGS)
3154 {
3155 const char *p;
3156
3157 p = buf->buf + pos;
3158 if (*p == '\0')
3159 r->escape = '\\';
3160 else {
3161 r->escape = *p;
3162 if (*++p != '\0')
3163 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
3164 ln, p - buf->buf, "ec ... %s", p);
3165 }
3166 return ROFF_IGN;
3167 }
3168
3169 static enum rofferr
3170 roff_eo(ROFF_ARGS)
3171 {
3172 r->escape = '\0';
3173 if (buf->buf[pos] != '\0')
3174 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse,
3175 ln, pos, "eo %s", buf->buf + pos);
3176 return ROFF_IGN;
3177 }
3178
3179 static enum rofferr
3180 roff_nop(ROFF_ARGS)
3181 {
3182 while (buf->buf[pos] == ' ')
3183 pos++;
3184 *offs = pos;
3185 return ROFF_RERUN;
3186 }
3187
3188 static enum rofferr
3189 roff_tr(ROFF_ARGS)
3190 {
3191 const char *p, *first, *second;
3192 size_t fsz, ssz;
3193 enum mandoc_esc esc;
3194
3195 p = buf->buf + pos;
3196
3197 if (*p == '\0') {
3198 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr");
3199 return ROFF_IGN;
3200 }
3201
3202 while (*p != '\0') {
3203 fsz = ssz = 1;
3204
3205 first = p++;
3206 if (*first == '\\') {
3207 esc = mandoc_escape(&p, NULL, NULL);
3208 if (esc == ESCAPE_ERROR) {
3209 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
3210 ln, (int)(p - buf->buf), first);
3211 return ROFF_IGN;
3212 }
3213 fsz = (size_t)(p - first);
3214 }
3215
3216 second = p++;
3217 if (*second == '\\') {
3218 esc = mandoc_escape(&p, NULL, NULL);
3219 if (esc == ESCAPE_ERROR) {
3220 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
3221 ln, (int)(p - buf->buf), second);
3222 return ROFF_IGN;
3223 }
3224 ssz = (size_t)(p - second);
3225 } else if (*second == '\0') {
3226 mandoc_vmsg(MANDOCERR_TR_ODD, r->parse,
3227 ln, first - buf->buf, "tr %s", first);
3228 second = " ";
3229 p--;
3230 }
3231
3232 if (fsz > 1) {
3233 roff_setstrn(&r->xmbtab, first, fsz,
3234 second, ssz, 0);
3235 continue;
3236 }
3237
3238 if (r->xtab == NULL)
3239 r->xtab = mandoc_calloc(128,
3240 sizeof(struct roffstr));
3241
3242 free(r->xtab[(int)*first].p);
3243 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3244 r->xtab[(int)*first].sz = ssz;
3245 }
3246
3247 return ROFF_IGN;
3248 }
3249
3250 static enum rofferr
3251 roff_rn(ROFF_ARGS)
3252 {
3253 const char *value;
3254 char *oldn, *newn, *end;
3255 size_t oldsz, newsz;
3256 int deftype;
3257
3258 oldn = newn = buf->buf + pos;
3259 if (*oldn == '\0')
3260 return ROFF_IGN;
3261
3262 oldsz = roff_getname(r, &newn, ln, pos);
3263 if (oldn[oldsz] == '\\' || *newn == '\0')
3264 return ROFF_IGN;
3265
3266 end = newn;
3267 newsz = roff_getname(r, &end, ln, newn - buf->buf);
3268 if (newsz == 0)
3269 return ROFF_IGN;
3270
3271 deftype = ROFFDEF_ANY;
3272 value = roff_getstrn(r, oldn, oldsz, &deftype);
3273 switch (deftype) {
3274 case ROFFDEF_USER:
3275 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3276 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3277 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3278 break;
3279 case ROFFDEF_PRE:
3280 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3281 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3282 break;
3283 case ROFFDEF_REN:
3284 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3285 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3286 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3287 break;
3288 case ROFFDEF_STD:
3289 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3290 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3291 break;
3292 default:
3293 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3294 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3295 break;
3296 }
3297 return ROFF_IGN;
3298 }
3299
3300 static enum rofferr
3301 roff_so(ROFF_ARGS)
3302 {
3303 char *name, *cp;
3304
3305 name = buf->buf + pos;
3306 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
3307
3308 /*
3309 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3310 * opening anything that's not in our cwd or anything beneath
3311 * it. Thus, explicitly disallow traversing up the file-system
3312 * or using absolute paths.
3313 */
3314
3315 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3316 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
3317 ".so %s", name);
3318 buf->sz = mandoc_asprintf(&cp,
3319 ".sp\nSee the file %s.\n.sp", name) + 1;
3320 free(buf->buf);
3321 buf->buf = cp;
3322 *offs = 0;
3323 return ROFF_REPARSE;
3324 }
3325
3326 *offs = pos;
3327 return ROFF_SO;
3328 }
3329
3330 /* --- user defined strings and macros ------------------------------------ */
3331
3332 static enum rofferr
3333 roff_userdef(ROFF_ARGS)
3334 {
3335 const char *arg[16], *ap;
3336 char *cp, *n1, *n2;
3337 int expand_count, i, ib, ie;
3338 size_t asz, rsz;
3339
3340 /*
3341 * Collect pointers to macro argument strings
3342 * and NUL-terminate them.
3343 */
3344
3345 r->argc = 0;
3346 cp = buf->buf + pos;
3347 for (i = 0; i < 16; i++) {
3348 if (*cp == '\0')
3349 arg[i] = "";
3350 else {
3351 arg[i] = mandoc_getarg(r->parse, &cp, ln, &pos);
3352 r->argc = i + 1;
3353 }
3354 }
3355
3356 /*
3357 * Expand macro arguments.
3358 */
3359
3360 buf->sz = strlen(r->current_string) + 1;
3361 n1 = n2 = cp = mandoc_malloc(buf->sz);
3362 memcpy(n1, r->current_string, buf->sz);
3363 expand_count = 0;
3364 while (*cp != '\0') {
3365
3366 /* Scan ahead for the next argument invocation. */
3367
3368 if (*cp++ != '\\')
3369 continue;
3370 if (*cp++ != '$')
3371 continue;
3372 if (*cp == '*') { /* \\$* inserts all arguments */
3373 ib = 0;
3374 ie = r->argc - 1;
3375 } else { /* \\$1 .. \\$9 insert one argument */
3376 ib = ie = *cp - '1';
3377 if (ib < 0 || ib > 8)
3378 continue;
3379 }
3380 cp -= 2;
3381
3382 /*
3383 * Prevent infinite recursion.
3384 */
3385
3386 if (cp >= n2)
3387 expand_count = 1;
3388 else if (++expand_count > EXPAND_LIMIT) {
3389 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
3390 ln, (int)(cp - n1), NULL);
3391 free(buf->buf);
3392 buf->buf = n1;
3393 *offs = 0;
3394 return ROFF_IGN;
3395 }
3396
3397 /*
3398 * Determine the size of the expanded argument,
3399 * taking escaping of quotes into account.
3400 */
3401
3402 asz = ie > ib ? ie - ib : 0; /* for blanks */
3403 for (i = ib; i <= ie; i++) {
3404 for (ap = arg[i]; *ap != '\0'; ap++) {
3405 asz++;
3406 if (*ap == '"')
3407 asz += 3;
3408 }
3409 }
3410 if (asz != 3) {
3411
3412 /*
3413 * Determine the size of the rest of the
3414 * unexpanded macro, including the NUL.
3415 */
3416
3417 rsz = buf->sz - (cp - n1) - 3;
3418
3419 /*
3420 * When shrinking, move before
3421 * releasing the storage.
3422 */
3423
3424 if (asz < 3)
3425 memmove(cp + asz, cp + 3, rsz);
3426
3427 /*
3428 * Resize the storage for the macro
3429 * and readjust the parse pointer.
3430 */
3431
3432 buf->sz += asz - 3;
3433 n2 = mandoc_realloc(n1, buf->sz);
3434 cp = n2 + (cp - n1);
3435 n1 = n2;
3436
3437 /*
3438 * When growing, make room
3439 * for the expanded argument.
3440 */
3441
3442 if (asz > 3)
3443 memmove(cp + asz, cp + 3, rsz);
3444 }
3445
3446 /* Copy the expanded argument, escaping quotes. */
3447
3448 n2 = cp;
3449 for (i = ib; i <= ie; i++) {
3450 for (ap = arg[i]; *ap != '\0'; ap++) {
3451 if (*ap == '"') {
3452 memcpy(n2, "\\(dq", 4);
3453 n2 += 4;
3454 } else
3455 *n2++ = *ap;
3456 }
3457 if (i < ie)
3458 *n2++ = ' ';
3459 }
3460 }
3461
3462 /*
3463 * Replace the macro invocation
3464 * by the expanded macro.
3465 */
3466
3467 free(buf->buf);
3468 buf->buf = n1;
3469 *offs = 0;
3470
3471 return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
3472 ROFF_REPARSE : ROFF_APPEND;
3473 }
3474
3475 /*
3476 * Calling a high-level macro that was renamed with .rn.
3477 * r->current_string has already been set up by roff_parse().
3478 */
3479 static enum rofferr
3480 roff_renamed(ROFF_ARGS)
3481 {
3482 char *nbuf;
3483
3484 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
3485 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
3486 free(buf->buf);
3487 buf->buf = nbuf;
3488 *offs = 0;
3489 return ROFF_CONT;
3490 }
3491
3492 static size_t
3493 roff_getname(struct roff *r, char **cpp, int ln, int pos)
3494 {
3495 char *name, *cp;
3496 size_t namesz;
3497
3498 name = *cpp;
3499 if ('\0' == *name)
3500 return 0;
3501
3502 /* Read until end of name and terminate it with NUL. */
3503 for (cp = name; 1; cp++) {
3504 if ('\0' == *cp || ' ' == *cp) {
3505 namesz = cp - name;
3506 break;
3507 }
3508 if ('\\' != *cp)
3509 continue;
3510 namesz = cp - name;
3511 if ('{' == cp[1] || '}' == cp[1])
3512 break;
3513 cp++;
3514 if ('\\' == *cp)
3515 continue;
3516 mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
3517 "%.*s", (int)(cp - name + 1), name);
3518 mandoc_escape((const char **)&cp, NULL, NULL);
3519 break;
3520 }
3521
3522 /* Read past spaces. */
3523 while (' ' == *cp)
3524 cp++;
3525
3526 *cpp = cp;
3527 return namesz;
3528 }
3529
3530 /*
3531 * Store *string into the user-defined string called *name.
3532 * To clear an existing entry, call with (*r, *name, NULL, 0).
3533 * append == 0: replace mode
3534 * append == 1: single-line append mode
3535 * append == 2: multiline append mode, append '\n' after each call
3536 */
3537 static void
3538 roff_setstr(struct roff *r, const char *name, const char *string,
3539 int append)
3540 {
3541 size_t namesz;
3542
3543 namesz = strlen(name);
3544 roff_setstrn(&r->strtab, name, namesz, string,
3545 string ? strlen(string) : 0, append);
3546 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3547 }
3548
3549 static void
3550 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
3551 const char *string, size_t stringsz, int append)
3552 {
3553 struct roffkv *n;
3554 char *c;
3555 int i;
3556 size_t oldch, newch;
3557
3558 /* Search for an existing string with the same name. */
3559 n = *r;
3560
3561 while (n && (namesz != n->key.sz ||
3562 strncmp(n->key.p, name, namesz)))
3563 n = n->next;
3564
3565 if (NULL == n) {
3566 /* Create a new string table entry. */
3567 n = mandoc_malloc(sizeof(struct roffkv));
3568 n->key.p = mandoc_strndup(name, namesz);
3569 n->key.sz = namesz;
3570 n->val.p = NULL;
3571 n->val.sz = 0;
3572 n->next = *r;
3573 *r = n;
3574 } else if (0 == append) {
3575 free(n->val.p);
3576 n->val.p = NULL;
3577 n->val.sz = 0;
3578 }
3579
3580 if (NULL == string)
3581 return;
3582
3583 /*
3584 * One additional byte for the '\n' in multiline mode,
3585 * and one for the terminating '\0'.
3586 */
3587 newch = stringsz + (1 < append ? 2u : 1u);
3588
3589 if (NULL == n->val.p) {
3590 n->val.p = mandoc_malloc(newch);
3591 *n->val.p = '\0';
3592 oldch = 0;
3593 } else {
3594 oldch = n->val.sz;
3595 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
3596 }
3597
3598 /* Skip existing content in the destination buffer. */
3599 c = n->val.p + (int)oldch;
3600
3601 /* Append new content to the destination buffer. */
3602 i = 0;
3603 while (i < (int)stringsz) {
3604 /*
3605 * Rudimentary roff copy mode:
3606 * Handle escaped backslashes.
3607 */
3608 if ('\\' == string[i] && '\\' == string[i + 1])
3609 i++;
3610 *c++ = string[i++];
3611 }
3612
3613 /* Append terminating bytes. */
3614 if (1 < append)
3615 *c++ = '\n';
3616
3617 *c = '\0';
3618 n->val.sz = (int)(c - n->val.p);
3619 }
3620
3621 static const char *
3622 roff_getstrn(struct roff *r, const char *name, size_t len,
3623 int *deftype)
3624 {
3625 const struct roffkv *n;
3626 int found, i;
3627 enum roff_tok tok;
3628
3629 found = 0;
3630 for (n = r->strtab; n != NULL; n = n->next) {
3631 if (strncmp(name, n->key.p, len) != 0 ||
3632 n->key.p[len] != '\0' || n->val.p == NULL)
3633 continue;
3634 if (*deftype & ROFFDEF_USER) {
3635 *deftype = ROFFDEF_USER;
3636 return n->val.p;
3637 } else {
3638 found = 1;
3639 break;
3640 }
3641 }
3642 for (n = r->rentab; n != NULL; n = n->next) {
3643 if (strncmp(name, n->key.p, len) != 0 ||
3644 n->key.p[len] != '\0' || n->val.p == NULL)
3645 continue;
3646 if (*deftype & ROFFDEF_REN) {
3647 *deftype = ROFFDEF_REN;
3648 return n->val.p;
3649 } else {
3650 found = 1;
3651 break;
3652 }
3653 }
3654 for (i = 0; i < PREDEFS_MAX; i++) {
3655 if (strncmp(name, predefs[i].name, len) != 0 ||
3656 predefs[i].name[len] != '\0')
3657 continue;
3658 if (*deftype & ROFFDEF_PRE) {
3659 *deftype = ROFFDEF_PRE;
3660 return predefs[i].str;
3661 } else {
3662 found = 1;
3663 break;
3664 }
3665 }
3666 if (r->man->macroset != MACROSET_MAN) {
3667 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
3668 if (strncmp(name, roff_name[tok], len) != 0 ||
3669 roff_name[tok][len] != '\0')
3670 continue;
3671 if (*deftype & ROFFDEF_STD) {
3672 *deftype = ROFFDEF_STD;
3673 return NULL;
3674 } else {
3675 found = 1;
3676 break;
3677 }
3678 }
3679 }
3680 if (r->man->macroset != MACROSET_MDOC) {
3681 for (tok = MAN_TH; tok < MAN_MAX; tok++) {
3682 if (strncmp(name, roff_name[tok], len) != 0 ||
3683 roff_name[tok][len] != '\0')
3684 continue;
3685 if (*deftype & ROFFDEF_STD) {
3686 *deftype = ROFFDEF_STD;
3687 return NULL;
3688 } else {
3689 found = 1;
3690 break;
3691 }
3692 }
3693 }
3694
3695 if (found == 0 && *deftype != ROFFDEF_ANY) {
3696 if (*deftype & ROFFDEF_REN) {
3697 /*
3698 * This might still be a request,
3699 * so do not treat it as undefined yet.
3700 */
3701 *deftype = ROFFDEF_UNDEF;
3702 return NULL;
3703 }
3704
3705 /* Using an undefined string defines it to be empty. */
3706
3707 roff_setstrn(&r->strtab, name, len, "", 0, 0);
3708 roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
3709 }
3710
3711 *deftype = 0;
3712 return NULL;
3713 }
3714
3715 static void
3716 roff_freestr(struct roffkv *r)
3717 {
3718 struct roffkv *n, *nn;
3719
3720 for (n = r; n; n = nn) {
3721 free(n->key.p);
3722 free(n->val.p);
3723 nn = n->next;
3724 free(n);
3725 }
3726 }
3727
3728 /* --- accessors and utility functions ------------------------------------ */
3729
3730 /*
3731 * Duplicate an input string, making the appropriate character
3732 * conversations (as stipulated by `tr') along the way.
3733 * Returns a heap-allocated string with all the replacements made.
3734 */
3735 char *
3736 roff_strdup(const struct roff *r, const char *p)
3737 {
3738 const struct roffkv *cp;
3739 char *res;
3740 const char *pp;
3741 size_t ssz, sz;
3742 enum mandoc_esc esc;
3743
3744 if (NULL == r->xmbtab && NULL == r->xtab)
3745 return mandoc_strdup(p);
3746 else if ('\0' == *p)
3747 return mandoc_strdup("");
3748
3749 /*
3750 * Step through each character looking for term matches
3751 * (remember that a `tr' can be invoked with an escape, which is
3752 * a glyph but the escape is multi-character).
3753 * We only do this if the character hash has been initialised
3754 * and the string is >0 length.
3755 */
3756
3757 res = NULL;
3758 ssz = 0;
3759
3760 while ('\0' != *p) {
3761 assert((unsigned int)*p < 128);
3762 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
3763 sz = r->xtab[(int)*p].sz;
3764 res = mandoc_realloc(res, ssz + sz + 1);
3765 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
3766 ssz += sz;
3767 p++;
3768 continue;
3769 } else if ('\\' != *p) {
3770 res = mandoc_realloc(res, ssz + 2);
3771 res[ssz++] = *p++;
3772 continue;
3773 }
3774
3775 /* Search for term matches. */
3776 for (cp = r->xmbtab; cp; cp = cp->next)
3777 if (0 == strncmp(p, cp->key.p, cp->key.sz))
3778 break;
3779
3780 if (NULL != cp) {
3781 /*
3782 * A match has been found.
3783 * Append the match to the array and move
3784 * forward by its keysize.
3785 */
3786 res = mandoc_realloc(res,
3787 ssz + cp->val.sz + 1);
3788 memcpy(res + ssz, cp->val.p, cp->val.sz);
3789 ssz += cp->val.sz;
3790 p += (int)cp->key.sz;
3791 continue;
3792 }
3793
3794 /*
3795 * Handle escapes carefully: we need to copy
3796 * over just the escape itself, or else we might
3797 * do replacements within the escape itself.
3798 * Make sure to pass along the bogus string.
3799 */
3800 pp = p++;
3801 esc = mandoc_escape(&p, NULL, NULL);
3802 if (ESCAPE_ERROR == esc) {
3803 sz = strlen(pp);
3804 res = mandoc_realloc(res, ssz + sz + 1);
3805 memcpy(res + ssz, pp, sz);
3806 break;
3807 }
3808 /*
3809 * We bail out on bad escapes.
3810 * No need to warn: we already did so when
3811 * roff_res() was called.
3812 */
3813 sz = (int)(p - pp);
3814 res = mandoc_realloc(res, ssz + sz + 1);
3815 memcpy(res + ssz, pp, sz);
3816 ssz += sz;
3817 }
3818
3819 res[(int)ssz] = '\0';
3820 return res;
3821 }
3822
3823 int
3824 roff_getformat(const struct roff *r)
3825 {
3826
3827 return r->format;
3828 }
3829
3830 /*
3831 * Find out whether a line is a macro line or not.
3832 * If it is, adjust the current position and return one; if it isn't,
3833 * return zero and don't change the current position.
3834 * If the control character has been set with `.cc', then let that grain
3835 * precedence.
3836 * This is slighly contrary to groff, where using the non-breaking
3837 * control character when `cc' has been invoked will cause the
3838 * non-breaking macro contents to be printed verbatim.
3839 */
3840 int
3841 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
3842 {
3843 int pos;
3844
3845 pos = *ppos;
3846
3847 if (r->control != '\0' && cp[pos] == r->control)
3848 pos++;
3849 else if (r->control != '\0')
3850 return 0;
3851 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
3852 pos += 2;
3853 else if ('.' == cp[pos] || '\'' == cp[pos])
3854 pos++;
3855 else
3856 return 0;
3857
3858 while (' ' == cp[pos] || '\t' == cp[pos])
3859 pos++;
3860
3861 *ppos = pos;
3862 return 1;
3863 }