git.cameronkatri.com Git - mandoc.git/blob - roff.c
no-fill mode has to be suspended during tbl(7) rendering, too
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.361 2019/01/05 09:10:32 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31 #include "mandoc_aux.h"
32 #include "mandoc_ohash.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "mandoc_parse.h"
36 #include "libmandoc.h"
37 #include "roff_int.h"
38 #include "tbl_parse.h"
39 #include "eqn_parse.h"
40
41 /*
42 * ASCII_ESC is used to signal from roff_getarg() to roff_expand()
43 * that an escape sequence resulted from copy-in processing and
44 * needs to be checked or interpolated. As it is used nowhere
45 * else, it is defined here rather than in a header file.
46 */
47 #define ASCII_ESC 27
48
49 /* Maximum number of string expansions per line, to break infinite loops. */
50 #define EXPAND_LIMIT 1000
51
52 /* Types of definitions of macros and strings. */
/* Each kind is a distinct bit, so kinds can be OR-ed into a mask. */
53 #define ROFFDEF_USER (1 << 1) /* User-defined. */
54 #define ROFFDEF_PRE (1 << 2) /* Predefined. */
55 #define ROFFDEF_REN (1 << 3) /* Renamed standard macro. */
56 #define ROFFDEF_STD (1 << 4) /* mdoc(7) or man(7) macro. */
57 #define ROFFDEF_ANY (ROFFDEF_USER | ROFFDEF_PRE | \
58 ROFFDEF_REN | ROFFDEF_STD) /* Mask of the four kinds above. */
59 #define ROFFDEF_UNDEF (1 << 5) /* Completely undefined; not part of ROFFDEF_ANY. */
60
61 /* --- data types --------------------------------------------------------- */
62
63 /*
64 * An incredibly-simple string buffer.
65 */
66 struct roffstr {
/* Used for roffkv keys and values, roffreg keys, and the `tr' table xtab. */
67 char *p; /* nil-terminated buffer */
68 size_t sz; /* saved strlen(p) */
69 };
70
71 /*
72 * A key-value roffstr pair as part of a singly-linked list.
73 */
74 struct roffkv {
/* struct roff keeps three such lists: strtab, rentab and xmbtab. */
75 struct roffstr key; /* lookup name */
76 struct roffstr val; /* associated value */
77 struct roffkv *next; /* next in list */
78 };
79
80 /*
81 * A single number register as part of a singly-linked list.
82 */
83 struct roffreg {
/* The list head is struct roff regtab. */
84 struct roffstr key; /* register name */
85 int val; /* current value */
86 int step; /* NOTE(review): presumably the auto-increment step - confirm */
87 struct roffreg *next; /* next in list */
88 };
89
90 /*
91 * Association of request and macro names with token IDs.
92 */
93 struct roffreq {
/*
 * Hash table entry; roffhash_alloc() allocates sizeof(*req) plus
 * strlen(name) + 1 bytes and keys the table at offsetof(name).
 */
94 enum roff_tok tok; /* token ID the name maps to */
95 char name[]; /* flexible array member holding the nil-terminated name */
96 };
97
98 /*
99 * A macro processing context.
100 * More than one is needed when macro calls are nested.
101 */
102 struct mctx {
/* Elements of the mstack array in struct roff; roff_free() frees each argv. */
103 char **argv; /* argument vector of this context */
104 int argc; /* NOTE(review): presumably the number of arguments in use - confirm */
105 int argsz; /* NOTE(review): presumably the allocated size of argv - confirm */
106 };
107
108 struct roff {
/* Roff parser state: created by roff_alloc(), cleared by roff_reset(), destroyed by roff_free(). */
109 struct roff_man *man; /* mdoc or man parser */
110 struct roffnode *last; /* leaf of stack */
111 struct mctx *mstack; /* stack of macro contexts */
112 int *rstack; /* stack of inverted `ie' values */
113 struct ohash *reqtab; /* request lookup table */
114 struct roffreg *regtab; /* number registers */
115 struct roffkv *strtab; /* user-defined strings & macros */
116 struct roffkv *rentab; /* renamed strings & macros */
117 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */
118 struct roffstr *xtab; /* single-byte trans table (`tr'); roff_free1() frees 128 entries */
119 const char *current_string; /* value of last called user macro */
120 struct tbl_node *first_tbl; /* first table parsed */
121 struct tbl_node *last_tbl; /* last table parsed */
122 struct tbl_node *tbl; /* current table being parsed */
123 struct eqn_node *last_eqn; /* equation parser */
124 struct eqn_node *eqn; /* active equation parser */
125 int eqn_inline; /* current equation is inline */
126 int options; /* parse options */
127 int mstacksz; /* current size of mstack */
128 int mstackpos; /* position in mstack; -1 after roff_alloc() */
129 int rstacksz; /* current size limit of rstack */
130 int rstackpos; /* position in rstack; -1 after roff_alloc() and roff_free1() */
131 int format; /* current file in mdoc or man format */
132 char control; /* control character; '\0' after roff_reset() */
133 char escape; /* escape character; '\\' after roff_alloc() and roff_reset() */
134 };
135
136 struct roffnode {
/* One pending roff instruction; pushed by roffnode_push(), released by roffnode_pop(). */
137 enum roff_tok tok; /* type of node */
138 struct roffnode *parent; /* up one in stack */
139 int line; /* parse line */
140 int col; /* parse col */
141 char *name; /* node name, e.g. macro name; owned, freed on pop */
142 char *end; /* end-rules: custom token; owned, freed on pop */
143 int endspan; /* end-rules: next-line or infty */
144 int rule; /* current evaluation rule; copied from the parent on push */
145 };
146
/* Common parameter list shared by every request handler of type roffproc. */
147 #define ROFF_ARGS struct roff *r, /* parse ctx */ \
148 enum roff_tok tok, /* tok of macro */ \
149 struct buf *buf, /* input buffer */ \
150 int ln, /* parse line */ \
151 int ppos, /* original pos in buffer */ \
152 int pos, /* current pos in buffer */ \
153 int *offs /* reset offset of buffer data */
154
/* Signature of the handlers stored in struct roffmac. */
155 typedef int (*roffproc)(ROFF_ARGS);
156
157 struct roffmac {
/* One entry of the roffs[] table; text and sub are NULL for most requests. */
158 roffproc proc; /* process new macro */
159 roffproc text; /* process as child text of macro */
160 roffproc sub; /* process as child of macro */
161 int flags; /* 0 or ROFFMAC_STRUCT */
162 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
163 };
164
165 struct predef {
/* Element type of the predefs[] table. */
166 const char *name; /* predefined input name */
167 const char *str; /* replacement symbol */
168 };
169
/*
 * Expands to one struct predef initializer followed by a comma.
 * NOTE(review): presumably invoked by the lines of predefs.in - confirm.
 */
170 #define PREDEF(__name, __str) \
171 { (__name), (__str) },
172
173 /* --- function prototypes ------------------------------------------------ */
174
175 static int roffnode_cleanscope(struct roff *);
176 static int roffnode_pop(struct roff *);
177 static void roffnode_push(struct roff *, enum roff_tok,
178 const char *, int, int);
179 static void roff_addtbl(struct roff_man *, int, struct tbl_node *);
180 static int roff_als(ROFF_ARGS);
181 static int roff_block(ROFF_ARGS);
182 static int roff_block_text(ROFF_ARGS);
183 static int roff_block_sub(ROFF_ARGS);
184 static int roff_cblock(ROFF_ARGS);
185 static int roff_cc(ROFF_ARGS);
186 static int roff_ccond(struct roff *, int, int);
187 static int roff_char(ROFF_ARGS);
188 static int roff_cond(ROFF_ARGS);
189 static int roff_cond_text(ROFF_ARGS);
190 static int roff_cond_sub(ROFF_ARGS);
191 static int roff_ds(ROFF_ARGS);
192 static int roff_ec(ROFF_ARGS);
193 static int roff_eo(ROFF_ARGS);
194 static int roff_eqndelim(struct roff *, struct buf *, int);
195 static int roff_evalcond(struct roff *r, int, char *, int *);
196 static int roff_evalnum(struct roff *, int,
197 const char *, int *, int *, int);
198 static int roff_evalpar(struct roff *, int,
199 const char *, int *, int *, int);
200 static int roff_evalstrcond(const char *, int *);
201 static int roff_expand(struct roff *, struct buf *,
202 int, int, char);
203 static void roff_free1(struct roff *);
204 static void roff_freereg(struct roffreg *);
205 static void roff_freestr(struct roffkv *);
206 static size_t roff_getname(struct roff *, char **, int, int);
207 static int roff_getnum(const char *, int *, int *, int);
208 static int roff_getop(const char *, int *, char *);
209 static int roff_getregn(struct roff *,
210 const char *, size_t, char);
211 static int roff_getregro(const struct roff *,
212 const char *name);
213 static const char *roff_getstrn(struct roff *,
214 const char *, size_t, int *);
215 static int roff_hasregn(const struct roff *,
216 const char *, size_t);
217 static int roff_insec(ROFF_ARGS);
218 static int roff_it(ROFF_ARGS);
219 static int roff_line_ignore(ROFF_ARGS);
220 static void roff_man_alloc1(struct roff_man *);
221 static void roff_man_free1(struct roff_man *);
222 static int roff_manyarg(ROFF_ARGS);
223 static int roff_noarg(ROFF_ARGS);
224 static int roff_nop(ROFF_ARGS);
225 static int roff_nr(ROFF_ARGS);
226 static int roff_onearg(ROFF_ARGS);
227 static enum roff_tok roff_parse(struct roff *, char *, int *,
228 int, int);
229 static int roff_parsetext(struct roff *, struct buf *,
230 int, int *);
231 static int roff_renamed(ROFF_ARGS);
232 static int roff_return(ROFF_ARGS);
233 static int roff_rm(ROFF_ARGS);
234 static int roff_rn(ROFF_ARGS);
235 static int roff_rr(ROFF_ARGS);
236 static void roff_setregn(struct roff *, const char *,
237 size_t, int, char, int);
238 static void roff_setstr(struct roff *,
239 const char *, const char *, int);
240 static void roff_setstrn(struct roffkv **, const char *,
241 size_t, const char *, size_t, int);
242 static int roff_shift(ROFF_ARGS);
243 static int roff_so(ROFF_ARGS);
244 static int roff_tr(ROFF_ARGS);
245 static int roff_Dd(ROFF_ARGS);
246 static int roff_TE(ROFF_ARGS);
247 static int roff_TS(ROFF_ARGS);
248 static int roff_EQ(ROFF_ARGS);
249 static int roff_EN(ROFF_ARGS);
250 static int roff_T_(ROFF_ARGS);
251 static int roff_unsupp(ROFF_ARGS);
252 static int roff_userdef(ROFF_ARGS);
253
254 /* --- constant data ------------------------------------------------------ */
255
256 #define ROFFNUM_SCALE (1 << 0) /* Honour scaling in roff_getnum(). */
257 #define ROFFNUM_WHITE (1 << 1) /* Skip whitespace in roff_evalnum(). */
258
/*
 * Names of all requests and macros.  roffhash_alloc() indexes this
 * table through roff_name by enum roff_tok and skips NULL entries.
 * NOTE(review): the order presumably has to match enum roff_tok in
 * roff.h - confirm before inserting or moving entries.
 */
259 const char *__roff_name[MAN_MAX + 1] = {
260 "br", "ce", "fi", "ft",
261 "ll", "mc", "nf",
262 "po", "rj", "sp",
263 "ta", "ti", NULL,
264 "ab", "ad", "af", "aln",
265 "als", "am", "am1", "ami",
266 "ami1", "as", "as1", "asciify",
267 "backtrace", "bd", "bleedat", "blm",
268 "box", "boxa", "bp", "BP",
269 "break", "breakchar", "brnl", "brp",
270 "brpnl", "c2", "cc",
271 "cf", "cflags", "ch", "char",
272 "chop", "class", "close", "CL",
273 "color", "composite", "continue", "cp",
274 "cropat", "cs", "cu", "da",
275 "dch", "Dd", "de", "de1",
276 "defcolor", "dei", "dei1", "device",
277 "devicem", "di", "do", "ds",
278 "ds1", "dwh", "dt", "ec",
279 "ecr", "ecs", "el", "em",
280 "EN", "eo", "EP", "EQ",
281 "errprint", "ev", "evc", "ex",
282 "fallback", "fam", "fc", "fchar",
283 "fcolor", "fdeferlig", "feature", "fkern",
284 "fl", "flig", "fp", "fps",
285 "fschar", "fspacewidth", "fspecial", "ftr",
286 "fzoom", "gcolor", "hc", "hcode",
287 "hidechar", "hla", "hlm", "hpf",
288 "hpfa", "hpfcode", "hw", "hy",
289 "hylang", "hylen", "hym", "hypp",
290 "hys", "ie", "if", "ig",
291 "index", "it", "itc", "IX",
292 "kern", "kernafter", "kernbefore", "kernpair",
293 "lc", "lc_ctype", "lds", "length",
294 "letadj", "lf", "lg", "lhang",
295 "linetabs", "lnr", "lnrf", "lpfx",
296 "ls", "lsm", "lt",
297 "mediasize", "minss", "mk", "mso",
298 "na", "ne", "nh", "nhychar",
299 "nm", "nn", "nop", "nr",
300 "nrf", "nroff", "ns", "nx",
301 "open", "opena", "os", "output",
302 "padj", "papersize", "pc", "pev",
303 "pi", "PI", "pl", "pm",
304 "pn", "pnr", "ps",
305 "psbb", "pshape", "pso", "ptr",
306 "pvs", "rchar", "rd", "recursionlimit",
307 "return", "rfschar", "rhang",
308 "rm", "rn", "rnn", "rr",
309 "rs", "rt", "schar", "sentchar",
310 "shc", "shift", "sizes", "so",
311 "spacewidth", "special", "spreadwarn", "ss",
312 "sty", "substring", "sv", "sy",
313 "T&", "tc", "TE",
314 "TH", "tkf", "tl",
315 "tm", "tm1", "tmc", "tr",
316 "track", "transchar", "trf", "trimat",
317 "trin", "trnt", "troff", "TS",
318 "uf", "ul", "unformat", "unwatch",
319 "unwatchn", "vpt", "vs", "warn",
320 "warnscale", "watch", "watchlength", "watchn",
321 "wh", "while", "write", "writec",
322 "writem", "xflag", ".", NULL,
323 NULL, "text",
324 "Dd", "Dt", "Os", "Sh",
325 "Ss", "Pp", "D1", "Dl",
326 "Bd", "Ed", "Bl", "El",
327 "It", "Ad", "An", "Ap",
328 "Ar", "Cd", "Cm", "Dv",
329 "Er", "Ev", "Ex", "Fa",
330 "Fd", "Fl", "Fn", "Ft",
331 "Ic", "In", "Li", "Nd",
332 "Nm", "Op", "Ot", "Pa",
333 "Rv", "St", "Va", "Vt",
334 "Xr", "%A", "%B", "%D",
335 "%I", "%J", "%N", "%O",
336 "%P", "%R", "%T", "%V",
337 "Ac", "Ao", "Aq", "At",
338 "Bc", "Bf", "Bo", "Bq",
339 "Bsx", "Bx", "Db", "Dc",
340 "Do", "Dq", "Ec", "Ef",
341 "Em", "Eo", "Fx", "Ms",
342 "No", "Ns", "Nx", "Ox",
343 "Pc", "Pf", "Po", "Pq",
344 "Qc", "Ql", "Qo", "Qq",
345 "Re", "Rs", "Sc", "So",
346 "Sq", "Sm", "Sx", "Sy",
347 "Tn", "Ux", "Xc", "Xo",
348 "Fo", "Fc", "Oo", "Oc",
349 "Bk", "Ek", "Bt", "Hf",
350 "Fr", "Ud", "Lb", "Lp",
351 "Lk", "Mt", "Brq", "Bro",
352 "Brc", "%C", "Es", "En",
353 "Dx", "%Q", "%U", "Ta",
354 NULL,
355 "TH", "SH", "SS", "TP",
356 "TQ",
357 "LP", "PP", "P", "IP",
358 "HP", "SM", "SB", "BI",
359 "IB", "BR", "RB", "R",
360 "B", "I", "IR", "RI",
361 "RE", "RS", "DT", "UC",
362 "PD", "AT", "in",
363 "SY", "YS", "OP",
364 "EX", "EE", "UR",
365 "UE", "MT", "ME", NULL
366 };
/* Read-only view of the table above; this is the name the code in this file uses. */
367 const char *const *roff_name = __roff_name;
368
/*
 * Handlers for each roff request, listed in the same order as the
 * request names at the start of __roff_name; the trailing comment on
 * each entry names the request it belongs to.
 * NOTE(review): presumably indexed by enum roff_tok - confirm against
 * roff.h before inserting or moving entries.
 */
369 static struct roffmac roffs[TOKEN_NONE] = {
370 { roff_noarg, NULL, NULL, 0 }, /* br */
371 { roff_onearg, NULL, NULL, 0 }, /* ce */
372 { roff_noarg, NULL, NULL, 0 }, /* fi */
373 { roff_onearg, NULL, NULL, 0 }, /* ft */
374 { roff_onearg, NULL, NULL, 0 }, /* ll */
375 { roff_onearg, NULL, NULL, 0 }, /* mc */
376 { roff_noarg, NULL, NULL, 0 }, /* nf */
377 { roff_onearg, NULL, NULL, 0 }, /* po */
378 { roff_onearg, NULL, NULL, 0 }, /* rj */
379 { roff_onearg, NULL, NULL, 0 }, /* sp */
380 { roff_manyarg, NULL, NULL, 0 }, /* ta */
381 { roff_onearg, NULL, NULL, 0 }, /* ti */
382 { NULL, NULL, NULL, 0 }, /* ROFF_MAX */
383 { roff_unsupp, NULL, NULL, 0 }, /* ab */
384 { roff_line_ignore, NULL, NULL, 0 }, /* ad */
385 { roff_line_ignore, NULL, NULL, 0 }, /* af */
386 { roff_unsupp, NULL, NULL, 0 }, /* aln */
387 { roff_als, NULL, NULL, 0 }, /* als */
388 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am */
389 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am1 */
390 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami */
391 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami1 */
392 { roff_ds, NULL, NULL, 0 }, /* as */
393 { roff_ds, NULL, NULL, 0 }, /* as1 */
394 { roff_unsupp, NULL, NULL, 0 }, /* asciify */
395 { roff_line_ignore, NULL, NULL, 0 }, /* backtrace */
396 { roff_line_ignore, NULL, NULL, 0 }, /* bd */
397 { roff_line_ignore, NULL, NULL, 0 }, /* bleedat */
398 { roff_unsupp, NULL, NULL, 0 }, /* blm */
399 { roff_unsupp, NULL, NULL, 0 }, /* box */
400 { roff_unsupp, NULL, NULL, 0 }, /* boxa */
401 { roff_line_ignore, NULL, NULL, 0 }, /* bp */
402 { roff_unsupp, NULL, NULL, 0 }, /* BP */
403 { roff_unsupp, NULL, NULL, 0 }, /* break */
404 { roff_line_ignore, NULL, NULL, 0 }, /* breakchar */
405 { roff_line_ignore, NULL, NULL, 0 }, /* brnl */
406 { roff_noarg, NULL, NULL, 0 }, /* brp */
407 { roff_line_ignore, NULL, NULL, 0 }, /* brpnl */
408 { roff_unsupp, NULL, NULL, 0 }, /* c2 */
409 { roff_cc, NULL, NULL, 0 }, /* cc */
410 { roff_insec, NULL, NULL, 0 }, /* cf */
411 { roff_line_ignore, NULL, NULL, 0 }, /* cflags */
412 { roff_line_ignore, NULL, NULL, 0 }, /* ch */
413 { roff_char, NULL, NULL, 0 }, /* char */
414 { roff_unsupp, NULL, NULL, 0 }, /* chop */
415 { roff_line_ignore, NULL, NULL, 0 }, /* class */
416 { roff_insec, NULL, NULL, 0 }, /* close */
417 { roff_unsupp, NULL, NULL, 0 }, /* CL */
418 { roff_line_ignore, NULL, NULL, 0 }, /* color */
419 { roff_unsupp, NULL, NULL, 0 }, /* composite */
420 { roff_unsupp, NULL, NULL, 0 }, /* continue */
421 { roff_line_ignore, NULL, NULL, 0 }, /* cp */
422 { roff_line_ignore, NULL, NULL, 0 }, /* cropat */
423 { roff_line_ignore, NULL, NULL, 0 }, /* cs */
424 { roff_line_ignore, NULL, NULL, 0 }, /* cu */
425 { roff_unsupp, NULL, NULL, 0 }, /* da */
426 { roff_unsupp, NULL, NULL, 0 }, /* dch */
427 { roff_Dd, NULL, NULL, 0 }, /* Dd */
428 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de */
429 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de1 */
430 { roff_line_ignore, NULL, NULL, 0 }, /* defcolor */
431 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei */
432 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei1 */
433 { roff_unsupp, NULL, NULL, 0 }, /* device */
434 { roff_unsupp, NULL, NULL, 0 }, /* devicem */
435 { roff_unsupp, NULL, NULL, 0 }, /* di */
436 { roff_unsupp, NULL, NULL, 0 }, /* do */
437 { roff_ds, NULL, NULL, 0 }, /* ds */
438 { roff_ds, NULL, NULL, 0 }, /* ds1 */
439 { roff_unsupp, NULL, NULL, 0 }, /* dwh */
440 { roff_unsupp, NULL, NULL, 0 }, /* dt */
441 { roff_ec, NULL, NULL, 0 }, /* ec */
442 { roff_unsupp, NULL, NULL, 0 }, /* ecr */
443 { roff_unsupp, NULL, NULL, 0 }, /* ecs */
444 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* el */
445 { roff_unsupp, NULL, NULL, 0 }, /* em */
446 { roff_EN, NULL, NULL, 0 }, /* EN */
447 { roff_eo, NULL, NULL, 0 }, /* eo */
448 { roff_unsupp, NULL, NULL, 0 }, /* EP */
449 { roff_EQ, NULL, NULL, 0 }, /* EQ */
450 { roff_line_ignore, NULL, NULL, 0 }, /* errprint */
451 { roff_unsupp, NULL, NULL, 0 }, /* ev */
452 { roff_unsupp, NULL, NULL, 0 }, /* evc */
453 { roff_unsupp, NULL, NULL, 0 }, /* ex */
454 { roff_line_ignore, NULL, NULL, 0 }, /* fallback */
455 { roff_line_ignore, NULL, NULL, 0 }, /* fam */
456 { roff_unsupp, NULL, NULL, 0 }, /* fc */
457 { roff_unsupp, NULL, NULL, 0 }, /* fchar */
458 { roff_line_ignore, NULL, NULL, 0 }, /* fcolor */
459 { roff_line_ignore, NULL, NULL, 0 }, /* fdeferlig */
460 { roff_line_ignore, NULL, NULL, 0 }, /* feature */
461 { roff_line_ignore, NULL, NULL, 0 }, /* fkern */
462 { roff_line_ignore, NULL, NULL, 0 }, /* fl */
463 { roff_line_ignore, NULL, NULL, 0 }, /* flig */
464 { roff_line_ignore, NULL, NULL, 0 }, /* fp */
465 { roff_line_ignore, NULL, NULL, 0 }, /* fps */
466 { roff_unsupp, NULL, NULL, 0 }, /* fschar */
467 { roff_line_ignore, NULL, NULL, 0 }, /* fspacewidth */
468 { roff_line_ignore, NULL, NULL, 0 }, /* fspecial */
469 { roff_line_ignore, NULL, NULL, 0 }, /* ftr */
470 { roff_line_ignore, NULL, NULL, 0 }, /* fzoom */
471 { roff_line_ignore, NULL, NULL, 0 }, /* gcolor */
472 { roff_line_ignore, NULL, NULL, 0 }, /* hc */
473 { roff_line_ignore, NULL, NULL, 0 }, /* hcode */
474 { roff_line_ignore, NULL, NULL, 0 }, /* hidechar */
475 { roff_line_ignore, NULL, NULL, 0 }, /* hla */
476 { roff_line_ignore, NULL, NULL, 0 }, /* hlm */
477 { roff_line_ignore, NULL, NULL, 0 }, /* hpf */
478 { roff_line_ignore, NULL, NULL, 0 }, /* hpfa */
479 { roff_line_ignore, NULL, NULL, 0 }, /* hpfcode */
480 { roff_line_ignore, NULL, NULL, 0 }, /* hw */
481 { roff_line_ignore, NULL, NULL, 0 }, /* hy */
482 { roff_line_ignore, NULL, NULL, 0 }, /* hylang */
483 { roff_line_ignore, NULL, NULL, 0 }, /* hylen */
484 { roff_line_ignore, NULL, NULL, 0 }, /* hym */
485 { roff_line_ignore, NULL, NULL, 0 }, /* hypp */
486 { roff_line_ignore, NULL, NULL, 0 }, /* hys */
487 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* ie */
488 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* if */
489 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ig */
490 { roff_unsupp, NULL, NULL, 0 }, /* index */
491 { roff_it, NULL, NULL, 0 }, /* it */
492 { roff_unsupp, NULL, NULL, 0 }, /* itc */
493 { roff_line_ignore, NULL, NULL, 0 }, /* IX */
494 { roff_line_ignore, NULL, NULL, 0 }, /* kern */
495 { roff_line_ignore, NULL, NULL, 0 }, /* kernafter */
496 { roff_line_ignore, NULL, NULL, 0 }, /* kernbefore */
497 { roff_line_ignore, NULL, NULL, 0 }, /* kernpair */
498 { roff_unsupp, NULL, NULL, 0 }, /* lc */
499 { roff_unsupp, NULL, NULL, 0 }, /* lc_ctype */
500 { roff_unsupp, NULL, NULL, 0 }, /* lds */
501 { roff_unsupp, NULL, NULL, 0 }, /* length */
502 { roff_line_ignore, NULL, NULL, 0 }, /* letadj */
503 { roff_insec, NULL, NULL, 0 }, /* lf */
504 { roff_line_ignore, NULL, NULL, 0 }, /* lg */
505 { roff_line_ignore, NULL, NULL, 0 }, /* lhang */
506 { roff_unsupp, NULL, NULL, 0 }, /* linetabs */
507 { roff_unsupp, NULL, NULL, 0 }, /* lnr */
508 { roff_unsupp, NULL, NULL, 0 }, /* lnrf */
509 { roff_unsupp, NULL, NULL, 0 }, /* lpfx */
510 { roff_line_ignore, NULL, NULL, 0 }, /* ls */
511 { roff_unsupp, NULL, NULL, 0 }, /* lsm */
512 { roff_line_ignore, NULL, NULL, 0 }, /* lt */
513 { roff_line_ignore, NULL, NULL, 0 }, /* mediasize */
514 { roff_line_ignore, NULL, NULL, 0 }, /* minss */
515 { roff_line_ignore, NULL, NULL, 0 }, /* mk */
516 { roff_insec, NULL, NULL, 0 }, /* mso */
517 { roff_line_ignore, NULL, NULL, 0 }, /* na */
518 { roff_line_ignore, NULL, NULL, 0 }, /* ne */
519 { roff_line_ignore, NULL, NULL, 0 }, /* nh */
520 { roff_line_ignore, NULL, NULL, 0 }, /* nhychar */
521 { roff_unsupp, NULL, NULL, 0 }, /* nm */
522 { roff_unsupp, NULL, NULL, 0 }, /* nn */
523 { roff_nop, NULL, NULL, 0 }, /* nop */
524 { roff_nr, NULL, NULL, 0 }, /* nr */
525 { roff_unsupp, NULL, NULL, 0 }, /* nrf */
526 { roff_line_ignore, NULL, NULL, 0 }, /* nroff */
527 { roff_line_ignore, NULL, NULL, 0 }, /* ns */
528 { roff_insec, NULL, NULL, 0 }, /* nx */
529 { roff_insec, NULL, NULL, 0 }, /* open */
530 { roff_insec, NULL, NULL, 0 }, /* opena */
531 { roff_line_ignore, NULL, NULL, 0 }, /* os */
532 { roff_unsupp, NULL, NULL, 0 }, /* output */
533 { roff_line_ignore, NULL, NULL, 0 }, /* padj */
534 { roff_line_ignore, NULL, NULL, 0 }, /* papersize */
535 { roff_line_ignore, NULL, NULL, 0 }, /* pc */
536 { roff_line_ignore, NULL, NULL, 0 }, /* pev */
537 { roff_insec, NULL, NULL, 0 }, /* pi */
538 { roff_unsupp, NULL, NULL, 0 }, /* PI */
539 { roff_line_ignore, NULL, NULL, 0 }, /* pl */
540 { roff_line_ignore, NULL, NULL, 0 }, /* pm */
541 { roff_line_ignore, NULL, NULL, 0 }, /* pn */
542 { roff_line_ignore, NULL, NULL, 0 }, /* pnr */
543 { roff_line_ignore, NULL, NULL, 0 }, /* ps */
544 { roff_unsupp, NULL, NULL, 0 }, /* psbb */
545 { roff_unsupp, NULL, NULL, 0 }, /* pshape */
546 { roff_insec, NULL, NULL, 0 }, /* pso */
547 { roff_line_ignore, NULL, NULL, 0 }, /* ptr */
548 { roff_line_ignore, NULL, NULL, 0 }, /* pvs */
549 { roff_unsupp, NULL, NULL, 0 }, /* rchar */
550 { roff_line_ignore, NULL, NULL, 0 }, /* rd */
551 { roff_line_ignore, NULL, NULL, 0 }, /* recursionlimit */
552 { roff_return, NULL, NULL, 0 }, /* return */
553 { roff_unsupp, NULL, NULL, 0 }, /* rfschar */
554 { roff_line_ignore, NULL, NULL, 0 }, /* rhang */
555 { roff_rm, NULL, NULL, 0 }, /* rm */
556 { roff_rn, NULL, NULL, 0 }, /* rn */
557 { roff_unsupp, NULL, NULL, 0 }, /* rnn */
558 { roff_rr, NULL, NULL, 0 }, /* rr */
559 { roff_line_ignore, NULL, NULL, 0 }, /* rs */
560 { roff_line_ignore, NULL, NULL, 0 }, /* rt */
561 { roff_unsupp, NULL, NULL, 0 }, /* schar */
562 { roff_line_ignore, NULL, NULL, 0 }, /* sentchar */
563 { roff_line_ignore, NULL, NULL, 0 }, /* shc */
564 { roff_shift, NULL, NULL, 0 }, /* shift */
565 { roff_line_ignore, NULL, NULL, 0 }, /* sizes */
566 { roff_so, NULL, NULL, 0 }, /* so */
567 { roff_line_ignore, NULL, NULL, 0 }, /* spacewidth */
568 { roff_line_ignore, NULL, NULL, 0 }, /* special */
569 { roff_line_ignore, NULL, NULL, 0 }, /* spreadwarn */
570 { roff_line_ignore, NULL, NULL, 0 }, /* ss */
571 { roff_line_ignore, NULL, NULL, 0 }, /* sty */
572 { roff_unsupp, NULL, NULL, 0 }, /* substring */
573 { roff_line_ignore, NULL, NULL, 0 }, /* sv */
574 { roff_insec, NULL, NULL, 0 }, /* sy */
575 { roff_T_, NULL, NULL, 0 }, /* T& */
576 { roff_unsupp, NULL, NULL, 0 }, /* tc */
577 { roff_TE, NULL, NULL, 0 }, /* TE */
578 { roff_Dd, NULL, NULL, 0 }, /* TH */
579 { roff_line_ignore, NULL, NULL, 0 }, /* tkf */
580 { roff_unsupp, NULL, NULL, 0 }, /* tl */
581 { roff_line_ignore, NULL, NULL, 0 }, /* tm */
582 { roff_line_ignore, NULL, NULL, 0 }, /* tm1 */
583 { roff_line_ignore, NULL, NULL, 0 }, /* tmc */
584 { roff_tr, NULL, NULL, 0 }, /* tr */
585 { roff_line_ignore, NULL, NULL, 0 }, /* track */
586 { roff_line_ignore, NULL, NULL, 0 }, /* transchar */
587 { roff_insec, NULL, NULL, 0 }, /* trf */
588 { roff_line_ignore, NULL, NULL, 0 }, /* trimat */
589 { roff_unsupp, NULL, NULL, 0 }, /* trin */
590 { roff_unsupp, NULL, NULL, 0 }, /* trnt */
591 { roff_line_ignore, NULL, NULL, 0 }, /* troff */
592 { roff_TS, NULL, NULL, 0 }, /* TS */
593 { roff_line_ignore, NULL, NULL, 0 }, /* uf */
594 { roff_line_ignore, NULL, NULL, 0 }, /* ul */
595 { roff_unsupp, NULL, NULL, 0 }, /* unformat */
596 { roff_line_ignore, NULL, NULL, 0 }, /* unwatch */
597 { roff_line_ignore, NULL, NULL, 0 }, /* unwatchn */
598 { roff_line_ignore, NULL, NULL, 0 }, /* vpt */
599 { roff_line_ignore, NULL, NULL, 0 }, /* vs */
600 { roff_line_ignore, NULL, NULL, 0 }, /* warn */
601 { roff_line_ignore, NULL, NULL, 0 }, /* warnscale */
602 { roff_line_ignore, NULL, NULL, 0 }, /* watch */
603 { roff_line_ignore, NULL, NULL, 0 }, /* watchlength */
604 { roff_line_ignore, NULL, NULL, 0 }, /* watchn */
605 { roff_unsupp, NULL, NULL, 0 }, /* wh */
606 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/
607 { roff_insec, NULL, NULL, 0 }, /* write */
608 { roff_insec, NULL, NULL, 0 }, /* writec */
609 { roff_insec, NULL, NULL, 0 }, /* writem */
610 { roff_line_ignore, NULL, NULL, 0 }, /* xflag */
611 { roff_cblock, NULL, NULL, 0 }, /* . */
612 { roff_renamed, NULL, NULL, 0 }, /* renamed request; presumably ROFF_RENAMED - confirm */
613 { roff_userdef, NULL, NULL, 0 } /* user-defined macro; presumably ROFF_USERDEF - confirm */
614 };
615
616 /* Array of injected predefined strings. */
/* Number of entries in predefs[]; NOTE(review): presumably has to match the entry count of predefs.in - confirm. */
617 #define PREDEFS_MAX 38
618 static const struct predef predefs[PREDEFS_MAX] = {
/* The initializers are pulled in textually from predefs.in. */
619 #include "predefs.in"
620 };
621
/* File-scope parser state; roff_reset() sets all four back to 0 or NULL. */
622 static int roffce_lines; /* number of input lines to center */
623 static struct roff_node *roffce_node; /* active request */
624 static int roffit_lines; /* number of lines to delay */
625 static char *roffit_macro; /* nil-terminated macro line */
626
627
628 /* --- request table ------------------------------------------------------ */
629
630 struct ohash *
631 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
632 {
633 struct ohash *htab;
634 struct roffreq *req;
635 enum roff_tok tok;
636 size_t sz;
637 unsigned int slot;
638
639 htab = mandoc_malloc(sizeof(*htab));
640 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
641
642 for (tok = mintok; tok < maxtok; tok++) {
643 if (roff_name[tok] == NULL)
644 continue;
645 sz = strlen(roff_name[tok]);
646 req = mandoc_malloc(sizeof(*req) + sz + 1);
647 req->tok = tok;
648 memcpy(req->name, roff_name[tok], sz + 1);
649 slot = ohash_qlookup(htab, req->name);
650 ohash_insert(htab, slot, req);
651 }
652 return htab;
653 }
654
/*
 * Free every entry of a table built by roffhash_alloc(), then the
 * table itself.  Passing NULL is allowed and does nothing.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 idx;

	if (htab != NULL) {
		entry = ohash_first(htab, &idx);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &idx);
		}
		ohash_delete(htab);
		free(htab);
	}
}
669
670 enum roff_tok
671 roffhash_find(struct ohash *htab, const char *name, size_t sz)
672 {
673 struct roffreq *req;
674 const char *end;
675
676 if (sz) {
677 end = name + sz;
678 req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
679 } else
680 req = ohash_find(htab, ohash_qlookup(htab, name));
681 return req == NULL ? TOKEN_NONE : req->tok;
682 }
683
684 /* --- stack of request blocks -------------------------------------------- */
685
686 /*
687 * Pop the current node off of the stack of roff instructions currently
688 * pending.
689 */
690 static int
691 roffnode_pop(struct roff *r)
692 {
693 struct roffnode *p;
694 int inloop;
695
696 p = r->last;
697 inloop = p->tok == ROFF_while;
698 r->last = p->parent;
699 free(p->name);
700 free(p->end);
701 free(p);
702 return inloop;
703 }
704
705 /*
706 * Push a roff node onto the instruction stack. This must later be
707 * removed with roffnode_pop().
708 */
709 static void
710 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
711 int line, int col)
712 {
713 struct roffnode *p;
714
715 p = mandoc_calloc(1, sizeof(struct roffnode));
716 p->tok = tok;
717 if (name)
718 p->name = mandoc_strdup(name);
719 p->parent = r->last;
720 p->line = line;
721 p->col = col;
722 p->rule = p->parent ? p->parent->rule : 0;
723
724 r->last = p;
725 }
726
727 /* --- roff parser state data management ---------------------------------- */
728
729 static void
730 roff_free1(struct roff *r)
731 {
732 int i;
733
734 tbl_free(r->first_tbl);
735 r->first_tbl = r->last_tbl = r->tbl = NULL;
736
737 eqn_free(r->last_eqn);
738 r->last_eqn = r->eqn = NULL;
739
740 while (r->mstackpos >= 0)
741 roff_userret(r);
742
743 while (r->last)
744 roffnode_pop(r);
745
746 free (r->rstack);
747 r->rstack = NULL;
748 r->rstacksz = 0;
749 r->rstackpos = -1;
750
751 roff_freereg(r->regtab);
752 r->regtab = NULL;
753
754 roff_freestr(r->strtab);
755 roff_freestr(r->rentab);
756 roff_freestr(r->xmbtab);
757 r->strtab = r->rentab = r->xmbtab = NULL;
758
759 if (r->xtab)
760 for (i = 0; i < 128; i++)
761 free(r->xtab[i].p);
762 free(r->xtab);
763 r->xtab = NULL;
764 }
765
766 void
767 roff_reset(struct roff *r)
768 {
769 roff_free1(r);
770 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
771 r->control = '\0';
772 r->escape = '\\';
773 roffce_lines = 0;
774 roffce_node = NULL;
775 roffit_lines = 0;
776 roffit_macro = NULL;
777 }
778
779 void
780 roff_free(struct roff *r)
781 {
782 int i;
783
784 roff_free1(r);
785 for (i = 0; i < r->mstacksz; i++)
786 free(r->mstack[i].argv);
787 free(r->mstack);
788 roffhash_free(r->reqtab);
789 free(r);
790 }
791
792 struct roff *
793 roff_alloc(int options)
794 {
795 struct roff *r;
796
797 r = mandoc_calloc(1, sizeof(struct roff));
798 r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
799 r->options = options;
800 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
801 r->mstackpos = -1;
802 r->rstackpos = -1;
803 r->escape = '\\';
804 return r;
805 }
806
807 /* --- syntax tree state data management ---------------------------------- */
808
809 static void
810 roff_man_free1(struct roff_man *man)
811 {
812 if (man->meta.first != NULL)
813 roff_node_delete(man, man->meta.first);
814 free(man->meta.msec);
815 free(man->meta.vol);
816 free(man->meta.os);
817 free(man->meta.arch);
818 free(man->meta.title);
819 free(man->meta.name);
820 free(man->meta.date);
821 free(man->meta.sodest);
822 }
823
824 void
825 roff_state_reset(struct roff_man *man)
826 {
827 man->last = man->meta.first;
828 man->last_es = NULL;
829 man->flags = 0;
830 man->lastsec = man->lastnamed = SEC_NONE;
831 man->next = ROFF_NEXT_CHILD;
832 roff_setreg(man->roff, "nS", 0, '=');
833 }
834
835 static void
836 roff_man_alloc1(struct roff_man *man)
837 {
838 memset(&man->meta, 0, sizeof(man->meta));
839 man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
840 man->meta.first->type = ROFFT_ROOT;
841 man->meta.macroset = MACROSET_NONE;
842 roff_state_reset(man);
843 }
844
/*
 * Throw away the current document and start over with an empty
 * tree, keeping the roff_man structure itself.
 */
void
roff_man_reset(struct roff_man *man)
{
	/* Order matters: release the old tree before creating a new root. */
	roff_man_free1(man);
	roff_man_alloc1(man);
}
851
/*
 * Destroy a structure obtained from roff_man_alloc(),
 * including the document it holds.
 */
void
roff_man_free(struct roff_man *man)
{
	/* Release the contents before the container. */
	roff_man_free1(man);
	free(man);
}
858
859 struct roff_man *
860 roff_man_alloc(struct roff *roff, const char *os_s, int quick)
861 {
862 struct roff_man *man;
863
864 man = mandoc_calloc(1, sizeof(*man));
865 man->roff = roff;
866 man->os_s = os_s;
867 man->quick = quick;
868 roff_man_alloc1(man);
869 roff->man = man;
870 return man;
871 }
872
873 /* --- syntax tree handling ----------------------------------------------- */
874
875 struct roff_node *
876 roff_node_alloc(struct roff_man *man, int line, int pos,
877 enum roff_type type, int tok)
878 {
879 struct roff_node *n;
880
881 n = mandoc_calloc(1, sizeof(*n));
882 n->line = line;
883 n->pos = pos;
884 n->tok = tok;
885 n->type = type;
886 n->sec = man->lastsec;
887
888 if (man->flags & MDOC_SYNOPSIS)
889 n->flags |= NODE_SYNPRETTY;
890 else
891 n->flags &= ~NODE_SYNPRETTY;
892 if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL)
893 n->flags |= NODE_NOFILL;
894 else
895 n->flags &= ~NODE_NOFILL;
896 if (man->flags & MDOC_NEWLINE)
897 n->flags |= NODE_LINE;
898 man->flags &= ~MDOC_NEWLINE;
899
900 return n;
901 }
902
903 void
904 roff_node_append(struct roff_man *man, struct roff_node *n)
905 {
906
907 switch (man->next) {
908 case ROFF_NEXT_SIBLING:
909 if (man->last->next != NULL) {
910 n->next = man->last->next;
911 man->last->next->prev = n;
912 } else
913 man->last->parent->last = n;
914 man->last->next = n;
915 n->prev = man->last;
916 n->parent = man->last->parent;
917 break;
918 case ROFF_NEXT_CHILD:
919 if (man->last->child != NULL) {
920 n->next = man->last->child;
921 man->last->child->prev = n;
922 } else
923 man->last->last = n;
924 man->last->child = n;
925 n->parent = man->last;
926 break;
927 default:
928 abort();
929 }
930 man->last = n;
931
932 switch (n->type) {
933 case ROFFT_HEAD:
934 n->parent->head = n;
935 break;
936 case ROFFT_BODY:
937 if (n->end != ENDBODY_NOT)
938 return;
939 n->parent->body = n;
940 break;
941 case ROFFT_TAIL:
942 n->parent->tail = n;
943 break;
944 default:
945 return;
946 }
947
948 /*
949 * Copy over the normalised-data pointer of our parent. Not
950 * everybody has one, but copying a null pointer is fine.
951 */
952
953 n->norm = n->parent->norm;
954 assert(n->parent->type == ROFFT_BLOCK);
955 }
956
957 void
958 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
959 {
960 struct roff_node *n;
961
962 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
963 n->string = roff_strdup(man->roff, word);
964 roff_node_append(man, n);
965 n->flags |= NODE_VALID | NODE_ENDED;
966 man->next = ROFF_NEXT_SIBLING;
967 }
968
969 void
970 roff_word_append(struct roff_man *man, const char *word)
971 {
972 struct roff_node *n;
973 char *addstr, *newstr;
974
975 n = man->last;
976 addstr = roff_strdup(man->roff, word);
977 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
978 free(addstr);
979 free(n->string);
980 n->string = newstr;
981 man->next = ROFF_NEXT_SIBLING;
982 }
983
984 void
985 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
986 {
987 struct roff_node *n;
988
989 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
990 roff_node_append(man, n);
991 man->next = ROFF_NEXT_CHILD;
992 }
993
994 struct roff_node *
995 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
996 {
997 struct roff_node *n;
998
999 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1000 roff_node_append(man, n);
1001 man->next = ROFF_NEXT_CHILD;
1002 return n;
1003 }
1004
1005 struct roff_node *
1006 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1007 {
1008 struct roff_node *n;
1009
1010 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1011 roff_node_append(man, n);
1012 man->next = ROFF_NEXT_CHILD;
1013 return n;
1014 }
1015
1016 struct roff_node *
1017 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1018 {
1019 struct roff_node *n;
1020
1021 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1022 roff_node_append(man, n);
1023 man->next = ROFF_NEXT_CHILD;
1024 return n;
1025 }
1026
1027 static void
1028 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1029 {
1030 struct roff_node *n;
1031 struct tbl_span *span;
1032
1033 if (man->meta.macroset == MACROSET_MAN)
1034 man_breakscope(man, ROFF_TS);
1035 while ((span = tbl_span(tbl)) != NULL) {
1036 n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1037 n->span = span;
1038 roff_node_append(man, n);
1039 n->flags |= NODE_VALID | NODE_ENDED;
1040 man->next = ROFF_NEXT_SIBLING;
1041 }
1042 }
1043
1044 void
1045 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1046 {
1047
1048 /* Adjust siblings. */
1049
1050 if (n->prev)
1051 n->prev->next = n->next;
1052 if (n->next)
1053 n->next->prev = n->prev;
1054
1055 /* Adjust parent. */
1056
1057 if (n->parent != NULL) {
1058 if (n->parent->child == n)
1059 n->parent->child = n->next;
1060 if (n->parent->last == n)
1061 n->parent->last = n->prev;
1062 }
1063
1064 /* Adjust parse point. */
1065
1066 if (man == NULL)
1067 return;
1068 if (man->last == n) {
1069 if (n->prev == NULL) {
1070 man->last = n->parent;
1071 man->next = ROFF_NEXT_CHILD;
1072 } else {
1073 man->last = n->prev;
1074 man->next = ROFF_NEXT_SIBLING;
1075 }
1076 }
1077 if (man->meta.first == n)
1078 man->meta.first = NULL;
1079 }
1080
1081 void
1082 roff_node_relink(struct roff_man *man, struct roff_node *n)
1083 {
1084 roff_node_unlink(man, n);
1085 n->prev = n->next = NULL;
1086 roff_node_append(man, n);
1087 }
1088
1089 void
1090 roff_node_free(struct roff_node *n)
1091 {
1092
1093 if (n->args != NULL)
1094 mdoc_argv_free(n->args);
1095 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1096 free(n->norm);
1097 eqn_box_free(n->eqn);
1098 free(n->string);
1099 free(n);
1100 }
1101
1102 void
1103 roff_node_delete(struct roff_man *man, struct roff_node *n)
1104 {
1105
1106 while (n->child != NULL)
1107 roff_node_delete(man, n->child);
1108 roff_node_unlink(man, n);
1109 roff_node_free(n);
1110 }
1111
1112 void
1113 deroff(char **dest, const struct roff_node *n)
1114 {
1115 char *cp;
1116 size_t sz;
1117
1118 if (n->type != ROFFT_TEXT) {
1119 for (n = n->child; n != NULL; n = n->next)
1120 deroff(dest, n);
1121 return;
1122 }
1123
1124 /* Skip leading whitespace. */
1125
1126 for (cp = n->string; *cp != '\0'; cp++) {
1127 if (cp[0] == '\\' && cp[1] != '\0' &&
1128 strchr(" %&0^|~", cp[1]) != NULL)
1129 cp++;
1130 else if ( ! isspace((unsigned char)*cp))
1131 break;
1132 }
1133
1134 /* Skip trailing backslash. */
1135
1136 sz = strlen(cp);
1137 if (sz > 0 && cp[sz - 1] == '\\')
1138 sz--;
1139
1140 /* Skip trailing whitespace. */
1141
1142 for (; sz; sz--)
1143 if ( ! isspace((unsigned char)cp[sz-1]))
1144 break;
1145
1146 /* Skip empty strings. */
1147
1148 if (sz == 0)
1149 return;
1150
1151 if (*dest == NULL) {
1152 *dest = mandoc_strndup(cp, sz);
1153 return;
1154 }
1155
1156 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1157 free(*dest);
1158 *dest = cp;
1159 }
1160
1161 /* --- main functions of the roff parser ---------------------------------- */
1162
1163 /*
1164 * In the current line, expand escape sequences that produce parsable
1165 * input text. Also check the syntax of the remaining escape sequences,
1166 * which typically produce output glyphs or change formatter state.
 *
 * r:      parser state; supplies the escape character, the string
 *         and register lookups, and the macro call stack.
 * buf:    the input line; when text is substituted, buf->buf is
 *         replaced by a newly allocated buffer and buf->sz updated.
 * ln:     input line number, used for messages and comment nodes.
 * pos:    offset into buf->buf where processing starts.
 * newesc: the escape character to look for; ASCII_ESC when called
 *         from roff_getarg().
 *
 * Returns ROFF_CONT after normal processing, ROFF_IGN | ROFF_APPEND
 * when the line continues on the next one (a "\#" comment, or a
 * trailing escape character on a line not ended by a newline), and
 * ROFF_IGN when expansion is given up because EXPAND_LIMIT or the
 * SHRT_MAX size limit is exceeded.
1167 */
1168 static int
1169 roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc)
1170 {
1171 struct mctx *ctx; /* current macro call context */
1172 char ubuf[24]; /* buffer to print the number */
1173 struct roff_node *n; /* used for header comments */
1174 const char *start; /* start of the string to process */
1175 char *stesc; /* start of an escape sequence ('\\') */
1176 const char *esct; /* type of escape sequence */
1177 char *ep; /* end of comment string */
1178 const char *stnam; /* start of the name, after "[(*" */
1179 const char *cp; /* end of the name, e.g. before ']' */
1180 const char *res; /* the string to be substituted */
1181 char *nbuf; /* new buffer to copy buf->buf to */
1182 size_t maxl; /* expected length of the escape name */
1183 size_t naml; /* actual length of the escape name */
1184 size_t asz; /* length of the replacement */
1185 size_t rsz; /* length of the rest of the string */
1186 int inaml; /* length returned from mandoc_escape() */
1187 int expand_count; /* to avoid infinite loops */
1188 int npos; /* position in numeric expression */
1189 int arg_complete; /* argument not interrupted by eol */
1190 int quote_args; /* true for \\$@, false for \\$* */
1191 int done; /* no more input available */
1192 int deftype; /* type of definition to paste */
1193 int rcsid; /* kind of RCS id seen */
1194 enum mandocerr err; /* for escape sequence problems */
1195 char sign; /* increment number register */
1196 char term; /* character terminating the escape */
1197
1198 /* Search forward for comments. */
1199
1200 done = 0;
1201 start = buf->buf + pos;
1202 for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
1203 if (stesc[0] != newesc || stesc[1] == '\0')
1204 continue;
1205 stesc++;
1206 if (*stesc != '"' && *stesc != '#')
1207 continue;
1208
1209 /* Comment found, look for RCS id. */
1210
1211 rcsid = 0;
1212 if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
1213 rcsid = 1 << MANDOC_OS_OPENBSD;
1214 cp += 8;
1215 } else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
1216 rcsid = 1 << MANDOC_OS_NETBSD;
1217 cp += 7;
1218 }
1219 if (cp != NULL &&
1220 isalnum((unsigned char)*cp) == 0 &&
1221 strchr(cp, '$') != NULL) {
1222 if (r->man->meta.rcsids & rcsid)
1223 mandoc_msg(MANDOCERR_RCS_REP, ln,
1224 (int)(stesc - buf->buf) + 1,
1225 "%s", stesc + 1);
1226 r->man->meta.rcsids |= rcsid;
1227 }
1228
1229 /* Handle trailing whitespace. */
1230
/* From here on, stesc points to the escape character again. */
1231 ep = strchr(stesc--, '\0') - 1;
1232 if (*ep == '\n') {
1233 done = 1;
1234 ep--;
1235 }
1236 if (*ep == ' ' || *ep == '\t')
1237 mandoc_msg(MANDOCERR_SPACE_EOL,
1238 ln, (int)(ep - buf->buf), NULL);
1239
1240 /*
1241 * Save comments preceding the title macro
1242 * in the syntax tree.
1243 */
1244
1245 if (newesc != ASCII_ESC && r->format == 0) {
1246 while (*ep == ' ' || *ep == '\t')
1247 ep--;
1248 ep[1] = '\0';
1249 n = roff_node_alloc(r->man,
1250 ln, stesc + 1 - buf->buf,
1251 ROFFT_COMMENT, TOKEN_NONE);
1252 n->string = mandoc_strdup(stesc + 2);
1253 roff_node_append(r->man, n);
1254 n->flags |= NODE_VALID | NODE_ENDED;
1255 r->man->next = ROFF_NEXT_SIBLING;
1256 }
1257
1258 /* Line continuation with comment. */
1259
1260 if (stesc[1] == '#') {
1261 *stesc = '\0';
1262 return ROFF_IGN | ROFF_APPEND;
1263 }
1264
1265 /* Discard normal comments. */
1266
1267 while (stesc > start && stesc[-1] == ' ' &&
1268 (stesc == start + 1 || stesc[-2] != '\\'))
1269 stesc--;
1270 *stesc = '\0';
1271 break;
1272 }
1273 if (stesc == start)
1274 return ROFF_CONT;
1275 stesc--;
1276
1277 /* Notice the end of the input. */
1278
1279 if (*stesc == '\n') {
1280 *stesc-- = '\0';
1281 done = 1;
1282 }
1283
/*
 * Scan backwards from the end of the line. After a substitution,
 * stesc is placed at the end of the inserted text, so the
 * replacement itself is examined for further escape sequences.
 */
1284 expand_count = 0;
1285 while (stesc >= start) {
1286 if (*stesc != newesc) {
1287
1288 /*
1289 * If we have a non-standard escape character,
1290 * escape literal backslashes because all
1291 * processing in subsequent functions uses
1292 * the standard escaping rules.
1293 */
1294
1295 if (newesc != ASCII_ESC && *stesc == '\\') {
1296 *stesc = '\0';
1297 buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
1298 buf->buf, stesc + 1) + 1;
1299 start = nbuf + pos;
1300 stesc = nbuf + (stesc - buf->buf);
1301 free(buf->buf);
1302 buf->buf = nbuf;
1303 }
1304
1305 /* Search backwards for the next escape. */
1306
1307 stesc--;
1308 continue;
1309 }
1310
1311 /* If it is escaped, skip it. */
1312
1313 for (cp = stesc - 1; cp >= start; cp--)
1314 if (*cp != r->escape)
1315 break;
1316
/*
 * An even distance means an even number of consecutive escape
 * characters, which are all rewritten to backslashes and skipped.
 */
1317 if ((stesc - cp) % 2 == 0) {
1318 while (stesc > cp)
1319 *stesc-- = '\\';
1320 continue;
1321 } else if (stesc[1] != '\0') {
1322 *stesc = '\\';
1323 } else {
1324 *stesc-- = '\0';
1325 if (done)
1326 continue;
1327 else
1328 return ROFF_IGN | ROFF_APPEND;
1329 }
1330
1331 /* Decide whether to expand or to check only. */
1332
1333 term = '\0';
1334 cp = stesc + 1;
1335 if (*cp == 'E')
1336 cp++;
1337 esct = cp;
1338 switch (*esct) {
1339 case '*':
1340 case '$':
1341 res = NULL;
1342 break;
1343 case 'B':
1344 case 'w':
1345 term = cp[1];
1346 /* FALLTHROUGH */
1347 case 'n':
1348 sign = cp[1];
1349 if (sign == '+' || sign == '-')
1350 cp++;
1351 res = ubuf;
1352 break;
1353 default:
1354 err = MANDOCERR_OK;
1355 switch(mandoc_escape(&cp, &stnam, &inaml)) {
1356 case ESCAPE_SPECIAL:
1357 if (mchars_spec2cp(stnam, inaml) >= 0)
1358 break;
1359 /* FALLTHROUGH */
1360 case ESCAPE_ERROR:
1361 err = MANDOCERR_ESC_BAD;
1362 break;
1363 case ESCAPE_UNDEF:
1364 err = MANDOCERR_ESC_UNDEF;
1365 break;
1366 case ESCAPE_UNSUPP:
1367 err = MANDOCERR_ESC_UNSUPP;
1368 break;
1369 default:
1370 break;
1371 }
1372 if (err != MANDOCERR_OK)
1373 mandoc_msg(err, ln, (int)(stesc - buf->buf),
1374 "%.*s", (int)(cp - stesc), stesc);
1375 stesc--;
1376 continue;
1377 }
1378
1379 if (EXPAND_LIMIT < ++expand_count) {
1380 mandoc_msg(MANDOCERR_ROFFLOOP,
1381 ln, (int)(stesc - buf->buf), NULL);
1382 return ROFF_IGN;
1383 }
1384
1385 /*
1386 * The third character decides the length
1387 * of the name of the string or register.
1388 * Save a pointer to the name.
1389 */
1390
1391 if (term == '\0') {
1392 switch (*++cp) {
1393 case '\0':
1394 maxl = 0;
1395 break;
1396 case '(':
1397 cp++;
1398 maxl = 2;
1399 break;
1400 case '[':
1401 cp++;
1402 term = ']';
1403 maxl = 0;
1404 break;
1405 default:
1406 maxl = 1;
1407 break;
1408 }
1409 } else {
1410 cp += 2;
1411 maxl = 0;
1412 }
1413 stnam = cp;
1414
1415 /* Advance to the end of the name. */
1416
1417 naml = 0;
1418 arg_complete = 1;
1419 while (maxl == 0 || naml < maxl) {
1420 if (*cp == '\0') {
1421 mandoc_msg(MANDOCERR_ESC_BAD, ln,
1422 (int)(stesc - buf->buf), "%s", stesc);
1423 arg_complete = 0;
1424 break;
1425 }
1426 if (maxl == 0 && *cp == term) {
1427 cp++;
1428 break;
1429 }
1430 if (*cp++ != '\\' || *esct != 'w') {
1431 naml++;
1432 continue;
1433 }
1434 switch (mandoc_escape(&cp, NULL, NULL)) {
1435 case ESCAPE_SPECIAL:
1436 case ESCAPE_UNICODE:
1437 case ESCAPE_NUMBERED:
1438 case ESCAPE_UNDEF:
1439 case ESCAPE_OVERSTRIKE:
1440 naml++;
1441 break;
1442 default:
1443 break;
1444 }
1445 }
1446
1447 /*
1448 * Retrieve the replacement string; if it is
1449 * undefined, resume searching for escapes.
1450 */
1451
1452 switch (*esct) {
1453 case '*':
1454 if (arg_complete) {
1455 deftype = ROFFDEF_USER | ROFFDEF_PRE;
1456 res = roff_getstrn(r, stnam, naml, &deftype);
1457
1458 /*
1459 * If not overridden, let \*(.T
1460 * through to the formatters.
1461 */
1462
1463 if (res == NULL && naml == 2 &&
1464 stnam[0] == '.' && stnam[1] == 'T') {
1465 roff_setstrn(&r->strtab,
1466 ".T", 2, NULL, 0, 0);
1467 stesc--;
1468 continue;
1469 }
1470 }
1471 break;
1472 case '$':
1473 if (r->mstackpos < 0) {
1474 mandoc_msg(MANDOCERR_ARG_UNDEF, ln,
1475 (int)(stesc - buf->buf), "%.3s", stesc);
1476 break;
1477 }
1478 ctx = r->mstack + r->mstackpos;
1479 npos = esct[1] - '1';
1480 if (npos >= 0 && npos <= 8) {
1481 res = npos < ctx->argc ?
1482 ctx->argv[npos] : "";
1483 break;
1484 }
1485 if (esct[1] == '*')
1486 quote_args = 0;
1487 else if (esct[1] == '@')
1488 quote_args = 1;
1489 else {
1490 mandoc_msg(MANDOCERR_ARG_NONUM, ln,
1491 (int)(stesc - buf->buf), "%.3s", stesc);
1492 break;
1493 }
1494 asz = 0;
1495 for (npos = 0; npos < ctx->argc; npos++) {
1496 if (npos)
1497 asz++; /* blank */
1498 if (quote_args)
1499 asz += 2; /* quotes */
1500 asz += strlen(ctx->argv[npos]);
1501 }
/*
 * Resize the buffer in place such that the three bytes
 * at stesc are replaced by room for asz bytes.
 */
1502 if (asz != 3) {
1503 rsz = buf->sz - (stesc - buf->buf) - 3;
1504 if (asz < 3)
1505 memmove(stesc + asz, stesc + 3, rsz);
1506 buf->sz += asz - 3;
1507 nbuf = mandoc_realloc(buf->buf, buf->sz);
1508 start = nbuf + pos;
1509 stesc = nbuf + (stesc - buf->buf);
1510 buf->buf = nbuf;
1511 if (asz > 3)
1512 memmove(stesc + asz, stesc + 3, rsz);
1513 }
1514 for (npos = 0; npos < ctx->argc; npos++) {
1515 if (npos)
1516 *stesc++ = ' ';
1517 if (quote_args)
1518 *stesc++ = '"';
1519 cp = ctx->argv[npos];
1520 while (*cp != '\0')
1521 *stesc++ = *cp++;
1522 if (quote_args)
1523 *stesc++ = '"';
1524 }
1525 continue;
1526 case 'B':
1527 npos = 0;
1528 ubuf[0] = arg_complete &&
1529 roff_evalnum(r, ln, stnam, &npos,
1530 NULL, ROFFNUM_SCALE) &&
1531 stnam + npos + 1 == cp ? '1' : '0';
1532 ubuf[1] = '\0';
1533 break;
1534 case 'n':
1535 if (arg_complete)
1536 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1537 roff_getregn(r, stnam, naml, sign));
1538 else
1539 ubuf[0] = '\0';
1540 break;
1541 case 'w':
1542 /* use even incomplete args */
1543 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1544 24 * (int)naml);
1545 break;
1546 }
1547
1548 if (res == NULL) {
1549 if (*esct == '*')
1550 mandoc_msg(MANDOCERR_STR_UNDEF,
1551 ln, (int)(stesc - buf->buf),
1552 "%.*s", (int)naml, stnam);
1553 res = "";
1554 } else if (buf->sz + strlen(res) > SHRT_MAX) {
1555 mandoc_msg(MANDOCERR_ROFFLOOP,
1556 ln, (int)(stesc - buf->buf), NULL);
1557 return ROFF_IGN;
1558 }
1559
1560 /* Replace the escape sequence by the string. */
1561
1562 *stesc = '\0';
1563 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
1564 buf->buf, res, cp) + 1;
1565
1566 /* Prepare for the next replacement. */
1567
1568 start = nbuf + pos;
1569 stesc = nbuf + (stesc - buf->buf) + strlen(res);
1570 free(buf->buf);
1571 buf->buf = nbuf;
1572 }
1573 return ROFF_CONT;
1574 }
1575
1576 /*
1577 * Parse a quoted or unquoted roff-style request or macro argument.
1578 * Return a newly allocated copy of the parsed argument, which starts
1579 * at the original pointer, or one byte later if the argument is quoted.
1580 * NUL-terminate the argument in place.
1581 * Collapse pairs of quotes inside quoted arguments.
1582 * Advance the argument pointer to the next argument,
1583 * or to the NUL byte terminating the argument line.
 * Add the number of input bytes consumed to *pos.
 * If the argument contained a double backslash, the copy is passed
 * through roff_expand() with ASCII_ESC as the escape character;
 * if the result of that has ROFF_IGN set, an empty string is
 * returned instead.
 * Messages: MANDOCERR_ARG_QUOTE for a missing closing quote and
 * MANDOCERR_SPACE_EOL for blanks at the end of the argument line.
1584 */
1585 char *
1586 roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
1587 {
1588 struct buf buf;
1589 char *cp, *start;
1590 int newesc, pairs, quoted, white;
1591
1592 /* Quoting can only start with a new word. */
1593 start = *cpp;
1594 quoted = 0;
1595 if ('"' == *start) {
1596 quoted = 1;
1597 start++;
1598 }
1599
/* pairs counts the bytes dropped so far, i.e. the shift to the left. */
1600 newesc = pairs = white = 0;
1601 for (cp = start; '\0' != *cp; cp++) {
1602
1603 /*
1604 * Move the following text left
1605 * after quoted quotes and after "\\", "\t", and "\a".
1606 */
1607 if (pairs)
1608 cp[-pairs] = cp[0];
1609
1610 if ('\\' == cp[0]) {
1611 /*
1612 * In copy mode, translate double to single
1613 * backslashes and backslash-t to literal tabs.
1614 */
1615 switch (cp[1]) {
1616 case 'a':
1617 case 't':
1618 cp[-pairs] = '\t';
1619 pairs++;
1620 cp++;
1621 break;
1622 case '\\':
/* Mark the spot with ASCII_ESC for roff_expand() below. */
1623 newesc = 1;
1624 cp[-pairs] = ASCII_ESC;
1625 pairs++;
1626 cp++;
1627 break;
1628 case ' ':
1629 /* Skip escaped blanks. */
1630 if (0 == quoted)
1631 cp++;
1632 break;
1633 default:
1634 break;
1635 }
1636 } else if (0 == quoted) {
1637 if (' ' == cp[0]) {
1638 /* Unescaped blanks end unquoted args. */
1639 white = 1;
1640 break;
1641 }
1642 } else if ('"' == cp[0]) {
1643 if ('"' == cp[1]) {
1644 /* Quoted quotes collapse. */
1645 pairs++;
1646 cp++;
1647 } else {
1648 /* Unquoted quotes end quoted args. */
1649 quoted = 2;
1650 break;
1651 }
1652 }
1653 }
1654
1655 /* Quoted argument without a closing quote. */
1656 if (1 == quoted)
1657 mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);
1658
1659 /* NUL-terminate this argument and move to the next one. */
1660 if (pairs)
1661 cp[-pairs] = '\0';
1662 if ('\0' != *cp) {
1663 *cp++ = '\0';
1664 while (' ' == *cp)
1665 cp++;
1666 }
1667 *pos += (int)(cp - start) + (quoted ? 1 : 0);
1668 *cpp = cp;
1669
1670 if ('\0' == *cp && (white || ' ' == cp[-1]))
1671 mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);
1672
/* The caller gets a copy, never a pointer into the input line. */
1673 start = mandoc_strdup(start);
1674 if (newesc == 0)
1675 return start;
1676
1677 buf.buf = start;
1678 buf.sz = strlen(start) + 1;
1679 buf.next = NULL;
1680 if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) {
1681 free(buf.buf);
1682 buf.buf = mandoc_strdup("");
1683 }
1684 return buf.buf;
1685 }
1686
1687
1688 /*
1689 * Process text streams.
1690 */
1691 static int
1692 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1693 {
1694 size_t sz;
1695 const char *start;
1696 char *p;
1697 int isz;
1698 enum mandoc_esc esc;
1699
1700 /* Spring the input line trap. */
1701
1702 if (roffit_lines == 1) {
1703 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1704 free(buf->buf);
1705 buf->buf = p;
1706 buf->sz = isz + 1;
1707 *offs = 0;
1708 free(roffit_macro);
1709 roffit_lines = 0;
1710 return ROFF_REPARSE;
1711 } else if (roffit_lines > 1)
1712 --roffit_lines;
1713
1714 if (roffce_node != NULL && buf->buf[pos] != '\0') {
1715 if (roffce_lines < 1) {
1716 r->man->last = roffce_node;
1717 r->man->next = ROFF_NEXT_SIBLING;
1718 roffce_lines = 0;
1719 roffce_node = NULL;
1720 } else
1721 roffce_lines--;
1722 }
1723
1724 /* Convert all breakable hyphens into ASCII_HYPH. */
1725
1726 start = p = buf->buf + pos;
1727
1728 while (*p != '\0') {
1729 sz = strcspn(p, "-\\");
1730 p += sz;
1731
1732 if (*p == '\0')
1733 break;
1734
1735 if (*p == '\\') {
1736 /* Skip over escapes. */
1737 p++;
1738 esc = mandoc_escape((const char **)&p, NULL, NULL);
1739 if (esc == ESCAPE_ERROR)
1740 break;
1741 while (*p == '-')
1742 p++;
1743 continue;
1744 } else if (p == start) {
1745 p++;
1746 continue;
1747 }
1748
1749 if (isalpha((unsigned char)p[-1]) &&
1750 isalpha((unsigned char)p[1]))
1751 *p = ASCII_HYPH;
1752 p++;
1753 }
1754 return ROFF_CONT;
1755 }
1756
/*
 * Parse one input line.
 *
 * r:    the roff parser state.
 * ln:   the input line number, used for messages.
 * buf:  the input line; its buffer may be replaced while escape
 *       sequences are expanded.
 * offs: on entry, the offset in buf->buf where parsing starts;
 *       it is also handed on to the handlers called from here.
 *
 * Returns a ROFF_* code: ROFF_CONT if the line is neither a roff
 * request nor a user-defined macro and is left to the macro set
 * parsers, ROFF_IGN if it was consumed here, or else whatever
 * roff_eqndelim(), roff_expand(), roff_parsetext(), or the handler
 * of the open scope or of the request returns.
 */
1757 int
1758 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
1759 {
1760 enum roff_tok t;
1761 int e;
1762 int pos; /* parse point */
1763 int spos; /* saved parse point for messages */
1764 int ppos; /* original offset in buf->buf */
1765 int ctl; /* macro line (boolean) */
1766
1767 ppos = pos = *offs;
1768
1769 /* Handle in-line equation delimiters. */
1770
1771 if (r->tbl == NULL &&
1772 r->last_eqn != NULL && r->last_eqn->delim &&
1773 (r->eqn == NULL || r->eqn_inline)) {
1774 e = roff_eqndelim(r, buf, pos);
1775 if (e == ROFF_REPARSE)
1776 return e;
1777 assert(e == ROFF_CONT);
1778 }
1779
1780 /* Expand some escape sequences. */
1781
1782 e = roff_expand(r, buf, ln, pos, r->escape);
1783 if ((e & ROFF_MASK) == ROFF_IGN)
1784 return e;
1785 assert(e == ROFF_CONT);
1786
1787 ctl = roff_getcontrol(r, buf->buf, &pos);
1788
1789 /*
1790 * First, if a scope is open and we're not a macro, pass the
1791 * text through the macro's filter.
1792 * Equations process all content themselves.
1793 * Tables process almost all content themselves, but we want
1794 * to warn about macros before passing it there.
1795 */
1796
1797 if (r->last != NULL && ! ctl) {
1798 t = r->last->tok;
1799 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1800 if ((e & ROFF_MASK) == ROFF_IGN)
1801 return e;
/* Keep only the bits outside ROFF_MASK; they are passed on below. */
1802 e &= ~ROFF_MASK;
1803 } else
1804 e = ROFF_IGN;
1805 if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
1806 eqn_read(r->eqn, buf->buf + ppos);
1807 return e;
1808 }
1809 if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
1810 tbl_read(r->tbl, ln, buf->buf, ppos);
1811 roff_addtbl(r->man, ln, r->tbl);
1812 return e;
1813 }
1814 if ( ! ctl)
1815 return roff_parsetext(r, buf, pos, offs) | e;
1816
1817 /* Skip empty request lines. */
1818
1819 if (buf->buf[pos] == '"') {
1820 mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
1821 return ROFF_IGN;
1822 } else if (buf->buf[pos] == '\0')
1823 return ROFF_IGN;
1824
1825 /*
1826 * If a scope is open, go to the child handler for that macro,
1827 * as it may want to preprocess before doing anything with it.
1828 * Don't do so if an equation is open.
1829 */
1830
1831 if (r->last) {
1832 t = r->last->tok;
1833 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1834 }
1835
1836 /* No scope is open. This is a new request or macro. */
1837
1838 spos = pos;
1839 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1840
1841 /* Tables ignore most macros. */
1842
1843 if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
1844 t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
1845 mandoc_msg(MANDOCERR_TBLMACRO,
1846 ln, pos, "%s", buf->buf + spos);
1847 if (t != TOKEN_NONE)
1848 return ROFF_IGN;
/* Skip the unknown macro name and pass the rest on as table data. */
1849 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1850 pos++;
1851 while (buf->buf[pos] == ' ')
1852 pos++;
1853 tbl_read(r->tbl, ln, buf->buf, pos);
1854 roff_addtbl(r->man, ln, r->tbl);
1855 return ROFF_IGN;
1856 }
1857
1858 /* For now, let high level macros abort .ce mode. */
1859
1860 if (ctl && roffce_node != NULL &&
1861 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
1862 t == ROFF_TH || t == ROFF_TS)) {
1863 r->man->last = roffce_node;
1864 r->man->next = ROFF_NEXT_SIBLING;
1865 roffce_lines = 0;
1866 roffce_node = NULL;
1867 }
1868
1869 /*
1870 * This is neither a roff request nor a user-defined macro.
1871 * Let the standard macro set parsers handle it.
1872 */
1873
1874 if (t == TOKEN_NONE)
1875 return ROFF_CONT;
1876
1877 /* Execute a roff request or a user defined macro. */
1878
1879 return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
1880 }
1881
1882 /*
1883 * Internal interface function to tell the roff parser that execution
1884 * of the current macro ended. This is required because macro
1885 * definitions usually do not end with a .return request.
1886 */
1887 void
1888 roff_userret(struct roff *r)
1889 {
1890 struct mctx *ctx;
1891 int i;
1892
1893 assert(r->mstackpos >= 0);
1894 ctx = r->mstack + r->mstackpos;
1895 for (i = 0; i < ctx->argc; i++)
1896 free(ctx->argv[i]);
1897 ctx->argc = 0;
1898 r->mstackpos--;
1899 }
1900
1901 void
1902 roff_endparse(struct roff *r)
1903 {
1904 if (r->last != NULL)
1905 mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
1906 r->last->col, "%s", roff_name[r->last->tok]);
1907
1908 if (r->eqn != NULL) {
1909 mandoc_msg(MANDOCERR_BLK_NOEND,
1910 r->eqn->node->line, r->eqn->node->pos, "EQ");
1911 eqn_parse(r->eqn);
1912 r->eqn = NULL;
1913 }
1914
1915 if (r->tbl != NULL) {
1916 tbl_end(r->tbl, 1);
1917 r->tbl = NULL;
1918 }
1919 }
1920
1921 /*
1922 * Parse a roff node's type from the input buffer. This must be in the
1923 * form of ".foo xxx" in the usual way.
1924 */
1925 static enum roff_tok
1926 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1927 {
1928 char *cp;
1929 const char *mac;
1930 size_t maclen;
1931 int deftype;
1932 enum roff_tok t;
1933
1934 cp = buf + *pos;
1935
1936 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1937 return TOKEN_NONE;
1938
1939 mac = cp;
1940 maclen = roff_getname(r, &cp, ln, ppos);
1941
1942 deftype = ROFFDEF_USER | ROFFDEF_REN;
1943 r->current_string = roff_getstrn(r, mac, maclen, &deftype);
1944 switch (deftype) {
1945 case ROFFDEF_USER:
1946 t = ROFF_USERDEF;
1947 break;
1948 case ROFFDEF_REN:
1949 t = ROFF_RENAMED;
1950 break;
1951 default:
1952 t = roffhash_find(r->reqtab, mac, maclen);
1953 break;
1954 }
1955 if (t != TOKEN_NONE)
1956 *pos = cp - buf;
1957 else if (deftype == ROFFDEF_UNDEF) {
1958 /* Using an undefined macro defines it to be empty. */
1959 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
1960 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
1961 }
1962 return t;
1963 }
1964
1965 /* --- handling of request blocks ----------------------------------------- */
1966
1967 static int
1968 roff_cblock(ROFF_ARGS)
1969 {
1970
1971 /*
1972 * A block-close `..' should only be invoked as a child of an
1973 * ignore macro, otherwise raise a warning and just ignore it.
1974 */
1975
1976 if (r->last == NULL) {
1977 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
1978 return ROFF_IGN;
1979 }
1980
1981 switch (r->last->tok) {
1982 case ROFF_am:
1983 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1984 case ROFF_ami:
1985 case ROFF_de:
1986 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1987 case ROFF_dei:
1988 case ROFF_ig:
1989 break;
1990 default:
1991 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
1992 return ROFF_IGN;
1993 }
1994
1995 if (buf->buf[pos] != '\0')
1996 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
1997 ".. %s", buf->buf + pos);
1998
1999 roffnode_pop(r);
2000 roffnode_cleanscope(r);
2001 return ROFF_IGN;
2002
2003 }
2004
2005 static int
2006 roffnode_cleanscope(struct roff *r)
2007 {
2008 int inloop;
2009
2010 inloop = 0;
2011 while (r->last != NULL) {
2012 if (--r->last->endspan != 0)
2013 break;
2014 inloop += roffnode_pop(r);
2015 }
2016 return inloop;
2017 }
2018
2019 static int
2020 roff_ccond(struct roff *r, int ln, int ppos)
2021 {
2022 if (NULL == r->last) {
2023 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2024 return 0;
2025 }
2026
2027 switch (r->last->tok) {
2028 case ROFF_el:
2029 case ROFF_ie:
2030 case ROFF_if:
2031 case ROFF_while:
2032 break;
2033 default:
2034 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2035 return 0;
2036 }
2037
2038 if (r->last->endspan > -1) {
2039 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2040 return 0;
2041 }
2042
2043 return roffnode_pop(r) + roffnode_cleanscope(r);
2044 }
2045
2046 static int
2047 roff_block(ROFF_ARGS)
2048 {
2049 const char *name, *value;
2050 char *call, *cp, *iname, *rname;
2051 size_t csz, namesz, rsz;
2052 int deftype;
2053
2054 /* Ignore groff compatibility mode for now. */
2055
2056 if (tok == ROFF_de1)
2057 tok = ROFF_de;
2058 else if (tok == ROFF_dei1)
2059 tok = ROFF_dei;
2060 else if (tok == ROFF_am1)
2061 tok = ROFF_am;
2062 else if (tok == ROFF_ami1)
2063 tok = ROFF_ami;
2064
2065 /* Parse the macro name argument. */
2066
2067 cp = buf->buf + pos;
2068 if (tok == ROFF_ig) {
2069 iname = NULL;
2070 namesz = 0;
2071 } else {
2072 iname = cp;
2073 namesz = roff_getname(r, &cp, ln, ppos);
2074 iname[namesz] = '\0';
2075 }
2076
2077 /* Resolve the macro name argument if it is indirect. */
2078
2079 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2080 deftype = ROFFDEF_USER;
2081 name = roff_getstrn(r, iname, namesz, &deftype);
2082 if (name == NULL) {
2083 mandoc_msg(MANDOCERR_STR_UNDEF,
2084 ln, (int)(iname - buf->buf),
2085 "%.*s", (int)namesz, iname);
2086 namesz = 0;
2087 } else
2088 namesz = strlen(name);
2089 } else
2090 name = iname;
2091
2092 if (namesz == 0 && tok != ROFF_ig) {
2093 mandoc_msg(MANDOCERR_REQ_EMPTY,
2094 ln, ppos, "%s", roff_name[tok]);
2095 return ROFF_IGN;
2096 }
2097
2098 roffnode_push(r, tok, name, ln, ppos);
2099
2100 /*
2101 * At the beginning of a `de' macro, clear the existing string
2102 * with the same name, if there is one. New content will be
2103 * appended from roff_block_text() in multiline mode.
2104 */
2105
2106 if (tok == ROFF_de || tok == ROFF_dei) {
2107 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
2108 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2109 } else if (tok == ROFF_am || tok == ROFF_ami) {
2110 deftype = ROFFDEF_ANY;
2111 value = roff_getstrn(r, iname, namesz, &deftype);
2112 switch (deftype) { /* Before appending, ... */
2113 case ROFFDEF_PRE: /* copy predefined to user-defined. */
2114 roff_setstrn(&r->strtab, name, namesz,
2115 value, strlen(value), 0);
2116 break;
2117 case ROFFDEF_REN: /* call original standard macro. */
2118 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2119 (int)strlen(value), value);
2120 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2121 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2122 free(call);
2123 break;
2124 case ROFFDEF_STD: /* rename and call standard macro. */
2125 rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
2126 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
2127 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2128 (int)rsz, rname);
2129 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2130 free(call);
2131 free(rname);
2132 break;
2133 default:
2134 break;
2135 }
2136 }
2137
2138 if (*cp == '\0')
2139 return ROFF_IGN;
2140
2141 /* Get the custom end marker. */
2142
2143 iname = cp;
2144 namesz = roff_getname(r, &cp, ln, ppos);
2145
2146 /* Resolve the end marker if it is indirect. */
2147
2148 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2149 deftype = ROFFDEF_USER;
2150 name = roff_getstrn(r, iname, namesz, &deftype);
2151 if (name == NULL) {
2152 mandoc_msg(MANDOCERR_STR_UNDEF,
2153 ln, (int)(iname - buf->buf),
2154 "%.*s", (int)namesz, iname);
2155 namesz = 0;
2156 } else
2157 namesz = strlen(name);
2158 } else
2159 name = iname;
2160
2161 if (namesz)
2162 r->last->end = mandoc_strndup(name, namesz);
2163
2164 if (*cp != '\0')
2165 mandoc_msg(MANDOCERR_ARG_EXCESS,
2166 ln, pos, ".%s ... %s", roff_name[tok], cp);
2167
2168 return ROFF_IGN;
2169 }
2170
2171 static int
2172 roff_block_sub(ROFF_ARGS)
2173 {
2174 enum roff_tok t;
2175 int i, j;
2176
2177 /*
2178 * First check whether a custom macro exists at this level. If
2179 * it does, then check against it. This is some of groff's
2180 * stranger behaviours. If we encountered a custom end-scope
2181 * tag and that tag also happens to be a "real" macro, then we
2182 * need to try interpreting it again as a real macro. If it's
2183 * not, then return ignore. Else continue.
2184 */
2185
2186 if (r->last->end) {
2187 for (i = pos, j = 0; r->last->end[j]; j++, i++)
2188 if (buf->buf[i] != r->last->end[j])
2189 break;
2190
2191 if (r->last->end[j] == '\0' &&
2192 (buf->buf[i] == '\0' ||
2193 buf->buf[i] == ' ' ||
2194 buf->buf[i] == '\t')) {
2195 roffnode_pop(r);
2196 roffnode_cleanscope(r);
2197
2198 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2199 i++;
2200
2201 pos = i;
2202 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2203 TOKEN_NONE)
2204 return ROFF_RERUN;
2205 return ROFF_IGN;
2206 }
2207 }
2208
2209 /*
2210 * If we have no custom end-query or lookup failed, then try
2211 * pulling it out of the hashtable.
2212 */
2213
2214 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2215
2216 if (t != ROFF_cblock) {
2217 if (tok != ROFF_ig)
2218 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2219 return ROFF_IGN;
2220 }
2221
2222 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2223 }
2224
2225 static int
2226 roff_block_text(ROFF_ARGS)
2227 {
2228
2229 if (tok != ROFF_ig)
2230 roff_setstr(r, r->last->name, buf->buf + pos, 2);
2231
2232 return ROFF_IGN;
2233 }
2234
2235 static int
2236 roff_cond_sub(ROFF_ARGS)
2237 {
2238 char *ep;
2239 int endloop, irc, rr;
2240 enum roff_tok t;
2241
2242 irc = ROFF_IGN;
2243 rr = r->last->rule;
2244 endloop = tok != ROFF_while ? ROFF_IGN :
2245 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2246 if (roffnode_cleanscope(r))
2247 irc |= endloop;
2248
2249 /*
2250 * If `\}' occurs on a macro line without a preceding macro,
2251 * drop the line completely.
2252 */
2253
2254 ep = buf->buf + pos;
2255 if (ep[0] == '\\' && ep[1] == '}')
2256 rr = 0;
2257
2258 /*
2259 * The closing delimiter `\}' rewinds the conditional scope
2260 * but is otherwise ignored when interpreting the line.
2261 */
2262
2263 while ((ep = strchr(ep, '\\')) != NULL) {
2264 switch (ep[1]) {
2265 case '}':
2266 memmove(ep, ep + 2, strlen(ep + 2) + 1);
2267 if (roff_ccond(r, ln, ep - buf->buf))
2268 irc |= endloop;
2269 break;
2270 case '\0':
2271 ++ep;
2272 break;
2273 default:
2274 ep += 2;
2275 break;
2276 }
2277 }
2278
2279 /*
2280 * Fully handle known macros when they are structurally
2281 * required or when the conditional evaluated to true.
2282 */
2283
2284 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2285 irc |= t != TOKEN_NONE && (rr || roffs[t].flags & ROFFMAC_STRUCT) ?
2286 (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs) :
2287 rr ? ROFF_CONT : ROFF_IGN;
2288 return irc;
2289 }
2290
2291 static int
2292 roff_cond_text(ROFF_ARGS)
2293 {
2294 char *ep;
2295 int endloop, irc, rr;
2296
2297 irc = ROFF_IGN;
2298 rr = r->last->rule;
2299 endloop = tok != ROFF_while ? ROFF_IGN :
2300 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2301 if (roffnode_cleanscope(r))
2302 irc |= endloop;
2303
2304 /*
2305 * If `\}' occurs on a text line with neither preceding
2306 * nor following characters, drop the line completely.
2307 */
2308
2309 ep = buf->buf + pos;
2310 if (strcmp(ep, "\\}") == 0)
2311 rr = 0;
2312
2313 /*
2314 * The closing delimiter `\}' rewinds the conditional scope
2315 * but is otherwise ignored when interpreting the line.
2316 */
2317
2318 while ((ep = strchr(ep, '\\')) != NULL) {
2319 switch (ep[1]) {
2320 case '}':
2321 memmove(ep, ep + 2, strlen(ep + 2) + 1);
2322 if (roff_ccond(r, ln, ep - buf->buf))
2323 irc |= endloop;
2324 break;
2325 case '\0':
2326 ++ep;
2327 break;
2328 default:
2329 ep += 2;
2330 break;
2331 }
2332 }
2333 if (rr)
2334 irc |= ROFF_CONT;
2335 return irc;
2336 }
2337
2338 /* --- handling of numeric and conditional expressions -------------------- */
2339
2340 /*
2341 * Parse a single signed integer number. Stop at the first non-digit.
2342 * If there is at least one digit, return success and advance the
2343 * parse point, else return failure and let the parse point unchanged.
2344 * Ignore overflows, treat them just like the C language.
2345 */
2346 static int
2347 roff_getnum(const char *v, int *pos, int *res, int flags)
2348 {
2349 int myres, scaled, n, p;
2350
2351 if (NULL == res)
2352 res = &myres;
2353
2354 p = *pos;
2355 n = v[p] == '-';
2356 if (n || v[p] == '+')
2357 p++;
2358
2359 if (flags & ROFFNUM_WHITE)
2360 while (isspace((unsigned char)v[p]))
2361 p++;
2362
2363 for (*res = 0; isdigit((unsigned char)v[p]); p++)
2364 *res = 10 * *res + v[p] - '0';
2365 if (p == *pos + n)
2366 return 0;
2367
2368 if (n)
2369 *res = -*res;
2370
2371 /* Each number may be followed by one optional scaling unit. */
2372
2373 switch (v[p]) {
2374 case 'f':
2375 scaled = *res * 65536;
2376 break;
2377 case 'i':
2378 scaled = *res * 240;
2379 break;
2380 case 'c':
2381 scaled = *res * 240 / 2.54;
2382 break;
2383 case 'v':
2384 case 'P':
2385 scaled = *res * 40;
2386 break;
2387 case 'm':
2388 case 'n':
2389 scaled = *res * 24;
2390 break;
2391 case 'p':
2392 scaled = *res * 10 / 3;
2393 break;
2394 case 'u':
2395 scaled = *res;
2396 break;
2397 case 'M':
2398 scaled = *res * 6 / 25;
2399 break;
2400 default:
2401 scaled = *res;
2402 p--;
2403 break;
2404 }
2405 if (flags & ROFFNUM_SCALE)
2406 *res = scaled;
2407
2408 *pos = p + 1;
2409 return 1;
2410 }
2411
/*
 * Evaluate a string comparison condition.
 * The character at v[*pos] is the delimiter.
 * Succeed if the string up to its second occurrence
 * matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or, lacking that, to the end of the line.
 * Return 1 on a match, 0 otherwise.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim, *lhs, *rhs;
	int		 equal;

	equal = 0;
	delim = v + *pos;		/* initial delimiter */
	lhs = delim + 1;		/* scans the first string */
	rhs = strchr(lhs, *delim);	/* scans the second string */

	if (rhs != NULL) {
		/* Compare both strings byte by byte. */
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: look for the final delimiter. */
				rhs = strchr(rhs, *delim);
				break;
			}
			if (*rhs == *delim) {
				/* Both strings ended simultaneously. */
				equal = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = lhs + strlen(lhs);
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return equal;
}
2454
2455 /*
2456 * Evaluate an optionally negated single character, numerical,
2457 * or string condition.
2458 */
2459 static int
2460 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2461 {
2462 const char *start, *end;
2463 char *cp, *name;
2464 size_t sz;
2465 int deftype, len, number, savepos, istrue, wanttrue;
2466
2467 if ('!' == v[*pos]) {
2468 wanttrue = 0;
2469 (*pos)++;
2470 } else
2471 wanttrue = 1;
2472
2473 switch (v[*pos]) {
2474 case '\0':
2475 return 0;
2476 case 'n':
2477 case 'o':
2478 (*pos)++;
2479 return wanttrue;
2480 case 'e':
2481 case 't':
2482 case 'v':
2483 (*pos)++;
2484 return !wanttrue;
2485 case 'c':
2486 do {
2487 (*pos)++;
2488 } while (v[*pos] == ' ');
2489
2490 /*
2491 * Quirk for groff compatibility:
2492 * The horizontal tab is neither available nor unavailable.
2493 */
2494
2495 if (v[*pos] == '\t') {
2496 (*pos)++;
2497 return 0;
2498 }
2499
2500 /* Printable ASCII characters are available. */
2501
2502 if (v[*pos] != '\\') {
2503 (*pos)++;
2504 return wanttrue;
2505 }
2506
2507 end = v + ++*pos;
2508 switch (mandoc_escape(&end, &start, &len)) {
2509 case ESCAPE_SPECIAL:
2510 istrue = mchars_spec2cp(start, len) != -1;
2511 break;
2512 case ESCAPE_UNICODE:
2513 istrue = 1;
2514 break;
2515 case ESCAPE_NUMBERED:
2516 istrue = mchars_num2char(start, len) != -1;
2517 break;
2518 default:
2519 istrue = !wanttrue;
2520 break;
2521 }
2522 *pos = end - v;
2523 return istrue == wanttrue;
2524 case 'd':
2525 case 'r':
2526 cp = v + *pos + 1;
2527 while (*cp == ' ')
2528 cp++;
2529 name = cp;
2530 sz = roff_getname(r, &cp, ln, cp - v);
2531 if (sz == 0)
2532 istrue = 0;
2533 else if (v[*pos] == 'r')
2534 istrue = roff_hasregn(r, name, sz);
2535 else {
2536 deftype = ROFFDEF_ANY;
2537 roff_getstrn(r, name, sz, &deftype);
2538 istrue = !!deftype;
2539 }
2540 *pos = cp - v;
2541 return istrue == wanttrue;
2542 default:
2543 break;
2544 }
2545
2546 savepos = *pos;
2547 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2548 return (number > 0) == wanttrue;
2549 else if (*pos == savepos)
2550 return roff_evalstrcond(v, pos) == wanttrue;
2551 else
2552 return 0;
2553 }
2554
2555 static int
2556 roff_line_ignore(ROFF_ARGS)
2557 {
2558
2559 return ROFF_IGN;
2560 }
2561
2562 static int
2563 roff_insec(ROFF_ARGS)
2564 {
2565
2566 mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]);
2567 return ROFF_IGN;
2568 }
2569
2570 static int
2571 roff_unsupp(ROFF_ARGS)
2572 {
2573
2574 mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]);
2575 return ROFF_IGN;
2576 }
2577
2578 static int
2579 roff_cond(ROFF_ARGS)
2580 {
2581 int irc;
2582
2583 roffnode_push(r, tok, NULL, ln, ppos);
2584
2585 /*
2586 * An `.el' has no conditional body: it will consume the value
2587 * of the current rstack entry set in prior `ie' calls or
2588 * defaults to DENY.
2589 *
2590 * If we're not an `el', however, then evaluate the conditional.
2591 */
2592
2593 r->last->rule = tok == ROFF_el ?
2594 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2595 roff_evalcond(r, ln, buf->buf, &pos);
2596
2597 /*
2598 * An if-else will put the NEGATION of the current evaluated
2599 * conditional into the stack of rules.
2600 */
2601
2602 if (tok == ROFF_ie) {
2603 if (r->rstackpos + 1 == r->rstacksz) {
2604 r->rstacksz += 16;
2605 r->rstack = mandoc_reallocarray(r->rstack,
2606 r->rstacksz, sizeof(int));
2607 }
2608 r->rstack[++r->rstackpos] = !r->last->rule;
2609 }
2610
2611 /* If the parent has false as its rule, then so do we. */
2612
2613 if (r->last->parent && !r->last->parent->rule)
2614 r->last->rule = 0;
2615
2616 /*
2617 * Determine scope.
2618 * If there is nothing on the line after the conditional,
2619 * not even whitespace, use next-line scope.
2620 * Except that .while does not support next-line scope.
2621 */
2622
2623 if (buf->buf[pos] == '\0' && tok != ROFF_while) {
2624 r->last->endspan = 2;
2625 goto out;
2626 }
2627
2628 while (buf->buf[pos] == ' ')
2629 pos++;
2630
2631 /* An opening brace requests multiline scope. */
2632
2633 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2634 r->last->endspan = -1;
2635 pos += 2;
2636 while (buf->buf[pos] == ' ')
2637 pos++;
2638 goto out;
2639 }
2640
2641 /*
2642 * Anything else following the conditional causes
2643 * single-line scope. Warn if the scope contains
2644 * nothing but trailing whitespace.
2645 */
2646
2647 if (buf->buf[pos] == '\0')
2648 mandoc_msg(MANDOCERR_COND_EMPTY,
2649 ln, ppos, "%s", roff_name[tok]);
2650
2651 r->last->endspan = 1;
2652
2653 out:
2654 *offs = pos;
2655 irc = ROFF_RERUN;
2656 if (tok == ROFF_while)
2657 irc |= ROFF_WHILE;
2658 return irc;
2659 }
2660
2661 static int
2662 roff_ds(ROFF_ARGS)
2663 {
2664 char *string;
2665 const char *name;
2666 size_t namesz;
2667
2668 /* Ignore groff compatibility mode for now. */
2669
2670 if (tok == ROFF_ds1)
2671 tok = ROFF_ds;
2672 else if (tok == ROFF_as1)
2673 tok = ROFF_as;
2674
2675 /*
2676 * The first word is the name of the string.
2677 * If it is empty or terminated by an escape sequence,
2678 * abort the `ds' request without defining anything.
2679 */
2680
2681 name = string = buf->buf + pos;
2682 if (*name == '\0')
2683 return ROFF_IGN;
2684
2685 namesz = roff_getname(r, &string, ln, pos);
2686 if (name[namesz] == '\\')
2687 return ROFF_IGN;
2688
2689 /* Read past the initial double-quote, if any. */
2690 if (*string == '"')
2691 string++;
2692
2693 /* The rest is the value. */
2694 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2695 ROFF_as == tok);
2696 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2697 return ROFF_IGN;
2698 }
2699
/*
 * Parse a single operator, one or two characters long,
 * starting at v[*pos].
 * If the operator is recognized, store its one-character code
 * in *res, advance the parse point, and return the code.
 * Otherwise, return 0 and leave the parse point unchanged.
 * Two-character operators are encoded as follows:
 * `<=' as 'l', `>=' as 'g', `<>' as '!', `<?' as 'i',
 * `>?' as 'a', and `==' as '='.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*cp;
	int		 width;

	cp = v + *pos;
	width = 1;
	*res = cp[0];

	switch (cp[0]) {
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case ':':
		break;
	case '<':
		if (cp[1] == '=') {
			*res = 'l';
			width = 2;
		} else if (cp[1] == '>') {
			*res = '!';
			width = 2;
		} else if (cp[1] == '?') {
			*res = 'i';
			width = 2;
		}
		break;
	case '>':
		if (cp[1] == '=') {
			*res = 'g';
			width = 2;
		} else if (cp[1] == '?') {
			*res = 'a';
			width = 2;
		}
		break;
	case '=':
		if (cp[1] == '=')
			width = 2;
		break;
	default:
		return 0;
	}

	*pos += width;
	return *res;
}
2763
2764 /*
2765 * Evaluate either a parenthesized numeric expression
2766 * or a single signed integer number.
2767 */
2768 static int
2769 roff_evalpar(struct roff *r, int ln,
2770 const char *v, int *pos, int *res, int flags)
2771 {
2772
2773 if ('(' != v[*pos])
2774 return roff_getnum(v, pos, res, flags);
2775
2776 (*pos)++;
2777 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2778 return 0;
2779
2780 /*
2781 * Omission of the closing parenthesis
2782 * is an error in validation mode,
2783 * but ignored in evaluation mode.
2784 */
2785
2786 if (')' == v[*pos])
2787 (*pos)++;
2788 else if (NULL == res)
2789 return 0;
2790
2791 return 1;
2792 }
2793
2794 /*
2795 * Evaluate a complete numeric expression.
2796 * Proceed left to right, there is no concept of precedence.
2797 */
2798 static int
2799 roff_evalnum(struct roff *r, int ln, const char *v,
2800 int *pos, int *res, int flags)
2801 {
2802 int mypos, operand2;
2803 char operator;
2804
2805 if (NULL == pos) {
2806 mypos = 0;
2807 pos = &mypos;
2808 }
2809
2810 if (flags & ROFFNUM_WHITE)
2811 while (isspace((unsigned char)v[*pos]))
2812 (*pos)++;
2813
2814 if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2815 return 0;
2816
2817 while (1) {
2818 if (flags & ROFFNUM_WHITE)
2819 while (isspace((unsigned char)v[*pos]))
2820 (*pos)++;
2821
2822 if ( ! roff_getop(v, pos, &operator))
2823 break;
2824
2825 if (flags & ROFFNUM_WHITE)
2826 while (isspace((unsigned char)v[*pos]))
2827 (*pos)++;
2828
2829 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2830 return 0;
2831
2832 if (flags & ROFFNUM_WHITE)
2833 while (isspace((unsigned char)v[*pos]))
2834 (*pos)++;
2835
2836 if (NULL == res)
2837 continue;
2838
2839 switch (operator) {
2840 case '+':
2841 *res += operand2;
2842 break;
2843 case '-':
2844 *res -= operand2;
2845 break;
2846 case '*':
2847 *res *= operand2;
2848 break;
2849 case '/':
2850 if (operand2 == 0) {
2851 mandoc_msg(MANDOCERR_DIVZERO,
2852 ln, *pos, "%s", v);
2853 *res = 0;
2854 break;
2855 }
2856 *res /= operand2;
2857 break;
2858 case '%':
2859 if (operand2 == 0) {
2860 mandoc_msg(MANDOCERR_DIVZERO,
2861 ln, *pos, "%s", v);
2862 *res = 0;
2863 break;
2864 }
2865 *res %= operand2;
2866 break;
2867 case '<':
2868 *res = *res < operand2;
2869 break;
2870 case '>':
2871 *res = *res > operand2;
2872 break;
2873 case 'l':
2874 *res = *res <= operand2;
2875 break;
2876 case 'g':
2877 *res = *res >= operand2;
2878 break;
2879 case '=':
2880 *res = *res == operand2;
2881 break;
2882 case '!':
2883 *res = *res != operand2;
2884 break;
2885 case '&':
2886 *res = *res && operand2;
2887 break;
2888 case ':':
2889 *res = *res || operand2;
2890 break;
2891 case 'i':
2892 if (operand2 < *res)
2893 *res = operand2;
2894 break;
2895 case 'a':
2896 if (operand2 > *res)
2897 *res = operand2;
2898 break;
2899 default:
2900 abort();
2901 }
2902 }
2903 return 1;
2904 }
2905
2906 /* --- register management ------------------------------------------------ */
2907
/*
 * Set the number register with the NUL-terminated name
 * without touching its auto-increment step.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	size_t	 len;

	len = strlen(name);
	roff_setregn(r, name, len, val, sign, INT_MIN);
}
2913
2914 static void
2915 roff_setregn(struct roff *r, const char *name, size_t len,
2916 int val, char sign, int step)
2917 {
2918 struct roffreg *reg;
2919
2920 /* Search for an existing register with the same name. */
2921 reg = r->regtab;
2922
2923 while (reg != NULL && (reg->key.sz != len ||
2924 strncmp(reg->key.p, name, len) != 0))
2925 reg = reg->next;
2926
2927 if (NULL == reg) {
2928 /* Create a new register. */
2929 reg = mandoc_malloc(sizeof(struct roffreg));
2930 reg->key.p = mandoc_strndup(name, len);
2931 reg->key.sz = len;
2932 reg->val = 0;
2933 reg->step = 0;
2934 reg->next = r->regtab;
2935 r->regtab = reg;
2936 }
2937
2938 if ('+' == sign)
2939 reg->val += val;
2940 else if ('-' == sign)
2941 reg->val -= val;
2942 else
2943 reg->val = val;
2944 if (step != INT_MIN)
2945 reg->step = step;
2946 }
2947
2948 /*
2949 * Handle some predefined read-only number registers.
2950 * For now, return -1 if the requested register is not predefined;
2951 * in case a predefined read-only register having the value -1
2952 * were to turn up, another special value would have to be chosen.
2953 */
2954 static int
2955 roff_getregro(const struct roff *r, const char *name)
2956 {
2957
2958 switch (*name) {
2959 case '$': /* Number of arguments of the last macro evaluated. */
2960 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
2961 case 'A': /* ASCII approximation mode is always off. */
2962 return 0;
2963 case 'g': /* Groff compatibility mode is always on. */
2964 return 1;
2965 case 'H': /* Fixed horizontal resolution. */
2966 return 24;
2967 case 'j': /* Always adjust left margin only. */
2968 return 0;
2969 case 'T': /* Some output device is always defined. */
2970 return 1;
2971 case 'V': /* Fixed vertical resolution. */
2972 return 40;
2973 default:
2974 return -1;
2975 }
2976 }
2977
/*
 * Return the value of the number register with the
 * NUL-terminated name, without auto-incrementing it.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	size_t	 len;

	len = strlen(name);
	return roff_getregn(r, name, len, '\0');
}
2983
2984 static int
2985 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
2986 {
2987 struct roffreg *reg;
2988 int val;
2989
2990 if ('.' == name[0] && 2 == len) {
2991 val = roff_getregro(r, name + 1);
2992 if (-1 != val)
2993 return val;
2994 }
2995
2996 for (reg = r->regtab; reg; reg = reg->next) {
2997 if (len == reg->key.sz &&
2998 0 == strncmp(name, reg->key.p, len)) {
2999 switch (sign) {
3000 case '+':
3001 reg->val += reg->step;
3002 break;
3003 case '-':
3004 reg->val -= reg->step;
3005 break;
3006 default:
3007 break;
3008 }
3009 return reg->val;
3010 }
3011 }
3012
3013 roff_setregn(r, name, len, 0, '\0', INT_MIN);
3014 return 0;
3015 }
3016
3017 static int
3018 roff_hasregn(const struct roff *r, const char *name, size_t len)
3019 {
3020 struct roffreg *reg;
3021 int val;
3022
3023 if ('.' == name[0] && 2 == len) {
3024 val = roff_getregro(r, name + 1);
3025 if (-1 != val)
3026 return 1;
3027 }
3028
3029 for (reg = r->regtab; reg; reg = reg->next)
3030 if (len == reg->key.sz &&
3031 0 == strncmp(name, reg->key.p, len))
3032 return 1;
3033
3034 return 0;
3035 }
3036
3037 static void
3038 roff_freereg(struct roffreg *reg)
3039 {
3040 struct roffreg *old_reg;
3041
3042 while (NULL != reg) {
3043 free(reg->key.p);
3044 old_reg = reg;
3045 reg = reg->next;
3046 free(old_reg);
3047 }
3048 }
3049
3050 static int
3051 roff_nr(ROFF_ARGS)
3052 {
3053 char *key, *val, *step;
3054 size_t keysz;
3055 int iv, is, len;
3056 char sign;
3057
3058 key = val = buf->buf + pos;
3059 if (*key == '\0')
3060 return ROFF_IGN;
3061
3062 keysz = roff_getname(r, &val, ln, pos);
3063 if (key[keysz] == '\\')
3064 return ROFF_IGN;
3065
3066 sign = *val;
3067 if (sign == '+' || sign == '-')
3068 val++;
3069
3070 len = 0;
3071 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
3072 return ROFF_IGN;
3073
3074 step = val + len;
3075 while (isspace((unsigned char)*step))
3076 step++;
3077 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
3078 is = INT_MIN;
3079
3080 roff_setregn(r, key, keysz, iv, sign, is);
3081 return ROFF_IGN;
3082 }
3083
3084 static int
3085 roff_rr(ROFF_ARGS)
3086 {
3087 struct roffreg *reg, **prev;
3088 char *name, *cp;
3089 size_t namesz;
3090
3091 name = cp = buf->buf + pos;
3092 if (*name == '\0')
3093 return ROFF_IGN;
3094 namesz = roff_getname(r, &cp, ln, pos);
3095 name[namesz] = '\0';
3096
3097 prev = &r->regtab;
3098 while (1) {
3099 reg = *prev;
3100 if (reg == NULL || !strcmp(name, reg->key.p))
3101 break;
3102 prev = &reg->next;
3103 }
3104 if (reg != NULL) {
3105 *prev = reg->next;
3106 free(reg->key.p);
3107 free(reg);
3108 }
3109 return ROFF_IGN;
3110 }
3111
3112 /* --- handler functions for roff requests -------------------------------- */
3113
3114 static int
3115 roff_rm(ROFF_ARGS)
3116 {
3117 const char *name;
3118 char *cp;
3119 size_t namesz;
3120
3121 cp = buf->buf + pos;
3122 while (*cp != '\0') {
3123 name = cp;
3124 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
3125 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
3126 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3127 if (name[namesz] == '\\')
3128 break;
3129 }
3130 return ROFF_IGN;
3131 }
3132
3133 static int
3134 roff_it(ROFF_ARGS)
3135 {
3136 int iv;
3137
3138 /* Parse the number of lines. */
3139
3140 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
3141 mandoc_msg(MANDOCERR_IT_NONUM,
3142 ln, ppos, "%s", buf->buf + 1);
3143 return ROFF_IGN;
3144 }
3145
3146 while (isspace((unsigned char)buf->buf[pos]))
3147 pos++;
3148
3149 /*
3150 * Arm the input line trap.
3151 * Special-casing "an-trap" is an ugly workaround to cope
3152 * with DocBook stupidly fiddling with man(7) internals.
3153 */
3154
3155 roffit_lines = iv;
3156 roffit_macro = mandoc_strdup(iv != 1 ||
3157 strcmp(buf->buf + pos, "an-trap") ?
3158 buf->buf + pos : "br");
3159 return ROFF_IGN;
3160 }
3161
3162 static int
3163 roff_Dd(ROFF_ARGS)
3164 {
3165 int mask;
3166 enum roff_tok t, te;
3167
3168 switch (tok) {
3169 case ROFF_Dd:
3170 tok = MDOC_Dd;
3171 te = MDOC_MAX;
3172 if (r->format == 0)
3173 r->format = MPARSE_MDOC;
3174 mask = MPARSE_MDOC | MPARSE_QUICK;
3175 break;
3176 case ROFF_TH:
3177 tok = MAN_TH;
3178 te = MAN_MAX;
3179 if (r->format == 0)
3180 r->format = MPARSE_MAN;
3181 mask = MPARSE_QUICK;
3182 break;
3183 default:
3184 abort();
3185 }
3186 if ((r->options & mask) == 0)
3187 for (t = tok; t < te; t++)
3188 roff_setstr(r, roff_name[t], NULL, 0);
3189 return ROFF_CONT;
3190 }
3191
3192 static int
3193 roff_TE(ROFF_ARGS)
3194 {
3195 r->man->flags &= ~ROFF_NONOFILL;
3196 if (r->tbl == NULL) {
3197 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE");
3198 return ROFF_IGN;
3199 }
3200 if (tbl_end(r->tbl, 0) == 0) {
3201 r->tbl = NULL;
3202 free(buf->buf);
3203 buf->buf = mandoc_strdup(".sp");
3204 buf->sz = 4;
3205 *offs = 0;
3206 return ROFF_REPARSE;
3207 }
3208 r->tbl = NULL;
3209 return ROFF_IGN;
3210 }
3211
3212 static int
3213 roff_T_(ROFF_ARGS)
3214 {
3215
3216 if (NULL == r->tbl)
3217 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&");
3218 else
3219 tbl_restart(ln, ppos, r->tbl);
3220
3221 return ROFF_IGN;
3222 }
3223
3224 /*
3225 * Handle in-line equation delimiters.
3226 */
3227 static int
3228 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
3229 {
3230 char *cp1, *cp2;
3231 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
3232
3233 /*
3234 * Outside equations, look for an opening delimiter.
3235 * If we are inside an equation, we already know it is
3236 * in-line, or this function wouldn't have been called;
3237 * so look for a closing delimiter.
3238 */
3239
3240 cp1 = buf->buf + pos;
3241 cp2 = strchr(cp1, r->eqn == NULL ?
3242 r->last_eqn->odelim : r->last_eqn->cdelim);
3243 if (cp2 == NULL)
3244 return ROFF_CONT;
3245
3246 *cp2++ = '\0';
3247 bef_pr = bef_nl = aft_nl = aft_pr = "";
3248
3249 /* Handle preceding text, protecting whitespace. */
3250
3251 if (*buf->buf != '\0') {
3252 if (r->eqn == NULL)
3253 bef_pr = "\\&";
3254 bef_nl = "\n";
3255 }
3256
3257 /*
3258 * Prepare replacing the delimiter with an equation macro
3259 * and drop leading white space from the equation.
3260 */
3261
3262 if (r->eqn == NULL) {
3263 while (*cp2 == ' ')
3264 cp2++;
3265 mac = ".EQ";
3266 } else
3267 mac = ".EN";
3268
3269 /* Handle following text, protecting whitespace. */
3270
3271 if (*cp2 != '\0') {
3272 aft_nl = "\n";
3273 if (r->eqn != NULL)
3274 aft_pr = "\\&";
3275 }
3276
3277 /* Do the actual replacement. */
3278
3279 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
3280 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
3281 free(buf->buf);
3282 buf->buf = cp1;
3283
3284 /* Toggle the in-line state of the eqn subsystem. */
3285
3286 r->eqn_inline = r->eqn == NULL;
3287 return ROFF_REPARSE;
3288 }
3289
3290 static int
3291 roff_EQ(ROFF_ARGS)
3292 {
3293 struct roff_node *n;
3294
3295 if (r->man->meta.macroset == MACROSET_MAN)
3296 man_breakscope(r->man, ROFF_EQ);
3297 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3298 if (ln > r->man->last->line)
3299 n->flags |= NODE_LINE;
3300 n->eqn = eqn_box_new();
3301 roff_node_append(r->man, n);
3302 r->man->next = ROFF_NEXT_SIBLING;
3303
3304 assert(r->eqn == NULL);
3305 if (r->last_eqn == NULL)
3306 r->last_eqn = eqn_alloc();
3307 else
3308 eqn_reset(r->last_eqn);
3309 r->eqn = r->last_eqn;
3310 r->eqn->node = n;
3311
3312 if (buf->buf[pos] != '\0')
3313 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3314 ".EQ %s", buf->buf + pos);
3315
3316 return ROFF_IGN;
3317 }
3318
3319 static int
3320 roff_EN(ROFF_ARGS)
3321 {
3322 if (r->eqn != NULL) {
3323 eqn_parse(r->eqn);
3324 r->eqn = NULL;
3325 } else
3326 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN");
3327 if (buf->buf[pos] != '\0')
3328 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3329 "EN %s", buf->buf + pos);
3330 return ROFF_IGN;
3331 }
3332
3333 static int
3334 roff_TS(ROFF_ARGS)
3335 {
3336 if (r->tbl != NULL) {
3337 mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
3338 tbl_end(r->tbl, 0);
3339 }
3340 r->man->flags |= ROFF_NONOFILL;
3341 r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
3342 if (r->last_tbl == NULL)
3343 r->first_tbl = r->tbl;
3344 r->last_tbl = r->tbl;
3345 return ROFF_IGN;
3346 }
3347
3348 static int
3349 roff_noarg(ROFF_ARGS)
3350 {
3351 if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3352 man_breakscope(r->man, tok);
3353 if (tok == ROFF_brp)
3354 tok = ROFF_br;
3355 roff_elem_alloc(r->man, ln, ppos, tok);
3356 if (buf->buf[pos] != '\0')
3357 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3358 "%s %s", roff_name[tok], buf->buf + pos);
3359 if (tok == ROFF_nf)
3360 r->man->flags |= ROFF_NOFILL;
3361 else if (tok == ROFF_fi)
3362 r->man->flags &= ~ROFF_NOFILL;
3363 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3364 r->man->next = ROFF_NEXT_SIBLING;
3365 return ROFF_IGN;
3366 }
3367
3368 static int
3369 roff_onearg(ROFF_ARGS)
3370 {
3371 struct roff_node *n;
3372 char *cp;
3373 int npos;
3374
3375 if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3376 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3377 tok == ROFF_ti))
3378 man_breakscope(r->man, tok);
3379
3380 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3381 r->man->last = roffce_node;
3382 r->man->next = ROFF_NEXT_SIBLING;
3383 }
3384
3385 roff_elem_alloc(r->man, ln, ppos, tok);
3386 n = r->man->last;
3387
3388 cp = buf->buf + pos;
3389 if (*cp != '\0') {
3390 while (*cp != '\0' && *cp != ' ')
3391 cp++;
3392 while (*cp == ' ')
3393 *cp++ = '\0';
3394 if (*cp != '\0')
3395 mandoc_msg(MANDOCERR_ARG_EXCESS,
3396 ln, (int)(cp - buf->buf),
3397 "%s ... %s", roff_name[tok], cp);
3398 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3399 }
3400
3401 if (tok == ROFF_ce || tok == ROFF_rj) {
3402 if (r->man->last->type == ROFFT_ELEM) {
3403 roff_word_alloc(r->man, ln, pos, "1");
3404 r->man->last->flags |= NODE_NOSRC;
3405 }
3406 npos = 0;
3407 if (roff_evalnum(r, ln, r->man->last->string, &npos,
3408 &roffce_lines, 0) == 0) {
3409 mandoc_msg(MANDOCERR_CE_NONUM,
3410 ln, pos, "ce %s", buf->buf + pos);
3411 roffce_lines = 1;
3412 }
3413 if (roffce_lines < 1) {
3414 r->man->last = r->man->last->parent;
3415 roffce_node = NULL;
3416 roffce_lines = 0;
3417 } else
3418 roffce_node = r->man->last->parent;
3419 } else {
3420 n->flags |= NODE_VALID | NODE_ENDED;
3421 r->man->last = n;
3422 }
3423 n->flags |= NODE_LINE;
3424 r->man->next = ROFF_NEXT_SIBLING;
3425 return ROFF_IGN;
3426 }
3427
3428 static int
3429 roff_manyarg(ROFF_ARGS)
3430 {
3431 struct roff_node *n;
3432 char *sp, *ep;
3433
3434 roff_elem_alloc(r->man, ln, ppos, tok);
3435 n = r->man->last;
3436
3437 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3438 while (*ep != '\0' && *ep != ' ')
3439 ep++;
3440 while (*ep == ' ')
3441 *ep++ = '\0';
3442 roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3443 }
3444
3445 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3446 r->man->last = n;
3447 r->man->next = ROFF_NEXT_SIBLING;
3448 return ROFF_IGN;
3449 }
3450
3451 static int
3452 roff_als(ROFF_ARGS)
3453 {
3454 char *oldn, *newn, *end, *value;
3455 size_t oldsz, newsz, valsz;
3456
3457 newn = oldn = buf->buf + pos;
3458 if (*newn == '\0')
3459 return ROFF_IGN;
3460
3461 newsz = roff_getname(r, &oldn, ln, pos);
3462 if (newn[newsz] == '\\' || *oldn == '\0')
3463 return ROFF_IGN;
3464
3465 end = oldn;
3466 oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3467 if (oldsz == 0)
3468 return ROFF_IGN;
3469
3470 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3471 (int)oldsz, oldn);
3472 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3473 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3474 free(value);
3475 return ROFF_IGN;
3476 }
3477
3478 static int
3479 roff_cc(ROFF_ARGS)
3480 {
3481 const char *p;
3482
3483 p = buf->buf + pos;
3484
3485 if (*p == '\0' || (r->control = *p++) == '.')
3486 r->control = '\0';
3487
3488 if (*p != '\0')
3489 mandoc_msg(MANDOCERR_ARG_EXCESS,
3490 ln, p - buf->buf, "cc ... %s", p);
3491
3492 return ROFF_IGN;
3493 }
3494
3495 static int
3496 roff_char(ROFF_ARGS)
3497 {
3498 const char *p, *kp, *vp;
3499 size_t ksz, vsz;
3500 int font;
3501
3502 /* Parse the character to be replaced. */
3503
3504 kp = buf->buf + pos;
3505 p = kp + 1;
3506 if (*kp == '\0' || (*kp == '\\' &&
3507 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3508 (*p != ' ' && *p != '\0')) {
3509 mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp);
3510 return ROFF_IGN;
3511 }
3512 ksz = p - kp;
3513 while (*p == ' ')
3514 p++;
3515
3516 /*
3517 * If the replacement string contains a font escape sequence,
3518 * we have to restore the font at the end.
3519 */
3520
3521 vp = p;
3522 vsz = strlen(p);
3523 font = 0;
3524 while (*p != '\0') {
3525 if (*p++ != '\\')
3526 continue;
3527 switch (mandoc_escape(&p, NULL, NULL)) {
3528 case ESCAPE_FONT:
3529 case ESCAPE_FONTROMAN:
3530 case ESCAPE_FONTITALIC:
3531 case ESCAPE_FONTBOLD:
3532 case ESCAPE_FONTBI:
3533 case ESCAPE_FONTCW:
3534 case ESCAPE_FONTPREV:
3535 font++;
3536 break;
3537 default:
3538 break;
3539 }
3540 }
3541 if (font > 1)
3542 mandoc_msg(MANDOCERR_CHAR_FONT,
3543 ln, (int)(vp - buf->buf), "%s", vp);
3544
3545 /*
3546 * Approximate the effect of .char using the .tr tables.
3547 * XXX In groff, .char and .tr interact differently.
3548 */
3549
3550 if (ksz == 1) {
3551 if (r->xtab == NULL)
3552 r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3553 assert((unsigned int)*kp < 128);
3554 free(r->xtab[(int)*kp].p);
3555 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3556 "%s%s", vp, font ? "\fP" : "");
3557 } else {
3558 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3559 if (font)
3560 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3561 }
3562 return ROFF_IGN;
3563 }
3564
3565 static int
3566 roff_ec(ROFF_ARGS)
3567 {
3568 const char *p;
3569
3570 p = buf->buf + pos;
3571 if (*p == '\0')
3572 r->escape = '\\';
3573 else {
3574 r->escape = *p;
3575 if (*++p != '\0')
3576 mandoc_msg(MANDOCERR_ARG_EXCESS, ln,
3577 (int)(p - buf->buf), "ec ... %s", p);
3578 }
3579 return ROFF_IGN;
3580 }
3581
3582 static int
3583 roff_eo(ROFF_ARGS)
3584 {
3585 r->escape = '\0';
3586 if (buf->buf[pos] != '\0')
3587 mandoc_msg(MANDOCERR_ARG_SKIP,
3588 ln, pos, "eo %s", buf->buf + pos);
3589 return ROFF_IGN;
3590 }
3591
3592 static int
3593 roff_nop(ROFF_ARGS)
3594 {
3595 while (buf->buf[pos] == ' ')
3596 pos++;
3597 *offs = pos;
3598 return ROFF_RERUN;
3599 }
3600
3601 static int
3602 roff_tr(ROFF_ARGS)
3603 {
3604 const char *p, *first, *second;
3605 size_t fsz, ssz;
3606 enum mandoc_esc esc;
3607
3608 p = buf->buf + pos;
3609
3610 if (*p == '\0') {
3611 mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr");
3612 return ROFF_IGN;
3613 }
3614
3615 while (*p != '\0') {
3616 fsz = ssz = 1;
3617
3618 first = p++;
3619 if (*first == '\\') {
3620 esc = mandoc_escape(&p, NULL, NULL);
3621 if (esc == ESCAPE_ERROR) {
3622 mandoc_msg(MANDOCERR_ESC_BAD, ln,
3623 (int)(p - buf->buf), "%s", first);
3624 return ROFF_IGN;
3625 }
3626 fsz = (size_t)(p - first);
3627 }
3628
3629 second = p++;
3630 if (*second == '\\') {
3631 esc = mandoc_escape(&p, NULL, NULL);
3632 if (esc == ESCAPE_ERROR) {
3633 mandoc_msg(MANDOCERR_ESC_BAD, ln,
3634 (int)(p - buf->buf), "%s", second);
3635 return ROFF_IGN;
3636 }
3637 ssz = (size_t)(p - second);
3638 } else if (*second == '\0') {
3639 mandoc_msg(MANDOCERR_TR_ODD, ln,
3640 (int)(first - buf->buf), "tr %s", first);
3641 second = " ";
3642 p--;
3643 }
3644
3645 if (fsz > 1) {
3646 roff_setstrn(&r->xmbtab, first, fsz,
3647 second, ssz, 0);
3648 continue;
3649 }
3650
3651 if (r->xtab == NULL)
3652 r->xtab = mandoc_calloc(128,
3653 sizeof(struct roffstr));
3654
3655 free(r->xtab[(int)*first].p);
3656 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3657 r->xtab[(int)*first].sz = ssz;
3658 }
3659
3660 return ROFF_IGN;
3661 }
3662
3663 /*
3664 * Implementation of the .return request.
3665 * There is no need to call roff_userret() from here.
3666 * The read module will call that after rewinding the reader stack
3667 * to the place from where the current macro was called.
3668 */
3669 static int
3670 roff_return(ROFF_ARGS)
3671 {
3672 if (r->mstackpos >= 0)
3673 return ROFF_IGN | ROFF_USERRET;
3674
3675 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return");
3676 return ROFF_IGN;
3677 }
3678
3679 static int
3680 roff_rn(ROFF_ARGS)
3681 {
3682 const char *value;
3683 char *oldn, *newn, *end;
3684 size_t oldsz, newsz;
3685 int deftype;
3686
3687 oldn = newn = buf->buf + pos;
3688 if (*oldn == '\0')
3689 return ROFF_IGN;
3690
3691 oldsz = roff_getname(r, &newn, ln, pos);
3692 if (oldn[oldsz] == '\\' || *newn == '\0')
3693 return ROFF_IGN;
3694
3695 end = newn;
3696 newsz = roff_getname(r, &end, ln, newn - buf->buf);
3697 if (newsz == 0)
3698 return ROFF_IGN;
3699
3700 deftype = ROFFDEF_ANY;
3701 value = roff_getstrn(r, oldn, oldsz, &deftype);
3702 switch (deftype) {
3703 case ROFFDEF_USER:
3704 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3705 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3706 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3707 break;
3708 case ROFFDEF_PRE:
3709 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3710 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3711 break;
3712 case ROFFDEF_REN:
3713 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3714 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3715 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3716 break;
3717 case ROFFDEF_STD:
3718 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3719 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3720 break;
3721 default:
3722 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3723 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3724 break;
3725 }
3726 return ROFF_IGN;
3727 }
3728
3729 static int
3730 roff_shift(ROFF_ARGS)
3731 {
3732 struct mctx *ctx;
3733 int levels, i;
3734
3735 levels = 1;
3736 if (buf->buf[pos] != '\0' &&
3737 roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
3738 mandoc_msg(MANDOCERR_CE_NONUM,
3739 ln, pos, "shift %s", buf->buf + pos);
3740 levels = 1;
3741 }
3742 if (r->mstackpos < 0) {
3743 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift");
3744 return ROFF_IGN;
3745 }
3746 ctx = r->mstack + r->mstackpos;
3747 if (levels > ctx->argc) {
3748 mandoc_msg(MANDOCERR_SHIFT,
3749 ln, pos, "%d, but max is %d", levels, ctx->argc);
3750 levels = ctx->argc;
3751 }
3752 if (levels == 0)
3753 return ROFF_IGN;
3754 for (i = 0; i < levels; i++)
3755 free(ctx->argv[i]);
3756 ctx->argc -= levels;
3757 for (i = 0; i < ctx->argc; i++)
3758 ctx->argv[i] = ctx->argv[i + levels];
3759 return ROFF_IGN;
3760 }
3761
3762 static int
3763 roff_so(ROFF_ARGS)
3764 {
3765 char *name, *cp;
3766
3767 name = buf->buf + pos;
3768 mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name);
3769
3770 /*
3771 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3772 * opening anything that's not in our cwd or anything beneath
3773 * it. Thus, explicitly disallow traversing up the file-system
3774 * or using absolute paths.
3775 */
3776
3777 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3778 mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name);
3779 buf->sz = mandoc_asprintf(&cp,
3780 ".sp\nSee the file %s.\n.sp", name) + 1;
3781 free(buf->buf);
3782 buf->buf = cp;
3783 *offs = 0;
3784 return ROFF_REPARSE;
3785 }
3786
3787 *offs = pos;
3788 return ROFF_SO;
3789 }
3790
3791 /* --- user defined strings and macros ------------------------------------ */
3792
3793 static int
3794 roff_userdef(ROFF_ARGS)
3795 {
3796 struct mctx *ctx;
3797 char *arg, *ap, *dst, *src;
3798 size_t sz;
3799
3800 /* Initialize a new macro stack context. */
3801
3802 if (++r->mstackpos == r->mstacksz) {
3803 r->mstack = mandoc_recallocarray(r->mstack,
3804 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
3805 r->mstacksz += 8;
3806 }
3807 ctx = r->mstack + r->mstackpos;
3808 ctx->argsz = 0;
3809 ctx->argc = 0;
3810 ctx->argv = NULL;
3811
3812 /*
3813 * Collect pointers to macro argument strings,
3814 * NUL-terminating them and escaping quotes.
3815 */
3816
3817 src = buf->buf + pos;
3818 while (*src != '\0') {
3819 if (ctx->argc == ctx->argsz) {
3820 ctx->argsz += 8;
3821 ctx->argv = mandoc_reallocarray(ctx->argv,
3822 ctx->argsz, sizeof(*ctx->argv));
3823 }
3824 arg = roff_getarg(r, &src, ln, &pos);
3825 sz = 1; /* For the terminating NUL. */
3826 for (ap = arg; *ap != '\0'; ap++)
3827 sz += *ap == '"' ? 4 : 1;
3828 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
3829 for (ap = arg; *ap != '\0'; ap++) {
3830 if (*ap == '"') {
3831 memcpy(dst, "\\(dq", 4);
3832 dst += 4;
3833 } else
3834 *dst++ = *ap;
3835 }
3836 *dst = '\0';
3837 free(arg);
3838 }
3839
3840 /* Replace the macro invocation by the macro definition. */
3841
3842 free(buf->buf);
3843 buf->buf = mandoc_strdup(r->current_string);
3844 buf->sz = strlen(buf->buf) + 1;
3845 *offs = 0;
3846
3847 return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
3848 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
3849 }
3850
3851 /*
3852 * Calling a high-level macro that was renamed with .rn.
3853 * r->current_string has already been set up by roff_parse().
3854 */
3855 static int
3856 roff_renamed(ROFF_ARGS)
3857 {
3858 char *nbuf;
3859
3860 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
3861 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
3862 free(buf->buf);
3863 buf->buf = nbuf;
3864 *offs = 0;
3865 return ROFF_CONT;
3866 }
3867
3868 static size_t
3869 roff_getname(struct roff *r, char **cpp, int ln, int pos)
3870 {
3871 char *name, *cp;
3872 size_t namesz;
3873
3874 name = *cpp;
3875 if ('\0' == *name)
3876 return 0;
3877
3878 /* Read until end of name and terminate it with NUL. */
3879 for (cp = name; 1; cp++) {
3880 if ('\0' == *cp || ' ' == *cp) {
3881 namesz = cp - name;
3882 break;
3883 }
3884 if ('\\' != *cp)
3885 continue;
3886 namesz = cp - name;
3887 if ('{' == cp[1] || '}' == cp[1])
3888 break;
3889 cp++;
3890 if ('\\' == *cp)
3891 continue;
3892 mandoc_msg(MANDOCERR_NAMESC, ln, pos,
3893 "%.*s", (int)(cp - name + 1), name);
3894 mandoc_escape((const char **)&cp, NULL, NULL);
3895 break;
3896 }
3897
3898 /* Read past spaces. */
3899 while (' ' == *cp)
3900 cp++;
3901
3902 *cpp = cp;
3903 return namesz;
3904 }
3905
3906 /*
3907 * Store *string into the user-defined string called *name.
3908 * To clear an existing entry, call with (*r, *name, NULL, 0).
3909 * append == 0: replace mode
3910 * append == 1: single-line append mode
3911 * append == 2: multiline append mode, append '\n' after each call
3912 */
3913 static void
3914 roff_setstr(struct roff *r, const char *name, const char *string,
3915 int append)
3916 {
3917 size_t namesz;
3918
3919 namesz = strlen(name);
3920 roff_setstrn(&r->strtab, name, namesz, string,
3921 string ? strlen(string) : 0, append);
3922 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3923 }
3924
3925 static void
3926 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
3927 const char *string, size_t stringsz, int append)
3928 {
3929 struct roffkv *n;
3930 char *c;
3931 int i;
3932 size_t oldch, newch;
3933
3934 /* Search for an existing string with the same name. */
3935 n = *r;
3936
3937 while (n && (namesz != n->key.sz ||
3938 strncmp(n->key.p, name, namesz)))
3939 n = n->next;
3940
3941 if (NULL == n) {
3942 /* Create a new string table entry. */
3943 n = mandoc_malloc(sizeof(struct roffkv));
3944 n->key.p = mandoc_strndup(name, namesz);
3945 n->key.sz = namesz;
3946 n->val.p = NULL;
3947 n->val.sz = 0;
3948 n->next = *r;
3949 *r = n;
3950 } else if (0 == append) {
3951 free(n->val.p);
3952 n->val.p = NULL;
3953 n->val.sz = 0;
3954 }
3955
3956 if (NULL == string)
3957 return;
3958
3959 /*
3960 * One additional byte for the '\n' in multiline mode,
3961 * and one for the terminating '\0'.
3962 */
3963 newch = stringsz + (1 < append ? 2u : 1u);
3964
3965 if (NULL == n->val.p) {
3966 n->val.p = mandoc_malloc(newch);
3967 *n->val.p = '\0';
3968 oldch = 0;
3969 } else {
3970 oldch = n->val.sz;
3971 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
3972 }
3973
3974 /* Skip existing content in the destination buffer. */
3975 c = n->val.p + (int)oldch;
3976
3977 /* Append new content to the destination buffer. */
3978 i = 0;
3979 while (i < (int)stringsz) {
3980 /*
3981 * Rudimentary roff copy mode:
3982 * Handle escaped backslashes.
3983 */
3984 if ('\\' == string[i] && '\\' == string[i + 1])
3985 i++;
3986 *c++ = string[i++];
3987 }
3988
3989 /* Append terminating bytes. */
3990 if (1 < append)
3991 *c++ = '\n';
3992
3993 *c = '\0';
3994 n->val.sz = (int)(c - n->val.p);
3995 }
3996
3997 static const char *
3998 roff_getstrn(struct roff *r, const char *name, size_t len,
3999 int *deftype)
4000 {
4001 const struct roffkv *n;
4002 int found, i;
4003 enum roff_tok tok;
4004
4005 found = 0;
4006 for (n = r->strtab; n != NULL; n = n->next) {
4007 if (strncmp(name, n->key.p, len) != 0 ||
4008 n->key.p[len] != '\0' || n->val.p == NULL)
4009 continue;
4010 if (*deftype & ROFFDEF_USER) {
4011 *deftype = ROFFDEF_USER;
4012 return n->val.p;
4013 } else {
4014 found = 1;
4015 break;
4016 }
4017 }
4018 for (n = r->rentab; n != NULL; n = n->next) {
4019 if (strncmp(name, n->key.p, len) != 0 ||
4020 n->key.p[len] != '\0' || n->val.p == NULL)
4021 continue;
4022 if (*deftype & ROFFDEF_REN) {
4023 *deftype = ROFFDEF_REN;
4024 return n->val.p;
4025 } else {
4026 found = 1;
4027 break;
4028 }
4029 }
4030 for (i = 0; i < PREDEFS_MAX; i++) {
4031 if (strncmp(name, predefs[i].name, len) != 0 ||
4032 predefs[i].name[len] != '\0')
4033 continue;
4034 if (*deftype & ROFFDEF_PRE) {
4035 *deftype = ROFFDEF_PRE;
4036 return predefs[i].str;
4037 } else {
4038 found = 1;
4039 break;
4040 }
4041 }
4042 if (r->man->meta.macroset != MACROSET_MAN) {
4043 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
4044 if (strncmp(name, roff_name[tok], len) != 0 ||
4045 roff_name[tok][len] != '\0')
4046 continue;
4047 if (*deftype & ROFFDEF_STD) {
4048 *deftype = ROFFDEF_STD;
4049 return NULL;
4050 } else {
4051 found = 1;
4052 break;
4053 }
4054 }
4055 }
4056 if (r->man->meta.macroset != MACROSET_MDOC) {
4057 for (tok = MAN_TH; tok < MAN_MAX; tok++) {
4058 if (strncmp(name, roff_name[tok], len) != 0 ||
4059 roff_name[tok][len] != '\0')
4060 continue;
4061 if (*deftype & ROFFDEF_STD) {
4062 *deftype = ROFFDEF_STD;
4063 return NULL;
4064 } else {
4065 found = 1;
4066 break;
4067 }
4068 }
4069 }
4070
4071 if (found == 0 && *deftype != ROFFDEF_ANY) {
4072 if (*deftype & ROFFDEF_REN) {
4073 /*
4074 * This might still be a request,
4075 * so do not treat it as undefined yet.
4076 */
4077 *deftype = ROFFDEF_UNDEF;
4078 return NULL;
4079 }
4080
4081 /* Using an undefined string defines it to be empty. */
4082
4083 roff_setstrn(&r->strtab, name, len, "", 0, 0);
4084 roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
4085 }
4086
4087 *deftype = 0;
4088 return NULL;
4089 }
4090
4091 static void
4092 roff_freestr(struct roffkv *r)
4093 {
4094 struct roffkv *n, *nn;
4095
4096 for (n = r; n; n = nn) {
4097 free(n->key.p);
4098 free(n->val.p);
4099 nn = n->next;
4100 free(n);
4101 }
4102 }
4103
4104 /* --- accessors and utility functions ------------------------------------ */
4105
4106 /*
4107 * Duplicate an input string, making the appropriate character
4108 * conversations (as stipulated by `tr') along the way.
4109 * Returns a heap-allocated string with all the replacements made.
4110 */
4111 char *
4112 roff_strdup(const struct roff *r, const char *p)
4113 {
4114 const struct roffkv *cp;
4115 char *res;
4116 const char *pp;
4117 size_t ssz, sz;
4118 enum mandoc_esc esc;
4119
4120 if (NULL == r->xmbtab && NULL == r->xtab)
4121 return mandoc_strdup(p);
4122 else if ('\0' == *p)
4123 return mandoc_strdup("");
4124
4125 /*
4126 * Step through each character looking for term matches
4127 * (remember that a `tr' can be invoked with an escape, which is
4128 * a glyph but the escape is multi-character).
4129 * We only do this if the character hash has been initialised
4130 * and the string is >0 length.
4131 */
4132
4133 res = NULL;
4134 ssz = 0;
4135
4136 while ('\0' != *p) {
4137 assert((unsigned int)*p < 128);
4138 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4139 sz = r->xtab[(int)*p].sz;
4140 res = mandoc_realloc(res, ssz + sz + 1);
4141 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4142 ssz += sz;
4143 p++;
4144 continue;
4145 } else if ('\\' != *p) {
4146 res = mandoc_realloc(res, ssz + 2);
4147 res[ssz++] = *p++;
4148 continue;
4149 }
4150
4151 /* Search for term matches. */
4152 for (cp = r->xmbtab; cp; cp = cp->next)
4153 if (0 == strncmp(p, cp->key.p, cp->key.sz))
4154 break;
4155
4156 if (NULL != cp) {
4157 /*
4158 * A match has been found.
4159 * Append the match to the array and move
4160 * forward by its keysize.
4161 */
4162 res = mandoc_realloc(res,
4163 ssz + cp->val.sz + 1);
4164 memcpy(res + ssz, cp->val.p, cp->val.sz);
4165 ssz += cp->val.sz;
4166 p += (int)cp->key.sz;
4167 continue;
4168 }
4169
4170 /*
4171 * Handle escapes carefully: we need to copy
4172 * over just the escape itself, or else we might
4173 * do replacements within the escape itself.
4174 * Make sure to pass along the bogus string.
4175 */
4176 pp = p++;
4177 esc = mandoc_escape(&p, NULL, NULL);
4178 if (ESCAPE_ERROR == esc) {
4179 sz = strlen(pp);
4180 res = mandoc_realloc(res, ssz + sz + 1);
4181 memcpy(res + ssz, pp, sz);
4182 break;
4183 }
4184 /*
4185 * We bail out on bad escapes.
4186 * No need to warn: we already did so when
4187 * roff_expand() was called.
4188 */
4189 sz = (int)(p - pp);
4190 res = mandoc_realloc(res, ssz + sz + 1);
4191 memcpy(res + ssz, pp, sz);
4192 ssz += sz;
4193 }
4194
4195 res[(int)ssz] = '\0';
4196 return res;
4197 }
4198
4199 int
4200 roff_getformat(const struct roff *r)
4201 {
4202
4203 return r->format;
4204 }
4205
4206 /*
4207 * Find out whether a line is a macro line or not.
4208 * If it is, adjust the current position and return one; if it isn't,
4209 * return zero and don't change the current position.
4210 * If the control character has been set with `.cc', then let that grain
4211 * precedence.
4212 * This is slighly contrary to groff, where using the non-breaking
4213 * control character when `cc' has been invoked will cause the
4214 * non-breaking macro contents to be printed verbatim.
4215 */
4216 int
4217 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4218 {
4219 int pos;
4220
4221 pos = *ppos;
4222
4223 if (r->control != '\0' && cp[pos] == r->control)
4224 pos++;
4225 else if (r->control != '\0')
4226 return 0;
4227 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4228 pos += 2;
4229 else if ('.' == cp[pos] || '\'' == cp[pos])
4230 pos++;
4231 else
4232 return 0;
4233
4234 while (' ' == cp[pos] || '\t' == cp[pos])
4235 pos++;
4236
4237 *ppos = pos;
4238 return 1;
4239 }