]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
Cleanup, minus 15 LOC, no functional change:
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.357 2018/12/31 04:55:47 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31 #include "mandoc_aux.h"
32 #include "mandoc_ohash.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "mandoc_parse.h"
36 #include "libmandoc.h"
37 #include "roff_int.h"
38 #include "tbl_parse.h"
39 #include "eqn_parse.h"
40
41 /*
42 * ASCII_ESC is used to signal from roff_getarg() to roff_expand()
43 * that an escape sequence resulted from copy-in processing and
44 * needs to be checked or interpolated. As it is used nowhere
45 * else, it is defined here rather than in a header file.
46 */
47 #define ASCII_ESC 27
48
49 /* Maximum number of string expansions per line, to break infinite loops. */
50 #define EXPAND_LIMIT 1000
51
52 /* Types of definitions of macros and strings. */
53 #define ROFFDEF_USER (1 << 1) /* User-defined. */
54 #define ROFFDEF_PRE (1 << 2) /* Predefined. */
55 #define ROFFDEF_REN (1 << 3) /* Renamed standard macro. */
56 #define ROFFDEF_STD (1 << 4) /* mdoc(7) or man(7) macro. */
57 #define ROFFDEF_ANY (ROFFDEF_USER | ROFFDEF_PRE | \
58 ROFFDEF_REN | ROFFDEF_STD)
59 #define ROFFDEF_UNDEF (1 << 5) /* Completely undefined. */
60
61 /* --- data types --------------------------------------------------------- */
62
63 /*
64 * An incredibly-simple string buffer.
65 */
66 struct roffstr {
67 char *p; /* nil-terminated buffer */
68 size_t sz; /* saved strlen(p) */
69 };
70
71 /*
72 * A key-value roffstr pair as part of a singly-linked list.
73 */
74 struct roffkv {
75 struct roffstr key;
76 struct roffstr val;
77 struct roffkv *next; /* next in list */
78 };
79
80 /*
81 * A single number register as part of a singly-linked list.
82 */
83 struct roffreg {
84 struct roffstr key; /* lookup key of this register */
85 int val; /* value stored in the register */
86 int step; /* NOTE(review): presumably the auto-increment step - confirm */
87 struct roffreg *next; /* next in list, or NULL */
88 };
89
90 /*
91 * Association of request and macro names with token IDs.
92 */
93 struct roffreq {
94 enum roff_tok tok; /* token ID, returned by roffhash_find() */
95 char name[]; /* NUL-terminated name; its offset is handed to mandoc_ohash_init() */
96 };
97
98 /*
99 * A macro processing context.
100 * More than one is needed when macro calls are nested.
101 */
102 struct mctx {
103 char **argv; /* argument vector; the array itself is freed in roff_free() */
104 int argc; /* NOTE(review): presumably the number of entries in use - confirm */
105 int argsz; /* NOTE(review): presumably the allocated length of argv - confirm */
106 };
107
/* Complete state of one roff parser; created by roff_alloc(). */
108 struct roff {
109 struct roff_man *man; /* mdoc or man parser */
110 struct roffnode *last; /* leaf of stack, NULL when the stack is empty */
111 struct mctx *mstack; /* stack of macro contexts */
112 int *rstack; /* stack of inverted `ie' values */
113 struct ohash *reqtab; /* request lookup table */
114 struct roffreg *regtab; /* number registers */
115 struct roffkv *strtab; /* user-defined strings & macros */
116 struct roffkv *rentab; /* renamed strings & macros */
117 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */
118 struct roffstr *xtab; /* single-byte trans table (`tr'); roff_free1() frees 128 entries */
119 const char *current_string; /* value of last called user macro */
120 struct tbl_node *first_tbl; /* first table parsed */
121 struct tbl_node *last_tbl; /* last table parsed */
122 struct tbl_node *tbl; /* current table being parsed */
123 struct eqn_node *last_eqn; /* equation parser */
124 struct eqn_node *eqn; /* active equation parser */
125 int eqn_inline; /* current equation is inline */
126 int options; /* parse options */
127 int mstacksz; /* current size of mstack */
128 int mstackpos; /* position in mstack; initialized to -1 */
129 int rstacksz; /* current size limit of rstack */
130 int rstackpos; /* position in rstack; initialized and reset to -1 */
131 int format; /* current file in mdoc or man format; starts as options & (MPARSE_MDOC | MPARSE_MAN) */
132 char control; /* control character; '\0' after alloc and reset */
133 char escape; /* escape character; '\\' after alloc and reset */
134 };
135
/* One entry of the stack managed by roffnode_push() and roffnode_pop(). */
136 struct roffnode {
137 enum roff_tok tok; /* type of node */
138 struct roffnode *parent; /* up one in stack, NULL at the bottom */
139 int line; /* parse line */
140 int col; /* parse col */
141 char *name; /* node name, e.g. macro name; freed on pop */
142 char *end; /* end-rules: custom token; freed on pop */
143 int endspan; /* end-rules: next-line or infty */
144 int rule; /* current evaluation rule; copied from the parent on push */
145 };
146
147 #define ROFF_ARGS struct roff *r, /* parse ctx */ \
148 enum roff_tok tok, /* tok of macro */ \
149 struct buf *buf, /* input buffer */ \
150 int ln, /* parse line */ \
151 int ppos, /* original pos in buffer */ \
152 int pos, /* current pos in buffer */ \
153 int *offs /* reset offset of buffer data */
154
155 typedef int (*roffproc)(ROFF_ARGS);
156
/* Handler set for one request; the roffs[] table holds one per token. */
157 struct roffmac {
158 roffproc proc; /* process new macro */
159 roffproc text; /* process as child text of macro; may be NULL */
160 roffproc sub; /* process as child of macro; may be NULL */
161 int flags; /* 0 or ROFFMAC_STRUCT */
162 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
163 };
164
165 struct predef {
166 const char *name; /* predefined input name */
167 const char *str; /* replacement symbol */
168 };
169
170 #define PREDEF(__name, __str) \
171 { (__name), (__str) },
172
173 /* --- function prototypes ------------------------------------------------ */
174
175 static int roffnode_cleanscope(struct roff *);
176 static int roffnode_pop(struct roff *);
177 static void roffnode_push(struct roff *, enum roff_tok,
178 const char *, int, int);
179 static void roff_addtbl(struct roff_man *, int, struct tbl_node *);
180 static int roff_als(ROFF_ARGS);
181 static int roff_block(ROFF_ARGS);
182 static int roff_block_text(ROFF_ARGS);
183 static int roff_block_sub(ROFF_ARGS);
184 static int roff_br(ROFF_ARGS);
185 static int roff_cblock(ROFF_ARGS);
186 static int roff_cc(ROFF_ARGS);
187 static int roff_ccond(struct roff *, int, int);
188 static int roff_char(ROFF_ARGS);
189 static int roff_cond(ROFF_ARGS);
190 static int roff_cond_text(ROFF_ARGS);
191 static int roff_cond_sub(ROFF_ARGS);
192 static int roff_ds(ROFF_ARGS);
193 static int roff_ec(ROFF_ARGS);
194 static int roff_eo(ROFF_ARGS);
195 static int roff_eqndelim(struct roff *, struct buf *, int);
196 static int roff_evalcond(struct roff *r, int, char *, int *);
197 static int roff_evalnum(struct roff *, int,
198 const char *, int *, int *, int);
199 static int roff_evalpar(struct roff *, int,
200 const char *, int *, int *, int);
201 static int roff_evalstrcond(const char *, int *);
202 static int roff_expand(struct roff *, struct buf *,
203 int, int, char);
204 static void roff_free1(struct roff *);
205 static void roff_freereg(struct roffreg *);
206 static void roff_freestr(struct roffkv *);
207 static size_t roff_getname(struct roff *, char **, int, int);
208 static int roff_getnum(const char *, int *, int *, int);
209 static int roff_getop(const char *, int *, char *);
210 static int roff_getregn(struct roff *,
211 const char *, size_t, char);
212 static int roff_getregro(const struct roff *,
213 const char *name);
214 static const char *roff_getstrn(struct roff *,
215 const char *, size_t, int *);
216 static int roff_hasregn(const struct roff *,
217 const char *, size_t);
218 static int roff_insec(ROFF_ARGS);
219 static int roff_it(ROFF_ARGS);
220 static int roff_line_ignore(ROFF_ARGS);
221 static void roff_man_alloc1(struct roff_man *);
222 static void roff_man_free1(struct roff_man *);
223 static int roff_manyarg(ROFF_ARGS);
224 static int roff_nop(ROFF_ARGS);
225 static int roff_nr(ROFF_ARGS);
226 static int roff_onearg(ROFF_ARGS);
227 static enum roff_tok roff_parse(struct roff *, char *, int *,
228 int, int);
229 static int roff_parsetext(struct roff *, struct buf *,
230 int, int *);
231 static int roff_renamed(ROFF_ARGS);
232 static int roff_return(ROFF_ARGS);
233 static int roff_rm(ROFF_ARGS);
234 static int roff_rn(ROFF_ARGS);
235 static int roff_rr(ROFF_ARGS);
236 static void roff_setregn(struct roff *, const char *,
237 size_t, int, char, int);
238 static void roff_setstr(struct roff *,
239 const char *, const char *, int);
240 static void roff_setstrn(struct roffkv **, const char *,
241 size_t, const char *, size_t, int);
242 static int roff_shift(ROFF_ARGS);
243 static int roff_so(ROFF_ARGS);
244 static int roff_tr(ROFF_ARGS);
245 static int roff_Dd(ROFF_ARGS);
246 static int roff_TE(ROFF_ARGS);
247 static int roff_TS(ROFF_ARGS);
248 static int roff_EQ(ROFF_ARGS);
249 static int roff_EN(ROFF_ARGS);
250 static int roff_T_(ROFF_ARGS);
251 static int roff_unsupp(ROFF_ARGS);
252 static int roff_userdef(ROFF_ARGS);
253
254 /* --- constant data ------------------------------------------------------ */
255
256 #define ROFFNUM_SCALE (1 << 0) /* Honour scaling in roff_getnum(). */
257 #define ROFFNUM_WHITE (1 << 1) /* Skip whitespace in roff_evalnum(). */
258
259 const char *__roff_name[MAN_MAX + 1] = {
260 "br", "ce", "ft", "ll",
261 "mc", "po", "rj", "sp",
262 "ta", "ti", NULL,
263 "ab", "ad", "af", "aln",
264 "als", "am", "am1", "ami",
265 "ami1", "as", "as1", "asciify",
266 "backtrace", "bd", "bleedat", "blm",
267 "box", "boxa", "bp", "BP",
268 "break", "breakchar", "brnl", "brp",
269 "brpnl", "c2", "cc",
270 "cf", "cflags", "ch", "char",
271 "chop", "class", "close", "CL",
272 "color", "composite", "continue", "cp",
273 "cropat", "cs", "cu", "da",
274 "dch", "Dd", "de", "de1",
275 "defcolor", "dei", "dei1", "device",
276 "devicem", "di", "do", "ds",
277 "ds1", "dwh", "dt", "ec",
278 "ecr", "ecs", "el", "em",
279 "EN", "eo", "EP", "EQ",
280 "errprint", "ev", "evc", "ex",
281 "fallback", "fam", "fc", "fchar",
282 "fcolor", "fdeferlig", "feature", "fkern",
283 "fl", "flig", "fp", "fps",
284 "fschar", "fspacewidth", "fspecial", "ftr",
285 "fzoom", "gcolor", "hc", "hcode",
286 "hidechar", "hla", "hlm", "hpf",
287 "hpfa", "hpfcode", "hw", "hy",
288 "hylang", "hylen", "hym", "hypp",
289 "hys", "ie", "if", "ig",
290 "index", "it", "itc", "IX",
291 "kern", "kernafter", "kernbefore", "kernpair",
292 "lc", "lc_ctype", "lds", "length",
293 "letadj", "lf", "lg", "lhang",
294 "linetabs", "lnr", "lnrf", "lpfx",
295 "ls", "lsm", "lt",
296 "mediasize", "minss", "mk", "mso",
297 "na", "ne", "nh", "nhychar",
298 "nm", "nn", "nop", "nr",
299 "nrf", "nroff", "ns", "nx",
300 "open", "opena", "os", "output",
301 "padj", "papersize", "pc", "pev",
302 "pi", "PI", "pl", "pm",
303 "pn", "pnr", "ps",
304 "psbb", "pshape", "pso", "ptr",
305 "pvs", "rchar", "rd", "recursionlimit",
306 "return", "rfschar", "rhang",
307 "rm", "rn", "rnn", "rr",
308 "rs", "rt", "schar", "sentchar",
309 "shc", "shift", "sizes", "so",
310 "spacewidth", "special", "spreadwarn", "ss",
311 "sty", "substring", "sv", "sy",
312 "T&", "tc", "TE",
313 "TH", "tkf", "tl",
314 "tm", "tm1", "tmc", "tr",
315 "track", "transchar", "trf", "trimat",
316 "trin", "trnt", "troff", "TS",
317 "uf", "ul", "unformat", "unwatch",
318 "unwatchn", "vpt", "vs", "warn",
319 "warnscale", "watch", "watchlength", "watchn",
320 "wh", "while", "write", "writec",
321 "writem", "xflag", ".", NULL,
322 NULL, "text",
323 "Dd", "Dt", "Os", "Sh",
324 "Ss", "Pp", "D1", "Dl",
325 "Bd", "Ed", "Bl", "El",
326 "It", "Ad", "An", "Ap",
327 "Ar", "Cd", "Cm", "Dv",
328 "Er", "Ev", "Ex", "Fa",
329 "Fd", "Fl", "Fn", "Ft",
330 "Ic", "In", "Li", "Nd",
331 "Nm", "Op", "Ot", "Pa",
332 "Rv", "St", "Va", "Vt",
333 "Xr", "%A", "%B", "%D",
334 "%I", "%J", "%N", "%O",
335 "%P", "%R", "%T", "%V",
336 "Ac", "Ao", "Aq", "At",
337 "Bc", "Bf", "Bo", "Bq",
338 "Bsx", "Bx", "Db", "Dc",
339 "Do", "Dq", "Ec", "Ef",
340 "Em", "Eo", "Fx", "Ms",
341 "No", "Ns", "Nx", "Ox",
342 "Pc", "Pf", "Po", "Pq",
343 "Qc", "Ql", "Qo", "Qq",
344 "Re", "Rs", "Sc", "So",
345 "Sq", "Sm", "Sx", "Sy",
346 "Tn", "Ux", "Xc", "Xo",
347 "Fo", "Fc", "Oo", "Oc",
348 "Bk", "Ek", "Bt", "Hf",
349 "Fr", "Ud", "Lb", "Lp",
350 "Lk", "Mt", "Brq", "Bro",
351 "Brc", "%C", "Es", "En",
352 "Dx", "%Q", "%U", "Ta",
353 NULL,
354 "TH", "SH", "SS", "TP",
355 "TQ",
356 "LP", "PP", "P", "IP",
357 "HP", "SM", "SB", "BI",
358 "IB", "BR", "RB", "R",
359 "B", "I", "IR", "RI",
360 "nf", "fi",
361 "RE", "RS", "DT", "UC",
362 "PD", "AT", "in",
363 "SY", "YS", "OP",
364 "EX", "EE", "UR",
365 "UE", "MT", "ME", NULL
366 };
367 const char *const *roff_name = __roff_name;
368
369 static struct roffmac roffs[TOKEN_NONE] = {
370 { roff_br, NULL, NULL, 0 }, /* br */
371 { roff_onearg, NULL, NULL, 0 }, /* ce */
372 { roff_onearg, NULL, NULL, 0 }, /* ft */
373 { roff_onearg, NULL, NULL, 0 }, /* ll */
374 { roff_onearg, NULL, NULL, 0 }, /* mc */
375 { roff_onearg, NULL, NULL, 0 }, /* po */
376 { roff_onearg, NULL, NULL, 0 }, /* rj */
377 { roff_onearg, NULL, NULL, 0 }, /* sp */
378 { roff_manyarg, NULL, NULL, 0 }, /* ta */
379 { roff_onearg, NULL, NULL, 0 }, /* ti */
380 { NULL, NULL, NULL, 0 }, /* ROFF_MAX */
381 { roff_unsupp, NULL, NULL, 0 }, /* ab */
382 { roff_line_ignore, NULL, NULL, 0 }, /* ad */
383 { roff_line_ignore, NULL, NULL, 0 }, /* af */
384 { roff_unsupp, NULL, NULL, 0 }, /* aln */
385 { roff_als, NULL, NULL, 0 }, /* als */
386 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am */
387 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am1 */
388 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami */
389 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami1 */
390 { roff_ds, NULL, NULL, 0 }, /* as */
391 { roff_ds, NULL, NULL, 0 }, /* as1 */
392 { roff_unsupp, NULL, NULL, 0 }, /* asciify */
393 { roff_line_ignore, NULL, NULL, 0 }, /* backtrace */
394 { roff_line_ignore, NULL, NULL, 0 }, /* bd */
395 { roff_line_ignore, NULL, NULL, 0 }, /* bleedat */
396 { roff_unsupp, NULL, NULL, 0 }, /* blm */
397 { roff_unsupp, NULL, NULL, 0 }, /* box */
398 { roff_unsupp, NULL, NULL, 0 }, /* boxa */
399 { roff_line_ignore, NULL, NULL, 0 }, /* bp */
400 { roff_unsupp, NULL, NULL, 0 }, /* BP */
401 { roff_unsupp, NULL, NULL, 0 }, /* break */
402 { roff_line_ignore, NULL, NULL, 0 }, /* breakchar */
403 { roff_line_ignore, NULL, NULL, 0 }, /* brnl */
404 { roff_br, NULL, NULL, 0 }, /* brp */
405 { roff_line_ignore, NULL, NULL, 0 }, /* brpnl */
406 { roff_unsupp, NULL, NULL, 0 }, /* c2 */
407 { roff_cc, NULL, NULL, 0 }, /* cc */
408 { roff_insec, NULL, NULL, 0 }, /* cf */
409 { roff_line_ignore, NULL, NULL, 0 }, /* cflags */
410 { roff_line_ignore, NULL, NULL, 0 }, /* ch */
411 { roff_char, NULL, NULL, 0 }, /* char */
412 { roff_unsupp, NULL, NULL, 0 }, /* chop */
413 { roff_line_ignore, NULL, NULL, 0 }, /* class */
414 { roff_insec, NULL, NULL, 0 }, /* close */
415 { roff_unsupp, NULL, NULL, 0 }, /* CL */
416 { roff_line_ignore, NULL, NULL, 0 }, /* color */
417 { roff_unsupp, NULL, NULL, 0 }, /* composite */
418 { roff_unsupp, NULL, NULL, 0 }, /* continue */
419 { roff_line_ignore, NULL, NULL, 0 }, /* cp */
420 { roff_line_ignore, NULL, NULL, 0 }, /* cropat */
421 { roff_line_ignore, NULL, NULL, 0 }, /* cs */
422 { roff_line_ignore, NULL, NULL, 0 }, /* cu */
423 { roff_unsupp, NULL, NULL, 0 }, /* da */
424 { roff_unsupp, NULL, NULL, 0 }, /* dch */
425 { roff_Dd, NULL, NULL, 0 }, /* Dd */
426 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de */
427 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de1 */
428 { roff_line_ignore, NULL, NULL, 0 }, /* defcolor */
429 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei */
430 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei1 */
431 { roff_unsupp, NULL, NULL, 0 }, /* device */
432 { roff_unsupp, NULL, NULL, 0 }, /* devicem */
433 { roff_unsupp, NULL, NULL, 0 }, /* di */
434 { roff_unsupp, NULL, NULL, 0 }, /* do */
435 { roff_ds, NULL, NULL, 0 }, /* ds */
436 { roff_ds, NULL, NULL, 0 }, /* ds1 */
437 { roff_unsupp, NULL, NULL, 0 }, /* dwh */
438 { roff_unsupp, NULL, NULL, 0 }, /* dt */
439 { roff_ec, NULL, NULL, 0 }, /* ec */
440 { roff_unsupp, NULL, NULL, 0 }, /* ecr */
441 { roff_unsupp, NULL, NULL, 0 }, /* ecs */
442 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* el */
443 { roff_unsupp, NULL, NULL, 0 }, /* em */
444 { roff_EN, NULL, NULL, 0 }, /* EN */
445 { roff_eo, NULL, NULL, 0 }, /* eo */
446 { roff_unsupp, NULL, NULL, 0 }, /* EP */
447 { roff_EQ, NULL, NULL, 0 }, /* EQ */
448 { roff_line_ignore, NULL, NULL, 0 }, /* errprint */
449 { roff_unsupp, NULL, NULL, 0 }, /* ev */
450 { roff_unsupp, NULL, NULL, 0 }, /* evc */
451 { roff_unsupp, NULL, NULL, 0 }, /* ex */
452 { roff_line_ignore, NULL, NULL, 0 }, /* fallback */
453 { roff_line_ignore, NULL, NULL, 0 }, /* fam */
454 { roff_unsupp, NULL, NULL, 0 }, /* fc */
455 { roff_unsupp, NULL, NULL, 0 }, /* fchar */
456 { roff_line_ignore, NULL, NULL, 0 }, /* fcolor */
457 { roff_line_ignore, NULL, NULL, 0 }, /* fdeferlig */
458 { roff_line_ignore, NULL, NULL, 0 }, /* feature */
459 { roff_line_ignore, NULL, NULL, 0 }, /* fkern */
460 { roff_line_ignore, NULL, NULL, 0 }, /* fl */
461 { roff_line_ignore, NULL, NULL, 0 }, /* flig */
462 { roff_line_ignore, NULL, NULL, 0 }, /* fp */
463 { roff_line_ignore, NULL, NULL, 0 }, /* fps */
464 { roff_unsupp, NULL, NULL, 0 }, /* fschar */
465 { roff_line_ignore, NULL, NULL, 0 }, /* fspacewidth */
466 { roff_line_ignore, NULL, NULL, 0 }, /* fspecial */
467 { roff_line_ignore, NULL, NULL, 0 }, /* ftr */
468 { roff_line_ignore, NULL, NULL, 0 }, /* fzoom */
469 { roff_line_ignore, NULL, NULL, 0 }, /* gcolor */
470 { roff_line_ignore, NULL, NULL, 0 }, /* hc */
471 { roff_line_ignore, NULL, NULL, 0 }, /* hcode */
472 { roff_line_ignore, NULL, NULL, 0 }, /* hidechar */
473 { roff_line_ignore, NULL, NULL, 0 }, /* hla */
474 { roff_line_ignore, NULL, NULL, 0 }, /* hlm */
475 { roff_line_ignore, NULL, NULL, 0 }, /* hpf */
476 { roff_line_ignore, NULL, NULL, 0 }, /* hpfa */
477 { roff_line_ignore, NULL, NULL, 0 }, /* hpfcode */
478 { roff_line_ignore, NULL, NULL, 0 }, /* hw */
479 { roff_line_ignore, NULL, NULL, 0 }, /* hy */
480 { roff_line_ignore, NULL, NULL, 0 }, /* hylang */
481 { roff_line_ignore, NULL, NULL, 0 }, /* hylen */
482 { roff_line_ignore, NULL, NULL, 0 }, /* hym */
483 { roff_line_ignore, NULL, NULL, 0 }, /* hypp */
484 { roff_line_ignore, NULL, NULL, 0 }, /* hys */
485 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* ie */
486 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* if */
487 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ig */
488 { roff_unsupp, NULL, NULL, 0 }, /* index */
489 { roff_it, NULL, NULL, 0 }, /* it */
490 { roff_unsupp, NULL, NULL, 0 }, /* itc */
491 { roff_line_ignore, NULL, NULL, 0 }, /* IX */
492 { roff_line_ignore, NULL, NULL, 0 }, /* kern */
493 { roff_line_ignore, NULL, NULL, 0 }, /* kernafter */
494 { roff_line_ignore, NULL, NULL, 0 }, /* kernbefore */
495 { roff_line_ignore, NULL, NULL, 0 }, /* kernpair */
496 { roff_unsupp, NULL, NULL, 0 }, /* lc */
497 { roff_unsupp, NULL, NULL, 0 }, /* lc_ctype */
498 { roff_unsupp, NULL, NULL, 0 }, /* lds */
499 { roff_unsupp, NULL, NULL, 0 }, /* length */
500 { roff_line_ignore, NULL, NULL, 0 }, /* letadj */
501 { roff_insec, NULL, NULL, 0 }, /* lf */
502 { roff_line_ignore, NULL, NULL, 0 }, /* lg */
503 { roff_line_ignore, NULL, NULL, 0 }, /* lhang */
504 { roff_unsupp, NULL, NULL, 0 }, /* linetabs */
505 { roff_unsupp, NULL, NULL, 0 }, /* lnr */
506 { roff_unsupp, NULL, NULL, 0 }, /* lnrf */
507 { roff_unsupp, NULL, NULL, 0 }, /* lpfx */
508 { roff_line_ignore, NULL, NULL, 0 }, /* ls */
509 { roff_unsupp, NULL, NULL, 0 }, /* lsm */
510 { roff_line_ignore, NULL, NULL, 0 }, /* lt */
511 { roff_line_ignore, NULL, NULL, 0 }, /* mediasize */
512 { roff_line_ignore, NULL, NULL, 0 }, /* minss */
513 { roff_line_ignore, NULL, NULL, 0 }, /* mk */
514 { roff_insec, NULL, NULL, 0 }, /* mso */
515 { roff_line_ignore, NULL, NULL, 0 }, /* na */
516 { roff_line_ignore, NULL, NULL, 0 }, /* ne */
517 { roff_line_ignore, NULL, NULL, 0 }, /* nh */
518 { roff_line_ignore, NULL, NULL, 0 }, /* nhychar */
519 { roff_unsupp, NULL, NULL, 0 }, /* nm */
520 { roff_unsupp, NULL, NULL, 0 }, /* nn */
521 { roff_nop, NULL, NULL, 0 }, /* nop */
522 { roff_nr, NULL, NULL, 0 }, /* nr */
523 { roff_unsupp, NULL, NULL, 0 }, /* nrf */
524 { roff_line_ignore, NULL, NULL, 0 }, /* nroff */
525 { roff_line_ignore, NULL, NULL, 0 }, /* ns */
526 { roff_insec, NULL, NULL, 0 }, /* nx */
527 { roff_insec, NULL, NULL, 0 }, /* open */
528 { roff_insec, NULL, NULL, 0 }, /* opena */
529 { roff_line_ignore, NULL, NULL, 0 }, /* os */
530 { roff_unsupp, NULL, NULL, 0 }, /* output */
531 { roff_line_ignore, NULL, NULL, 0 }, /* padj */
532 { roff_line_ignore, NULL, NULL, 0 }, /* papersize */
533 { roff_line_ignore, NULL, NULL, 0 }, /* pc */
534 { roff_line_ignore, NULL, NULL, 0 }, /* pev */
535 { roff_insec, NULL, NULL, 0 }, /* pi */
536 { roff_unsupp, NULL, NULL, 0 }, /* PI */
537 { roff_line_ignore, NULL, NULL, 0 }, /* pl */
538 { roff_line_ignore, NULL, NULL, 0 }, /* pm */
539 { roff_line_ignore, NULL, NULL, 0 }, /* pn */
540 { roff_line_ignore, NULL, NULL, 0 }, /* pnr */
541 { roff_line_ignore, NULL, NULL, 0 }, /* ps */
542 { roff_unsupp, NULL, NULL, 0 }, /* psbb */
543 { roff_unsupp, NULL, NULL, 0 }, /* pshape */
544 { roff_insec, NULL, NULL, 0 }, /* pso */
545 { roff_line_ignore, NULL, NULL, 0 }, /* ptr */
546 { roff_line_ignore, NULL, NULL, 0 }, /* pvs */
547 { roff_unsupp, NULL, NULL, 0 }, /* rchar */
548 { roff_line_ignore, NULL, NULL, 0 }, /* rd */
549 { roff_line_ignore, NULL, NULL, 0 }, /* recursionlimit */
550 { roff_return, NULL, NULL, 0 }, /* return */
551 { roff_unsupp, NULL, NULL, 0 }, /* rfschar */
552 { roff_line_ignore, NULL, NULL, 0 }, /* rhang */
553 { roff_rm, NULL, NULL, 0 }, /* rm */
554 { roff_rn, NULL, NULL, 0 }, /* rn */
555 { roff_unsupp, NULL, NULL, 0 }, /* rnn */
556 { roff_rr, NULL, NULL, 0 }, /* rr */
557 { roff_line_ignore, NULL, NULL, 0 }, /* rs */
558 { roff_line_ignore, NULL, NULL, 0 }, /* rt */
559 { roff_unsupp, NULL, NULL, 0 }, /* schar */
560 { roff_line_ignore, NULL, NULL, 0 }, /* sentchar */
561 { roff_line_ignore, NULL, NULL, 0 }, /* shc */
562 { roff_shift, NULL, NULL, 0 }, /* shift */
563 { roff_line_ignore, NULL, NULL, 0 }, /* sizes */
564 { roff_so, NULL, NULL, 0 }, /* so */
565 { roff_line_ignore, NULL, NULL, 0 }, /* spacewidth */
566 { roff_line_ignore, NULL, NULL, 0 }, /* special */
567 { roff_line_ignore, NULL, NULL, 0 }, /* spreadwarn */
568 { roff_line_ignore, NULL, NULL, 0 }, /* ss */
569 { roff_line_ignore, NULL, NULL, 0 }, /* sty */
570 { roff_unsupp, NULL, NULL, 0 }, /* substring */
571 { roff_line_ignore, NULL, NULL, 0 }, /* sv */
572 { roff_insec, NULL, NULL, 0 }, /* sy */
573 { roff_T_, NULL, NULL, 0 }, /* T& */
574 { roff_unsupp, NULL, NULL, 0 }, /* tc */
575 { roff_TE, NULL, NULL, 0 }, /* TE */
576 { roff_Dd, NULL, NULL, 0 }, /* TH */
577 { roff_line_ignore, NULL, NULL, 0 }, /* tkf */
578 { roff_unsupp, NULL, NULL, 0 }, /* tl */
579 { roff_line_ignore, NULL, NULL, 0 }, /* tm */
580 { roff_line_ignore, NULL, NULL, 0 }, /* tm1 */
581 { roff_line_ignore, NULL, NULL, 0 }, /* tmc */
582 { roff_tr, NULL, NULL, 0 }, /* tr */
583 { roff_line_ignore, NULL, NULL, 0 }, /* track */
584 { roff_line_ignore, NULL, NULL, 0 }, /* transchar */
585 { roff_insec, NULL, NULL, 0 }, /* trf */
586 { roff_line_ignore, NULL, NULL, 0 }, /* trimat */
587 { roff_unsupp, NULL, NULL, 0 }, /* trin */
588 { roff_unsupp, NULL, NULL, 0 }, /* trnt */
589 { roff_line_ignore, NULL, NULL, 0 }, /* troff */
590 { roff_TS, NULL, NULL, 0 }, /* TS */
591 { roff_line_ignore, NULL, NULL, 0 }, /* uf */
592 { roff_line_ignore, NULL, NULL, 0 }, /* ul */
593 { roff_unsupp, NULL, NULL, 0 }, /* unformat */
594 { roff_line_ignore, NULL, NULL, 0 }, /* unwatch */
595 { roff_line_ignore, NULL, NULL, 0 }, /* unwatchn */
596 { roff_line_ignore, NULL, NULL, 0 }, /* vpt */
597 { roff_line_ignore, NULL, NULL, 0 }, /* vs */
598 { roff_line_ignore, NULL, NULL, 0 }, /* warn */
599 { roff_line_ignore, NULL, NULL, 0 }, /* warnscale */
600 { roff_line_ignore, NULL, NULL, 0 }, /* watch */
601 { roff_line_ignore, NULL, NULL, 0 }, /* watchlength */
602 { roff_line_ignore, NULL, NULL, 0 }, /* watchn */
603 { roff_unsupp, NULL, NULL, 0 }, /* wh */
604 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/
605 { roff_insec, NULL, NULL, 0 }, /* write */
606 { roff_insec, NULL, NULL, 0 }, /* writec */
607 { roff_insec, NULL, NULL, 0 }, /* writem */
608 { roff_line_ignore, NULL, NULL, 0 }, /* xflag */
609 { roff_cblock, NULL, NULL, 0 }, /* . */
610 { roff_renamed, NULL, NULL, 0 },
611 { roff_userdef, NULL, NULL, 0 }
612 };
613
614 /* Array of injected predefined strings. */
615 #define PREDEFS_MAX 38
616 static const struct predef predefs[PREDEFS_MAX] = {
617 #include "predefs.in"
618 };
619
620 static int roffce_lines; /* number of input lines to center */
621 static struct roff_node *roffce_node; /* active request */
622 static int roffit_lines; /* number of lines to delay */
623 static char *roffit_macro; /* nil-terminated macro line */
624
625
626 /* --- request table ------------------------------------------------------ */
627
628 struct ohash *
629 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
630 {
631 struct ohash *htab;
632 struct roffreq *req;
633 enum roff_tok tok;
634 size_t sz;
635 unsigned int slot;
636
637 htab = mandoc_malloc(sizeof(*htab));
638 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
639
640 for (tok = mintok; tok < maxtok; tok++) {
641 if (roff_name[tok] == NULL)
642 continue;
643 sz = strlen(roff_name[tok]);
644 req = mandoc_malloc(sizeof(*req) + sz + 1);
645 req->tok = tok;
646 memcpy(req->name, roff_name[tok], sz + 1);
647 slot = ohash_qlookup(htab, req->name);
648 ohash_insert(htab, slot, req);
649 }
650 return htab;
651 }
652
/*
 * Release a table built by roffhash_alloc():
 * first every entry, then the table itself.
 * Passing NULL is allowed and does nothing.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 idx;

	if (htab != NULL) {
		entry = ohash_first(htab, &idx);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &idx);
		}
		ohash_delete(htab);
		free(htab);
	}
}
667
668 enum roff_tok
669 roffhash_find(struct ohash *htab, const char *name, size_t sz)
670 {
671 struct roffreq *req;
672 const char *end;
673
674 if (sz) {
675 end = name + sz;
676 req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
677 } else
678 req = ohash_find(htab, ohash_qlookup(htab, name));
679 return req == NULL ? TOKEN_NONE : req->tok;
680 }
681
682 /* --- stack of request blocks -------------------------------------------- */
683
684 /*
685 * Pop the current node off of the stack of roff instructions currently
686 * pending.
687 */
688 static int
689 roffnode_pop(struct roff *r)
690 {
691 struct roffnode *p;
692 int inloop;
693
694 p = r->last;
695 inloop = p->tok == ROFF_while;
696 r->last = p->parent;
697 free(p->name);
698 free(p->end);
699 free(p);
700 return inloop;
701 }
702
703 /*
704 * Push a roff node onto the instruction stack. This must later be
705 * removed with roffnode_pop().
706 */
707 static void
708 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
709 int line, int col)
710 {
711 struct roffnode *p;
712
713 p = mandoc_calloc(1, sizeof(struct roffnode));
714 p->tok = tok;
715 if (name)
716 p->name = mandoc_strdup(name);
717 p->parent = r->last;
718 p->line = line;
719 p->col = col;
720 p->rule = p->parent ? p->parent->rule : 0;
721
722 r->last = p;
723 }
724
725 /* --- roff parser state data management ---------------------------------- */
726
727 static void
728 roff_free1(struct roff *r)
729 {
730 int i;
731
732 tbl_free(r->first_tbl);
733 r->first_tbl = r->last_tbl = r->tbl = NULL;
734
735 eqn_free(r->last_eqn);
736 r->last_eqn = r->eqn = NULL;
737
738 while (r->mstackpos >= 0)
739 roff_userret(r);
740
741 while (r->last)
742 roffnode_pop(r);
743
744 free (r->rstack);
745 r->rstack = NULL;
746 r->rstacksz = 0;
747 r->rstackpos = -1;
748
749 roff_freereg(r->regtab);
750 r->regtab = NULL;
751
752 roff_freestr(r->strtab);
753 roff_freestr(r->rentab);
754 roff_freestr(r->xmbtab);
755 r->strtab = r->rentab = r->xmbtab = NULL;
756
757 if (r->xtab)
758 for (i = 0; i < 128; i++)
759 free(r->xtab[i].p);
760 free(r->xtab);
761 r->xtab = NULL;
762 }
763
764 void
765 roff_reset(struct roff *r)
766 {
767 roff_free1(r);
768 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
769 r->control = '\0';
770 r->escape = '\\';
771 roffce_lines = 0;
772 roffce_node = NULL;
773 roffit_lines = 0;
774 roffit_macro = NULL;
775 }
776
777 void
778 roff_free(struct roff *r)
779 {
780 int i;
781
782 roff_free1(r);
783 for (i = 0; i < r->mstacksz; i++)
784 free(r->mstack[i].argv);
785 free(r->mstack);
786 roffhash_free(r->reqtab);
787 free(r);
788 }
789
790 struct roff *
791 roff_alloc(int options)
792 {
793 struct roff *r;
794
795 r = mandoc_calloc(1, sizeof(struct roff));
796 r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
797 r->options = options;
798 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
799 r->mstackpos = -1;
800 r->rstackpos = -1;
801 r->escape = '\\';
802 return r;
803 }
804
805 /* --- syntax tree state data management ---------------------------------- */
806
807 static void
808 roff_man_free1(struct roff_man *man)
809 {
810 if (man->meta.first != NULL)
811 roff_node_delete(man, man->meta.first);
812 free(man->meta.msec);
813 free(man->meta.vol);
814 free(man->meta.os);
815 free(man->meta.arch);
816 free(man->meta.title);
817 free(man->meta.name);
818 free(man->meta.date);
819 free(man->meta.sodest);
820 }
821
822 void
823 roff_state_reset(struct roff_man *man)
824 {
825 man->last = man->meta.first;
826 man->last_es = NULL;
827 man->flags = 0;
828 man->lastsec = man->lastnamed = SEC_NONE;
829 man->next = ROFF_NEXT_CHILD;
830 roff_setreg(man->roff, "nS", 0, '=');
831 }
832
833 static void
834 roff_man_alloc1(struct roff_man *man)
835 {
836 memset(&man->meta, 0, sizeof(man->meta));
837 man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
838 man->meta.first->type = ROFFT_ROOT;
839 man->meta.macroset = MACROSET_NONE;
840 roff_state_reset(man);
841 }
842
/*
 * Throw away the current tree and meta data
 * and start over with an empty root node.
 */
void
roff_man_reset(struct roff_man *man)
{
	/* Order matters: release the old data before wiping the pointers. */
	roff_man_free1(man);
	roff_man_alloc1(man);
}
849
/*
 * Destroy a structure obtained from roff_man_alloc(),
 * including its tree and meta data.
 */
void
roff_man_free(struct roff_man *man)
{
	/* Contents first, then the container. */
	roff_man_free1(man);
	free(man);
}
856
857 struct roff_man *
858 roff_man_alloc(struct roff *roff, const char *os_s, int quick)
859 {
860 struct roff_man *man;
861
862 man = mandoc_calloc(1, sizeof(*man));
863 man->roff = roff;
864 man->os_s = os_s;
865 man->quick = quick;
866 roff_man_alloc1(man);
867 roff->man = man;
868 return man;
869 }
870
871 /* --- syntax tree handling ----------------------------------------------- */
872
873 struct roff_node *
874 roff_node_alloc(struct roff_man *man, int line, int pos,
875 enum roff_type type, int tok)
876 {
877 struct roff_node *n;
878
879 n = mandoc_calloc(1, sizeof(*n));
880 n->line = line;
881 n->pos = pos;
882 n->tok = tok;
883 n->type = type;
884 n->sec = man->lastsec;
885
886 if (man->flags & MDOC_SYNOPSIS)
887 n->flags |= NODE_SYNPRETTY;
888 else
889 n->flags &= ~NODE_SYNPRETTY;
890 if (man->flags & MDOC_NEWLINE)
891 n->flags |= NODE_LINE;
892 man->flags &= ~MDOC_NEWLINE;
893
894 return n;
895 }
896
897 void
898 roff_node_append(struct roff_man *man, struct roff_node *n)
899 {
900
901 switch (man->next) {
902 case ROFF_NEXT_SIBLING:
903 if (man->last->next != NULL) {
904 n->next = man->last->next;
905 man->last->next->prev = n;
906 } else
907 man->last->parent->last = n;
908 man->last->next = n;
909 n->prev = man->last;
910 n->parent = man->last->parent;
911 break;
912 case ROFF_NEXT_CHILD:
913 if (man->last->child != NULL) {
914 n->next = man->last->child;
915 man->last->child->prev = n;
916 } else
917 man->last->last = n;
918 man->last->child = n;
919 n->parent = man->last;
920 break;
921 default:
922 abort();
923 }
924 man->last = n;
925
926 switch (n->type) {
927 case ROFFT_HEAD:
928 n->parent->head = n;
929 break;
930 case ROFFT_BODY:
931 if (n->end != ENDBODY_NOT)
932 return;
933 n->parent->body = n;
934 break;
935 case ROFFT_TAIL:
936 n->parent->tail = n;
937 break;
938 default:
939 return;
940 }
941
942 /*
943 * Copy over the normalised-data pointer of our parent. Not
944 * everybody has one, but copying a null pointer is fine.
945 */
946
947 n->norm = n->parent->norm;
948 assert(n->parent->type == ROFFT_BLOCK);
949 }
950
951 void
952 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
953 {
954 struct roff_node *n;
955
956 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
957 n->string = roff_strdup(man->roff, word);
958 roff_node_append(man, n);
959 n->flags |= NODE_VALID | NODE_ENDED;
960 man->next = ROFF_NEXT_SIBLING;
961 }
962
963 void
964 roff_word_append(struct roff_man *man, const char *word)
965 {
966 struct roff_node *n;
967 char *addstr, *newstr;
968
969 n = man->last;
970 addstr = roff_strdup(man->roff, word);
971 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
972 free(addstr);
973 free(n->string);
974 n->string = newstr;
975 man->next = ROFF_NEXT_SIBLING;
976 }
977
978 void
979 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
980 {
981 struct roff_node *n;
982
983 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
984 roff_node_append(man, n);
985 man->next = ROFF_NEXT_CHILD;
986 }
987
988 struct roff_node *
989 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
990 {
991 struct roff_node *n;
992
993 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
994 roff_node_append(man, n);
995 man->next = ROFF_NEXT_CHILD;
996 return n;
997 }
998
999 struct roff_node *
1000 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1001 {
1002 struct roff_node *n;
1003
1004 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1005 roff_node_append(man, n);
1006 man->next = ROFF_NEXT_CHILD;
1007 return n;
1008 }
1009
1010 struct roff_node *
1011 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1012 {
1013 struct roff_node *n;
1014
1015 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1016 roff_node_append(man, n);
1017 man->next = ROFF_NEXT_CHILD;
1018 return n;
1019 }
1020
1021 static void
1022 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1023 {
1024 struct roff_node *n;
1025 struct tbl_span *span;
1026
1027 if (man->meta.macroset == MACROSET_MAN)
1028 man_breakscope(man, ROFF_TS);
1029 while ((span = tbl_span(tbl)) != NULL) {
1030 n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1031 n->span = span;
1032 roff_node_append(man, n);
1033 n->flags |= NODE_VALID | NODE_ENDED;
1034 man->next = ROFF_NEXT_SIBLING;
1035 }
1036 }
1037
1038 void
1039 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1040 {
1041
1042 /* Adjust siblings. */
1043
1044 if (n->prev)
1045 n->prev->next = n->next;
1046 if (n->next)
1047 n->next->prev = n->prev;
1048
1049 /* Adjust parent. */
1050
1051 if (n->parent != NULL) {
1052 if (n->parent->child == n)
1053 n->parent->child = n->next;
1054 if (n->parent->last == n)
1055 n->parent->last = n->prev;
1056 }
1057
1058 /* Adjust parse point. */
1059
1060 if (man == NULL)
1061 return;
1062 if (man->last == n) {
1063 if (n->prev == NULL) {
1064 man->last = n->parent;
1065 man->next = ROFF_NEXT_CHILD;
1066 } else {
1067 man->last = n->prev;
1068 man->next = ROFF_NEXT_SIBLING;
1069 }
1070 }
1071 if (man->meta.first == n)
1072 man->meta.first = NULL;
1073 }
1074
1075 void
1076 roff_node_relink(struct roff_man *man, struct roff_node *n)
1077 {
1078 roff_node_unlink(man, n);
1079 n->prev = n->next = NULL;
1080 roff_node_append(man, n);
1081 }
1082
1083 void
1084 roff_node_free(struct roff_node *n)
1085 {
1086
1087 if (n->args != NULL)
1088 mdoc_argv_free(n->args);
1089 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1090 free(n->norm);
1091 eqn_box_free(n->eqn);
1092 free(n->string);
1093 free(n);
1094 }
1095
1096 void
1097 roff_node_delete(struct roff_man *man, struct roff_node *n)
1098 {
1099
1100 while (n->child != NULL)
1101 roff_node_delete(man, n->child);
1102 roff_node_unlink(man, n);
1103 roff_node_free(n);
1104 }
1105
1106 void
1107 deroff(char **dest, const struct roff_node *n)
1108 {
1109 char *cp;
1110 size_t sz;
1111
1112 if (n->type != ROFFT_TEXT) {
1113 for (n = n->child; n != NULL; n = n->next)
1114 deroff(dest, n);
1115 return;
1116 }
1117
1118 /* Skip leading whitespace. */
1119
1120 for (cp = n->string; *cp != '\0'; cp++) {
1121 if (cp[0] == '\\' && cp[1] != '\0' &&
1122 strchr(" %&0^|~", cp[1]) != NULL)
1123 cp++;
1124 else if ( ! isspace((unsigned char)*cp))
1125 break;
1126 }
1127
1128 /* Skip trailing backslash. */
1129
1130 sz = strlen(cp);
1131 if (sz > 0 && cp[sz - 1] == '\\')
1132 sz--;
1133
1134 /* Skip trailing whitespace. */
1135
1136 for (; sz; sz--)
1137 if ( ! isspace((unsigned char)cp[sz-1]))
1138 break;
1139
1140 /* Skip empty strings. */
1141
1142 if (sz == 0)
1143 return;
1144
1145 if (*dest == NULL) {
1146 *dest = mandoc_strndup(cp, sz);
1147 return;
1148 }
1149
1150 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1151 free(*dest);
1152 *dest = cp;
1153 }
1154
1155 /* --- main functions of the roff parser ---------------------------------- */
1156
/*
 * In the current line, expand escape sequences that produce parsable
 * input text.  Also check the syntax of the remaining escape sequences,
 * which typically produce output glyphs or change formatter state.
 *
 * Processing starts at buf->buf + pos.  The escape character to look
 * for is newesc; roff_getarg() passes ASCII_ESC, roff_parseln() passes
 * r->escape.  The buffer may be reallocated; buf->buf and buf->sz are
 * updated accordingly.
 *
 * Returns ROFF_CONT when the line was processed completely,
 * ROFF_IGN | ROFF_APPEND when the line ends in a continuation,
 * or ROFF_IGN when expansion was aborted as a presumed loop.
 */
static int
roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc)
{
	struct mctx	*ctx;	/* current macro call context */
	char		 ubuf[24]; /* buffer to print the number */
	struct roff_node *n;	/* used for header comments */
	const char	*start;	/* start of the string to process */
	char		*stesc;	/* start of an escape sequence ('\\') */
	const char	*esct;	/* type of escape sequence */
	char		*ep;	/* end of comment string */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy buf->buf to */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	size_t		 asz;	/* length of the replacement */
	size_t		 rsz;	/* length of the rest of the string */
	int		 inaml;	/* length returned from mandoc_escape() */
	int		 expand_count;	/* to avoid infinite loops */
	int		 npos;	/* position in numeric expression */
	int		 arg_complete; /* argument not interrupted by eol */
	int		 quote_args; /* true for \\$@, false for \\$* */
	int		 done;	/* no more input available */
	int		 deftype; /* type of definition to paste */
	int		 rcsid;	/* kind of RCS id seen */
	enum mandocerr	 err;	/* for escape sequence problems */
	char		 sign;	/* increment number register */
	char		 term;	/* character terminating the escape */

	/* Search forward for comments. */

	done = 0;
	start = buf->buf + pos;
	for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
		if (stesc[0] != newesc || stesc[1] == '\0')
			continue;
		stesc++;
		if (*stesc != '"' && *stesc != '#')
			continue;

		/* Comment found, look for RCS id. */

		rcsid = 0;
		if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_OPENBSD;
			cp += 8;
		} else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_NETBSD;
			cp += 7;
		}
		if (cp != NULL &&
		    isalnum((unsigned char)*cp) == 0 &&
		    strchr(cp, '$') != NULL) {
			if (r->man->meta.rcsids & rcsid)
				mandoc_msg(MANDOCERR_RCS_REP, ln,
				    (int)(stesc - buf->buf) + 1,
				    "%s", stesc + 1);
			r->man->meta.rcsids |= rcsid;
		}

		/*
		 * Handle trailing whitespace.
		 * Let ep point to the last byte of the line and
		 * move stesc back onto the escape character.
		 */

		ep = strchr(stesc--, '\0') - 1;
		if (*ep == '\n') {
			done = 1;
			ep--;
		}
		if (*ep == ' ' || *ep == '\t')
			mandoc_msg(MANDOCERR_SPACE_EOL,
			    ln, (int)(ep - buf->buf), NULL);

		/*
		 * Save comments preceding the title macro
		 * in the syntax tree.
		 */

		if (newesc != ASCII_ESC && r->format == 0) {
			while (*ep == ' ' || *ep == '\t')
				ep--;
			ep[1] = '\0';
			n = roff_node_alloc(r->man,
			    ln, stesc + 1 - buf->buf,
			    ROFFT_COMMENT, TOKEN_NONE);
			n->string = mandoc_strdup(stesc + 2);
			roff_node_append(r->man, n);
			n->flags |= NODE_VALID | NODE_ENDED;
			r->man->next = ROFF_NEXT_SIBLING;
		}

		/* Line continuation with comment. */

		if (stesc[1] == '#') {
			*stesc = '\0';
			return ROFF_IGN | ROFF_APPEND;
		}

		/*
		 * Discard normal comments, together with blanks
		 * preceding them unless such a blank is escaped
		 * with a backslash.
		 */

		while (stesc > start && stesc[-1] == ' ' &&
		    (stesc == start + 1 || stesc[-2] != '\\'))
			stesc--;
		*stesc = '\0';
		break;
	}

	/* Here, stesc points to the terminating NUL; nothing left? */

	if (stesc == start)
		return ROFF_CONT;
	stesc--;

	/* Notice the end of the input. */

	if (*stesc == '\n') {
		*stesc-- = '\0';
		done = 1;
	}

	/* Scan backwards from the end of the line for escapes. */

	expand_count = 0;
	while (stesc >= start) {
		if (*stesc != newesc) {

			/*
			 * If we have a non-standard escape character,
			 * escape literal backslashes because all
			 * processing in subsequent functions uses
			 * the standard escaping rules.
			 */

			if (newesc != ASCII_ESC && *stesc == '\\') {
				*stesc = '\0';
				buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
				    buf->buf, stesc + 1) + 1;
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				free(buf->buf);
				buf->buf = nbuf;
			}

			/* Search backwards for the next escape. */

			stesc--;
			continue;
		}

		/*
		 * If it is escaped, skip it.
		 * Find the start of the run of r->escape characters
		 * directly preceding stesc; the distance stesc - cp
		 * is the length of that run including stesc itself.
		 */

		for (cp = stesc - 1; cp >= start; cp--)
			if (*cp != r->escape)
				break;

		if ((stesc - cp) % 2 == 0) {
			/* Even run: normalise it to backslashes. */
			while (stesc > cp)
				*stesc-- = '\\';
			continue;
		} else if (stesc[1] != '\0') {
			*stesc = '\\';
		} else {
			/* Escape character at the very end of the line. */
			*stesc-- = '\0';
			if (done)
				continue;
			else
				return ROFF_IGN | ROFF_APPEND;
		}

		/* Decide whether to expand or to check only. */

		term = '\0';
		cp = stesc + 1;
		if (*cp == 'E')
			cp++;
		esct = cp;
		switch (*esct) {
		case '*':
		case '$':
			res = NULL;
			break;
		case 'B':
		case 'w':
			term = cp[1];
			/* FALLTHROUGH */
		case 'n':
			sign = cp[1];
			if (sign == '+' || sign == '-')
				cp++;
			res = ubuf;
			break;
		default:
			/* Not expanded here: validate and move on. */
			err = MANDOCERR_OK;
			switch(mandoc_escape(&cp, &stnam, &inaml)) {
			case ESCAPE_SPECIAL:
				if (mchars_spec2cp(stnam, inaml) >= 0)
					break;
				/* FALLTHROUGH */
			case ESCAPE_ERROR:
				err = MANDOCERR_ESC_BAD;
				break;
			case ESCAPE_UNDEF:
				err = MANDOCERR_ESC_UNDEF;
				break;
			case ESCAPE_UNSUPP:
				err = MANDOCERR_ESC_UNSUPP;
				break;
			default:
				break;
			}
			if (err != MANDOCERR_OK)
				mandoc_msg(err, ln, (int)(stesc - buf->buf),
				    "%.*s", (int)(cp - stesc), stesc);
			stesc--;
			continue;
		}

		/* Give up after too many expansions on one line. */

		if (EXPAND_LIMIT < ++expand_count) {
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 * A maxl of zero means that the name extends
		 * up to the terminating character term.
		 */

		if (term == '\0') {
			switch (*++cp) {
			case '\0':
				maxl = 0;
				break;
			case '(':
				cp++;
				maxl = 2;
				break;
			case '[':
				cp++;
				term = ']';
				maxl = 0;
				break;
			default:
				maxl = 1;
				break;
			}
		} else {
			cp += 2;
			maxl = 0;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		naml = 0;
		arg_complete = 1;
		while (maxl == 0 || naml < maxl) {
			if (*cp == '\0') {
				mandoc_msg(MANDOCERR_ESC_BAD, ln,
				    (int)(stesc - buf->buf), "%s", stesc);
				arg_complete = 0;
				break;
			}
			if (maxl == 0 && *cp == term) {
				cp++;
				break;
			}
			if (*cp++ != '\\' || *esct != 'w') {
				naml++;
				continue;
			}

			/*
			 * Inside \w, only the escape types listed
			 * below add to the length being counted.
			 */

			switch (mandoc_escape(&cp, NULL, NULL)) {
			case ESCAPE_SPECIAL:
			case ESCAPE_UNICODE:
			case ESCAPE_NUMBERED:
			case ESCAPE_UNDEF:
			case ESCAPE_OVERSTRIKE:
				naml++;
				break;
			default:
				break;
			}
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		switch (*esct) {
		case '*':
			if (arg_complete) {
				deftype = ROFFDEF_USER | ROFFDEF_PRE;
				res = roff_getstrn(r, stnam, naml, &deftype);

				/*
				 * If not overridden, let \*(.T
				 * through to the formatters.
				 */

				if (res == NULL && naml == 2 &&
				    stnam[0] == '.' && stnam[1] == 'T') {
					roff_setstrn(&r->strtab,
					    ".T", 2, NULL, 0, 0);
					stesc--;
					continue;
				}
			}
			break;
		case '$':
			/* Macro arguments only exist inside a macro call. */
			if (r->mstackpos < 0) {
				mandoc_msg(MANDOCERR_ARG_UNDEF, ln,
				    (int)(stesc - buf->buf), "%.3s", stesc);
				break;
			}
			ctx = r->mstack + r->mstackpos;

			/* A single argument, \$1 to \$9. */

			npos = esct[1] - '1';
			if (npos >= 0 && npos <= 8) {
				res = npos < ctx->argc ?
				    ctx->argv[npos] : "";
				break;
			}
			if (esct[1] == '*')
				quote_args = 0;
			else if (esct[1] == '@')
				quote_args = 1;
			else {
				mandoc_msg(MANDOCERR_ARG_NONUM, ln,
				    (int)(stesc - buf->buf), "%.3s", stesc);
				break;
			}

			/* Compute the total length of all arguments. */

			asz = 0;
			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					asz++;  /* blank */
				if (quote_args)
					asz += 2;  /* quotes */
				asz += strlen(ctx->argv[npos]);
			}

			/*
			 * Resize the buffer in place; the escape
			 * sequence being replaced is three bytes long.
			 * When shrinking, move the rest of the line
			 * before the realloc; when growing, after it.
			 */

			if (asz != 3) {
				rsz = buf->sz - (stesc - buf->buf) - 3;
				if (asz < 3)
					memmove(stesc + asz, stesc + 3, rsz);
				buf->sz += asz - 3;
				nbuf = mandoc_realloc(buf->buf, buf->sz);
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				buf->buf = nbuf;
				if (asz > 3)
					memmove(stesc + asz, stesc + 3, rsz);
			}

			/* Copy the arguments into the gap. */

			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					*stesc++ = ' ';
				if (quote_args)
					*stesc++ = '"';
				cp = ctx->argv[npos];
				while (*cp != '\0')
					*stesc++ = *cp++;
				if (quote_args)
					*stesc++ = '"';
			}
			continue;
		case 'B':
			/* Yield "1" if the whole name is a valid number. */
			npos = 0;
			ubuf[0] = arg_complete &&
			    roff_evalnum(r, ln, stnam, &npos,
			      NULL, ROFFNUM_SCALE) &&
			    stnam + npos + 1 == cp ? '1' : '0';
			ubuf[1] = '\0';
			break;
		case 'n':
			if (arg_complete)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, stnam, naml, sign));
			else
				ubuf[0] = '\0';
			break;
		case 'w':
			/* use even incomplete args */
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
			    24 * (int)naml);
			break;
		}

		if (res == NULL) {
			if (*esct == '*')
				mandoc_msg(MANDOCERR_STR_UNDEF,
				    ln, (int)(stesc - buf->buf),
				    "%.*s", (int)naml, stnam);
			res = "";
		} else if (buf->sz + strlen(res) > SHRT_MAX) {
			/* Refuse to let the line grow beyond SHRT_MAX. */
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/* Replace the escape sequence by the string. */

		*stesc = '\0';
		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
		    buf->buf, res, cp) + 1;

		/*
		 * Prepare for the next replacement.
		 * Resume at the end of the inserted text, such that
		 * the replacement itself is scanned for escapes, too.
		 */

		start = nbuf + pos;
		stesc = nbuf + (stesc - buf->buf) + strlen(res);
		free(buf->buf);
		buf->buf = nbuf;
	}
	return ROFF_CONT;
}
1569
/*
 * Parse a quoted or unquoted roff-style request or macro argument.
 * NUL-terminate the argument in place in the input buffer.
 * Collapse pairs of quotes inside quoted arguments.
 * Advance the argument pointer *cpp to the next argument,
 * or to the NUL byte terminating the argument line,
 * and advance *pos by the number of bytes consumed.
 * Return a newly allocated copy of the parsed argument.
 * If the argument contained a double backslash, the copy is
 * additionally passed through roff_expand(); if that returns
 * ROFF_IGN, an allocated empty string is returned instead.
 */
char *
roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
{
	struct buf	 buf;
	char		*cp, *start;
	int		 newesc, pairs, quoted, white;

	/* Quoting can only start with a new word. */
	start = *cpp;
	quoted = 0;
	if ('"' == *start) {
		quoted = 1;
		start++;
	}

	/* The variable pairs counts the bytes dropped so far. */
	newesc = pairs = white = 0;
	for (cp = start; '\0' != *cp; cp++) {

		/*
		 * Move the following text left
		 * after quoted quotes and after "\\" and "\t".
		 */
		if (pairs)
			cp[-pairs] = cp[0];

		if ('\\' == cp[0]) {
			/*
			 * In copy mode, translate double to single
			 * backslashes and backslash-t to literal tabs.
			 */
			switch (cp[1]) {
			case 'a':
			case 't':
				cp[-pairs] = '\t';
				pairs++;
				cp++;
				break;
			case '\\':
				/*
				 * Mark the resulting escape character
				 * with ASCII_ESC for roff_expand().
				 */
				newesc = 1;
				cp[-pairs] = ASCII_ESC;
				pairs++;
				cp++;
				break;
			case ' ':
				/* Skip escaped blanks. */
				if (0 == quoted)
					cp++;
				break;
			default:
				break;
			}
		} else if (0 == quoted) {
			if (' ' == cp[0]) {
				/* Unescaped blanks end unquoted args. */
				white = 1;
				break;
			}
		} else if ('"' == cp[0]) {
			if ('"' == cp[1]) {
				/* Quoted quotes collapse. */
				pairs++;
				cp++;
			} else {
				/* Unquoted quotes end quoted args. */
				quoted = 2;
				break;
			}
		}
	}

	/* Quoted argument without a closing quote. */
	if (1 == quoted)
		mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);

	/* NUL-terminate this argument and move to the next one. */
	if (pairs)
		cp[-pairs] = '\0';
	if ('\0' != *cp) {
		*cp++ = '\0';
		while (' ' == *cp)
			cp++;
	}
	*pos += (int)(cp - start) + (quoted ? 1 : 0);
	*cpp = cp;

	/* Warn about blanks at the end of the argument line. */
	if ('\0' == *cp && (white || ' ' == cp[-1]))
		mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);

	/* Hand out a copy; without new escapes, it is final. */
	start = mandoc_strdup(start);
	if (newesc == 0)
		return start;

	/* Expand the escape sequences marked with ASCII_ESC. */
	buf.buf = start;
	buf.sz = strlen(start) + 1;
	buf.next = NULL;
	if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) {
		free(buf.buf);
		buf.buf = mandoc_strdup("");
	}
	return buf.buf;
}
1680
1681
1682 /*
1683 * Process text streams.
1684 */
1685 static int
1686 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1687 {
1688 size_t sz;
1689 const char *start;
1690 char *p;
1691 int isz;
1692 enum mandoc_esc esc;
1693
1694 /* Spring the input line trap. */
1695
1696 if (roffit_lines == 1) {
1697 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1698 free(buf->buf);
1699 buf->buf = p;
1700 buf->sz = isz + 1;
1701 *offs = 0;
1702 free(roffit_macro);
1703 roffit_lines = 0;
1704 return ROFF_REPARSE;
1705 } else if (roffit_lines > 1)
1706 --roffit_lines;
1707
1708 if (roffce_node != NULL && buf->buf[pos] != '\0') {
1709 if (roffce_lines < 1) {
1710 r->man->last = roffce_node;
1711 r->man->next = ROFF_NEXT_SIBLING;
1712 roffce_lines = 0;
1713 roffce_node = NULL;
1714 } else
1715 roffce_lines--;
1716 }
1717
1718 /* Convert all breakable hyphens into ASCII_HYPH. */
1719
1720 start = p = buf->buf + pos;
1721
1722 while (*p != '\0') {
1723 sz = strcspn(p, "-\\");
1724 p += sz;
1725
1726 if (*p == '\0')
1727 break;
1728
1729 if (*p == '\\') {
1730 /* Skip over escapes. */
1731 p++;
1732 esc = mandoc_escape((const char **)&p, NULL, NULL);
1733 if (esc == ESCAPE_ERROR)
1734 break;
1735 while (*p == '-')
1736 p++;
1737 continue;
1738 } else if (p == start) {
1739 p++;
1740 continue;
1741 }
1742
1743 if (isalpha((unsigned char)p[-1]) &&
1744 isalpha((unsigned char)p[1]))
1745 *p = ASCII_HYPH;
1746 p++;
1747 }
1748 return ROFF_CONT;
1749 }
1750
/*
 * Parse one input line held in buf, starting at offset *offs.
 * Inline equations and escape sequences are expanded first.
 * Then the line is handed to whatever is responsible for it:
 * the text or sub handler of an open roff block, the eqn or tbl
 * parser, roff_parsetext() for text lines, or the handler of the
 * roff request or user-defined macro starting the line.
 * Returns the resulting ROFF_* code; ROFF_CONT is returned
 * for macro lines that roff_parse() does not recognize.
 */
int
roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
{
	enum roff_tok	 t;
	int		 e;
	int		 pos;	/* parse point */
	int		 spos;	/* saved parse point for messages */
	int		 ppos;	/* original offset in buf->buf */
	int		 ctl;	/* macro line (boolean) */

	ppos = pos = *offs;

	/* Handle in-line equation delimiters. */

	if (r->tbl == NULL &&
	    r->last_eqn != NULL && r->last_eqn->delim &&
	    (r->eqn == NULL || r->eqn_inline)) {
		e = roff_eqndelim(r, buf, pos);
		if (e == ROFF_REPARSE)
			return e;
		assert(e == ROFF_CONT);
	}

	/* Expand some escape sequences. */

	e = roff_expand(r, buf, ln, pos, r->escape);
	if ((e & ROFF_MASK) == ROFF_IGN)
		return e;
	assert(e == ROFF_CONT);

	/* Check for a control character; this may advance pos. */

	ctl = roff_getcontrol(r, buf->buf, &pos);

	/*
	 * First, if a scope is open and we're not a macro, pass the
	 * text through the macro's filter.
	 * Equations process all content themselves.
	 * Tables process almost all content themselves, but we want
	 * to warn about macros before passing it there.
	 */

	if (r->last != NULL && ! ctl) {
		t = r->last->tok;
		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
		if ((e & ROFF_MASK) == ROFF_IGN)
			return e;
		/* Keep only the bits outside ROFF_MASK. */
		e &= ~ROFF_MASK;
	} else
		e = ROFF_IGN;
	if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
		eqn_read(r->eqn, buf->buf + ppos);
		return e;
	}
	if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
		tbl_read(r->tbl, ln, buf->buf, ppos);
		roff_addtbl(r->man, ln, r->tbl);
		return e;
	}
	if ( ! ctl)
		return roff_parsetext(r, buf, pos, offs) | e;

	/* Skip empty request lines. */

	if (buf->buf[pos] == '"') {
		mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
		return ROFF_IGN;
	} else if (buf->buf[pos] == '\0')
		return ROFF_IGN;

	/*
	 * If a scope is open, go to the child handler for that macro,
	 * as it may want to preprocess before doing anything with it.
	 * Don't do so if an equation is open.
	 */

	if (r->last) {
		t = r->last->tok;
		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
	}

	/* No scope is open.  This is a new request or macro. */

	spos = pos;
	t = roff_parse(r, buf->buf, &pos, ln, ppos);

	/* Tables ignore most macros. */

	if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
	    t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
		mandoc_msg(MANDOCERR_TBLMACRO,
		    ln, pos, "%s", buf->buf + spos);
		if (t != TOKEN_NONE)
			return ROFF_IGN;

		/*
		 * For an unknown macro, skip the macro name
		 * and pass the rest of the line to the table.
		 */

		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
			pos++;
		while (buf->buf[pos] == ' ')
			pos++;
		tbl_read(r->tbl, ln, buf->buf, pos);
		roff_addtbl(r->man, ln, r->tbl);
		return ROFF_IGN;
	}

	/* For now, let high level macros abort .ce mode. */

	if (ctl && roffce_node != NULL &&
	    (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
	     t == ROFF_TH || t == ROFF_TS)) {
		r->man->last = roffce_node;
		r->man->next = ROFF_NEXT_SIBLING;
		roffce_lines = 0;
		roffce_node = NULL;
	}

	/*
	 * This is neither a roff request nor a user-defined macro.
	 * Let the standard macro set parsers handle it.
	 */

	if (t == TOKEN_NONE)
		return ROFF_CONT;

	/* Execute a roff request or a user defined macro. */

	return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
}
1875
1876 /*
1877 * Internal interface function to tell the roff parser that execution
1878 * of the current macro ended. This is required because macro
1879 * definitions usually do not end with a .return request.
1880 */
1881 void
1882 roff_userret(struct roff *r)
1883 {
1884 struct mctx *ctx;
1885 int i;
1886
1887 assert(r->mstackpos >= 0);
1888 ctx = r->mstack + r->mstackpos;
1889 for (i = 0; i < ctx->argc; i++)
1890 free(ctx->argv[i]);
1891 ctx->argc = 0;
1892 r->mstackpos--;
1893 }
1894
1895 void
1896 roff_endparse(struct roff *r)
1897 {
1898 if (r->last != NULL)
1899 mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
1900 r->last->col, "%s", roff_name[r->last->tok]);
1901
1902 if (r->eqn != NULL) {
1903 mandoc_msg(MANDOCERR_BLK_NOEND,
1904 r->eqn->node->line, r->eqn->node->pos, "EQ");
1905 eqn_parse(r->eqn);
1906 r->eqn = NULL;
1907 }
1908
1909 if (r->tbl != NULL) {
1910 tbl_end(r->tbl, 1);
1911 r->tbl = NULL;
1912 }
1913 }
1914
1915 /*
1916 * Parse a roff node's type from the input buffer. This must be in the
1917 * form of ".foo xxx" in the usual way.
1918 */
1919 static enum roff_tok
1920 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1921 {
1922 char *cp;
1923 const char *mac;
1924 size_t maclen;
1925 int deftype;
1926 enum roff_tok t;
1927
1928 cp = buf + *pos;
1929
1930 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1931 return TOKEN_NONE;
1932
1933 mac = cp;
1934 maclen = roff_getname(r, &cp, ln, ppos);
1935
1936 deftype = ROFFDEF_USER | ROFFDEF_REN;
1937 r->current_string = roff_getstrn(r, mac, maclen, &deftype);
1938 switch (deftype) {
1939 case ROFFDEF_USER:
1940 t = ROFF_USERDEF;
1941 break;
1942 case ROFFDEF_REN:
1943 t = ROFF_RENAMED;
1944 break;
1945 default:
1946 t = roffhash_find(r->reqtab, mac, maclen);
1947 break;
1948 }
1949 if (t != TOKEN_NONE)
1950 *pos = cp - buf;
1951 else if (deftype == ROFFDEF_UNDEF) {
1952 /* Using an undefined macro defines it to be empty. */
1953 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
1954 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
1955 }
1956 return t;
1957 }
1958
1959 /* --- handling of request blocks ----------------------------------------- */
1960
1961 static int
1962 roff_cblock(ROFF_ARGS)
1963 {
1964
1965 /*
1966 * A block-close `..' should only be invoked as a child of an
1967 * ignore macro, otherwise raise a warning and just ignore it.
1968 */
1969
1970 if (r->last == NULL) {
1971 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
1972 return ROFF_IGN;
1973 }
1974
1975 switch (r->last->tok) {
1976 case ROFF_am:
1977 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1978 case ROFF_ami:
1979 case ROFF_de:
1980 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1981 case ROFF_dei:
1982 case ROFF_ig:
1983 break;
1984 default:
1985 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
1986 return ROFF_IGN;
1987 }
1988
1989 if (buf->buf[pos] != '\0')
1990 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
1991 ".. %s", buf->buf + pos);
1992
1993 roffnode_pop(r);
1994 roffnode_cleanscope(r);
1995 return ROFF_IGN;
1996
1997 }
1998
1999 static int
2000 roffnode_cleanscope(struct roff *r)
2001 {
2002 int inloop;
2003
2004 inloop = 0;
2005 while (r->last != NULL) {
2006 if (--r->last->endspan != 0)
2007 break;
2008 inloop += roffnode_pop(r);
2009 }
2010 return inloop;
2011 }
2012
2013 static int
2014 roff_ccond(struct roff *r, int ln, int ppos)
2015 {
2016 if (NULL == r->last) {
2017 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2018 return 0;
2019 }
2020
2021 switch (r->last->tok) {
2022 case ROFF_el:
2023 case ROFF_ie:
2024 case ROFF_if:
2025 case ROFF_while:
2026 break;
2027 default:
2028 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2029 return 0;
2030 }
2031
2032 if (r->last->endspan > -1) {
2033 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2034 return 0;
2035 }
2036
2037 return roffnode_pop(r) + roffnode_cleanscope(r);
2038 }
2039
2040 static int
2041 roff_block(ROFF_ARGS)
2042 {
2043 const char *name, *value;
2044 char *call, *cp, *iname, *rname;
2045 size_t csz, namesz, rsz;
2046 int deftype;
2047
2048 /* Ignore groff compatibility mode for now. */
2049
2050 if (tok == ROFF_de1)
2051 tok = ROFF_de;
2052 else if (tok == ROFF_dei1)
2053 tok = ROFF_dei;
2054 else if (tok == ROFF_am1)
2055 tok = ROFF_am;
2056 else if (tok == ROFF_ami1)
2057 tok = ROFF_ami;
2058
2059 /* Parse the macro name argument. */
2060
2061 cp = buf->buf + pos;
2062 if (tok == ROFF_ig) {
2063 iname = NULL;
2064 namesz = 0;
2065 } else {
2066 iname = cp;
2067 namesz = roff_getname(r, &cp, ln, ppos);
2068 iname[namesz] = '\0';
2069 }
2070
2071 /* Resolve the macro name argument if it is indirect. */
2072
2073 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2074 deftype = ROFFDEF_USER;
2075 name = roff_getstrn(r, iname, namesz, &deftype);
2076 if (name == NULL) {
2077 mandoc_msg(MANDOCERR_STR_UNDEF,
2078 ln, (int)(iname - buf->buf),
2079 "%.*s", (int)namesz, iname);
2080 namesz = 0;
2081 } else
2082 namesz = strlen(name);
2083 } else
2084 name = iname;
2085
2086 if (namesz == 0 && tok != ROFF_ig) {
2087 mandoc_msg(MANDOCERR_REQ_EMPTY,
2088 ln, ppos, "%s", roff_name[tok]);
2089 return ROFF_IGN;
2090 }
2091
2092 roffnode_push(r, tok, name, ln, ppos);
2093
2094 /*
2095 * At the beginning of a `de' macro, clear the existing string
2096 * with the same name, if there is one. New content will be
2097 * appended from roff_block_text() in multiline mode.
2098 */
2099
2100 if (tok == ROFF_de || tok == ROFF_dei) {
2101 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
2102 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2103 } else if (tok == ROFF_am || tok == ROFF_ami) {
2104 deftype = ROFFDEF_ANY;
2105 value = roff_getstrn(r, iname, namesz, &deftype);
2106 switch (deftype) { /* Before appending, ... */
2107 case ROFFDEF_PRE: /* copy predefined to user-defined. */
2108 roff_setstrn(&r->strtab, name, namesz,
2109 value, strlen(value), 0);
2110 break;
2111 case ROFFDEF_REN: /* call original standard macro. */
2112 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2113 (int)strlen(value), value);
2114 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2115 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2116 free(call);
2117 break;
2118 case ROFFDEF_STD: /* rename and call standard macro. */
2119 rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
2120 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
2121 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2122 (int)rsz, rname);
2123 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2124 free(call);
2125 free(rname);
2126 break;
2127 default:
2128 break;
2129 }
2130 }
2131
2132 if (*cp == '\0')
2133 return ROFF_IGN;
2134
2135 /* Get the custom end marker. */
2136
2137 iname = cp;
2138 namesz = roff_getname(r, &cp, ln, ppos);
2139
2140 /* Resolve the end marker if it is indirect. */
2141
2142 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2143 deftype = ROFFDEF_USER;
2144 name = roff_getstrn(r, iname, namesz, &deftype);
2145 if (name == NULL) {
2146 mandoc_msg(MANDOCERR_STR_UNDEF,
2147 ln, (int)(iname - buf->buf),
2148 "%.*s", (int)namesz, iname);
2149 namesz = 0;
2150 } else
2151 namesz = strlen(name);
2152 } else
2153 name = iname;
2154
2155 if (namesz)
2156 r->last->end = mandoc_strndup(name, namesz);
2157
2158 if (*cp != '\0')
2159 mandoc_msg(MANDOCERR_ARG_EXCESS,
2160 ln, pos, ".%s ... %s", roff_name[tok], cp);
2161
2162 return ROFF_IGN;
2163 }
2164
2165 static int
2166 roff_block_sub(ROFF_ARGS)
2167 {
2168 enum roff_tok t;
2169 int i, j;
2170
2171 /*
2172 * First check whether a custom macro exists at this level. If
2173 * it does, then check against it. This is some of groff's
2174 * stranger behaviours. If we encountered a custom end-scope
2175 * tag and that tag also happens to be a "real" macro, then we
2176 * need to try interpreting it again as a real macro. If it's
2177 * not, then return ignore. Else continue.
2178 */
2179
2180 if (r->last->end) {
2181 for (i = pos, j = 0; r->last->end[j]; j++, i++)
2182 if (buf->buf[i] != r->last->end[j])
2183 break;
2184
2185 if (r->last->end[j] == '\0' &&
2186 (buf->buf[i] == '\0' ||
2187 buf->buf[i] == ' ' ||
2188 buf->buf[i] == '\t')) {
2189 roffnode_pop(r);
2190 roffnode_cleanscope(r);
2191
2192 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2193 i++;
2194
2195 pos = i;
2196 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2197 TOKEN_NONE)
2198 return ROFF_RERUN;
2199 return ROFF_IGN;
2200 }
2201 }
2202
2203 /*
2204 * If we have no custom end-query or lookup failed, then try
2205 * pulling it out of the hashtable.
2206 */
2207
2208 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2209
2210 if (t != ROFF_cblock) {
2211 if (tok != ROFF_ig)
2212 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2213 return ROFF_IGN;
2214 }
2215
2216 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2217 }
2218
2219 static int
2220 roff_block_text(ROFF_ARGS)
2221 {
2222
2223 if (tok != ROFF_ig)
2224 roff_setstr(r, r->last->name, buf->buf + pos, 2);
2225
2226 return ROFF_IGN;
2227 }
2228
2229 static int
2230 roff_cond_sub(ROFF_ARGS)
2231 {
2232 char *ep;
2233 int endloop, irc, rr;
2234 enum roff_tok t;
2235
2236 irc = ROFF_IGN;
2237 rr = r->last->rule;
2238 endloop = tok != ROFF_while ? ROFF_IGN :
2239 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2240 if (roffnode_cleanscope(r))
2241 irc |= endloop;
2242
2243 /*
2244 * If `\}' occurs on a macro line without a preceding macro,
2245 * drop the line completely.
2246 */
2247
2248 ep = buf->buf + pos;
2249 if (ep[0] == '\\' && ep[1] == '}')
2250 rr = 0;
2251
2252 /*
2253 * The closing delimiter `\}' rewinds the conditional scope
2254 * but is otherwise ignored when interpreting the line.
2255 */
2256
2257 while ((ep = strchr(ep, '\\')) != NULL) {
2258 switch (ep[1]) {
2259 case '}':
2260 memmove(ep, ep + 2, strlen(ep + 2) + 1);
2261 if (roff_ccond(r, ln, ep - buf->buf))
2262 irc |= endloop;
2263 break;
2264 case '\0':
2265 ++ep;
2266 break;
2267 default:
2268 ep += 2;
2269 break;
2270 }
2271 }
2272
2273 /*
2274 * Fully handle known macros when they are structurally
2275 * required or when the conditional evaluated to true.
2276 */
2277
2278 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2279 irc |= t != TOKEN_NONE && (rr || roffs[t].flags & ROFFMAC_STRUCT) ?
2280 (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs) :
2281 rr ? ROFF_CONT : ROFF_IGN;
2282 return irc;
2283 }
2284
2285 static int
2286 roff_cond_text(ROFF_ARGS)
2287 {
2288 char *ep;
2289 int endloop, irc, rr;
2290
2291 irc = ROFF_IGN;
2292 rr = r->last->rule;
2293 endloop = tok != ROFF_while ? ROFF_IGN :
2294 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2295 if (roffnode_cleanscope(r))
2296 irc |= endloop;
2297
2298 /*
2299 * If `\}' occurs on a text line with neither preceding
2300 * nor following characters, drop the line completely.
2301 */
2302
2303 ep = buf->buf + pos;
2304 if (strcmp(ep, "\\}") == 0)
2305 rr = 0;
2306
2307 /*
2308 * The closing delimiter `\}' rewinds the conditional scope
2309 * but is otherwise ignored when interpreting the line.
2310 */
2311
2312 while ((ep = strchr(ep, '\\')) != NULL) {
2313 switch (ep[1]) {
2314 case '}':
2315 memmove(ep, ep + 2, strlen(ep + 2) + 1);
2316 if (roff_ccond(r, ln, ep - buf->buf))
2317 irc |= endloop;
2318 break;
2319 case '\0':
2320 ++ep;
2321 break;
2322 default:
2323 ep += 2;
2324 break;
2325 }
2326 }
2327 if (rr)
2328 irc |= ROFF_CONT;
2329 return irc;
2330 }
2331
2332 /* --- handling of numeric and conditional expressions -------------------- */
2333
2334 /*
2335 * Parse a single signed integer number. Stop at the first non-digit.
2336 * If there is at least one digit, return success and advance the
2337 * parse point, else return failure and let the parse point unchanged.
2338 * Ignore overflows, treat them just like the C language.
2339 */
2340 static int
2341 roff_getnum(const char *v, int *pos, int *res, int flags)
2342 {
2343 int myres, scaled, n, p;
2344
2345 if (NULL == res)
2346 res = &myres;
2347
2348 p = *pos;
2349 n = v[p] == '-';
2350 if (n || v[p] == '+')
2351 p++;
2352
2353 if (flags & ROFFNUM_WHITE)
2354 while (isspace((unsigned char)v[p]))
2355 p++;
2356
2357 for (*res = 0; isdigit((unsigned char)v[p]); p++)
2358 *res = 10 * *res + v[p] - '0';
2359 if (p == *pos + n)
2360 return 0;
2361
2362 if (n)
2363 *res = -*res;
2364
2365 /* Each number may be followed by one optional scaling unit. */
2366
2367 switch (v[p]) {
2368 case 'f':
2369 scaled = *res * 65536;
2370 break;
2371 case 'i':
2372 scaled = *res * 240;
2373 break;
2374 case 'c':
2375 scaled = *res * 240 / 2.54;
2376 break;
2377 case 'v':
2378 case 'P':
2379 scaled = *res * 40;
2380 break;
2381 case 'm':
2382 case 'n':
2383 scaled = *res * 24;
2384 break;
2385 case 'p':
2386 scaled = *res * 10 / 3;
2387 break;
2388 case 'u':
2389 scaled = *res;
2390 break;
2391 case 'M':
2392 scaled = *res * 6 / 25;
2393 break;
2394 default:
2395 scaled = *res;
2396 p--;
2397 break;
2398 }
2399 if (flags & ROFFNUM_SCALE)
2400 *res = scaled;
2401
2402 *pos = p + 1;
2403 return 1;
2404 }
2405
/*
 * Evaluate a string comparison condition.
 * The character at v[*pos] serves as the delimiter.
 * The condition holds if the text between the first and the second
 * occurrence of the delimiter equals the text between the second
 * and the third occurrence.
 * Afterwards, *pos points behind the third occurrence of the
 * delimiter or, if there is none, to the end of the string.
 * Return 1 if the strings are equal, 0 otherwise.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*left, *right;
	char		 delim;
	int		 match;

	match = 0;
	delim = v[*pos];
	left = v + *pos + 1;		/* scans the first string */
	right = strchr(left, delim);	/* scans the second string */

	if (right != NULL) {
		for (;;) {
			right++;
			if (*right == '\0')
				break;
			if (*left != *right) {
				/* Mismatch: skip to the final delimiter. */
				right = strchr(right, delim);
				break;
			}
			if (*right == delim) {
				/* Both strings ended simultaneously. */
				match = 1;
				break;
			}
			left++;
		}
	}

	if (right == NULL)
		right = strchr(left, '\0');
	else if (*right != '\0')
		right++;
	*pos = right - v;
	return match;
}
2448
2449 /*
2450 * Evaluate an optionally negated single character, numerical,
2451 * or string condition.
2452 */
2453 static int
2454 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2455 {
2456 const char *start, *end;
2457 char *cp, *name;
2458 size_t sz;
2459 int deftype, len, number, savepos, istrue, wanttrue;
2460
2461 if ('!' == v[*pos]) {
2462 wanttrue = 0;
2463 (*pos)++;
2464 } else
2465 wanttrue = 1;
2466
2467 switch (v[*pos]) {
2468 case '\0':
2469 return 0;
2470 case 'n':
2471 case 'o':
2472 (*pos)++;
2473 return wanttrue;
2474 case 'e':
2475 case 't':
2476 case 'v':
2477 (*pos)++;
2478 return !wanttrue;
2479 case 'c':
2480 do {
2481 (*pos)++;
2482 } while (v[*pos] == ' ');
2483
2484 /*
2485 * Quirk for groff compatibility:
2486 * The horizontal tab is neither available nor unavailable.
2487 */
2488
2489 if (v[*pos] == '\t') {
2490 (*pos)++;
2491 return 0;
2492 }
2493
2494 /* Printable ASCII characters are available. */
2495
2496 if (v[*pos] != '\\') {
2497 (*pos)++;
2498 return wanttrue;
2499 }
2500
2501 end = v + ++*pos;
2502 switch (mandoc_escape(&end, &start, &len)) {
2503 case ESCAPE_SPECIAL:
2504 istrue = mchars_spec2cp(start, len) != -1;
2505 break;
2506 case ESCAPE_UNICODE:
2507 istrue = 1;
2508 break;
2509 case ESCAPE_NUMBERED:
2510 istrue = mchars_num2char(start, len) != -1;
2511 break;
2512 default:
2513 istrue = !wanttrue;
2514 break;
2515 }
2516 *pos = end - v;
2517 return istrue == wanttrue;
2518 case 'd':
2519 case 'r':
2520 cp = v + *pos + 1;
2521 while (*cp == ' ')
2522 cp++;
2523 name = cp;
2524 sz = roff_getname(r, &cp, ln, cp - v);
2525 if (sz == 0)
2526 istrue = 0;
2527 else if (v[*pos] == 'r')
2528 istrue = roff_hasregn(r, name, sz);
2529 else {
2530 deftype = ROFFDEF_ANY;
2531 roff_getstrn(r, name, sz, &deftype);
2532 istrue = !!deftype;
2533 }
2534 *pos = cp - v;
2535 return istrue == wanttrue;
2536 default:
2537 break;
2538 }
2539
2540 savepos = *pos;
2541 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2542 return (number > 0) == wanttrue;
2543 else if (*pos == savepos)
2544 return roff_evalstrcond(v, pos) == wanttrue;
2545 else
2546 return 0;
2547 }
2548
2549 static int
2550 roff_line_ignore(ROFF_ARGS)
2551 {
2552
2553 return ROFF_IGN;
2554 }
2555
2556 static int
2557 roff_insec(ROFF_ARGS)
2558 {
2559
2560 mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]);
2561 return ROFF_IGN;
2562 }
2563
2564 static int
2565 roff_unsupp(ROFF_ARGS)
2566 {
2567
2568 mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]);
2569 return ROFF_IGN;
2570 }
2571
2572 static int
2573 roff_cond(ROFF_ARGS)
2574 {
2575 int irc;
2576
2577 roffnode_push(r, tok, NULL, ln, ppos);
2578
2579 /*
2580 * An `.el' has no conditional body: it will consume the value
2581 * of the current rstack entry set in prior `ie' calls or
2582 * defaults to DENY.
2583 *
2584 * If we're not an `el', however, then evaluate the conditional.
2585 */
2586
2587 r->last->rule = tok == ROFF_el ?
2588 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2589 roff_evalcond(r, ln, buf->buf, &pos);
2590
2591 /*
2592 * An if-else will put the NEGATION of the current evaluated
2593 * conditional into the stack of rules.
2594 */
2595
2596 if (tok == ROFF_ie) {
2597 if (r->rstackpos + 1 == r->rstacksz) {
2598 r->rstacksz += 16;
2599 r->rstack = mandoc_reallocarray(r->rstack,
2600 r->rstacksz, sizeof(int));
2601 }
2602 r->rstack[++r->rstackpos] = !r->last->rule;
2603 }
2604
2605 /* If the parent has false as its rule, then so do we. */
2606
2607 if (r->last->parent && !r->last->parent->rule)
2608 r->last->rule = 0;
2609
2610 /*
2611 * Determine scope.
2612 * If there is nothing on the line after the conditional,
2613 * not even whitespace, use next-line scope.
2614 * Except that .while does not support next-line scope.
2615 */
2616
2617 if (buf->buf[pos] == '\0' && tok != ROFF_while) {
2618 r->last->endspan = 2;
2619 goto out;
2620 }
2621
2622 while (buf->buf[pos] == ' ')
2623 pos++;
2624
2625 /* An opening brace requests multiline scope. */
2626
2627 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2628 r->last->endspan = -1;
2629 pos += 2;
2630 while (buf->buf[pos] == ' ')
2631 pos++;
2632 goto out;
2633 }
2634
2635 /*
2636 * Anything else following the conditional causes
2637 * single-line scope. Warn if the scope contains
2638 * nothing but trailing whitespace.
2639 */
2640
2641 if (buf->buf[pos] == '\0')
2642 mandoc_msg(MANDOCERR_COND_EMPTY,
2643 ln, ppos, "%s", roff_name[tok]);
2644
2645 r->last->endspan = 1;
2646
2647 out:
2648 *offs = pos;
2649 irc = ROFF_RERUN;
2650 if (tok == ROFF_while)
2651 irc |= ROFF_WHILE;
2652 return irc;
2653 }
2654
2655 static int
2656 roff_ds(ROFF_ARGS)
2657 {
2658 char *string;
2659 const char *name;
2660 size_t namesz;
2661
2662 /* Ignore groff compatibility mode for now. */
2663
2664 if (tok == ROFF_ds1)
2665 tok = ROFF_ds;
2666 else if (tok == ROFF_as1)
2667 tok = ROFF_as;
2668
2669 /*
2670 * The first word is the name of the string.
2671 * If it is empty or terminated by an escape sequence,
2672 * abort the `ds' request without defining anything.
2673 */
2674
2675 name = string = buf->buf + pos;
2676 if (*name == '\0')
2677 return ROFF_IGN;
2678
2679 namesz = roff_getname(r, &string, ln, pos);
2680 if (name[namesz] == '\\')
2681 return ROFF_IGN;
2682
2683 /* Read past the initial double-quote, if any. */
2684 if (*string == '"')
2685 string++;
2686
2687 /* The rest is the value. */
2688 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2689 ROFF_as == tok);
2690 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2691 return ROFF_IGN;
2692 }
2693
/*
 * Parse one operator of one or two characters at v[*pos].
 * Two-character operators are reported with single-letter codes:
 * <= as 'l', >= as 'g', <> as '!', <? as 'i', >? as 'a',
 * and == as '='.
 * If an operator is recognized, store its code in *res, move *pos
 * behind it, and return the code.  Otherwise, return 0 and leave
 * *pos alone; *res then contains the unrecognized character.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*cp;
	int		 len;

	cp = v + *pos;
	*res = cp[0];
	len = 1;

	switch (cp[0]) {
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case ':':
		break;
	case '<':
		if (cp[1] == '=') {
			*res = 'l';
			len = 2;
		} else if (cp[1] == '>') {
			*res = '!';
			len = 2;
		} else if (cp[1] == '?') {
			*res = 'i';
			len = 2;
		}
		break;
	case '>':
		if (cp[1] == '=') {
			*res = 'g';
			len = 2;
		} else if (cp[1] == '?') {
			*res = 'a';
			len = 2;
		}
		break;
	case '=':
		if (cp[1] == '=')
			len = 2;
		break;
	default:
		return 0;
	}

	*pos += len;
	return *res;
}
2757
2758 /*
2759 * Evaluate either a parenthesized numeric expression
2760 * or a single signed integer number.
2761 */
2762 static int
2763 roff_evalpar(struct roff *r, int ln,
2764 const char *v, int *pos, int *res, int flags)
2765 {
2766
2767 if ('(' != v[*pos])
2768 return roff_getnum(v, pos, res, flags);
2769
2770 (*pos)++;
2771 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2772 return 0;
2773
2774 /*
2775 * Omission of the closing parenthesis
2776 * is an error in validation mode,
2777 * but ignored in evaluation mode.
2778 */
2779
2780 if (')' == v[*pos])
2781 (*pos)++;
2782 else if (NULL == res)
2783 return 0;
2784
2785 return 1;
2786 }
2787
2788 /*
2789 * Evaluate a complete numeric expression.
2790 * Proceed left to right, there is no concept of precedence.
2791 */
2792 static int
2793 roff_evalnum(struct roff *r, int ln, const char *v,
2794 int *pos, int *res, int flags)
2795 {
2796 int mypos, operand2;
2797 char operator;
2798
2799 if (NULL == pos) {
2800 mypos = 0;
2801 pos = &mypos;
2802 }
2803
2804 if (flags & ROFFNUM_WHITE)
2805 while (isspace((unsigned char)v[*pos]))
2806 (*pos)++;
2807
2808 if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2809 return 0;
2810
2811 while (1) {
2812 if (flags & ROFFNUM_WHITE)
2813 while (isspace((unsigned char)v[*pos]))
2814 (*pos)++;
2815
2816 if ( ! roff_getop(v, pos, &operator))
2817 break;
2818
2819 if (flags & ROFFNUM_WHITE)
2820 while (isspace((unsigned char)v[*pos]))
2821 (*pos)++;
2822
2823 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2824 return 0;
2825
2826 if (flags & ROFFNUM_WHITE)
2827 while (isspace((unsigned char)v[*pos]))
2828 (*pos)++;
2829
2830 if (NULL == res)
2831 continue;
2832
2833 switch (operator) {
2834 case '+':
2835 *res += operand2;
2836 break;
2837 case '-':
2838 *res -= operand2;
2839 break;
2840 case '*':
2841 *res *= operand2;
2842 break;
2843 case '/':
2844 if (operand2 == 0) {
2845 mandoc_msg(MANDOCERR_DIVZERO,
2846 ln, *pos, "%s", v);
2847 *res = 0;
2848 break;
2849 }
2850 *res /= operand2;
2851 break;
2852 case '%':
2853 if (operand2 == 0) {
2854 mandoc_msg(MANDOCERR_DIVZERO,
2855 ln, *pos, "%s", v);
2856 *res = 0;
2857 break;
2858 }
2859 *res %= operand2;
2860 break;
2861 case '<':
2862 *res = *res < operand2;
2863 break;
2864 case '>':
2865 *res = *res > operand2;
2866 break;
2867 case 'l':
2868 *res = *res <= operand2;
2869 break;
2870 case 'g':
2871 *res = *res >= operand2;
2872 break;
2873 case '=':
2874 *res = *res == operand2;
2875 break;
2876 case '!':
2877 *res = *res != operand2;
2878 break;
2879 case '&':
2880 *res = *res && operand2;
2881 break;
2882 case ':':
2883 *res = *res || operand2;
2884 break;
2885 case 'i':
2886 if (operand2 < *res)
2887 *res = operand2;
2888 break;
2889 case 'a':
2890 if (operand2 > *res)
2891 *res = operand2;
2892 break;
2893 default:
2894 abort();
2895 }
2896 }
2897 return 1;
2898 }
2899
2900 /* --- register management ------------------------------------------------ */
2901
/*
 * Set the register with the NUL-terminated name.
 * The sign selects incrementing ('+'), decrementing ('-'),
 * or assigning (anything else); the step is left unchanged.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	size_t	 namesz;

	namesz = strlen(name);
	roff_setregn(r, name, namesz, val, sign, INT_MIN);
}
2907
2908 static void
2909 roff_setregn(struct roff *r, const char *name, size_t len,
2910 int val, char sign, int step)
2911 {
2912 struct roffreg *reg;
2913
2914 /* Search for an existing register with the same name. */
2915 reg = r->regtab;
2916
2917 while (reg != NULL && (reg->key.sz != len ||
2918 strncmp(reg->key.p, name, len) != 0))
2919 reg = reg->next;
2920
2921 if (NULL == reg) {
2922 /* Create a new register. */
2923 reg = mandoc_malloc(sizeof(struct roffreg));
2924 reg->key.p = mandoc_strndup(name, len);
2925 reg->key.sz = len;
2926 reg->val = 0;
2927 reg->step = 0;
2928 reg->next = r->regtab;
2929 r->regtab = reg;
2930 }
2931
2932 if ('+' == sign)
2933 reg->val += val;
2934 else if ('-' == sign)
2935 reg->val -= val;
2936 else
2937 reg->val = val;
2938 if (step != INT_MIN)
2939 reg->step = step;
2940 }
2941
2942 /*
2943 * Handle some predefined read-only number registers.
2944 * For now, return -1 if the requested register is not predefined;
2945 * in case a predefined read-only register having the value -1
2946 * were to turn up, another special value would have to be chosen.
2947 */
2948 static int
2949 roff_getregro(const struct roff *r, const char *name)
2950 {
2951
2952 switch (*name) {
2953 case '$': /* Number of arguments of the last macro evaluated. */
2954 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
2955 case 'A': /* ASCII approximation mode is always off. */
2956 return 0;
2957 case 'g': /* Groff compatibility mode is always on. */
2958 return 1;
2959 case 'H': /* Fixed horizontal resolution. */
2960 return 24;
2961 case 'j': /* Always adjust left margin only. */
2962 return 0;
2963 case 'T': /* Some output device is always defined. */
2964 return 1;
2965 case 'V': /* Fixed vertical resolution. */
2966 return 40;
2967 default:
2968 return -1;
2969 }
2970 }
2971
/*
 * Return the value of the register with the NUL-terminated name,
 * without applying any auto-increment.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	size_t	 namesz;

	namesz = strlen(name);
	return roff_getregn(r, name, namesz, '\0');
}
2977
2978 static int
2979 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
2980 {
2981 struct roffreg *reg;
2982 int val;
2983
2984 if ('.' == name[0] && 2 == len) {
2985 val = roff_getregro(r, name + 1);
2986 if (-1 != val)
2987 return val;
2988 }
2989
2990 for (reg = r->regtab; reg; reg = reg->next) {
2991 if (len == reg->key.sz &&
2992 0 == strncmp(name, reg->key.p, len)) {
2993 switch (sign) {
2994 case '+':
2995 reg->val += reg->step;
2996 break;
2997 case '-':
2998 reg->val -= reg->step;
2999 break;
3000 default:
3001 break;
3002 }
3003 return reg->val;
3004 }
3005 }
3006
3007 roff_setregn(r, name, len, 0, '\0', INT_MIN);
3008 return 0;
3009 }
3010
3011 static int
3012 roff_hasregn(const struct roff *r, const char *name, size_t len)
3013 {
3014 struct roffreg *reg;
3015 int val;
3016
3017 if ('.' == name[0] && 2 == len) {
3018 val = roff_getregro(r, name + 1);
3019 if (-1 != val)
3020 return 1;
3021 }
3022
3023 for (reg = r->regtab; reg; reg = reg->next)
3024 if (len == reg->key.sz &&
3025 0 == strncmp(name, reg->key.p, len))
3026 return 1;
3027
3028 return 0;
3029 }
3030
3031 static void
3032 roff_freereg(struct roffreg *reg)
3033 {
3034 struct roffreg *old_reg;
3035
3036 while (NULL != reg) {
3037 free(reg->key.p);
3038 old_reg = reg;
3039 reg = reg->next;
3040 free(old_reg);
3041 }
3042 }
3043
3044 static int
3045 roff_nr(ROFF_ARGS)
3046 {
3047 char *key, *val, *step;
3048 size_t keysz;
3049 int iv, is, len;
3050 char sign;
3051
3052 key = val = buf->buf + pos;
3053 if (*key == '\0')
3054 return ROFF_IGN;
3055
3056 keysz = roff_getname(r, &val, ln, pos);
3057 if (key[keysz] == '\\')
3058 return ROFF_IGN;
3059
3060 sign = *val;
3061 if (sign == '+' || sign == '-')
3062 val++;
3063
3064 len = 0;
3065 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
3066 return ROFF_IGN;
3067
3068 step = val + len;
3069 while (isspace((unsigned char)*step))
3070 step++;
3071 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
3072 is = INT_MIN;
3073
3074 roff_setregn(r, key, keysz, iv, sign, is);
3075 return ROFF_IGN;
3076 }
3077
3078 static int
3079 roff_rr(ROFF_ARGS)
3080 {
3081 struct roffreg *reg, **prev;
3082 char *name, *cp;
3083 size_t namesz;
3084
3085 name = cp = buf->buf + pos;
3086 if (*name == '\0')
3087 return ROFF_IGN;
3088 namesz = roff_getname(r, &cp, ln, pos);
3089 name[namesz] = '\0';
3090
3091 prev = &r->regtab;
3092 while (1) {
3093 reg = *prev;
3094 if (reg == NULL || !strcmp(name, reg->key.p))
3095 break;
3096 prev = &reg->next;
3097 }
3098 if (reg != NULL) {
3099 *prev = reg->next;
3100 free(reg->key.p);
3101 free(reg);
3102 }
3103 return ROFF_IGN;
3104 }
3105
3106 /* --- handler functions for roff requests -------------------------------- */
3107
3108 static int
3109 roff_rm(ROFF_ARGS)
3110 {
3111 const char *name;
3112 char *cp;
3113 size_t namesz;
3114
3115 cp = buf->buf + pos;
3116 while (*cp != '\0') {
3117 name = cp;
3118 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
3119 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
3120 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3121 if (name[namesz] == '\\')
3122 break;
3123 }
3124 return ROFF_IGN;
3125 }
3126
3127 static int
3128 roff_it(ROFF_ARGS)
3129 {
3130 int iv;
3131
3132 /* Parse the number of lines. */
3133
3134 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
3135 mandoc_msg(MANDOCERR_IT_NONUM,
3136 ln, ppos, "%s", buf->buf + 1);
3137 return ROFF_IGN;
3138 }
3139
3140 while (isspace((unsigned char)buf->buf[pos]))
3141 pos++;
3142
3143 /*
3144 * Arm the input line trap.
3145 * Special-casing "an-trap" is an ugly workaround to cope
3146 * with DocBook stupidly fiddling with man(7) internals.
3147 */
3148
3149 roffit_lines = iv;
3150 roffit_macro = mandoc_strdup(iv != 1 ||
3151 strcmp(buf->buf + pos, "an-trap") ?
3152 buf->buf + pos : "br");
3153 return ROFF_IGN;
3154 }
3155
3156 static int
3157 roff_Dd(ROFF_ARGS)
3158 {
3159 int mask;
3160 enum roff_tok t, te;
3161
3162 switch (tok) {
3163 case ROFF_Dd:
3164 tok = MDOC_Dd;
3165 te = MDOC_MAX;
3166 if (r->format == 0)
3167 r->format = MPARSE_MDOC;
3168 mask = MPARSE_MDOC | MPARSE_QUICK;
3169 break;
3170 case ROFF_TH:
3171 tok = MAN_TH;
3172 te = MAN_MAX;
3173 if (r->format == 0)
3174 r->format = MPARSE_MAN;
3175 mask = MPARSE_QUICK;
3176 break;
3177 default:
3178 abort();
3179 }
3180 if ((r->options & mask) == 0)
3181 for (t = tok; t < te; t++)
3182 roff_setstr(r, roff_name[t], NULL, 0);
3183 return ROFF_CONT;
3184 }
3185
3186 static int
3187 roff_TE(ROFF_ARGS)
3188 {
3189 if (r->tbl == NULL) {
3190 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE");
3191 return ROFF_IGN;
3192 }
3193 if (tbl_end(r->tbl, 0) == 0) {
3194 r->tbl = NULL;
3195 free(buf->buf);
3196 buf->buf = mandoc_strdup(".sp");
3197 buf->sz = 4;
3198 *offs = 0;
3199 return ROFF_REPARSE;
3200 }
3201 r->tbl = NULL;
3202 return ROFF_IGN;
3203 }
3204
3205 static int
3206 roff_T_(ROFF_ARGS)
3207 {
3208
3209 if (NULL == r->tbl)
3210 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&");
3211 else
3212 tbl_restart(ln, ppos, r->tbl);
3213
3214 return ROFF_IGN;
3215 }
3216
3217 /*
3218 * Handle in-line equation delimiters.
3219 */
3220 static int
3221 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
3222 {
3223 char *cp1, *cp2;
3224 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
3225
3226 /*
3227 * Outside equations, look for an opening delimiter.
3228 * If we are inside an equation, we already know it is
3229 * in-line, or this function wouldn't have been called;
3230 * so look for a closing delimiter.
3231 */
3232
3233 cp1 = buf->buf + pos;
3234 cp2 = strchr(cp1, r->eqn == NULL ?
3235 r->last_eqn->odelim : r->last_eqn->cdelim);
3236 if (cp2 == NULL)
3237 return ROFF_CONT;
3238
3239 *cp2++ = '\0';
3240 bef_pr = bef_nl = aft_nl = aft_pr = "";
3241
3242 /* Handle preceding text, protecting whitespace. */
3243
3244 if (*buf->buf != '\0') {
3245 if (r->eqn == NULL)
3246 bef_pr = "\\&";
3247 bef_nl = "\n";
3248 }
3249
3250 /*
3251 * Prepare replacing the delimiter with an equation macro
3252 * and drop leading white space from the equation.
3253 */
3254
3255 if (r->eqn == NULL) {
3256 while (*cp2 == ' ')
3257 cp2++;
3258 mac = ".EQ";
3259 } else
3260 mac = ".EN";
3261
3262 /* Handle following text, protecting whitespace. */
3263
3264 if (*cp2 != '\0') {
3265 aft_nl = "\n";
3266 if (r->eqn != NULL)
3267 aft_pr = "\\&";
3268 }
3269
3270 /* Do the actual replacement. */
3271
3272 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
3273 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
3274 free(buf->buf);
3275 buf->buf = cp1;
3276
3277 /* Toggle the in-line state of the eqn subsystem. */
3278
3279 r->eqn_inline = r->eqn == NULL;
3280 return ROFF_REPARSE;
3281 }
3282
3283 static int
3284 roff_EQ(ROFF_ARGS)
3285 {
3286 struct roff_node *n;
3287
3288 if (r->man->meta.macroset == MACROSET_MAN)
3289 man_breakscope(r->man, ROFF_EQ);
3290 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3291 if (ln > r->man->last->line)
3292 n->flags |= NODE_LINE;
3293 n->eqn = eqn_box_new();
3294 roff_node_append(r->man, n);
3295 r->man->next = ROFF_NEXT_SIBLING;
3296
3297 assert(r->eqn == NULL);
3298 if (r->last_eqn == NULL)
3299 r->last_eqn = eqn_alloc();
3300 else
3301 eqn_reset(r->last_eqn);
3302 r->eqn = r->last_eqn;
3303 r->eqn->node = n;
3304
3305 if (buf->buf[pos] != '\0')
3306 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3307 ".EQ %s", buf->buf + pos);
3308
3309 return ROFF_IGN;
3310 }
3311
3312 static int
3313 roff_EN(ROFF_ARGS)
3314 {
3315 if (r->eqn != NULL) {
3316 eqn_parse(r->eqn);
3317 r->eqn = NULL;
3318 } else
3319 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN");
3320 if (buf->buf[pos] != '\0')
3321 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3322 "EN %s", buf->buf + pos);
3323 return ROFF_IGN;
3324 }
3325
3326 static int
3327 roff_TS(ROFF_ARGS)
3328 {
3329 if (r->tbl != NULL) {
3330 mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
3331 tbl_end(r->tbl, 0);
3332 }
3333 r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
3334 if (r->last_tbl == NULL)
3335 r->first_tbl = r->tbl;
3336 r->last_tbl = r->tbl;
3337 return ROFF_IGN;
3338 }
3339
3340 static int
3341 roff_onearg(ROFF_ARGS)
3342 {
3343 struct roff_node *n;
3344 char *cp;
3345 int npos;
3346
3347 if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3348 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3349 tok == ROFF_ti))
3350 man_breakscope(r->man, tok);
3351
3352 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3353 r->man->last = roffce_node;
3354 r->man->next = ROFF_NEXT_SIBLING;
3355 }
3356
3357 roff_elem_alloc(r->man, ln, ppos, tok);
3358 n = r->man->last;
3359
3360 cp = buf->buf + pos;
3361 if (*cp != '\0') {
3362 while (*cp != '\0' && *cp != ' ')
3363 cp++;
3364 while (*cp == ' ')
3365 *cp++ = '\0';
3366 if (*cp != '\0')
3367 mandoc_msg(MANDOCERR_ARG_EXCESS,
3368 ln, (int)(cp - buf->buf),
3369 "%s ... %s", roff_name[tok], cp);
3370 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3371 }
3372
3373 if (tok == ROFF_ce || tok == ROFF_rj) {
3374 if (r->man->last->type == ROFFT_ELEM) {
3375 roff_word_alloc(r->man, ln, pos, "1");
3376 r->man->last->flags |= NODE_NOSRC;
3377 }
3378 npos = 0;
3379 if (roff_evalnum(r, ln, r->man->last->string, &npos,
3380 &roffce_lines, 0) == 0) {
3381 mandoc_msg(MANDOCERR_CE_NONUM,
3382 ln, pos, "ce %s", buf->buf + pos);
3383 roffce_lines = 1;
3384 }
3385 if (roffce_lines < 1) {
3386 r->man->last = r->man->last->parent;
3387 roffce_node = NULL;
3388 roffce_lines = 0;
3389 } else
3390 roffce_node = r->man->last->parent;
3391 } else {
3392 n->flags |= NODE_VALID | NODE_ENDED;
3393 r->man->last = n;
3394 }
3395 n->flags |= NODE_LINE;
3396 r->man->next = ROFF_NEXT_SIBLING;
3397 return ROFF_IGN;
3398 }
3399
3400 static int
3401 roff_manyarg(ROFF_ARGS)
3402 {
3403 struct roff_node *n;
3404 char *sp, *ep;
3405
3406 roff_elem_alloc(r->man, ln, ppos, tok);
3407 n = r->man->last;
3408
3409 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3410 while (*ep != '\0' && *ep != ' ')
3411 ep++;
3412 while (*ep == ' ')
3413 *ep++ = '\0';
3414 roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3415 }
3416
3417 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3418 r->man->last = n;
3419 r->man->next = ROFF_NEXT_SIBLING;
3420 return ROFF_IGN;
3421 }
3422
3423 static int
3424 roff_als(ROFF_ARGS)
3425 {
3426 char *oldn, *newn, *end, *value;
3427 size_t oldsz, newsz, valsz;
3428
3429 newn = oldn = buf->buf + pos;
3430 if (*newn == '\0')
3431 return ROFF_IGN;
3432
3433 newsz = roff_getname(r, &oldn, ln, pos);
3434 if (newn[newsz] == '\\' || *oldn == '\0')
3435 return ROFF_IGN;
3436
3437 end = oldn;
3438 oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3439 if (oldsz == 0)
3440 return ROFF_IGN;
3441
3442 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3443 (int)oldsz, oldn);
3444 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3445 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3446 free(value);
3447 return ROFF_IGN;
3448 }
3449
3450 static int
3451 roff_br(ROFF_ARGS)
3452 {
3453 if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3454 man_breakscope(r->man, ROFF_br);
3455 roff_elem_alloc(r->man, ln, ppos, ROFF_br);
3456 if (buf->buf[pos] != '\0')
3457 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3458 "%s %s", roff_name[tok], buf->buf + pos);
3459 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3460 r->man->next = ROFF_NEXT_SIBLING;
3461 return ROFF_IGN;
3462 }
3463
3464 static int
3465 roff_cc(ROFF_ARGS)
3466 {
3467 const char *p;
3468
3469 p = buf->buf + pos;
3470
3471 if (*p == '\0' || (r->control = *p++) == '.')
3472 r->control = '\0';
3473
3474 if (*p != '\0')
3475 mandoc_msg(MANDOCERR_ARG_EXCESS,
3476 ln, p - buf->buf, "cc ... %s", p);
3477
3478 return ROFF_IGN;
3479 }
3480
3481 static int
3482 roff_char(ROFF_ARGS)
3483 {
3484 const char *p, *kp, *vp;
3485 size_t ksz, vsz;
3486 int font;
3487
3488 /* Parse the character to be replaced. */
3489
3490 kp = buf->buf + pos;
3491 p = kp + 1;
3492 if (*kp == '\0' || (*kp == '\\' &&
3493 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3494 (*p != ' ' && *p != '\0')) {
3495 mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp);
3496 return ROFF_IGN;
3497 }
3498 ksz = p - kp;
3499 while (*p == ' ')
3500 p++;
3501
3502 /*
3503 * If the replacement string contains a font escape sequence,
3504 * we have to restore the font at the end.
3505 */
3506
3507 vp = p;
3508 vsz = strlen(p);
3509 font = 0;
3510 while (*p != '\0') {
3511 if (*p++ != '\\')
3512 continue;
3513 switch (mandoc_escape(&p, NULL, NULL)) {
3514 case ESCAPE_FONT:
3515 case ESCAPE_FONTROMAN:
3516 case ESCAPE_FONTITALIC:
3517 case ESCAPE_FONTBOLD:
3518 case ESCAPE_FONTBI:
3519 case ESCAPE_FONTCW:
3520 case ESCAPE_FONTPREV:
3521 font++;
3522 break;
3523 default:
3524 break;
3525 }
3526 }
3527 if (font > 1)
3528 mandoc_msg(MANDOCERR_CHAR_FONT,
3529 ln, (int)(vp - buf->buf), "%s", vp);
3530
3531 /*
3532 * Approximate the effect of .char using the .tr tables.
3533 * XXX In groff, .char and .tr interact differently.
3534 */
3535
3536 if (ksz == 1) {
3537 if (r->xtab == NULL)
3538 r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3539 assert((unsigned int)*kp < 128);
3540 free(r->xtab[(int)*kp].p);
3541 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3542 "%s%s", vp, font ? "\fP" : "");
3543 } else {
3544 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3545 if (font)
3546 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3547 }
3548 return ROFF_IGN;
3549 }
3550
3551 static int
3552 roff_ec(ROFF_ARGS)
3553 {
3554 const char *p;
3555
3556 p = buf->buf + pos;
3557 if (*p == '\0')
3558 r->escape = '\\';
3559 else {
3560 r->escape = *p;
3561 if (*++p != '\0')
3562 mandoc_msg(MANDOCERR_ARG_EXCESS, ln,
3563 (int)(p - buf->buf), "ec ... %s", p);
3564 }
3565 return ROFF_IGN;
3566 }
3567
3568 static int
3569 roff_eo(ROFF_ARGS)
3570 {
3571 r->escape = '\0';
3572 if (buf->buf[pos] != '\0')
3573 mandoc_msg(MANDOCERR_ARG_SKIP,
3574 ln, pos, "eo %s", buf->buf + pos);
3575 return ROFF_IGN;
3576 }
3577
3578 static int
3579 roff_nop(ROFF_ARGS)
3580 {
3581 while (buf->buf[pos] == ' ')
3582 pos++;
3583 *offs = pos;
3584 return ROFF_RERUN;
3585 }
3586
3587 static int
3588 roff_tr(ROFF_ARGS)
3589 {
3590 const char *p, *first, *second;
3591 size_t fsz, ssz;
3592 enum mandoc_esc esc;
3593
3594 p = buf->buf + pos;
3595
3596 if (*p == '\0') {
3597 mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr");
3598 return ROFF_IGN;
3599 }
3600
3601 while (*p != '\0') {
3602 fsz = ssz = 1;
3603
3604 first = p++;
3605 if (*first == '\\') {
3606 esc = mandoc_escape(&p, NULL, NULL);
3607 if (esc == ESCAPE_ERROR) {
3608 mandoc_msg(MANDOCERR_ESC_BAD, ln,
3609 (int)(p - buf->buf), "%s", first);
3610 return ROFF_IGN;
3611 }
3612 fsz = (size_t)(p - first);
3613 }
3614
3615 second = p++;
3616 if (*second == '\\') {
3617 esc = mandoc_escape(&p, NULL, NULL);
3618 if (esc == ESCAPE_ERROR) {
3619 mandoc_msg(MANDOCERR_ESC_BAD, ln,
3620 (int)(p - buf->buf), "%s", second);
3621 return ROFF_IGN;
3622 }
3623 ssz = (size_t)(p - second);
3624 } else if (*second == '\0') {
3625 mandoc_msg(MANDOCERR_TR_ODD, ln,
3626 (int)(first - buf->buf), "tr %s", first);
3627 second = " ";
3628 p--;
3629 }
3630
3631 if (fsz > 1) {
3632 roff_setstrn(&r->xmbtab, first, fsz,
3633 second, ssz, 0);
3634 continue;
3635 }
3636
3637 if (r->xtab == NULL)
3638 r->xtab = mandoc_calloc(128,
3639 sizeof(struct roffstr));
3640
3641 free(r->xtab[(int)*first].p);
3642 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3643 r->xtab[(int)*first].sz = ssz;
3644 }
3645
3646 return ROFF_IGN;
3647 }
3648
3649 /*
3650 * Implementation of the .return request.
3651 * There is no need to call roff_userret() from here.
3652 * The read module will call that after rewinding the reader stack
3653 * to the place from where the current macro was called.
3654 */
3655 static int
3656 roff_return(ROFF_ARGS)
3657 {
3658 if (r->mstackpos >= 0)
3659 return ROFF_IGN | ROFF_USERRET;
3660
3661 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return");
3662 return ROFF_IGN;
3663 }
3664
3665 static int
3666 roff_rn(ROFF_ARGS)
3667 {
3668 const char *value;
3669 char *oldn, *newn, *end;
3670 size_t oldsz, newsz;
3671 int deftype;
3672
3673 oldn = newn = buf->buf + pos;
3674 if (*oldn == '\0')
3675 return ROFF_IGN;
3676
3677 oldsz = roff_getname(r, &newn, ln, pos);
3678 if (oldn[oldsz] == '\\' || *newn == '\0')
3679 return ROFF_IGN;
3680
3681 end = newn;
3682 newsz = roff_getname(r, &end, ln, newn - buf->buf);
3683 if (newsz == 0)
3684 return ROFF_IGN;
3685
3686 deftype = ROFFDEF_ANY;
3687 value = roff_getstrn(r, oldn, oldsz, &deftype);
3688 switch (deftype) {
3689 case ROFFDEF_USER:
3690 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3691 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3692 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3693 break;
3694 case ROFFDEF_PRE:
3695 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3696 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3697 break;
3698 case ROFFDEF_REN:
3699 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3700 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3701 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3702 break;
3703 case ROFFDEF_STD:
3704 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3705 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3706 break;
3707 default:
3708 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3709 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3710 break;
3711 }
3712 return ROFF_IGN;
3713 }
3714
3715 static int
3716 roff_shift(ROFF_ARGS)
3717 {
3718 struct mctx *ctx;
3719 int levels, i;
3720
3721 levels = 1;
3722 if (buf->buf[pos] != '\0' &&
3723 roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
3724 mandoc_msg(MANDOCERR_CE_NONUM,
3725 ln, pos, "shift %s", buf->buf + pos);
3726 levels = 1;
3727 }
3728 if (r->mstackpos < 0) {
3729 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift");
3730 return ROFF_IGN;
3731 }
3732 ctx = r->mstack + r->mstackpos;
3733 if (levels > ctx->argc) {
3734 mandoc_msg(MANDOCERR_SHIFT,
3735 ln, pos, "%d, but max is %d", levels, ctx->argc);
3736 levels = ctx->argc;
3737 }
3738 if (levels == 0)
3739 return ROFF_IGN;
3740 for (i = 0; i < levels; i++)
3741 free(ctx->argv[i]);
3742 ctx->argc -= levels;
3743 for (i = 0; i < ctx->argc; i++)
3744 ctx->argv[i] = ctx->argv[i + levels];
3745 return ROFF_IGN;
3746 }
3747
3748 static int
3749 roff_so(ROFF_ARGS)
3750 {
3751 char *name, *cp;
3752
3753 name = buf->buf + pos;
3754 mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name);
3755
3756 /*
3757 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3758 * opening anything that's not in our cwd or anything beneath
3759 * it. Thus, explicitly disallow traversing up the file-system
3760 * or using absolute paths.
3761 */
3762
3763 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3764 mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name);
3765 buf->sz = mandoc_asprintf(&cp,
3766 ".sp\nSee the file %s.\n.sp", name) + 1;
3767 free(buf->buf);
3768 buf->buf = cp;
3769 *offs = 0;
3770 return ROFF_REPARSE;
3771 }
3772
3773 *offs = pos;
3774 return ROFF_SO;
3775 }
3776
3777 /* --- user defined strings and macros ------------------------------------ */
3778
3779 static int
3780 roff_userdef(ROFF_ARGS)
3781 {
3782 struct mctx *ctx;
3783 char *arg, *ap, *dst, *src;
3784 size_t sz;
3785
3786 /* Initialize a new macro stack context. */
3787
3788 if (++r->mstackpos == r->mstacksz) {
3789 r->mstack = mandoc_recallocarray(r->mstack,
3790 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
3791 r->mstacksz += 8;
3792 }
3793 ctx = r->mstack + r->mstackpos;
3794 ctx->argsz = 0;
3795 ctx->argc = 0;
3796 ctx->argv = NULL;
3797
3798 /*
3799 * Collect pointers to macro argument strings,
3800 * NUL-terminating them and escaping quotes.
3801 */
3802
3803 src = buf->buf + pos;
3804 while (*src != '\0') {
3805 if (ctx->argc == ctx->argsz) {
3806 ctx->argsz += 8;
3807 ctx->argv = mandoc_reallocarray(ctx->argv,
3808 ctx->argsz, sizeof(*ctx->argv));
3809 }
3810 arg = roff_getarg(r, &src, ln, &pos);
3811 sz = 1; /* For the terminating NUL. */
3812 for (ap = arg; *ap != '\0'; ap++)
3813 sz += *ap == '"' ? 4 : 1;
3814 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
3815 for (ap = arg; *ap != '\0'; ap++) {
3816 if (*ap == '"') {
3817 memcpy(dst, "\\(dq", 4);
3818 dst += 4;
3819 } else
3820 *dst++ = *ap;
3821 }
3822 *dst = '\0';
3823 free(arg);
3824 }
3825
3826 /* Replace the macro invocation by the macro definition. */
3827
3828 free(buf->buf);
3829 buf->buf = mandoc_strdup(r->current_string);
3830 buf->sz = strlen(buf->buf) + 1;
3831 *offs = 0;
3832
3833 return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
3834 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
3835 }
3836
3837 /*
3838 * Calling a high-level macro that was renamed with .rn.
3839 * r->current_string has already been set up by roff_parse().
3840 */
3841 static int
3842 roff_renamed(ROFF_ARGS)
3843 {
3844 char *nbuf;
3845
3846 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
3847 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
3848 free(buf->buf);
3849 buf->buf = nbuf;
3850 *offs = 0;
3851 return ROFF_CONT;
3852 }
3853
3854 static size_t
3855 roff_getname(struct roff *r, char **cpp, int ln, int pos)
3856 {
3857 char *name, *cp;
3858 size_t namesz;
3859
3860 name = *cpp;
3861 if ('\0' == *name)
3862 return 0;
3863
3864 /* Read until end of name and terminate it with NUL. */
3865 for (cp = name; 1; cp++) {
3866 if ('\0' == *cp || ' ' == *cp) {
3867 namesz = cp - name;
3868 break;
3869 }
3870 if ('\\' != *cp)
3871 continue;
3872 namesz = cp - name;
3873 if ('{' == cp[1] || '}' == cp[1])
3874 break;
3875 cp++;
3876 if ('\\' == *cp)
3877 continue;
3878 mandoc_msg(MANDOCERR_NAMESC, ln, pos,
3879 "%.*s", (int)(cp - name + 1), name);
3880 mandoc_escape((const char **)&cp, NULL, NULL);
3881 break;
3882 }
3883
3884 /* Read past spaces. */
3885 while (' ' == *cp)
3886 cp++;
3887
3888 *cpp = cp;
3889 return namesz;
3890 }
3891
3892 /*
3893 * Store *string into the user-defined string called *name.
3894 * To clear an existing entry, call with (*r, *name, NULL, 0).
3895 * append == 0: replace mode
3896 * append == 1: single-line append mode
3897 * append == 2: multiline append mode, append '\n' after each call
3898 */
3899 static void
3900 roff_setstr(struct roff *r, const char *name, const char *string,
3901 int append)
3902 {
3903 size_t namesz;
3904
3905 namesz = strlen(name);
3906 roff_setstrn(&r->strtab, name, namesz, string,
3907 string ? strlen(string) : 0, append);
3908 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3909 }
3910
3911 static void
3912 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
3913 const char *string, size_t stringsz, int append)
3914 {
3915 struct roffkv *n;
3916 char *c;
3917 int i;
3918 size_t oldch, newch;
3919
3920 /* Search for an existing string with the same name. */
3921 n = *r;
3922
3923 while (n && (namesz != n->key.sz ||
3924 strncmp(n->key.p, name, namesz)))
3925 n = n->next;
3926
3927 if (NULL == n) {
3928 /* Create a new string table entry. */
3929 n = mandoc_malloc(sizeof(struct roffkv));
3930 n->key.p = mandoc_strndup(name, namesz);
3931 n->key.sz = namesz;
3932 n->val.p = NULL;
3933 n->val.sz = 0;
3934 n->next = *r;
3935 *r = n;
3936 } else if (0 == append) {
3937 free(n->val.p);
3938 n->val.p = NULL;
3939 n->val.sz = 0;
3940 }
3941
3942 if (NULL == string)
3943 return;
3944
3945 /*
3946 * One additional byte for the '\n' in multiline mode,
3947 * and one for the terminating '\0'.
3948 */
3949 newch = stringsz + (1 < append ? 2u : 1u);
3950
3951 if (NULL == n->val.p) {
3952 n->val.p = mandoc_malloc(newch);
3953 *n->val.p = '\0';
3954 oldch = 0;
3955 } else {
3956 oldch = n->val.sz;
3957 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
3958 }
3959
3960 /* Skip existing content in the destination buffer. */
3961 c = n->val.p + (int)oldch;
3962
3963 /* Append new content to the destination buffer. */
3964 i = 0;
3965 while (i < (int)stringsz) {
3966 /*
3967 * Rudimentary roff copy mode:
3968 * Handle escaped backslashes.
3969 */
3970 if ('\\' == string[i] && '\\' == string[i + 1])
3971 i++;
3972 *c++ = string[i++];
3973 }
3974
3975 /* Append terminating bytes. */
3976 if (1 < append)
3977 *c++ = '\n';
3978
3979 *c = '\0';
3980 n->val.sz = (int)(c - n->val.p);
3981 }
3982
3983 static const char *
3984 roff_getstrn(struct roff *r, const char *name, size_t len,
3985 int *deftype)
3986 {
3987 const struct roffkv *n;
3988 int found, i;
3989 enum roff_tok tok;
3990
3991 found = 0;
3992 for (n = r->strtab; n != NULL; n = n->next) {
3993 if (strncmp(name, n->key.p, len) != 0 ||
3994 n->key.p[len] != '\0' || n->val.p == NULL)
3995 continue;
3996 if (*deftype & ROFFDEF_USER) {
3997 *deftype = ROFFDEF_USER;
3998 return n->val.p;
3999 } else {
4000 found = 1;
4001 break;
4002 }
4003 }
4004 for (n = r->rentab; n != NULL; n = n->next) {
4005 if (strncmp(name, n->key.p, len) != 0 ||
4006 n->key.p[len] != '\0' || n->val.p == NULL)
4007 continue;
4008 if (*deftype & ROFFDEF_REN) {
4009 *deftype = ROFFDEF_REN;
4010 return n->val.p;
4011 } else {
4012 found = 1;
4013 break;
4014 }
4015 }
4016 for (i = 0; i < PREDEFS_MAX; i++) {
4017 if (strncmp(name, predefs[i].name, len) != 0 ||
4018 predefs[i].name[len] != '\0')
4019 continue;
4020 if (*deftype & ROFFDEF_PRE) {
4021 *deftype = ROFFDEF_PRE;
4022 return predefs[i].str;
4023 } else {
4024 found = 1;
4025 break;
4026 }
4027 }
4028 if (r->man->meta.macroset != MACROSET_MAN) {
4029 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
4030 if (strncmp(name, roff_name[tok], len) != 0 ||
4031 roff_name[tok][len] != '\0')
4032 continue;
4033 if (*deftype & ROFFDEF_STD) {
4034 *deftype = ROFFDEF_STD;
4035 return NULL;
4036 } else {
4037 found = 1;
4038 break;
4039 }
4040 }
4041 }
4042 if (r->man->meta.macroset != MACROSET_MDOC) {
4043 for (tok = MAN_TH; tok < MAN_MAX; tok++) {
4044 if (strncmp(name, roff_name[tok], len) != 0 ||
4045 roff_name[tok][len] != '\0')
4046 continue;
4047 if (*deftype & ROFFDEF_STD) {
4048 *deftype = ROFFDEF_STD;
4049 return NULL;
4050 } else {
4051 found = 1;
4052 break;
4053 }
4054 }
4055 }
4056
4057 if (found == 0 && *deftype != ROFFDEF_ANY) {
4058 if (*deftype & ROFFDEF_REN) {
4059 /*
4060 * This might still be a request,
4061 * so do not treat it as undefined yet.
4062 */
4063 *deftype = ROFFDEF_UNDEF;
4064 return NULL;
4065 }
4066
4067 /* Using an undefined string defines it to be empty. */
4068
4069 roff_setstrn(&r->strtab, name, len, "", 0, 0);
4070 roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
4071 }
4072
4073 *deftype = 0;
4074 return NULL;
4075 }
4076
4077 static void
4078 roff_freestr(struct roffkv *r)
4079 {
4080 struct roffkv *n, *nn;
4081
4082 for (n = r; n; n = nn) {
4083 free(n->key.p);
4084 free(n->val.p);
4085 nn = n->next;
4086 free(n);
4087 }
4088 }
4089
4090 /* --- accessors and utility functions ------------------------------------ */
4091
4092 /*
4093 * Duplicate an input string, making the appropriate character
4094 * conversations (as stipulated by `tr') along the way.
4095 * Returns a heap-allocated string with all the replacements made.
4096 */
4097 char *
4098 roff_strdup(const struct roff *r, const char *p)
4099 {
4100 const struct roffkv *cp;
4101 char *res;
4102 const char *pp;
4103 size_t ssz, sz;
4104 enum mandoc_esc esc;
4105
4106 if (NULL == r->xmbtab && NULL == r->xtab)
4107 return mandoc_strdup(p);
4108 else if ('\0' == *p)
4109 return mandoc_strdup("");
4110
4111 /*
4112 * Step through each character looking for term matches
4113 * (remember that a `tr' can be invoked with an escape, which is
4114 * a glyph but the escape is multi-character).
4115 * We only do this if the character hash has been initialised
4116 * and the string is >0 length.
4117 */
4118
4119 res = NULL;
4120 ssz = 0;
4121
4122 while ('\0' != *p) {
4123 assert((unsigned int)*p < 128);
4124 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4125 sz = r->xtab[(int)*p].sz;
4126 res = mandoc_realloc(res, ssz + sz + 1);
4127 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4128 ssz += sz;
4129 p++;
4130 continue;
4131 } else if ('\\' != *p) {
4132 res = mandoc_realloc(res, ssz + 2);
4133 res[ssz++] = *p++;
4134 continue;
4135 }
4136
4137 /* Search for term matches. */
4138 for (cp = r->xmbtab; cp; cp = cp->next)
4139 if (0 == strncmp(p, cp->key.p, cp->key.sz))
4140 break;
4141
4142 if (NULL != cp) {
4143 /*
4144 * A match has been found.
4145 * Append the match to the array and move
4146 * forward by its keysize.
4147 */
4148 res = mandoc_realloc(res,
4149 ssz + cp->val.sz + 1);
4150 memcpy(res + ssz, cp->val.p, cp->val.sz);
4151 ssz += cp->val.sz;
4152 p += (int)cp->key.sz;
4153 continue;
4154 }
4155
4156 /*
4157 * Handle escapes carefully: we need to copy
4158 * over just the escape itself, or else we might
4159 * do replacements within the escape itself.
4160 * Make sure to pass along the bogus string.
4161 */
4162 pp = p++;
4163 esc = mandoc_escape(&p, NULL, NULL);
4164 if (ESCAPE_ERROR == esc) {
4165 sz = strlen(pp);
4166 res = mandoc_realloc(res, ssz + sz + 1);
4167 memcpy(res + ssz, pp, sz);
4168 break;
4169 }
4170 /*
4171 * We bail out on bad escapes.
4172 * No need to warn: we already did so when
4173 * roff_expand() was called.
4174 */
4175 sz = (int)(p - pp);
4176 res = mandoc_realloc(res, ssz + sz + 1);
4177 memcpy(res + ssz, pp, sz);
4178 ssz += sz;
4179 }
4180
4181 res[(int)ssz] = '\0';
4182 return res;
4183 }
4184
4185 int
4186 roff_getformat(const struct roff *r)
4187 {
4188
4189 return r->format;
4190 }
4191
4192 /*
4193 * Find out whether a line is a macro line or not.
4194 * If it is, adjust the current position and return one; if it isn't,
4195 * return zero and don't change the current position.
4196 * If the control character has been set with `.cc', then let that grain
4197 * precedence.
4198 * This is slighly contrary to groff, where using the non-breaking
4199 * control character when `cc' has been invoked will cause the
4200 * non-breaking macro contents to be printed verbatim.
4201 */
4202 int
4203 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4204 {
4205 int pos;
4206
4207 pos = *ppos;
4208
4209 if (r->control != '\0' && cp[pos] == r->control)
4210 pos++;
4211 else if (r->control != '\0')
4212 return 0;
4213 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4214 pos += 2;
4215 else if ('.' == cp[pos] || '\'' == cp[pos])
4216 pos++;
4217 else
4218 return 0;
4219
4220 while (' ' == cp[pos] || '\t' == cp[pos])
4221 pos++;
4222
4223 *ppos = pos;
4224 return 1;
4225 }