]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
more info from John Gardner about ASCII control chars in roff(7) input
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.376 2020/08/27 12:59:02 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org>
4 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 * Implementation of the roff(7) parser for mandoc(1).
19 */
20 #include "config.h"
21
22 #include <sys/types.h>
23
24 #include <assert.h>
25 #include <ctype.h>
26 #include <limits.h>
27 #include <stddef.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32
33 #include "mandoc_aux.h"
34 #include "mandoc_ohash.h"
35 #include "mandoc.h"
36 #include "roff.h"
37 #include "mandoc_parse.h"
38 #include "libmandoc.h"
39 #include "roff_int.h"
40 #include "tbl_parse.h"
41 #include "eqn_parse.h"
42
/*
 * ASCII_ESC is used to signal from roff_getarg() to roff_expand()
 * that an escape sequence resulted from copy-in processing and
 * needs to be checked or interpolated.  As it is used nowhere
 * else, it is defined here rather than in a header file.
 * The value 27 is the ASCII code of the ESC control character.
 */
#define	ASCII_ESC	27

/* Maximum number of string expansions per line, to break infinite loops. */
#define	EXPAND_LIMIT	1000

/*
 * Types of definitions of macros and strings.
 * Every type occupies a bit of its own, so several types can be
 * combined into one mask, as ROFFDEF_ANY does for the four kinds
 * of existing definitions.  ROFFDEF_UNDEF is not part of that mask.
 */
#define	ROFFDEF_USER	(1 << 1)  /* User-defined. */
#define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
#define	ROFFDEF_REN	(1 << 3)  /* Renamed standard macro. */
#define	ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
			 ROFFDEF_REN | ROFFDEF_STD)
#define	ROFFDEF_UNDEF	(1 << 5)  /* Completely undefined. */
62
63 /* --- data types --------------------------------------------------------- */
64
/*
 * An incredibly-simple string buffer:
 * a pointer to the text together with its cached length.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
72
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * struct roff uses lists of these for user-defined strings and
 * macros, for renamed ones, and for multi-byte `tr' translations.
 */
struct	roffkv {
	struct roffstr	 key; /* name used for lookup */
	struct roffstr	 val; /* content stored under that name */
	struct roffkv	*next; /* next in list */
};
81
/*
 * A single number register as part of a singly-linked list.
 */
struct	roffreg {
	struct roffstr	 key; /* register name */
	int		 val; /* current value */
	int		 step; /* NOTE(review): presumably the auto-increment
			       * step of the register; confirm in the
			       * register setters and getters */
	struct roffreg	*next; /* next in list */
};
91
/*
 * Association of request and macro names with token IDs.
 * These are the entries stored in the hash tables built by
 * roffhash_alloc(), which allocates each entry large enough
 * to hold the name including its terminating NUL.
 */
struct	roffreq {
	enum roff_tok	 tok; /* token ID the name maps to */
	char		 name[]; /* flexible array member, used as hash key */
};
99
/*
 * A macro processing context.
 * More than one is needed when macro calls are nested.
 * The contexts live in the mstack array of struct roff.
 */
struct	mctx {
	char		**argv; /* argument vector; the array itself is
				 * released by roff_free() */
	int		 argc; /* NOTE(review): presumably the number of
				* arguments currently in use; confirm */
	int		 argsz; /* NOTE(review): presumably the allocated
				 * capacity of argv; confirm */
};
109
/*
 * The complete state of one roff(7) parser instance.
 * Created by roff_alloc(), returned to its initial state by
 * roff_reset(), and destroyed by roff_free().
 * Both stack positions are -1 while the respective stack is empty.
 */
struct	roff {
	struct roff_man	*man; /* mdoc or man parser */
	struct roffnode	*last; /* leaf of stack */
	struct mctx	*mstack; /* stack of macro contexts */
	int		*rstack; /* stack of inverted `ie' values */
	struct ohash	*reqtab; /* request lookup table */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*rentab; /* renamed strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* equation parser */
	struct eqn_node	*eqn; /* active equation parser */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 mstacksz; /* current size of mstack */
	int		 mstackpos; /* position in mstack */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack */
	int		 format; /* current file in mdoc or man format */
	char		 control; /* control character */
	char		 escape; /* escape character */
};
137
/*
 * A macro definition, condition, or ignored block.
 * Nodes form a stack linked through the parent pointer; they are
 * created by roffnode_push() and destroyed by roffnode_pop(),
 * which also frees the name and end strings.
 */
struct	roffnode {
	enum roff_tok	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* custom end macro of the block */
	int		 endspan; /* scope to: 1=eol 2=next line -1=\} */
	int		 rule; /* content is: 1=evaluated 0=skipped */
};
151
/*
 * The parameter list shared by all request handlers.
 * Declaring it in one place keeps the prototypes, the handler
 * definitions, and the roffproc function pointer type in sync.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum roff_tok tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Pointer to a request handler taking the ROFF_ARGS parameters. */
typedef	int (*roffproc)(ROFF_ARGS);
161
/*
 * The set of handlers for one request; the roffs[] table holds
 * one of these per token.  Handlers that do not apply are NULL.
 */
struct	roffmac {
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags; /* zero or ROFFMAC_STRUCT */
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
};
169
/*
 * One entry of the table of predefined strings.
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Expands to one struct predef initializer including the trailing
 * comma, such that consecutive invocations can fill an array.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
177
178 /* --- function prototypes ------------------------------------------------ */
179
/*
 * Forward declarations of all file-local functions.
 * Those declared with ROFF_ARGS are request handlers suitable
 * for use in the roffs[] dispatch table.
 */
static	int	 roffnode_cleanscope(struct roff *);
static	int	 roffnode_pop(struct roff *);
static	void	 roffnode_push(struct roff *, enum roff_tok,
			const char *, int, int);
static	void	 roff_addtbl(struct roff_man *, int, struct tbl_node *);
static	int	 roff_als(ROFF_ARGS);
static	int	 roff_block(ROFF_ARGS);
static	int	 roff_block_text(ROFF_ARGS);
static	int	 roff_block_sub(ROFF_ARGS);
static	int	 roff_break(ROFF_ARGS);
static	int	 roff_cblock(ROFF_ARGS);
static	int	 roff_cc(ROFF_ARGS);
static	int	 roff_ccond(struct roff *, int, int);
static	int	 roff_char(ROFF_ARGS);
static	int	 roff_cond(ROFF_ARGS);
static	int	 roff_cond_checkend(ROFF_ARGS);
static	int	 roff_cond_text(ROFF_ARGS);
static	int	 roff_cond_sub(ROFF_ARGS);
static	int	 roff_ds(ROFF_ARGS);
static	int	 roff_ec(ROFF_ARGS);
static	int	 roff_eo(ROFF_ARGS);
static	int	 roff_eqndelim(struct roff *, struct buf *, int);
static	int	 roff_evalcond(struct roff *, int, char *, int *);
static	int	 roff_evalnum(struct roff *, int,
			const char *, int *, int *, int);
static	int	 roff_evalpar(struct roff *, int,
			const char *, int *, int *, int);
static	int	 roff_evalstrcond(const char *, int *);
static	int	 roff_expand(struct roff *, struct buf *,
			int, int, char);
static	void	 roff_free1(struct roff *);
static	void	 roff_freereg(struct roffreg *);
static	void	 roff_freestr(struct roffkv *);
static	size_t	 roff_getname(struct roff *, char **, int, int);
static	int	 roff_getnum(const char *, int *, int *, int);
static	int	 roff_getop(const char *, int *, char *);
static	int	 roff_getregn(struct roff *,
			const char *, size_t, char);
static	int	 roff_getregro(const struct roff *,
			const char *name);
static	const char *roff_getstrn(struct roff *,
			const char *, size_t, int *);
static	int	 roff_hasregn(const struct roff *,
			const char *, size_t);
static	int	 roff_insec(ROFF_ARGS);
static	int	 roff_it(ROFF_ARGS);
static	int	 roff_line_ignore(ROFF_ARGS);
static	void	 roff_man_alloc1(struct roff_man *);
static	void	 roff_man_free1(struct roff_man *);
static	int	 roff_manyarg(ROFF_ARGS);
static	int	 roff_noarg(ROFF_ARGS);
static	int	 roff_nop(ROFF_ARGS);
static	int	 roff_nr(ROFF_ARGS);
static	int	 roff_onearg(ROFF_ARGS);
static	enum roff_tok roff_parse(struct roff *, char *, int *,
			int, int);
static	int	 roff_parsetext(struct roff *, struct buf *,
			int, int *);
static	int	 roff_renamed(ROFF_ARGS);
static	int	 roff_return(ROFF_ARGS);
static	int	 roff_rm(ROFF_ARGS);
static	int	 roff_rn(ROFF_ARGS);
static	int	 roff_rr(ROFF_ARGS);
static	void	 roff_setregn(struct roff *, const char *,
			size_t, int, char, int);
static	void	 roff_setstr(struct roff *,
			const char *, const char *, int);
static	void	 roff_setstrn(struct roffkv **, const char *,
			size_t, const char *, size_t, int);
static	int	 roff_shift(ROFF_ARGS);
static	int	 roff_so(ROFF_ARGS);
static	int	 roff_tr(ROFF_ARGS);
static	int	 roff_Dd(ROFF_ARGS);
static	int	 roff_TE(ROFF_ARGS);
static	int	 roff_TS(ROFF_ARGS);
static	int	 roff_EQ(ROFF_ARGS);
static	int	 roff_EN(ROFF_ARGS);
static	int	 roff_T_(ROFF_ARGS);
static	int	 roff_unsupp(ROFF_ARGS);
static	int	 roff_userdef(ROFF_ARGS);
260
261 /* --- constant data ------------------------------------------------------ */
262
/* Flag bits selecting optional behaviour of the numeric parsers. */
#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */
265
/*
 * Names of all requests and macros, looked up by token ID:
 * roffhash_alloc() reads roff_name[tok] for each token it registers.
 * The table is positional, so entries must never be reordered.
 * NULL entries are tokens without a name; roffhash_alloc() skips them.
 * The first part lists the roff(7) requests in the same order as
 * the roffs[] dispatch table.
 * NOTE(review): the later parts look like the mdoc(7) macros followed
 * by the man(7) macros, in the order of enum roff_tok; confirm against
 * the declaration of that enum.
 */
const char *__roff_name[MAN_MAX + 1] = {
	"br",		"ce",		"fi",		"ft",
	"ll",		"mc",		"nf",
	"po",		"rj",		"sp",
	"ta",		"ti",		NULL,
	"ab",		"ad",		"af",		"aln",
	"als",		"am",		"am1",		"ami",
	"ami1",		"as",		"as1",		"asciify",
	"backtrace",	"bd",		"bleedat",	"blm",
	"box",		"boxa",		"bp",		"BP",
	"break",	"breakchar",	"brnl",		"brp",
	"brpnl",	"c2",		"cc",
	"cf",		"cflags",	"ch",		"char",
	"chop",		"class",	"close",	"CL",
	"color",	"composite",	"continue",	"cp",
	"cropat",	"cs",		"cu",		"da",
	"dch",		"Dd",		"de",		"de1",
	"defcolor",	"dei",		"dei1",		"device",
	"devicem",	"di",		"do",		"ds",
	"ds1",		"dwh",		"dt",		"ec",
	"ecr",		"ecs",		"el",		"em",
	"EN",		"eo",		"EP",		"EQ",
	"errprint",	"ev",		"evc",		"ex",
	"fallback",	"fam",		"fc",		"fchar",
	"fcolor",	"fdeferlig",	"feature",	"fkern",
	"fl",		"flig",		"fp",		"fps",
	"fschar",	"fspacewidth",	"fspecial",	"ftr",
	"fzoom",	"gcolor",	"hc",		"hcode",
	"hidechar",	"hla",		"hlm",		"hpf",
	"hpfa",		"hpfcode",	"hw",		"hy",
	"hylang",	"hylen",	"hym",		"hypp",
	"hys",		"ie",		"if",		"ig",
	"index",	"it",		"itc",		"IX",
	"kern",		"kernafter",	"kernbefore",	"kernpair",
	"lc",		"lc_ctype",	"lds",		"length",
	"letadj",	"lf",		"lg",		"lhang",
	"linetabs",	"lnr",		"lnrf",		"lpfx",
	"ls",		"lsm",		"lt",
	"mediasize",	"minss",	"mk",		"mso",
	"na",		"ne",		"nh",		"nhychar",
	"nm",		"nn",		"nop",		"nr",
	"nrf",		"nroff",	"ns",		"nx",
	"open",		"opena",	"os",		"output",
	"padj",		"papersize",	"pc",		"pev",
	"pi",		"PI",		"pl",		"pm",
	"pn",		"pnr",		"ps",
	"psbb",		"pshape",	"pso",		"ptr",
	"pvs",		"rchar",	"rd",		"recursionlimit",
	"return",	"rfschar",	"rhang",
	"rm",		"rn",		"rnn",		"rr",
	"rs",		"rt",		"schar",	"sentchar",
	"shc",		"shift",	"sizes",	"so",
	"spacewidth",	"special",	"spreadwarn",	"ss",
	"sty",		"substring",	"sv",		"sy",
	"T&",		"tc",		"TE",
	"TH",		"tkf",		"tl",
	"tm",		"tm1",		"tmc",		"tr",
	"track",	"transchar",	"trf",		"trimat",
	"trin",		"trnt",		"troff",	"TS",
	"uf",		"ul",		"unformat",	"unwatch",
	"unwatchn",	"vpt",		"vs",		"warn",
	"warnscale",	"watch",	"watchlength",	"watchn",
	"wh",		"while",	"write",	"writec",
	"writem",	"xflag",	".",		NULL,
	NULL,		"text",
	"Dd",		"Dt",		"Os",		"Sh",
	"Ss",		"Pp",		"D1",		"Dl",
	"Bd",		"Ed",		"Bl",		"El",
	"It",		"Ad",		"An",		"Ap",
	"Ar",		"Cd",		"Cm",		"Dv",
	"Er",		"Ev",		"Ex",		"Fa",
	"Fd",		"Fl",		"Fn",		"Ft",
	"Ic",		"In",		"Li",		"Nd",
	"Nm",		"Op",		"Ot",		"Pa",
	"Rv",		"St",		"Va",		"Vt",
	"Xr",		"%A",		"%B",		"%D",
	"%I",		"%J",		"%N",		"%O",
	"%P",		"%R",		"%T",		"%V",
	"Ac",		"Ao",		"Aq",		"At",
	"Bc",		"Bf",		"Bo",		"Bq",
	"Bsx",		"Bx",		"Db",		"Dc",
	"Do",		"Dq",		"Ec",		"Ef",
	"Em",		"Eo",		"Fx",		"Ms",
	"No",		"Ns",		"Nx",		"Ox",
	"Pc",		"Pf",		"Po",		"Pq",
	"Qc",		"Ql",		"Qo",		"Qq",
	"Re",		"Rs",		"Sc",		"So",
	"Sq",		"Sm",		"Sx",		"Sy",
	"Tn",		"Ux",		"Xc",		"Xo",
	"Fo",		"Fc",		"Oo",		"Oc",
	"Bk",		"Ek",		"Bt",		"Hf",
	"Fr",		"Ud",		"Lb",		"Lp",
	"Lk",		"Mt",		"Brq",		"Bro",
	"Brc",		"%C",		"Es",		"En",
	"Dx",		"%Q",		"%U",		"Ta",
	"Tg",		NULL,
	"TH",		"SH",		"SS",		"TP",
	"TQ",
	"LP",		"PP",		"P",		"IP",
	"HP",		"SM",		"SB",		"BI",
	"IB",		"BR",		"RB",		"R",
	"B",		"I",		"IR",		"RI",
	"RE",		"RS",		"DT",		"UC",
	"PD",		"AT",		"in",
	"SY",		"YS",		"OP",
	"EX",		"EE",		"UR",
	"UE",		"MT",		"ME",		NULL
};
/* Read-only view of the table above for use by other code. */
const	char *const *roff_name = __roff_name;
375
/*
 * Dispatch table of the request handlers, one entry per token
 * below TOKEN_NONE.  The table is positional: the comment after
 * each entry names the request it belongs to, and the order is the
 * same as in the first part of the __roff_name[] table, so entries
 * must never be reordered.
 * Each entry lists the proc, text, and sub handlers and the flags
 * of struct roffmac.
 */
static	struct roffmac	 roffs[TOKEN_NONE] = {
	{ roff_noarg, NULL, NULL, 0 },  /* br */
	{ roff_onearg, NULL, NULL, 0 },  /* ce */
	{ roff_noarg, NULL, NULL, 0 },  /* fi */
	{ roff_onearg, NULL, NULL, 0 },  /* ft */
	{ roff_onearg, NULL, NULL, 0 },  /* ll */
	{ roff_onearg, NULL, NULL, 0 },  /* mc */
	{ roff_noarg, NULL, NULL, 0 },  /* nf */
	{ roff_onearg, NULL, NULL, 0 },  /* po */
	{ roff_onearg, NULL, NULL, 0 },  /* rj */
	{ roff_onearg, NULL, NULL, 0 },  /* sp */
	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
	{ roff_onearg, NULL, NULL, 0 },  /* ti */
	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
	{ roff_als, NULL, NULL, 0 },  /* als */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
	{ roff_ds, NULL, NULL, 0 },  /* as */
	{ roff_ds, NULL, NULL, 0 },  /* as1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
	{ roff_unsupp, NULL, NULL, 0 },  /* box */
	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
	{ roff_break, NULL, NULL, 0 },  /* break */
	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
	{ roff_noarg, NULL, NULL, 0 },  /* brp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
	{ roff_cc, NULL, NULL, 0 },  /* cc */
	{ roff_insec, NULL, NULL, 0 },  /* cf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
	{ roff_char, NULL, NULL, 0 },  /* char */
	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
	{ roff_insec, NULL, NULL, 0 },  /* close */
	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
	{ roff_unsupp, NULL, NULL, 0 },  /* da */
	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* device */
	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
	{ roff_unsupp, NULL, NULL, 0 },  /* di */
	{ roff_unsupp, NULL, NULL, 0 },  /* do */
	{ roff_ds, NULL, NULL, 0 },  /* ds */
	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
	{ roff_ec, NULL, NULL, 0 },  /* ec */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
	{ roff_unsupp, NULL, NULL, 0 },  /* em */
	{ roff_EN, NULL, NULL, 0 },  /* EN */
	{ roff_eo, NULL, NULL, 0 },  /* eo */
	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
	{ roff_unsupp, NULL, NULL, 0 },  /* index */
	{ roff_it, NULL, NULL, 0 },  /* it */
	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
	{ roff_unsupp, NULL, NULL, 0 },  /* length */
	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
	{ roff_insec, NULL, NULL, 0 },  /* lf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
	{ roff_insec, NULL, NULL, 0 },  /* mso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
	{ roff_nop, NULL, NULL, 0 },  /* nop */
	{ roff_nr, NULL, NULL, 0 },  /* nr */
	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
	{ roff_insec, NULL, NULL, 0 },  /* nx */
	{ roff_insec, NULL, NULL, 0 },  /* open */
	{ roff_insec, NULL, NULL, 0 },  /* opena */
	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
	{ roff_unsupp, NULL, NULL, 0 },  /* output */
	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
	{ roff_insec, NULL, NULL, 0 },  /* pi */
	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
	{ roff_insec, NULL, NULL, 0 },  /* pso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
	{ roff_return, NULL, NULL, 0 },  /* return */
	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
	{ roff_rm, NULL, NULL, 0 },  /* rm */
	{ roff_rn, NULL, NULL, 0 },  /* rn */
	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
	{ roff_rr, NULL, NULL, 0 },  /* rr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
	{ roff_shift, NULL, NULL, 0 },  /* shift */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
	{ roff_so, NULL, NULL, 0 },  /* so */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
	{ roff_insec, NULL, NULL, 0 },  /* sy */
	{ roff_T_, NULL, NULL, 0 },  /* T& */
	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
	{ roff_TE, NULL, NULL, 0 },  /* TE */
	{ roff_Dd, NULL, NULL, 0 },  /* TH */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
	{ roff_tr, NULL, NULL, 0 },  /* tr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
	{ roff_insec, NULL, NULL, 0 },  /* trf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
	{ roff_TS, NULL, NULL, 0 },  /* TS */
	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
	{ roff_unsupp, NULL, NULL, 0 },  /* wh */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* while */
	{ roff_insec, NULL, NULL, 0 },  /* write */
	{ roff_insec, NULL, NULL, 0 },  /* writec */
	{ roff_insec, NULL, NULL, 0 },  /* writem */
	{ roff_line_ignore, NULL, NULL, 0 },  /* xflag */
	{ roff_cblock, NULL, NULL, 0 },  /* . */
	{ roff_renamed, NULL, NULL, 0 },  /* presumably ROFF_RENAMED - confirm */
	{ roff_userdef, NULL, NULL, 0 }  /* presumably ROFF_USERDEF - confirm */
};
622
/*
 * Array of injected predefined strings.
 * The entries come from the included file.
 * NOTE(review): predefs.in presumably consists of PREDEF() invocations
 * only, and PREDEFS_MAX has to match their number; confirm when
 * editing that file.
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
628
/*
 * File-global parser state.  It is not part of struct roff;
 * roff_reset() clears all four variables.
 * NOTE(review): judging by the names, the first pair belongs to the
 * `ce' request and the second pair to the `it' request; confirm in
 * the respective handlers.
 */
static	int	 roffce_lines;	/* number of input lines to center */
static	struct roff_node *roffce_node;  /* active request */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
633
634
635 /* --- request table ------------------------------------------------------ */
636
637 struct ohash *
638 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
639 {
640 struct ohash *htab;
641 struct roffreq *req;
642 enum roff_tok tok;
643 size_t sz;
644 unsigned int slot;
645
646 htab = mandoc_malloc(sizeof(*htab));
647 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
648
649 for (tok = mintok; tok < maxtok; tok++) {
650 if (roff_name[tok] == NULL)
651 continue;
652 sz = strlen(roff_name[tok]);
653 req = mandoc_malloc(sizeof(*req) + sz + 1);
654 req->tok = tok;
655 memcpy(req->name, roff_name[tok], sz + 1);
656 slot = ohash_qlookup(htab, req->name);
657 ohash_insert(htab, slot, req);
658 }
659 return htab;
660 }
661
/*
 * Release a table created by roffhash_alloc(), including all
 * the entries stored in it.  Passing NULL is a no-op.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 pos;

	if (htab != NULL) {
		entry = ohash_first(htab, &pos);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &pos);
		}
		ohash_delete(htab);
		free(htab);
	}
}
676
677 enum roff_tok
678 roffhash_find(struct ohash *htab, const char *name, size_t sz)
679 {
680 struct roffreq *req;
681 const char *end;
682
683 if (sz) {
684 end = name + sz;
685 req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
686 } else
687 req = ohash_find(htab, ohash_qlookup(htab, name));
688 return req == NULL ? TOKEN_NONE : req->tok;
689 }
690
691 /* --- stack of request blocks -------------------------------------------- */
692
693 /*
694 * Pop the current node off of the stack of roff instructions currently
695 * pending. Return 1 if it is a loop or 0 otherwise.
696 */
697 static int
698 roffnode_pop(struct roff *r)
699 {
700 struct roffnode *p;
701 int inloop;
702
703 p = r->last;
704 inloop = p->tok == ROFF_while;
705 r->last = p->parent;
706 free(p->name);
707 free(p->end);
708 free(p);
709 return inloop;
710 }
711
712 /*
713 * Push a roff node onto the instruction stack. This must later be
714 * removed with roffnode_pop().
715 */
716 static void
717 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
718 int line, int col)
719 {
720 struct roffnode *p;
721
722 p = mandoc_calloc(1, sizeof(struct roffnode));
723 p->tok = tok;
724 if (name)
725 p->name = mandoc_strdup(name);
726 p->parent = r->last;
727 p->line = line;
728 p->col = col;
729 p->rule = p->parent ? p->parent->rule : 0;
730
731 r->last = p;
732 }
733
734 /* --- roff parser state data management ---------------------------------- */
735
736 static void
737 roff_free1(struct roff *r)
738 {
739 int i;
740
741 tbl_free(r->first_tbl);
742 r->first_tbl = r->last_tbl = r->tbl = NULL;
743
744 eqn_free(r->last_eqn);
745 r->last_eqn = r->eqn = NULL;
746
747 while (r->mstackpos >= 0)
748 roff_userret(r);
749
750 while (r->last)
751 roffnode_pop(r);
752
753 free (r->rstack);
754 r->rstack = NULL;
755 r->rstacksz = 0;
756 r->rstackpos = -1;
757
758 roff_freereg(r->regtab);
759 r->regtab = NULL;
760
761 roff_freestr(r->strtab);
762 roff_freestr(r->rentab);
763 roff_freestr(r->xmbtab);
764 r->strtab = r->rentab = r->xmbtab = NULL;
765
766 if (r->xtab)
767 for (i = 0; i < 128; i++)
768 free(r->xtab[i].p);
769 free(r->xtab);
770 r->xtab = NULL;
771 }
772
773 void
774 roff_reset(struct roff *r)
775 {
776 roff_free1(r);
777 r->options |= MPARSE_COMMENT;
778 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
779 r->control = '\0';
780 r->escape = '\\';
781 roffce_lines = 0;
782 roffce_node = NULL;
783 roffit_lines = 0;
784 roffit_macro = NULL;
785 }
786
787 void
788 roff_free(struct roff *r)
789 {
790 int i;
791
792 roff_free1(r);
793 for (i = 0; i < r->mstacksz; i++)
794 free(r->mstack[i].argv);
795 free(r->mstack);
796 roffhash_free(r->reqtab);
797 free(r);
798 }
799
800 struct roff *
801 roff_alloc(int options)
802 {
803 struct roff *r;
804
805 r = mandoc_calloc(1, sizeof(struct roff));
806 r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
807 r->options = options | MPARSE_COMMENT;
808 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
809 r->mstackpos = -1;
810 r->rstackpos = -1;
811 r->escape = '\\';
812 return r;
813 }
814
815 /* --- syntax tree state data management ---------------------------------- */
816
817 static void
818 roff_man_free1(struct roff_man *man)
819 {
820 if (man->meta.first != NULL)
821 roff_node_delete(man, man->meta.first);
822 free(man->meta.msec);
823 free(man->meta.vol);
824 free(man->meta.os);
825 free(man->meta.arch);
826 free(man->meta.title);
827 free(man->meta.name);
828 free(man->meta.date);
829 free(man->meta.sodest);
830 }
831
832 void
833 roff_state_reset(struct roff_man *man)
834 {
835 man->last = man->meta.first;
836 man->last_es = NULL;
837 man->flags = 0;
838 man->lastsec = man->lastnamed = SEC_NONE;
839 man->next = ROFF_NEXT_CHILD;
840 roff_setreg(man->roff, "nS", 0, '=');
841 }
842
843 static void
844 roff_man_alloc1(struct roff_man *man)
845 {
846 memset(&man->meta, 0, sizeof(man->meta));
847 man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
848 man->meta.first->type = ROFFT_ROOT;
849 man->meta.macroset = MACROSET_NONE;
850 roff_state_reset(man);
851 }
852
/*
 * Discard the syntax tree and the metadata of man
 * and start over with an empty tree.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);	/* release the old content */
	roff_man_alloc1(man);	/* set up a fresh root node */
}
859
/*
 * Destroy a structure created by roff_man_alloc(),
 * including its syntax tree and metadata.
 */
void
roff_man_free(struct roff_man *man)
{
	roff_man_free1(man);	/* content first */
	free(man);		/* then the container */
}
866
867 struct roff_man *
868 roff_man_alloc(struct roff *roff, const char *os_s, int quick)
869 {
870 struct roff_man *man;
871
872 man = mandoc_calloc(1, sizeof(*man));
873 man->roff = roff;
874 man->os_s = os_s;
875 man->quick = quick;
876 roff_man_alloc1(man);
877 roff->man = man;
878 return man;
879 }
880
881 /* --- syntax tree handling ----------------------------------------------- */
882
883 struct roff_node *
884 roff_node_alloc(struct roff_man *man, int line, int pos,
885 enum roff_type type, int tok)
886 {
887 struct roff_node *n;
888
889 n = mandoc_calloc(1, sizeof(*n));
890 n->line = line;
891 n->pos = pos;
892 n->tok = tok;
893 n->type = type;
894 n->sec = man->lastsec;
895
896 if (man->flags & MDOC_SYNOPSIS)
897 n->flags |= NODE_SYNPRETTY;
898 else
899 n->flags &= ~NODE_SYNPRETTY;
900 if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL)
901 n->flags |= NODE_NOFILL;
902 else
903 n->flags &= ~NODE_NOFILL;
904 if (man->flags & MDOC_NEWLINE)
905 n->flags |= NODE_LINE;
906 man->flags &= ~MDOC_NEWLINE;
907
908 return n;
909 }
910
911 void
912 roff_node_append(struct roff_man *man, struct roff_node *n)
913 {
914
915 switch (man->next) {
916 case ROFF_NEXT_SIBLING:
917 if (man->last->next != NULL) {
918 n->next = man->last->next;
919 man->last->next->prev = n;
920 } else
921 man->last->parent->last = n;
922 man->last->next = n;
923 n->prev = man->last;
924 n->parent = man->last->parent;
925 break;
926 case ROFF_NEXT_CHILD:
927 if (man->last->child != NULL) {
928 n->next = man->last->child;
929 man->last->child->prev = n;
930 } else
931 man->last->last = n;
932 man->last->child = n;
933 n->parent = man->last;
934 break;
935 default:
936 abort();
937 }
938 man->last = n;
939
940 switch (n->type) {
941 case ROFFT_HEAD:
942 n->parent->head = n;
943 break;
944 case ROFFT_BODY:
945 if (n->end != ENDBODY_NOT)
946 return;
947 n->parent->body = n;
948 break;
949 case ROFFT_TAIL:
950 n->parent->tail = n;
951 break;
952 default:
953 return;
954 }
955
956 /*
957 * Copy over the normalised-data pointer of our parent. Not
958 * everybody has one, but copying a null pointer is fine.
959 */
960
961 n->norm = n->parent->norm;
962 assert(n->parent->type == ROFFT_BLOCK);
963 }
964
965 void
966 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
967 {
968 struct roff_node *n;
969
970 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
971 n->string = roff_strdup(man->roff, word);
972 roff_node_append(man, n);
973 n->flags |= NODE_VALID | NODE_ENDED;
974 man->next = ROFF_NEXT_SIBLING;
975 }
976
977 void
978 roff_word_append(struct roff_man *man, const char *word)
979 {
980 struct roff_node *n;
981 char *addstr, *newstr;
982
983 n = man->last;
984 addstr = roff_strdup(man->roff, word);
985 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
986 free(addstr);
987 free(n->string);
988 n->string = newstr;
989 man->next = ROFF_NEXT_SIBLING;
990 }
991
992 void
993 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
994 {
995 struct roff_node *n;
996
997 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
998 roff_node_append(man, n);
999 man->next = ROFF_NEXT_CHILD;
1000 }
1001
1002 struct roff_node *
1003 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
1004 {
1005 struct roff_node *n;
1006
1007 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1008 roff_node_append(man, n);
1009 man->next = ROFF_NEXT_CHILD;
1010 return n;
1011 }
1012
1013 struct roff_node *
1014 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1015 {
1016 struct roff_node *n;
1017
1018 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1019 roff_node_append(man, n);
1020 man->next = ROFF_NEXT_CHILD;
1021 return n;
1022 }
1023
1024 struct roff_node *
1025 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1026 {
1027 struct roff_node *n;
1028
1029 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1030 roff_node_append(man, n);
1031 man->next = ROFF_NEXT_CHILD;
1032 return n;
1033 }
1034
1035 static void
1036 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1037 {
1038 struct roff_node *n;
1039 struct tbl_span *span;
1040
1041 if (man->meta.macroset == MACROSET_MAN)
1042 man_breakscope(man, ROFF_TS);
1043 while ((span = tbl_span(tbl)) != NULL) {
1044 n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1045 n->span = span;
1046 roff_node_append(man, n);
1047 n->flags |= NODE_VALID | NODE_ENDED;
1048 man->next = ROFF_NEXT_SIBLING;
1049 }
1050 }
1051
1052 void
1053 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1054 {
1055
1056 /* Adjust siblings. */
1057
1058 if (n->prev)
1059 n->prev->next = n->next;
1060 if (n->next)
1061 n->next->prev = n->prev;
1062
1063 /* Adjust parent. */
1064
1065 if (n->parent != NULL) {
1066 if (n->parent->child == n)
1067 n->parent->child = n->next;
1068 if (n->parent->last == n)
1069 n->parent->last = n->prev;
1070 }
1071
1072 /* Adjust parse point. */
1073
1074 if (man == NULL)
1075 return;
1076 if (man->last == n) {
1077 if (n->prev == NULL) {
1078 man->last = n->parent;
1079 man->next = ROFF_NEXT_CHILD;
1080 } else {
1081 man->last = n->prev;
1082 man->next = ROFF_NEXT_SIBLING;
1083 }
1084 }
1085 if (man->meta.first == n)
1086 man->meta.first = NULL;
1087 }
1088
1089 void
1090 roff_node_relink(struct roff_man *man, struct roff_node *n)
1091 {
1092 roff_node_unlink(man, n);
1093 n->prev = n->next = NULL;
1094 roff_node_append(man, n);
1095 }
1096
1097 void
1098 roff_node_free(struct roff_node *n)
1099 {
1100
1101 if (n->args != NULL)
1102 mdoc_argv_free(n->args);
1103 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1104 free(n->norm);
1105 eqn_box_free(n->eqn);
1106 free(n->string);
1107 free(n->tag);
1108 free(n);
1109 }
1110
1111 void
1112 roff_node_delete(struct roff_man *man, struct roff_node *n)
1113 {
1114
1115 while (n->child != NULL)
1116 roff_node_delete(man, n->child);
1117 roff_node_unlink(man, n);
1118 roff_node_free(n);
1119 }
1120
1121 int
1122 roff_node_transparent(struct roff_node *n)
1123 {
1124 if (n == NULL)
1125 return 0;
1126 if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
1127 return 1;
1128 return roff_tok_transparent(n->tok);
1129 }
1130
1131 int
1132 roff_tok_transparent(enum roff_tok tok)
1133 {
1134 switch (tok) {
1135 case ROFF_ft:
1136 case ROFF_ll:
1137 case ROFF_mc:
1138 case ROFF_po:
1139 case ROFF_ta:
1140 case MDOC_Db:
1141 case MDOC_Es:
1142 case MDOC_Sm:
1143 case MDOC_Tg:
1144 case MAN_DT:
1145 case MAN_UC:
1146 case MAN_PD:
1147 case MAN_AT:
1148 return 1;
1149 default:
1150 return 0;
1151 }
1152 }
1153
1154 struct roff_node *
1155 roff_node_child(struct roff_node *n)
1156 {
1157 for (n = n->child; roff_node_transparent(n); n = n->next)
1158 continue;
1159 return n;
1160 }
1161
1162 struct roff_node *
1163 roff_node_prev(struct roff_node *n)
1164 {
1165 do {
1166 n = n->prev;
1167 } while (roff_node_transparent(n));
1168 return n;
1169 }
1170
1171 struct roff_node *
1172 roff_node_next(struct roff_node *n)
1173 {
1174 do {
1175 n = n->next;
1176 } while (roff_node_transparent(n));
1177 return n;
1178 }
1179
1180 void
1181 deroff(char **dest, const struct roff_node *n)
1182 {
1183 char *cp;
1184 size_t sz;
1185
1186 if (n->string == NULL) {
1187 for (n = n->child; n != NULL; n = n->next)
1188 deroff(dest, n);
1189 return;
1190 }
1191
1192 /* Skip leading whitespace. */
1193
1194 for (cp = n->string; *cp != '\0'; cp++) {
1195 if (cp[0] == '\\' && cp[1] != '\0' &&
1196 strchr(" %&0^|~", cp[1]) != NULL)
1197 cp++;
1198 else if ( ! isspace((unsigned char)*cp))
1199 break;
1200 }
1201
1202 /* Skip trailing backslash. */
1203
1204 sz = strlen(cp);
1205 if (sz > 0 && cp[sz - 1] == '\\')
1206 sz--;
1207
1208 /* Skip trailing whitespace. */
1209
1210 for (; sz; sz--)
1211 if ( ! isspace((unsigned char)cp[sz-1]))
1212 break;
1213
1214 /* Skip empty strings. */
1215
1216 if (sz == 0)
1217 return;
1218
1219 if (*dest == NULL) {
1220 *dest = mandoc_strndup(cp, sz);
1221 return;
1222 }
1223
1224 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1225 free(*dest);
1226 *dest = cp;
1227 }
1228
1229 /* --- main functions of the roff parser ---------------------------------- */
1230
1231 /*
1232 * In the current line, expand escape sequences that produce parsable
1233 * input text. Also check the syntax of the remaining escape sequences,
1234 * which typically produce output glyphs or change formatter state.
 *
 * Processing starts at offset pos of buf->buf, and newesc is the
 * character introducing escape sequences in this buffer.
 * The buffer may be reallocated when text is replaced.
 * Returns ROFF_CONT when the line is ready for further parsing,
 * ROFF_IGN | ROFF_APPEND when the line is continued on the next
 * input line, or ROFF_IGN when expansion was given up because of
 * too many replacements or a too long result.
1235 */
1236 static int
1237 roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc)
1238 {
1239 struct mctx *ctx; /* current macro call context */
1240 char ubuf[24]; /* buffer to print the number */
1241 struct roff_node *n; /* used for header comments */
1242 const char *start; /* start of the string to process */
1243 char *stesc; /* start of an escape sequence ('\\') */
1244 const char *esct; /* type of escape sequence */
1245 char *ep; /* end of comment string */
1246 const char *stnam; /* start of the name, after "[(*" */
1247 const char *cp; /* end of the name, e.g. before ']' */
1248 const char *res; /* the string to be substituted */
1249 char *nbuf; /* new buffer to copy buf->buf to */
1250 size_t maxl; /* expected length of the escape name */
1251 size_t naml; /* actual length of the escape name */
1252 size_t asz; /* length of the replacement */
1253 size_t rsz; /* length of the rest of the string */
1254 int inaml; /* length returned from mandoc_escape() */
1255 int expand_count; /* to avoid infinite loops */
1256 int npos; /* position in numeric expression */
1257 int arg_complete; /* argument not interrupted by eol */
1258 int quote_args; /* true for \\$@, false for \\$* */
1259 int done; /* no more input available */
1260 int deftype; /* type of definition to paste */
1261 int rcsid; /* kind of RCS id seen */
1262 enum mandocerr err; /* for escape sequence problems */
1263 char sign; /* increment number register */
1264 char term; /* character terminating the escape */
1265
1266 /* Search forward for comments. */
1267
1268 done = 0;
1269 start = buf->buf + pos;
1270 for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
1271 if (stesc[0] != newesc || stesc[1] == '\0')
1272 continue;
1273 stesc++;
1274 if (*stesc != '"' && *stesc != '#')
1275 continue;
1276
1277 /* Comment found, look for RCS id. */
1278
1279 rcsid = 0;
1280 if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
1281 rcsid = 1 << MANDOC_OS_OPENBSD;
1282 cp += 8;
1283 } else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
1284 rcsid = 1 << MANDOC_OS_NETBSD;
1285 cp += 7;
1286 }
1287 if (cp != NULL &&
1288 isalnum((unsigned char)*cp) == 0 &&
1289 strchr(cp, '$') != NULL) {
1290 if (r->man->meta.rcsids & rcsid)
1291 mandoc_msg(MANDOCERR_RCS_REP, ln,
1292 (int)(stesc - buf->buf) + 1,
1293 "%s", stesc + 1);
1294 r->man->meta.rcsids |= rcsid;
1295 }
1296
1297 /* Handle trailing whitespace. */
1298
/* From here on, stesc points to the escape character again. */
1299 ep = strchr(stesc--, '\0') - 1;
1300 if (*ep == '\n') {
1301 done = 1;
1302 ep--;
1303 }
1304 if (*ep == ' ' || *ep == '\t')
1305 mandoc_msg(MANDOCERR_SPACE_EOL,
1306 ln, (int)(ep - buf->buf), NULL);
1307
1308 /*
1309 * Save comments preceding the title macro
1310 * in the syntax tree.
1311 */
1312
1313 if (newesc != ASCII_ESC && r->options & MPARSE_COMMENT) {
1314 while (*ep == ' ' || *ep == '\t')
1315 ep--;
1316 ep[1] = '\0';
1317 n = roff_node_alloc(r->man,
1318 ln, stesc + 1 - buf->buf,
1319 ROFFT_COMMENT, TOKEN_NONE);
1320 n->string = mandoc_strdup(stesc + 2);
1321 roff_node_append(r->man, n);
1322 n->flags |= NODE_VALID | NODE_ENDED;
1323 r->man->next = ROFF_NEXT_SIBLING;
1324 }
1325
1326 /* Line continuation with comment. */
1327
1328 if (stesc[1] == '#') {
1329 *stesc = '\0';
1330 return ROFF_IGN | ROFF_APPEND;
1331 }
1332
1333 /* Discard normal comments. */
1334
1335 while (stesc > start && stesc[-1] == ' ' &&
1336 (stesc == start + 1 || stesc[-2] != '\\'))
1337 stesc--;
1338 *stesc = '\0';
1339 break;
1340 }
1341 if (stesc == start)
1342 return ROFF_CONT;
1343 stesc--;
1344
1345 /* Notice the end of the input. */
1346
1347 if (*stesc == '\n') {
1348 *stesc-- = '\0';
1349 done = 1;
1350 }
1351
/*
 * Scan backwards from the end of the line.  After a replacement,
 * scanning resumes at the end of the inserted text, so escape
 * sequences contained in replacement text are processed as well.
 */
1352 expand_count = 0;
1353 while (stesc >= start) {
1354 if (*stesc != newesc) {
1355
1356 /*
1357 * If we have a non-standard escape character,
1358 * escape literal backslashes because all
1359 * processing in subsequent functions uses
1360 * the standard escaping rules.
1361 */
1362
1363 if (newesc != ASCII_ESC && *stesc == '\\') {
1364 *stesc = '\0';
1365 buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
1366 buf->buf, stesc + 1) + 1;
1367 start = nbuf + pos;
1368 stesc = nbuf + (stesc - buf->buf);
1369 free(buf->buf);
1370 buf->buf = nbuf;
1371 }
1372
1373 /* Search backwards for the next escape. */
1374
1375 stesc--;
1376 continue;
1377 }
1378
1379 /* If it is escaped, skip it. */
1380
1381 for (cp = stesc - 1; cp >= start; cp--)
1382 if (*cp != r->escape)
1383 break;
1384
/*
 * stesc - cp is the length of the run of escape characters
 * ending at stesc.  An even run is rewritten to backslashes
 * and skipped as a whole; for an odd run, the last character
 * starts an escape sequence.  An escape character at the very
 * end of the line requests continuation on the next line.
 */
1385 if ((stesc - cp) % 2 == 0) {
1386 while (stesc > cp)
1387 *stesc-- = '\\';
1388 continue;
1389 } else if (stesc[1] != '\0') {
1390 *stesc = '\\';
1391 } else {
1392 *stesc-- = '\0';
1393 if (done)
1394 continue;
1395 else
1396 return ROFF_IGN | ROFF_APPEND;
1397 }
1398
1399 /* Decide whether to expand or to check only. */
1400
1401 term = '\0';
1402 cp = stesc + 1;
1403 if (*cp == 'E')
1404 cp++;
1405 esct = cp;
1406 switch (*esct) {
1407 case '*':
1408 case '$':
1409 res = NULL;
1410 break;
1411 case 'B':
1412 case 'w':
1413 term = cp[1];
1414 /* FALLTHROUGH */
1415 case 'n':
1416 sign = cp[1];
1417 if (sign == '+' || sign == '-')
1418 cp++;
1419 res = ubuf;
1420 break;
1421 default:
1422 err = MANDOCERR_OK;
1423 switch(mandoc_escape(&cp, &stnam, &inaml)) {
1424 case ESCAPE_SPECIAL:
1425 if (mchars_spec2cp(stnam, inaml) >= 0)
1426 break;
1427 /* FALLTHROUGH */
1428 case ESCAPE_ERROR:
1429 err = MANDOCERR_ESC_BAD;
1430 break;
1431 case ESCAPE_UNDEF:
1432 err = MANDOCERR_ESC_UNDEF;
1433 break;
1434 case ESCAPE_UNSUPP:
1435 err = MANDOCERR_ESC_UNSUPP;
1436 break;
1437 default:
1438 break;
1439 }
1440 if (err != MANDOCERR_OK)
1441 mandoc_msg(err, ln, (int)(stesc - buf->buf),
1442 "%.*s", (int)(cp - stesc), stesc);
1443 stesc--;
1444 continue;
1445 }
1446
1447 if (EXPAND_LIMIT < ++expand_count) {
1448 mandoc_msg(MANDOCERR_ROFFLOOP,
1449 ln, (int)(stesc - buf->buf), NULL);
1450 return ROFF_IGN;
1451 }
1452
1453 /*
1454 * The third character decides the length
1455 * of the name of the string or register.
1456 * Save a pointer to the name.
1457 */
1458
1459 if (term == '\0') {
1460 switch (*++cp) {
1461 case '\0':
1462 maxl = 0;
1463 break;
1464 case '(':
1465 cp++;
1466 maxl = 2;
1467 break;
1468 case '[':
1469 cp++;
1470 term = ']';
1471 maxl = 0;
1472 break;
1473 default:
1474 maxl = 1;
1475 break;
1476 }
1477 } else {
1478 cp += 2;
1479 maxl = 0;
1480 }
1481 stnam = cp;
1482
1483 /* Advance to the end of the name. */
1484
1485 naml = 0;
1486 arg_complete = 1;
1487 while (maxl == 0 || naml < maxl) {
1488 if (*cp == '\0') {
1489 mandoc_msg(MANDOCERR_ESC_BAD, ln,
1490 (int)(stesc - buf->buf), "%s", stesc);
1491 arg_complete = 0;
1492 break;
1493 }
1494 if (maxl == 0 && *cp == term) {
1495 cp++;
1496 break;
1497 }
1498 if (*cp++ != '\\' || *esct != 'w') {
1499 naml++;
1500 continue;
1501 }
1502 switch (mandoc_escape(&cp, NULL, NULL)) {
1503 case ESCAPE_SPECIAL:
1504 case ESCAPE_UNICODE:
1505 case ESCAPE_NUMBERED:
1506 case ESCAPE_UNDEF:
1507 case ESCAPE_OVERSTRIKE:
1508 naml++;
1509 break;
1510 default:
1511 break;
1512 }
1513 }
1514
1515 /*
1516 * Retrieve the replacement string; if it is
1517 * undefined, resume searching for escapes.
1518 */
1519
1520 switch (*esct) {
1521 case '*':
1522 if (arg_complete) {
1523 deftype = ROFFDEF_USER | ROFFDEF_PRE;
1524 res = roff_getstrn(r, stnam, naml, &deftype);
1525
1526 /*
1527 * If not overridden, let \*(.T
1528 * through to the formatters.
1529 */
1530
1531 if (res == NULL && naml == 2 &&
1532 stnam[0] == '.' && stnam[1] == 'T') {
1533 roff_setstrn(&r->strtab,
1534 ".T", 2, NULL, 0, 0);
1535 stesc--;
1536 continue;
1537 }
1538 }
1539 break;
1540 case '$':
1541 if (r->mstackpos < 0) {
1542 mandoc_msg(MANDOCERR_ARG_UNDEF, ln,
1543 (int)(stesc - buf->buf), "%.3s", stesc);
1544 break;
1545 }
1546 ctx = r->mstack + r->mstackpos;
1547 npos = esct[1] - '1';
1548 if (npos >= 0 && npos <= 8) {
1549 res = npos < ctx->argc ?
1550 ctx->argv[npos] : "";
1551 break;
1552 }
1553 if (esct[1] == '*')
1554 quote_args = 0;
1555 else if (esct[1] == '@')
1556 quote_args = 1;
1557 else {
1558 mandoc_msg(MANDOCERR_ARG_NONUM, ln,
1559 (int)(stesc - buf->buf), "%.3s", stesc);
1560 break;
1561 }
/*
 * Replace the three bytes of the escape sequence in place by
 * all arguments, separated by blanks.  First compute the size
 * of the replacement, then resize the buffer, moving the rest
 * of the line before shrinking or after growing the buffer.
 */
1562 asz = 0;
1563 for (npos = 0; npos < ctx->argc; npos++) {
1564 if (npos)
1565 asz++; /* blank */
1566 if (quote_args)
1567 asz += 2; /* quotes */
1568 asz += strlen(ctx->argv[npos]);
1569 }
1570 if (asz != 3) {
1571 rsz = buf->sz - (stesc - buf->buf) - 3;
1572 if (asz < 3)
1573 memmove(stesc + asz, stesc + 3, rsz);
1574 buf->sz += asz - 3;
1575 nbuf = mandoc_realloc(buf->buf, buf->sz);
1576 start = nbuf + pos;
1577 stesc = nbuf + (stesc - buf->buf);
1578 buf->buf = nbuf;
1579 if (asz > 3)
1580 memmove(stesc + asz, stesc + 3, rsz);
1581 }
1582 for (npos = 0; npos < ctx->argc; npos++) {
1583 if (npos)
1584 *stesc++ = ' ';
1585 if (quote_args)
1586 *stesc++ = '"';
1587 cp = ctx->argv[npos];
1588 while (*cp != '\0')
1589 *stesc++ = *cp++;
1590 if (quote_args)
1591 *stesc++ = '"';
1592 }
1593 continue;
1594 case 'B':
1595 npos = 0;
1596 ubuf[0] = arg_complete &&
1597 roff_evalnum(r, ln, stnam, &npos,
1598 NULL, ROFFNUM_SCALE) &&
1599 stnam + npos + 1 == cp ? '1' : '0';
1600 ubuf[1] = '\0';
1601 break;
1602 case 'n':
1603 if (arg_complete)
1604 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1605 roff_getregn(r, stnam, naml, sign));
1606 else
1607 ubuf[0] = '\0';
1608 break;
1609 case 'w':
1610 /* use even incomplete args */
/* The result is 24 times the number of characters counted above. */
1611 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1612 24 * (int)naml);
1613 break;
1614 }
1615
1616 if (res == NULL) {
1617 if (*esct == '*')
1618 mandoc_msg(MANDOCERR_STR_UNDEF,
1619 ln, (int)(stesc - buf->buf),
1620 "%.*s", (int)naml, stnam);
1621 res = "";
1622 } else if (buf->sz + strlen(res) > SHRT_MAX) {
1623 mandoc_msg(MANDOCERR_ROFFLOOP,
1624 ln, (int)(stesc - buf->buf), NULL);
1625 return ROFF_IGN;
1626 }
1627
1628 /* Replace the escape sequence by the string. */
1629
1630 *stesc = '\0';
1631 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
1632 buf->buf, res, cp) + 1;
1633
1634 /* Prepare for the next replacement. */
1635
1636 start = nbuf + pos;
1637 stesc = nbuf + (stesc - buf->buf) + strlen(res);
1638 free(buf->buf);
1639 buf->buf = nbuf;
1640 }
1641 return ROFF_CONT;
1642 }
1643
1644 /*
1645 * Parse a quoted or unquoted roff-style request or macro argument.
1646 * Return a pointer to the parsed argument, which is either the original
1647 * pointer or advanced by one byte in case the argument is quoted.
1648 * NUL-terminate the argument in place.
1649 * Collapse pairs of quotes inside quoted arguments.
1650 * Advance the argument pointer to the next argument,
1651 * or to the NUL byte terminating the argument line.
1652 */
1653 char *
1654 roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
1655 {
1656 struct buf buf;
1657 char *cp, *start;
1658 int newesc, pairs, quoted, white;
1659
1660 /* Quoting can only start with a new word. */
1661 start = *cpp;
1662 quoted = 0;
1663 if ('"' == *start) {
1664 quoted = 1;
1665 start++;
1666 }
1667
1668 newesc = pairs = white = 0;
1669 for (cp = start; '\0' != *cp; cp++) {
1670
1671 /*
1672 * Move the following text left
1673 * after quoted quotes and after "\\" and "\t".
1674 */
1675 if (pairs)
1676 cp[-pairs] = cp[0];
1677
1678 if ('\\' == cp[0]) {
1679 /*
1680 * In copy mode, translate double to single
1681 * backslashes and backslash-t to literal tabs.
1682 */
1683 switch (cp[1]) {
1684 case 'a':
1685 case 't':
1686 cp[-pairs] = '\t';
1687 pairs++;
1688 cp++;
1689 break;
1690 case '\\':
1691 newesc = 1;
1692 cp[-pairs] = ASCII_ESC;
1693 pairs++;
1694 cp++;
1695 break;
1696 case ' ':
1697 /* Skip escaped blanks. */
1698 if (0 == quoted)
1699 cp++;
1700 break;
1701 default:
1702 break;
1703 }
1704 } else if (0 == quoted) {
1705 if (' ' == cp[0]) {
1706 /* Unescaped blanks end unquoted args. */
1707 white = 1;
1708 break;
1709 }
1710 } else if ('"' == cp[0]) {
1711 if ('"' == cp[1]) {
1712 /* Quoted quotes collapse. */
1713 pairs++;
1714 cp++;
1715 } else {
1716 /* Unquoted quotes end quoted args. */
1717 quoted = 2;
1718 break;
1719 }
1720 }
1721 }
1722
1723 /* Quoted argument without a closing quote. */
1724 if (1 == quoted)
1725 mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);
1726
1727 /* NUL-terminate this argument and move to the next one. */
1728 if (pairs)
1729 cp[-pairs] = '\0';
1730 if ('\0' != *cp) {
1731 *cp++ = '\0';
1732 while (' ' == *cp)
1733 cp++;
1734 }
1735 *pos += (int)(cp - start) + (quoted ? 1 : 0);
1736 *cpp = cp;
1737
1738 if ('\0' == *cp && (white || ' ' == cp[-1]))
1739 mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);
1740
1741 start = mandoc_strdup(start);
1742 if (newesc == 0)
1743 return start;
1744
1745 buf.buf = start;
1746 buf.sz = strlen(start) + 1;
1747 buf.next = NULL;
1748 if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) {
1749 free(buf.buf);
1750 buf.buf = mandoc_strdup("");
1751 }
1752 return buf.buf;
1753 }
1754
1755
1756 /*
1757 * Process text streams.
1758 */
1759 static int
1760 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1761 {
1762 size_t sz;
1763 const char *start;
1764 char *p;
1765 int isz;
1766 enum mandoc_esc esc;
1767
1768 /* Spring the input line trap. */
1769
1770 if (roffit_lines == 1) {
1771 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1772 free(buf->buf);
1773 buf->buf = p;
1774 buf->sz = isz + 1;
1775 *offs = 0;
1776 free(roffit_macro);
1777 roffit_lines = 0;
1778 return ROFF_REPARSE;
1779 } else if (roffit_lines > 1)
1780 --roffit_lines;
1781
1782 if (roffce_node != NULL && buf->buf[pos] != '\0') {
1783 if (roffce_lines < 1) {
1784 r->man->last = roffce_node;
1785 r->man->next = ROFF_NEXT_SIBLING;
1786 roffce_lines = 0;
1787 roffce_node = NULL;
1788 } else
1789 roffce_lines--;
1790 }
1791
1792 /* Convert all breakable hyphens into ASCII_HYPH. */
1793
1794 start = p = buf->buf + pos;
1795
1796 while (*p != '\0') {
1797 sz = strcspn(p, "-\\");
1798 p += sz;
1799
1800 if (*p == '\0')
1801 break;
1802
1803 if (*p == '\\') {
1804 /* Skip over escapes. */
1805 p++;
1806 esc = mandoc_escape((const char **)&p, NULL, NULL);
1807 if (esc == ESCAPE_ERROR)
1808 break;
1809 while (*p == '-')
1810 p++;
1811 continue;
1812 } else if (p == start) {
1813 p++;
1814 continue;
1815 }
1816
1817 if (isalpha((unsigned char)p[-1]) &&
1818 isalpha((unsigned char)p[1]))
1819 *p = ASCII_HYPH;
1820 p++;
1821 }
1822 return ROFF_CONT;
1823 }
1824
1825 int
1826 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
1827 {
1828 enum roff_tok t;
1829 int e;
1830 int pos; /* parse point */
1831 int spos; /* saved parse point for messages */
1832 int ppos; /* original offset in buf->buf */
1833 int ctl; /* macro line (boolean) */
1834
1835 ppos = pos = *offs;
1836
1837 /* Handle in-line equation delimiters. */
1838
1839 if (r->tbl == NULL &&
1840 r->last_eqn != NULL && r->last_eqn->delim &&
1841 (r->eqn == NULL || r->eqn_inline)) {
1842 e = roff_eqndelim(r, buf, pos);
1843 if (e == ROFF_REPARSE)
1844 return e;
1845 assert(e == ROFF_CONT);
1846 }
1847
1848 /* Expand some escape sequences. */
1849
1850 e = roff_expand(r, buf, ln, pos, r->escape);
1851 if ((e & ROFF_MASK) == ROFF_IGN)
1852 return e;
1853 assert(e == ROFF_CONT);
1854
1855 ctl = roff_getcontrol(r, buf->buf, &pos);
1856
1857 /*
1858 * First, if a scope is open and we're not a macro, pass the
1859 * text through the macro's filter.
1860 * Equations process all content themselves.
1861 * Tables process almost all content themselves, but we want
1862 * to warn about macros before passing it there.
1863 */
1864
1865 if (r->last != NULL && ! ctl) {
1866 t = r->last->tok;
1867 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1868 if ((e & ROFF_MASK) == ROFF_IGN)
1869 return e;
1870 e &= ~ROFF_MASK;
1871 } else
1872 e = ROFF_IGN;
1873 if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
1874 eqn_read(r->eqn, buf->buf + ppos);
1875 return e;
1876 }
1877 if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
1878 tbl_read(r->tbl, ln, buf->buf, ppos);
1879 roff_addtbl(r->man, ln, r->tbl);
1880 return e;
1881 }
1882 if ( ! ctl) {
1883 r->options &= ~MPARSE_COMMENT;
1884 return roff_parsetext(r, buf, pos, offs) | e;
1885 }
1886
1887 /* Skip empty request lines. */
1888
1889 if (buf->buf[pos] == '"') {
1890 mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
1891 return ROFF_IGN;
1892 } else if (buf->buf[pos] == '\0')
1893 return ROFF_IGN;
1894
1895 /*
1896 * If a scope is open, go to the child handler for that macro,
1897 * as it may want to preprocess before doing anything with it.
1898 * Don't do so if an equation is open.
1899 */
1900
1901 if (r->last) {
1902 t = r->last->tok;
1903 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1904 }
1905
1906 /* No scope is open. This is a new request or macro. */
1907
1908 r->options &= ~MPARSE_COMMENT;
1909 spos = pos;
1910 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1911
1912 /* Tables ignore most macros. */
1913
1914 if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
1915 t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
1916 mandoc_msg(MANDOCERR_TBLMACRO,
1917 ln, pos, "%s", buf->buf + spos);
1918 if (t != TOKEN_NONE)
1919 return ROFF_IGN;
1920 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1921 pos++;
1922 while (buf->buf[pos] == ' ')
1923 pos++;
1924 tbl_read(r->tbl, ln, buf->buf, pos);
1925 roff_addtbl(r->man, ln, r->tbl);
1926 return ROFF_IGN;
1927 }
1928
1929 /* For now, let high level macros abort .ce mode. */
1930
1931 if (ctl && roffce_node != NULL &&
1932 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
1933 t == ROFF_TH || t == ROFF_TS)) {
1934 r->man->last = roffce_node;
1935 r->man->next = ROFF_NEXT_SIBLING;
1936 roffce_lines = 0;
1937 roffce_node = NULL;
1938 }
1939
1940 /*
1941 * This is neither a roff request nor a user-defined macro.
1942 * Let the standard macro set parsers handle it.
1943 */
1944
1945 if (t == TOKEN_NONE)
1946 return ROFF_CONT;
1947
1948 /* Execute a roff request or a user defined macro. */
1949
1950 return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
1951 }
1952
1953 /*
1954 * Internal interface function to tell the roff parser that execution
1955 * of the current macro ended. This is required because macro
1956 * definitions usually do not end with a .return request.
1957 */
1958 void
1959 roff_userret(struct roff *r)
1960 {
1961 struct mctx *ctx;
1962 int i;
1963
1964 assert(r->mstackpos >= 0);
1965 ctx = r->mstack + r->mstackpos;
1966 for (i = 0; i < ctx->argc; i++)
1967 free(ctx->argv[i]);
1968 ctx->argc = 0;
1969 r->mstackpos--;
1970 }
1971
1972 void
1973 roff_endparse(struct roff *r)
1974 {
1975 if (r->last != NULL)
1976 mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
1977 r->last->col, "%s", roff_name[r->last->tok]);
1978
1979 if (r->eqn != NULL) {
1980 mandoc_msg(MANDOCERR_BLK_NOEND,
1981 r->eqn->node->line, r->eqn->node->pos, "EQ");
1982 eqn_parse(r->eqn);
1983 r->eqn = NULL;
1984 }
1985
1986 if (r->tbl != NULL) {
1987 tbl_end(r->tbl, 1);
1988 r->tbl = NULL;
1989 }
1990 }
1991
1992 /*
1993 * Parse a roff node's type from the input buffer. This must be in the
1994 * form of ".foo xxx" in the usual way.
1995 */
1996 static enum roff_tok
1997 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1998 {
1999 char *cp;
2000 const char *mac;
2001 size_t maclen;
2002 int deftype;
2003 enum roff_tok t;
2004
2005 cp = buf + *pos;
2006
2007 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
2008 return TOKEN_NONE;
2009
2010 mac = cp;
2011 maclen = roff_getname(r, &cp, ln, ppos);
2012
2013 deftype = ROFFDEF_USER | ROFFDEF_REN;
2014 r->current_string = roff_getstrn(r, mac, maclen, &deftype);
2015 switch (deftype) {
2016 case ROFFDEF_USER:
2017 t = ROFF_USERDEF;
2018 break;
2019 case ROFFDEF_REN:
2020 t = ROFF_RENAMED;
2021 break;
2022 default:
2023 t = roffhash_find(r->reqtab, mac, maclen);
2024 break;
2025 }
2026 if (t != TOKEN_NONE)
2027 *pos = cp - buf;
2028 else if (deftype == ROFFDEF_UNDEF) {
2029 /* Using an undefined macro defines it to be empty. */
2030 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
2031 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
2032 }
2033 return t;
2034 }
2035
2036 /* --- handling of request blocks ----------------------------------------- */
2037
2038 /*
2039 * Close a macro definition block or an "ignore" block.
2040 */
2041 static int
2042 roff_cblock(ROFF_ARGS)
2043 {
2044 int rr;
2045
2046 if (r->last == NULL) {
2047 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2048 return ROFF_IGN;
2049 }
2050
2051 switch (r->last->tok) {
2052 case ROFF_am:
2053 case ROFF_ami:
2054 case ROFF_de:
2055 case ROFF_dei:
2056 case ROFF_ig:
2057 break;
2058 case ROFF_am1:
2059 case ROFF_de1:
2060 /* Remapped in roff_block(). */
2061 abort();
2062 default:
2063 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2064 return ROFF_IGN;
2065 }
2066
2067 roffnode_pop(r);
2068 roffnode_cleanscope(r);
2069
2070 /*
2071 * If a conditional block with braces is still open,
2072 * check for "\}" block end markers.
2073 */
2074
2075 if (r->last != NULL && r->last->endspan < 0) {
2076 rr = 1; /* If arguments follow "\}", warn about them. */
2077 roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2078 }
2079
2080 if (buf->buf[pos] != '\0')
2081 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
2082 ".. %s", buf->buf + pos);
2083
2084 return ROFF_IGN;
2085 }
2086
2087 /*
2088 * Pop all nodes ending at the end of the current input line.
2089 * Return the number of loops ended.
2090 */
2091 static int
2092 roffnode_cleanscope(struct roff *r)
2093 {
2094 int inloop;
2095
2096 inloop = 0;
2097 while (r->last != NULL && r->last->endspan > 0) {
2098 if (--r->last->endspan != 0)
2099 break;
2100 inloop += roffnode_pop(r);
2101 }
2102 return inloop;
2103 }
2104
2105 /*
2106 * Handle the closing "\}" of a conditional block.
2107 * Apart from generating warnings, this only pops nodes.
2108 * Return the number of loops ended.
2109 */
2110 static int
2111 roff_ccond(struct roff *r, int ln, int ppos)
2112 {
2113 if (NULL == r->last) {
2114 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2115 return 0;
2116 }
2117
2118 switch (r->last->tok) {
2119 case ROFF_el:
2120 case ROFF_ie:
2121 case ROFF_if:
2122 case ROFF_while:
2123 break;
2124 default:
2125 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2126 return 0;
2127 }
2128
2129 if (r->last->endspan > -1) {
2130 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2131 return 0;
2132 }
2133
2134 return roffnode_pop(r) + roffnode_cleanscope(r);
2135 }
2136
2137 static int
2138 roff_block(ROFF_ARGS)
2139 {
2140 const char *name, *value;
2141 char *call, *cp, *iname, *rname;
2142 size_t csz, namesz, rsz;
2143 int deftype;
2144
2145 /* Ignore groff compatibility mode for now. */
2146
2147 if (tok == ROFF_de1)
2148 tok = ROFF_de;
2149 else if (tok == ROFF_dei1)
2150 tok = ROFF_dei;
2151 else if (tok == ROFF_am1)
2152 tok = ROFF_am;
2153 else if (tok == ROFF_ami1)
2154 tok = ROFF_ami;
2155
2156 /* Parse the macro name argument. */
2157
2158 cp = buf->buf + pos;
2159 if (tok == ROFF_ig) {
2160 iname = NULL;
2161 namesz = 0;
2162 } else {
2163 iname = cp;
2164 namesz = roff_getname(r, &cp, ln, ppos);
2165 iname[namesz] = '\0';
2166 }
2167
2168 /* Resolve the macro name argument if it is indirect. */
2169
2170 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2171 deftype = ROFFDEF_USER;
2172 name = roff_getstrn(r, iname, namesz, &deftype);
2173 if (name == NULL) {
2174 mandoc_msg(MANDOCERR_STR_UNDEF,
2175 ln, (int)(iname - buf->buf),
2176 "%.*s", (int)namesz, iname);
2177 namesz = 0;
2178 } else
2179 namesz = strlen(name);
2180 } else
2181 name = iname;
2182
2183 if (namesz == 0 && tok != ROFF_ig) {
2184 mandoc_msg(MANDOCERR_REQ_EMPTY,
2185 ln, ppos, "%s", roff_name[tok]);
2186 return ROFF_IGN;
2187 }
2188
2189 roffnode_push(r, tok, name, ln, ppos);
2190
2191 /*
2192 * At the beginning of a `de' macro, clear the existing string
2193 * with the same name, if there is one. New content will be
2194 * appended from roff_block_text() in multiline mode.
2195 */
2196
2197 if (tok == ROFF_de || tok == ROFF_dei) {
2198 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
2199 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2200 } else if (tok == ROFF_am || tok == ROFF_ami) {
2201 deftype = ROFFDEF_ANY;
2202 value = roff_getstrn(r, iname, namesz, &deftype);
2203 switch (deftype) { /* Before appending, ... */
2204 case ROFFDEF_PRE: /* copy predefined to user-defined. */
2205 roff_setstrn(&r->strtab, name, namesz,
2206 value, strlen(value), 0);
2207 break;
2208 case ROFFDEF_REN: /* call original standard macro. */
2209 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2210 (int)strlen(value), value);
2211 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2212 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2213 free(call);
2214 break;
2215 case ROFFDEF_STD: /* rename and call standard macro. */
2216 rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
2217 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
2218 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2219 (int)rsz, rname);
2220 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2221 free(call);
2222 free(rname);
2223 break;
2224 default:
2225 break;
2226 }
2227 }
2228
2229 if (*cp == '\0')
2230 return ROFF_IGN;
2231
2232 /* Get the custom end marker. */
2233
2234 iname = cp;
2235 namesz = roff_getname(r, &cp, ln, ppos);
2236
2237 /* Resolve the end marker if it is indirect. */
2238
2239 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2240 deftype = ROFFDEF_USER;
2241 name = roff_getstrn(r, iname, namesz, &deftype);
2242 if (name == NULL) {
2243 mandoc_msg(MANDOCERR_STR_UNDEF,
2244 ln, (int)(iname - buf->buf),
2245 "%.*s", (int)namesz, iname);
2246 namesz = 0;
2247 } else
2248 namesz = strlen(name);
2249 } else
2250 name = iname;
2251
2252 if (namesz)
2253 r->last->end = mandoc_strndup(name, namesz);
2254
2255 if (*cp != '\0')
2256 mandoc_msg(MANDOCERR_ARG_EXCESS,
2257 ln, pos, ".%s ... %s", roff_name[tok], cp);
2258
2259 return ROFF_IGN;
2260 }
2261
2262 static int
2263 roff_block_sub(ROFF_ARGS)
2264 {
2265 enum roff_tok t;
2266 int i, j;
2267
2268 /*
2269 * First check whether a custom macro exists at this level. If
2270 * it does, then check against it. This is some of groff's
2271 * stranger behaviours. If we encountered a custom end-scope
2272 * tag and that tag also happens to be a "real" macro, then we
2273 * need to try interpreting it again as a real macro. If it's
2274 * not, then return ignore. Else continue.
2275 */
2276
2277 if (r->last->end) {
2278 for (i = pos, j = 0; r->last->end[j]; j++, i++)
2279 if (buf->buf[i] != r->last->end[j])
2280 break;
2281
2282 if (r->last->end[j] == '\0' &&
2283 (buf->buf[i] == '\0' ||
2284 buf->buf[i] == ' ' ||
2285 buf->buf[i] == '\t')) {
2286 roffnode_pop(r);
2287 roffnode_cleanscope(r);
2288
2289 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2290 i++;
2291
2292 pos = i;
2293 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2294 TOKEN_NONE)
2295 return ROFF_RERUN;
2296 return ROFF_IGN;
2297 }
2298 }
2299
2300 /*
2301 * If we have no custom end-query or lookup failed, then try
2302 * pulling it out of the hashtable.
2303 */
2304
2305 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2306
2307 if (t != ROFF_cblock) {
2308 if (tok != ROFF_ig)
2309 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2310 return ROFF_IGN;
2311 }
2312
2313 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2314 }
2315
2316 static int
2317 roff_block_text(ROFF_ARGS)
2318 {
2319
2320 if (tok != ROFF_ig)
2321 roff_setstr(r, r->last->name, buf->buf + pos, 2);
2322
2323 return ROFF_IGN;
2324 }
2325
2326 /*
2327 * Check for a closing "\}" and handle it.
2328 * In this function, the final "int *offs" argument is used for
2329 * different purposes than elsewhere:
2330 * Input: *offs == 0: caller wants to discard arguments following \}
2331 * *offs == 1: caller wants to preserve text following \}
2332 * Output: *offs = 0: tell caller to discard input line
2333 * *offs = 1: tell caller to use input line
2334 */
2335 static int
2336 roff_cond_checkend(ROFF_ARGS)
2337 {
2338 char *ep;
2339 int endloop, irc, rr;
2340
2341 irc = ROFF_IGN;
2342 rr = r->last->rule;
2343 endloop = tok != ROFF_while ? ROFF_IGN :
2344 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2345 if (roffnode_cleanscope(r))
2346 irc |= endloop;
2347
2348 /*
2349 * If "\}" occurs on a macro line without a preceding macro or
2350 * a text line contains nothing else, drop the line completely.
2351 */
2352
2353 ep = buf->buf + pos;
2354 if (ep[0] == '\\' && ep[1] == '}' && (ep[2] == '\0' || *offs == 0))
2355 rr = 0;
2356
2357 /*
2358 * The closing delimiter "\}" rewinds the conditional scope
2359 * but is otherwise ignored when interpreting the line.
2360 */
2361
2362 while ((ep = strchr(ep, '\\')) != NULL) {
2363 switch (ep[1]) {
2364 case '}':
2365 if (ep[2] == '\0')
2366 ep[0] = '\0';
2367 else if (rr)
2368 ep[1] = '&';
2369 else
2370 memmove(ep, ep + 2, strlen(ep + 2) + 1);
2371 if (roff_ccond(r, ln, ep - buf->buf))
2372 irc |= endloop;
2373 break;
2374 case '\0':
2375 ++ep;
2376 break;
2377 default:
2378 ep += 2;
2379 break;
2380 }
2381 }
2382 *offs = rr;
2383 return irc;
2384 }
2385
2386 /*
2387 * Parse and process a request or macro line in conditional scope.
2388 */
2389 static int
2390 roff_cond_sub(ROFF_ARGS)
2391 {
2392 struct roffnode *bl;
2393 int irc, rr;
2394 enum roff_tok t;
2395
2396 rr = 0; /* If arguments follow "\}", skip them. */
2397 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2398 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2399
2400 /* For now, let high level macros abort .ce mode. */
2401
2402 if (roffce_node != NULL &&
2403 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
2404 t == ROFF_TH || t == ROFF_TS)) {
2405 r->man->last = roffce_node;
2406 r->man->next = ROFF_NEXT_SIBLING;
2407 roffce_lines = 0;
2408 roffce_node = NULL;
2409 }
2410
2411 /*
2412 * Fully handle known macros when they are structurally
2413 * required or when the conditional evaluated to true.
2414 */
2415
2416 if (t == ROFF_break) {
2417 if (irc & ROFF_LOOPMASK)
2418 irc = ROFF_IGN | ROFF_LOOPEXIT;
2419 else if (rr) {
2420 for (bl = r->last; bl != NULL; bl = bl->parent) {
2421 bl->rule = 0;
2422 if (bl->tok == ROFF_while)
2423 break;
2424 }
2425 }
2426 } else if (t != TOKEN_NONE &&
2427 (rr || roffs[t].flags & ROFFMAC_STRUCT))
2428 irc |= (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2429 else
2430 irc |= rr ? ROFF_CONT : ROFF_IGN;
2431 return irc;
2432 }
2433
2434 /*
2435 * Parse and process a text line in conditional scope.
2436 */
2437 static int
2438 roff_cond_text(ROFF_ARGS)
2439 {
2440 int irc, rr;
2441
2442 rr = 1; /* If arguments follow "\}", preserve them. */
2443 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2444 if (rr)
2445 irc |= ROFF_CONT;
2446 return irc;
2447 }
2448
2449 /* --- handling of numeric and conditional expressions -------------------- */
2450
2451 /*
2452 * Parse a single signed integer number. Stop at the first non-digit.
2453 * If there is at least one digit, return success and advance the
2454 * parse point, else return failure and let the parse point unchanged.
2455 * Ignore overflows, treat them just like the C language.
2456 */
2457 static int
2458 roff_getnum(const char *v, int *pos, int *res, int flags)
2459 {
2460 int myres, scaled, n, p;
2461
2462 if (NULL == res)
2463 res = &myres;
2464
2465 p = *pos;
2466 n = v[p] == '-';
2467 if (n || v[p] == '+')
2468 p++;
2469
2470 if (flags & ROFFNUM_WHITE)
2471 while (isspace((unsigned char)v[p]))
2472 p++;
2473
2474 for (*res = 0; isdigit((unsigned char)v[p]); p++)
2475 *res = 10 * *res + v[p] - '0';
2476 if (p == *pos + n)
2477 return 0;
2478
2479 if (n)
2480 *res = -*res;
2481
2482 /* Each number may be followed by one optional scaling unit. */
2483
2484 switch (v[p]) {
2485 case 'f':
2486 scaled = *res * 65536;
2487 break;
2488 case 'i':
2489 scaled = *res * 240;
2490 break;
2491 case 'c':
2492 scaled = *res * 240 / 2.54;
2493 break;
2494 case 'v':
2495 case 'P':
2496 scaled = *res * 40;
2497 break;
2498 case 'm':
2499 case 'n':
2500 scaled = *res * 24;
2501 break;
2502 case 'p':
2503 scaled = *res * 10 / 3;
2504 break;
2505 case 'u':
2506 scaled = *res;
2507 break;
2508 case 'M':
2509 scaled = *res * 6 / 25;
2510 break;
2511 default:
2512 scaled = *res;
2513 p--;
2514 break;
2515 }
2516 if (flags & ROFFNUM_SCALE)
2517 *res = scaled;
2518
2519 *pos = p + 1;
2520 return 1;
2521 }
2522
/*
 * Evaluate a string comparison condition.
 * The character at v[*pos] is the delimiter.
 * Succeed (return 1) if the string up to its second occurrence
 * matches the string up to its third occurrence, else return 0.
 * Advance the cursor after the third occurrence
 * or lacking that, to the end of the line.
 * If the input is already at the end of the line, there is
 * no delimiter: return 0 and leave the cursor where it is.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*s1, *s2, *s3;
	int		 match;

	match = 0;
	s1 = v + *pos;		/* initial delimiter */

	/* Without this guard, s1 + 1 would point past the terminator. */
	if (*s1 == '\0')
		return 0;

	s2 = s1 + 1;		/* for scanning the first string */
	s3 = strchr(s2, *s1);	/* for scanning the second string */

	if (NULL == s3)		/* found no middle delimiter */
		goto out;

	while ('\0' != *++s3) {
		if (*s2 != *s3) {  /* mismatch */
			s3 = strchr(s3, *s1);
			break;
		}
		if (*s3 == *s1) {  /* found the final delimiter */
			match = 1;
			break;
		}
		s2++;
	}

out:
	if (NULL == s3)
		s3 = strchr(s2, '\0');
	else if (*s3 != '\0')
		s3++;
	*pos = s3 - v;
	return match;
}
2565
2566 /*
2567 * Evaluate an optionally negated single character, numerical,
2568 * or string condition.
2569 */
2570 static int
2571 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2572 {
2573 const char *start, *end;
2574 char *cp, *name;
2575 size_t sz;
2576 int deftype, len, number, savepos, istrue, wanttrue;
2577
2578 if ('!' == v[*pos]) {
2579 wanttrue = 0;
2580 (*pos)++;
2581 } else
2582 wanttrue = 1;
2583
2584 switch (v[*pos]) {
2585 case '\0':
2586 return 0;
2587 case 'n':
2588 case 'o':
2589 (*pos)++;
2590 return wanttrue;
2591 case 'e':
2592 case 't':
2593 case 'v':
2594 (*pos)++;
2595 return !wanttrue;
2596 case 'c':
2597 do {
2598 (*pos)++;
2599 } while (v[*pos] == ' ');
2600
2601 /*
2602 * Quirk for groff compatibility:
2603 * The horizontal tab is neither available nor unavailable.
2604 */
2605
2606 if (v[*pos] == '\t') {
2607 (*pos)++;
2608 return 0;
2609 }
2610
2611 /* Printable ASCII characters are available. */
2612
2613 if (v[*pos] != '\\') {
2614 (*pos)++;
2615 return wanttrue;
2616 }
2617
2618 end = v + ++*pos;
2619 switch (mandoc_escape(&end, &start, &len)) {
2620 case ESCAPE_SPECIAL:
2621 istrue = mchars_spec2cp(start, len) != -1;
2622 break;
2623 case ESCAPE_UNICODE:
2624 istrue = 1;
2625 break;
2626 case ESCAPE_NUMBERED:
2627 istrue = mchars_num2char(start, len) != -1;
2628 break;
2629 default:
2630 istrue = !wanttrue;
2631 break;
2632 }
2633 *pos = end - v;
2634 return istrue == wanttrue;
2635 case 'd':
2636 case 'r':
2637 cp = v + *pos + 1;
2638 while (*cp == ' ')
2639 cp++;
2640 name = cp;
2641 sz = roff_getname(r, &cp, ln, cp - v);
2642 if (sz == 0)
2643 istrue = 0;
2644 else if (v[*pos] == 'r')
2645 istrue = roff_hasregn(r, name, sz);
2646 else {
2647 deftype = ROFFDEF_ANY;
2648 roff_getstrn(r, name, sz, &deftype);
2649 istrue = !!deftype;
2650 }
2651 *pos = (name + sz) - v;
2652 return istrue == wanttrue;
2653 default:
2654 break;
2655 }
2656
2657 savepos = *pos;
2658 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2659 return (number > 0) == wanttrue;
2660 else if (*pos == savepos)
2661 return roff_evalstrcond(v, pos) == wanttrue;
2662 else
2663 return 0;
2664 }
2665
2666 static int
2667 roff_line_ignore(ROFF_ARGS)
2668 {
2669
2670 return ROFF_IGN;
2671 }
2672
2673 static int
2674 roff_insec(ROFF_ARGS)
2675 {
2676
2677 mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]);
2678 return ROFF_IGN;
2679 }
2680
2681 static int
2682 roff_unsupp(ROFF_ARGS)
2683 {
2684
2685 mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]);
2686 return ROFF_IGN;
2687 }
2688
2689 static int
2690 roff_cond(ROFF_ARGS)
2691 {
2692 int irc;
2693
2694 roffnode_push(r, tok, NULL, ln, ppos);
2695
2696 /*
2697 * An `.el' has no conditional body: it will consume the value
2698 * of the current rstack entry set in prior `ie' calls or
2699 * defaults to DENY.
2700 *
2701 * If we're not an `el', however, then evaluate the conditional.
2702 */
2703
2704 r->last->rule = tok == ROFF_el ?
2705 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2706 roff_evalcond(r, ln, buf->buf, &pos);
2707
2708 /*
2709 * An if-else will put the NEGATION of the current evaluated
2710 * conditional into the stack of rules.
2711 */
2712
2713 if (tok == ROFF_ie) {
2714 if (r->rstackpos + 1 == r->rstacksz) {
2715 r->rstacksz += 16;
2716 r->rstack = mandoc_reallocarray(r->rstack,
2717 r->rstacksz, sizeof(int));
2718 }
2719 r->rstack[++r->rstackpos] = !r->last->rule;
2720 }
2721
2722 /* If the parent has false as its rule, then so do we. */
2723
2724 if (r->last->parent && !r->last->parent->rule)
2725 r->last->rule = 0;
2726
2727 /*
2728 * Determine scope.
2729 * If there is nothing on the line after the conditional,
2730 * not even whitespace, use next-line scope.
2731 * Except that .while does not support next-line scope.
2732 */
2733
2734 if (buf->buf[pos] == '\0' && tok != ROFF_while) {
2735 r->last->endspan = 2;
2736 goto out;
2737 }
2738
2739 while (buf->buf[pos] == ' ')
2740 pos++;
2741
2742 /* An opening brace requests multiline scope. */
2743
2744 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2745 r->last->endspan = -1;
2746 pos += 2;
2747 while (buf->buf[pos] == ' ')
2748 pos++;
2749 goto out;
2750 }
2751
2752 /*
2753 * Anything else following the conditional causes
2754 * single-line scope. Warn if the scope contains
2755 * nothing but trailing whitespace.
2756 */
2757
2758 if (buf->buf[pos] == '\0')
2759 mandoc_msg(MANDOCERR_COND_EMPTY,
2760 ln, ppos, "%s", roff_name[tok]);
2761
2762 r->last->endspan = 1;
2763
2764 out:
2765 *offs = pos;
2766 irc = ROFF_RERUN;
2767 if (tok == ROFF_while)
2768 irc |= ROFF_WHILE;
2769 return irc;
2770 }
2771
2772 static int
2773 roff_ds(ROFF_ARGS)
2774 {
2775 char *string;
2776 const char *name;
2777 size_t namesz;
2778
2779 /* Ignore groff compatibility mode for now. */
2780
2781 if (tok == ROFF_ds1)
2782 tok = ROFF_ds;
2783 else if (tok == ROFF_as1)
2784 tok = ROFF_as;
2785
2786 /*
2787 * The first word is the name of the string.
2788 * If it is empty or terminated by an escape sequence,
2789 * abort the `ds' request without defining anything.
2790 */
2791
2792 name = string = buf->buf + pos;
2793 if (*name == '\0')
2794 return ROFF_IGN;
2795
2796 namesz = roff_getname(r, &string, ln, pos);
2797 switch (name[namesz]) {
2798 case '\\':
2799 return ROFF_IGN;
2800 case '\t':
2801 string = buf->buf + pos + namesz;
2802 break;
2803 default:
2804 break;
2805 }
2806
2807 /* Read past the initial double-quote, if any. */
2808 if (*string == '"')
2809 string++;
2810
2811 /* The rest is the value. */
2812 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2813 ROFF_as == tok);
2814 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2815 return ROFF_IGN;
2816 }
2817
/*
 * Parse a single operator, one or two characters long, at v[*pos].
 * If the operator is recognized, store its one-character code in
 * *res, advance the parse point behind it and return the code.
 * Otherwise, return 0 and leave the parse point unchanged; *res
 * then holds the unrecognized character.
 * Two-character operators are encoded as follows:
 * "<=" as 'l', ">=" as 'g', "<>" as '!', "<?" as 'i', ">?" as 'a',
 * and "==" as '='.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	int	 width;
	char	 next;

	*res = v[*pos];
	width = 1;

	if (*res == '<') {
		next = v[*pos + 1];
		if (next == '=') {
			*res = 'l';
			width = 2;
		} else if (next == '>') {
			*res = '!';
			width = 2;
		} else if (next == '?') {
			*res = 'i';
			width = 2;
		}
	} else if (*res == '>') {
		next = v[*pos + 1];
		if (next == '=') {
			*res = 'g';
			width = 2;
		} else if (next == '?') {
			*res = 'a';
			width = 2;
		}
	} else if (*res == '=') {
		if (v[*pos + 1] == '=')
			width = 2;
	} else if (*res != '+' && *res != '-' && *res != '*' &&
	    *res != '/' && *res != '%' && *res != '&' && *res != ':') {
		return 0;
	}

	*pos += width;
	return *res;
}
2881
2882 /*
2883 * Evaluate either a parenthesized numeric expression
2884 * or a single signed integer number.
2885 */
2886 static int
2887 roff_evalpar(struct roff *r, int ln,
2888 const char *v, int *pos, int *res, int flags)
2889 {
2890
2891 if ('(' != v[*pos])
2892 return roff_getnum(v, pos, res, flags);
2893
2894 (*pos)++;
2895 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2896 return 0;
2897
2898 /*
2899 * Omission of the closing parenthesis
2900 * is an error in validation mode,
2901 * but ignored in evaluation mode.
2902 */
2903
2904 if (')' == v[*pos])
2905 (*pos)++;
2906 else if (NULL == res)
2907 return 0;
2908
2909 return 1;
2910 }
2911
2912 /*
2913 * Evaluate a complete numeric expression.
2914 * Proceed left to right, there is no concept of precedence.
2915 */
2916 static int
2917 roff_evalnum(struct roff *r, int ln, const char *v,
2918 int *pos, int *res, int flags)
2919 {
2920 int mypos, operand2;
2921 char operator;
2922
2923 if (NULL == pos) {
2924 mypos = 0;
2925 pos = &mypos;
2926 }
2927
2928 if (flags & ROFFNUM_WHITE)
2929 while (isspace((unsigned char)v[*pos]))
2930 (*pos)++;
2931
2932 if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2933 return 0;
2934
2935 while (1) {
2936 if (flags & ROFFNUM_WHITE)
2937 while (isspace((unsigned char)v[*pos]))
2938 (*pos)++;
2939
2940 if ( ! roff_getop(v, pos, &operator))
2941 break;
2942
2943 if (flags & ROFFNUM_WHITE)
2944 while (isspace((unsigned char)v[*pos]))
2945 (*pos)++;
2946
2947 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2948 return 0;
2949
2950 if (flags & ROFFNUM_WHITE)
2951 while (isspace((unsigned char)v[*pos]))
2952 (*pos)++;
2953
2954 if (NULL == res)
2955 continue;
2956
2957 switch (operator) {
2958 case '+':
2959 *res += operand2;
2960 break;
2961 case '-':
2962 *res -= operand2;
2963 break;
2964 case '*':
2965 *res *= operand2;
2966 break;
2967 case '/':
2968 if (operand2 == 0) {
2969 mandoc_msg(MANDOCERR_DIVZERO,
2970 ln, *pos, "%s", v);
2971 *res = 0;
2972 break;
2973 }
2974 *res /= operand2;
2975 break;
2976 case '%':
2977 if (operand2 == 0) {
2978 mandoc_msg(MANDOCERR_DIVZERO,
2979 ln, *pos, "%s", v);
2980 *res = 0;
2981 break;
2982 }
2983 *res %= operand2;
2984 break;
2985 case '<':
2986 *res = *res < operand2;
2987 break;
2988 case '>':
2989 *res = *res > operand2;
2990 break;
2991 case 'l':
2992 *res = *res <= operand2;
2993 break;
2994 case 'g':
2995 *res = *res >= operand2;
2996 break;
2997 case '=':
2998 *res = *res == operand2;
2999 break;
3000 case '!':
3001 *res = *res != operand2;
3002 break;
3003 case '&':
3004 *res = *res && operand2;
3005 break;
3006 case ':':
3007 *res = *res || operand2;
3008 break;
3009 case 'i':
3010 if (operand2 < *res)
3011 *res = operand2;
3012 break;
3013 case 'a':
3014 if (operand2 > *res)
3015 *res = operand2;
3016 break;
3017 default:
3018 abort();
3019 }
3020 }
3021 return 1;
3022 }
3023
3024 /* --- register management ------------------------------------------------ */
3025
/*
 * Set, increment, or decrement the register with the given
 * NUL-terminated name, leaving its step size unchanged.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	const size_t	 namesz = strlen(name);

	roff_setregn(r, name, namesz, val, sign, INT_MIN);
}
3031
3032 static void
3033 roff_setregn(struct roff *r, const char *name, size_t len,
3034 int val, char sign, int step)
3035 {
3036 struct roffreg *reg;
3037
3038 /* Search for an existing register with the same name. */
3039 reg = r->regtab;
3040
3041 while (reg != NULL && (reg->key.sz != len ||
3042 strncmp(reg->key.p, name, len) != 0))
3043 reg = reg->next;
3044
3045 if (NULL == reg) {
3046 /* Create a new register. */
3047 reg = mandoc_malloc(sizeof(struct roffreg));
3048 reg->key.p = mandoc_strndup(name, len);
3049 reg->key.sz = len;
3050 reg->val = 0;
3051 reg->step = 0;
3052 reg->next = r->regtab;
3053 r->regtab = reg;
3054 }
3055
3056 if ('+' == sign)
3057 reg->val += val;
3058 else if ('-' == sign)
3059 reg->val -= val;
3060 else
3061 reg->val = val;
3062 if (step != INT_MIN)
3063 reg->step = step;
3064 }
3065
3066 /*
3067 * Handle some predefined read-only number registers.
3068 * For now, return -1 if the requested register is not predefined;
3069 * in case a predefined read-only register having the value -1
3070 * were to turn up, another special value would have to be chosen.
3071 */
3072 static int
3073 roff_getregro(const struct roff *r, const char *name)
3074 {
3075
3076 switch (*name) {
3077 case '$': /* Number of arguments of the last macro evaluated. */
3078 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
3079 case 'A': /* ASCII approximation mode is always off. */
3080 return 0;
3081 case 'g': /* Groff compatibility mode is always on. */
3082 return 1;
3083 case 'H': /* Fixed horizontal resolution. */
3084 return 24;
3085 case 'j': /* Always adjust left margin only. */
3086 return 0;
3087 case 'T': /* Some output device is always defined. */
3088 return 1;
3089 case 'V': /* Fixed vertical resolution. */
3090 return 40;
3091 default:
3092 return -1;
3093 }
3094 }
3095
/*
 * Return the value of the register with the given
 * NUL-terminated name, without stepping it.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	const size_t	 namesz = strlen(name);

	return roff_getregn(r, name, namesz, '\0');
}
3101
3102 static int
3103 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
3104 {
3105 struct roffreg *reg;
3106 int val;
3107
3108 if ('.' == name[0] && 2 == len) {
3109 val = roff_getregro(r, name + 1);
3110 if (-1 != val)
3111 return val;
3112 }
3113
3114 for (reg = r->regtab; reg; reg = reg->next) {
3115 if (len == reg->key.sz &&
3116 0 == strncmp(name, reg->key.p, len)) {
3117 switch (sign) {
3118 case '+':
3119 reg->val += reg->step;
3120 break;
3121 case '-':
3122 reg->val -= reg->step;
3123 break;
3124 default:
3125 break;
3126 }
3127 return reg->val;
3128 }
3129 }
3130
3131 roff_setregn(r, name, len, 0, '\0', INT_MIN);
3132 return 0;
3133 }
3134
3135 static int
3136 roff_hasregn(const struct roff *r, const char *name, size_t len)
3137 {
3138 struct roffreg *reg;
3139 int val;
3140
3141 if ('.' == name[0] && 2 == len) {
3142 val = roff_getregro(r, name + 1);
3143 if (-1 != val)
3144 return 1;
3145 }
3146
3147 for (reg = r->regtab; reg; reg = reg->next)
3148 if (len == reg->key.sz &&
3149 0 == strncmp(name, reg->key.p, len))
3150 return 1;
3151
3152 return 0;
3153 }
3154
3155 static void
3156 roff_freereg(struct roffreg *reg)
3157 {
3158 struct roffreg *old_reg;
3159
3160 while (NULL != reg) {
3161 free(reg->key.p);
3162 old_reg = reg;
3163 reg = reg->next;
3164 free(old_reg);
3165 }
3166 }
3167
3168 static int
3169 roff_nr(ROFF_ARGS)
3170 {
3171 char *key, *val, *step;
3172 size_t keysz;
3173 int iv, is, len;
3174 char sign;
3175
3176 key = val = buf->buf + pos;
3177 if (*key == '\0')
3178 return ROFF_IGN;
3179
3180 keysz = roff_getname(r, &val, ln, pos);
3181 if (key[keysz] == '\\' || key[keysz] == '\t')
3182 return ROFF_IGN;
3183
3184 sign = *val;
3185 if (sign == '+' || sign == '-')
3186 val++;
3187
3188 len = 0;
3189 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
3190 return ROFF_IGN;
3191
3192 step = val + len;
3193 while (isspace((unsigned char)*step))
3194 step++;
3195 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
3196 is = INT_MIN;
3197
3198 roff_setregn(r, key, keysz, iv, sign, is);
3199 return ROFF_IGN;
3200 }
3201
3202 static int
3203 roff_rr(ROFF_ARGS)
3204 {
3205 struct roffreg *reg, **prev;
3206 char *name, *cp;
3207 size_t namesz;
3208
3209 name = cp = buf->buf + pos;
3210 if (*name == '\0')
3211 return ROFF_IGN;
3212 namesz = roff_getname(r, &cp, ln, pos);
3213 name[namesz] = '\0';
3214
3215 prev = &r->regtab;
3216 while (1) {
3217 reg = *prev;
3218 if (reg == NULL || !strcmp(name, reg->key.p))
3219 break;
3220 prev = &reg->next;
3221 }
3222 if (reg != NULL) {
3223 *prev = reg->next;
3224 free(reg->key.p);
3225 free(reg);
3226 }
3227 return ROFF_IGN;
3228 }
3229
3230 /* --- handler functions for roff requests -------------------------------- */
3231
3232 static int
3233 roff_rm(ROFF_ARGS)
3234 {
3235 const char *name;
3236 char *cp;
3237 size_t namesz;
3238
3239 cp = buf->buf + pos;
3240 while (*cp != '\0') {
3241 name = cp;
3242 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
3243 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
3244 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3245 if (name[namesz] == '\\' || name[namesz] == '\t')
3246 break;
3247 }
3248 return ROFF_IGN;
3249 }
3250
3251 static int
3252 roff_it(ROFF_ARGS)
3253 {
3254 int iv;
3255
3256 /* Parse the number of lines. */
3257
3258 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
3259 mandoc_msg(MANDOCERR_IT_NONUM,
3260 ln, ppos, "%s", buf->buf + 1);
3261 return ROFF_IGN;
3262 }
3263
3264 while (isspace((unsigned char)buf->buf[pos]))
3265 pos++;
3266
3267 /*
3268 * Arm the input line trap.
3269 * Special-casing "an-trap" is an ugly workaround to cope
3270 * with DocBook stupidly fiddling with man(7) internals.
3271 */
3272
3273 roffit_lines = iv;
3274 roffit_macro = mandoc_strdup(iv != 1 ||
3275 strcmp(buf->buf + pos, "an-trap") ?
3276 buf->buf + pos : "br");
3277 return ROFF_IGN;
3278 }
3279
3280 static int
3281 roff_Dd(ROFF_ARGS)
3282 {
3283 int mask;
3284 enum roff_tok t, te;
3285
3286 switch (tok) {
3287 case ROFF_Dd:
3288 tok = MDOC_Dd;
3289 te = MDOC_MAX;
3290 if (r->format == 0)
3291 r->format = MPARSE_MDOC;
3292 mask = MPARSE_MDOC | MPARSE_QUICK;
3293 break;
3294 case ROFF_TH:
3295 tok = MAN_TH;
3296 te = MAN_MAX;
3297 if (r->format == 0)
3298 r->format = MPARSE_MAN;
3299 mask = MPARSE_QUICK;
3300 break;
3301 default:
3302 abort();
3303 }
3304 if ((r->options & mask) == 0)
3305 for (t = tok; t < te; t++)
3306 roff_setstr(r, roff_name[t], NULL, 0);
3307 return ROFF_CONT;
3308 }
3309
3310 static int
3311 roff_TE(ROFF_ARGS)
3312 {
3313 r->man->flags &= ~ROFF_NONOFILL;
3314 if (r->tbl == NULL) {
3315 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE");
3316 return ROFF_IGN;
3317 }
3318 if (tbl_end(r->tbl, 0) == 0) {
3319 r->tbl = NULL;
3320 free(buf->buf);
3321 buf->buf = mandoc_strdup(".sp");
3322 buf->sz = 4;
3323 *offs = 0;
3324 return ROFF_REPARSE;
3325 }
3326 r->tbl = NULL;
3327 return ROFF_IGN;
3328 }
3329
3330 static int
3331 roff_T_(ROFF_ARGS)
3332 {
3333
3334 if (NULL == r->tbl)
3335 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&");
3336 else
3337 tbl_restart(ln, ppos, r->tbl);
3338
3339 return ROFF_IGN;
3340 }
3341
3342 /*
3343 * Handle in-line equation delimiters.
3344 */
3345 static int
3346 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
3347 {
3348 char *cp1, *cp2;
3349 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
3350
3351 /*
3352 * Outside equations, look for an opening delimiter.
3353 * If we are inside an equation, we already know it is
3354 * in-line, or this function wouldn't have been called;
3355 * so look for a closing delimiter.
3356 */
3357
3358 cp1 = buf->buf + pos;
3359 cp2 = strchr(cp1, r->eqn == NULL ?
3360 r->last_eqn->odelim : r->last_eqn->cdelim);
3361 if (cp2 == NULL)
3362 return ROFF_CONT;
3363
3364 *cp2++ = '\0';
3365 bef_pr = bef_nl = aft_nl = aft_pr = "";
3366
3367 /* Handle preceding text, protecting whitespace. */
3368
3369 if (*buf->buf != '\0') {
3370 if (r->eqn == NULL)
3371 bef_pr = "\\&";
3372 bef_nl = "\n";
3373 }
3374
3375 /*
3376 * Prepare replacing the delimiter with an equation macro
3377 * and drop leading white space from the equation.
3378 */
3379
3380 if (r->eqn == NULL) {
3381 while (*cp2 == ' ')
3382 cp2++;
3383 mac = ".EQ";
3384 } else
3385 mac = ".EN";
3386
3387 /* Handle following text, protecting whitespace. */
3388
3389 if (*cp2 != '\0') {
3390 aft_nl = "\n";
3391 if (r->eqn != NULL)
3392 aft_pr = "\\&";
3393 }
3394
3395 /* Do the actual replacement. */
3396
3397 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
3398 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
3399 free(buf->buf);
3400 buf->buf = cp1;
3401
3402 /* Toggle the in-line state of the eqn subsystem. */
3403
3404 r->eqn_inline = r->eqn == NULL;
3405 return ROFF_REPARSE;
3406 }
3407
3408 static int
3409 roff_EQ(ROFF_ARGS)
3410 {
3411 struct roff_node *n;
3412
3413 if (r->man->meta.macroset == MACROSET_MAN)
3414 man_breakscope(r->man, ROFF_EQ);
3415 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3416 if (ln > r->man->last->line)
3417 n->flags |= NODE_LINE;
3418 n->eqn = eqn_box_new();
3419 roff_node_append(r->man, n);
3420 r->man->next = ROFF_NEXT_SIBLING;
3421
3422 assert(r->eqn == NULL);
3423 if (r->last_eqn == NULL)
3424 r->last_eqn = eqn_alloc();
3425 else
3426 eqn_reset(r->last_eqn);
3427 r->eqn = r->last_eqn;
3428 r->eqn->node = n;
3429
3430 if (buf->buf[pos] != '\0')
3431 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3432 ".EQ %s", buf->buf + pos);
3433
3434 return ROFF_IGN;
3435 }
3436
3437 static int
3438 roff_EN(ROFF_ARGS)
3439 {
3440 if (r->eqn != NULL) {
3441 eqn_parse(r->eqn);
3442 r->eqn = NULL;
3443 } else
3444 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN");
3445 if (buf->buf[pos] != '\0')
3446 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3447 "EN %s", buf->buf + pos);
3448 return ROFF_IGN;
3449 }
3450
3451 static int
3452 roff_TS(ROFF_ARGS)
3453 {
3454 if (r->tbl != NULL) {
3455 mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
3456 tbl_end(r->tbl, 0);
3457 }
3458 r->man->flags |= ROFF_NONOFILL;
3459 r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
3460 if (r->last_tbl == NULL)
3461 r->first_tbl = r->tbl;
3462 r->last_tbl = r->tbl;
3463 return ROFF_IGN;
3464 }
3465
3466 static int
3467 roff_noarg(ROFF_ARGS)
3468 {
3469 if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3470 man_breakscope(r->man, tok);
3471 if (tok == ROFF_brp)
3472 tok = ROFF_br;
3473 roff_elem_alloc(r->man, ln, ppos, tok);
3474 if (buf->buf[pos] != '\0')
3475 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3476 "%s %s", roff_name[tok], buf->buf + pos);
3477 if (tok == ROFF_nf)
3478 r->man->flags |= ROFF_NOFILL;
3479 else if (tok == ROFF_fi)
3480 r->man->flags &= ~ROFF_NOFILL;
3481 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3482 r->man->next = ROFF_NEXT_SIBLING;
3483 return ROFF_IGN;
3484 }
3485
3486 static int
3487 roff_onearg(ROFF_ARGS)
3488 {
3489 struct roff_node *n;
3490 char *cp;
3491 int npos;
3492
3493 if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3494 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3495 tok == ROFF_ti))
3496 man_breakscope(r->man, tok);
3497
3498 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3499 r->man->last = roffce_node;
3500 r->man->next = ROFF_NEXT_SIBLING;
3501 }
3502
3503 roff_elem_alloc(r->man, ln, ppos, tok);
3504 n = r->man->last;
3505
3506 cp = buf->buf + pos;
3507 if (*cp != '\0') {
3508 while (*cp != '\0' && *cp != ' ')
3509 cp++;
3510 while (*cp == ' ')
3511 *cp++ = '\0';
3512 if (*cp != '\0')
3513 mandoc_msg(MANDOCERR_ARG_EXCESS,
3514 ln, (int)(cp - buf->buf),
3515 "%s ... %s", roff_name[tok], cp);
3516 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3517 }
3518
3519 if (tok == ROFF_ce || tok == ROFF_rj) {
3520 if (r->man->last->type == ROFFT_ELEM) {
3521 roff_word_alloc(r->man, ln, pos, "1");
3522 r->man->last->flags |= NODE_NOSRC;
3523 }
3524 npos = 0;
3525 if (roff_evalnum(r, ln, r->man->last->string, &npos,
3526 &roffce_lines, 0) == 0) {
3527 mandoc_msg(MANDOCERR_CE_NONUM,
3528 ln, pos, "ce %s", buf->buf + pos);
3529 roffce_lines = 1;
3530 }
3531 if (roffce_lines < 1) {
3532 r->man->last = r->man->last->parent;
3533 roffce_node = NULL;
3534 roffce_lines = 0;
3535 } else
3536 roffce_node = r->man->last->parent;
3537 } else {
3538 n->flags |= NODE_VALID | NODE_ENDED;
3539 r->man->last = n;
3540 }
3541 n->flags |= NODE_LINE;
3542 r->man->next = ROFF_NEXT_SIBLING;
3543 return ROFF_IGN;
3544 }
3545
3546 static int
3547 roff_manyarg(ROFF_ARGS)
3548 {
3549 struct roff_node *n;
3550 char *sp, *ep;
3551
3552 roff_elem_alloc(r->man, ln, ppos, tok);
3553 n = r->man->last;
3554
3555 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3556 while (*ep != '\0' && *ep != ' ')
3557 ep++;
3558 while (*ep == ' ')
3559 *ep++ = '\0';
3560 roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3561 }
3562
3563 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3564 r->man->last = n;
3565 r->man->next = ROFF_NEXT_SIBLING;
3566 return ROFF_IGN;
3567 }
3568
3569 static int
3570 roff_als(ROFF_ARGS)
3571 {
3572 char *oldn, *newn, *end, *value;
3573 size_t oldsz, newsz, valsz;
3574
3575 newn = oldn = buf->buf + pos;
3576 if (*newn == '\0')
3577 return ROFF_IGN;
3578
3579 newsz = roff_getname(r, &oldn, ln, pos);
3580 if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0')
3581 return ROFF_IGN;
3582
3583 end = oldn;
3584 oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3585 if (oldsz == 0)
3586 return ROFF_IGN;
3587
3588 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3589 (int)oldsz, oldn);
3590 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3591 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3592 free(value);
3593 return ROFF_IGN;
3594 }
3595
3596 /*
3597 * The .break request only makes sense inside conditionals,
3598 * and that case is already handled in roff_cond_sub().
3599 */
3600 static int
3601 roff_break(ROFF_ARGS)
3602 {
3603 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break");
3604 return ROFF_IGN;
3605 }
3606
3607 static int
3608 roff_cc(ROFF_ARGS)
3609 {
3610 const char *p;
3611
3612 p = buf->buf + pos;
3613
3614 if (*p == '\0' || (r->control = *p++) == '.')
3615 r->control = '\0';
3616
3617 if (*p != '\0')
3618 mandoc_msg(MANDOCERR_ARG_EXCESS,
3619 ln, p - buf->buf, "cc ... %s", p);
3620
3621 return ROFF_IGN;
3622 }
3623
3624 static int
3625 roff_char(ROFF_ARGS)
3626 {
3627 const char *p, *kp, *vp;
3628 size_t ksz, vsz;
3629 int font;
3630
3631 /* Parse the character to be replaced. */
3632
3633 kp = buf->buf + pos;
3634 p = kp + 1;
3635 if (*kp == '\0' || (*kp == '\\' &&
3636 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3637 (*p != ' ' && *p != '\0')) {
3638 mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp);
3639 return ROFF_IGN;
3640 }
3641 ksz = p - kp;
3642 while (*p == ' ')
3643 p++;
3644
3645 /*
3646 * If the replacement string contains a font escape sequence,
3647 * we have to restore the font at the end.
3648 */
3649
3650 vp = p;
3651 vsz = strlen(p);
3652 font = 0;
3653 while (*p != '\0') {
3654 if (*p++ != '\\')
3655 continue;
3656 switch (mandoc_escape(&p, NULL, NULL)) {
3657 case ESCAPE_FONT:
3658 case ESCAPE_FONTROMAN:
3659 case ESCAPE_FONTITALIC:
3660 case ESCAPE_FONTBOLD:
3661 case ESCAPE_FONTBI:
3662 case ESCAPE_FONTCW:
3663 case ESCAPE_FONTPREV:
3664 font++;
3665 break;
3666 default:
3667 break;
3668 }
3669 }
3670 if (font > 1)
3671 mandoc_msg(MANDOCERR_CHAR_FONT,
3672 ln, (int)(vp - buf->buf), "%s", vp);
3673
3674 /*
3675 * Approximate the effect of .char using the .tr tables.
3676 * XXX In groff, .char and .tr interact differently.
3677 */
3678
3679 if (ksz == 1) {
3680 if (r->xtab == NULL)
3681 r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3682 assert((unsigned int)*kp < 128);
3683 free(r->xtab[(int)*kp].p);
3684 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3685 "%s%s", vp, font ? "\fP" : "");
3686 } else {
3687 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3688 if (font)
3689 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3690 }
3691 return ROFF_IGN;
3692 }
3693
3694 static int
3695 roff_ec(ROFF_ARGS)
3696 {
3697 const char *p;
3698
3699 p = buf->buf + pos;
3700 if (*p == '\0')
3701 r->escape = '\\';
3702 else {
3703 r->escape = *p;
3704 if (*++p != '\0')
3705 mandoc_msg(MANDOCERR_ARG_EXCESS, ln,
3706 (int)(p - buf->buf), "ec ... %s", p);
3707 }
3708 return ROFF_IGN;
3709 }
3710
3711 static int
3712 roff_eo(ROFF_ARGS)
3713 {
3714 r->escape = '\0';
3715 if (buf->buf[pos] != '\0')
3716 mandoc_msg(MANDOCERR_ARG_SKIP,
3717 ln, pos, "eo %s", buf->buf + pos);
3718 return ROFF_IGN;
3719 }
3720
3721 static int
3722 roff_nop(ROFF_ARGS)
3723 {
3724 while (buf->buf[pos] == ' ')
3725 pos++;
3726 *offs = pos;
3727 return ROFF_RERUN;
3728 }
3729
3730 static int
3731 roff_tr(ROFF_ARGS)
3732 {
3733 const char *p, *first, *second;
3734 size_t fsz, ssz;
3735 enum mandoc_esc esc;
3736
3737 p = buf->buf + pos;
3738
3739 if (*p == '\0') {
3740 mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr");
3741 return ROFF_IGN;
3742 }
3743
3744 while (*p != '\0') {
3745 fsz = ssz = 1;
3746
3747 first = p++;
3748 if (*first == '\\') {
3749 esc = mandoc_escape(&p, NULL, NULL);
3750 if (esc == ESCAPE_ERROR) {
3751 mandoc_msg(MANDOCERR_ESC_BAD, ln,
3752 (int)(p - buf->buf), "%s", first);
3753 return ROFF_IGN;
3754 }
3755 fsz = (size_t)(p - first);
3756 }
3757
3758 second = p++;
3759 if (*second == '\\') {
3760 esc = mandoc_escape(&p, NULL, NULL);
3761 if (esc == ESCAPE_ERROR) {
3762 mandoc_msg(MANDOCERR_ESC_BAD, ln,
3763 (int)(p - buf->buf), "%s", second);
3764 return ROFF_IGN;
3765 }
3766 ssz = (size_t)(p - second);
3767 } else if (*second == '\0') {
3768 mandoc_msg(MANDOCERR_TR_ODD, ln,
3769 (int)(first - buf->buf), "tr %s", first);
3770 second = " ";
3771 p--;
3772 }
3773
3774 if (fsz > 1) {
3775 roff_setstrn(&r->xmbtab, first, fsz,
3776 second, ssz, 0);
3777 continue;
3778 }
3779
3780 if (r->xtab == NULL)
3781 r->xtab = mandoc_calloc(128,
3782 sizeof(struct roffstr));
3783
3784 free(r->xtab[(int)*first].p);
3785 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3786 r->xtab[(int)*first].sz = ssz;
3787 }
3788
3789 return ROFF_IGN;
3790 }
3791
3792 /*
3793 * Implementation of the .return request.
3794 * There is no need to call roff_userret() from here.
3795 * The read module will call that after rewinding the reader stack
3796 * to the place from where the current macro was called.
3797 */
3798 static int
3799 roff_return(ROFF_ARGS)
3800 {
3801 if (r->mstackpos >= 0)
3802 return ROFF_IGN | ROFF_USERRET;
3803
3804 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return");
3805 return ROFF_IGN;
3806 }
3807
3808 static int
3809 roff_rn(ROFF_ARGS)
3810 {
3811 const char *value;
3812 char *oldn, *newn, *end;
3813 size_t oldsz, newsz;
3814 int deftype;
3815
3816 oldn = newn = buf->buf + pos;
3817 if (*oldn == '\0')
3818 return ROFF_IGN;
3819
3820 oldsz = roff_getname(r, &newn, ln, pos);
3821 if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0')
3822 return ROFF_IGN;
3823
3824 end = newn;
3825 newsz = roff_getname(r, &end, ln, newn - buf->buf);
3826 if (newsz == 0)
3827 return ROFF_IGN;
3828
3829 deftype = ROFFDEF_ANY;
3830 value = roff_getstrn(r, oldn, oldsz, &deftype);
3831 switch (deftype) {
3832 case ROFFDEF_USER:
3833 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3834 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3835 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3836 break;
3837 case ROFFDEF_PRE:
3838 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3839 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3840 break;
3841 case ROFFDEF_REN:
3842 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3843 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3844 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3845 break;
3846 case ROFFDEF_STD:
3847 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3848 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3849 break;
3850 default:
3851 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3852 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3853 break;
3854 }
3855 return ROFF_IGN;
3856 }
3857
3858 static int
3859 roff_shift(ROFF_ARGS)
3860 {
3861 struct mctx *ctx;
3862 int levels, i;
3863
3864 levels = 1;
3865 if (buf->buf[pos] != '\0' &&
3866 roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
3867 mandoc_msg(MANDOCERR_CE_NONUM,
3868 ln, pos, "shift %s", buf->buf + pos);
3869 levels = 1;
3870 }
3871 if (r->mstackpos < 0) {
3872 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift");
3873 return ROFF_IGN;
3874 }
3875 ctx = r->mstack + r->mstackpos;
3876 if (levels > ctx->argc) {
3877 mandoc_msg(MANDOCERR_SHIFT,
3878 ln, pos, "%d, but max is %d", levels, ctx->argc);
3879 levels = ctx->argc;
3880 }
3881 if (levels == 0)
3882 return ROFF_IGN;
3883 for (i = 0; i < levels; i++)
3884 free(ctx->argv[i]);
3885 ctx->argc -= levels;
3886 for (i = 0; i < ctx->argc; i++)
3887 ctx->argv[i] = ctx->argv[i + levels];
3888 return ROFF_IGN;
3889 }
3890
3891 static int
3892 roff_so(ROFF_ARGS)
3893 {
3894 char *name, *cp;
3895
3896 name = buf->buf + pos;
3897 mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name);
3898
3899 /*
3900 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3901 * opening anything that's not in our cwd or anything beneath
3902 * it. Thus, explicitly disallow traversing up the file-system
3903 * or using absolute paths.
3904 */
3905
3906 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3907 mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name);
3908 buf->sz = mandoc_asprintf(&cp,
3909 ".sp\nSee the file %s.\n.sp", name) + 1;
3910 free(buf->buf);
3911 buf->buf = cp;
3912 *offs = 0;
3913 return ROFF_REPARSE;
3914 }
3915
3916 *offs = pos;
3917 return ROFF_SO;
3918 }
3919
3920 /* --- user defined strings and macros ------------------------------------ */
3921
3922 static int
3923 roff_userdef(ROFF_ARGS)
3924 {
3925 struct mctx *ctx;
3926 char *arg, *ap, *dst, *src;
3927 size_t sz;
3928
3929 /* If the macro is empty, ignore it altogether. */
3930
3931 if (*r->current_string == '\0')
3932 return ROFF_IGN;
3933
3934 /* Initialize a new macro stack context. */
3935
3936 if (++r->mstackpos == r->mstacksz) {
3937 r->mstack = mandoc_recallocarray(r->mstack,
3938 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
3939 r->mstacksz += 8;
3940 }
3941 ctx = r->mstack + r->mstackpos;
3942 ctx->argsz = 0;
3943 ctx->argc = 0;
3944 ctx->argv = NULL;
3945
3946 /*
3947 * Collect pointers to macro argument strings,
3948 * NUL-terminating them and escaping quotes.
3949 */
3950
3951 src = buf->buf + pos;
3952 while (*src != '\0') {
3953 if (ctx->argc == ctx->argsz) {
3954 ctx->argsz += 8;
3955 ctx->argv = mandoc_reallocarray(ctx->argv,
3956 ctx->argsz, sizeof(*ctx->argv));
3957 }
3958 arg = roff_getarg(r, &src, ln, &pos);
3959 sz = 1; /* For the terminating NUL. */
3960 for (ap = arg; *ap != '\0'; ap++)
3961 sz += *ap == '"' ? 4 : 1;
3962 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
3963 for (ap = arg; *ap != '\0'; ap++) {
3964 if (*ap == '"') {
3965 memcpy(dst, "\\(dq", 4);
3966 dst += 4;
3967 } else
3968 *dst++ = *ap;
3969 }
3970 *dst = '\0';
3971 free(arg);
3972 }
3973
3974 /* Replace the macro invocation by the macro definition. */
3975
3976 free(buf->buf);
3977 buf->buf = mandoc_strdup(r->current_string);
3978 buf->sz = strlen(buf->buf) + 1;
3979 *offs = 0;
3980
3981 return buf->buf[buf->sz - 2] == '\n' ?
3982 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
3983 }
3984
3985 /*
3986 * Calling a high-level macro that was renamed with .rn.
3987 * r->current_string has already been set up by roff_parse().
3988 */
3989 static int
3990 roff_renamed(ROFF_ARGS)
3991 {
3992 char *nbuf;
3993
3994 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
3995 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
3996 free(buf->buf);
3997 buf->buf = nbuf;
3998 *offs = 0;
3999 return ROFF_CONT;
4000 }
4001
4002 /*
4003 * Measure the length in bytes of the roff identifier at *cpp
4004 * and advance the pointer to the next word.
4005 */
4006 static size_t
4007 roff_getname(struct roff *r, char **cpp, int ln, int pos)
4008 {
4009 char *name, *cp;
4010 size_t namesz;
4011
4012 name = *cpp;
4013 if (*name == '\0')
4014 return 0;
4015
4016 /* Advance cp to the byte after the end of the name. */
4017
4018 for (cp = name; 1; cp++) {
4019 namesz = cp - name;
4020 if (*cp == '\0')
4021 break;
4022 if (*cp == ' ' || *cp == '\t') {
4023 cp++;
4024 break;
4025 }
4026 if (*cp != '\\')
4027 continue;
4028 if (cp[1] == '{' || cp[1] == '}')
4029 break;
4030 if (*++cp == '\\')
4031 continue;
4032 mandoc_msg(MANDOCERR_NAMESC, ln, pos,
4033 "%.*s", (int)(cp - name + 1), name);
4034 mandoc_escape((const char **)&cp, NULL, NULL);
4035 break;
4036 }
4037
4038 /* Read past spaces. */
4039
4040 while (*cp == ' ')
4041 cp++;
4042
4043 *cpp = cp;
4044 return namesz;
4045 }
4046
4047 /*
4048 * Store *string into the user-defined string called *name.
4049 * To clear an existing entry, call with (*r, *name, NULL, 0).
4050 * append == 0: replace mode
4051 * append == 1: single-line append mode
4052 * append == 2: multiline append mode, append '\n' after each call
4053 */
4054 static void
4055 roff_setstr(struct roff *r, const char *name, const char *string,
4056 int append)
4057 {
4058 size_t namesz;
4059
4060 namesz = strlen(name);
4061 roff_setstrn(&r->strtab, name, namesz, string,
4062 string ? strlen(string) : 0, append);
4063 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
4064 }
4065
4066 static void
4067 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
4068 const char *string, size_t stringsz, int append)
4069 {
4070 struct roffkv *n;
4071 char *c;
4072 int i;
4073 size_t oldch, newch;
4074
4075 /* Search for an existing string with the same name. */
4076 n = *r;
4077
4078 while (n && (namesz != n->key.sz ||
4079 strncmp(n->key.p, name, namesz)))
4080 n = n->next;
4081
4082 if (NULL == n) {
4083 /* Create a new string table entry. */
4084 n = mandoc_malloc(sizeof(struct roffkv));
4085 n->key.p = mandoc_strndup(name, namesz);
4086 n->key.sz = namesz;
4087 n->val.p = NULL;
4088 n->val.sz = 0;
4089 n->next = *r;
4090 *r = n;
4091 } else if (0 == append) {
4092 free(n->val.p);
4093 n->val.p = NULL;
4094 n->val.sz = 0;
4095 }
4096
4097 if (NULL == string)
4098 return;
4099
4100 /*
4101 * One additional byte for the '\n' in multiline mode,
4102 * and one for the terminating '\0'.
4103 */
4104 newch = stringsz + (1 < append ? 2u : 1u);
4105
4106 if (NULL == n->val.p) {
4107 n->val.p = mandoc_malloc(newch);
4108 *n->val.p = '\0';
4109 oldch = 0;
4110 } else {
4111 oldch = n->val.sz;
4112 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
4113 }
4114
4115 /* Skip existing content in the destination buffer. */
4116 c = n->val.p + (int)oldch;
4117
4118 /* Append new content to the destination buffer. */
4119 i = 0;
4120 while (i < (int)stringsz) {
4121 /*
4122 * Rudimentary roff copy mode:
4123 * Handle escaped backslashes.
4124 */
4125 if ('\\' == string[i] && '\\' == string[i + 1])
4126 i++;
4127 *c++ = string[i++];
4128 }
4129
4130 /* Append terminating bytes. */
4131 if (1 < append)
4132 *c++ = '\n';
4133
4134 *c = '\0';
4135 n->val.sz = (int)(c - n->val.p);
4136 }
4137
4138 static const char *
4139 roff_getstrn(struct roff *r, const char *name, size_t len,
4140 int *deftype)
4141 {
4142 const struct roffkv *n;
4143 int found, i;
4144 enum roff_tok tok;
4145
4146 found = 0;
4147 for (n = r->strtab; n != NULL; n = n->next) {
4148 if (strncmp(name, n->key.p, len) != 0 ||
4149 n->key.p[len] != '\0' || n->val.p == NULL)
4150 continue;
4151 if (*deftype & ROFFDEF_USER) {
4152 *deftype = ROFFDEF_USER;
4153 return n->val.p;
4154 } else {
4155 found = 1;
4156 break;
4157 }
4158 }
4159 for (n = r->rentab; n != NULL; n = n->next) {
4160 if (strncmp(name, n->key.p, len) != 0 ||
4161 n->key.p[len] != '\0' || n->val.p == NULL)
4162 continue;
4163 if (*deftype & ROFFDEF_REN) {
4164 *deftype = ROFFDEF_REN;
4165 return n->val.p;
4166 } else {
4167 found = 1;
4168 break;
4169 }
4170 }
4171 for (i = 0; i < PREDEFS_MAX; i++) {
4172 if (strncmp(name, predefs[i].name, len) != 0 ||
4173 predefs[i].name[len] != '\0')
4174 continue;
4175 if (*deftype & ROFFDEF_PRE) {
4176 *deftype = ROFFDEF_PRE;
4177 return predefs[i].str;
4178 } else {
4179 found = 1;
4180 break;
4181 }
4182 }
4183 if (r->man->meta.macroset != MACROSET_MAN) {
4184 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
4185 if (strncmp(name, roff_name[tok], len) != 0 ||
4186 roff_name[tok][len] != '\0')
4187 continue;
4188 if (*deftype & ROFFDEF_STD) {
4189 *deftype = ROFFDEF_STD;
4190 return NULL;
4191 } else {
4192 found = 1;
4193 break;
4194 }
4195 }
4196 }
4197 if (r->man->meta.macroset != MACROSET_MDOC) {
4198 for (tok = MAN_TH; tok < MAN_MAX; tok++) {
4199 if (strncmp(name, roff_name[tok], len) != 0 ||
4200 roff_name[tok][len] != '\0')
4201 continue;
4202 if (*deftype & ROFFDEF_STD) {
4203 *deftype = ROFFDEF_STD;
4204 return NULL;
4205 } else {
4206 found = 1;
4207 break;
4208 }
4209 }
4210 }
4211
4212 if (found == 0 && *deftype != ROFFDEF_ANY) {
4213 if (*deftype & ROFFDEF_REN) {
4214 /*
4215 * This might still be a request,
4216 * so do not treat it as undefined yet.
4217 */
4218 *deftype = ROFFDEF_UNDEF;
4219 return NULL;
4220 }
4221
4222 /* Using an undefined string defines it to be empty. */
4223
4224 roff_setstrn(&r->strtab, name, len, "", 0, 0);
4225 roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
4226 }
4227
4228 *deftype = 0;
4229 return NULL;
4230 }
4231
4232 static void
4233 roff_freestr(struct roffkv *r)
4234 {
4235 struct roffkv *n, *nn;
4236
4237 for (n = r; n; n = nn) {
4238 free(n->key.p);
4239 free(n->val.p);
4240 nn = n->next;
4241 free(n);
4242 }
4243 }
4244
4245 /* --- accessors and utility functions ------------------------------------ */
4246
4247 /*
4248 * Duplicate an input string, making the appropriate character
4249 * conversations (as stipulated by `tr') along the way.
4250 * Returns a heap-allocated string with all the replacements made.
4251 */
4252 char *
4253 roff_strdup(const struct roff *r, const char *p)
4254 {
4255 const struct roffkv *cp;
4256 char *res;
4257 const char *pp;
4258 size_t ssz, sz;
4259 enum mandoc_esc esc;
4260
4261 if (NULL == r->xmbtab && NULL == r->xtab)
4262 return mandoc_strdup(p);
4263 else if ('\0' == *p)
4264 return mandoc_strdup("");
4265
4266 /*
4267 * Step through each character looking for term matches
4268 * (remember that a `tr' can be invoked with an escape, which is
4269 * a glyph but the escape is multi-character).
4270 * We only do this if the character hash has been initialised
4271 * and the string is >0 length.
4272 */
4273
4274 res = NULL;
4275 ssz = 0;
4276
4277 while ('\0' != *p) {
4278 assert((unsigned int)*p < 128);
4279 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4280 sz = r->xtab[(int)*p].sz;
4281 res = mandoc_realloc(res, ssz + sz + 1);
4282 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4283 ssz += sz;
4284 p++;
4285 continue;
4286 } else if ('\\' != *p) {
4287 res = mandoc_realloc(res, ssz + 2);
4288 res[ssz++] = *p++;
4289 continue;
4290 }
4291
4292 /* Search for term matches. */
4293 for (cp = r->xmbtab; cp; cp = cp->next)
4294 if (0 == strncmp(p, cp->key.p, cp->key.sz))
4295 break;
4296
4297 if (NULL != cp) {
4298 /*
4299 * A match has been found.
4300 * Append the match to the array and move
4301 * forward by its keysize.
4302 */
4303 res = mandoc_realloc(res,
4304 ssz + cp->val.sz + 1);
4305 memcpy(res + ssz, cp->val.p, cp->val.sz);
4306 ssz += cp->val.sz;
4307 p += (int)cp->key.sz;
4308 continue;
4309 }
4310
4311 /*
4312 * Handle escapes carefully: we need to copy
4313 * over just the escape itself, or else we might
4314 * do replacements within the escape itself.
4315 * Make sure to pass along the bogus string.
4316 */
4317 pp = p++;
4318 esc = mandoc_escape(&p, NULL, NULL);
4319 if (ESCAPE_ERROR == esc) {
4320 sz = strlen(pp);
4321 res = mandoc_realloc(res, ssz + sz + 1);
4322 memcpy(res + ssz, pp, sz);
4323 break;
4324 }
4325 /*
4326 * We bail out on bad escapes.
4327 * No need to warn: we already did so when
4328 * roff_expand() was called.
4329 */
4330 sz = (int)(p - pp);
4331 res = mandoc_realloc(res, ssz + sz + 1);
4332 memcpy(res + ssz, pp, sz);
4333 ssz += sz;
4334 }
4335
4336 res[(int)ssz] = '\0';
4337 return res;
4338 }
4339
4340 int
4341 roff_getformat(const struct roff *r)
4342 {
4343
4344 return r->format;
4345 }
4346
4347 /*
4348 * Find out whether a line is a macro line or not.
4349 * If it is, adjust the current position and return one; if it isn't,
4350 * return zero and don't change the current position.
4351 * If the control character has been set with `.cc', then let that grain
4352 * precedence.
4353 * This is slighly contrary to groff, where using the non-breaking
4354 * control character when `cc' has been invoked will cause the
4355 * non-breaking macro contents to be printed verbatim.
4356 */
4357 int
4358 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4359 {
4360 int pos;
4361
4362 pos = *ppos;
4363
4364 if (r->control != '\0' && cp[pos] == r->control)
4365 pos++;
4366 else if (r->control != '\0')
4367 return 0;
4368 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4369 pos += 2;
4370 else if ('.' == cp[pos] || '\'' == cp[pos])
4371 pos++;
4372 else
4373 return 0;
4374
4375 while (' ' == cp[pos] || '\t' == cp[pos])
4376 pos++;
4377
4378 *ppos = pos;
4379 return 1;
4380 }