]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
In fragment identifiers, use ~%d for ordinal suffixes,
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.374 2020/04/08 11:56:03 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org>
4 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 * Implementation of the roff(7) parser for mandoc(1).
19 */
20 #include "config.h"
21
22 #include <sys/types.h>
23
24 #include <assert.h>
25 #include <ctype.h>
26 #include <limits.h>
27 #include <stddef.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32
33 #include "mandoc_aux.h"
34 #include "mandoc_ohash.h"
35 #include "mandoc.h"
36 #include "roff.h"
37 #include "mandoc_parse.h"
38 #include "libmandoc.h"
39 #include "roff_int.h"
40 #include "tbl_parse.h"
41 #include "eqn_parse.h"
42
43 /*
44 * ASCII_ESC is used to signal from roff_getarg() to roff_expand()
45 * that an escape sequence resulted from copy-in processing and
46 * needs to be checked or interpolated. As it is used nowhere
47 * else, it is defined here rather than in a header file.
48 */
49 #define ASCII_ESC 27
50
51 /* Maximum number of string expansions per line, to break infinite loops. */
52 #define EXPAND_LIMIT 1000
53
/*
 * Types of definitions of macros and strings.
 * Every type occupies its own bit, so several types can be
 * combined into one mask; ROFFDEF_ANY is such a combination.
 */
#define	ROFFDEF_USER	(1 << 1)  /* User-defined. */
#define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
#define	ROFFDEF_REN	(1 << 3)  /* Renamed standard macro. */
#define	ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
			 ROFFDEF_REN | ROFFDEF_STD)
#define	ROFFDEF_UNDEF	(1 << 5)  /* Completely undefined. */
62
63 /* --- data types --------------------------------------------------------- */
64
/*
 * An incredibly-simple string buffer:
 * a pointer to the bytes together with their cached length.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
72
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * Lists of this type back the string, rename, and multi-byte
 * translation tables in struct roff.
 */
struct	roffkv {
	struct roffstr	 key; /* name used for lookup */
	struct roffstr	 val; /* content stored under that name */
	struct roffkv	*next; /* next in list */
};
81
/*
 * A single number register as part of a singly-linked list.
 */
struct	roffreg {
	struct roffstr	 key; /* register name */
	int		 val; /* current value */
	int		 step; /* presumably the auto-increment step - confirm */
	struct roffreg	*next; /* next in list */
};
91
/*
 * Association of request and macro names with token IDs.
 * Objects of this type are the entries of the hash table built by
 * roffhash_alloc(); the name is stored inline after the token,
 * so each entry is allocated with room for the name.
 */
struct	roffreq {
	enum roff_tok	 tok; /* token ID the name maps to */
	char		 name[]; /* nil-terminated name, the hash key */
};
99
/*
 * A macro processing context.
 * More than one is needed when macro calls are nested.
 */
struct	mctx {
	char		**argv; /* argument vector; freed in roff_free() */
	int		 argc; /* presumably the number of arguments in use */
	int		 argsz; /* presumably the allocated size of argv */
};
109
/*
 * State of the roff parser.
 * Allocated by roff_alloc(), cleared for reuse by roff_reset(),
 * and destroyed by roff_free().
 */
struct	roff {
	struct roff_man	*man; /* mdoc or man parser */
	struct roffnode	*last; /* leaf of stack */
	struct mctx	*mstack; /* stack of macro contexts */
	int		*rstack; /* stack of inverted `ie' values */
	struct ohash	*reqtab; /* request lookup table */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*rentab; /* renamed strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* equation parser */
	struct eqn_node	*eqn; /* active equation parser */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 mstacksz; /* current size of mstack */
	int		 mstackpos; /* position in mstack */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack */
	int		 format; /* current file in mdoc or man format */
	char		 control; /* control character */
	char		 escape; /* escape character */
};
137
/*
 * A macro definition, condition, or ignored block.
 * Nodes form a stack linked through the parent pointer;
 * they are created by roffnode_push() and destroyed by roffnode_pop().
 */
struct	roffnode {
	enum roff_tok	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* custom end macro of the block */
	int		 endspan; /* scope to: 1=eol 2=next line -1=\} */
	int		 rule; /* content is: 1=evaluated 0=skipped */
};
151
/*
 * Parameter list shared by all request handler functions,
 * such that every handler matches the roffproc function type.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum roff_tok tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Pointer to a request handler function taking ROFF_ARGS. */
typedef	int (*roffproc)(ROFF_ARGS);
161
/*
 * Handlers and flags for one request; the entries of the roffs[] table.
 * Handlers that do not apply to a request are NULL.
 */
struct	roffmac {
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags; /* zero or ROFFMAC_STRUCT */
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
};
169
/*
 * One predefined string: an input name and the text it stands for.
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Expand to one initializer for a struct predef array element,
 * including the trailing comma.
 * Presumably invoked once per line by "predefs.in" - confirm.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
177
178 /* --- function prototypes ------------------------------------------------ */
179
180 static int roffnode_cleanscope(struct roff *);
181 static int roffnode_pop(struct roff *);
182 static void roffnode_push(struct roff *, enum roff_tok,
183 const char *, int, int);
184 static void roff_addtbl(struct roff_man *, int, struct tbl_node *);
185 static int roff_als(ROFF_ARGS);
186 static int roff_block(ROFF_ARGS);
187 static int roff_block_text(ROFF_ARGS);
188 static int roff_block_sub(ROFF_ARGS);
189 static int roff_break(ROFF_ARGS);
190 static int roff_cblock(ROFF_ARGS);
191 static int roff_cc(ROFF_ARGS);
192 static int roff_ccond(struct roff *, int, int);
193 static int roff_char(ROFF_ARGS);
194 static int roff_cond(ROFF_ARGS);
195 static int roff_cond_text(ROFF_ARGS);
196 static int roff_cond_sub(ROFF_ARGS);
197 static int roff_ds(ROFF_ARGS);
198 static int roff_ec(ROFF_ARGS);
199 static int roff_eo(ROFF_ARGS);
200 static int roff_eqndelim(struct roff *, struct buf *, int);
201 static int roff_evalcond(struct roff *, int, char *, int *);
202 static int roff_evalnum(struct roff *, int,
203 const char *, int *, int *, int);
204 static int roff_evalpar(struct roff *, int,
205 const char *, int *, int *, int);
206 static int roff_evalstrcond(const char *, int *);
207 static int roff_expand(struct roff *, struct buf *,
208 int, int, char);
209 static void roff_free1(struct roff *);
210 static void roff_freereg(struct roffreg *);
211 static void roff_freestr(struct roffkv *);
212 static size_t roff_getname(struct roff *, char **, int, int);
213 static int roff_getnum(const char *, int *, int *, int);
214 static int roff_getop(const char *, int *, char *);
215 static int roff_getregn(struct roff *,
216 const char *, size_t, char);
217 static int roff_getregro(const struct roff *,
218 const char *name);
219 static const char *roff_getstrn(struct roff *,
220 const char *, size_t, int *);
221 static int roff_hasregn(const struct roff *,
222 const char *, size_t);
223 static int roff_insec(ROFF_ARGS);
224 static int roff_it(ROFF_ARGS);
225 static int roff_line_ignore(ROFF_ARGS);
226 static void roff_man_alloc1(struct roff_man *);
227 static void roff_man_free1(struct roff_man *);
228 static int roff_manyarg(ROFF_ARGS);
229 static int roff_noarg(ROFF_ARGS);
230 static int roff_nop(ROFF_ARGS);
231 static int roff_nr(ROFF_ARGS);
232 static int roff_onearg(ROFF_ARGS);
233 static enum roff_tok roff_parse(struct roff *, char *, int *,
234 int, int);
235 static int roff_parsetext(struct roff *, struct buf *,
236 int, int *);
237 static int roff_renamed(ROFF_ARGS);
238 static int roff_return(ROFF_ARGS);
239 static int roff_rm(ROFF_ARGS);
240 static int roff_rn(ROFF_ARGS);
241 static int roff_rr(ROFF_ARGS);
242 static void roff_setregn(struct roff *, const char *,
243 size_t, int, char, int);
244 static void roff_setstr(struct roff *,
245 const char *, const char *, int);
246 static void roff_setstrn(struct roffkv **, const char *,
247 size_t, const char *, size_t, int);
248 static int roff_shift(ROFF_ARGS);
249 static int roff_so(ROFF_ARGS);
250 static int roff_tr(ROFF_ARGS);
251 static int roff_Dd(ROFF_ARGS);
252 static int roff_TE(ROFF_ARGS);
253 static int roff_TS(ROFF_ARGS);
254 static int roff_EQ(ROFF_ARGS);
255 static int roff_EN(ROFF_ARGS);
256 static int roff_T_(ROFF_ARGS);
257 static int roff_unsupp(ROFF_ARGS);
258 static int roff_userdef(ROFF_ARGS);
259
260 /* --- constant data ------------------------------------------------------ */
261
262 #define ROFFNUM_SCALE (1 << 0) /* Honour scaling in roff_getnum(). */
263 #define ROFFNUM_WHITE (1 << 1) /* Skip whitespace in roff_evalnum(). */
264
/*
 * Names of all requests and macros, indexed by token ID
 * (roffhash_alloc() reads roff_name[tok] for an enum roff_tok).
 * NULL entries mark token IDs without a name; roffhash_alloc()
 * skips them.  The order of the entries is significant and
 * has to stay in sync with the token enumeration.
 */
const char *__roff_name[MAN_MAX + 1] = {
	/* Requests; the first group ends with the nameless ROFF_MAX. */
	"br",		"ce",		"fi",		"ft",
	"ll",		"mc",		"nf",
	"po",		"rj",		"sp",
	"ta",		"ti",		NULL,
	"ab",		"ad",		"af",		"aln",
	"als",		"am",		"am1",		"ami",
	"ami1",		"as",		"as1",		"asciify",
	"backtrace",	"bd",		"bleedat",	"blm",
	"box",		"boxa",		"bp",		"BP",
	"break",	"breakchar",	"brnl",		"brp",
	"brpnl",	"c2",		"cc",
	"cf",		"cflags",	"ch",		"char",
	"chop",		"class",	"close",	"CL",
	"color",	"composite",	"continue",	"cp",
	"cropat",	"cs",		"cu",		"da",
	"dch",		"Dd",		"de",		"de1",
	"defcolor",	"dei",		"dei1",		"device",
	"devicem",	"di",		"do",		"ds",
	"ds1",		"dwh",		"dt",		"ec",
	"ecr",		"ecs",		"el",		"em",
	"EN",		"eo",		"EP",		"EQ",
	"errprint",	"ev",		"evc",		"ex",
	"fallback",	"fam",		"fc",		"fchar",
	"fcolor",	"fdeferlig",	"feature",	"fkern",
	"fl",		"flig",		"fp",		"fps",
	"fschar",	"fspacewidth",	"fspecial",	"ftr",
	"fzoom",	"gcolor",	"hc",		"hcode",
	"hidechar",	"hla",		"hlm",		"hpf",
	"hpfa",		"hpfcode",	"hw",		"hy",
	"hylang",	"hylen",	"hym",		"hypp",
	"hys",		"ie",		"if",		"ig",
	"index",	"it",		"itc",		"IX",
	"kern",		"kernafter",	"kernbefore",	"kernpair",
	"lc",		"lc_ctype",	"lds",		"length",
	"letadj",	"lf",		"lg",		"lhang",
	"linetabs",	"lnr",		"lnrf",		"lpfx",
	"ls",		"lsm",		"lt",
	"mediasize",	"minss",	"mk",		"mso",
	"na",		"ne",		"nh",		"nhychar",
	"nm",		"nn",		"nop",		"nr",
	"nrf",		"nroff",	"ns",		"nx",
	"open",		"opena",	"os",		"output",
	"padj",		"papersize",	"pc",		"pev",
	"pi",		"PI",		"pl",		"pm",
	"pn",		"pnr",		"ps",
	"psbb",		"pshape",	"pso",		"ptr",
	"pvs",		"rchar",	"rd",		"recursionlimit",
	"return",	"rfschar",	"rhang",
	"rm",		"rn",		"rnn",		"rr",
	"rs",		"rt",		"schar",	"sentchar",
	"shc",		"shift",	"sizes",	"so",
	"spacewidth",	"special",	"spreadwarn",	"ss",
	"sty",		"substring",	"sv",		"sy",
	"T&",		"tc",		"TE",
	"TH",		"tkf",		"tl",
	"tm",		"tm1",		"tmc",		"tr",
	"track",	"transchar",	"trf",		"trimat",
	"trin",		"trnt",		"troff",	"TS",
	"uf",		"ul",		"unformat",	"unwatch",
	"unwatchn",	"vpt",		"vs",		"warn",
	"warnscale",	"watch",	"watchlength",	"watchn",
	"wh",		"while",	"write",	"writec",
	"writem",	"xflag",	".",		NULL,
	NULL,		"text",
	/* Presumably the mdoc(7) macros, up to a NULL terminator - confirm. */
	"Dd",		"Dt",		"Os",		"Sh",
	"Ss",		"Pp",		"D1",		"Dl",
	"Bd",		"Ed",		"Bl",		"El",
	"It",		"Ad",		"An",		"Ap",
	"Ar",		"Cd",		"Cm",		"Dv",
	"Er",		"Ev",		"Ex",		"Fa",
	"Fd",		"Fl",		"Fn",		"Ft",
	"Ic",		"In",		"Li",		"Nd",
	"Nm",		"Op",		"Ot",		"Pa",
	"Rv",		"St",		"Va",		"Vt",
	"Xr",		"%A",		"%B",		"%D",
	"%I",		"%J",		"%N",		"%O",
	"%P",		"%R",		"%T",		"%V",
	"Ac",		"Ao",		"Aq",		"At",
	"Bc",		"Bf",		"Bo",		"Bq",
	"Bsx",		"Bx",		"Db",		"Dc",
	"Do",		"Dq",		"Ec",		"Ef",
	"Em",		"Eo",		"Fx",		"Ms",
	"No",		"Ns",		"Nx",		"Ox",
	"Pc",		"Pf",		"Po",		"Pq",
	"Qc",		"Ql",		"Qo",		"Qq",
	"Re",		"Rs",		"Sc",		"So",
	"Sq",		"Sm",		"Sx",		"Sy",
	"Tn",		"Ux",		"Xc",		"Xo",
	"Fo",		"Fc",		"Oo",		"Oc",
	"Bk",		"Ek",		"Bt",		"Hf",
	"Fr",		"Ud",		"Lb",		"Lp",
	"Lk",		"Mt",		"Brq",		"Bro",
	"Brc",		"%C",		"Es",		"En",
	"Dx",		"%Q",		"%U",		"Ta",
	"Tg",		NULL,
	/* Presumably the man(7) macros, up to a NULL terminator - confirm. */
	"TH",		"SH",		"SS",		"TP",
	"TQ",
	"LP",		"PP",		"P",		"IP",
	"HP",		"SM",		"SB",		"BI",
	"IB",		"BR",		"RB",		"R",
	"B",		"I",		"IR",		"RI",
	"RE",		"RS",		"DT",		"UC",
	"PD",		"AT",		"in",
	"SY",		"YS",		"OP",
	"EX",		"EE",		"UR",
	"UE",		"MT",		"ME",		NULL
};
/* Read-only view of the name table for use by other code. */
const	char *const *roff_name = __roff_name;
374
/*
 * Handler table with one entry per request.
 * The trailing comment on each row names the request the row is
 * for; the rows follow the same order as the request names at the
 * beginning of the name table, so rows must not be reordered.
 * Each row gives the proc, text, and sub handlers and the flags,
 * see struct roffmac.
 */
static	struct roffmac	 roffs[TOKEN_NONE] = {
	{ roff_noarg, NULL, NULL, 0 },  /* br */
	{ roff_onearg, NULL, NULL, 0 },  /* ce */
	{ roff_noarg, NULL, NULL, 0 },  /* fi */
	{ roff_onearg, NULL, NULL, 0 },  /* ft */
	{ roff_onearg, NULL, NULL, 0 },  /* ll */
	{ roff_onearg, NULL, NULL, 0 },  /* mc */
	{ roff_noarg, NULL, NULL, 0 },  /* nf */
	{ roff_onearg, NULL, NULL, 0 },  /* po */
	{ roff_onearg, NULL, NULL, 0 },  /* rj */
	{ roff_onearg, NULL, NULL, 0 },  /* sp */
	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
	{ roff_onearg, NULL, NULL, 0 },  /* ti */
	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
	{ roff_als, NULL, NULL, 0 },  /* als */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
	{ roff_ds, NULL, NULL, 0 },  /* as */
	{ roff_ds, NULL, NULL, 0 },  /* as1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
	{ roff_unsupp, NULL, NULL, 0 },  /* box */
	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
	{ roff_break, NULL, NULL, 0 },  /* break */
	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
	{ roff_noarg, NULL, NULL, 0 },  /* brp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
	{ roff_cc, NULL, NULL, 0 },  /* cc */
	{ roff_insec, NULL, NULL, 0 },  /* cf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
	{ roff_char, NULL, NULL, 0 },  /* char */
	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
	{ roff_insec, NULL, NULL, 0 },  /* close */
	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
	{ roff_unsupp, NULL, NULL, 0 },  /* da */
	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* device */
	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
	{ roff_unsupp, NULL, NULL, 0 },  /* di */
	{ roff_unsupp, NULL, NULL, 0 },  /* do */
	{ roff_ds, NULL, NULL, 0 },  /* ds */
	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
	{ roff_ec, NULL, NULL, 0 },  /* ec */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
	{ roff_unsupp, NULL, NULL, 0 },  /* em */
	{ roff_EN, NULL, NULL, 0 },  /* EN */
	{ roff_eo, NULL, NULL, 0 },  /* eo */
	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
	{ roff_unsupp, NULL, NULL, 0 },  /* index */
	{ roff_it, NULL, NULL, 0 },  /* it */
	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
	{ roff_unsupp, NULL, NULL, 0 },  /* length */
	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
	{ roff_insec, NULL, NULL, 0 },  /* lf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
	{ roff_insec, NULL, NULL, 0 },  /* mso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
	{ roff_nop, NULL, NULL, 0 },  /* nop */
	{ roff_nr, NULL, NULL, 0 },  /* nr */
	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
	{ roff_insec, NULL, NULL, 0 },  /* nx */
	{ roff_insec, NULL, NULL, 0 },  /* open */
	{ roff_insec, NULL, NULL, 0 },  /* opena */
	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
	{ roff_unsupp, NULL, NULL, 0 },  /* output */
	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
	{ roff_insec, NULL, NULL, 0 },  /* pi */
	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
	{ roff_insec, NULL, NULL, 0 },  /* pso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
	{ roff_return, NULL, NULL, 0 },  /* return */
	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
	{ roff_rm, NULL, NULL, 0 },  /* rm */
	{ roff_rn, NULL, NULL, 0 },  /* rn */
	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
	{ roff_rr, NULL, NULL, 0 },  /* rr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
	{ roff_shift, NULL, NULL, 0 },  /* shift */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
	{ roff_so, NULL, NULL, 0 },  /* so */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
	{ roff_insec, NULL, NULL, 0 },  /* sy */
	{ roff_T_, NULL, NULL, 0 },  /* T& */
	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
	{ roff_TE, NULL, NULL, 0 },  /* TE */
	{ roff_Dd, NULL, NULL, 0 },  /* TH */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
	{ roff_tr, NULL, NULL, 0 },  /* tr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
	{ roff_insec, NULL, NULL, 0 },  /* trf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
	{ roff_TS, NULL, NULL, 0 },  /* TS */
	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
	{ roff_unsupp, NULL, NULL, 0 },  /* wh */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/
	{ roff_insec, NULL, NULL, 0 },  /* write */
	{ roff_insec, NULL, NULL, 0 },  /* writec */
	{ roff_insec, NULL, NULL, 0 },  /* writem */
	{ roff_line_ignore, NULL, NULL, 0 },  /* xflag */
	{ roff_cblock, NULL, NULL, 0 },  /* . */
	/* Presumably the rows for ROFF_RENAMED and ROFF_USERDEF - confirm. */
	{ roff_renamed, NULL, NULL, 0 },
	{ roff_userdef, NULL, NULL, 0 }
};
621
/*
 * Array of injected predefined strings.
 * The initializers come from the included file "predefs.in";
 * presumably one PREDEF() invocation per entry - confirm.
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
627
/*
 * File-scope parser state that is not part of struct roff.
 * All four variables are cleared by roff_reset().
 */
static	int	 roffce_lines;	/* number of input lines to center */
static	struct roff_node *roffce_node;  /* active request */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
632
633
634 /* --- request table ------------------------------------------------------ */
635
636 struct ohash *
637 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
638 {
639 struct ohash *htab;
640 struct roffreq *req;
641 enum roff_tok tok;
642 size_t sz;
643 unsigned int slot;
644
645 htab = mandoc_malloc(sizeof(*htab));
646 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
647
648 for (tok = mintok; tok < maxtok; tok++) {
649 if (roff_name[tok] == NULL)
650 continue;
651 sz = strlen(roff_name[tok]);
652 req = mandoc_malloc(sizeof(*req) + sz + 1);
653 req->tok = tok;
654 memcpy(req->name, roff_name[tok], sz + 1);
655 slot = ohash_qlookup(htab, req->name);
656 ohash_insert(htab, slot, req);
657 }
658 return htab;
659 }
660
/*
 * Destroy a table built by roffhash_alloc(),
 * including all its entries.  Passing NULL is a no-op.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 pos;

	if (htab != NULL) {
		entry = ohash_first(htab, &pos);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &pos);
		}
		ohash_delete(htab);
		free(htab);
	}
}
675
676 enum roff_tok
677 roffhash_find(struct ohash *htab, const char *name, size_t sz)
678 {
679 struct roffreq *req;
680 const char *end;
681
682 if (sz) {
683 end = name + sz;
684 req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
685 } else
686 req = ohash_find(htab, ohash_qlookup(htab, name));
687 return req == NULL ? TOKEN_NONE : req->tok;
688 }
689
690 /* --- stack of request blocks -------------------------------------------- */
691
692 /*
693 * Pop the current node off of the stack of roff instructions currently
694 * pending. Return 1 if it is a loop or 0 otherwise.
695 */
696 static int
697 roffnode_pop(struct roff *r)
698 {
699 struct roffnode *p;
700 int inloop;
701
702 p = r->last;
703 inloop = p->tok == ROFF_while;
704 r->last = p->parent;
705 free(p->name);
706 free(p->end);
707 free(p);
708 return inloop;
709 }
710
711 /*
712 * Push a roff node onto the instruction stack. This must later be
713 * removed with roffnode_pop().
714 */
715 static void
716 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
717 int line, int col)
718 {
719 struct roffnode *p;
720
721 p = mandoc_calloc(1, sizeof(struct roffnode));
722 p->tok = tok;
723 if (name)
724 p->name = mandoc_strdup(name);
725 p->parent = r->last;
726 p->line = line;
727 p->col = col;
728 p->rule = p->parent ? p->parent->rule : 0;
729
730 r->last = p;
731 }
732
733 /* --- roff parser state data management ---------------------------------- */
734
735 static void
736 roff_free1(struct roff *r)
737 {
738 int i;
739
740 tbl_free(r->first_tbl);
741 r->first_tbl = r->last_tbl = r->tbl = NULL;
742
743 eqn_free(r->last_eqn);
744 r->last_eqn = r->eqn = NULL;
745
746 while (r->mstackpos >= 0)
747 roff_userret(r);
748
749 while (r->last)
750 roffnode_pop(r);
751
752 free (r->rstack);
753 r->rstack = NULL;
754 r->rstacksz = 0;
755 r->rstackpos = -1;
756
757 roff_freereg(r->regtab);
758 r->regtab = NULL;
759
760 roff_freestr(r->strtab);
761 roff_freestr(r->rentab);
762 roff_freestr(r->xmbtab);
763 r->strtab = r->rentab = r->xmbtab = NULL;
764
765 if (r->xtab)
766 for (i = 0; i < 128; i++)
767 free(r->xtab[i].p);
768 free(r->xtab);
769 r->xtab = NULL;
770 }
771
772 void
773 roff_reset(struct roff *r)
774 {
775 roff_free1(r);
776 r->options |= MPARSE_COMMENT;
777 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
778 r->control = '\0';
779 r->escape = '\\';
780 roffce_lines = 0;
781 roffce_node = NULL;
782 roffit_lines = 0;
783 roffit_macro = NULL;
784 }
785
786 void
787 roff_free(struct roff *r)
788 {
789 int i;
790
791 roff_free1(r);
792 for (i = 0; i < r->mstacksz; i++)
793 free(r->mstack[i].argv);
794 free(r->mstack);
795 roffhash_free(r->reqtab);
796 free(r);
797 }
798
799 struct roff *
800 roff_alloc(int options)
801 {
802 struct roff *r;
803
804 r = mandoc_calloc(1, sizeof(struct roff));
805 r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
806 r->options = options | MPARSE_COMMENT;
807 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
808 r->mstackpos = -1;
809 r->rstackpos = -1;
810 r->escape = '\\';
811 return r;
812 }
813
814 /* --- syntax tree state data management ---------------------------------- */
815
816 static void
817 roff_man_free1(struct roff_man *man)
818 {
819 if (man->meta.first != NULL)
820 roff_node_delete(man, man->meta.first);
821 free(man->meta.msec);
822 free(man->meta.vol);
823 free(man->meta.os);
824 free(man->meta.arch);
825 free(man->meta.title);
826 free(man->meta.name);
827 free(man->meta.date);
828 free(man->meta.sodest);
829 }
830
831 void
832 roff_state_reset(struct roff_man *man)
833 {
834 man->last = man->meta.first;
835 man->last_es = NULL;
836 man->flags = 0;
837 man->lastsec = man->lastnamed = SEC_NONE;
838 man->next = ROFF_NEXT_CHILD;
839 roff_setreg(man->roff, "nS", 0, '=');
840 }
841
842 static void
843 roff_man_alloc1(struct roff_man *man)
844 {
845 memset(&man->meta, 0, sizeof(man->meta));
846 man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
847 man->meta.first->type = ROFFT_ROOT;
848 man->meta.macroset = MACROSET_NONE;
849 roff_state_reset(man);
850 }
851
/*
 * Throw away the current document, that is, the syntax tree and
 * the meta data, and start over with an empty one.
 */
void
roff_man_reset(struct roff_man *man)
{
	/* Tear down first, then rebuild the empty root. */
	roff_man_free1(man);
	roff_man_alloc1(man);
}
858
/*
 * Destroy an object allocated with roff_man_alloc(),
 * including its syntax tree and meta data.
 * Passing NULL is a no-op, like for free(3) and roffhash_free().
 */
void
roff_man_free(struct roff_man *man)
{
	if (man == NULL)
		return;

	roff_man_free1(man);
	free(man);
}
865
866 struct roff_man *
867 roff_man_alloc(struct roff *roff, const char *os_s, int quick)
868 {
869 struct roff_man *man;
870
871 man = mandoc_calloc(1, sizeof(*man));
872 man->roff = roff;
873 man->os_s = os_s;
874 man->quick = quick;
875 roff_man_alloc1(man);
876 roff->man = man;
877 return man;
878 }
879
880 /* --- syntax tree handling ----------------------------------------------- */
881
882 struct roff_node *
883 roff_node_alloc(struct roff_man *man, int line, int pos,
884 enum roff_type type, int tok)
885 {
886 struct roff_node *n;
887
888 n = mandoc_calloc(1, sizeof(*n));
889 n->line = line;
890 n->pos = pos;
891 n->tok = tok;
892 n->type = type;
893 n->sec = man->lastsec;
894
895 if (man->flags & MDOC_SYNOPSIS)
896 n->flags |= NODE_SYNPRETTY;
897 else
898 n->flags &= ~NODE_SYNPRETTY;
899 if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL)
900 n->flags |= NODE_NOFILL;
901 else
902 n->flags &= ~NODE_NOFILL;
903 if (man->flags & MDOC_NEWLINE)
904 n->flags |= NODE_LINE;
905 man->flags &= ~MDOC_NEWLINE;
906
907 return n;
908 }
909
910 void
911 roff_node_append(struct roff_man *man, struct roff_node *n)
912 {
913
914 switch (man->next) {
915 case ROFF_NEXT_SIBLING:
916 if (man->last->next != NULL) {
917 n->next = man->last->next;
918 man->last->next->prev = n;
919 } else
920 man->last->parent->last = n;
921 man->last->next = n;
922 n->prev = man->last;
923 n->parent = man->last->parent;
924 break;
925 case ROFF_NEXT_CHILD:
926 if (man->last->child != NULL) {
927 n->next = man->last->child;
928 man->last->child->prev = n;
929 } else
930 man->last->last = n;
931 man->last->child = n;
932 n->parent = man->last;
933 break;
934 default:
935 abort();
936 }
937 man->last = n;
938
939 switch (n->type) {
940 case ROFFT_HEAD:
941 n->parent->head = n;
942 break;
943 case ROFFT_BODY:
944 if (n->end != ENDBODY_NOT)
945 return;
946 n->parent->body = n;
947 break;
948 case ROFFT_TAIL:
949 n->parent->tail = n;
950 break;
951 default:
952 return;
953 }
954
955 /*
956 * Copy over the normalised-data pointer of our parent. Not
957 * everybody has one, but copying a null pointer is fine.
958 */
959
960 n->norm = n->parent->norm;
961 assert(n->parent->type == ROFFT_BLOCK);
962 }
963
964 void
965 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
966 {
967 struct roff_node *n;
968
969 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
970 n->string = roff_strdup(man->roff, word);
971 roff_node_append(man, n);
972 n->flags |= NODE_VALID | NODE_ENDED;
973 man->next = ROFF_NEXT_SIBLING;
974 }
975
976 void
977 roff_word_append(struct roff_man *man, const char *word)
978 {
979 struct roff_node *n;
980 char *addstr, *newstr;
981
982 n = man->last;
983 addstr = roff_strdup(man->roff, word);
984 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
985 free(addstr);
986 free(n->string);
987 n->string = newstr;
988 man->next = ROFF_NEXT_SIBLING;
989 }
990
991 void
992 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
993 {
994 struct roff_node *n;
995
996 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
997 roff_node_append(man, n);
998 man->next = ROFF_NEXT_CHILD;
999 }
1000
1001 struct roff_node *
1002 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
1003 {
1004 struct roff_node *n;
1005
1006 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1007 roff_node_append(man, n);
1008 man->next = ROFF_NEXT_CHILD;
1009 return n;
1010 }
1011
1012 struct roff_node *
1013 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1014 {
1015 struct roff_node *n;
1016
1017 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1018 roff_node_append(man, n);
1019 man->next = ROFF_NEXT_CHILD;
1020 return n;
1021 }
1022
1023 struct roff_node *
1024 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1025 {
1026 struct roff_node *n;
1027
1028 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1029 roff_node_append(man, n);
1030 man->next = ROFF_NEXT_CHILD;
1031 return n;
1032 }
1033
1034 static void
1035 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1036 {
1037 struct roff_node *n;
1038 struct tbl_span *span;
1039
1040 if (man->meta.macroset == MACROSET_MAN)
1041 man_breakscope(man, ROFF_TS);
1042 while ((span = tbl_span(tbl)) != NULL) {
1043 n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1044 n->span = span;
1045 roff_node_append(man, n);
1046 n->flags |= NODE_VALID | NODE_ENDED;
1047 man->next = ROFF_NEXT_SIBLING;
1048 }
1049 }
1050
1051 void
1052 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1053 {
1054
1055 /* Adjust siblings. */
1056
1057 if (n->prev)
1058 n->prev->next = n->next;
1059 if (n->next)
1060 n->next->prev = n->prev;
1061
1062 /* Adjust parent. */
1063
1064 if (n->parent != NULL) {
1065 if (n->parent->child == n)
1066 n->parent->child = n->next;
1067 if (n->parent->last == n)
1068 n->parent->last = n->prev;
1069 }
1070
1071 /* Adjust parse point. */
1072
1073 if (man == NULL)
1074 return;
1075 if (man->last == n) {
1076 if (n->prev == NULL) {
1077 man->last = n->parent;
1078 man->next = ROFF_NEXT_CHILD;
1079 } else {
1080 man->last = n->prev;
1081 man->next = ROFF_NEXT_SIBLING;
1082 }
1083 }
1084 if (man->meta.first == n)
1085 man->meta.first = NULL;
1086 }
1087
1088 void
1089 roff_node_relink(struct roff_man *man, struct roff_node *n)
1090 {
1091 roff_node_unlink(man, n);
1092 n->prev = n->next = NULL;
1093 roff_node_append(man, n);
1094 }
1095
1096 void
1097 roff_node_free(struct roff_node *n)
1098 {
1099
1100 if (n->args != NULL)
1101 mdoc_argv_free(n->args);
1102 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1103 free(n->norm);
1104 eqn_box_free(n->eqn);
1105 free(n->string);
1106 free(n->tag);
1107 free(n);
1108 }
1109
1110 void
1111 roff_node_delete(struct roff_man *man, struct roff_node *n)
1112 {
1113
1114 while (n->child != NULL)
1115 roff_node_delete(man, n->child);
1116 roff_node_unlink(man, n);
1117 roff_node_free(n);
1118 }
1119
1120 int
1121 roff_node_transparent(struct roff_node *n)
1122 {
1123 if (n == NULL)
1124 return 0;
1125 if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
1126 return 1;
1127 return roff_tok_transparent(n->tok);
1128 }
1129
1130 int
1131 roff_tok_transparent(enum roff_tok tok)
1132 {
1133 switch (tok) {
1134 case ROFF_ft:
1135 case ROFF_ll:
1136 case ROFF_mc:
1137 case ROFF_po:
1138 case ROFF_ta:
1139 case MDOC_Db:
1140 case MDOC_Es:
1141 case MDOC_Sm:
1142 case MDOC_Tg:
1143 case MAN_DT:
1144 case MAN_UC:
1145 case MAN_PD:
1146 case MAN_AT:
1147 return 1;
1148 default:
1149 return 0;
1150 }
1151 }
1152
1153 struct roff_node *
1154 roff_node_child(struct roff_node *n)
1155 {
1156 for (n = n->child; roff_node_transparent(n); n = n->next)
1157 continue;
1158 return n;
1159 }
1160
1161 struct roff_node *
1162 roff_node_prev(struct roff_node *n)
1163 {
1164 do {
1165 n = n->prev;
1166 } while (roff_node_transparent(n));
1167 return n;
1168 }
1169
1170 struct roff_node *
1171 roff_node_next(struct roff_node *n)
1172 {
1173 do {
1174 n = n->next;
1175 } while (roff_node_transparent(n));
1176 return n;
1177 }
1178
1179 void
1180 deroff(char **dest, const struct roff_node *n)
1181 {
1182 char *cp;
1183 size_t sz;
1184
1185 if (n->string == NULL) {
1186 for (n = n->child; n != NULL; n = n->next)
1187 deroff(dest, n);
1188 return;
1189 }
1190
1191 /* Skip leading whitespace. */
1192
1193 for (cp = n->string; *cp != '\0'; cp++) {
1194 if (cp[0] == '\\' && cp[1] != '\0' &&
1195 strchr(" %&0^|~", cp[1]) != NULL)
1196 cp++;
1197 else if ( ! isspace((unsigned char)*cp))
1198 break;
1199 }
1200
1201 /* Skip trailing backslash. */
1202
1203 sz = strlen(cp);
1204 if (sz > 0 && cp[sz - 1] == '\\')
1205 sz--;
1206
1207 /* Skip trailing whitespace. */
1208
1209 for (; sz; sz--)
1210 if ( ! isspace((unsigned char)cp[sz-1]))
1211 break;
1212
1213 /* Skip empty strings. */
1214
1215 if (sz == 0)
1216 return;
1217
1218 if (*dest == NULL) {
1219 *dest = mandoc_strndup(cp, sz);
1220 return;
1221 }
1222
1223 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1224 free(*dest);
1225 *dest = cp;
1226 }
1227
1228 /* --- main functions of the roff parser ---------------------------------- */
1229
/*
 * In the current line, expand escape sequences that produce parsable
 * input text.  Also check the syntax of the remaining escape sequences,
 * which typically produce output glyphs or change formatter state.
 *
 * Arguments:
 *   r       the roff parser state
 *   buf     the input buffer; buf->buf may be freed and replaced
 *           and buf->sz updated while expanding
 *   ln      the input line number, used for messages and comment nodes
 *   pos     the offset in buf->buf where processing starts
 *   newesc  the character introducing escape sequences; roff_getarg()
 *           passes ASCII_ESC here, roff_parseln() passes r->escape
 *
 * Processing happens in two passes.  The first pass scans forward
 * for a comment, that is, the escape character followed by '"' or '#',
 * and cuts the line there.  The second pass walks backwards from the
 * end of the line and handles each escape sequence it finds.
 *
 * Returns ROFF_CONT when the line is ready for further parsing,
 * ROFF_IGN | ROFF_APPEND when the line ends in a \# comment or in
 * an escape character without a following newline, and ROFF_IGN when
 * the expansion limit is exceeded or the buffer would grow too large.
 */
static int
roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc)
{
	struct mctx	*ctx;	/* current macro call context */
	char		 ubuf[24]; /* buffer to print the number */
	struct roff_node *n;	/* used for header comments */
	const char	*start;	/* start of the string to process */
	char		*stesc;	/* start of an escape sequence ('\\') */
	const char	*esct;	/* type of escape sequence */
	char		*ep;	/* end of comment string */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy buf->buf to */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	size_t		 asz;	/* length of the replacement */
	size_t		 rsz;	/* length of the rest of the string */
	int		 inaml;	/* length returned from mandoc_escape() */
	int		 expand_count;	/* to avoid infinite loops */
	int		 npos;	/* position in numeric expression */
	int		 arg_complete; /* argument not interrupted by eol */
	int		 quote_args; /* true for \\$@, false for \\$* */
	int		 done;	/* no more input available */
	int		 deftype; /* type of definition to paste */
	int		 rcsid;	/* kind of RCS id seen */
	enum mandocerr	 err;	/* for escape sequence problems */
	char		 sign;	/* increment number register */
	char		 term;	/* character terminating the escape */

	/* Search forward for comments. */

	done = 0;
	start = buf->buf + pos;
	for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
		if (stesc[0] != newesc || stesc[1] == '\0')
			continue;
		stesc++;
		if (*stesc != '"' && *stesc != '#')
			continue;

		/* Comment found, look for RCS id. */

		rcsid = 0;
		if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_OPENBSD;
			cp += 8;
		} else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_NETBSD;
			cp += 7;
		}
		if (cp != NULL &&
		    isalnum((unsigned char)*cp) == 0 &&
		    strchr(cp, '$') != NULL) {
			/* Warn if the same kind of id was seen before. */
			if (r->man->meta.rcsids & rcsid)
				mandoc_msg(MANDOCERR_RCS_REP, ln,
				    (int)(stesc - buf->buf) + 1,
				    "%s", stesc + 1);
			r->man->meta.rcsids |= rcsid;
		}

		/* Handle trailing whitespace. */

		/* From here on, stesc points to the escape character. */
		ep = strchr(stesc--, '\0') - 1;
		if (*ep == '\n') {
			done = 1;
			ep--;
		}
		if (*ep == ' ' || *ep == '\t')
			mandoc_msg(MANDOCERR_SPACE_EOL,
			    ln, (int)(ep - buf->buf), NULL);

		/*
		 * Save comments preceding the title macro
		 * in the syntax tree.
		 */

		if (newesc != ASCII_ESC && r->options & MPARSE_COMMENT) {
			while (*ep == ' ' || *ep == '\t')
				ep--;
			ep[1] = '\0';
			n = roff_node_alloc(r->man,
			    ln, stesc + 1 - buf->buf,
			    ROFFT_COMMENT, TOKEN_NONE);
			n->string = mandoc_strdup(stesc + 2);
			roff_node_append(r->man, n);
			n->flags |= NODE_VALID | NODE_ENDED;
			r->man->next = ROFF_NEXT_SIBLING;
		}

		/* Line continuation with comment. */

		if (stesc[1] == '#') {
			*stesc = '\0';
			return ROFF_IGN | ROFF_APPEND;
		}

		/* Discard normal comments. */

		/* Also drop blanks before the comment unless escaped. */
		while (stesc > start && stesc[-1] == ' ' &&
		    (stesc == start + 1 || stesc[-2] != '\\'))
			stesc--;
		*stesc = '\0';
		break;
	}
	/* Nothing is left to process on this line. */
	if (stesc == start)
		return ROFF_CONT;
	stesc--;

	/* Notice the end of the input. */

	if (*stesc == '\n') {
		*stesc-- = '\0';
		done = 1;
	}

	/* Walk backwards from the end of the line to its start. */

	expand_count = 0;
	while (stesc >= start) {
		if (*stesc != newesc) {

			/*
			 * If we have a non-standard escape character,
			 * escape literal backslashes because all
			 * processing in subsequent functions uses
			 * the standard escaping rules.
			 */

			if (newesc != ASCII_ESC && *stesc == '\\') {
				*stesc = '\0';
				buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
				    buf->buf, stesc + 1) + 1;
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				free(buf->buf);
				buf->buf = nbuf;
			}

			/* Search backwards for the next escape. */

			stesc--;
			continue;
		}

		/* If it is escaped, skip it. */

		for (cp = stesc - 1; cp >= start; cp--)
			if (*cp != r->escape)
				break;

		/*
		 * An even distance means an even number of consecutive
		 * escape characters; turn all of them into backslashes.
		 */

		if ((stesc - cp) % 2 == 0) {
			while (stesc > cp)
				*stesc-- = '\\';
			continue;
		} else if (stesc[1] != '\0') {
			*stesc = '\\';
		} else {
			/* Escape character at the very end of the line. */
			*stesc-- = '\0';
			if (done)
				continue;
			else
				return ROFF_IGN | ROFF_APPEND;
		}

		/* Decide whether to expand or to check only. */

		term = '\0';
		cp = stesc + 1;
		if (*cp == 'E')
			cp++;
		esct = cp;
		switch (*esct) {
		case '*':
		case '$':
			res = NULL;
			break;
		case 'B':
		case 'w':
			/* The argument is delimited by this character. */
			term = cp[1];
			/* FALLTHROUGH */
		case 'n':
			sign = cp[1];
			if (sign == '+' || sign == '-')
				cp++;
			res = ubuf;
			break;
		default:
			/* Check only; this escape is not expanded here. */
			err = MANDOCERR_OK;
			switch(mandoc_escape(&cp, &stnam, &inaml)) {
			case ESCAPE_SPECIAL:
				if (mchars_spec2cp(stnam, inaml) >= 0)
					break;
				/* FALLTHROUGH */
			case ESCAPE_ERROR:
				err = MANDOCERR_ESC_BAD;
				break;
			case ESCAPE_UNDEF:
				err = MANDOCERR_ESC_UNDEF;
				break;
			case ESCAPE_UNSUPP:
				err = MANDOCERR_ESC_UNSUPP;
				break;
			default:
				break;
			}
			if (err != MANDOCERR_OK)
				mandoc_msg(err, ln, (int)(stesc - buf->buf),
				    "%.*s", (int)(cp - stesc), stesc);
			stesc--;
			continue;
		}

		/* Give up when too many expansions pile up on one line. */

		if (EXPAND_LIMIT < ++expand_count) {
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 */

		if (term == '\0') {
			switch (*++cp) {
			case '\0':
				maxl = 0;
				break;
			case '(':
				cp++;
				maxl = 2;
				break;
			case '[':
				cp++;
				term = ']';
				maxl = 0;
				break;
			default:
				maxl = 1;
				break;
			}
		} else {
			cp += 2;
			maxl = 0;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		naml = 0;
		arg_complete = 1;
		while (maxl == 0 || naml < maxl) {
			if (*cp == '\0') {
				mandoc_msg(MANDOCERR_ESC_BAD, ln,
				    (int)(stesc - buf->buf), "%s", stesc);
				arg_complete = 0;
				break;
			}
			if (maxl == 0 && *cp == term) {
				cp++;
				break;
			}
			/* Nested escapes are only examined inside \w. */
			if (*cp++ != '\\' || *esct != 'w') {
				naml++;
				continue;
			}
			switch (mandoc_escape(&cp, NULL, NULL)) {
			case ESCAPE_SPECIAL:
			case ESCAPE_UNICODE:
			case ESCAPE_NUMBERED:
			case ESCAPE_UNDEF:
			case ESCAPE_OVERSTRIKE:
				naml++;
				break;
			default:
				break;
			}
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		switch (*esct) {
		case '*':
			if (arg_complete) {
				deftype = ROFFDEF_USER | ROFFDEF_PRE;
				res = roff_getstrn(r, stnam, naml, &deftype);

				/*
				 * If not overridden, let \*(.T
				 * through to the formatters.
				 */

				if (res == NULL && naml == 2 &&
				    stnam[0] == '.' && stnam[1] == 'T') {
					roff_setstrn(&r->strtab,
					    ".T", 2, NULL, 0, 0);
					stesc--;
					continue;
				}
			}
			break;
		case '$':
			/* Macro arguments only exist inside a macro call. */
			if (r->mstackpos < 0) {
				mandoc_msg(MANDOCERR_ARG_UNDEF, ln,
				    (int)(stesc - buf->buf), "%.3s", stesc);
				break;
			}
			ctx = r->mstack + r->mstackpos;
			/* \$1 to \$9 select a single argument. */
			npos = esct[1] - '1';
			if (npos >= 0 && npos <= 8) {
				res = npos < ctx->argc ?
				    ctx->argv[npos] : "";
				break;
			}
			if (esct[1] == '*')
				quote_args = 0;
			else if (esct[1] == '@')
				quote_args = 1;
			else {
				mandoc_msg(MANDOCERR_ARG_NONUM, ln,
				    (int)(stesc - buf->buf), "%.3s", stesc);
				break;
			}
			/* Compute the total length of all arguments. */
			asz = 0;
			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					asz++;  /* blank */
				if (quote_args)
					asz += 2;  /* quotes */
				asz += strlen(ctx->argv[npos]);
			}
			/*
			 * Resize the buffer in place; the three bytes
			 * of the escape sequence get replaced by asz
			 * bytes.  When shrinking, move the rest before
			 * the reallocation, when growing, after it.
			 */
			if (asz != 3) {
				rsz = buf->sz - (stesc - buf->buf) - 3;
				if (asz < 3)
					memmove(stesc + asz, stesc + 3, rsz);
				buf->sz += asz - 3;
				nbuf = mandoc_realloc(buf->buf, buf->sz);
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				buf->buf = nbuf;
				if (asz > 3)
					memmove(stesc + asz, stesc + 3, rsz);
			}
			/* Copy the arguments into the gap. */
			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					*stesc++ = ' ';
				if (quote_args)
					*stesc++ = '"';
				cp = ctx->argv[npos];
				while (*cp != '\0')
					*stesc++ = *cp++;
				if (quote_args)
					*stesc++ = '"';
			}
			continue;
		case 'B':
			/*
			 * Yields "1" if the complete argument parses
			 * as a numeric expression, "0" otherwise.
			 */
			npos = 0;
			ubuf[0] = arg_complete &&
			    roff_evalnum(r, ln, stnam, &npos,
			      NULL, ROFFNUM_SCALE) &&
			    stnam + npos + 1 == cp ? '1' : '0';
			ubuf[1] = '\0';
			break;
		case 'n':
			if (arg_complete)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, stnam, naml, sign));
			else
				ubuf[0] = '\0';
			break;
		case 'w':
			/* use even incomplete args */
			/* Each counted character contributes 24. */
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
			    24 * (int)naml);
			break;
		}

		if (res == NULL) {
			if (*esct == '*')
				mandoc_msg(MANDOCERR_STR_UNDEF,
				    ln, (int)(stesc - buf->buf),
				    "%.*s", (int)naml, stnam);
			res = "";
		} else if (buf->sz + strlen(res) > SHRT_MAX) {
			/* Refuse to let the line grow beyond SHRT_MAX. */
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/* Replace the escape sequence by the string. */

		*stesc = '\0';
		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
		    buf->buf, res, cp) + 1;

		/* Prepare for the next replacement. */

		/* Resume at the end of the text just inserted. */
		start = nbuf + pos;
		stesc = nbuf + (stesc - buf->buf) + strlen(res);
		free(buf->buf);
		buf->buf = nbuf;
	}
	return ROFF_CONT;
}
1642
1643 /*
1644 * Parse a quoted or unquoted roff-style request or macro argument.
1645 * Return a pointer to the parsed argument, which is either the original
1646 * pointer or advanced by one byte in case the argument is quoted.
1647 * NUL-terminate the argument in place.
1648 * Collapse pairs of quotes inside quoted arguments.
1649 * Advance the argument pointer to the next argument,
1650 * or to the NUL byte terminating the argument line.
1651 */
1652 char *
1653 roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
1654 {
1655 struct buf buf;
1656 char *cp, *start;
1657 int newesc, pairs, quoted, white;
1658
1659 /* Quoting can only start with a new word. */
1660 start = *cpp;
1661 quoted = 0;
1662 if ('"' == *start) {
1663 quoted = 1;
1664 start++;
1665 }
1666
1667 newesc = pairs = white = 0;
1668 for (cp = start; '\0' != *cp; cp++) {
1669
1670 /*
1671 * Move the following text left
1672 * after quoted quotes and after "\\" and "\t".
1673 */
1674 if (pairs)
1675 cp[-pairs] = cp[0];
1676
1677 if ('\\' == cp[0]) {
1678 /*
1679 * In copy mode, translate double to single
1680 * backslashes and backslash-t to literal tabs.
1681 */
1682 switch (cp[1]) {
1683 case 'a':
1684 case 't':
1685 cp[-pairs] = '\t';
1686 pairs++;
1687 cp++;
1688 break;
1689 case '\\':
1690 newesc = 1;
1691 cp[-pairs] = ASCII_ESC;
1692 pairs++;
1693 cp++;
1694 break;
1695 case ' ':
1696 /* Skip escaped blanks. */
1697 if (0 == quoted)
1698 cp++;
1699 break;
1700 default:
1701 break;
1702 }
1703 } else if (0 == quoted) {
1704 if (' ' == cp[0]) {
1705 /* Unescaped blanks end unquoted args. */
1706 white = 1;
1707 break;
1708 }
1709 } else if ('"' == cp[0]) {
1710 if ('"' == cp[1]) {
1711 /* Quoted quotes collapse. */
1712 pairs++;
1713 cp++;
1714 } else {
1715 /* Unquoted quotes end quoted args. */
1716 quoted = 2;
1717 break;
1718 }
1719 }
1720 }
1721
1722 /* Quoted argument without a closing quote. */
1723 if (1 == quoted)
1724 mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);
1725
1726 /* NUL-terminate this argument and move to the next one. */
1727 if (pairs)
1728 cp[-pairs] = '\0';
1729 if ('\0' != *cp) {
1730 *cp++ = '\0';
1731 while (' ' == *cp)
1732 cp++;
1733 }
1734 *pos += (int)(cp - start) + (quoted ? 1 : 0);
1735 *cpp = cp;
1736
1737 if ('\0' == *cp && (white || ' ' == cp[-1]))
1738 mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);
1739
1740 start = mandoc_strdup(start);
1741 if (newesc == 0)
1742 return start;
1743
1744 buf.buf = start;
1745 buf.sz = strlen(start) + 1;
1746 buf.next = NULL;
1747 if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) {
1748 free(buf.buf);
1749 buf.buf = mandoc_strdup("");
1750 }
1751 return buf.buf;
1752 }
1753
1754
1755 /*
1756 * Process text streams.
1757 */
1758 static int
1759 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1760 {
1761 size_t sz;
1762 const char *start;
1763 char *p;
1764 int isz;
1765 enum mandoc_esc esc;
1766
1767 /* Spring the input line trap. */
1768
1769 if (roffit_lines == 1) {
1770 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1771 free(buf->buf);
1772 buf->buf = p;
1773 buf->sz = isz + 1;
1774 *offs = 0;
1775 free(roffit_macro);
1776 roffit_lines = 0;
1777 return ROFF_REPARSE;
1778 } else if (roffit_lines > 1)
1779 --roffit_lines;
1780
1781 if (roffce_node != NULL && buf->buf[pos] != '\0') {
1782 if (roffce_lines < 1) {
1783 r->man->last = roffce_node;
1784 r->man->next = ROFF_NEXT_SIBLING;
1785 roffce_lines = 0;
1786 roffce_node = NULL;
1787 } else
1788 roffce_lines--;
1789 }
1790
1791 /* Convert all breakable hyphens into ASCII_HYPH. */
1792
1793 start = p = buf->buf + pos;
1794
1795 while (*p != '\0') {
1796 sz = strcspn(p, "-\\");
1797 p += sz;
1798
1799 if (*p == '\0')
1800 break;
1801
1802 if (*p == '\\') {
1803 /* Skip over escapes. */
1804 p++;
1805 esc = mandoc_escape((const char **)&p, NULL, NULL);
1806 if (esc == ESCAPE_ERROR)
1807 break;
1808 while (*p == '-')
1809 p++;
1810 continue;
1811 } else if (p == start) {
1812 p++;
1813 continue;
1814 }
1815
1816 if (isalpha((unsigned char)p[-1]) &&
1817 isalpha((unsigned char)p[1]))
1818 *p = ASCII_HYPH;
1819 p++;
1820 }
1821 return ROFF_CONT;
1822 }
1823
1824 int
1825 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
1826 {
1827 enum roff_tok t;
1828 int e;
1829 int pos; /* parse point */
1830 int spos; /* saved parse point for messages */
1831 int ppos; /* original offset in buf->buf */
1832 int ctl; /* macro line (boolean) */
1833
1834 ppos = pos = *offs;
1835
1836 /* Handle in-line equation delimiters. */
1837
1838 if (r->tbl == NULL &&
1839 r->last_eqn != NULL && r->last_eqn->delim &&
1840 (r->eqn == NULL || r->eqn_inline)) {
1841 e = roff_eqndelim(r, buf, pos);
1842 if (e == ROFF_REPARSE)
1843 return e;
1844 assert(e == ROFF_CONT);
1845 }
1846
1847 /* Expand some escape sequences. */
1848
1849 e = roff_expand(r, buf, ln, pos, r->escape);
1850 if ((e & ROFF_MASK) == ROFF_IGN)
1851 return e;
1852 assert(e == ROFF_CONT);
1853
1854 ctl = roff_getcontrol(r, buf->buf, &pos);
1855
1856 /*
1857 * First, if a scope is open and we're not a macro, pass the
1858 * text through the macro's filter.
1859 * Equations process all content themselves.
1860 * Tables process almost all content themselves, but we want
1861 * to warn about macros before passing it there.
1862 */
1863
1864 if (r->last != NULL && ! ctl) {
1865 t = r->last->tok;
1866 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1867 if ((e & ROFF_MASK) == ROFF_IGN)
1868 return e;
1869 e &= ~ROFF_MASK;
1870 } else
1871 e = ROFF_IGN;
1872 if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
1873 eqn_read(r->eqn, buf->buf + ppos);
1874 return e;
1875 }
1876 if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
1877 tbl_read(r->tbl, ln, buf->buf, ppos);
1878 roff_addtbl(r->man, ln, r->tbl);
1879 return e;
1880 }
1881 if ( ! ctl) {
1882 r->options &= ~MPARSE_COMMENT;
1883 return roff_parsetext(r, buf, pos, offs) | e;
1884 }
1885
1886 /* Skip empty request lines. */
1887
1888 if (buf->buf[pos] == '"') {
1889 mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
1890 return ROFF_IGN;
1891 } else if (buf->buf[pos] == '\0')
1892 return ROFF_IGN;
1893
1894 /*
1895 * If a scope is open, go to the child handler for that macro,
1896 * as it may want to preprocess before doing anything with it.
1897 * Don't do so if an equation is open.
1898 */
1899
1900 if (r->last) {
1901 t = r->last->tok;
1902 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1903 }
1904
1905 /* No scope is open. This is a new request or macro. */
1906
1907 r->options &= ~MPARSE_COMMENT;
1908 spos = pos;
1909 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1910
1911 /* Tables ignore most macros. */
1912
1913 if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
1914 t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
1915 mandoc_msg(MANDOCERR_TBLMACRO,
1916 ln, pos, "%s", buf->buf + spos);
1917 if (t != TOKEN_NONE)
1918 return ROFF_IGN;
1919 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1920 pos++;
1921 while (buf->buf[pos] == ' ')
1922 pos++;
1923 tbl_read(r->tbl, ln, buf->buf, pos);
1924 roff_addtbl(r->man, ln, r->tbl);
1925 return ROFF_IGN;
1926 }
1927
1928 /* For now, let high level macros abort .ce mode. */
1929
1930 if (ctl && roffce_node != NULL &&
1931 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
1932 t == ROFF_TH || t == ROFF_TS)) {
1933 r->man->last = roffce_node;
1934 r->man->next = ROFF_NEXT_SIBLING;
1935 roffce_lines = 0;
1936 roffce_node = NULL;
1937 }
1938
1939 /*
1940 * This is neither a roff request nor a user-defined macro.
1941 * Let the standard macro set parsers handle it.
1942 */
1943
1944 if (t == TOKEN_NONE)
1945 return ROFF_CONT;
1946
1947 /* Execute a roff request or a user defined macro. */
1948
1949 return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
1950 }
1951
1952 /*
1953 * Internal interface function to tell the roff parser that execution
1954 * of the current macro ended. This is required because macro
1955 * definitions usually do not end with a .return request.
1956 */
1957 void
1958 roff_userret(struct roff *r)
1959 {
1960 struct mctx *ctx;
1961 int i;
1962
1963 assert(r->mstackpos >= 0);
1964 ctx = r->mstack + r->mstackpos;
1965 for (i = 0; i < ctx->argc; i++)
1966 free(ctx->argv[i]);
1967 ctx->argc = 0;
1968 r->mstackpos--;
1969 }
1970
1971 void
1972 roff_endparse(struct roff *r)
1973 {
1974 if (r->last != NULL)
1975 mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
1976 r->last->col, "%s", roff_name[r->last->tok]);
1977
1978 if (r->eqn != NULL) {
1979 mandoc_msg(MANDOCERR_BLK_NOEND,
1980 r->eqn->node->line, r->eqn->node->pos, "EQ");
1981 eqn_parse(r->eqn);
1982 r->eqn = NULL;
1983 }
1984
1985 if (r->tbl != NULL) {
1986 tbl_end(r->tbl, 1);
1987 r->tbl = NULL;
1988 }
1989 }
1990
1991 /*
1992 * Parse a roff node's type from the input buffer. This must be in the
1993 * form of ".foo xxx" in the usual way.
1994 */
1995 static enum roff_tok
1996 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1997 {
1998 char *cp;
1999 const char *mac;
2000 size_t maclen;
2001 int deftype;
2002 enum roff_tok t;
2003
2004 cp = buf + *pos;
2005
2006 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
2007 return TOKEN_NONE;
2008
2009 mac = cp;
2010 maclen = roff_getname(r, &cp, ln, ppos);
2011
2012 deftype = ROFFDEF_USER | ROFFDEF_REN;
2013 r->current_string = roff_getstrn(r, mac, maclen, &deftype);
2014 switch (deftype) {
2015 case ROFFDEF_USER:
2016 t = ROFF_USERDEF;
2017 break;
2018 case ROFFDEF_REN:
2019 t = ROFF_RENAMED;
2020 break;
2021 default:
2022 t = roffhash_find(r->reqtab, mac, maclen);
2023 break;
2024 }
2025 if (t != TOKEN_NONE)
2026 *pos = cp - buf;
2027 else if (deftype == ROFFDEF_UNDEF) {
2028 /* Using an undefined macro defines it to be empty. */
2029 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
2030 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
2031 }
2032 return t;
2033 }
2034
2035 /* --- handling of request blocks ----------------------------------------- */
2036
2037 static int
2038 roff_cblock(ROFF_ARGS)
2039 {
2040
2041 /*
2042 * A block-close `..' should only be invoked as a child of an
2043 * ignore macro, otherwise raise a warning and just ignore it.
2044 */
2045
2046 if (r->last == NULL) {
2047 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2048 return ROFF_IGN;
2049 }
2050
2051 switch (r->last->tok) {
2052 case ROFF_am:
2053 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
2054 case ROFF_ami:
2055 case ROFF_de:
2056 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
2057 case ROFF_dei:
2058 case ROFF_ig:
2059 break;
2060 default:
2061 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2062 return ROFF_IGN;
2063 }
2064
2065 if (buf->buf[pos] != '\0')
2066 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
2067 ".. %s", buf->buf + pos);
2068
2069 roffnode_pop(r);
2070 roffnode_cleanscope(r);
2071 return ROFF_IGN;
2072
2073 }
2074
2075 /*
2076 * Pop all nodes ending at the end of the current input line.
2077 * Return the number of loops ended.
2078 */
2079 static int
2080 roffnode_cleanscope(struct roff *r)
2081 {
2082 int inloop;
2083
2084 inloop = 0;
2085 while (r->last != NULL) {
2086 if (--r->last->endspan != 0)
2087 break;
2088 inloop += roffnode_pop(r);
2089 }
2090 return inloop;
2091 }
2092
2093 /*
2094 * Handle the closing \} of a conditional block.
2095 * Apart from generating warnings, this only pops nodes.
2096 * Return the number of loops ended.
2097 */
2098 static int
2099 roff_ccond(struct roff *r, int ln, int ppos)
2100 {
2101 if (NULL == r->last) {
2102 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2103 return 0;
2104 }
2105
2106 switch (r->last->tok) {
2107 case ROFF_el:
2108 case ROFF_ie:
2109 case ROFF_if:
2110 case ROFF_while:
2111 break;
2112 default:
2113 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2114 return 0;
2115 }
2116
2117 if (r->last->endspan > -1) {
2118 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2119 return 0;
2120 }
2121
2122 return roffnode_pop(r) + roffnode_cleanscope(r);
2123 }
2124
2125 static int
2126 roff_block(ROFF_ARGS)
2127 {
2128 const char *name, *value;
2129 char *call, *cp, *iname, *rname;
2130 size_t csz, namesz, rsz;
2131 int deftype;
2132
2133 /* Ignore groff compatibility mode for now. */
2134
2135 if (tok == ROFF_de1)
2136 tok = ROFF_de;
2137 else if (tok == ROFF_dei1)
2138 tok = ROFF_dei;
2139 else if (tok == ROFF_am1)
2140 tok = ROFF_am;
2141 else if (tok == ROFF_ami1)
2142 tok = ROFF_ami;
2143
2144 /* Parse the macro name argument. */
2145
2146 cp = buf->buf + pos;
2147 if (tok == ROFF_ig) {
2148 iname = NULL;
2149 namesz = 0;
2150 } else {
2151 iname = cp;
2152 namesz = roff_getname(r, &cp, ln, ppos);
2153 iname[namesz] = '\0';
2154 }
2155
2156 /* Resolve the macro name argument if it is indirect. */
2157
2158 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2159 deftype = ROFFDEF_USER;
2160 name = roff_getstrn(r, iname, namesz, &deftype);
2161 if (name == NULL) {
2162 mandoc_msg(MANDOCERR_STR_UNDEF,
2163 ln, (int)(iname - buf->buf),
2164 "%.*s", (int)namesz, iname);
2165 namesz = 0;
2166 } else
2167 namesz = strlen(name);
2168 } else
2169 name = iname;
2170
2171 if (namesz == 0 && tok != ROFF_ig) {
2172 mandoc_msg(MANDOCERR_REQ_EMPTY,
2173 ln, ppos, "%s", roff_name[tok]);
2174 return ROFF_IGN;
2175 }
2176
2177 roffnode_push(r, tok, name, ln, ppos);
2178
2179 /*
2180 * At the beginning of a `de' macro, clear the existing string
2181 * with the same name, if there is one. New content will be
2182 * appended from roff_block_text() in multiline mode.
2183 */
2184
2185 if (tok == ROFF_de || tok == ROFF_dei) {
2186 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
2187 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2188 } else if (tok == ROFF_am || tok == ROFF_ami) {
2189 deftype = ROFFDEF_ANY;
2190 value = roff_getstrn(r, iname, namesz, &deftype);
2191 switch (deftype) { /* Before appending, ... */
2192 case ROFFDEF_PRE: /* copy predefined to user-defined. */
2193 roff_setstrn(&r->strtab, name, namesz,
2194 value, strlen(value), 0);
2195 break;
2196 case ROFFDEF_REN: /* call original standard macro. */
2197 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2198 (int)strlen(value), value);
2199 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2200 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2201 free(call);
2202 break;
2203 case ROFFDEF_STD: /* rename and call standard macro. */
2204 rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
2205 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
2206 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2207 (int)rsz, rname);
2208 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2209 free(call);
2210 free(rname);
2211 break;
2212 default:
2213 break;
2214 }
2215 }
2216
2217 if (*cp == '\0')
2218 return ROFF_IGN;
2219
2220 /* Get the custom end marker. */
2221
2222 iname = cp;
2223 namesz = roff_getname(r, &cp, ln, ppos);
2224
2225 /* Resolve the end marker if it is indirect. */
2226
2227 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2228 deftype = ROFFDEF_USER;
2229 name = roff_getstrn(r, iname, namesz, &deftype);
2230 if (name == NULL) {
2231 mandoc_msg(MANDOCERR_STR_UNDEF,
2232 ln, (int)(iname - buf->buf),
2233 "%.*s", (int)namesz, iname);
2234 namesz = 0;
2235 } else
2236 namesz = strlen(name);
2237 } else
2238 name = iname;
2239
2240 if (namesz)
2241 r->last->end = mandoc_strndup(name, namesz);
2242
2243 if (*cp != '\0')
2244 mandoc_msg(MANDOCERR_ARG_EXCESS,
2245 ln, pos, ".%s ... %s", roff_name[tok], cp);
2246
2247 return ROFF_IGN;
2248 }
2249
2250 static int
2251 roff_block_sub(ROFF_ARGS)
2252 {
2253 enum roff_tok t;
2254 int i, j;
2255
2256 /*
2257 * First check whether a custom macro exists at this level. If
2258 * it does, then check against it. This is some of groff's
2259 * stranger behaviours. If we encountered a custom end-scope
2260 * tag and that tag also happens to be a "real" macro, then we
2261 * need to try interpreting it again as a real macro. If it's
2262 * not, then return ignore. Else continue.
2263 */
2264
2265 if (r->last->end) {
2266 for (i = pos, j = 0; r->last->end[j]; j++, i++)
2267 if (buf->buf[i] != r->last->end[j])
2268 break;
2269
2270 if (r->last->end[j] == '\0' &&
2271 (buf->buf[i] == '\0' ||
2272 buf->buf[i] == ' ' ||
2273 buf->buf[i] == '\t')) {
2274 roffnode_pop(r);
2275 roffnode_cleanscope(r);
2276
2277 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2278 i++;
2279
2280 pos = i;
2281 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2282 TOKEN_NONE)
2283 return ROFF_RERUN;
2284 return ROFF_IGN;
2285 }
2286 }
2287
2288 /*
2289 * If we have no custom end-query or lookup failed, then try
2290 * pulling it out of the hashtable.
2291 */
2292
2293 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2294
2295 if (t != ROFF_cblock) {
2296 if (tok != ROFF_ig)
2297 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2298 return ROFF_IGN;
2299 }
2300
2301 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2302 }
2303
2304 static int
2305 roff_block_text(ROFF_ARGS)
2306 {
2307
2308 if (tok != ROFF_ig)
2309 roff_setstr(r, r->last->name, buf->buf + pos, 2);
2310
2311 return ROFF_IGN;
2312 }
2313
2314 static int
2315 roff_cond_sub(ROFF_ARGS)
2316 {
2317 struct roffnode *bl;
2318 char *ep;
2319 int endloop, irc, rr;
2320 enum roff_tok t;
2321
2322 irc = ROFF_IGN;
2323 rr = r->last->rule;
2324 endloop = tok != ROFF_while ? ROFF_IGN :
2325 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2326 if (roffnode_cleanscope(r))
2327 irc |= endloop;
2328
2329 /*
2330 * If `\}' occurs on a macro line without a preceding macro,
2331 * drop the line completely.
2332 */
2333
2334 ep = buf->buf + pos;
2335 if (ep[0] == '\\' && ep[1] == '}')
2336 rr = 0;
2337
2338 /*
2339 * The closing delimiter `\}' rewinds the conditional scope
2340 * but is otherwise ignored when interpreting the line.
2341 */
2342
2343 while ((ep = strchr(ep, '\\')) != NULL) {
2344 switch (ep[1]) {
2345 case '}':
2346 memmove(ep, ep + 2, strlen(ep + 2) + 1);
2347 if (roff_ccond(r, ln, ep - buf->buf))
2348 irc |= endloop;
2349 break;
2350 case '\0':
2351 ++ep;
2352 break;
2353 default:
2354 ep += 2;
2355 break;
2356 }
2357 }
2358
2359 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2360
2361 /* For now, let high level macros abort .ce mode. */
2362
2363 if (roffce_node != NULL &&
2364 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
2365 t == ROFF_TH || t == ROFF_TS)) {
2366 r->man->last = roffce_node;
2367 r->man->next = ROFF_NEXT_SIBLING;
2368 roffce_lines = 0;
2369 roffce_node = NULL;
2370 }
2371
2372 /*
2373 * Fully handle known macros when they are structurally
2374 * required or when the conditional evaluated to true.
2375 */
2376
2377 if (t == ROFF_break) {
2378 if (irc & ROFF_LOOPMASK)
2379 irc = ROFF_IGN | ROFF_LOOPEXIT;
2380 else if (rr) {
2381 for (bl = r->last; bl != NULL; bl = bl->parent) {
2382 bl->rule = 0;
2383 if (bl->tok == ROFF_while)
2384 break;
2385 }
2386 }
2387 } else if (t != TOKEN_NONE &&
2388 (rr || roffs[t].flags & ROFFMAC_STRUCT))
2389 irc |= (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2390 else
2391 irc |= rr ? ROFF_CONT : ROFF_IGN;
2392 return irc;
2393 }
2394
2395 static int
2396 roff_cond_text(ROFF_ARGS)
2397 {
2398 char *ep;
2399 int endloop, irc, rr;
2400
2401 irc = ROFF_IGN;
2402 rr = r->last->rule;
2403 endloop = tok != ROFF_while ? ROFF_IGN :
2404 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2405 if (roffnode_cleanscope(r))
2406 irc |= endloop;
2407
2408 /*
2409 * If `\}' occurs on a text line with neither preceding
2410 * nor following characters, drop the line completely.
2411 */
2412
2413 ep = buf->buf + pos;
2414 if (strcmp(ep, "\\}") == 0)
2415 rr = 0;
2416
2417 /*
2418 * The closing delimiter `\}' rewinds the conditional scope
2419 * but is otherwise ignored when interpreting the line.
2420 */
2421
2422 while ((ep = strchr(ep, '\\')) != NULL) {
2423 switch (ep[1]) {
2424 case '}':
2425 memmove(ep, ep + 2, strlen(ep + 2) + 1);
2426 if (roff_ccond(r, ln, ep - buf->buf))
2427 irc |= endloop;
2428 break;
2429 case '\0':
2430 ++ep;
2431 break;
2432 default:
2433 ep += 2;
2434 break;
2435 }
2436 }
2437 if (rr)
2438 irc |= ROFF_CONT;
2439 return irc;
2440 }
2441
2442 /* --- handling of numeric and conditional expressions -------------------- */
2443
2444 /*
2445 * Parse a single signed integer number. Stop at the first non-digit.
2446 * If there is at least one digit, return success and advance the
2447 * parse point, else return failure and let the parse point unchanged.
2448 * Ignore overflows, treat them just like the C language.
2449 */
2450 static int
2451 roff_getnum(const char *v, int *pos, int *res, int flags)
2452 {
2453 int myres, scaled, n, p;
2454
2455 if (NULL == res)
2456 res = &myres;
2457
2458 p = *pos;
2459 n = v[p] == '-';
2460 if (n || v[p] == '+')
2461 p++;
2462
2463 if (flags & ROFFNUM_WHITE)
2464 while (isspace((unsigned char)v[p]))
2465 p++;
2466
2467 for (*res = 0; isdigit((unsigned char)v[p]); p++)
2468 *res = 10 * *res + v[p] - '0';
2469 if (p == *pos + n)
2470 return 0;
2471
2472 if (n)
2473 *res = -*res;
2474
2475 /* Each number may be followed by one optional scaling unit. */
2476
2477 switch (v[p]) {
2478 case 'f':
2479 scaled = *res * 65536;
2480 break;
2481 case 'i':
2482 scaled = *res * 240;
2483 break;
2484 case 'c':
2485 scaled = *res * 240 / 2.54;
2486 break;
2487 case 'v':
2488 case 'P':
2489 scaled = *res * 40;
2490 break;
2491 case 'm':
2492 case 'n':
2493 scaled = *res * 24;
2494 break;
2495 case 'p':
2496 scaled = *res * 10 / 3;
2497 break;
2498 case 'u':
2499 scaled = *res;
2500 break;
2501 case 'M':
2502 scaled = *res * 6 / 25;
2503 break;
2504 default:
2505 scaled = *res;
2506 p--;
2507 break;
2508 }
2509 if (flags & ROFFNUM_SCALE)
2510 *res = scaled;
2511
2512 *pos = p + 1;
2513 return 1;
2514 }
2515
/*
 * Evaluate a string comparison condition.
 * The character at the parse point is the delimiter.
 * Succeed if the string up to its second occurrence
 * matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or, lacking that, to the end of the line.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim, *lhs, *rhs;
	int		 equal;

	equal = 0;
	delim = v + *pos;		/* initial delimiter */
	lhs = delim + 1;		/* scans the first string */
	rhs = strchr(lhs, *delim);	/* scans the second string */

	if (rhs != NULL) {
		/* Step over the middle delimiter, then compare. */
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: look for the final delimiter. */
				rhs = strchr(rhs, *delim);
				break;
			}
			if (*rhs == *delim) {
				/* Both strings ended at the same time. */
				equal = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return equal;
}
2558
2559 /*
2560 * Evaluate an optionally negated single character, numerical,
2561 * or string condition.
2562 */
2563 static int
2564 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2565 {
2566 const char *start, *end;
2567 char *cp, *name;
2568 size_t sz;
2569 int deftype, len, number, savepos, istrue, wanttrue;
2570
2571 if ('!' == v[*pos]) {
2572 wanttrue = 0;
2573 (*pos)++;
2574 } else
2575 wanttrue = 1;
2576
2577 switch (v[*pos]) {
2578 case '\0':
2579 return 0;
2580 case 'n':
2581 case 'o':
2582 (*pos)++;
2583 return wanttrue;
2584 case 'e':
2585 case 't':
2586 case 'v':
2587 (*pos)++;
2588 return !wanttrue;
2589 case 'c':
2590 do {
2591 (*pos)++;
2592 } while (v[*pos] == ' ');
2593
2594 /*
2595 * Quirk for groff compatibility:
2596 * The horizontal tab is neither available nor unavailable.
2597 */
2598
2599 if (v[*pos] == '\t') {
2600 (*pos)++;
2601 return 0;
2602 }
2603
2604 /* Printable ASCII characters are available. */
2605
2606 if (v[*pos] != '\\') {
2607 (*pos)++;
2608 return wanttrue;
2609 }
2610
2611 end = v + ++*pos;
2612 switch (mandoc_escape(&end, &start, &len)) {
2613 case ESCAPE_SPECIAL:
2614 istrue = mchars_spec2cp(start, len) != -1;
2615 break;
2616 case ESCAPE_UNICODE:
2617 istrue = 1;
2618 break;
2619 case ESCAPE_NUMBERED:
2620 istrue = mchars_num2char(start, len) != -1;
2621 break;
2622 default:
2623 istrue = !wanttrue;
2624 break;
2625 }
2626 *pos = end - v;
2627 return istrue == wanttrue;
2628 case 'd':
2629 case 'r':
2630 cp = v + *pos + 1;
2631 while (*cp == ' ')
2632 cp++;
2633 name = cp;
2634 sz = roff_getname(r, &cp, ln, cp - v);
2635 if (sz == 0)
2636 istrue = 0;
2637 else if (v[*pos] == 'r')
2638 istrue = roff_hasregn(r, name, sz);
2639 else {
2640 deftype = ROFFDEF_ANY;
2641 roff_getstrn(r, name, sz, &deftype);
2642 istrue = !!deftype;
2643 }
2644 *pos = (name + sz) - v;
2645 return istrue == wanttrue;
2646 default:
2647 break;
2648 }
2649
2650 savepos = *pos;
2651 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2652 return (number > 0) == wanttrue;
2653 else if (*pos == savepos)
2654 return roff_evalstrcond(v, pos) == wanttrue;
2655 else
2656 return 0;
2657 }
2658
2659 static int
2660 roff_line_ignore(ROFF_ARGS)
2661 {
2662
2663 return ROFF_IGN;
2664 }
2665
2666 static int
2667 roff_insec(ROFF_ARGS)
2668 {
2669
2670 mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]);
2671 return ROFF_IGN;
2672 }
2673
2674 static int
2675 roff_unsupp(ROFF_ARGS)
2676 {
2677
2678 mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]);
2679 return ROFF_IGN;
2680 }
2681
2682 static int
2683 roff_cond(ROFF_ARGS)
2684 {
2685 int irc;
2686
2687 roffnode_push(r, tok, NULL, ln, ppos);
2688
2689 /*
2690 * An `.el' has no conditional body: it will consume the value
2691 * of the current rstack entry set in prior `ie' calls or
2692 * defaults to DENY.
2693 *
2694 * If we're not an `el', however, then evaluate the conditional.
2695 */
2696
2697 r->last->rule = tok == ROFF_el ?
2698 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2699 roff_evalcond(r, ln, buf->buf, &pos);
2700
2701 /*
2702 * An if-else will put the NEGATION of the current evaluated
2703 * conditional into the stack of rules.
2704 */
2705
2706 if (tok == ROFF_ie) {
2707 if (r->rstackpos + 1 == r->rstacksz) {
2708 r->rstacksz += 16;
2709 r->rstack = mandoc_reallocarray(r->rstack,
2710 r->rstacksz, sizeof(int));
2711 }
2712 r->rstack[++r->rstackpos] = !r->last->rule;
2713 }
2714
2715 /* If the parent has false as its rule, then so do we. */
2716
2717 if (r->last->parent && !r->last->parent->rule)
2718 r->last->rule = 0;
2719
2720 /*
2721 * Determine scope.
2722 * If there is nothing on the line after the conditional,
2723 * not even whitespace, use next-line scope.
2724 * Except that .while does not support next-line scope.
2725 */
2726
2727 if (buf->buf[pos] == '\0' && tok != ROFF_while) {
2728 r->last->endspan = 2;
2729 goto out;
2730 }
2731
2732 while (buf->buf[pos] == ' ')
2733 pos++;
2734
2735 /* An opening brace requests multiline scope. */
2736
2737 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2738 r->last->endspan = -1;
2739 pos += 2;
2740 while (buf->buf[pos] == ' ')
2741 pos++;
2742 goto out;
2743 }
2744
2745 /*
2746 * Anything else following the conditional causes
2747 * single-line scope. Warn if the scope contains
2748 * nothing but trailing whitespace.
2749 */
2750
2751 if (buf->buf[pos] == '\0')
2752 mandoc_msg(MANDOCERR_COND_EMPTY,
2753 ln, ppos, "%s", roff_name[tok]);
2754
2755 r->last->endspan = 1;
2756
2757 out:
2758 *offs = pos;
2759 irc = ROFF_RERUN;
2760 if (tok == ROFF_while)
2761 irc |= ROFF_WHILE;
2762 return irc;
2763 }
2764
2765 static int
2766 roff_ds(ROFF_ARGS)
2767 {
2768 char *string;
2769 const char *name;
2770 size_t namesz;
2771
2772 /* Ignore groff compatibility mode for now. */
2773
2774 if (tok == ROFF_ds1)
2775 tok = ROFF_ds;
2776 else if (tok == ROFF_as1)
2777 tok = ROFF_as;
2778
2779 /*
2780 * The first word is the name of the string.
2781 * If it is empty or terminated by an escape sequence,
2782 * abort the `ds' request without defining anything.
2783 */
2784
2785 name = string = buf->buf + pos;
2786 if (*name == '\0')
2787 return ROFF_IGN;
2788
2789 namesz = roff_getname(r, &string, ln, pos);
2790 switch (name[namesz]) {
2791 case '\\':
2792 return ROFF_IGN;
2793 case '\t':
2794 string = buf->buf + pos + namesz;
2795 break;
2796 default:
2797 break;
2798 }
2799
2800 /* Read past the initial double-quote, if any. */
2801 if (*string == '"')
2802 string++;
2803
2804 /* The rest is the value. */
2805 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2806 ROFF_as == tok);
2807 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2808 return ROFF_IGN;
2809 }
2810
/*
 * Parse a single operator, one or two characters long.
 * If the operator is recognized, store its one-character code
 * in *res, advance the parse point, and return that code.
 * Otherwise, return 0 and leave the parse point unchanged;
 * *res then holds the character found at the parse point.
 *
 * Two-character operators are encoded as follows:
 * <= as l, <> as !, <? as i, >= as g, >? as a, == as =.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	char	 second;
	int	 width;

	width = 1;
	*res = v[*pos];

	switch (*res) {
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case ':':
		break;
	case '<':
		second = v[*pos + 1];
		if (second == '=')
			*res = 'l';
		else if (second == '>')
			*res = '!';
		else if (second == '?')
			*res = 'i';
		else
			break;
		width = 2;
		break;
	case '>':
		second = v[*pos + 1];
		if (second == '=')
			*res = 'g';
		else if (second == '?')
			*res = 'a';
		else
			break;
		width = 2;
		break;
	case '=':
		if (v[*pos + 1] == '=')
			width = 2;
		break;
	default:
		return 0;
	}

	*pos += width;
	return *res;
}
2874
2875 /*
2876 * Evaluate either a parenthesized numeric expression
2877 * or a single signed integer number.
2878 */
2879 static int
2880 roff_evalpar(struct roff *r, int ln,
2881 const char *v, int *pos, int *res, int flags)
2882 {
2883
2884 if ('(' != v[*pos])
2885 return roff_getnum(v, pos, res, flags);
2886
2887 (*pos)++;
2888 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2889 return 0;
2890
2891 /*
2892 * Omission of the closing parenthesis
2893 * is an error in validation mode,
2894 * but ignored in evaluation mode.
2895 */
2896
2897 if (')' == v[*pos])
2898 (*pos)++;
2899 else if (NULL == res)
2900 return 0;
2901
2902 return 1;
2903 }
2904
2905 /*
2906 * Evaluate a complete numeric expression.
2907 * Proceed left to right, there is no concept of precedence.
2908 */
2909 static int
2910 roff_evalnum(struct roff *r, int ln, const char *v,
2911 int *pos, int *res, int flags)
2912 {
2913 int mypos, operand2;
2914 char operator;
2915
2916 if (NULL == pos) {
2917 mypos = 0;
2918 pos = &mypos;
2919 }
2920
2921 if (flags & ROFFNUM_WHITE)
2922 while (isspace((unsigned char)v[*pos]))
2923 (*pos)++;
2924
2925 if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2926 return 0;
2927
2928 while (1) {
2929 if (flags & ROFFNUM_WHITE)
2930 while (isspace((unsigned char)v[*pos]))
2931 (*pos)++;
2932
2933 if ( ! roff_getop(v, pos, &operator))
2934 break;
2935
2936 if (flags & ROFFNUM_WHITE)
2937 while (isspace((unsigned char)v[*pos]))
2938 (*pos)++;
2939
2940 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2941 return 0;
2942
2943 if (flags & ROFFNUM_WHITE)
2944 while (isspace((unsigned char)v[*pos]))
2945 (*pos)++;
2946
2947 if (NULL == res)
2948 continue;
2949
2950 switch (operator) {
2951 case '+':
2952 *res += operand2;
2953 break;
2954 case '-':
2955 *res -= operand2;
2956 break;
2957 case '*':
2958 *res *= operand2;
2959 break;
2960 case '/':
2961 if (operand2 == 0) {
2962 mandoc_msg(MANDOCERR_DIVZERO,
2963 ln, *pos, "%s", v);
2964 *res = 0;
2965 break;
2966 }
2967 *res /= operand2;
2968 break;
2969 case '%':
2970 if (operand2 == 0) {
2971 mandoc_msg(MANDOCERR_DIVZERO,
2972 ln, *pos, "%s", v);
2973 *res = 0;
2974 break;
2975 }
2976 *res %= operand2;
2977 break;
2978 case '<':
2979 *res = *res < operand2;
2980 break;
2981 case '>':
2982 *res = *res > operand2;
2983 break;
2984 case 'l':
2985 *res = *res <= operand2;
2986 break;
2987 case 'g':
2988 *res = *res >= operand2;
2989 break;
2990 case '=':
2991 *res = *res == operand2;
2992 break;
2993 case '!':
2994 *res = *res != operand2;
2995 break;
2996 case '&':
2997 *res = *res && operand2;
2998 break;
2999 case ':':
3000 *res = *res || operand2;
3001 break;
3002 case 'i':
3003 if (operand2 < *res)
3004 *res = operand2;
3005 break;
3006 case 'a':
3007 if (operand2 > *res)
3008 *res = operand2;
3009 break;
3010 default:
3011 abort();
3012 }
3013 }
3014 return 1;
3015 }
3016
3017 /* --- register management ------------------------------------------------ */
3018
/*
 * Set, increment, or decrement the register with the given
 * NUL-terminated name, leaving its step size alone.
 * See roff_setregn() for the meaning of val and sign.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	const size_t	 namesz = strlen(name);

	roff_setregn(r, name, namesz, val, sign, INT_MIN);
}
3024
3025 static void
3026 roff_setregn(struct roff *r, const char *name, size_t len,
3027 int val, char sign, int step)
3028 {
3029 struct roffreg *reg;
3030
3031 /* Search for an existing register with the same name. */
3032 reg = r->regtab;
3033
3034 while (reg != NULL && (reg->key.sz != len ||
3035 strncmp(reg->key.p, name, len) != 0))
3036 reg = reg->next;
3037
3038 if (NULL == reg) {
3039 /* Create a new register. */
3040 reg = mandoc_malloc(sizeof(struct roffreg));
3041 reg->key.p = mandoc_strndup(name, len);
3042 reg->key.sz = len;
3043 reg->val = 0;
3044 reg->step = 0;
3045 reg->next = r->regtab;
3046 r->regtab = reg;
3047 }
3048
3049 if ('+' == sign)
3050 reg->val += val;
3051 else if ('-' == sign)
3052 reg->val -= val;
3053 else
3054 reg->val = val;
3055 if (step != INT_MIN)
3056 reg->step = step;
3057 }
3058
3059 /*
3060 * Handle some predefined read-only number registers.
3061 * For now, return -1 if the requested register is not predefined;
3062 * in case a predefined read-only register having the value -1
3063 * were to turn up, another special value would have to be chosen.
3064 */
3065 static int
3066 roff_getregro(const struct roff *r, const char *name)
3067 {
3068
3069 switch (*name) {
3070 case '$': /* Number of arguments of the last macro evaluated. */
3071 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
3072 case 'A': /* ASCII approximation mode is always off. */
3073 return 0;
3074 case 'g': /* Groff compatibility mode is always on. */
3075 return 1;
3076 case 'H': /* Fixed horizontal resolution. */
3077 return 24;
3078 case 'j': /* Always adjust left margin only. */
3079 return 0;
3080 case 'T': /* Some output device is always defined. */
3081 return 1;
3082 case 'V': /* Fixed vertical resolution. */
3083 return 40;
3084 default:
3085 return -1;
3086 }
3087 }
3088
/*
 * Return the value of the register with the given NUL-terminated
 * name, without incrementing or decrementing it.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	const size_t	 namesz = strlen(name);

	return roff_getregn(r, name, namesz, '\0');
}
3094
3095 static int
3096 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
3097 {
3098 struct roffreg *reg;
3099 int val;
3100
3101 if ('.' == name[0] && 2 == len) {
3102 val = roff_getregro(r, name + 1);
3103 if (-1 != val)
3104 return val;
3105 }
3106
3107 for (reg = r->regtab; reg; reg = reg->next) {
3108 if (len == reg->key.sz &&
3109 0 == strncmp(name, reg->key.p, len)) {
3110 switch (sign) {
3111 case '+':
3112 reg->val += reg->step;
3113 break;
3114 case '-':
3115 reg->val -= reg->step;
3116 break;
3117 default:
3118 break;
3119 }
3120 return reg->val;
3121 }
3122 }
3123
3124 roff_setregn(r, name, len, 0, '\0', INT_MIN);
3125 return 0;
3126 }
3127
3128 static int
3129 roff_hasregn(const struct roff *r, const char *name, size_t len)
3130 {
3131 struct roffreg *reg;
3132 int val;
3133
3134 if ('.' == name[0] && 2 == len) {
3135 val = roff_getregro(r, name + 1);
3136 if (-1 != val)
3137 return 1;
3138 }
3139
3140 for (reg = r->regtab; reg; reg = reg->next)
3141 if (len == reg->key.sz &&
3142 0 == strncmp(name, reg->key.p, len))
3143 return 1;
3144
3145 return 0;
3146 }
3147
3148 static void
3149 roff_freereg(struct roffreg *reg)
3150 {
3151 struct roffreg *old_reg;
3152
3153 while (NULL != reg) {
3154 free(reg->key.p);
3155 old_reg = reg;
3156 reg = reg->next;
3157 free(old_reg);
3158 }
3159 }
3160
3161 static int
3162 roff_nr(ROFF_ARGS)
3163 {
3164 char *key, *val, *step;
3165 size_t keysz;
3166 int iv, is, len;
3167 char sign;
3168
3169 key = val = buf->buf + pos;
3170 if (*key == '\0')
3171 return ROFF_IGN;
3172
3173 keysz = roff_getname(r, &val, ln, pos);
3174 if (key[keysz] == '\\' || key[keysz] == '\t')
3175 return ROFF_IGN;
3176
3177 sign = *val;
3178 if (sign == '+' || sign == '-')
3179 val++;
3180
3181 len = 0;
3182 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
3183 return ROFF_IGN;
3184
3185 step = val + len;
3186 while (isspace((unsigned char)*step))
3187 step++;
3188 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
3189 is = INT_MIN;
3190
3191 roff_setregn(r, key, keysz, iv, sign, is);
3192 return ROFF_IGN;
3193 }
3194
3195 static int
3196 roff_rr(ROFF_ARGS)
3197 {
3198 struct roffreg *reg, **prev;
3199 char *name, *cp;
3200 size_t namesz;
3201
3202 name = cp = buf->buf + pos;
3203 if (*name == '\0')
3204 return ROFF_IGN;
3205 namesz = roff_getname(r, &cp, ln, pos);
3206 name[namesz] = '\0';
3207
3208 prev = &r->regtab;
3209 while (1) {
3210 reg = *prev;
3211 if (reg == NULL || !strcmp(name, reg->key.p))
3212 break;
3213 prev = &reg->next;
3214 }
3215 if (reg != NULL) {
3216 *prev = reg->next;
3217 free(reg->key.p);
3218 free(reg);
3219 }
3220 return ROFF_IGN;
3221 }
3222
3223 /* --- handler functions for roff requests -------------------------------- */
3224
3225 static int
3226 roff_rm(ROFF_ARGS)
3227 {
3228 const char *name;
3229 char *cp;
3230 size_t namesz;
3231
3232 cp = buf->buf + pos;
3233 while (*cp != '\0') {
3234 name = cp;
3235 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
3236 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
3237 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3238 if (name[namesz] == '\\' || name[namesz] == '\t')
3239 break;
3240 }
3241 return ROFF_IGN;
3242 }
3243
3244 static int
3245 roff_it(ROFF_ARGS)
3246 {
3247 int iv;
3248
3249 /* Parse the number of lines. */
3250
3251 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
3252 mandoc_msg(MANDOCERR_IT_NONUM,
3253 ln, ppos, "%s", buf->buf + 1);
3254 return ROFF_IGN;
3255 }
3256
3257 while (isspace((unsigned char)buf->buf[pos]))
3258 pos++;
3259
3260 /*
3261 * Arm the input line trap.
3262 * Special-casing "an-trap" is an ugly workaround to cope
3263 * with DocBook stupidly fiddling with man(7) internals.
3264 */
3265
3266 roffit_lines = iv;
3267 roffit_macro = mandoc_strdup(iv != 1 ||
3268 strcmp(buf->buf + pos, "an-trap") ?
3269 buf->buf + pos : "br");
3270 return ROFF_IGN;
3271 }
3272
3273 static int
3274 roff_Dd(ROFF_ARGS)
3275 {
3276 int mask;
3277 enum roff_tok t, te;
3278
3279 switch (tok) {
3280 case ROFF_Dd:
3281 tok = MDOC_Dd;
3282 te = MDOC_MAX;
3283 if (r->format == 0)
3284 r->format = MPARSE_MDOC;
3285 mask = MPARSE_MDOC | MPARSE_QUICK;
3286 break;
3287 case ROFF_TH:
3288 tok = MAN_TH;
3289 te = MAN_MAX;
3290 if (r->format == 0)
3291 r->format = MPARSE_MAN;
3292 mask = MPARSE_QUICK;
3293 break;
3294 default:
3295 abort();
3296 }
3297 if ((r->options & mask) == 0)
3298 for (t = tok; t < te; t++)
3299 roff_setstr(r, roff_name[t], NULL, 0);
3300 return ROFF_CONT;
3301 }
3302
3303 static int
3304 roff_TE(ROFF_ARGS)
3305 {
3306 r->man->flags &= ~ROFF_NONOFILL;
3307 if (r->tbl == NULL) {
3308 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE");
3309 return ROFF_IGN;
3310 }
3311 if (tbl_end(r->tbl, 0) == 0) {
3312 r->tbl = NULL;
3313 free(buf->buf);
3314 buf->buf = mandoc_strdup(".sp");
3315 buf->sz = 4;
3316 *offs = 0;
3317 return ROFF_REPARSE;
3318 }
3319 r->tbl = NULL;
3320 return ROFF_IGN;
3321 }
3322
3323 static int
3324 roff_T_(ROFF_ARGS)
3325 {
3326
3327 if (NULL == r->tbl)
3328 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&");
3329 else
3330 tbl_restart(ln, ppos, r->tbl);
3331
3332 return ROFF_IGN;
3333 }
3334
3335 /*
3336 * Handle in-line equation delimiters.
3337 */
3338 static int
3339 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
3340 {
3341 char *cp1, *cp2;
3342 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
3343
3344 /*
3345 * Outside equations, look for an opening delimiter.
3346 * If we are inside an equation, we already know it is
3347 * in-line, or this function wouldn't have been called;
3348 * so look for a closing delimiter.
3349 */
3350
3351 cp1 = buf->buf + pos;
3352 cp2 = strchr(cp1, r->eqn == NULL ?
3353 r->last_eqn->odelim : r->last_eqn->cdelim);
3354 if (cp2 == NULL)
3355 return ROFF_CONT;
3356
3357 *cp2++ = '\0';
3358 bef_pr = bef_nl = aft_nl = aft_pr = "";
3359
3360 /* Handle preceding text, protecting whitespace. */
3361
3362 if (*buf->buf != '\0') {
3363 if (r->eqn == NULL)
3364 bef_pr = "\\&";
3365 bef_nl = "\n";
3366 }
3367
3368 /*
3369 * Prepare replacing the delimiter with an equation macro
3370 * and drop leading white space from the equation.
3371 */
3372
3373 if (r->eqn == NULL) {
3374 while (*cp2 == ' ')
3375 cp2++;
3376 mac = ".EQ";
3377 } else
3378 mac = ".EN";
3379
3380 /* Handle following text, protecting whitespace. */
3381
3382 if (*cp2 != '\0') {
3383 aft_nl = "\n";
3384 if (r->eqn != NULL)
3385 aft_pr = "\\&";
3386 }
3387
3388 /* Do the actual replacement. */
3389
3390 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
3391 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
3392 free(buf->buf);
3393 buf->buf = cp1;
3394
3395 /* Toggle the in-line state of the eqn subsystem. */
3396
3397 r->eqn_inline = r->eqn == NULL;
3398 return ROFF_REPARSE;
3399 }
3400
3401 static int
3402 roff_EQ(ROFF_ARGS)
3403 {
3404 struct roff_node *n;
3405
3406 if (r->man->meta.macroset == MACROSET_MAN)
3407 man_breakscope(r->man, ROFF_EQ);
3408 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3409 if (ln > r->man->last->line)
3410 n->flags |= NODE_LINE;
3411 n->eqn = eqn_box_new();
3412 roff_node_append(r->man, n);
3413 r->man->next = ROFF_NEXT_SIBLING;
3414
3415 assert(r->eqn == NULL);
3416 if (r->last_eqn == NULL)
3417 r->last_eqn = eqn_alloc();
3418 else
3419 eqn_reset(r->last_eqn);
3420 r->eqn = r->last_eqn;
3421 r->eqn->node = n;
3422
3423 if (buf->buf[pos] != '\0')
3424 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3425 ".EQ %s", buf->buf + pos);
3426
3427 return ROFF_IGN;
3428 }
3429
3430 static int
3431 roff_EN(ROFF_ARGS)
3432 {
3433 if (r->eqn != NULL) {
3434 eqn_parse(r->eqn);
3435 r->eqn = NULL;
3436 } else
3437 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN");
3438 if (buf->buf[pos] != '\0')
3439 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3440 "EN %s", buf->buf + pos);
3441 return ROFF_IGN;
3442 }
3443
3444 static int
3445 roff_TS(ROFF_ARGS)
3446 {
3447 if (r->tbl != NULL) {
3448 mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
3449 tbl_end(r->tbl, 0);
3450 }
3451 r->man->flags |= ROFF_NONOFILL;
3452 r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
3453 if (r->last_tbl == NULL)
3454 r->first_tbl = r->tbl;
3455 r->last_tbl = r->tbl;
3456 return ROFF_IGN;
3457 }
3458
3459 static int
3460 roff_noarg(ROFF_ARGS)
3461 {
3462 if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3463 man_breakscope(r->man, tok);
3464 if (tok == ROFF_brp)
3465 tok = ROFF_br;
3466 roff_elem_alloc(r->man, ln, ppos, tok);
3467 if (buf->buf[pos] != '\0')
3468 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3469 "%s %s", roff_name[tok], buf->buf + pos);
3470 if (tok == ROFF_nf)
3471 r->man->flags |= ROFF_NOFILL;
3472 else if (tok == ROFF_fi)
3473 r->man->flags &= ~ROFF_NOFILL;
3474 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3475 r->man->next = ROFF_NEXT_SIBLING;
3476 return ROFF_IGN;
3477 }
3478
3479 static int
3480 roff_onearg(ROFF_ARGS)
3481 {
3482 struct roff_node *n;
3483 char *cp;
3484 int npos;
3485
3486 if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3487 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3488 tok == ROFF_ti))
3489 man_breakscope(r->man, tok);
3490
3491 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3492 r->man->last = roffce_node;
3493 r->man->next = ROFF_NEXT_SIBLING;
3494 }
3495
3496 roff_elem_alloc(r->man, ln, ppos, tok);
3497 n = r->man->last;
3498
3499 cp = buf->buf + pos;
3500 if (*cp != '\0') {
3501 while (*cp != '\0' && *cp != ' ')
3502 cp++;
3503 while (*cp == ' ')
3504 *cp++ = '\0';
3505 if (*cp != '\0')
3506 mandoc_msg(MANDOCERR_ARG_EXCESS,
3507 ln, (int)(cp - buf->buf),
3508 "%s ... %s", roff_name[tok], cp);
3509 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3510 }
3511
3512 if (tok == ROFF_ce || tok == ROFF_rj) {
3513 if (r->man->last->type == ROFFT_ELEM) {
3514 roff_word_alloc(r->man, ln, pos, "1");
3515 r->man->last->flags |= NODE_NOSRC;
3516 }
3517 npos = 0;
3518 if (roff_evalnum(r, ln, r->man->last->string, &npos,
3519 &roffce_lines, 0) == 0) {
3520 mandoc_msg(MANDOCERR_CE_NONUM,
3521 ln, pos, "ce %s", buf->buf + pos);
3522 roffce_lines = 1;
3523 }
3524 if (roffce_lines < 1) {
3525 r->man->last = r->man->last->parent;
3526 roffce_node = NULL;
3527 roffce_lines = 0;
3528 } else
3529 roffce_node = r->man->last->parent;
3530 } else {
3531 n->flags |= NODE_VALID | NODE_ENDED;
3532 r->man->last = n;
3533 }
3534 n->flags |= NODE_LINE;
3535 r->man->next = ROFF_NEXT_SIBLING;
3536 return ROFF_IGN;
3537 }
3538
3539 static int
3540 roff_manyarg(ROFF_ARGS)
3541 {
3542 struct roff_node *n;
3543 char *sp, *ep;
3544
3545 roff_elem_alloc(r->man, ln, ppos, tok);
3546 n = r->man->last;
3547
3548 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3549 while (*ep != '\0' && *ep != ' ')
3550 ep++;
3551 while (*ep == ' ')
3552 *ep++ = '\0';
3553 roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3554 }
3555
3556 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3557 r->man->last = n;
3558 r->man->next = ROFF_NEXT_SIBLING;
3559 return ROFF_IGN;
3560 }
3561
3562 static int
3563 roff_als(ROFF_ARGS)
3564 {
3565 char *oldn, *newn, *end, *value;
3566 size_t oldsz, newsz, valsz;
3567
3568 newn = oldn = buf->buf + pos;
3569 if (*newn == '\0')
3570 return ROFF_IGN;
3571
3572 newsz = roff_getname(r, &oldn, ln, pos);
3573 if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0')
3574 return ROFF_IGN;
3575
3576 end = oldn;
3577 oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3578 if (oldsz == 0)
3579 return ROFF_IGN;
3580
3581 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3582 (int)oldsz, oldn);
3583 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3584 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3585 free(value);
3586 return ROFF_IGN;
3587 }
3588
3589 /*
3590 * The .break request only makes sense inside conditionals,
3591 * and that case is already handled in roff_cond_sub().
3592 */
3593 static int
3594 roff_break(ROFF_ARGS)
3595 {
3596 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break");
3597 return ROFF_IGN;
3598 }
3599
3600 static int
3601 roff_cc(ROFF_ARGS)
3602 {
3603 const char *p;
3604
3605 p = buf->buf + pos;
3606
3607 if (*p == '\0' || (r->control = *p++) == '.')
3608 r->control = '\0';
3609
3610 if (*p != '\0')
3611 mandoc_msg(MANDOCERR_ARG_EXCESS,
3612 ln, p - buf->buf, "cc ... %s", p);
3613
3614 return ROFF_IGN;
3615 }
3616
3617 static int
3618 roff_char(ROFF_ARGS)
3619 {
3620 const char *p, *kp, *vp;
3621 size_t ksz, vsz;
3622 int font;
3623
3624 /* Parse the character to be replaced. */
3625
3626 kp = buf->buf + pos;
3627 p = kp + 1;
3628 if (*kp == '\0' || (*kp == '\\' &&
3629 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3630 (*p != ' ' && *p != '\0')) {
3631 mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp);
3632 return ROFF_IGN;
3633 }
3634 ksz = p - kp;
3635 while (*p == ' ')
3636 p++;
3637
3638 /*
3639 * If the replacement string contains a font escape sequence,
3640 * we have to restore the font at the end.
3641 */
3642
3643 vp = p;
3644 vsz = strlen(p);
3645 font = 0;
3646 while (*p != '\0') {
3647 if (*p++ != '\\')
3648 continue;
3649 switch (mandoc_escape(&p, NULL, NULL)) {
3650 case ESCAPE_FONT:
3651 case ESCAPE_FONTROMAN:
3652 case ESCAPE_FONTITALIC:
3653 case ESCAPE_FONTBOLD:
3654 case ESCAPE_FONTBI:
3655 case ESCAPE_FONTCW:
3656 case ESCAPE_FONTPREV:
3657 font++;
3658 break;
3659 default:
3660 break;
3661 }
3662 }
3663 if (font > 1)
3664 mandoc_msg(MANDOCERR_CHAR_FONT,
3665 ln, (int)(vp - buf->buf), "%s", vp);
3666
3667 /*
3668 * Approximate the effect of .char using the .tr tables.
3669 * XXX In groff, .char and .tr interact differently.
3670 */
3671
3672 if (ksz == 1) {
3673 if (r->xtab == NULL)
3674 r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3675 assert((unsigned int)*kp < 128);
3676 free(r->xtab[(int)*kp].p);
3677 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3678 "%s%s", vp, font ? "\fP" : "");
3679 } else {
3680 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3681 if (font)
3682 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3683 }
3684 return ROFF_IGN;
3685 }
3686
3687 static int
3688 roff_ec(ROFF_ARGS)
3689 {
3690 const char *p;
3691
3692 p = buf->buf + pos;
3693 if (*p == '\0')
3694 r->escape = '\\';
3695 else {
3696 r->escape = *p;
3697 if (*++p != '\0')
3698 mandoc_msg(MANDOCERR_ARG_EXCESS, ln,
3699 (int)(p - buf->buf), "ec ... %s", p);
3700 }
3701 return ROFF_IGN;
3702 }
3703
3704 static int
3705 roff_eo(ROFF_ARGS)
3706 {
3707 r->escape = '\0';
3708 if (buf->buf[pos] != '\0')
3709 mandoc_msg(MANDOCERR_ARG_SKIP,
3710 ln, pos, "eo %s", buf->buf + pos);
3711 return ROFF_IGN;
3712 }
3713
3714 static int
3715 roff_nop(ROFF_ARGS)
3716 {
3717 while (buf->buf[pos] == ' ')
3718 pos++;
3719 *offs = pos;
3720 return ROFF_RERUN;
3721 }
3722
3723 static int
3724 roff_tr(ROFF_ARGS)
3725 {
3726 const char *p, *first, *second;
3727 size_t fsz, ssz;
3728 enum mandoc_esc esc;
3729
3730 p = buf->buf + pos;
3731
3732 if (*p == '\0') {
3733 mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr");
3734 return ROFF_IGN;
3735 }
3736
3737 while (*p != '\0') {
3738 fsz = ssz = 1;
3739
3740 first = p++;
3741 if (*first == '\\') {
3742 esc = mandoc_escape(&p, NULL, NULL);
3743 if (esc == ESCAPE_ERROR) {
3744 mandoc_msg(MANDOCERR_ESC_BAD, ln,
3745 (int)(p - buf->buf), "%s", first);
3746 return ROFF_IGN;
3747 }
3748 fsz = (size_t)(p - first);
3749 }
3750
3751 second = p++;
3752 if (*second == '\\') {
3753 esc = mandoc_escape(&p, NULL, NULL);
3754 if (esc == ESCAPE_ERROR) {
3755 mandoc_msg(MANDOCERR_ESC_BAD, ln,
3756 (int)(p - buf->buf), "%s", second);
3757 return ROFF_IGN;
3758 }
3759 ssz = (size_t)(p - second);
3760 } else if (*second == '\0') {
3761 mandoc_msg(MANDOCERR_TR_ODD, ln,
3762 (int)(first - buf->buf), "tr %s", first);
3763 second = " ";
3764 p--;
3765 }
3766
3767 if (fsz > 1) {
3768 roff_setstrn(&r->xmbtab, first, fsz,
3769 second, ssz, 0);
3770 continue;
3771 }
3772
3773 if (r->xtab == NULL)
3774 r->xtab = mandoc_calloc(128,
3775 sizeof(struct roffstr));
3776
3777 free(r->xtab[(int)*first].p);
3778 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3779 r->xtab[(int)*first].sz = ssz;
3780 }
3781
3782 return ROFF_IGN;
3783 }
3784
3785 /*
3786 * Implementation of the .return request.
3787 * There is no need to call roff_userret() from here.
3788 * The read module will call that after rewinding the reader stack
3789 * to the place from where the current macro was called.
3790 */
3791 static int
3792 roff_return(ROFF_ARGS)
3793 {
3794 if (r->mstackpos >= 0)
3795 return ROFF_IGN | ROFF_USERRET;
3796
3797 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return");
3798 return ROFF_IGN;
3799 }
3800
3801 static int
3802 roff_rn(ROFF_ARGS)
3803 {
3804 const char *value;
3805 char *oldn, *newn, *end;
3806 size_t oldsz, newsz;
3807 int deftype;
3808
3809 oldn = newn = buf->buf + pos;
3810 if (*oldn == '\0')
3811 return ROFF_IGN;
3812
3813 oldsz = roff_getname(r, &newn, ln, pos);
3814 if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0')
3815 return ROFF_IGN;
3816
3817 end = newn;
3818 newsz = roff_getname(r, &end, ln, newn - buf->buf);
3819 if (newsz == 0)
3820 return ROFF_IGN;
3821
3822 deftype = ROFFDEF_ANY;
3823 value = roff_getstrn(r, oldn, oldsz, &deftype);
3824 switch (deftype) {
3825 case ROFFDEF_USER:
3826 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3827 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3828 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3829 break;
3830 case ROFFDEF_PRE:
3831 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3832 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3833 break;
3834 case ROFFDEF_REN:
3835 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3836 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3837 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3838 break;
3839 case ROFFDEF_STD:
3840 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3841 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3842 break;
3843 default:
3844 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3845 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3846 break;
3847 }
3848 return ROFF_IGN;
3849 }
3850
3851 static int
3852 roff_shift(ROFF_ARGS)
3853 {
3854 struct mctx *ctx;
3855 int levels, i;
3856
3857 levels = 1;
3858 if (buf->buf[pos] != '\0' &&
3859 roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
3860 mandoc_msg(MANDOCERR_CE_NONUM,
3861 ln, pos, "shift %s", buf->buf + pos);
3862 levels = 1;
3863 }
3864 if (r->mstackpos < 0) {
3865 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift");
3866 return ROFF_IGN;
3867 }
3868 ctx = r->mstack + r->mstackpos;
3869 if (levels > ctx->argc) {
3870 mandoc_msg(MANDOCERR_SHIFT,
3871 ln, pos, "%d, but max is %d", levels, ctx->argc);
3872 levels = ctx->argc;
3873 }
3874 if (levels == 0)
3875 return ROFF_IGN;
3876 for (i = 0; i < levels; i++)
3877 free(ctx->argv[i]);
3878 ctx->argc -= levels;
3879 for (i = 0; i < ctx->argc; i++)
3880 ctx->argv[i] = ctx->argv[i + levels];
3881 return ROFF_IGN;
3882 }
3883
3884 static int
3885 roff_so(ROFF_ARGS)
3886 {
3887 char *name, *cp;
3888
3889 name = buf->buf + pos;
3890 mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name);
3891
3892 /*
3893 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3894 * opening anything that's not in our cwd or anything beneath
3895 * it. Thus, explicitly disallow traversing up the file-system
3896 * or using absolute paths.
3897 */
3898
3899 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3900 mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name);
3901 buf->sz = mandoc_asprintf(&cp,
3902 ".sp\nSee the file %s.\n.sp", name) + 1;
3903 free(buf->buf);
3904 buf->buf = cp;
3905 *offs = 0;
3906 return ROFF_REPARSE;
3907 }
3908
3909 *offs = pos;
3910 return ROFF_SO;
3911 }
3912
3913 /* --- user defined strings and macros ------------------------------------ */
3914
3915 static int
3916 roff_userdef(ROFF_ARGS)
3917 {
3918 struct mctx *ctx;
3919 char *arg, *ap, *dst, *src;
3920 size_t sz;
3921
3922 /* If the macro is empty, ignore it altogether. */
3923
3924 if (*r->current_string == '\0')
3925 return ROFF_IGN;
3926
3927 /* Initialize a new macro stack context. */
3928
3929 if (++r->mstackpos == r->mstacksz) {
3930 r->mstack = mandoc_recallocarray(r->mstack,
3931 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
3932 r->mstacksz += 8;
3933 }
3934 ctx = r->mstack + r->mstackpos;
3935 ctx->argsz = 0;
3936 ctx->argc = 0;
3937 ctx->argv = NULL;
3938
3939 /*
3940 * Collect pointers to macro argument strings,
3941 * NUL-terminating them and escaping quotes.
3942 */
3943
3944 src = buf->buf + pos;
3945 while (*src != '\0') {
3946 if (ctx->argc == ctx->argsz) {
3947 ctx->argsz += 8;
3948 ctx->argv = mandoc_reallocarray(ctx->argv,
3949 ctx->argsz, sizeof(*ctx->argv));
3950 }
3951 arg = roff_getarg(r, &src, ln, &pos);
3952 sz = 1; /* For the terminating NUL. */
3953 for (ap = arg; *ap != '\0'; ap++)
3954 sz += *ap == '"' ? 4 : 1;
3955 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
3956 for (ap = arg; *ap != '\0'; ap++) {
3957 if (*ap == '"') {
3958 memcpy(dst, "\\(dq", 4);
3959 dst += 4;
3960 } else
3961 *dst++ = *ap;
3962 }
3963 *dst = '\0';
3964 free(arg);
3965 }
3966
3967 /* Replace the macro invocation by the macro definition. */
3968
3969 free(buf->buf);
3970 buf->buf = mandoc_strdup(r->current_string);
3971 buf->sz = strlen(buf->buf) + 1;
3972 *offs = 0;
3973
3974 return buf->buf[buf->sz - 2] == '\n' ?
3975 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
3976 }
3977
3978 /*
3979 * Calling a high-level macro that was renamed with .rn.
3980 * r->current_string has already been set up by roff_parse().
3981 */
3982 static int
3983 roff_renamed(ROFF_ARGS)
3984 {
3985 char *nbuf;
3986
3987 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
3988 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
3989 free(buf->buf);
3990 buf->buf = nbuf;
3991 *offs = 0;
3992 return ROFF_CONT;
3993 }
3994
3995 /*
3996 * Measure the length in bytes of the roff identifier at *cpp
3997 * and advance the pointer to the next word.
3998 */
3999 static size_t
4000 roff_getname(struct roff *r, char **cpp, int ln, int pos)
4001 {
4002 char *name, *cp;
4003 size_t namesz;
4004
4005 name = *cpp;
4006 if (*name == '\0')
4007 return 0;
4008
4009 /* Advance cp to the byte after the end of the name. */
4010
4011 for (cp = name; 1; cp++) {
4012 namesz = cp - name;
4013 if (*cp == '\0')
4014 break;
4015 if (*cp == ' ' || *cp == '\t') {
4016 cp++;
4017 break;
4018 }
4019 if (*cp != '\\')
4020 continue;
4021 if (cp[1] == '{' || cp[1] == '}')
4022 break;
4023 if (*++cp == '\\')
4024 continue;
4025 mandoc_msg(MANDOCERR_NAMESC, ln, pos,
4026 "%.*s", (int)(cp - name + 1), name);
4027 mandoc_escape((const char **)&cp, NULL, NULL);
4028 break;
4029 }
4030
4031 /* Read past spaces. */
4032
4033 while (*cp == ' ')
4034 cp++;
4035
4036 *cpp = cp;
4037 return namesz;
4038 }
4039
4040 /*
4041 * Store *string into the user-defined string called *name.
4042 * To clear an existing entry, call with (*r, *name, NULL, 0).
4043 * append == 0: replace mode
4044 * append == 1: single-line append mode
4045 * append == 2: multiline append mode, append '\n' after each call
4046 */
4047 static void
4048 roff_setstr(struct roff *r, const char *name, const char *string,
4049 int append)
4050 {
4051 size_t namesz;
4052
4053 namesz = strlen(name);
4054 roff_setstrn(&r->strtab, name, namesz, string,
4055 string ? strlen(string) : 0, append);
4056 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
4057 }
4058
4059 static void
4060 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
4061 const char *string, size_t stringsz, int append)
4062 {
4063 struct roffkv *n;
4064 char *c;
4065 int i;
4066 size_t oldch, newch;
4067
4068 /* Search for an existing string with the same name. */
4069 n = *r;
4070
4071 while (n && (namesz != n->key.sz ||
4072 strncmp(n->key.p, name, namesz)))
4073 n = n->next;
4074
4075 if (NULL == n) {
4076 /* Create a new string table entry. */
4077 n = mandoc_malloc(sizeof(struct roffkv));
4078 n->key.p = mandoc_strndup(name, namesz);
4079 n->key.sz = namesz;
4080 n->val.p = NULL;
4081 n->val.sz = 0;
4082 n->next = *r;
4083 *r = n;
4084 } else if (0 == append) {
4085 free(n->val.p);
4086 n->val.p = NULL;
4087 n->val.sz = 0;
4088 }
4089
4090 if (NULL == string)
4091 return;
4092
4093 /*
4094 * One additional byte for the '\n' in multiline mode,
4095 * and one for the terminating '\0'.
4096 */
4097 newch = stringsz + (1 < append ? 2u : 1u);
4098
4099 if (NULL == n->val.p) {
4100 n->val.p = mandoc_malloc(newch);
4101 *n->val.p = '\0';
4102 oldch = 0;
4103 } else {
4104 oldch = n->val.sz;
4105 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
4106 }
4107
4108 /* Skip existing content in the destination buffer. */
4109 c = n->val.p + (int)oldch;
4110
4111 /* Append new content to the destination buffer. */
4112 i = 0;
4113 while (i < (int)stringsz) {
4114 /*
4115 * Rudimentary roff copy mode:
4116 * Handle escaped backslashes.
4117 */
4118 if ('\\' == string[i] && '\\' == string[i + 1])
4119 i++;
4120 *c++ = string[i++];
4121 }
4122
4123 /* Append terminating bytes. */
4124 if (1 < append)
4125 *c++ = '\n';
4126
4127 *c = '\0';
4128 n->val.sz = (int)(c - n->val.p);
4129 }
4130
4131 static const char *
4132 roff_getstrn(struct roff *r, const char *name, size_t len,
4133 int *deftype)
4134 {
4135 const struct roffkv *n;
4136 int found, i;
4137 enum roff_tok tok;
4138
4139 found = 0;
4140 for (n = r->strtab; n != NULL; n = n->next) {
4141 if (strncmp(name, n->key.p, len) != 0 ||
4142 n->key.p[len] != '\0' || n->val.p == NULL)
4143 continue;
4144 if (*deftype & ROFFDEF_USER) {
4145 *deftype = ROFFDEF_USER;
4146 return n->val.p;
4147 } else {
4148 found = 1;
4149 break;
4150 }
4151 }
4152 for (n = r->rentab; n != NULL; n = n->next) {
4153 if (strncmp(name, n->key.p, len) != 0 ||
4154 n->key.p[len] != '\0' || n->val.p == NULL)
4155 continue;
4156 if (*deftype & ROFFDEF_REN) {
4157 *deftype = ROFFDEF_REN;
4158 return n->val.p;
4159 } else {
4160 found = 1;
4161 break;
4162 }
4163 }
4164 for (i = 0; i < PREDEFS_MAX; i++) {
4165 if (strncmp(name, predefs[i].name, len) != 0 ||
4166 predefs[i].name[len] != '\0')
4167 continue;
4168 if (*deftype & ROFFDEF_PRE) {
4169 *deftype = ROFFDEF_PRE;
4170 return predefs[i].str;
4171 } else {
4172 found = 1;
4173 break;
4174 }
4175 }
4176 if (r->man->meta.macroset != MACROSET_MAN) {
4177 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
4178 if (strncmp(name, roff_name[tok], len) != 0 ||
4179 roff_name[tok][len] != '\0')
4180 continue;
4181 if (*deftype & ROFFDEF_STD) {
4182 *deftype = ROFFDEF_STD;
4183 return NULL;
4184 } else {
4185 found = 1;
4186 break;
4187 }
4188 }
4189 }
4190 if (r->man->meta.macroset != MACROSET_MDOC) {
4191 for (tok = MAN_TH; tok < MAN_MAX; tok++) {
4192 if (strncmp(name, roff_name[tok], len) != 0 ||
4193 roff_name[tok][len] != '\0')
4194 continue;
4195 if (*deftype & ROFFDEF_STD) {
4196 *deftype = ROFFDEF_STD;
4197 return NULL;
4198 } else {
4199 found = 1;
4200 break;
4201 }
4202 }
4203 }
4204
4205 if (found == 0 && *deftype != ROFFDEF_ANY) {
4206 if (*deftype & ROFFDEF_REN) {
4207 /*
4208 * This might still be a request,
4209 * so do not treat it as undefined yet.
4210 */
4211 *deftype = ROFFDEF_UNDEF;
4212 return NULL;
4213 }
4214
4215 /* Using an undefined string defines it to be empty. */
4216
4217 roff_setstrn(&r->strtab, name, len, "", 0, 0);
4218 roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
4219 }
4220
4221 *deftype = 0;
4222 return NULL;
4223 }
4224
4225 static void
4226 roff_freestr(struct roffkv *r)
4227 {
4228 struct roffkv *n, *nn;
4229
4230 for (n = r; n; n = nn) {
4231 free(n->key.p);
4232 free(n->val.p);
4233 nn = n->next;
4234 free(n);
4235 }
4236 }
4237
4238 /* --- accessors and utility functions ------------------------------------ */
4239
4240 /*
4241 * Duplicate an input string, making the appropriate character
4242 * conversations (as stipulated by `tr') along the way.
4243 * Returns a heap-allocated string with all the replacements made.
4244 */
4245 char *
4246 roff_strdup(const struct roff *r, const char *p)
4247 {
4248 const struct roffkv *cp;
4249 char *res;
4250 const char *pp;
4251 size_t ssz, sz;
4252 enum mandoc_esc esc;
4253
4254 if (NULL == r->xmbtab && NULL == r->xtab)
4255 return mandoc_strdup(p);
4256 else if ('\0' == *p)
4257 return mandoc_strdup("");
4258
4259 /*
4260 * Step through each character looking for term matches
4261 * (remember that a `tr' can be invoked with an escape, which is
4262 * a glyph but the escape is multi-character).
4263 * We only do this if the character hash has been initialised
4264 * and the string is >0 length.
4265 */
4266
4267 res = NULL;
4268 ssz = 0;
4269
4270 while ('\0' != *p) {
4271 assert((unsigned int)*p < 128);
4272 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4273 sz = r->xtab[(int)*p].sz;
4274 res = mandoc_realloc(res, ssz + sz + 1);
4275 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4276 ssz += sz;
4277 p++;
4278 continue;
4279 } else if ('\\' != *p) {
4280 res = mandoc_realloc(res, ssz + 2);
4281 res[ssz++] = *p++;
4282 continue;
4283 }
4284
4285 /* Search for term matches. */
4286 for (cp = r->xmbtab; cp; cp = cp->next)
4287 if (0 == strncmp(p, cp->key.p, cp->key.sz))
4288 break;
4289
4290 if (NULL != cp) {
4291 /*
4292 * A match has been found.
4293 * Append the match to the array and move
4294 * forward by its keysize.
4295 */
4296 res = mandoc_realloc(res,
4297 ssz + cp->val.sz + 1);
4298 memcpy(res + ssz, cp->val.p, cp->val.sz);
4299 ssz += cp->val.sz;
4300 p += (int)cp->key.sz;
4301 continue;
4302 }
4303
4304 /*
4305 * Handle escapes carefully: we need to copy
4306 * over just the escape itself, or else we might
4307 * do replacements within the escape itself.
4308 * Make sure to pass along the bogus string.
4309 */
4310 pp = p++;
4311 esc = mandoc_escape(&p, NULL, NULL);
4312 if (ESCAPE_ERROR == esc) {
4313 sz = strlen(pp);
4314 res = mandoc_realloc(res, ssz + sz + 1);
4315 memcpy(res + ssz, pp, sz);
4316 break;
4317 }
4318 /*
4319 * We bail out on bad escapes.
4320 * No need to warn: we already did so when
4321 * roff_expand() was called.
4322 */
4323 sz = (int)(p - pp);
4324 res = mandoc_realloc(res, ssz + sz + 1);
4325 memcpy(res + ssz, pp, sz);
4326 ssz += sz;
4327 }
4328
4329 res[(int)ssz] = '\0';
4330 return res;
4331 }
4332
4333 int
4334 roff_getformat(const struct roff *r)
4335 {
4336
4337 return r->format;
4338 }
4339
4340 /*
4341 * Find out whether a line is a macro line or not.
4342 * If it is, adjust the current position and return one; if it isn't,
4343 * return zero and don't change the current position.
4344 * If the control character has been set with `.cc', then let that grain
4345 * precedence.
4346 * This is slighly contrary to groff, where using the non-breaking
4347 * control character when `cc' has been invoked will cause the
4348 * non-breaking macro contents to be printed verbatim.
4349 */
4350 int
4351 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4352 {
4353 int pos;
4354
4355 pos = *ppos;
4356
4357 if (r->control != '\0' && cp[pos] == r->control)
4358 pos++;
4359 else if (r->control != '\0')
4360 return 0;
4361 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4362 pos += 2;
4363 else if ('.' == cp[pos] || '\'' == cp[pos])
4364 pos++;
4365 else
4366 return 0;
4367
4368 while (' ' == cp[pos] || '\t' == cp[pos])
4369 pos++;
4370
4371 *ppos = pos;
4372 return 1;
4373 }