]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
ca45b0f68b7a37eb21d232f2b16c548d9f4c66c0
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.385 2022/04/30 11:32:42 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010-2015, 2017-2022 Ingo Schwarze <schwarze@openbsd.org>
4 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 * Implementation of the roff(7) parser for mandoc(1).
19 */
20 #include "config.h"
21
22 #include <sys/types.h>
23
24 #include <assert.h>
25 #include <ctype.h>
26 #include <limits.h>
27 #include <stddef.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32
33 #include "mandoc_aux.h"
34 #include "mandoc_ohash.h"
35 #include "mandoc.h"
36 #include "roff.h"
37 #include "mandoc_parse.h"
38 #include "libmandoc.h"
39 #include "roff_int.h"
40 #include "tbl_parse.h"
41 #include "eqn_parse.h"
42
/*
 * ASCII_ESC is used to signal from roff_getarg() to roff_expand()
 * that an escape sequence resulted from copy-in processing and
 * needs to be checked or interpolated.  As it is used nowhere
 * else, it is defined here rather than in a header file.
 */
#define	ASCII_ESC	27

/* Maximum number of string expansions per line, to break infinite loops. */
#define	EXPAND_LIMIT	1000

/*
 * Types of definitions of macros and strings.
 * Every type occupies its own bit, so types can be OR'ed into masks;
 * ROFFDEF_ANY is the mask of the four "defined" types and
 * deliberately excludes ROFFDEF_UNDEF.
 */
#define	ROFFDEF_USER	(1 << 1)  /* User-defined. */
#define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
#define	ROFFDEF_REN	(1 << 3)  /* Renamed standard macro. */
#define	ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
			 ROFFDEF_REN | ROFFDEF_STD)
#define	ROFFDEF_UNDEF	(1 << 5)  /* Completely undefined. */
62
63 /* --- data types --------------------------------------------------------- */
64
/*
 * An incredibly-simple string buffer:
 * a pointer to the bytes together with their cached length.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
72
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * The string, rename, and multi-byte translation tables in
 * struct roff are all lists of this type.
 */
struct	roffkv {
	struct roffstr	 key; /* name to look up */
	struct roffstr	 val; /* content associated with the name */
	struct roffkv	*next; /* next in list */
};
81
/*
 * A single number register as part of a singly-linked list.
 */
struct	roffreg {
	struct roffstr	 key; /* name of the register */
	int		 val; /* value of the register */
	int		 step; /* presumably the auto-increment step - TODO confirm */
	struct roffreg	*next; /* next in list */
};
91
/*
 * Association of request and macro names with token IDs.
 * These are the entries of the hash tables built by roffhash_alloc(),
 * which allocates each entry large enough to hold the name
 * including its terminating NUL byte.
 */
struct	roffreq {
	enum roff_tok	 tok; /* token ID the name maps to */
	char		 name[]; /* hash key: nil-terminated name */
};
99
/*
 * A macro processing context.
 * More than one is needed when macro calls are nested.
 */
struct	mctx {
	char		**argv; /* argument array; the array itself is freed in roff_free() */
	int		 argc; /* presumably the number of arguments in use - TODO confirm */
	int		 argsz; /* presumably the allocated size of argv - TODO confirm */
};
109
/*
 * The state of one roff(7) parser.
 * Objects are created by roff_alloc(), returned to their initial
 * state by roff_reset(), and destroyed by roff_free().
 * Both stack positions are -1 while the respective stack is empty.
 */
struct	roff {
	struct roff_man	*man; /* mdoc or man parser */
	struct roffnode	*last; /* leaf of stack */
	struct mctx	*mstack; /* stack of macro contexts */
	int		*rstack; /* stack of inverted `ie' values */
	struct ohash	*reqtab; /* request lookup table */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*rentab; /* renamed strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* equation parser */
	struct eqn_node	*eqn; /* active equation parser */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 mstacksz; /* current size of mstack */
	int		 mstackpos; /* position in mstack */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack */
	int		 format; /* current file in mdoc or man format */
	char		 control; /* control character */
	char		 escape; /* escape character */
};
137
/*
 * A macro definition, condition, or ignored block.
 * Nodes form a stack linked through the parent pointers;
 * they are created by roffnode_push() and destroyed by roffnode_pop().
 */
struct	roffnode {
	enum roff_tok	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* custom end macro of the block */
	int		 endspan; /* scope to: 1=eol 2=next line -1=\} */
	int		 rule; /* content is: 1=evaluated 0=skipped */
};
151
/*
 * The common parameter list of all request handler functions,
 * kept in a macro such that prototypes and definitions stay in sync.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum roff_tok tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Pointer to a request handler function taking ROFF_ARGS. */
typedef	int (*roffproc)(ROFF_ARGS);
161
/*
 * The handler functions of one request or macro.
 * The roffs[] array contains one such entry per token;
 * handlers that do not apply are NULL.
 */
struct	roffmac {
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags; /* zero or ROFFMAC_STRUCT */
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
};
169
/*
 * One predefined string: a name and the text it stands for.
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Expand to one initializer for an element of a struct predef array,
 * including the trailing comma.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
177
178 /* --- function prototypes ------------------------------------------------ */
179
180 static int roffnode_cleanscope(struct roff *);
181 static int roffnode_pop(struct roff *);
182 static void roffnode_push(struct roff *, enum roff_tok,
183 const char *, int, int);
184 static void roff_addtbl(struct roff_man *, int, struct tbl_node *);
185 static int roff_als(ROFF_ARGS);
186 static int roff_block(ROFF_ARGS);
187 static int roff_block_text(ROFF_ARGS);
188 static int roff_block_sub(ROFF_ARGS);
189 static int roff_break(ROFF_ARGS);
190 static int roff_cblock(ROFF_ARGS);
191 static int roff_cc(ROFF_ARGS);
192 static int roff_ccond(struct roff *, int, int);
193 static int roff_char(ROFF_ARGS);
194 static int roff_cond(ROFF_ARGS);
195 static int roff_cond_checkend(ROFF_ARGS);
196 static int roff_cond_text(ROFF_ARGS);
197 static int roff_cond_sub(ROFF_ARGS);
198 static int roff_ds(ROFF_ARGS);
199 static int roff_ec(ROFF_ARGS);
200 static int roff_eo(ROFF_ARGS);
201 static int roff_eqndelim(struct roff *, struct buf *, int);
202 static int roff_evalcond(struct roff *, int, char *, int *);
203 static int roff_evalnum(struct roff *, int,
204 const char *, int *, int *, int);
205 static int roff_evalpar(struct roff *, int,
206 const char *, int *, int *, int);
207 static int roff_evalstrcond(const char *, int *);
208 static int roff_expand(struct roff *, struct buf *,
209 int, int, char);
210 static void roff_free1(struct roff *);
211 static void roff_freereg(struct roffreg *);
212 static void roff_freestr(struct roffkv *);
213 static size_t roff_getname(struct roff *, char **, int, int);
214 static int roff_getnum(const char *, int *, int *, int);
215 static int roff_getop(const char *, int *, char *);
216 static int roff_getregn(struct roff *,
217 const char *, size_t, char);
218 static int roff_getregro(const struct roff *,
219 const char *name);
220 static const char *roff_getstrn(struct roff *,
221 const char *, size_t, int *);
222 static int roff_hasregn(const struct roff *,
223 const char *, size_t);
224 static int roff_insec(ROFF_ARGS);
225 static int roff_it(ROFF_ARGS);
226 static int roff_line_ignore(ROFF_ARGS);
227 static void roff_man_alloc1(struct roff_man *);
228 static void roff_man_free1(struct roff_man *);
229 static int roff_manyarg(ROFF_ARGS);
230 static int roff_mc(ROFF_ARGS);
231 static int roff_noarg(ROFF_ARGS);
232 static int roff_nop(ROFF_ARGS);
233 static int roff_nr(ROFF_ARGS);
234 static int roff_onearg(ROFF_ARGS);
235 static enum roff_tok roff_parse(struct roff *, char *, int *,
236 int, int);
237 static int roff_parsetext(struct roff *, struct buf *,
238 int, int *);
239 static int roff_renamed(ROFF_ARGS);
240 static int roff_return(ROFF_ARGS);
241 static int roff_rm(ROFF_ARGS);
242 static int roff_rn(ROFF_ARGS);
243 static int roff_rr(ROFF_ARGS);
244 static void roff_setregn(struct roff *, const char *,
245 size_t, int, char, int);
246 static void roff_setstr(struct roff *,
247 const char *, const char *, int);
248 static void roff_setstrn(struct roffkv **, const char *,
249 size_t, const char *, size_t, int);
250 static int roff_shift(ROFF_ARGS);
251 static int roff_so(ROFF_ARGS);
252 static int roff_tr(ROFF_ARGS);
253 static int roff_Dd(ROFF_ARGS);
254 static int roff_TE(ROFF_ARGS);
255 static int roff_TS(ROFF_ARGS);
256 static int roff_EQ(ROFF_ARGS);
257 static int roff_EN(ROFF_ARGS);
258 static int roff_T_(ROFF_ARGS);
259 static int roff_unsupp(ROFF_ARGS);
260 static int roff_userdef(ROFF_ARGS);
261
262 /* --- constant data ------------------------------------------------------ */
263
/* Flag bits controlling how numeric expressions are parsed. */
#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */
266
/*
 * Names of all requests and macros, indexed by token ID;
 * roffhash_alloc() reads this table as roff_name[tok].
 * NULL entries are slots that have no name, and roffhash_alloc()
 * skips them.  The order has to agree with the token numbering
 * and with the roffs[] array, so never reorder entries.
 */
const char *__roff_name[MAN_MAX + 1] = {
	/* Same positions as the roffs[] entries up to ROFF_MAX. */
	"br", "ce", "fi", "ft",
	"ll", "mc", "nf",
	"po", "rj", "sp",
	"ta", "ti", NULL,
	/* Same positions as the remaining named roffs[] entries. */
	"ab", "ad", "af", "aln",
	"als", "am", "am1", "ami",
	"ami1", "as", "as1", "asciify",
	"backtrace", "bd", "bleedat", "blm",
	"box", "boxa", "bp", "BP",
	"break", "breakchar", "brnl", "brp",
	"brpnl", "c2", "cc",
	"cf", "cflags", "ch", "char",
	"chop", "class", "close", "CL",
	"color", "composite", "continue", "cp",
	"cropat", "cs", "cu", "da",
	"dch", "Dd", "de", "de1",
	"defcolor", "dei", "dei1", "device",
	"devicem", "di", "do", "ds",
	"ds1", "dwh", "dt", "ec",
	"ecr", "ecs", "el", "em",
	"EN", "eo", "EP", "EQ",
	"errprint", "ev", "evc", "ex",
	"fallback", "fam", "fc", "fchar",
	"fcolor", "fdeferlig", "feature", "fkern",
	"fl", "flig", "fp", "fps",
	"fschar", "fspacewidth", "fspecial", "ftr",
	"fzoom", "gcolor", "hc", "hcode",
	"hidechar", "hla", "hlm", "hpf",
	"hpfa", "hpfcode", "hw", "hy",
	"hylang", "hylen", "hym", "hypp",
	"hys", "ie", "if", "ig",
	"index", "it", "itc", "IX",
	"kern", "kernafter", "kernbefore", "kernpair",
	"lc", "lc_ctype", "lds", "length",
	"letadj", "lf", "lg", "lhang",
	"linetabs", "lnr", "lnrf", "lpfx",
	"ls", "lsm", "lt",
	"mediasize", "minss", "mk", "mso",
	"na", "ne", "nh", "nhychar",
	"nm", "nn", "nop", "nr",
	"nrf", "nroff", "ns", "nx",
	"open", "opena", "os", "output",
	"padj", "papersize", "pc", "pev",
	"pi", "PI", "pl", "pm",
	"pn", "pnr", "ps",
	"psbb", "pshape", "pso", "ptr",
	"pvs", "rchar", "rd", "recursionlimit",
	"return", "rfschar", "rhang",
	"rm", "rn", "rnn", "rr",
	"rs", "rt", "schar", "sentchar",
	"shc", "shift", "sizes", "so",
	"spacewidth", "special", "spreadwarn", "ss",
	"sty", "substring", "sv", "sy",
	"T&", "tc", "TE",
	"TH", "tkf", "tl",
	"tm", "tm1", "tmc", "tr",
	"track", "transchar", "trf", "trimat",
	"trin", "trnt", "troff", "TS",
	"uf", "ul", "unformat", "unwatch",
	"unwatchn", "vpt", "vs", "warn",
	"warnscale", "watch", "watchlength", "watchn",
	/*
	 * NOTE(review): the two nameless slots following "." presumably
	 * belong to ROFF_RENAMED and ROFF_USERDEF, and "text" presumably
	 * to TOKEN_NONE - confirm against the enum roff_tok declaration.
	 */
	"wh", "while", "write", "writec",
	"writem", "xflag", ".", NULL,
	NULL, "text",
	/* Presumably the mdoc(7) macros, up to a NULL separator. */
	"Dd", "Dt", "Os", "Sh",
	"Ss", "Pp", "D1", "Dl",
	"Bd", "Ed", "Bl", "El",
	"It", "Ad", "An", "Ap",
	"Ar", "Cd", "Cm", "Dv",
	"Er", "Ev", "Ex", "Fa",
	"Fd", "Fl", "Fn", "Ft",
	"Ic", "In", "Li", "Nd",
	"Nm", "Op", "Ot", "Pa",
	"Rv", "St", "Va", "Vt",
	"Xr", "%A", "%B", "%D",
	"%I", "%J", "%N", "%O",
	"%P", "%R", "%T", "%V",
	"Ac", "Ao", "Aq", "At",
	"Bc", "Bf", "Bo", "Bq",
	"Bsx", "Bx", "Db", "Dc",
	"Do", "Dq", "Ec", "Ef",
	"Em", "Eo", "Fx", "Ms",
	"No", "Ns", "Nx", "Ox",
	"Pc", "Pf", "Po", "Pq",
	"Qc", "Ql", "Qo", "Qq",
	"Re", "Rs", "Sc", "So",
	"Sq", "Sm", "Sx", "Sy",
	"Tn", "Ux", "Xc", "Xo",
	"Fo", "Fc", "Oo", "Oc",
	"Bk", "Ek", "Bt", "Hf",
	"Fr", "Ud", "Lb", "Lp",
	"Lk", "Mt", "Brq", "Bro",
	"Brc", "%C", "Es", "En",
	"Dx", "%Q", "%U", "Ta",
	"Tg", NULL,
	/* Presumably the man(7) macros, up to the final NULL. */
	"TH", "SH", "SS", "TP",
	"TQ",
	"LP", "PP", "P", "IP",
	"HP", "SM", "SB", "BI",
	"IB", "BR", "RB", "R",
	"B", "I", "IR", "RI",
	"RE", "RS", "DT", "UC",
	"PD", "AT", "in",
	"SY", "YS", "OP",
	"EX", "EE", "UR",
	"UE", "MT", "ME", NULL
};
/* Read-only view of the table above. */
const	char *const *roff_name = __roff_name;
376
/*
 * Handler functions for all roff requests.
 * The entries are positional: the comment on each entry names the
 * request it belongs to, in the same order as the names in
 * __roff_name[], so never reorder or remove entries.
 */
static	struct roffmac	 roffs[TOKEN_NONE] = {
	{ roff_noarg, NULL, NULL, 0 },  /* br */
	{ roff_onearg, NULL, NULL, 0 },  /* ce */
	{ roff_noarg, NULL, NULL, 0 },  /* fi */
	{ roff_onearg, NULL, NULL, 0 },  /* ft */
	{ roff_onearg, NULL, NULL, 0 },  /* ll */
	{ roff_mc, NULL, NULL, 0 },  /* mc */
	{ roff_noarg, NULL, NULL, 0 },  /* nf */
	{ roff_onearg, NULL, NULL, 0 },  /* po */
	{ roff_onearg, NULL, NULL, 0 },  /* rj */
	{ roff_onearg, NULL, NULL, 0 },  /* sp */
	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
	{ roff_onearg, NULL, NULL, 0 },  /* ti */
	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
	{ roff_als, NULL, NULL, 0 },  /* als */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
	{ roff_ds, NULL, NULL, 0 },  /* as */
	{ roff_ds, NULL, NULL, 0 },  /* as1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
	{ roff_unsupp, NULL, NULL, 0 },  /* box */
	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
	{ roff_break, NULL, NULL, 0 },  /* break */
	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
	{ roff_noarg, NULL, NULL, 0 },  /* brp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
	{ roff_cc, NULL, NULL, 0 },  /* cc */
	{ roff_insec, NULL, NULL, 0 },  /* cf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
	{ roff_char, NULL, NULL, 0 },  /* char */
	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
	{ roff_insec, NULL, NULL, 0 },  /* close */
	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
	{ roff_unsupp, NULL, NULL, 0 },  /* da */
	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* device */
	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
	{ roff_unsupp, NULL, NULL, 0 },  /* di */
	{ roff_unsupp, NULL, NULL, 0 },  /* do */
	{ roff_ds, NULL, NULL, 0 },  /* ds */
	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
	{ roff_ec, NULL, NULL, 0 },  /* ec */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
	{ roff_unsupp, NULL, NULL, 0 },  /* em */
	{ roff_EN, NULL, NULL, 0 },  /* EN */
	{ roff_eo, NULL, NULL, 0 },  /* eo */
	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
	{ roff_unsupp, NULL, NULL, 0 },  /* index */
	{ roff_it, NULL, NULL, 0 },  /* it */
	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
	{ roff_unsupp, NULL, NULL, 0 },  /* length */
	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
	{ roff_insec, NULL, NULL, 0 },  /* lf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
	{ roff_insec, NULL, NULL, 0 },  /* mso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
	{ roff_nop, NULL, NULL, 0 },  /* nop */
	{ roff_nr, NULL, NULL, 0 },  /* nr */
	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
	{ roff_insec, NULL, NULL, 0 },  /* nx */
	{ roff_insec, NULL, NULL, 0 },  /* open */
	{ roff_insec, NULL, NULL, 0 },  /* opena */
	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
	{ roff_unsupp, NULL, NULL, 0 },  /* output */
	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
	{ roff_insec, NULL, NULL, 0 },  /* pi */
	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
	{ roff_insec, NULL, NULL, 0 },  /* pso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
	{ roff_return, NULL, NULL, 0 },  /* return */
	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
	{ roff_rm, NULL, NULL, 0 },  /* rm */
	{ roff_rn, NULL, NULL, 0 },  /* rn */
	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
	{ roff_rr, NULL, NULL, 0 },  /* rr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
	{ roff_shift, NULL, NULL, 0 },  /* shift */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
	{ roff_so, NULL, NULL, 0 },  /* so */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
	{ roff_insec, NULL, NULL, 0 },  /* sy */
	{ roff_T_, NULL, NULL, 0 },  /* T& */
	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
	{ roff_TE, NULL, NULL, 0 },  /* TE */
	{ roff_Dd, NULL, NULL, 0 },  /* TH */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
	{ roff_tr, NULL, NULL, 0 },  /* tr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
	{ roff_insec, NULL, NULL, 0 },  /* trf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
	{ roff_TS, NULL, NULL, 0 },  /* TS */
	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
	{ roff_unsupp, NULL, NULL, 0 },  /* wh */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* while */
	{ roff_insec, NULL, NULL, 0 },  /* write */
	{ roff_insec, NULL, NULL, 0 },  /* writec */
	{ roff_insec, NULL, NULL, 0 },  /* writem */
	{ roff_line_ignore, NULL, NULL, 0 },  /* xflag */
	{ roff_cblock, NULL, NULL, 0 },  /* . */
	{ roff_renamed, NULL, NULL, 0 },  /* presumably ROFF_RENAMED - TODO confirm */
	{ roff_userdef, NULL, NULL, 0 }  /* presumably ROFF_USERDEF - TODO confirm */
};
623
/*
 * Array of injected predefined strings.
 * The initializers come from the included file; presumably it consists
 * of PREDEF() invocations and PREDEFS_MAX has to match their number -
 * TODO confirm against predefs.in.
 */
#define PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
629
/*
 * Parser state kept in file-scope variables rather than in struct roff.
 * All four are reinitialized by roff_reset().
 */
static	int	 roffce_lines;	/* number of input lines to center */
static	struct roff_node *roffce_node;  /* active request */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
634
635
636 /* --- request table ------------------------------------------------------ */
637
638 struct ohash *
639 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
640 {
641 struct ohash *htab;
642 struct roffreq *req;
643 enum roff_tok tok;
644 size_t sz;
645 unsigned int slot;
646
647 htab = mandoc_malloc(sizeof(*htab));
648 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
649
650 for (tok = mintok; tok < maxtok; tok++) {
651 if (roff_name[tok] == NULL)
652 continue;
653 sz = strlen(roff_name[tok]);
654 req = mandoc_malloc(sizeof(*req) + sz + 1);
655 req->tok = tok;
656 memcpy(req->name, roff_name[tok], sz + 1);
657 slot = ohash_qlookup(htab, req->name);
658 ohash_insert(htab, slot, req);
659 }
660 return htab;
661 }
662
/*
 * Destroy a table built by roffhash_alloc():
 * free every entry, then the table itself.
 * Passing a NULL pointer is allowed and does nothing.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 iter;

	if (htab == NULL)
		return;

	entry = ohash_first(htab, &iter);
	while (entry != NULL) {
		free(entry);
		entry = ohash_next(htab, &iter);
	}
	ohash_delete(htab);
	free(htab);
}
677
678 enum roff_tok
679 roffhash_find(struct ohash *htab, const char *name, size_t sz)
680 {
681 struct roffreq *req;
682 const char *end;
683
684 if (sz) {
685 end = name + sz;
686 req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
687 } else
688 req = ohash_find(htab, ohash_qlookup(htab, name));
689 return req == NULL ? TOKEN_NONE : req->tok;
690 }
691
692 /* --- stack of request blocks -------------------------------------------- */
693
694 /*
695 * Pop the current node off of the stack of roff instructions currently
696 * pending. Return 1 if it is a loop or 0 otherwise.
697 */
698 static int
699 roffnode_pop(struct roff *r)
700 {
701 struct roffnode *p;
702 int inloop;
703
704 p = r->last;
705 inloop = p->tok == ROFF_while;
706 r->last = p->parent;
707 free(p->name);
708 free(p->end);
709 free(p);
710 return inloop;
711 }
712
713 /*
714 * Push a roff node onto the instruction stack. This must later be
715 * removed with roffnode_pop().
716 */
717 static void
718 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
719 int line, int col)
720 {
721 struct roffnode *p;
722
723 p = mandoc_calloc(1, sizeof(struct roffnode));
724 p->tok = tok;
725 if (name)
726 p->name = mandoc_strdup(name);
727 p->parent = r->last;
728 p->line = line;
729 p->col = col;
730 p->rule = p->parent ? p->parent->rule : 0;
731
732 r->last = p;
733 }
734
735 /* --- roff parser state data management ---------------------------------- */
736
737 static void
738 roff_free1(struct roff *r)
739 {
740 int i;
741
742 tbl_free(r->first_tbl);
743 r->first_tbl = r->last_tbl = r->tbl = NULL;
744
745 eqn_free(r->last_eqn);
746 r->last_eqn = r->eqn = NULL;
747
748 while (r->mstackpos >= 0)
749 roff_userret(r);
750
751 while (r->last)
752 roffnode_pop(r);
753
754 free (r->rstack);
755 r->rstack = NULL;
756 r->rstacksz = 0;
757 r->rstackpos = -1;
758
759 roff_freereg(r->regtab);
760 r->regtab = NULL;
761
762 roff_freestr(r->strtab);
763 roff_freestr(r->rentab);
764 roff_freestr(r->xmbtab);
765 r->strtab = r->rentab = r->xmbtab = NULL;
766
767 if (r->xtab)
768 for (i = 0; i < 128; i++)
769 free(r->xtab[i].p);
770 free(r->xtab);
771 r->xtab = NULL;
772 }
773
774 void
775 roff_reset(struct roff *r)
776 {
777 roff_free1(r);
778 r->options |= MPARSE_COMMENT;
779 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
780 r->control = '\0';
781 r->escape = '\\';
782 roffce_lines = 0;
783 roffce_node = NULL;
784 roffit_lines = 0;
785 roffit_macro = NULL;
786 }
787
788 void
789 roff_free(struct roff *r)
790 {
791 int i;
792
793 roff_free1(r);
794 for (i = 0; i < r->mstacksz; i++)
795 free(r->mstack[i].argv);
796 free(r->mstack);
797 roffhash_free(r->reqtab);
798 free(r);
799 }
800
801 struct roff *
802 roff_alloc(int options)
803 {
804 struct roff *r;
805
806 r = mandoc_calloc(1, sizeof(struct roff));
807 r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
808 r->options = options | MPARSE_COMMENT;
809 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
810 r->mstackpos = -1;
811 r->rstackpos = -1;
812 r->escape = '\\';
813 return r;
814 }
815
816 /* --- syntax tree state data management ---------------------------------- */
817
818 static void
819 roff_man_free1(struct roff_man *man)
820 {
821 if (man->meta.first != NULL)
822 roff_node_delete(man, man->meta.first);
823 free(man->meta.msec);
824 free(man->meta.vol);
825 free(man->meta.os);
826 free(man->meta.arch);
827 free(man->meta.title);
828 free(man->meta.name);
829 free(man->meta.date);
830 free(man->meta.sodest);
831 }
832
833 void
834 roff_state_reset(struct roff_man *man)
835 {
836 man->last = man->meta.first;
837 man->last_es = NULL;
838 man->flags = 0;
839 man->lastsec = man->lastnamed = SEC_NONE;
840 man->next = ROFF_NEXT_CHILD;
841 roff_setreg(man->roff, "nS", 0, '=');
842 }
843
844 static void
845 roff_man_alloc1(struct roff_man *man)
846 {
847 memset(&man->meta, 0, sizeof(man->meta));
848 man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
849 man->meta.first->type = ROFFT_ROOT;
850 man->meta.macroset = MACROSET_NONE;
851 roff_state_reset(man);
852 }
853
/*
 * Throw away the current document and start over with an empty
 * one, reusing the same roff_man object.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);	/* drop old tree and metadata */
	roff_man_alloc1(man);	/* install a fresh root node */
}
860
861 void
862 roff_man_free(struct roff_man *man)
863 {
864 roff_man_free1(man);
865 free(man->os_r);
866 free(man);
867 }
868
869 struct roff_man *
870 roff_man_alloc(struct roff *roff, const char *os_s, int quick)
871 {
872 struct roff_man *man;
873
874 man = mandoc_calloc(1, sizeof(*man));
875 man->roff = roff;
876 man->os_s = os_s;
877 man->quick = quick;
878 roff_man_alloc1(man);
879 roff->man = man;
880 return man;
881 }
882
883 /* --- syntax tree handling ----------------------------------------------- */
884
885 struct roff_node *
886 roff_node_alloc(struct roff_man *man, int line, int pos,
887 enum roff_type type, int tok)
888 {
889 struct roff_node *n;
890
891 n = mandoc_calloc(1, sizeof(*n));
892 n->line = line;
893 n->pos = pos;
894 n->tok = tok;
895 n->type = type;
896 n->sec = man->lastsec;
897
898 if (man->flags & MDOC_SYNOPSIS)
899 n->flags |= NODE_SYNPRETTY;
900 else
901 n->flags &= ~NODE_SYNPRETTY;
902 if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL)
903 n->flags |= NODE_NOFILL;
904 else
905 n->flags &= ~NODE_NOFILL;
906 if (man->flags & MDOC_NEWLINE)
907 n->flags |= NODE_LINE;
908 man->flags &= ~MDOC_NEWLINE;
909
910 return n;
911 }
912
913 void
914 roff_node_append(struct roff_man *man, struct roff_node *n)
915 {
916
917 switch (man->next) {
918 case ROFF_NEXT_SIBLING:
919 if (man->last->next != NULL) {
920 n->next = man->last->next;
921 man->last->next->prev = n;
922 } else
923 man->last->parent->last = n;
924 man->last->next = n;
925 n->prev = man->last;
926 n->parent = man->last->parent;
927 break;
928 case ROFF_NEXT_CHILD:
929 if (man->last->child != NULL) {
930 n->next = man->last->child;
931 man->last->child->prev = n;
932 } else
933 man->last->last = n;
934 man->last->child = n;
935 n->parent = man->last;
936 break;
937 default:
938 abort();
939 }
940 man->last = n;
941
942 switch (n->type) {
943 case ROFFT_HEAD:
944 n->parent->head = n;
945 break;
946 case ROFFT_BODY:
947 if (n->end != ENDBODY_NOT)
948 return;
949 n->parent->body = n;
950 break;
951 case ROFFT_TAIL:
952 n->parent->tail = n;
953 break;
954 default:
955 return;
956 }
957
958 /*
959 * Copy over the normalised-data pointer of our parent. Not
960 * everybody has one, but copying a null pointer is fine.
961 */
962
963 n->norm = n->parent->norm;
964 assert(n->parent->type == ROFFT_BLOCK);
965 }
966
967 void
968 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
969 {
970 struct roff_node *n;
971
972 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
973 n->string = roff_strdup(man->roff, word);
974 roff_node_append(man, n);
975 n->flags |= NODE_VALID | NODE_ENDED;
976 man->next = ROFF_NEXT_SIBLING;
977 }
978
979 void
980 roff_word_append(struct roff_man *man, const char *word)
981 {
982 struct roff_node *n;
983 char *addstr, *newstr;
984
985 n = man->last;
986 addstr = roff_strdup(man->roff, word);
987 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
988 free(addstr);
989 free(n->string);
990 n->string = newstr;
991 man->next = ROFF_NEXT_SIBLING;
992 }
993
994 void
995 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
996 {
997 struct roff_node *n;
998
999 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
1000 roff_node_append(man, n);
1001 man->next = ROFF_NEXT_CHILD;
1002 }
1003
1004 struct roff_node *
1005 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
1006 {
1007 struct roff_node *n;
1008
1009 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1010 roff_node_append(man, n);
1011 man->next = ROFF_NEXT_CHILD;
1012 return n;
1013 }
1014
1015 struct roff_node *
1016 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1017 {
1018 struct roff_node *n;
1019
1020 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1021 roff_node_append(man, n);
1022 man->next = ROFF_NEXT_CHILD;
1023 return n;
1024 }
1025
1026 struct roff_node *
1027 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1028 {
1029 struct roff_node *n;
1030
1031 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1032 roff_node_append(man, n);
1033 man->next = ROFF_NEXT_CHILD;
1034 return n;
1035 }
1036
1037 static void
1038 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1039 {
1040 struct roff_node *n;
1041 struct tbl_span *span;
1042
1043 if (man->meta.macroset == MACROSET_MAN)
1044 man_breakscope(man, ROFF_TS);
1045 while ((span = tbl_span(tbl)) != NULL) {
1046 n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1047 n->span = span;
1048 roff_node_append(man, n);
1049 n->flags |= NODE_VALID | NODE_ENDED;
1050 man->next = ROFF_NEXT_SIBLING;
1051 }
1052 }
1053
1054 void
1055 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1056 {
1057
1058 /* Adjust siblings. */
1059
1060 if (n->prev)
1061 n->prev->next = n->next;
1062 if (n->next)
1063 n->next->prev = n->prev;
1064
1065 /* Adjust parent. */
1066
1067 if (n->parent != NULL) {
1068 if (n->parent->child == n)
1069 n->parent->child = n->next;
1070 if (n->parent->last == n)
1071 n->parent->last = n->prev;
1072 }
1073
1074 /* Adjust parse point. */
1075
1076 if (man == NULL)
1077 return;
1078 if (man->last == n) {
1079 if (n->prev == NULL) {
1080 man->last = n->parent;
1081 man->next = ROFF_NEXT_CHILD;
1082 } else {
1083 man->last = n->prev;
1084 man->next = ROFF_NEXT_SIBLING;
1085 }
1086 }
1087 if (man->meta.first == n)
1088 man->meta.first = NULL;
1089 }
1090
1091 void
1092 roff_node_relink(struct roff_man *man, struct roff_node *n)
1093 {
1094 roff_node_unlink(man, n);
1095 n->prev = n->next = NULL;
1096 roff_node_append(man, n);
1097 }
1098
1099 void
1100 roff_node_free(struct roff_node *n)
1101 {
1102
1103 if (n->args != NULL)
1104 mdoc_argv_free(n->args);
1105 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1106 free(n->norm);
1107 eqn_box_free(n->eqn);
1108 free(n->string);
1109 free(n->tag);
1110 free(n);
1111 }
1112
1113 void
1114 roff_node_delete(struct roff_man *man, struct roff_node *n)
1115 {
1116
1117 while (n->child != NULL)
1118 roff_node_delete(man, n->child);
1119 roff_node_unlink(man, n);
1120 roff_node_free(n);
1121 }
1122
1123 int
1124 roff_node_transparent(struct roff_node *n)
1125 {
1126 if (n == NULL)
1127 return 0;
1128 if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
1129 return 1;
1130 return roff_tok_transparent(n->tok);
1131 }
1132
1133 int
1134 roff_tok_transparent(enum roff_tok tok)
1135 {
1136 switch (tok) {
1137 case ROFF_ft:
1138 case ROFF_ll:
1139 case ROFF_mc:
1140 case ROFF_po:
1141 case ROFF_ta:
1142 case MDOC_Db:
1143 case MDOC_Es:
1144 case MDOC_Sm:
1145 case MDOC_Tg:
1146 case MAN_DT:
1147 case MAN_UC:
1148 case MAN_PD:
1149 case MAN_AT:
1150 return 1;
1151 default:
1152 return 0;
1153 }
1154 }
1155
1156 struct roff_node *
1157 roff_node_child(struct roff_node *n)
1158 {
1159 for (n = n->child; roff_node_transparent(n); n = n->next)
1160 continue;
1161 return n;
1162 }
1163
1164 struct roff_node *
1165 roff_node_prev(struct roff_node *n)
1166 {
1167 do {
1168 n = n->prev;
1169 } while (roff_node_transparent(n));
1170 return n;
1171 }
1172
1173 struct roff_node *
1174 roff_node_next(struct roff_node *n)
1175 {
1176 do {
1177 n = n->next;
1178 } while (roff_node_transparent(n));
1179 return n;
1180 }
1181
1182 void
1183 deroff(char **dest, const struct roff_node *n)
1184 {
1185 char *cp;
1186 size_t sz;
1187
1188 if (n->string == NULL) {
1189 for (n = n->child; n != NULL; n = n->next)
1190 deroff(dest, n);
1191 return;
1192 }
1193
1194 /* Skip leading whitespace. */
1195
1196 for (cp = n->string; *cp != '\0'; cp++) {
1197 if (cp[0] == '\\' && cp[1] != '\0' &&
1198 strchr(" %&0^|~", cp[1]) != NULL)
1199 cp++;
1200 else if ( ! isspace((unsigned char)*cp))
1201 break;
1202 }
1203
1204 /* Skip trailing backslash. */
1205
1206 sz = strlen(cp);
1207 if (sz > 0 && cp[sz - 1] == '\\')
1208 sz--;
1209
1210 /* Skip trailing whitespace. */
1211
1212 for (; sz; sz--)
1213 if ( ! isspace((unsigned char)cp[sz-1]))
1214 break;
1215
1216 /* Skip empty strings. */
1217
1218 if (sz == 0)
1219 return;
1220
1221 if (*dest == NULL) {
1222 *dest = mandoc_strndup(cp, sz);
1223 return;
1224 }
1225
1226 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1227 free(*dest);
1228 *dest = cp;
1229 }
1230
1231 /* --- main functions of the roff parser ---------------------------------- */
1232
1233 /*
1234 * In the current line, expand escape sequences that produce parsable
1235 * input text. Also check the syntax of the remaining escape sequences,
1236 * which typically produce output glyphs or change formatter state.
 *
 * r is the parser state, buf the line buffer (buf->buf may be replaced
 * by a newly allocated buffer and buf->sz updated), ln the line number
 * used in messages, pos the offset in buf->buf where processing starts,
 * and newesc the escape character to look for.
 *
 * Returns ROFF_CONT when the line is ready for further parsing,
 * ROFF_IGN | ROFF_APPEND when the line ends with a continuation
 * (a \# comment or a trailing escape character), or ROFF_IGN
 * when the expansion count or the size limit is exceeded.
1237 */
1238 static int
1239 roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc)
1240 {
1241 struct mctx *ctx; /* current macro call context */
1242 char ubuf[24]; /* buffer to print the number */
1243 struct roff_node *n; /* used for header comments */
1244 const char *start; /* start of the string to process */
1245 char *stesc; /* start of an escape sequence ('\\') */
1246 const char *esct; /* type of escape sequence */
1247 char *ep; /* end of comment string */
1248 const char *stnam; /* start of the name, after "[(*" */
1249 const char *cp; /* end of the name, e.g. before ']' */
1250 const char *res; /* the string to be substituted */
1251 char *nbuf; /* new buffer to copy buf->buf to */
1252 size_t maxl; /* expected length of the escape name */
1253 size_t naml; /* actual length of the escape name */
1254 size_t asz; /* length of the replacement */
1255 size_t rsz; /* length of the rest of the string */
1256 int inaml; /* length returned from mandoc_escape() */
1257 int expand_count; /* to avoid infinite loops */
1258 int npos; /* position in numeric expression */
1259 int arg_complete; /* argument not interrupted by eol */
1260 int quote_args; /* true for \\$@, false for \\$* */
1261 int done; /* no more input available */
1262 int deftype; /* type of definition to paste */
1263 int rcsid; /* kind of RCS id seen */
1264 enum mandocerr err; /* for escape sequence problems */
1265 char sign; /* increment number register */
1266 char term; /* character terminating the escape */
1267
1268 /* Search forward for comments. */
1269
1270 done = 0;
1271 start = buf->buf + pos;
1272 for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
1273 if (stesc[0] != newesc || stesc[1] == '\0')
1274 continue;
/* Step onto the character following the escape character. */
1275 stesc++;
1276 if (*stesc != '"' && *stesc != '#')
1277 continue;
1278
1279 /* Comment found, look for RCS id. */
1280
1281 rcsid = 0;
1282 if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
1283 rcsid = 1 << MANDOC_OS_OPENBSD;
1284 cp += 8;
1285 } else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
1286 rcsid = 1 << MANDOC_OS_NETBSD;
1287 cp += 7;
1288 }
/* Warn if an id of the same kind was already recorded. */
1289 if (cp != NULL &&
1290 isalnum((unsigned char)*cp) == 0 &&
1291 strchr(cp, '$') != NULL) {
1292 if (r->man->meta.rcsids & rcsid)
1293 mandoc_msg(MANDOCERR_RCS_REP, ln,
1294 (int)(stesc - buf->buf) + 1,
1295 "%s", stesc + 1);
1296 r->man->meta.rcsids |= rcsid;
1297 }
1298
1299 /* Handle trailing whitespace. */
1300
/*
 * After this statement, stesc is back on the escape character
 * and ep points to the last byte of the line.
 */
1301 ep = strchr(stesc--, '\0') - 1;
1302 if (*ep == '\n') {
1303 done = 1;
1304 ep--;
1305 }
1306 if (*ep == ' ' || *ep == '\t')
1307 mandoc_msg(MANDOCERR_SPACE_EOL,
1308 ln, (int)(ep - buf->buf), NULL);
1309
1310 /*
1311 * Save comments preceding the title macro
1312 * in the syntax tree.
1313 */
1314
1315 if (newesc != ASCII_ESC && r->options & MPARSE_COMMENT) {
1316 while (*ep == ' ' || *ep == '\t')
1317 ep--;
1318 ep[1] = '\0';
1319 n = roff_node_alloc(r->man,
1320 ln, stesc + 1 - buf->buf,
1321 ROFFT_COMMENT, TOKEN_NONE);
1322 n->string = mandoc_strdup(stesc + 2);
1323 roff_node_append(r->man, n);
1324 n->flags |= NODE_VALID | NODE_ENDED;
1325 r->man->next = ROFF_NEXT_SIBLING;
1326 }
1327
1328 /* Line continuation with comment. */
1329
1330 if (stesc[1] == '#') {
1331 *stesc = '\0';
1332 return ROFF_IGN | ROFF_APPEND;
1333 }
1334
1335 /* Discard normal comments. */
1336
/* Also drop blanks before the comment, unless the blank is escaped. */
1337 while (stesc > start && stesc[-1] == ' ' &&
1338 (stesc == start + 1 || stesc[-2] != '\\'))
1339 stesc--;
1340 *stesc = '\0';
1341 break;
1342 }
/* The text to process is empty: nothing to expand. */
1343 if (stesc == start)
1344 return ROFF_CONT;
1345 stesc--;
1346
1347 /* Notice the end of the input. */
1348
1349 if (*stesc == '\n') {
1350 *stesc-- = '\0';
1351 done = 1;
1352 }
1353
/*
 * Scan backwards from the end of the line.  After a replacement,
 * stesc is placed at the end of the inserted text, so the
 * inserted text is scanned as well.
 */
1354 expand_count = 0;
1355 while (stesc >= start) {
1356 if (*stesc != newesc) {
1357
1358 /*
1359 * If we have a non-standard escape character,
1360 * escape literal backslashes because all
1361 * processing in subsequent functions uses
1362 * the standard escaping rules.
1363 */
1364
1365 if (newesc != ASCII_ESC && *stesc == '\\') {
1366 *stesc = '\0';
1367 buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
1368 buf->buf, stesc + 1) + 1;
1369 start = nbuf + pos;
1370 stesc = nbuf + (stesc - buf->buf);
1371 free(buf->buf);
1372 buf->buf = nbuf;
1373 }
1374
1375 /* Search backwards for the next escape. */
1376
1377 stesc--;
1378 continue;
1379 }
1380
1381 /* If it is escaped, skip it. */
1382
/* Find the start of the run of r->escape characters before stesc. */
1383 for (cp = stesc - 1; cp >= start; cp--)
1384 if (*cp != r->escape)
1385 break;
1386
/*
 * Even distance: stesc is itself escaped; rewrite the whole run
 * as backslashes.  Odd distance with more text following: this
 * starts an escape sequence.  Odd distance at the end of the
 * line: drop the character and, unless the end of the input
 * was seen, ask for the next line to be appended.
 */
1387 if ((stesc - cp) % 2 == 0) {
1388 while (stesc > cp)
1389 *stesc-- = '\\';
1390 continue;
1391 } else if (stesc[1] != '\0') {
1392 *stesc = '\\';
1393 } else {
1394 *stesc-- = '\0';
1395 if (done)
1396 continue;
1397 else
1398 return ROFF_IGN | ROFF_APPEND;
1399 }
1400
1401 /* Decide whether to expand or to check only. */
1402
1403 term = '\0';
1404 cp = stesc + 1;
/* Skip any 'E' characters following the escape character. */
1405 while (*cp == 'E')
1406 cp++;
1407 esct = cp;
1408 switch (*esct) {
1409 case '*':
1410 case '$':
1411 res = NULL;
1412 break;
1413 case 'B':
1414 case 'w':
/* The character after the escape name delimits the argument. */
1415 term = cp[1];
1416 /* FALLTHROUGH */
1417 case 'n':
1418 sign = cp[1];
1419 if (sign == '+' || sign == '-')
1420 cp++;
1421 res = ubuf;
1422 break;
1423 default:
/* Not expanded here: only check the syntax and report problems. */
1424 err = MANDOCERR_OK;
1425 switch(mandoc_escape(&cp, &stnam, &inaml)) {
1426 case ESCAPE_SPECIAL:
1427 if (mchars_spec2cp(stnam, inaml) >= 0)
1428 break;
1429 /* FALLTHROUGH */
1430 case ESCAPE_ERROR:
1431 err = MANDOCERR_ESC_BAD;
1432 break;
1433 case ESCAPE_UNDEF:
1434 err = MANDOCERR_ESC_UNDEF;
1435 break;
1436 case ESCAPE_UNSUPP:
1437 err = MANDOCERR_ESC_UNSUPP;
1438 break;
1439 default:
1440 break;
1441 }
1442 if (err != MANDOCERR_OK)
1443 mandoc_msg(err, ln, (int)(stesc - buf->buf),
1444 "%.*s", (int)(cp - stesc), stesc);
1445 stesc--;
1446 continue;
1447 }
1448
/* Give up on the line after too many expansions. */
1449 if (EXPAND_LIMIT < ++expand_count) {
1450 mandoc_msg(MANDOCERR_ROFFLOOP,
1451 ln, (int)(stesc - buf->buf), NULL);
1452 return ROFF_IGN;
1453 }
1454
1455 /*
1456 * The third character decides the length
1457 * of the name of the string or register.
1458 * Save a pointer to the name.
1459 */
1460
1461 if (term == '\0') {
1462 switch (*++cp) {
1463 case '\0':
1464 maxl = 0;
1465 break;
1466 case '(':
1467 cp++;
1468 maxl = 2;
1469 break;
1470 case '[':
1471 cp++;
1472 term = ']';
1473 maxl = 0;
1474 break;
1475 default:
1476 maxl = 1;
1477 break;
1478 }
1479 } else {
1480 cp += 2;
1481 maxl = 0;
1482 }
1483 stnam = cp;
1484
1485 /* Advance to the end of the name. */
1486
/* maxl == 0 means: no fixed length, read up to term or end of line. */
1487 naml = 0;
1488 arg_complete = 1;
1489 while (maxl == 0 || naml < maxl) {
1490 if (*cp == '\0') {
1491 mandoc_msg(MANDOCERR_ESC_BAD, ln,
1492 (int)(stesc - buf->buf), "%s", stesc);
1493 arg_complete = 0;
1494 break;
1495 }
1496 if (maxl == 0 && *cp == term) {
1497 cp++;
1498 break;
1499 }
/* Only for \w, escape sequences inside the argument are parsed. */
1500 if (*cp++ != '\\' || *esct != 'w') {
1501 naml++;
1502 continue;
1503 }
1504 switch (mandoc_escape(&cp, NULL, NULL)) {
1505 case ESCAPE_SPECIAL:
1506 case ESCAPE_UNICODE:
1507 case ESCAPE_NUMBERED:
1508 case ESCAPE_UNDEF:
1509 case ESCAPE_OVERSTRIKE:
1510 naml++;
1511 break;
1512 default:
1513 break;
1514 }
1515 }
1516
1517 /*
1518 * Retrieve the replacement string; if it is
1519 * undefined, resume searching for escapes.
1520 */
1521
1522 switch (*esct) {
1523 case '*':
1524 if (arg_complete) {
1525 deftype = ROFFDEF_USER | ROFFDEF_PRE;
1526 res = roff_getstrn(r, stnam, naml, &deftype);
1527
1528 /*
1529 * If not overridden, let \*(.T
1530 * through to the formatters.
1531 */
1532
1533 if (res == NULL && naml == 2 &&
1534 stnam[0] == '.' && stnam[1] == 'T') {
1535 roff_setstrn(&r->strtab,
1536 ".T", 2, NULL, 0, 0);
1537 stesc--;
1538 continue;
1539 }
1540 }
1541 break;
1542 case '$':
/* Macro arguments are only available inside a macro call. */
1543 if (r->mstackpos < 0) {
1544 mandoc_msg(MANDOCERR_ARG_UNDEF, ln,
1545 (int)(stesc - buf->buf), "%.3s", stesc);
1546 break;
1547 }
1548 ctx = r->mstack + r->mstackpos;
/* \$1 to \$9 select one argument; missing ones expand to "". */
1549 npos = esct[1] - '1';
1550 if (npos >= 0 && npos <= 8) {
1551 res = npos < ctx->argc ?
1552 ctx->argv[npos] : "";
1553 break;
1554 }
1555 if (esct[1] == '*')
1556 quote_args = 0;
1557 else if (esct[1] == '@')
1558 quote_args = 1;
1559 else {
1560 mandoc_msg(MANDOCERR_ARG_NONUM, ln,
1561 (int)(stesc - buf->buf), "%.3s", stesc);
1562 break;
1563 }
/* Compute the length of all arguments joined by blanks. */
1564 asz = 0;
1565 for (npos = 0; npos < ctx->argc; npos++) {
1566 if (npos)
1567 asz++; /* blank */
1568 if (quote_args)
1569 asz += 2; /* quotes */
1570 asz += strlen(ctx->argv[npos]);
1571 }
/*
 * Resize the buffer in place.  The escape sequence being
 * replaced is treated as occupying 3 bytes.  When shrinking,
 * the tail is moved before the realloc; when growing, after it.
 */
1572 if (asz != 3) {
1573 rsz = buf->sz - (stesc - buf->buf) - 3;
1574 if (asz < 3)
1575 memmove(stesc + asz, stesc + 3, rsz);
1576 buf->sz += asz - 3;
1577 nbuf = mandoc_realloc(buf->buf, buf->sz);
1578 start = nbuf + pos;
1579 stesc = nbuf + (stesc - buf->buf);
1580 buf->buf = nbuf;
1581 if (asz > 3)
1582 memmove(stesc + asz, stesc + 3, rsz);
1583 }
/* Write the arguments over the escape sequence. */
1584 for (npos = 0; npos < ctx->argc; npos++) {
1585 if (npos)
1586 *stesc++ = ' ';
1587 if (quote_args)
1588 *stesc++ = '"';
1589 cp = ctx->argv[npos];
1590 while (*cp != '\0')
1591 *stesc++ = *cp++;
1592 if (quote_args)
1593 *stesc++ = '"';
1594 }
1595 continue;
1596 case 'B':
/*
 * Yields "1" if the complete argument evaluates as a numeric
 * expression that extends up to the terminator, else "0".
 */
1597 npos = 0;
1598 ubuf[0] = arg_complete &&
1599 roff_evalnum(r, ln, stnam, &npos,
1600 NULL, ROFFNUM_SCALE) &&
1601 stnam + npos + 1 == cp ? '1' : '0';
1602 ubuf[1] = '\0';
1603 break;
1604 case 'n':
1605 if (arg_complete)
1606 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1607 roff_getregn(r, stnam, naml, sign));
1608 else
1609 ubuf[0] = '\0';
1610 break;
1611 case 'w':
1612 /* use even incomplete args */
/* Each counted character contributes 24 to the result. */
1613 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1614 24 * (int)naml);
1615 break;
1616 }
1617
/*
 * Undefined values expand to the empty string.  Otherwise,
 * refuse to let the buffer grow beyond SHRT_MAX bytes;
 * that case is reported with the same message as a loop.
 */
1618 if (res == NULL) {
1619 if (*esct == '*')
1620 mandoc_msg(MANDOCERR_STR_UNDEF,
1621 ln, (int)(stesc - buf->buf),
1622 "%.*s", (int)naml, stnam);
1623 res = "";
1624 } else if (buf->sz + strlen(res) > SHRT_MAX) {
1625 mandoc_msg(MANDOCERR_ROFFLOOP,
1626 ln, (int)(stesc - buf->buf), NULL);
1627 return ROFF_IGN;
1628 }
1629
1630 /* Replace the escape sequence by the string. */
1631
1632 *stesc = '\0';
1633 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
1634 buf->buf, res, cp) + 1;
1635
1636 /* Prepare for the next replacement. */
1637
1638 start = nbuf + pos;
1639 stesc = nbuf + (stesc - buf->buf) + strlen(res);
1640 free(buf->buf);
1641 buf->buf = nbuf;
1642 }
1643 return ROFF_CONT;
1644 }
1645
1646 /*
1647 * Parse a quoted or unquoted roff-style request or macro argument.
1648 * Return a pointer to the parsed argument, which is either the original
1649 * pointer or advanced by one byte in case the argument is quoted.
1650 * NUL-terminate the argument in place.
1651 * Collapse pairs of quotes inside quoted arguments.
1652 * Advance the argument pointer to the next argument,
1653 * or to the NUL byte terminating the argument line.
1654 */
1655 char *
1656 roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
1657 {
1658 struct buf buf;
1659 char *cp, *start;
1660 int newesc, pairs, quoted, white;
1661
1662 /* Quoting can only start with a new word. */
1663 start = *cpp;
1664 quoted = 0;
1665 if ('"' == *start) {
1666 quoted = 1;
1667 start++;
1668 }
1669
1670 newesc = pairs = white = 0;
1671 for (cp = start; '\0' != *cp; cp++) {
1672
1673 /*
1674 * Move the following text left
1675 * after quoted quotes and after "\\" and "\t".
1676 */
1677 if (pairs)
1678 cp[-pairs] = cp[0];
1679
1680 if ('\\' == cp[0]) {
1681 /*
1682 * In copy mode, translate double to single
1683 * backslashes and backslash-t to literal tabs.
1684 */
1685 switch (cp[1]) {
1686 case 'a':
1687 case 't':
1688 cp[-pairs] = '\t';
1689 pairs++;
1690 cp++;
1691 break;
1692 case '\\':
1693 newesc = 1;
1694 cp[-pairs] = ASCII_ESC;
1695 pairs++;
1696 cp++;
1697 break;
1698 case ' ':
1699 /* Skip escaped blanks. */
1700 if (0 == quoted)
1701 cp++;
1702 break;
1703 default:
1704 break;
1705 }
1706 } else if (0 == quoted) {
1707 if (' ' == cp[0]) {
1708 /* Unescaped blanks end unquoted args. */
1709 white = 1;
1710 break;
1711 }
1712 } else if ('"' == cp[0]) {
1713 if ('"' == cp[1]) {
1714 /* Quoted quotes collapse. */
1715 pairs++;
1716 cp++;
1717 } else {
1718 /* Unquoted quotes end quoted args. */
1719 quoted = 2;
1720 break;
1721 }
1722 }
1723 }
1724
1725 /* Quoted argument without a closing quote. */
1726 if (1 == quoted)
1727 mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);
1728
1729 /* NUL-terminate this argument and move to the next one. */
1730 if (pairs)
1731 cp[-pairs] = '\0';
1732 if ('\0' != *cp) {
1733 *cp++ = '\0';
1734 while (' ' == *cp)
1735 cp++;
1736 }
1737 *pos += (int)(cp - start) + (quoted ? 1 : 0);
1738 *cpp = cp;
1739
1740 if ('\0' == *cp && (white || ' ' == cp[-1]))
1741 mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);
1742
1743 start = mandoc_strdup(start);
1744 if (newesc == 0)
1745 return start;
1746
1747 buf.buf = start;
1748 buf.sz = strlen(start) + 1;
1749 buf.next = NULL;
1750 if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) {
1751 free(buf.buf);
1752 buf.buf = mandoc_strdup("");
1753 }
1754 return buf.buf;
1755 }
1756
1757
1758 /*
1759 * Process text streams.
1760 */
1761 static int
1762 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1763 {
1764 size_t sz;
1765 const char *start;
1766 char *p;
1767 int isz;
1768 enum mandoc_esc esc;
1769
1770 /* Spring the input line trap. */
1771
1772 if (roffit_lines == 1) {
1773 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1774 free(buf->buf);
1775 buf->buf = p;
1776 buf->sz = isz + 1;
1777 *offs = 0;
1778 free(roffit_macro);
1779 roffit_lines = 0;
1780 return ROFF_REPARSE;
1781 } else if (roffit_lines > 1)
1782 --roffit_lines;
1783
1784 if (roffce_node != NULL && buf->buf[pos] != '\0') {
1785 if (roffce_lines < 1) {
1786 r->man->last = roffce_node;
1787 r->man->next = ROFF_NEXT_SIBLING;
1788 roffce_lines = 0;
1789 roffce_node = NULL;
1790 } else
1791 roffce_lines--;
1792 }
1793
1794 /* Convert all breakable hyphens into ASCII_HYPH. */
1795
1796 start = p = buf->buf + pos;
1797
1798 while (*p != '\0') {
1799 sz = strcspn(p, "-\\");
1800 p += sz;
1801
1802 if (*p == '\0')
1803 break;
1804
1805 if (*p == '\\') {
1806 /* Skip over escapes. */
1807 p++;
1808 esc = mandoc_escape((const char **)&p, NULL, NULL);
1809 if (esc == ESCAPE_ERROR)
1810 break;
1811 while (*p == '-')
1812 p++;
1813 continue;
1814 } else if (p == start) {
1815 p++;
1816 continue;
1817 }
1818
1819 if (isalpha((unsigned char)p[-1]) &&
1820 isalpha((unsigned char)p[1]))
1821 *p = ASCII_HYPH;
1822 p++;
1823 }
1824 return ROFF_CONT;
1825 }
1826
/*
 * Parse one input line held in buf->buf, starting at offset *offs.
 * ln is the line number used in messages; len is only used here
 * for the warning about long text lines.
 * The line is first passed through roff_expand() and then handed to
 * whichever handler applies: the text or sub handler of an open roff
 * block, the equation or table parser, roff_parsetext() for text
 * lines, or the handler of the request or user-defined macro.
 * Returns ROFF_* flags; ROFF_CONT is returned in particular when the
 * line is neither a roff request nor a user-defined macro.
 */
1827 int
1828 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
1829 {
1830 enum roff_tok t;
1831 int e;
1832 int pos; /* parse point */
1833 int spos; /* saved parse point for messages */
1834 int ppos; /* original offset in buf->buf */
1835 int ctl; /* macro line (boolean) */
1836
1837 ppos = pos = *offs;
1838
/*
 * Warn about overlong text lines, but only outside tables,
 * equations, and no-fill mode, only if the line does not start
 * with a blank, dot, backslash, or the control character, and
 * only if a blank occurs within the first 80 bytes.
 */
1839 if (len > 80 && r->tbl == NULL && r->eqn == NULL &&
1840 (r->man->flags & ROFF_NOFILL) == 0 &&
1841 strchr(" .\\", buf->buf[pos]) == NULL &&
1842 buf->buf[pos] != r->control &&
1843 strcspn(buf->buf, " ") < 80)
1844 mandoc_msg(MANDOCERR_TEXT_LONG, ln, (int)len - 1,
1845 "%.20s...", buf->buf + pos);
1846
1847 /* Handle in-line equation delimiters. */
1848
1849 if (r->tbl == NULL &&
1850 r->last_eqn != NULL && r->last_eqn->delim &&
1851 (r->eqn == NULL || r->eqn_inline)) {
1852 e = roff_eqndelim(r, buf, pos);
1853 if (e == ROFF_REPARSE)
1854 return e;
1855 assert(e == ROFF_CONT);
1856 }
1857
1858 /* Expand some escape sequences. */
1859
1860 e = roff_expand(r, buf, ln, pos, r->escape);
1861 if ((e & ROFF_MASK) == ROFF_IGN)
1862 return e;
1863 assert(e == ROFF_CONT);
1864
/* Find out whether this is a request or macro line. */
1865 ctl = roff_getcontrol(r, buf->buf, &pos);
1866
1867 /*
1868 * First, if a scope is open and we're not a macro, pass the
1869 * text through the macro's filter.
1870 * Equations process all content themselves.
1871 * Tables process almost all content themselves, but we want
1872 * to warn about macros before passing it there.
1873 */
1874
1875 if (r->last != NULL && ! ctl) {
1876 t = r->last->tok;
1877 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1878 if ((e & ROFF_MASK) == ROFF_IGN)
1879 return e;
/* Keep only the flag bits outside ROFF_MASK for the return value. */
1880 e &= ~ROFF_MASK;
1881 } else
1882 e = ROFF_IGN;
/* In an equation, every line not starting with ".EN" is equation input. */
1883 if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
1884 eqn_read(r->eqn, buf->buf + ppos);
1885 return e;
1886 }
/* In a table, text lines and empty request lines are table input. */
1887 if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
1888 tbl_read(r->tbl, ln, buf->buf, ppos);
1889 roff_addtbl(r->man, ln, r->tbl);
1890 return e;
1891 }
/* Ordinary text line: comments are no longer saved after this. */
1892 if ( ! ctl) {
1893 r->options &= ~MPARSE_COMMENT;
1894 return roff_parsetext(r, buf, pos, offs) | e;
1895 }
1896
1897 /* Skip empty request lines. */
1898
1899 if (buf->buf[pos] == '"') {
1900 mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
1901 return ROFF_IGN;
1902 } else if (buf->buf[pos] == '\0')
1903 return ROFF_IGN;
1904
1905 /*
1906 * If a scope is open, go to the child handler for that macro,
1907 * as it may want to preprocess before doing anything with it.
1908 * Don't do so if an equation is open.
1909 */
1910
1911 if (r->last) {
1912 t = r->last->tok;
1913 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1914 }
1915
1916 /* No scope is open. This is a new request or macro. */
1917
1918 r->options &= ~MPARSE_COMMENT;
1919 spos = pos;
1920 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1921
1922 /* Tables ignore most macros. */
1923
1924 if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
1925 t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
1926 mandoc_msg(MANDOCERR_TBLMACRO,
1927 ln, pos, "%s", buf->buf + spos);
1928 if (t != TOKEN_NONE)
1929 return ROFF_IGN;
/* Unknown macro: skip its name and pass the rest to the table parser. */
1930 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1931 pos++;
1932 while (buf->buf[pos] == ' ')
1933 pos++;
1934 tbl_read(r->tbl, ln, buf->buf, pos);
1935 roff_addtbl(r->man, ln, r->tbl);
1936 return ROFF_IGN;
1937 }
1938
1939 /* For now, let high level macros abort .ce mode. */
1940
1941 if (ctl && roffce_node != NULL &&
1942 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
1943 t == ROFF_TH || t == ROFF_TS)) {
1944 r->man->last = roffce_node;
1945 r->man->next = ROFF_NEXT_SIBLING;
1946 roffce_lines = 0;
1947 roffce_node = NULL;
1948 }
1949
1950 /*
1951 * This is neither a roff request nor a user-defined macro.
1952 * Let the standard macro set parsers handle it.
1953 */
1954
1955 if (t == TOKEN_NONE)
1956 return ROFF_CONT;
1957
1958 /* Execute a roff request or a user defined macro. */
1959
1960 return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
1961 }
1962
1963 /*
1964 * Internal interface function to tell the roff parser that execution
1965 * of the current macro ended. This is required because macro
1966 * definitions usually do not end with a .return request.
1967 */
1968 void
1969 roff_userret(struct roff *r)
1970 {
1971 struct mctx *ctx;
1972 int i;
1973
1974 assert(r->mstackpos >= 0);
1975 ctx = r->mstack + r->mstackpos;
1976 for (i = 0; i < ctx->argc; i++)
1977 free(ctx->argv[i]);
1978 ctx->argc = 0;
1979 r->mstackpos--;
1980 }
1981
1982 void
1983 roff_endparse(struct roff *r)
1984 {
1985 if (r->last != NULL)
1986 mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
1987 r->last->col, "%s", roff_name[r->last->tok]);
1988
1989 if (r->eqn != NULL) {
1990 mandoc_msg(MANDOCERR_BLK_NOEND,
1991 r->eqn->node->line, r->eqn->node->pos, "EQ");
1992 eqn_parse(r->eqn);
1993 r->eqn = NULL;
1994 }
1995
1996 if (r->tbl != NULL) {
1997 tbl_end(r->tbl, 1);
1998 r->tbl = NULL;
1999 }
2000 }
2001
2002 /*
2003 * Parse a roff node's type from the input buffer. This must be in the
2004 * form of ".foo xxx" in the usual way.
2005 */
2006 static enum roff_tok
2007 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
2008 {
2009 char *cp;
2010 const char *mac;
2011 size_t maclen;
2012 int deftype;
2013 enum roff_tok t;
2014
2015 cp = buf + *pos;
2016
2017 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
2018 return TOKEN_NONE;
2019
2020 mac = cp;
2021 maclen = roff_getname(r, &cp, ln, ppos);
2022
2023 deftype = ROFFDEF_USER | ROFFDEF_REN;
2024 r->current_string = roff_getstrn(r, mac, maclen, &deftype);
2025 switch (deftype) {
2026 case ROFFDEF_USER:
2027 t = ROFF_USERDEF;
2028 break;
2029 case ROFFDEF_REN:
2030 t = ROFF_RENAMED;
2031 break;
2032 default:
2033 t = roffhash_find(r->reqtab, mac, maclen);
2034 break;
2035 }
2036 if (t != TOKEN_NONE)
2037 *pos = cp - buf;
2038 else if (deftype == ROFFDEF_UNDEF) {
2039 /* Using an undefined macro defines it to be empty. */
2040 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
2041 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
2042 }
2043 return t;
2044 }
2045
2046 /* --- handling of request blocks ----------------------------------------- */
2047
2048 /*
2049 * Close a macro definition block or an "ignore" block.
2050 */
2051 static int
2052 roff_cblock(ROFF_ARGS)
2053 {
2054 int rr;
2055
2056 if (r->last == NULL) {
2057 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2058 return ROFF_IGN;
2059 }
2060
2061 switch (r->last->tok) {
2062 case ROFF_am:
2063 case ROFF_ami:
2064 case ROFF_de:
2065 case ROFF_dei:
2066 case ROFF_ig:
2067 break;
2068 case ROFF_am1:
2069 case ROFF_de1:
2070 /* Remapped in roff_block(). */
2071 abort();
2072 default:
2073 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2074 return ROFF_IGN;
2075 }
2076
2077 roffnode_pop(r);
2078 roffnode_cleanscope(r);
2079
2080 /*
2081 * If a conditional block with braces is still open,
2082 * check for "\}" block end markers.
2083 */
2084
2085 if (r->last != NULL && r->last->endspan < 0) {
2086 rr = 1; /* If arguments follow "\}", warn about them. */
2087 roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2088 }
2089
2090 if (buf->buf[pos] != '\0')
2091 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
2092 ".. %s", buf->buf + pos);
2093
2094 return ROFF_IGN;
2095 }
2096
2097 /*
2098 * Pop all nodes ending at the end of the current input line.
2099 * Return the number of loops ended.
2100 */
2101 static int
2102 roffnode_cleanscope(struct roff *r)
2103 {
2104 int inloop;
2105
2106 inloop = 0;
2107 while (r->last != NULL && r->last->endspan > 0) {
2108 if (--r->last->endspan != 0)
2109 break;
2110 inloop += roffnode_pop(r);
2111 }
2112 return inloop;
2113 }
2114
2115 /*
2116 * Handle the closing "\}" of a conditional block.
2117 * Apart from generating warnings, this only pops nodes.
2118 * Return the number of loops ended.
2119 */
2120 static int
2121 roff_ccond(struct roff *r, int ln, int ppos)
2122 {
2123 if (NULL == r->last) {
2124 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2125 return 0;
2126 }
2127
2128 switch (r->last->tok) {
2129 case ROFF_el:
2130 case ROFF_ie:
2131 case ROFF_if:
2132 case ROFF_while:
2133 break;
2134 default:
2135 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2136 return 0;
2137 }
2138
2139 if (r->last->endspan > -1) {
2140 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2141 return 0;
2142 }
2143
2144 return roffnode_pop(r) + roffnode_cleanscope(r);
2145 }
2146
2147 static int
2148 roff_block(ROFF_ARGS)
2149 {
2150 const char *name, *value;
2151 char *call, *cp, *iname, *rname;
2152 size_t csz, namesz, rsz;
2153 int deftype;
2154
2155 /* Ignore groff compatibility mode for now. */
2156
2157 if (tok == ROFF_de1)
2158 tok = ROFF_de;
2159 else if (tok == ROFF_dei1)
2160 tok = ROFF_dei;
2161 else if (tok == ROFF_am1)
2162 tok = ROFF_am;
2163 else if (tok == ROFF_ami1)
2164 tok = ROFF_ami;
2165
2166 /* Parse the macro name argument. */
2167
2168 cp = buf->buf + pos;
2169 if (tok == ROFF_ig) {
2170 iname = NULL;
2171 namesz = 0;
2172 } else {
2173 iname = cp;
2174 namesz = roff_getname(r, &cp, ln, ppos);
2175 iname[namesz] = '\0';
2176 }
2177
2178 /* Resolve the macro name argument if it is indirect. */
2179
2180 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2181 deftype = ROFFDEF_USER;
2182 name = roff_getstrn(r, iname, namesz, &deftype);
2183 if (name == NULL) {
2184 mandoc_msg(MANDOCERR_STR_UNDEF,
2185 ln, (int)(iname - buf->buf),
2186 "%.*s", (int)namesz, iname);
2187 namesz = 0;
2188 } else
2189 namesz = strlen(name);
2190 } else
2191 name = iname;
2192
2193 if (namesz == 0 && tok != ROFF_ig) {
2194 mandoc_msg(MANDOCERR_REQ_EMPTY,
2195 ln, ppos, "%s", roff_name[tok]);
2196 return ROFF_IGN;
2197 }
2198
2199 roffnode_push(r, tok, name, ln, ppos);
2200
2201 /*
2202 * At the beginning of a `de' macro, clear the existing string
2203 * with the same name, if there is one. New content will be
2204 * appended from roff_block_text() in multiline mode.
2205 */
2206
2207 if (tok == ROFF_de || tok == ROFF_dei) {
2208 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
2209 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2210 } else if (tok == ROFF_am || tok == ROFF_ami) {
2211 deftype = ROFFDEF_ANY;
2212 value = roff_getstrn(r, iname, namesz, &deftype);
2213 switch (deftype) { /* Before appending, ... */
2214 case ROFFDEF_PRE: /* copy predefined to user-defined. */
2215 roff_setstrn(&r->strtab, name, namesz,
2216 value, strlen(value), 0);
2217 break;
2218 case ROFFDEF_REN: /* call original standard macro. */
2219 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2220 (int)strlen(value), value);
2221 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2222 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2223 free(call);
2224 break;
2225 case ROFFDEF_STD: /* rename and call standard macro. */
2226 rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
2227 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
2228 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2229 (int)rsz, rname);
2230 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2231 free(call);
2232 free(rname);
2233 break;
2234 default:
2235 break;
2236 }
2237 }
2238
2239 if (*cp == '\0')
2240 return ROFF_IGN;
2241
2242 /* Get the custom end marker. */
2243
2244 iname = cp;
2245 namesz = roff_getname(r, &cp, ln, ppos);
2246
2247 /* Resolve the end marker if it is indirect. */
2248
2249 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2250 deftype = ROFFDEF_USER;
2251 name = roff_getstrn(r, iname, namesz, &deftype);
2252 if (name == NULL) {
2253 mandoc_msg(MANDOCERR_STR_UNDEF,
2254 ln, (int)(iname - buf->buf),
2255 "%.*s", (int)namesz, iname);
2256 namesz = 0;
2257 } else
2258 namesz = strlen(name);
2259 } else
2260 name = iname;
2261
2262 if (namesz)
2263 r->last->end = mandoc_strndup(name, namesz);
2264
2265 if (*cp != '\0')
2266 mandoc_msg(MANDOCERR_ARG_EXCESS,
2267 ln, pos, ".%s ... %s", roff_name[tok], cp);
2268
2269 return ROFF_IGN;
2270 }
2271
2272 static int
2273 roff_block_sub(ROFF_ARGS)
2274 {
2275 enum roff_tok t;
2276 int i, j;
2277
2278 /*
2279 * If a custom end marker is a user-defined or predefined macro
2280 * or a request, interpret it.
2281 */
2282
2283 if (r->last->end) {
2284 for (i = pos, j = 0; r->last->end[j]; j++, i++)
2285 if (buf->buf[i] != r->last->end[j])
2286 break;
2287
2288 if (r->last->end[j] == '\0' &&
2289 (buf->buf[i] == '\0' ||
2290 buf->buf[i] == ' ' ||
2291 buf->buf[i] == '\t')) {
2292 roffnode_pop(r);
2293 roffnode_cleanscope(r);
2294
2295 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2296 i++;
2297
2298 pos = i;
2299 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2300 TOKEN_NONE)
2301 return ROFF_RERUN;
2302 return ROFF_IGN;
2303 }
2304 }
2305
2306 /* Handle the standard end marker. */
2307
2308 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2309 if (t == ROFF_cblock)
2310 return roff_cblock(r, t, buf, ln, ppos, pos, offs);
2311
2312 /* Not an end marker, so append the line to the block. */
2313
2314 if (tok != ROFF_ig)
2315 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2316 return ROFF_IGN;
2317 }
2318
2319 static int
2320 roff_block_text(ROFF_ARGS)
2321 {
2322
2323 if (tok != ROFF_ig)
2324 roff_setstr(r, r->last->name, buf->buf + pos, 2);
2325
2326 return ROFF_IGN;
2327 }
2328
2329 /*
2330 * Check for a closing "\}" and handle it.
2331 * In this function, the final "int *offs" argument is used for
2332 * different purposes than elsewhere:
2333 * Input: *offs == 0: caller wants to discard arguments following \}
2334 * *offs == 1: caller wants to preserve text following \}
2335 * Output: *offs = 0: tell caller to discard input line
2336 * *offs = 1: tell caller to use input line
2337 */
2338 static int
2339 roff_cond_checkend(ROFF_ARGS)
2340 {
2341 char *ep;
2342 int endloop, irc, rr;
2343
2344 irc = ROFF_IGN;
2345 rr = r->last->rule;
2346 endloop = tok != ROFF_while ? ROFF_IGN :
2347 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2348 if (roffnode_cleanscope(r))
2349 irc |= endloop;
2350
2351 /*
2352 * If "\}" occurs on a macro line without a preceding macro or
2353 * a text line contains nothing else, drop the line completely.
2354 */
2355
2356 ep = buf->buf + pos;
2357 if (ep[0] == '\\' && ep[1] == '}' && (ep[2] == '\0' || *offs == 0))
2358 rr = 0;
2359
2360 /*
2361 * The closing delimiter "\}" rewinds the conditional scope
2362 * but is otherwise ignored when interpreting the line.
2363 */
2364
2365 while ((ep = strchr(ep, '\\')) != NULL) {
2366 switch (ep[1]) {
2367 case '}':
2368 if (ep[2] == '\0')
2369 ep[0] = '\0';
2370 else if (rr)
2371 ep[1] = '&';
2372 else
2373 memmove(ep, ep + 2, strlen(ep + 2) + 1);
2374 if (roff_ccond(r, ln, ep - buf->buf))
2375 irc |= endloop;
2376 break;
2377 case '\0':
2378 ++ep;
2379 break;
2380 default:
2381 ep += 2;
2382 break;
2383 }
2384 }
2385 *offs = rr;
2386 return irc;
2387 }
2388
2389 /*
2390 * Parse and process a request or macro line in conditional scope.
2391 */
2392 static int
2393 roff_cond_sub(ROFF_ARGS)
2394 {
2395 struct roffnode *bl;
2396 int irc, rr;
2397 enum roff_tok t;
2398
2399 rr = 0; /* If arguments follow "\}", skip them. */
2400 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2401 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2402
2403 /* For now, let high level macros abort .ce mode. */
2404
2405 if (roffce_node != NULL &&
2406 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
2407 t == ROFF_TH || t == ROFF_TS)) {
2408 r->man->last = roffce_node;
2409 r->man->next = ROFF_NEXT_SIBLING;
2410 roffce_lines = 0;
2411 roffce_node = NULL;
2412 }
2413
2414 /*
2415 * Fully handle known macros when they are structurally
2416 * required or when the conditional evaluated to true.
2417 */
2418
2419 if (t == ROFF_break) {
2420 if (irc & ROFF_LOOPMASK)
2421 irc = ROFF_IGN | ROFF_LOOPEXIT;
2422 else if (rr) {
2423 for (bl = r->last; bl != NULL; bl = bl->parent) {
2424 bl->rule = 0;
2425 if (bl->tok == ROFF_while)
2426 break;
2427 }
2428 }
2429 } else if (t != TOKEN_NONE &&
2430 (rr || roffs[t].flags & ROFFMAC_STRUCT)) {
2431 irc |= (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2432 if (irc & ROFF_WHILE)
2433 irc &= ~(ROFF_LOOPCONT | ROFF_LOOPEXIT);
2434 } else
2435 irc |= rr ? ROFF_CONT : ROFF_IGN;
2436 return irc;
2437 }
2438
2439 /*
2440 * Parse and process a text line in conditional scope.
2441 */
2442 static int
2443 roff_cond_text(ROFF_ARGS)
2444 {
2445 int irc, rr;
2446
2447 rr = 1; /* If arguments follow "\}", preserve them. */
2448 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2449 if (rr)
2450 irc |= ROFF_CONT;
2451 return irc;
2452 }
2453
2454 /* --- handling of numeric and conditional expressions -------------------- */
2455
2456 /*
2457 * Parse a single signed integer number. Stop at the first non-digit.
2458 * If there is at least one digit, return success and advance the
2459 * parse point, else return failure and let the parse point unchanged.
2460 * Ignore overflows, treat them just like the C language.
2461 */
2462 static int
2463 roff_getnum(const char *v, int *pos, int *res, int flags)
2464 {
2465 int myres, scaled, n, p;
2466
2467 if (NULL == res)
2468 res = &myres;
2469
2470 p = *pos;
2471 n = v[p] == '-';
2472 if (n || v[p] == '+')
2473 p++;
2474
2475 if (flags & ROFFNUM_WHITE)
2476 while (isspace((unsigned char)v[p]))
2477 p++;
2478
2479 for (*res = 0; isdigit((unsigned char)v[p]); p++)
2480 *res = 10 * *res + v[p] - '0';
2481 if (p == *pos + n)
2482 return 0;
2483
2484 if (n)
2485 *res = -*res;
2486
2487 /* Each number may be followed by one optional scaling unit. */
2488
2489 switch (v[p]) {
2490 case 'f':
2491 scaled = *res * 65536;
2492 break;
2493 case 'i':
2494 scaled = *res * 240;
2495 break;
2496 case 'c':
2497 scaled = *res * 240 / 2.54;
2498 break;
2499 case 'v':
2500 case 'P':
2501 scaled = *res * 40;
2502 break;
2503 case 'm':
2504 case 'n':
2505 scaled = *res * 24;
2506 break;
2507 case 'p':
2508 scaled = *res * 10 / 3;
2509 break;
2510 case 'u':
2511 scaled = *res;
2512 break;
2513 case 'M':
2514 scaled = *res * 6 / 25;
2515 break;
2516 default:
2517 scaled = *res;
2518 p--;
2519 break;
2520 }
2521 if (flags & ROFFNUM_SCALE)
2522 *res = scaled;
2523
2524 *pos = p + 1;
2525 return 1;
2526 }
2527
/*
 * Evaluate a string comparison condition.
 * The first character at v[*pos] is the delimiter.
 * Succeed if the string up to its second occurrence
 * matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or lacking that, to the end of the line.
 * If the cursor already is at the end of the line,
 * fail without moving it.
 * Return 1 if the two strings are equal, 0 otherwise.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*s1, *s2, *s3;
	int		 match;

	match = 0;
	s1 = v + *pos;		/* initial delimiter */

	/*
	 * Without a delimiter, there is nothing to compare,
	 * and scanning from s1 + 1 would overrun the buffer.
	 */

	if (*s1 == '\0')
		return 0;

	s2 = s1 + 1;		/* for scanning the first string */
	s3 = strchr(s2, *s1);	/* for scanning the second string */

	if (NULL == s3)	/* found no middle delimiter */
		goto out;

	while ('\0' != *++s3) {
		if (*s2 != *s3) {  /* mismatch */
			s3 = strchr(s3, *s1);
			break;
		}
		if (*s3 == *s1) {  /* found the final delimiter */
			match = 1;
			break;
		}
		s2++;
	}

out:
	if (NULL == s3)
		s3 = strchr(s2, '\0');
	else if (*s3 != '\0')
		s3++;
	*pos = s3 - v;
	return match;
}
2570
2571 /*
2572 * Evaluate an optionally negated single character, numerical,
2573 * or string condition.
2574 */
2575 static int
2576 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2577 {
2578 const char *start, *end;
2579 char *cp, *name;
2580 size_t sz;
2581 int deftype, len, number, savepos, istrue, wanttrue;
2582
2583 if ('!' == v[*pos]) {
2584 wanttrue = 0;
2585 (*pos)++;
2586 } else
2587 wanttrue = 1;
2588
2589 switch (v[*pos]) {
2590 case '\0':
2591 return 0;
2592 case 'n':
2593 case 'o':
2594 (*pos)++;
2595 return wanttrue;
2596 case 'e':
2597 case 't':
2598 case 'v':
2599 (*pos)++;
2600 return !wanttrue;
2601 case 'c':
2602 do {
2603 (*pos)++;
2604 } while (v[*pos] == ' ');
2605
2606 /*
2607 * Quirk for groff compatibility:
2608 * The horizontal tab is neither available nor unavailable.
2609 */
2610
2611 if (v[*pos] == '\t') {
2612 (*pos)++;
2613 return 0;
2614 }
2615
2616 /* Printable ASCII characters are available. */
2617
2618 if (v[*pos] != '\\') {
2619 (*pos)++;
2620 return wanttrue;
2621 }
2622
2623 end = v + ++*pos;
2624 switch (mandoc_escape(&end, &start, &len)) {
2625 case ESCAPE_SPECIAL:
2626 istrue = mchars_spec2cp(start, len) != -1;
2627 break;
2628 case ESCAPE_UNICODE:
2629 istrue = 1;
2630 break;
2631 case ESCAPE_NUMBERED:
2632 istrue = mchars_num2char(start, len) != -1;
2633 break;
2634 default:
2635 istrue = !wanttrue;
2636 break;
2637 }
2638 *pos = end - v;
2639 return istrue == wanttrue;
2640 case 'd':
2641 case 'r':
2642 cp = v + *pos + 1;
2643 while (*cp == ' ')
2644 cp++;
2645 name = cp;
2646 sz = roff_getname(r, &cp, ln, cp - v);
2647 if (sz == 0)
2648 istrue = 0;
2649 else if (v[*pos] == 'r')
2650 istrue = roff_hasregn(r, name, sz);
2651 else {
2652 deftype = ROFFDEF_ANY;
2653 roff_getstrn(r, name, sz, &deftype);
2654 istrue = !!deftype;
2655 }
2656 *pos = (name + sz) - v;
2657 return istrue == wanttrue;
2658 default:
2659 break;
2660 }
2661
2662 savepos = *pos;
2663 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2664 return (number > 0) == wanttrue;
2665 else if (*pos == savepos)
2666 return roff_evalstrcond(v, pos) == wanttrue;
2667 else
2668 return 0;
2669 }
2670
2671 static int
2672 roff_line_ignore(ROFF_ARGS)
2673 {
2674
2675 return ROFF_IGN;
2676 }
2677
2678 static int
2679 roff_insec(ROFF_ARGS)
2680 {
2681
2682 mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]);
2683 return ROFF_IGN;
2684 }
2685
2686 static int
2687 roff_unsupp(ROFF_ARGS)
2688 {
2689
2690 mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]);
2691 return ROFF_IGN;
2692 }
2693
2694 static int
2695 roff_cond(ROFF_ARGS)
2696 {
2697 int irc;
2698
2699 roffnode_push(r, tok, NULL, ln, ppos);
2700
2701 /*
2702 * An `.el' has no conditional body: it will consume the value
2703 * of the current rstack entry set in prior `ie' calls or
2704 * defaults to DENY.
2705 *
2706 * If we're not an `el', however, then evaluate the conditional.
2707 */
2708
2709 r->last->rule = tok == ROFF_el ?
2710 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2711 roff_evalcond(r, ln, buf->buf, &pos);
2712
2713 /*
2714 * An if-else will put the NEGATION of the current evaluated
2715 * conditional into the stack of rules.
2716 */
2717
2718 if (tok == ROFF_ie) {
2719 if (r->rstackpos + 1 == r->rstacksz) {
2720 r->rstacksz += 16;
2721 r->rstack = mandoc_reallocarray(r->rstack,
2722 r->rstacksz, sizeof(int));
2723 }
2724 r->rstack[++r->rstackpos] = !r->last->rule;
2725 }
2726
2727 /* If the parent has false as its rule, then so do we. */
2728
2729 if (r->last->parent && !r->last->parent->rule)
2730 r->last->rule = 0;
2731
2732 /*
2733 * Determine scope.
2734 * If there is nothing on the line after the conditional,
2735 * not even whitespace, use next-line scope.
2736 * Except that .while does not support next-line scope.
2737 */
2738
2739 if (buf->buf[pos] == '\0' && tok != ROFF_while) {
2740 r->last->endspan = 2;
2741 goto out;
2742 }
2743
2744 while (buf->buf[pos] == ' ')
2745 pos++;
2746
2747 /* An opening brace requests multiline scope. */
2748
2749 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2750 r->last->endspan = -1;
2751 pos += 2;
2752 while (buf->buf[pos] == ' ')
2753 pos++;
2754 goto out;
2755 }
2756
2757 /*
2758 * Anything else following the conditional causes
2759 * single-line scope. Warn if the scope contains
2760 * nothing but trailing whitespace.
2761 */
2762
2763 if (buf->buf[pos] == '\0')
2764 mandoc_msg(MANDOCERR_COND_EMPTY,
2765 ln, ppos, "%s", roff_name[tok]);
2766
2767 r->last->endspan = 1;
2768
2769 out:
2770 *offs = pos;
2771 irc = ROFF_RERUN;
2772 if (tok == ROFF_while)
2773 irc |= ROFF_WHILE;
2774 return irc;
2775 }
2776
2777 static int
2778 roff_ds(ROFF_ARGS)
2779 {
2780 char *string;
2781 const char *name;
2782 size_t namesz;
2783
2784 /* Ignore groff compatibility mode for now. */
2785
2786 if (tok == ROFF_ds1)
2787 tok = ROFF_ds;
2788 else if (tok == ROFF_as1)
2789 tok = ROFF_as;
2790
2791 /*
2792 * The first word is the name of the string.
2793 * If it is empty or terminated by an escape sequence,
2794 * abort the `ds' request without defining anything.
2795 */
2796
2797 name = string = buf->buf + pos;
2798 if (*name == '\0')
2799 return ROFF_IGN;
2800
2801 namesz = roff_getname(r, &string, ln, pos);
2802 switch (name[namesz]) {
2803 case '\\':
2804 return ROFF_IGN;
2805 case '\t':
2806 string = buf->buf + pos + namesz;
2807 break;
2808 default:
2809 break;
2810 }
2811
2812 /* Read past the initial double-quote, if any. */
2813 if (*string == '"')
2814 string++;
2815
2816 /* The rest is the value. */
2817 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2818 ROFF_as == tok);
2819 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2820 return ROFF_IGN;
2821 }
2822
/*
 * Parse a single operator, one or two characters long,
 * starting at v[*pos].
 * If the operator is recognized, store a one-character code for it
 * into *res, advance the parse point, and return that code.
 * Two-character operators are encoded as follows:
 * "<=" as 'l', "<>" as '!', "<?" as 'i', ">=" as 'g', ">?" as 'a',
 * and "==" as '='.
 * Otherwise, return 0 and leave the parse point unchanged.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*op;
	int		 width;

	op = v + *pos;
	*res = op[0];
	width = 1;

	if (op[0] == '<') {
		if (op[1] == '=') {
			*res = 'l';
			width = 2;
		} else if (op[1] == '>') {
			*res = '!';
			width = 2;
		} else if (op[1] == '?') {
			*res = 'i';
			width = 2;
		}
	} else if (op[0] == '>') {
		if (op[1] == '=') {
			*res = 'g';
			width = 2;
		} else if (op[1] == '?') {
			*res = 'a';
			width = 2;
		}
	} else if (op[0] == '=') {
		if (op[1] == '=')
			width = 2;
	} else if (op[0] == '\0' || strchr("+-*/%&:", op[0]) == NULL)
		return 0;

	*pos += width;
	return *res;
}
2886
2887 /*
2888 * Evaluate either a parenthesized numeric expression
2889 * or a single signed integer number.
2890 */
2891 static int
2892 roff_evalpar(struct roff *r, int ln,
2893 const char *v, int *pos, int *res, int flags)
2894 {
2895
2896 if ('(' != v[*pos])
2897 return roff_getnum(v, pos, res, flags);
2898
2899 (*pos)++;
2900 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2901 return 0;
2902
2903 /*
2904 * Omission of the closing parenthesis
2905 * is an error in validation mode,
2906 * but ignored in evaluation mode.
2907 */
2908
2909 if (')' == v[*pos])
2910 (*pos)++;
2911 else if (NULL == res)
2912 return 0;
2913
2914 return 1;
2915 }
2916
2917 /*
2918 * Evaluate a complete numeric expression.
2919 * Proceed left to right, there is no concept of precedence.
2920 */
2921 static int
2922 roff_evalnum(struct roff *r, int ln, const char *v,
2923 int *pos, int *res, int flags)
2924 {
2925 int mypos, operand2;
2926 char operator;
2927
2928 if (NULL == pos) {
2929 mypos = 0;
2930 pos = &mypos;
2931 }
2932
2933 if (flags & ROFFNUM_WHITE)
2934 while (isspace((unsigned char)v[*pos]))
2935 (*pos)++;
2936
2937 if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2938 return 0;
2939
2940 while (1) {
2941 if (flags & ROFFNUM_WHITE)
2942 while (isspace((unsigned char)v[*pos]))
2943 (*pos)++;
2944
2945 if ( ! roff_getop(v, pos, &operator))
2946 break;
2947
2948 if (flags & ROFFNUM_WHITE)
2949 while (isspace((unsigned char)v[*pos]))
2950 (*pos)++;
2951
2952 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2953 return 0;
2954
2955 if (flags & ROFFNUM_WHITE)
2956 while (isspace((unsigned char)v[*pos]))
2957 (*pos)++;
2958
2959 if (NULL == res)
2960 continue;
2961
2962 switch (operator) {
2963 case '+':
2964 *res += operand2;
2965 break;
2966 case '-':
2967 *res -= operand2;
2968 break;
2969 case '*':
2970 *res *= operand2;
2971 break;
2972 case '/':
2973 if (operand2 == 0) {
2974 mandoc_msg(MANDOCERR_DIVZERO,
2975 ln, *pos, "%s", v);
2976 *res = 0;
2977 break;
2978 }
2979 *res /= operand2;
2980 break;
2981 case '%':
2982 if (operand2 == 0) {
2983 mandoc_msg(MANDOCERR_DIVZERO,
2984 ln, *pos, "%s", v);
2985 *res = 0;
2986 break;
2987 }
2988 *res %= operand2;
2989 break;
2990 case '<':
2991 *res = *res < operand2;
2992 break;
2993 case '>':
2994 *res = *res > operand2;
2995 break;
2996 case 'l':
2997 *res = *res <= operand2;
2998 break;
2999 case 'g':
3000 *res = *res >= operand2;
3001 break;
3002 case '=':
3003 *res = *res == operand2;
3004 break;
3005 case '!':
3006 *res = *res != operand2;
3007 break;
3008 case '&':
3009 *res = *res && operand2;
3010 break;
3011 case ':':
3012 *res = *res || operand2;
3013 break;
3014 case 'i':
3015 if (operand2 < *res)
3016 *res = operand2;
3017 break;
3018 case 'a':
3019 if (operand2 > *res)
3020 *res = operand2;
3021 break;
3022 default:
3023 abort();
3024 }
3025 }
3026 return 1;
3027 }
3028
3029 /* --- register management ------------------------------------------------ */
3030
/*
 * Set, increment, or decrement the register with the given
 * NUL-terminated name, leaving its step value alone.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	size_t	 namesz;

	namesz = strlen(name);
	roff_setregn(r, name, namesz, val, sign, INT_MIN);
}
3036
3037 static void
3038 roff_setregn(struct roff *r, const char *name, size_t len,
3039 int val, char sign, int step)
3040 {
3041 struct roffreg *reg;
3042
3043 /* Search for an existing register with the same name. */
3044 reg = r->regtab;
3045
3046 while (reg != NULL && (reg->key.sz != len ||
3047 strncmp(reg->key.p, name, len) != 0))
3048 reg = reg->next;
3049
3050 if (NULL == reg) {
3051 /* Create a new register. */
3052 reg = mandoc_malloc(sizeof(struct roffreg));
3053 reg->key.p = mandoc_strndup(name, len);
3054 reg->key.sz = len;
3055 reg->val = 0;
3056 reg->step = 0;
3057 reg->next = r->regtab;
3058 r->regtab = reg;
3059 }
3060
3061 if ('+' == sign)
3062 reg->val += val;
3063 else if ('-' == sign)
3064 reg->val -= val;
3065 else
3066 reg->val = val;
3067 if (step != INT_MIN)
3068 reg->step = step;
3069 }
3070
3071 /*
3072 * Handle some predefined read-only number registers.
3073 * For now, return -1 if the requested register is not predefined;
3074 * in case a predefined read-only register having the value -1
3075 * were to turn up, another special value would have to be chosen.
3076 */
3077 static int
3078 roff_getregro(const struct roff *r, const char *name)
3079 {
3080
3081 switch (*name) {
3082 case '$': /* Number of arguments of the last macro evaluated. */
3083 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
3084 case 'A': /* ASCII approximation mode is always off. */
3085 return 0;
3086 case 'g': /* Groff compatibility mode is always on. */
3087 return 1;
3088 case 'H': /* Fixed horizontal resolution. */
3089 return 24;
3090 case 'j': /* Always adjust left margin only. */
3091 return 0;
3092 case 'T': /* Some output device is always defined. */
3093 return 1;
3094 case 'V': /* Fixed vertical resolution. */
3095 return 40;
3096 default:
3097 return -1;
3098 }
3099 }
3100
/*
 * Get the value of the register with the given NUL-terminated
 * name, without applying any auto-increment.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	size_t	 namesz;

	namesz = strlen(name);
	return roff_getregn(r, name, namesz, '\0');
}
3106
3107 static int
3108 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
3109 {
3110 struct roffreg *reg;
3111 int val;
3112
3113 if ('.' == name[0] && 2 == len) {
3114 val = roff_getregro(r, name + 1);
3115 if (-1 != val)
3116 return val;
3117 }
3118
3119 for (reg = r->regtab; reg; reg = reg->next) {
3120 if (len == reg->key.sz &&
3121 0 == strncmp(name, reg->key.p, len)) {
3122 switch (sign) {
3123 case '+':
3124 reg->val += reg->step;
3125 break;
3126 case '-':
3127 reg->val -= reg->step;
3128 break;
3129 default:
3130 break;
3131 }
3132 return reg->val;
3133 }
3134 }
3135
3136 roff_setregn(r, name, len, 0, '\0', INT_MIN);
3137 return 0;
3138 }
3139
3140 static int
3141 roff_hasregn(const struct roff *r, const char *name, size_t len)
3142 {
3143 struct roffreg *reg;
3144 int val;
3145
3146 if ('.' == name[0] && 2 == len) {
3147 val = roff_getregro(r, name + 1);
3148 if (-1 != val)
3149 return 1;
3150 }
3151
3152 for (reg = r->regtab; reg; reg = reg->next)
3153 if (len == reg->key.sz &&
3154 0 == strncmp(name, reg->key.p, len))
3155 return 1;
3156
3157 return 0;
3158 }
3159
3160 static void
3161 roff_freereg(struct roffreg *reg)
3162 {
3163 struct roffreg *old_reg;
3164
3165 while (NULL != reg) {
3166 free(reg->key.p);
3167 old_reg = reg;
3168 reg = reg->next;
3169 free(old_reg);
3170 }
3171 }
3172
3173 static int
3174 roff_nr(ROFF_ARGS)
3175 {
3176 char *key, *val, *step;
3177 size_t keysz;
3178 int iv, is, len;
3179 char sign;
3180
3181 key = val = buf->buf + pos;
3182 if (*key == '\0')
3183 return ROFF_IGN;
3184
3185 keysz = roff_getname(r, &val, ln, pos);
3186 if (key[keysz] == '\\' || key[keysz] == '\t')
3187 return ROFF_IGN;
3188
3189 sign = *val;
3190 if (sign == '+' || sign == '-')
3191 val++;
3192
3193 len = 0;
3194 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
3195 return ROFF_IGN;
3196
3197 step = val + len;
3198 while (isspace((unsigned char)*step))
3199 step++;
3200 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
3201 is = INT_MIN;
3202
3203 roff_setregn(r, key, keysz, iv, sign, is);
3204 return ROFF_IGN;
3205 }
3206
3207 static int
3208 roff_rr(ROFF_ARGS)
3209 {
3210 struct roffreg *reg, **prev;
3211 char *name, *cp;
3212 size_t namesz;
3213
3214 name = cp = buf->buf + pos;
3215 if (*name == '\0')
3216 return ROFF_IGN;
3217 namesz = roff_getname(r, &cp, ln, pos);
3218 name[namesz] = '\0';
3219
3220 prev = &r->regtab;
3221 while (1) {
3222 reg = *prev;
3223 if (reg == NULL || !strcmp(name, reg->key.p))
3224 break;
3225 prev = &reg->next;
3226 }
3227 if (reg != NULL) {
3228 *prev = reg->next;
3229 free(reg->key.p);
3230 free(reg);
3231 }
3232 return ROFF_IGN;
3233 }
3234
3235 /* --- handler functions for roff requests -------------------------------- */
3236
3237 static int
3238 roff_rm(ROFF_ARGS)
3239 {
3240 const char *name;
3241 char *cp;
3242 size_t namesz;
3243
3244 cp = buf->buf + pos;
3245 while (*cp != '\0') {
3246 name = cp;
3247 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
3248 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
3249 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3250 if (name[namesz] == '\\' || name[namesz] == '\t')
3251 break;
3252 }
3253 return ROFF_IGN;
3254 }
3255
3256 static int
3257 roff_it(ROFF_ARGS)
3258 {
3259 int iv;
3260
3261 /* Parse the number of lines. */
3262
3263 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
3264 mandoc_msg(MANDOCERR_IT_NONUM,
3265 ln, ppos, "%s", buf->buf + 1);
3266 return ROFF_IGN;
3267 }
3268
3269 while (isspace((unsigned char)buf->buf[pos]))
3270 pos++;
3271
3272 /*
3273 * Arm the input line trap.
3274 * Special-casing "an-trap" is an ugly workaround to cope
3275 * with DocBook stupidly fiddling with man(7) internals.
3276 */
3277
3278 roffit_lines = iv;
3279 roffit_macro = mandoc_strdup(iv != 1 ||
3280 strcmp(buf->buf + pos, "an-trap") ?
3281 buf->buf + pos : "br");
3282 return ROFF_IGN;
3283 }
3284
3285 static int
3286 roff_Dd(ROFF_ARGS)
3287 {
3288 int mask;
3289 enum roff_tok t, te;
3290
3291 switch (tok) {
3292 case ROFF_Dd:
3293 tok = MDOC_Dd;
3294 te = MDOC_MAX;
3295 if (r->format == 0)
3296 r->format = MPARSE_MDOC;
3297 mask = MPARSE_MDOC | MPARSE_QUICK;
3298 break;
3299 case ROFF_TH:
3300 tok = MAN_TH;
3301 te = MAN_MAX;
3302 if (r->format == 0)
3303 r->format = MPARSE_MAN;
3304 mask = MPARSE_QUICK;
3305 break;
3306 default:
3307 abort();
3308 }
3309 if ((r->options & mask) == 0)
3310 for (t = tok; t < te; t++)
3311 roff_setstr(r, roff_name[t], NULL, 0);
3312 return ROFF_CONT;
3313 }
3314
3315 static int
3316 roff_TE(ROFF_ARGS)
3317 {
3318 r->man->flags &= ~ROFF_NONOFILL;
3319 if (r->tbl == NULL) {
3320 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE");
3321 return ROFF_IGN;
3322 }
3323 if (tbl_end(r->tbl, 0) == 0) {
3324 r->tbl = NULL;
3325 free(buf->buf);
3326 buf->buf = mandoc_strdup(".sp");
3327 buf->sz = 4;
3328 *offs = 0;
3329 return ROFF_REPARSE;
3330 }
3331 r->tbl = NULL;
3332 return ROFF_IGN;
3333 }
3334
3335 static int
3336 roff_T_(ROFF_ARGS)
3337 {
3338
3339 if (NULL == r->tbl)
3340 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&");
3341 else
3342 tbl_restart(ln, ppos, r->tbl);
3343
3344 return ROFF_IGN;
3345 }
3346
3347 /*
3348 * Handle in-line equation delimiters.
3349 */
3350 static int
3351 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
3352 {
3353 char *cp1, *cp2;
3354 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
3355
3356 /*
3357 * Outside equations, look for an opening delimiter.
3358 * If we are inside an equation, we already know it is
3359 * in-line, or this function wouldn't have been called;
3360 * so look for a closing delimiter.
3361 */
3362
3363 cp1 = buf->buf + pos;
3364 cp2 = strchr(cp1, r->eqn == NULL ?
3365 r->last_eqn->odelim : r->last_eqn->cdelim);
3366 if (cp2 == NULL)
3367 return ROFF_CONT;
3368
3369 *cp2++ = '\0';
3370 bef_pr = bef_nl = aft_nl = aft_pr = "";
3371
3372 /* Handle preceding text, protecting whitespace. */
3373
3374 if (*buf->buf != '\0') {
3375 if (r->eqn == NULL)
3376 bef_pr = "\\&";
3377 bef_nl = "\n";
3378 }
3379
3380 /*
3381 * Prepare replacing the delimiter with an equation macro
3382 * and drop leading white space from the equation.
3383 */
3384
3385 if (r->eqn == NULL) {
3386 while (*cp2 == ' ')
3387 cp2++;
3388 mac = ".EQ";
3389 } else
3390 mac = ".EN";
3391
3392 /* Handle following text, protecting whitespace. */
3393
3394 if (*cp2 != '\0') {
3395 aft_nl = "\n";
3396 if (r->eqn != NULL)
3397 aft_pr = "\\&";
3398 }
3399
3400 /* Do the actual replacement. */
3401
3402 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
3403 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
3404 free(buf->buf);
3405 buf->buf = cp1;
3406
3407 /* Toggle the in-line state of the eqn subsystem. */
3408
3409 r->eqn_inline = r->eqn == NULL;
3410 return ROFF_REPARSE;
3411 }
3412
3413 static int
3414 roff_EQ(ROFF_ARGS)
3415 {
3416 struct roff_node *n;
3417
3418 if (r->man->meta.macroset == MACROSET_MAN)
3419 man_breakscope(r->man, ROFF_EQ);
3420 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3421 if (ln > r->man->last->line)
3422 n->flags |= NODE_LINE;
3423 n->eqn = eqn_box_new();
3424 roff_node_append(r->man, n);
3425 r->man->next = ROFF_NEXT_SIBLING;
3426
3427 assert(r->eqn == NULL);
3428 if (r->last_eqn == NULL)
3429 r->last_eqn = eqn_alloc();
3430 else
3431 eqn_reset(r->last_eqn);
3432 r->eqn = r->last_eqn;
3433 r->eqn->node = n;
3434
3435 if (buf->buf[pos] != '\0')
3436 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3437 ".EQ %s", buf->buf + pos);
3438
3439 return ROFF_IGN;
3440 }
3441
3442 static int
3443 roff_EN(ROFF_ARGS)
3444 {
3445 if (r->eqn != NULL) {
3446 eqn_parse(r->eqn);
3447 r->eqn = NULL;
3448 } else
3449 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN");
3450 if (buf->buf[pos] != '\0')
3451 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3452 "EN %s", buf->buf + pos);
3453 return ROFF_IGN;
3454 }
3455
3456 static int
3457 roff_TS(ROFF_ARGS)
3458 {
3459 if (r->tbl != NULL) {
3460 mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
3461 tbl_end(r->tbl, 0);
3462 }
3463 r->man->flags |= ROFF_NONOFILL;
3464 r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
3465 if (r->last_tbl == NULL)
3466 r->first_tbl = r->tbl;
3467 r->last_tbl = r->tbl;
3468 return ROFF_IGN;
3469 }
3470
3471 static int
3472 roff_noarg(ROFF_ARGS)
3473 {
3474 if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3475 man_breakscope(r->man, tok);
3476 if (tok == ROFF_brp)
3477 tok = ROFF_br;
3478 roff_elem_alloc(r->man, ln, ppos, tok);
3479 if (buf->buf[pos] != '\0')
3480 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3481 "%s %s", roff_name[tok], buf->buf + pos);
3482 if (tok == ROFF_nf)
3483 r->man->flags |= ROFF_NOFILL;
3484 else if (tok == ROFF_fi)
3485 r->man->flags &= ~ROFF_NOFILL;
3486 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3487 r->man->next = ROFF_NEXT_SIBLING;
3488 return ROFF_IGN;
3489 }
3490
3491 static int
3492 roff_onearg(ROFF_ARGS)
3493 {
3494 struct roff_node *n;
3495 char *cp;
3496 int npos;
3497
3498 if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3499 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3500 tok == ROFF_ti))
3501 man_breakscope(r->man, tok);
3502
3503 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3504 r->man->last = roffce_node;
3505 r->man->next = ROFF_NEXT_SIBLING;
3506 }
3507
3508 roff_elem_alloc(r->man, ln, ppos, tok);
3509 n = r->man->last;
3510
3511 cp = buf->buf + pos;
3512 if (*cp != '\0') {
3513 while (*cp != '\0' && *cp != ' ')
3514 cp++;
3515 while (*cp == ' ')
3516 *cp++ = '\0';
3517 if (*cp != '\0')
3518 mandoc_msg(MANDOCERR_ARG_EXCESS,
3519 ln, (int)(cp - buf->buf),
3520 "%s ... %s", roff_name[tok], cp);
3521 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3522 }
3523
3524 if (tok == ROFF_ce || tok == ROFF_rj) {
3525 if (r->man->last->type == ROFFT_ELEM) {
3526 roff_word_alloc(r->man, ln, pos, "1");
3527 r->man->last->flags |= NODE_NOSRC;
3528 }
3529 npos = 0;
3530 if (roff_evalnum(r, ln, r->man->last->string, &npos,
3531 &roffce_lines, 0) == 0) {
3532 mandoc_msg(MANDOCERR_CE_NONUM,
3533 ln, pos, "ce %s", buf->buf + pos);
3534 roffce_lines = 1;
3535 }
3536 if (roffce_lines < 1) {
3537 r->man->last = r->man->last->parent;
3538 roffce_node = NULL;
3539 roffce_lines = 0;
3540 } else
3541 roffce_node = r->man->last->parent;
3542 } else {
3543 n->flags |= NODE_VALID | NODE_ENDED;
3544 r->man->last = n;
3545 }
3546 n->flags |= NODE_LINE;
3547 r->man->next = ROFF_NEXT_SIBLING;
3548 return ROFF_IGN;
3549 }
3550
3551 static int
3552 roff_manyarg(ROFF_ARGS)
3553 {
3554 struct roff_node *n;
3555 char *sp, *ep;
3556
3557 roff_elem_alloc(r->man, ln, ppos, tok);
3558 n = r->man->last;
3559
3560 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3561 while (*ep != '\0' && *ep != ' ')
3562 ep++;
3563 while (*ep == ' ')
3564 *ep++ = '\0';
3565 roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3566 }
3567
3568 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3569 r->man->last = n;
3570 r->man->next = ROFF_NEXT_SIBLING;
3571 return ROFF_IGN;
3572 }
3573
3574 static int
3575 roff_als(ROFF_ARGS)
3576 {
3577 char *oldn, *newn, *end, *value;
3578 size_t oldsz, newsz, valsz;
3579
3580 newn = oldn = buf->buf + pos;
3581 if (*newn == '\0')
3582 return ROFF_IGN;
3583
3584 newsz = roff_getname(r, &oldn, ln, pos);
3585 if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0')
3586 return ROFF_IGN;
3587
3588 end = oldn;
3589 oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3590 if (oldsz == 0)
3591 return ROFF_IGN;
3592
3593 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3594 (int)oldsz, oldn);
3595 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3596 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3597 free(value);
3598 return ROFF_IGN;
3599 }
3600
3601 /*
3602 * The .break request only makes sense inside conditionals,
3603 * and that case is already handled in roff_cond_sub().
3604 */
3605 static int
3606 roff_break(ROFF_ARGS)
3607 {
3608 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break");
3609 return ROFF_IGN;
3610 }
3611
3612 static int
3613 roff_cc(ROFF_ARGS)
3614 {
3615 const char *p;
3616
3617 p = buf->buf + pos;
3618
3619 if (*p == '\0' || (r->control = *p++) == '.')
3620 r->control = '\0';
3621
3622 if (*p != '\0')
3623 mandoc_msg(MANDOCERR_ARG_EXCESS,
3624 ln, p - buf->buf, "cc ... %s", p);
3625
3626 return ROFF_IGN;
3627 }
3628
3629 static int
3630 roff_char(ROFF_ARGS)
3631 {
3632 const char *p, *kp, *vp;
3633 size_t ksz, vsz;
3634 int font;
3635
3636 /* Parse the character to be replaced. */
3637
3638 kp = buf->buf + pos;
3639 p = kp + 1;
3640 if (*kp == '\0' || (*kp == '\\' &&
3641 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3642 (*p != ' ' && *p != '\0')) {
3643 mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp);
3644 return ROFF_IGN;
3645 }
3646 ksz = p - kp;
3647 while (*p == ' ')
3648 p++;
3649
3650 /*
3651 * If the replacement string contains a font escape sequence,
3652 * we have to restore the font at the end.
3653 */
3654
3655 vp = p;
3656 vsz = strlen(p);
3657 font = 0;
3658 while (*p != '\0') {
3659 if (*p++ != '\\')
3660 continue;
3661 switch (mandoc_escape(&p, NULL, NULL)) {
3662 case ESCAPE_FONT:
3663 case ESCAPE_FONTROMAN:
3664 case ESCAPE_FONTITALIC:
3665 case ESCAPE_FONTBOLD:
3666 case ESCAPE_FONTBI:
3667 case ESCAPE_FONTCR:
3668 case ESCAPE_FONTCB:
3669 case ESCAPE_FONTCI:
3670 case ESCAPE_FONTPREV:
3671 font++;
3672 break;
3673 default:
3674 break;
3675 }
3676 }
3677 if (font > 1)
3678 mandoc_msg(MANDOCERR_CHAR_FONT,
3679 ln, (int)(vp - buf->buf), "%s", vp);
3680
3681 /*
3682 * Approximate the effect of .char using the .tr tables.
3683 * XXX In groff, .char and .tr interact differently.
3684 */
3685
3686 if (ksz == 1) {
3687 if (r->xtab == NULL)
3688 r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3689 assert((unsigned int)*kp < 128);
3690 free(r->xtab[(int)*kp].p);
3691 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3692 "%s%s", vp, font ? "\fP" : "");
3693 } else {
3694 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3695 if (font)
3696 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3697 }
3698 return ROFF_IGN;
3699 }
3700
3701 static int
3702 roff_ec(ROFF_ARGS)
3703 {
3704 const char *p;
3705
3706 p = buf->buf + pos;
3707 if (*p == '\0')
3708 r->escape = '\\';
3709 else {
3710 r->escape = *p;
3711 if (*++p != '\0')
3712 mandoc_msg(MANDOCERR_ARG_EXCESS, ln,
3713 (int)(p - buf->buf), "ec ... %s", p);
3714 }
3715 return ROFF_IGN;
3716 }
3717
3718 static int
3719 roff_eo(ROFF_ARGS)
3720 {
3721 r->escape = '\0';
3722 if (buf->buf[pos] != '\0')
3723 mandoc_msg(MANDOCERR_ARG_SKIP,
3724 ln, pos, "eo %s", buf->buf + pos);
3725 return ROFF_IGN;
3726 }
3727
3728 static int
3729 roff_mc(ROFF_ARGS)
3730 {
3731 struct roff_node *n;
3732 char *cp;
3733
3734 /* Parse the first argument. */
3735
3736 cp = buf->buf + pos;
3737 if (*cp != '\0')
3738 cp++;
3739 if (buf->buf[pos] == '\\') {
3740 switch (mandoc_escape((const char **)&cp, NULL, NULL)) {
3741 case ESCAPE_SPECIAL:
3742 case ESCAPE_UNICODE:
3743 case ESCAPE_NUMBERED:
3744 break;
3745 default:
3746 *cp = '\0';
3747 mandoc_msg(MANDOCERR_MC_ESC, ln, pos,
3748 "mc %s", buf->buf + pos);
3749 buf->buf[pos] = '\0';
3750 break;
3751 }
3752 }
3753
3754 /* Ignore additional arguments. */
3755
3756 while (*cp == ' ')
3757 *cp++ = '\0';
3758 if (*cp != '\0') {
3759 mandoc_msg(MANDOCERR_MC_DIST, ln, (int)(cp - buf->buf),
3760 "mc ... %s", cp);
3761 *cp = '\0';
3762 }
3763
3764 /* Create the .mc node. */
3765
3766 roff_elem_alloc(r->man, ln, ppos, tok);
3767 n = r->man->last;
3768 if (buf->buf[pos] != '\0')
3769 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3770 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3771 r->man->last = n;
3772 r->man->next = ROFF_NEXT_SIBLING;
3773 return ROFF_IGN;
3774 }
3775
3776 static int
3777 roff_nop(ROFF_ARGS)
3778 {
3779 while (buf->buf[pos] == ' ')
3780 pos++;
3781 *offs = pos;
3782 return ROFF_RERUN;
3783 }
3784
3785 static int
3786 roff_tr(ROFF_ARGS)
3787 {
3788 const char *p, *first, *second;
3789 size_t fsz, ssz;
3790 enum mandoc_esc esc;
3791
3792 p = buf->buf + pos;
3793
3794 if (*p == '\0') {
3795 mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr");
3796 return ROFF_IGN;
3797 }
3798
3799 while (*p != '\0') {
3800 fsz = ssz = 1;
3801
3802 first = p++;
3803 if (*first == '\\') {
3804 esc = mandoc_escape(&p, NULL, NULL);
3805 if (esc == ESCAPE_ERROR) {
3806 mandoc_msg(MANDOCERR_ESC_BAD, ln,
3807 (int)(p - buf->buf), "%s", first);
3808 return ROFF_IGN;
3809 }
3810 fsz = (size_t)(p - first);
3811 }
3812
3813 second = p++;
3814 if (*second == '\\') {
3815 esc = mandoc_escape(&p, NULL, NULL);
3816 if (esc == ESCAPE_ERROR) {
3817 mandoc_msg(MANDOCERR_ESC_BAD, ln,
3818 (int)(p - buf->buf), "%s", second);
3819 return ROFF_IGN;
3820 }
3821 ssz = (size_t)(p - second);
3822 } else if (*second == '\0') {
3823 mandoc_msg(MANDOCERR_TR_ODD, ln,
3824 (int)(first - buf->buf), "tr %s", first);
3825 second = " ";
3826 p--;
3827 }
3828
3829 if (fsz > 1) {
3830 roff_setstrn(&r->xmbtab, first, fsz,
3831 second, ssz, 0);
3832 continue;
3833 }
3834
3835 if (r->xtab == NULL)
3836 r->xtab = mandoc_calloc(128,
3837 sizeof(struct roffstr));
3838
3839 free(r->xtab[(int)*first].p);
3840 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3841 r->xtab[(int)*first].sz = ssz;
3842 }
3843
3844 return ROFF_IGN;
3845 }
3846
3847 /*
3848 * Implementation of the .return request.
3849 * There is no need to call roff_userret() from here.
3850 * The read module will call that after rewinding the reader stack
3851 * to the place from where the current macro was called.
3852 */
3853 static int
3854 roff_return(ROFF_ARGS)
3855 {
3856 if (r->mstackpos >= 0)
3857 return ROFF_IGN | ROFF_USERRET;
3858
3859 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return");
3860 return ROFF_IGN;
3861 }
3862
3863 static int
3864 roff_rn(ROFF_ARGS)
3865 {
3866 const char *value;
3867 char *oldn, *newn, *end;
3868 size_t oldsz, newsz;
3869 int deftype;
3870
3871 oldn = newn = buf->buf + pos;
3872 if (*oldn == '\0')
3873 return ROFF_IGN;
3874
3875 oldsz = roff_getname(r, &newn, ln, pos);
3876 if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0')
3877 return ROFF_IGN;
3878
3879 end = newn;
3880 newsz = roff_getname(r, &end, ln, newn - buf->buf);
3881 if (newsz == 0)
3882 return ROFF_IGN;
3883
3884 deftype = ROFFDEF_ANY;
3885 value = roff_getstrn(r, oldn, oldsz, &deftype);
3886 switch (deftype) {
3887 case ROFFDEF_USER:
3888 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3889 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3890 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3891 break;
3892 case ROFFDEF_PRE:
3893 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3894 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3895 break;
3896 case ROFFDEF_REN:
3897 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3898 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3899 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3900 break;
3901 case ROFFDEF_STD:
3902 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3903 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3904 break;
3905 default:
3906 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3907 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3908 break;
3909 }
3910 return ROFF_IGN;
3911 }
3912
3913 static int
3914 roff_shift(ROFF_ARGS)
3915 {
3916 struct mctx *ctx;
3917 int argpos, levels, i;
3918
3919 argpos = pos;
3920 levels = 1;
3921 if (buf->buf[pos] != '\0' &&
3922 roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
3923 mandoc_msg(MANDOCERR_CE_NONUM,
3924 ln, pos, "shift %s", buf->buf + pos);
3925 levels = 1;
3926 }
3927 if (r->mstackpos < 0) {
3928 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift");
3929 return ROFF_IGN;
3930 }
3931 ctx = r->mstack + r->mstackpos;
3932 if (levels > ctx->argc) {
3933 mandoc_msg(MANDOCERR_SHIFT,
3934 ln, argpos, "%d, but max is %d", levels, ctx->argc);
3935 levels = ctx->argc;
3936 }
3937 if (levels < 0) {
3938 mandoc_msg(MANDOCERR_ARG_NEG, ln, argpos, "shift %d", levels);
3939 levels = 0;
3940 }
3941 if (levels == 0)
3942 return ROFF_IGN;
3943 for (i = 0; i < levels; i++)
3944 free(ctx->argv[i]);
3945 ctx->argc -= levels;
3946 for (i = 0; i < ctx->argc; i++)
3947 ctx->argv[i] = ctx->argv[i + levels];
3948 return ROFF_IGN;
3949 }
3950
3951 static int
3952 roff_so(ROFF_ARGS)
3953 {
3954 char *name, *cp;
3955
3956 name = buf->buf + pos;
3957 mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name);
3958
3959 /*
3960 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3961 * opening anything that's not in our cwd or anything beneath
3962 * it. Thus, explicitly disallow traversing up the file-system
3963 * or using absolute paths.
3964 */
3965
3966 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3967 mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name);
3968 buf->sz = mandoc_asprintf(&cp,
3969 ".sp\nSee the file %s.\n.sp", name) + 1;
3970 free(buf->buf);
3971 buf->buf = cp;
3972 *offs = 0;
3973 return ROFF_REPARSE;
3974 }
3975
3976 *offs = pos;
3977 return ROFF_SO;
3978 }
3979
3980 /* --- user defined strings and macros ------------------------------------ */
3981
3982 static int
3983 roff_userdef(ROFF_ARGS)
3984 {
3985 struct mctx *ctx;
3986 char *arg, *ap, *dst, *src;
3987 size_t sz;
3988
3989 /* If the macro is empty, ignore it altogether. */
3990
3991 if (*r->current_string == '\0')
3992 return ROFF_IGN;
3993
3994 /* Initialize a new macro stack context. */
3995
3996 if (++r->mstackpos == r->mstacksz) {
3997 r->mstack = mandoc_recallocarray(r->mstack,
3998 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
3999 r->mstacksz += 8;
4000 }
4001 ctx = r->mstack + r->mstackpos;
4002 ctx->argc = 0;
4003
4004 /*
4005 * Collect pointers to macro argument strings,
4006 * NUL-terminating them and escaping quotes.
4007 */
4008
4009 src = buf->buf + pos;
4010 while (*src != '\0') {
4011 if (ctx->argc == ctx->argsz) {
4012 ctx->argsz += 8;
4013 ctx->argv = mandoc_reallocarray(ctx->argv,
4014 ctx->argsz, sizeof(*ctx->argv));
4015 }
4016 arg = roff_getarg(r, &src, ln, &pos);
4017 sz = 1; /* For the terminating NUL. */
4018 for (ap = arg; *ap != '\0'; ap++)
4019 sz += *ap == '"' ? 4 : 1;
4020 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
4021 for (ap = arg; *ap != '\0'; ap++) {
4022 if (*ap == '"') {
4023 memcpy(dst, "\\(dq", 4);
4024 dst += 4;
4025 } else
4026 *dst++ = *ap;
4027 }
4028 *dst = '\0';
4029 free(arg);
4030 }
4031
4032 /* Replace the macro invocation by the macro definition. */
4033
4034 free(buf->buf);
4035 buf->buf = mandoc_strdup(r->current_string);
4036 buf->sz = strlen(buf->buf) + 1;
4037 *offs = 0;
4038
4039 return buf->buf[buf->sz - 2] == '\n' ?
4040 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
4041 }
4042
4043 /*
4044 * Calling a high-level macro that was renamed with .rn.
4045 * r->current_string has already been set up by roff_parse().
4046 */
4047 static int
4048 roff_renamed(ROFF_ARGS)
4049 {
4050 char *nbuf;
4051
4052 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
4053 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
4054 free(buf->buf);
4055 buf->buf = nbuf;
4056 *offs = 0;
4057 return ROFF_CONT;
4058 }
4059
4060 /*
4061 * Measure the length in bytes of the roff identifier at *cpp
4062 * and advance the pointer to the next word.
4063 */
4064 static size_t
4065 roff_getname(struct roff *r, char **cpp, int ln, int pos)
4066 {
4067 char *name, *cp;
4068 size_t namesz;
4069
4070 name = *cpp;
4071 if (*name == '\0')
4072 return 0;
4073
4074 /* Advance cp to the byte after the end of the name. */
4075
4076 for (cp = name; 1; cp++) {
4077 namesz = cp - name;
4078 if (*cp == '\0')
4079 break;
4080 if (*cp == ' ' || *cp == '\t') {
4081 cp++;
4082 break;
4083 }
4084 if (*cp != '\\')
4085 continue;
4086 if (cp[1] == '{' || cp[1] == '}')
4087 break;
4088 if (*++cp == '\\')
4089 continue;
4090 mandoc_msg(MANDOCERR_NAMESC, ln, pos,
4091 "%.*s", (int)(cp - name + 1), name);
4092 mandoc_escape((const char **)&cp, NULL, NULL);
4093 break;
4094 }
4095
4096 /* Read past spaces. */
4097
4098 while (*cp == ' ')
4099 cp++;
4100
4101 *cpp = cp;
4102 return namesz;
4103 }
4104
4105 /*
4106 * Store *string into the user-defined string called *name.
4107 * To clear an existing entry, call with (*r, *name, NULL, 0).
4108 * append == 0: replace mode
4109 * append == 1: single-line append mode
4110 * append == 2: multiline append mode, append '\n' after each call
4111 */
4112 static void
4113 roff_setstr(struct roff *r, const char *name, const char *string,
4114 int append)
4115 {
4116 size_t namesz;
4117
4118 namesz = strlen(name);
4119 roff_setstrn(&r->strtab, name, namesz, string,
4120 string ? strlen(string) : 0, append);
4121 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
4122 }
4123
4124 static void
4125 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
4126 const char *string, size_t stringsz, int append)
4127 {
4128 struct roffkv *n;
4129 char *c;
4130 int i;
4131 size_t oldch, newch;
4132
4133 /* Search for an existing string with the same name. */
4134 n = *r;
4135
4136 while (n && (namesz != n->key.sz ||
4137 strncmp(n->key.p, name, namesz)))
4138 n = n->next;
4139
4140 if (NULL == n) {
4141 /* Create a new string table entry. */
4142 n = mandoc_malloc(sizeof(struct roffkv));
4143 n->key.p = mandoc_strndup(name, namesz);
4144 n->key.sz = namesz;
4145 n->val.p = NULL;
4146 n->val.sz = 0;
4147 n->next = *r;
4148 *r = n;
4149 } else if (0 == append) {
4150 free(n->val.p);
4151 n->val.p = NULL;
4152 n->val.sz = 0;
4153 }
4154
4155 if (NULL == string)
4156 return;
4157
4158 /*
4159 * One additional byte for the '\n' in multiline mode,
4160 * and one for the terminating '\0'.
4161 */
4162 newch = stringsz + (1 < append ? 2u : 1u);
4163
4164 if (NULL == n->val.p) {
4165 n->val.p = mandoc_malloc(newch);
4166 *n->val.p = '\0';
4167 oldch = 0;
4168 } else {
4169 oldch = n->val.sz;
4170 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
4171 }
4172
4173 /* Skip existing content in the destination buffer. */
4174 c = n->val.p + (int)oldch;
4175
4176 /* Append new content to the destination buffer. */
4177 i = 0;
4178 while (i < (int)stringsz) {
4179 /*
4180 * Rudimentary roff copy mode:
4181 * Handle escaped backslashes.
4182 */
4183 if ('\\' == string[i] && '\\' == string[i + 1])
4184 i++;
4185 *c++ = string[i++];
4186 }
4187
4188 /* Append terminating bytes. */
4189 if (1 < append)
4190 *c++ = '\n';
4191
4192 *c = '\0';
4193 n->val.sz = (int)(c - n->val.p);
4194 }
4195
4196 static const char *
4197 roff_getstrn(struct roff *r, const char *name, size_t len,
4198 int *deftype)
4199 {
4200 const struct roffkv *n;
4201 int found, i;
4202 enum roff_tok tok;
4203
4204 found = 0;
4205 for (n = r->strtab; n != NULL; n = n->next) {
4206 if (strncmp(name, n->key.p, len) != 0 ||
4207 n->key.p[len] != '\0' || n->val.p == NULL)
4208 continue;
4209 if (*deftype & ROFFDEF_USER) {
4210 *deftype = ROFFDEF_USER;
4211 return n->val.p;
4212 } else {
4213 found = 1;
4214 break;
4215 }
4216 }
4217 for (n = r->rentab; n != NULL; n = n->next) {
4218 if (strncmp(name, n->key.p, len) != 0 ||
4219 n->key.p[len] != '\0' || n->val.p == NULL)
4220 continue;
4221 if (*deftype & ROFFDEF_REN) {
4222 *deftype = ROFFDEF_REN;
4223 return n->val.p;
4224 } else {
4225 found = 1;
4226 break;
4227 }
4228 }
4229 for (i = 0; i < PREDEFS_MAX; i++) {
4230 if (strncmp(name, predefs[i].name, len) != 0 ||
4231 predefs[i].name[len] != '\0')
4232 continue;
4233 if (*deftype & ROFFDEF_PRE) {
4234 *deftype = ROFFDEF_PRE;
4235 return predefs[i].str;
4236 } else {
4237 found = 1;
4238 break;
4239 }
4240 }
4241 if (r->man->meta.macroset != MACROSET_MAN) {
4242 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
4243 if (strncmp(name, roff_name[tok], len) != 0 ||
4244 roff_name[tok][len] != '\0')
4245 continue;
4246 if (*deftype & ROFFDEF_STD) {
4247 *deftype = ROFFDEF_STD;
4248 return NULL;
4249 } else {
4250 found = 1;
4251 break;
4252 }
4253 }
4254 }
4255 if (r->man->meta.macroset != MACROSET_MDOC) {
4256 for (tok = MAN_TH; tok < MAN_MAX; tok++) {
4257 if (strncmp(name, roff_name[tok], len) != 0 ||
4258 roff_name[tok][len] != '\0')
4259 continue;
4260 if (*deftype & ROFFDEF_STD) {
4261 *deftype = ROFFDEF_STD;
4262 return NULL;
4263 } else {
4264 found = 1;
4265 break;
4266 }
4267 }
4268 }
4269
4270 if (found == 0 && *deftype != ROFFDEF_ANY) {
4271 if (*deftype & ROFFDEF_REN) {
4272 /*
4273 * This might still be a request,
4274 * so do not treat it as undefined yet.
4275 */
4276 *deftype = ROFFDEF_UNDEF;
4277 return NULL;
4278 }
4279
4280 /* Using an undefined string defines it to be empty. */
4281
4282 roff_setstrn(&r->strtab, name, len, "", 0, 0);
4283 roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
4284 }
4285
4286 *deftype = 0;
4287 return NULL;
4288 }
4289
4290 static void
4291 roff_freestr(struct roffkv *r)
4292 {
4293 struct roffkv *n, *nn;
4294
4295 for (n = r; n; n = nn) {
4296 free(n->key.p);
4297 free(n->val.p);
4298 nn = n->next;
4299 free(n);
4300 }
4301 }
4302
4303 /* --- accessors and utility functions ------------------------------------ */
4304
4305 /*
4306 * Duplicate an input string, making the appropriate character
4307 * conversations (as stipulated by `tr') along the way.
4308 * Returns a heap-allocated string with all the replacements made.
4309 */
4310 char *
4311 roff_strdup(const struct roff *r, const char *p)
4312 {
4313 const struct roffkv *cp;
4314 char *res;
4315 const char *pp;
4316 size_t ssz, sz;
4317 enum mandoc_esc esc;
4318
4319 if (NULL == r->xmbtab && NULL == r->xtab)
4320 return mandoc_strdup(p);
4321 else if ('\0' == *p)
4322 return mandoc_strdup("");
4323
4324 /*
4325 * Step through each character looking for term matches
4326 * (remember that a `tr' can be invoked with an escape, which is
4327 * a glyph but the escape is multi-character).
4328 * We only do this if the character hash has been initialised
4329 * and the string is >0 length.
4330 */
4331
4332 res = NULL;
4333 ssz = 0;
4334
4335 while ('\0' != *p) {
4336 assert((unsigned int)*p < 128);
4337 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4338 sz = r->xtab[(int)*p].sz;
4339 res = mandoc_realloc(res, ssz + sz + 1);
4340 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4341 ssz += sz;
4342 p++;
4343 continue;
4344 } else if ('\\' != *p) {
4345 res = mandoc_realloc(res, ssz + 2);
4346 res[ssz++] = *p++;
4347 continue;
4348 }
4349
4350 /* Search for term matches. */
4351 for (cp = r->xmbtab; cp; cp = cp->next)
4352 if (0 == strncmp(p, cp->key.p, cp->key.sz))
4353 break;
4354
4355 if (NULL != cp) {
4356 /*
4357 * A match has been found.
4358 * Append the match to the array and move
4359 * forward by its keysize.
4360 */
4361 res = mandoc_realloc(res,
4362 ssz + cp->val.sz + 1);
4363 memcpy(res + ssz, cp->val.p, cp->val.sz);
4364 ssz += cp->val.sz;
4365 p += (int)cp->key.sz;
4366 continue;
4367 }
4368
4369 /*
4370 * Handle escapes carefully: we need to copy
4371 * over just the escape itself, or else we might
4372 * do replacements within the escape itself.
4373 * Make sure to pass along the bogus string.
4374 */
4375 pp = p++;
4376 esc = mandoc_escape(&p, NULL, NULL);
4377 if (ESCAPE_ERROR == esc) {
4378 sz = strlen(pp);
4379 res = mandoc_realloc(res, ssz + sz + 1);
4380 memcpy(res + ssz, pp, sz);
4381 break;
4382 }
4383 /*
4384 * We bail out on bad escapes.
4385 * No need to warn: we already did so when
4386 * roff_expand() was called.
4387 */
4388 sz = (int)(p - pp);
4389 res = mandoc_realloc(res, ssz + sz + 1);
4390 memcpy(res + ssz, pp, sz);
4391 ssz += sz;
4392 }
4393
4394 res[(int)ssz] = '\0';
4395 return res;
4396 }
4397
4398 int
4399 roff_getformat(const struct roff *r)
4400 {
4401
4402 return r->format;
4403 }
4404
4405 /*
4406 * Find out whether a line is a macro line or not.
4407 * If it is, adjust the current position and return one; if it isn't,
4408 * return zero and don't change the current position.
4409 * If the control character has been set with `.cc', then let that grain
4410 * precedence.
4411 * This is slighly contrary to groff, where using the non-breaking
4412 * control character when `cc' has been invoked will cause the
4413 * non-breaking macro contents to be printed verbatim.
4414 */
4415 int
4416 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4417 {
4418 int pos;
4419
4420 pos = *ppos;
4421
4422 if (r->control != '\0' && cp[pos] == r->control)
4423 pos++;
4424 else if (r->control != '\0')
4425 return 0;
4426 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4427 pos += 2;
4428 else if ('.' == cp[pos] || '\'' == cp[pos])
4429 pos++;
4430 else
4431 return 0;
4432
4433 while (' ' == cp[pos] || '\t' == cp[pos])
4434 pos++;
4435
4436 *ppos = pos;
4437 return 1;
4438 }