]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
29166fd4fe6e41ce42a32ba4afa1a0d19c4dc11e
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.382 2022/04/24 13:38:46 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010-2015, 2017-2022 Ingo Schwarze <schwarze@openbsd.org>
4 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 * Implementation of the roff(7) parser for mandoc(1).
19 */
20 #include "config.h"
21
22 #include <sys/types.h>
23
24 #include <assert.h>
25 #include <ctype.h>
26 #include <limits.h>
27 #include <stddef.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32
33 #include "mandoc_aux.h"
34 #include "mandoc_ohash.h"
35 #include "mandoc.h"
36 #include "roff.h"
37 #include "mandoc_parse.h"
38 #include "libmandoc.h"
39 #include "roff_int.h"
40 #include "tbl_parse.h"
41 #include "eqn_parse.h"
42
/*
 * ASCII_ESC is used to signal from roff_getarg() to roff_expand()
 * that an escape sequence resulted from copy-in processing and
 * needs to be checked or interpolated.  As it is used nowhere
 * else, it is defined here rather than in a header file.
 * The value 27 is the code of the ASCII ESC control character.
 */
#define	ASCII_ESC	27

/* Maximum number of string expansions per line, to break infinite loops. */
#define	EXPAND_LIMIT	1000
53
/*
 * Types of definitions of macros and strings.
 * Each type is a distinct bit so that several can be OR'ed together
 * into one mask; note that the lowest bit used is (1 << 1).
 */
#define	ROFFDEF_USER	(1 << 1)  /* User-defined. */
#define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
#define	ROFFDEF_REN	(1 << 3)  /* Renamed standard macro. */
#define	ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
/* Mask matching every kind of existing definition. */
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
			 ROFFDEF_REN | ROFFDEF_STD)
#define	ROFFDEF_UNDEF	(1 << 5)  /* Completely undefined. */
62
63 /* --- data types --------------------------------------------------------- */
64
/*
 * An incredibly-simple string buffer:
 * a pointer to the text together with its cached length.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
72
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * struct roff uses such lists for its strtab, rentab, and xmbtab members.
 */
struct	roffkv {
	struct roffstr	 key; /* name that is looked up */
	struct roffstr	 val; /* content stored under that name */
	struct roffkv	*next; /* next in list */
};
81
/*
 * A single number register as part of a singly-linked list.
 * The list head is the regtab member of struct roff.
 */
struct	roffreg {
	struct roffstr	 key; /* register name */
	int		 val; /* current value */
	int		 step; /* presumably the auto-increment amount; verify */
	struct roffreg	*next; /* next in list */
};
91
/*
 * Association of request and macro names with token IDs.
 * These are the entries of the hash tables built by roffhash_alloc();
 * each one is allocated with room for the name directly behind it.
 */
struct	roffreq {
	enum roff_tok	 tok; /* token ID belonging to the name */
	char		 name[]; /* nil-terminated name, used as the hash key */
};
99
/*
 * A macro processing context.
 * More than one is needed when macro calls are nested.
 * The contexts live in the mstack array of struct roff;
 * roff_free() releases the argv array of every context.
 */
struct	mctx {
	char		**argv; /* presumably the arguments of the macro call */
	int		 argc; /* presumably the number of arguments in use */
	int		 argsz; /* presumably the allocated size of argv */
};
109
/*
 * The complete state of one roff parser instance.
 * Created by roff_alloc(), cleared for reuse by roff_reset(),
 * and destroyed by roff_free().
 */
struct	roff {
	struct roff_man	*man; /* mdoc or man parser */
	struct roffnode	*last; /* leaf of stack */
	struct mctx	*mstack; /* stack of macro contexts */
	int		*rstack; /* stack of inverted `ie' values */
	struct ohash	*reqtab; /* request lookup table */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*rentab; /* renamed strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* equation parser */
	struct eqn_node	*eqn; /* active equation parser */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 mstacksz; /* current size of mstack */
	int		 mstackpos; /* position in mstack, -1 when empty */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack, -1 when empty */
	int		 format; /* current file in mdoc or man format */
	char		 control; /* control character */
	char		 escape; /* escape character */
};
137
/*
 * A macro definition, condition, or ignored block.
 * Nodes form a stack linked through the parent pointer;
 * they are created by roffnode_push() and destroyed by roffnode_pop().
 */
struct	roffnode {
	enum roff_tok	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* custom end macro of the block */
	int		 endspan; /* scope to: 1=eol 2=next line -1=\} */
	int		 rule; /* content is: 1=evaluated 0=skipped */
};
151
/*
 * Common parameter list shared by all request handler functions.
 * It is spelled as a macro such that the many handler prototypes
 * and definitions in this file cannot get out of sync.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum roff_tok tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Pointer to a request handler function taking ROFF_ARGS. */
typedef	int (*roffproc)(ROFF_ARGS);
161
/*
 * The handler functions of one request; these are the entries of
 * the roffs[] table.  That table is initialized positionally, so the
 * order of the members must not change.  Handlers a request does
 * not need are NULL.
 */
struct	roffmac {
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
};
169
/*
 * One predefined string: a name and the text it stands for.
 * The predefs[] table consists of such entries.
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Expands to one struct predef initializer followed by a comma.
 * Presumably invoked once per entry by the included "predefs.in";
 * that file is not visible here, so verify.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
177
178 /* --- function prototypes ------------------------------------------------ */
179
/*
 * Forward declarations of all file-local functions.
 * Those taking ROFF_ARGS are request handlers that can be
 * stored in the roffproc members of struct roffmac.
 */
static int roffnode_cleanscope(struct roff *);
static int roffnode_pop(struct roff *);
static void roffnode_push(struct roff *, enum roff_tok,
		const char *, int, int);
static void roff_addtbl(struct roff_man *, int, struct tbl_node *);
static int roff_als(ROFF_ARGS);
static int roff_block(ROFF_ARGS);
static int roff_block_text(ROFF_ARGS);
static int roff_block_sub(ROFF_ARGS);
static int roff_break(ROFF_ARGS);
static int roff_cblock(ROFF_ARGS);
static int roff_cc(ROFF_ARGS);
static int roff_ccond(struct roff *, int, int);
static int roff_char(ROFF_ARGS);
static int roff_cond(ROFF_ARGS);
static int roff_cond_checkend(ROFF_ARGS);
static int roff_cond_text(ROFF_ARGS);
static int roff_cond_sub(ROFF_ARGS);
static int roff_ds(ROFF_ARGS);
static int roff_ec(ROFF_ARGS);
static int roff_eo(ROFF_ARGS);
static int roff_eqndelim(struct roff *, struct buf *, int);
static int roff_evalcond(struct roff *, int, char *, int *);
static int roff_evalnum(struct roff *, int,
		const char *, int *, int *, int);
static int roff_evalpar(struct roff *, int,
		const char *, int *, int *, int);
static int roff_evalstrcond(const char *, int *);
static int roff_expand(struct roff *, struct buf *,
		int, int, char);
static void roff_free1(struct roff *);
static void roff_freereg(struct roffreg *);
static void roff_freestr(struct roffkv *);
static size_t roff_getname(struct roff *, char **, int, int);
static int roff_getnum(const char *, int *, int *, int);
static int roff_getop(const char *, int *, char *);
static int roff_getregn(struct roff *,
		const char *, size_t, char);
static int roff_getregro(const struct roff *,
		const char *name);
static const char *roff_getstrn(struct roff *,
		const char *, size_t, int *);
static int roff_hasregn(const struct roff *,
		const char *, size_t);
static int roff_insec(ROFF_ARGS);
static int roff_it(ROFF_ARGS);
static int roff_line_ignore(ROFF_ARGS);
static void roff_man_alloc1(struct roff_man *);
static void roff_man_free1(struct roff_man *);
static int roff_manyarg(ROFF_ARGS);
static int roff_noarg(ROFF_ARGS);
static int roff_nop(ROFF_ARGS);
static int roff_nr(ROFF_ARGS);
static int roff_onearg(ROFF_ARGS);
static enum roff_tok roff_parse(struct roff *, char *, int *,
		int, int);
static int roff_parsetext(struct roff *, struct buf *,
		int, int *);
static int roff_renamed(ROFF_ARGS);
static int roff_return(ROFF_ARGS);
static int roff_rm(ROFF_ARGS);
static int roff_rn(ROFF_ARGS);
static int roff_rr(ROFF_ARGS);
static void roff_setregn(struct roff *, const char *,
		size_t, int, char, int);
static void roff_setstr(struct roff *,
		const char *, const char *, int);
static void roff_setstrn(struct roffkv **, const char *,
		size_t, const char *, size_t, int);
static int roff_shift(ROFF_ARGS);
static int roff_so(ROFF_ARGS);
static int roff_tr(ROFF_ARGS);
static int roff_Dd(ROFF_ARGS);
static int roff_TE(ROFF_ARGS);
static int roff_TS(ROFF_ARGS);
static int roff_EQ(ROFF_ARGS);
static int roff_EN(ROFF_ARGS);
static int roff_T_(ROFF_ARGS);
static int roff_unsupp(ROFF_ARGS);
static int roff_userdef(ROFF_ARGS);
260
261 /* --- constant data ------------------------------------------------------ */
262
/*
 * Option bits for the number parsing functions; presumably passed
 * as their trailing int argument -- verify at the call sites.
 */
#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */
265
/*
 * Names of all requests and macros, indexed by token ID.
 * NULL marks token IDs that have no name of their own;
 * roffhash_alloc() skips those.  The first part of the table
 * runs parallel to the roffs[] table of handler functions.
 */
const char *__roff_name[MAN_MAX + 1] = {
	/* Requests in the same order as the entries of roffs[]. */
	"br", "ce", "fi", "ft",
	"ll", "mc", "nf",
	"po", "rj", "sp",
	"ta", "ti", NULL,	/* NULL: slot labelled ROFF_MAX in roffs[] */
	"ab", "ad", "af", "aln",
	"als", "am", "am1", "ami",
	"ami1", "as", "as1", "asciify",
	"backtrace", "bd", "bleedat", "blm",
	"box", "boxa", "bp", "BP",
	"break", "breakchar", "brnl", "brp",
	"brpnl", "c2", "cc",
	"cf", "cflags", "ch", "char",
	"chop", "class", "close", "CL",
	"color", "composite", "continue", "cp",
	"cropat", "cs", "cu", "da",
	"dch", "Dd", "de", "de1",
	"defcolor", "dei", "dei1", "device",
	"devicem", "di", "do", "ds",
	"ds1", "dwh", "dt", "ec",
	"ecr", "ecs", "el", "em",
	"EN", "eo", "EP", "EQ",
	"errprint", "ev", "evc", "ex",
	"fallback", "fam", "fc", "fchar",
	"fcolor", "fdeferlig", "feature", "fkern",
	"fl", "flig", "fp", "fps",
	"fschar", "fspacewidth", "fspecial", "ftr",
	"fzoom", "gcolor", "hc", "hcode",
	"hidechar", "hla", "hlm", "hpf",
	"hpfa", "hpfcode", "hw", "hy",
	"hylang", "hylen", "hym", "hypp",
	"hys", "ie", "if", "ig",
	"index", "it", "itc", "IX",
	"kern", "kernafter", "kernbefore", "kernpair",
	"lc", "lc_ctype", "lds", "length",
	"letadj", "lf", "lg", "lhang",
	"linetabs", "lnr", "lnrf", "lpfx",
	"ls", "lsm", "lt",
	"mediasize", "minss", "mk", "mso",
	"na", "ne", "nh", "nhychar",
	"nm", "nn", "nop", "nr",
	"nrf", "nroff", "ns", "nx",
	"open", "opena", "os", "output",
	"padj", "papersize", "pc", "pev",
	"pi", "PI", "pl", "pm",
	"pn", "pnr", "ps",
	"psbb", "pshape", "pso", "ptr",
	"pvs", "rchar", "rd", "recursionlimit",
	"return", "rfschar", "rhang",
	"rm", "rn", "rnn", "rr",
	"rs", "rt", "schar", "sentchar",
	"shc", "shift", "sizes", "so",
	"spacewidth", "special", "spreadwarn", "ss",
	"sty", "substring", "sv", "sy",
	"T&", "tc", "TE",
	"TH", "tkf", "tl",
	"tm", "tm1", "tmc", "tr",
	"track", "transchar", "trf", "trimat",
	"trin", "trnt", "troff", "TS",
	"uf", "ul", "unformat", "unwatch",
	"unwatchn", "vpt", "vs", "warn",
	"warnscale", "watch", "watchlength", "watchn",
	"wh", "while", "write", "writec",
	/*
	 * The two NULLs after "." line up with the roff_renamed and
	 * roff_userdef entries that end roffs[]; "text" presumably
	 * sits at index TOKEN_NONE -- verify against the enum.
	 */
	"writem", "xflag", ".", NULL,
	NULL, "text",
	/* Presumably the mdoc(7) macros, ended by a NULL; verify. */
	"Dd", "Dt", "Os", "Sh",
	"Ss", "Pp", "D1", "Dl",
	"Bd", "Ed", "Bl", "El",
	"It", "Ad", "An", "Ap",
	"Ar", "Cd", "Cm", "Dv",
	"Er", "Ev", "Ex", "Fa",
	"Fd", "Fl", "Fn", "Ft",
	"Ic", "In", "Li", "Nd",
	"Nm", "Op", "Ot", "Pa",
	"Rv", "St", "Va", "Vt",
	"Xr", "%A", "%B", "%D",
	"%I", "%J", "%N", "%O",
	"%P", "%R", "%T", "%V",
	"Ac", "Ao", "Aq", "At",
	"Bc", "Bf", "Bo", "Bq",
	"Bsx", "Bx", "Db", "Dc",
	"Do", "Dq", "Ec", "Ef",
	"Em", "Eo", "Fx", "Ms",
	"No", "Ns", "Nx", "Ox",
	"Pc", "Pf", "Po", "Pq",
	"Qc", "Ql", "Qo", "Qq",
	"Re", "Rs", "Sc", "So",
	"Sq", "Sm", "Sx", "Sy",
	"Tn", "Ux", "Xc", "Xo",
	"Fo", "Fc", "Oo", "Oc",
	"Bk", "Ek", "Bt", "Hf",
	"Fr", "Ud", "Lb", "Lp",
	"Lk", "Mt", "Brq", "Bro",
	"Brc", "%C", "Es", "En",
	"Dx", "%Q", "%U", "Ta",
	"Tg", NULL,
	/* Presumably the man(7) macros, ended by a NULL; verify. */
	"TH", "SH", "SS", "TP",
	"TQ",
	"LP", "PP", "P", "IP",
	"HP", "SM", "SB", "BI",
	"IB", "BR", "RB", "R",
	"B", "I", "IR", "RI",
	"RE", "RS", "DT", "UC",
	"PD", "AT", "in",
	"SY", "YS", "OP",
	"EX", "EE", "UR",
	"UE", "MT", "ME", NULL
};
/* Read-only view of the table above; used by the code in this file. */
const char *const *roff_name = __roff_name;
375
/*
 * Handler functions for every request, one struct roffmac per entry.
 * The members are given positionally: { proc, text, sub, flags }.
 * The trailing comment on each line names the request the entry
 * belongs to; the order matches the first part of __roff_name[].
 */
static	struct roffmac	 roffs[TOKEN_NONE] = {
	{ roff_noarg, NULL, NULL, 0 }, /* br */
	{ roff_onearg, NULL, NULL, 0 }, /* ce */
	{ roff_noarg, NULL, NULL, 0 }, /* fi */
	{ roff_onearg, NULL, NULL, 0 }, /* ft */
	{ roff_onearg, NULL, NULL, 0 }, /* ll */
	{ roff_onearg, NULL, NULL, 0 }, /* mc */
	{ roff_noarg, NULL, NULL, 0 }, /* nf */
	{ roff_onearg, NULL, NULL, 0 }, /* po */
	{ roff_onearg, NULL, NULL, 0 }, /* rj */
	{ roff_onearg, NULL, NULL, 0 }, /* sp */
	{ roff_manyarg, NULL, NULL, 0 }, /* ta */
	{ roff_onearg, NULL, NULL, 0 }, /* ti */
	{ NULL, NULL, NULL, 0 }, /* ROFF_MAX */
	{ roff_unsupp, NULL, NULL, 0 }, /* ab */
	{ roff_line_ignore, NULL, NULL, 0 }, /* ad */
	{ roff_line_ignore, NULL, NULL, 0 }, /* af */
	{ roff_unsupp, NULL, NULL, 0 }, /* aln */
	{ roff_als, NULL, NULL, 0 }, /* als */
	{ roff_block, roff_block_text, roff_block_sub, 0 }, /* am */
	{ roff_block, roff_block_text, roff_block_sub, 0 }, /* am1 */
	{ roff_block, roff_block_text, roff_block_sub, 0 }, /* ami */
	{ roff_block, roff_block_text, roff_block_sub, 0 }, /* ami1 */
	{ roff_ds, NULL, NULL, 0 }, /* as */
	{ roff_ds, NULL, NULL, 0 }, /* as1 */
	{ roff_unsupp, NULL, NULL, 0 }, /* asciify */
	{ roff_line_ignore, NULL, NULL, 0 }, /* backtrace */
	{ roff_line_ignore, NULL, NULL, 0 }, /* bd */
	{ roff_line_ignore, NULL, NULL, 0 }, /* bleedat */
	{ roff_unsupp, NULL, NULL, 0 }, /* blm */
	{ roff_unsupp, NULL, NULL, 0 }, /* box */
	{ roff_unsupp, NULL, NULL, 0 }, /* boxa */
	{ roff_line_ignore, NULL, NULL, 0 }, /* bp */
	{ roff_unsupp, NULL, NULL, 0 }, /* BP */
	{ roff_break, NULL, NULL, 0 }, /* break */
	{ roff_line_ignore, NULL, NULL, 0 }, /* breakchar */
	{ roff_line_ignore, NULL, NULL, 0 }, /* brnl */
	{ roff_noarg, NULL, NULL, 0 }, /* brp */
	{ roff_line_ignore, NULL, NULL, 0 }, /* brpnl */
	{ roff_unsupp, NULL, NULL, 0 }, /* c2 */
	{ roff_cc, NULL, NULL, 0 }, /* cc */
	{ roff_insec, NULL, NULL, 0 }, /* cf */
	{ roff_line_ignore, NULL, NULL, 0 }, /* cflags */
	{ roff_line_ignore, NULL, NULL, 0 }, /* ch */
	{ roff_char, NULL, NULL, 0 }, /* char */
	{ roff_unsupp, NULL, NULL, 0 }, /* chop */
	{ roff_line_ignore, NULL, NULL, 0 }, /* class */
	{ roff_insec, NULL, NULL, 0 }, /* close */
	{ roff_unsupp, NULL, NULL, 0 }, /* CL */
	{ roff_line_ignore, NULL, NULL, 0 }, /* color */
	{ roff_unsupp, NULL, NULL, 0 }, /* composite */
	{ roff_unsupp, NULL, NULL, 0 }, /* continue */
	{ roff_line_ignore, NULL, NULL, 0 }, /* cp */
	{ roff_line_ignore, NULL, NULL, 0 }, /* cropat */
	{ roff_line_ignore, NULL, NULL, 0 }, /* cs */
	{ roff_line_ignore, NULL, NULL, 0 }, /* cu */
	{ roff_unsupp, NULL, NULL, 0 }, /* da */
	{ roff_unsupp, NULL, NULL, 0 }, /* dch */
	{ roff_Dd, NULL, NULL, 0 }, /* Dd */
	{ roff_block, roff_block_text, roff_block_sub, 0 }, /* de */
	{ roff_block, roff_block_text, roff_block_sub, 0 }, /* de1 */
	{ roff_line_ignore, NULL, NULL, 0 }, /* defcolor */
	{ roff_block, roff_block_text, roff_block_sub, 0 }, /* dei */
	{ roff_block, roff_block_text, roff_block_sub, 0 }, /* dei1 */
	{ roff_unsupp, NULL, NULL, 0 }, /* device */
	{ roff_unsupp, NULL, NULL, 0 }, /* devicem */
	{ roff_unsupp, NULL, NULL, 0 }, /* di */
	{ roff_unsupp, NULL, NULL, 0 }, /* do */
	{ roff_ds, NULL, NULL, 0 }, /* ds */
	{ roff_ds, NULL, NULL, 0 }, /* ds1 */
	{ roff_unsupp, NULL, NULL, 0 }, /* dwh */
	{ roff_unsupp, NULL, NULL, 0 }, /* dt */
	{ roff_ec, NULL, NULL, 0 }, /* ec */
	{ roff_unsupp, NULL, NULL, 0 }, /* ecr */
	{ roff_unsupp, NULL, NULL, 0 }, /* ecs */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* el */
	{ roff_unsupp, NULL, NULL, 0 }, /* em */
	{ roff_EN, NULL, NULL, 0 }, /* EN */
	{ roff_eo, NULL, NULL, 0 }, /* eo */
	{ roff_unsupp, NULL, NULL, 0 }, /* EP */
	{ roff_EQ, NULL, NULL, 0 }, /* EQ */
	{ roff_line_ignore, NULL, NULL, 0 }, /* errprint */
	{ roff_unsupp, NULL, NULL, 0 }, /* ev */
	{ roff_unsupp, NULL, NULL, 0 }, /* evc */
	{ roff_unsupp, NULL, NULL, 0 }, /* ex */
	{ roff_line_ignore, NULL, NULL, 0 }, /* fallback */
	{ roff_line_ignore, NULL, NULL, 0 }, /* fam */
	{ roff_unsupp, NULL, NULL, 0 }, /* fc */
	{ roff_unsupp, NULL, NULL, 0 }, /* fchar */
	{ roff_line_ignore, NULL, NULL, 0 }, /* fcolor */
	{ roff_line_ignore, NULL, NULL, 0 }, /* fdeferlig */
	{ roff_line_ignore, NULL, NULL, 0 }, /* feature */
	{ roff_line_ignore, NULL, NULL, 0 }, /* fkern */
	{ roff_line_ignore, NULL, NULL, 0 }, /* fl */
	{ roff_line_ignore, NULL, NULL, 0 }, /* flig */
	{ roff_line_ignore, NULL, NULL, 0 }, /* fp */
	{ roff_line_ignore, NULL, NULL, 0 }, /* fps */
	{ roff_unsupp, NULL, NULL, 0 }, /* fschar */
	{ roff_line_ignore, NULL, NULL, 0 }, /* fspacewidth */
	{ roff_line_ignore, NULL, NULL, 0 }, /* fspecial */
	{ roff_line_ignore, NULL, NULL, 0 }, /* ftr */
	{ roff_line_ignore, NULL, NULL, 0 }, /* fzoom */
	{ roff_line_ignore, NULL, NULL, 0 }, /* gcolor */
	{ roff_line_ignore, NULL, NULL, 0 }, /* hc */
	{ roff_line_ignore, NULL, NULL, 0 }, /* hcode */
	{ roff_line_ignore, NULL, NULL, 0 }, /* hidechar */
	{ roff_line_ignore, NULL, NULL, 0 }, /* hla */
	{ roff_line_ignore, NULL, NULL, 0 }, /* hlm */
	{ roff_line_ignore, NULL, NULL, 0 }, /* hpf */
	{ roff_line_ignore, NULL, NULL, 0 }, /* hpfa */
	{ roff_line_ignore, NULL, NULL, 0 }, /* hpfcode */
	{ roff_line_ignore, NULL, NULL, 0 }, /* hw */
	{ roff_line_ignore, NULL, NULL, 0 }, /* hy */
	{ roff_line_ignore, NULL, NULL, 0 }, /* hylang */
	{ roff_line_ignore, NULL, NULL, 0 }, /* hylen */
	{ roff_line_ignore, NULL, NULL, 0 }, /* hym */
	{ roff_line_ignore, NULL, NULL, 0 }, /* hypp */
	{ roff_line_ignore, NULL, NULL, 0 }, /* hys */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* ie */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* if */
	{ roff_block, roff_block_text, roff_block_sub, 0 }, /* ig */
	{ roff_unsupp, NULL, NULL, 0 }, /* index */
	{ roff_it, NULL, NULL, 0 }, /* it */
	{ roff_unsupp, NULL, NULL, 0 }, /* itc */
	{ roff_line_ignore, NULL, NULL, 0 }, /* IX */
	{ roff_line_ignore, NULL, NULL, 0 }, /* kern */
	{ roff_line_ignore, NULL, NULL, 0 }, /* kernafter */
	{ roff_line_ignore, NULL, NULL, 0 }, /* kernbefore */
	{ roff_line_ignore, NULL, NULL, 0 }, /* kernpair */
	{ roff_unsupp, NULL, NULL, 0 }, /* lc */
	{ roff_unsupp, NULL, NULL, 0 }, /* lc_ctype */
	{ roff_unsupp, NULL, NULL, 0 }, /* lds */
	{ roff_unsupp, NULL, NULL, 0 }, /* length */
	{ roff_line_ignore, NULL, NULL, 0 }, /* letadj */
	{ roff_insec, NULL, NULL, 0 }, /* lf */
	{ roff_line_ignore, NULL, NULL, 0 }, /* lg */
	{ roff_line_ignore, NULL, NULL, 0 }, /* lhang */
	{ roff_unsupp, NULL, NULL, 0 }, /* linetabs */
	{ roff_unsupp, NULL, NULL, 0 }, /* lnr */
	{ roff_unsupp, NULL, NULL, 0 }, /* lnrf */
	{ roff_unsupp, NULL, NULL, 0 }, /* lpfx */
	{ roff_line_ignore, NULL, NULL, 0 }, /* ls */
	{ roff_unsupp, NULL, NULL, 0 }, /* lsm */
	{ roff_line_ignore, NULL, NULL, 0 }, /* lt */
	{ roff_line_ignore, NULL, NULL, 0 }, /* mediasize */
	{ roff_line_ignore, NULL, NULL, 0 }, /* minss */
	{ roff_line_ignore, NULL, NULL, 0 }, /* mk */
	{ roff_insec, NULL, NULL, 0 }, /* mso */
	{ roff_line_ignore, NULL, NULL, 0 }, /* na */
	{ roff_line_ignore, NULL, NULL, 0 }, /* ne */
	{ roff_line_ignore, NULL, NULL, 0 }, /* nh */
	{ roff_line_ignore, NULL, NULL, 0 }, /* nhychar */
	{ roff_unsupp, NULL, NULL, 0 }, /* nm */
	{ roff_unsupp, NULL, NULL, 0 }, /* nn */
	{ roff_nop, NULL, NULL, 0 }, /* nop */
	{ roff_nr, NULL, NULL, 0 }, /* nr */
	{ roff_unsupp, NULL, NULL, 0 }, /* nrf */
	{ roff_line_ignore, NULL, NULL, 0 }, /* nroff */
	{ roff_line_ignore, NULL, NULL, 0 }, /* ns */
	{ roff_insec, NULL, NULL, 0 }, /* nx */
	{ roff_insec, NULL, NULL, 0 }, /* open */
	{ roff_insec, NULL, NULL, 0 }, /* opena */
	{ roff_line_ignore, NULL, NULL, 0 }, /* os */
	{ roff_unsupp, NULL, NULL, 0 }, /* output */
	{ roff_line_ignore, NULL, NULL, 0 }, /* padj */
	{ roff_line_ignore, NULL, NULL, 0 }, /* papersize */
	{ roff_line_ignore, NULL, NULL, 0 }, /* pc */
	{ roff_line_ignore, NULL, NULL, 0 }, /* pev */
	{ roff_insec, NULL, NULL, 0 }, /* pi */
	{ roff_unsupp, NULL, NULL, 0 }, /* PI */
	{ roff_line_ignore, NULL, NULL, 0 }, /* pl */
	{ roff_line_ignore, NULL, NULL, 0 }, /* pm */
	{ roff_line_ignore, NULL, NULL, 0 }, /* pn */
	{ roff_line_ignore, NULL, NULL, 0 }, /* pnr */
	{ roff_line_ignore, NULL, NULL, 0 }, /* ps */
	{ roff_unsupp, NULL, NULL, 0 }, /* psbb */
	{ roff_unsupp, NULL, NULL, 0 }, /* pshape */
	{ roff_insec, NULL, NULL, 0 }, /* pso */
	{ roff_line_ignore, NULL, NULL, 0 }, /* ptr */
	{ roff_line_ignore, NULL, NULL, 0 }, /* pvs */
	{ roff_unsupp, NULL, NULL, 0 }, /* rchar */
	{ roff_line_ignore, NULL, NULL, 0 }, /* rd */
	{ roff_line_ignore, NULL, NULL, 0 }, /* recursionlimit */
	{ roff_return, NULL, NULL, 0 }, /* return */
	{ roff_unsupp, NULL, NULL, 0 }, /* rfschar */
	{ roff_line_ignore, NULL, NULL, 0 }, /* rhang */
	{ roff_rm, NULL, NULL, 0 }, /* rm */
	{ roff_rn, NULL, NULL, 0 }, /* rn */
	{ roff_unsupp, NULL, NULL, 0 }, /* rnn */
	{ roff_rr, NULL, NULL, 0 }, /* rr */
	{ roff_line_ignore, NULL, NULL, 0 }, /* rs */
	{ roff_line_ignore, NULL, NULL, 0 }, /* rt */
	{ roff_unsupp, NULL, NULL, 0 }, /* schar */
	{ roff_line_ignore, NULL, NULL, 0 }, /* sentchar */
	{ roff_line_ignore, NULL, NULL, 0 }, /* shc */
	{ roff_shift, NULL, NULL, 0 }, /* shift */
	{ roff_line_ignore, NULL, NULL, 0 }, /* sizes */
	{ roff_so, NULL, NULL, 0 }, /* so */
	{ roff_line_ignore, NULL, NULL, 0 }, /* spacewidth */
	{ roff_line_ignore, NULL, NULL, 0 }, /* special */
	{ roff_line_ignore, NULL, NULL, 0 }, /* spreadwarn */
	{ roff_line_ignore, NULL, NULL, 0 }, /* ss */
	{ roff_line_ignore, NULL, NULL, 0 }, /* sty */
	{ roff_unsupp, NULL, NULL, 0 }, /* substring */
	{ roff_line_ignore, NULL, NULL, 0 }, /* sv */
	{ roff_insec, NULL, NULL, 0 }, /* sy */
	{ roff_T_, NULL, NULL, 0 }, /* T& */
	{ roff_unsupp, NULL, NULL, 0 }, /* tc */
	{ roff_TE, NULL, NULL, 0 }, /* TE */
	{ roff_Dd, NULL, NULL, 0 }, /* TH */
	{ roff_line_ignore, NULL, NULL, 0 }, /* tkf */
	{ roff_unsupp, NULL, NULL, 0 }, /* tl */
	{ roff_line_ignore, NULL, NULL, 0 }, /* tm */
	{ roff_line_ignore, NULL, NULL, 0 }, /* tm1 */
	{ roff_line_ignore, NULL, NULL, 0 }, /* tmc */
	{ roff_tr, NULL, NULL, 0 }, /* tr */
	{ roff_line_ignore, NULL, NULL, 0 }, /* track */
	{ roff_line_ignore, NULL, NULL, 0 }, /* transchar */
	{ roff_insec, NULL, NULL, 0 }, /* trf */
	{ roff_line_ignore, NULL, NULL, 0 }, /* trimat */
	{ roff_unsupp, NULL, NULL, 0 }, /* trin */
	{ roff_unsupp, NULL, NULL, 0 }, /* trnt */
	{ roff_line_ignore, NULL, NULL, 0 }, /* troff */
	{ roff_TS, NULL, NULL, 0 }, /* TS */
	{ roff_line_ignore, NULL, NULL, 0 }, /* uf */
	{ roff_line_ignore, NULL, NULL, 0 }, /* ul */
	{ roff_unsupp, NULL, NULL, 0 }, /* unformat */
	{ roff_line_ignore, NULL, NULL, 0 }, /* unwatch */
	{ roff_line_ignore, NULL, NULL, 0 }, /* unwatchn */
	{ roff_line_ignore, NULL, NULL, 0 }, /* vpt */
	{ roff_line_ignore, NULL, NULL, 0 }, /* vs */
	{ roff_line_ignore, NULL, NULL, 0 }, /* warn */
	{ roff_line_ignore, NULL, NULL, 0 }, /* warnscale */
	{ roff_line_ignore, NULL, NULL, 0 }, /* watch */
	{ roff_line_ignore, NULL, NULL, 0 }, /* watchlength */
	{ roff_line_ignore, NULL, NULL, 0 }, /* watchn */
	{ roff_unsupp, NULL, NULL, 0 }, /* wh */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* while */
	{ roff_insec, NULL, NULL, 0 }, /* write */
	{ roff_insec, NULL, NULL, 0 }, /* writec */
	{ roff_insec, NULL, NULL, 0 }, /* writem */
	{ roff_line_ignore, NULL, NULL, 0 }, /* xflag */
	{ roff_cblock, NULL, NULL, 0 }, /* . */
	{ roff_renamed, NULL, NULL, 0 }, /* renamed macro; presumably ROFF_RENAMED */
	{ roff_userdef, NULL, NULL, 0 } /* user-defined macro; presumably ROFF_USERDEF */
};
622
/*
 * Array of injected predefined strings.
 * The entries come from the included file "predefs.in", presumably
 * as a sequence of PREDEF() invocations; PREDEFS_MAX has to match
 * the number of entries in that file.
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
628
/*
 * File-scope parser state that is not part of struct roff.
 * roff_reset() sets all four back to zero or NULL.
 */
static	int	 roffce_lines;	/* number of input lines to center */
static	struct roff_node *roffce_node;  /* active request */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
633
634
635 /* --- request table ------------------------------------------------------ */
636
637 struct ohash *
638 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
639 {
640 struct ohash *htab;
641 struct roffreq *req;
642 enum roff_tok tok;
643 size_t sz;
644 unsigned int slot;
645
646 htab = mandoc_malloc(sizeof(*htab));
647 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
648
649 for (tok = mintok; tok < maxtok; tok++) {
650 if (roff_name[tok] == NULL)
651 continue;
652 sz = strlen(roff_name[tok]);
653 req = mandoc_malloc(sizeof(*req) + sz + 1);
654 req->tok = tok;
655 memcpy(req->name, roff_name[tok], sz + 1);
656 slot = ohash_qlookup(htab, req->name);
657 ohash_insert(htab, slot, req);
658 }
659 return htab;
660 }
661
/*
 * Release a table created by roffhash_alloc(): first every entry,
 * then the table itself.  A NULL pointer is silently accepted.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 idx;

	if (htab != NULL) {
		entry = ohash_first(htab, &idx);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &idx);
		}
		ohash_delete(htab);
		free(htab);
	}
}
676
677 enum roff_tok
678 roffhash_find(struct ohash *htab, const char *name, size_t sz)
679 {
680 struct roffreq *req;
681 const char *end;
682
683 if (sz) {
684 end = name + sz;
685 req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
686 } else
687 req = ohash_find(htab, ohash_qlookup(htab, name));
688 return req == NULL ? TOKEN_NONE : req->tok;
689 }
690
691 /* --- stack of request blocks -------------------------------------------- */
692
693 /*
694 * Pop the current node off of the stack of roff instructions currently
695 * pending. Return 1 if it is a loop or 0 otherwise.
696 */
697 static int
698 roffnode_pop(struct roff *r)
699 {
700 struct roffnode *p;
701 int inloop;
702
703 p = r->last;
704 inloop = p->tok == ROFF_while;
705 r->last = p->parent;
706 free(p->name);
707 free(p->end);
708 free(p);
709 return inloop;
710 }
711
712 /*
713 * Push a roff node onto the instruction stack. This must later be
714 * removed with roffnode_pop().
715 */
716 static void
717 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
718 int line, int col)
719 {
720 struct roffnode *p;
721
722 p = mandoc_calloc(1, sizeof(struct roffnode));
723 p->tok = tok;
724 if (name)
725 p->name = mandoc_strdup(name);
726 p->parent = r->last;
727 p->line = line;
728 p->col = col;
729 p->rule = p->parent ? p->parent->rule : 0;
730
731 r->last = p;
732 }
733
734 /* --- roff parser state data management ---------------------------------- */
735
736 static void
737 roff_free1(struct roff *r)
738 {
739 int i;
740
741 tbl_free(r->first_tbl);
742 r->first_tbl = r->last_tbl = r->tbl = NULL;
743
744 eqn_free(r->last_eqn);
745 r->last_eqn = r->eqn = NULL;
746
747 while (r->mstackpos >= 0)
748 roff_userret(r);
749
750 while (r->last)
751 roffnode_pop(r);
752
753 free (r->rstack);
754 r->rstack = NULL;
755 r->rstacksz = 0;
756 r->rstackpos = -1;
757
758 roff_freereg(r->regtab);
759 r->regtab = NULL;
760
761 roff_freestr(r->strtab);
762 roff_freestr(r->rentab);
763 roff_freestr(r->xmbtab);
764 r->strtab = r->rentab = r->xmbtab = NULL;
765
766 if (r->xtab)
767 for (i = 0; i < 128; i++)
768 free(r->xtab[i].p);
769 free(r->xtab);
770 r->xtab = NULL;
771 }
772
773 void
774 roff_reset(struct roff *r)
775 {
776 roff_free1(r);
777 r->options |= MPARSE_COMMENT;
778 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
779 r->control = '\0';
780 r->escape = '\\';
781 roffce_lines = 0;
782 roffce_node = NULL;
783 roffit_lines = 0;
784 roffit_macro = NULL;
785 }
786
787 void
788 roff_free(struct roff *r)
789 {
790 int i;
791
792 roff_free1(r);
793 for (i = 0; i < r->mstacksz; i++)
794 free(r->mstack[i].argv);
795 free(r->mstack);
796 roffhash_free(r->reqtab);
797 free(r);
798 }
799
800 struct roff *
801 roff_alloc(int options)
802 {
803 struct roff *r;
804
805 r = mandoc_calloc(1, sizeof(struct roff));
806 r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
807 r->options = options | MPARSE_COMMENT;
808 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
809 r->mstackpos = -1;
810 r->rstackpos = -1;
811 r->escape = '\\';
812 return r;
813 }
814
815 /* --- syntax tree state data management ---------------------------------- */
816
817 static void
818 roff_man_free1(struct roff_man *man)
819 {
820 if (man->meta.first != NULL)
821 roff_node_delete(man, man->meta.first);
822 free(man->meta.msec);
823 free(man->meta.vol);
824 free(man->meta.os);
825 free(man->meta.arch);
826 free(man->meta.title);
827 free(man->meta.name);
828 free(man->meta.date);
829 free(man->meta.sodest);
830 }
831
832 void
833 roff_state_reset(struct roff_man *man)
834 {
835 man->last = man->meta.first;
836 man->last_es = NULL;
837 man->flags = 0;
838 man->lastsec = man->lastnamed = SEC_NONE;
839 man->next = ROFF_NEXT_CHILD;
840 roff_setreg(man->roff, "nS", 0, '=');
841 }
842
843 static void
844 roff_man_alloc1(struct roff_man *man)
845 {
846 memset(&man->meta, 0, sizeof(man->meta));
847 man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
848 man->meta.first->type = ROFFT_ROOT;
849 man->meta.macroset = MACROSET_NONE;
850 roff_state_reset(man);
851 }
852
/*
 * Discard the current syntax tree and meta data
 * and start over with an empty tree.
 */
void
roff_man_reset(struct roff_man *man)
{
	/* The old content has to go before meta gets overwritten. */
	roff_man_free1(man);
	roff_man_alloc1(man);
}
859
860 void
861 roff_man_free(struct roff_man *man)
862 {
863 roff_man_free1(man);
864 free(man->os_r);
865 free(man);
866 }
867
868 struct roff_man *
869 roff_man_alloc(struct roff *roff, const char *os_s, int quick)
870 {
871 struct roff_man *man;
872
873 man = mandoc_calloc(1, sizeof(*man));
874 man->roff = roff;
875 man->os_s = os_s;
876 man->quick = quick;
877 roff_man_alloc1(man);
878 roff->man = man;
879 return man;
880 }
881
882 /* --- syntax tree handling ----------------------------------------------- */
883
884 struct roff_node *
885 roff_node_alloc(struct roff_man *man, int line, int pos,
886 enum roff_type type, int tok)
887 {
888 struct roff_node *n;
889
890 n = mandoc_calloc(1, sizeof(*n));
891 n->line = line;
892 n->pos = pos;
893 n->tok = tok;
894 n->type = type;
895 n->sec = man->lastsec;
896
897 if (man->flags & MDOC_SYNOPSIS)
898 n->flags |= NODE_SYNPRETTY;
899 else
900 n->flags &= ~NODE_SYNPRETTY;
901 if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL)
902 n->flags |= NODE_NOFILL;
903 else
904 n->flags &= ~NODE_NOFILL;
905 if (man->flags & MDOC_NEWLINE)
906 n->flags |= NODE_LINE;
907 man->flags &= ~MDOC_NEWLINE;
908
909 return n;
910 }
911
912 void
913 roff_node_append(struct roff_man *man, struct roff_node *n)
914 {
915
916 switch (man->next) {
917 case ROFF_NEXT_SIBLING:
918 if (man->last->next != NULL) {
919 n->next = man->last->next;
920 man->last->next->prev = n;
921 } else
922 man->last->parent->last = n;
923 man->last->next = n;
924 n->prev = man->last;
925 n->parent = man->last->parent;
926 break;
927 case ROFF_NEXT_CHILD:
928 if (man->last->child != NULL) {
929 n->next = man->last->child;
930 man->last->child->prev = n;
931 } else
932 man->last->last = n;
933 man->last->child = n;
934 n->parent = man->last;
935 break;
936 default:
937 abort();
938 }
939 man->last = n;
940
941 switch (n->type) {
942 case ROFFT_HEAD:
943 n->parent->head = n;
944 break;
945 case ROFFT_BODY:
946 if (n->end != ENDBODY_NOT)
947 return;
948 n->parent->body = n;
949 break;
950 case ROFFT_TAIL:
951 n->parent->tail = n;
952 break;
953 default:
954 return;
955 }
956
957 /*
958 * Copy over the normalised-data pointer of our parent. Not
959 * everybody has one, but copying a null pointer is fine.
960 */
961
962 n->norm = n->parent->norm;
963 assert(n->parent->type == ROFFT_BLOCK);
964 }
965
966 void
967 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
968 {
969 struct roff_node *n;
970
971 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
972 n->string = roff_strdup(man->roff, word);
973 roff_node_append(man, n);
974 n->flags |= NODE_VALID | NODE_ENDED;
975 man->next = ROFF_NEXT_SIBLING;
976 }
977
978 void
979 roff_word_append(struct roff_man *man, const char *word)
980 {
981 struct roff_node *n;
982 char *addstr, *newstr;
983
984 n = man->last;
985 addstr = roff_strdup(man->roff, word);
986 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
987 free(addstr);
988 free(n->string);
989 n->string = newstr;
990 man->next = ROFF_NEXT_SIBLING;
991 }
992
993 void
994 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
995 {
996 struct roff_node *n;
997
998 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
999 roff_node_append(man, n);
1000 man->next = ROFF_NEXT_CHILD;
1001 }
1002
1003 struct roff_node *
1004 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
1005 {
1006 struct roff_node *n;
1007
1008 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1009 roff_node_append(man, n);
1010 man->next = ROFF_NEXT_CHILD;
1011 return n;
1012 }
1013
1014 struct roff_node *
1015 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1016 {
1017 struct roff_node *n;
1018
1019 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1020 roff_node_append(man, n);
1021 man->next = ROFF_NEXT_CHILD;
1022 return n;
1023 }
1024
1025 struct roff_node *
1026 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1027 {
1028 struct roff_node *n;
1029
1030 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1031 roff_node_append(man, n);
1032 man->next = ROFF_NEXT_CHILD;
1033 return n;
1034 }
1035
1036 static void
1037 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1038 {
1039 struct roff_node *n;
1040 struct tbl_span *span;
1041
1042 if (man->meta.macroset == MACROSET_MAN)
1043 man_breakscope(man, ROFF_TS);
1044 while ((span = tbl_span(tbl)) != NULL) {
1045 n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1046 n->span = span;
1047 roff_node_append(man, n);
1048 n->flags |= NODE_VALID | NODE_ENDED;
1049 man->next = ROFF_NEXT_SIBLING;
1050 }
1051 }
1052
1053 void
1054 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1055 {
1056
1057 /* Adjust siblings. */
1058
1059 if (n->prev)
1060 n->prev->next = n->next;
1061 if (n->next)
1062 n->next->prev = n->prev;
1063
1064 /* Adjust parent. */
1065
1066 if (n->parent != NULL) {
1067 if (n->parent->child == n)
1068 n->parent->child = n->next;
1069 if (n->parent->last == n)
1070 n->parent->last = n->prev;
1071 }
1072
1073 /* Adjust parse point. */
1074
1075 if (man == NULL)
1076 return;
1077 if (man->last == n) {
1078 if (n->prev == NULL) {
1079 man->last = n->parent;
1080 man->next = ROFF_NEXT_CHILD;
1081 } else {
1082 man->last = n->prev;
1083 man->next = ROFF_NEXT_SIBLING;
1084 }
1085 }
1086 if (man->meta.first == n)
1087 man->meta.first = NULL;
1088 }
1089
1090 void
1091 roff_node_relink(struct roff_man *man, struct roff_node *n)
1092 {
1093 roff_node_unlink(man, n);
1094 n->prev = n->next = NULL;
1095 roff_node_append(man, n);
1096 }
1097
1098 void
1099 roff_node_free(struct roff_node *n)
1100 {
1101
1102 if (n->args != NULL)
1103 mdoc_argv_free(n->args);
1104 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1105 free(n->norm);
1106 eqn_box_free(n->eqn);
1107 free(n->string);
1108 free(n->tag);
1109 free(n);
1110 }
1111
1112 void
1113 roff_node_delete(struct roff_man *man, struct roff_node *n)
1114 {
1115
1116 while (n->child != NULL)
1117 roff_node_delete(man, n->child);
1118 roff_node_unlink(man, n);
1119 roff_node_free(n);
1120 }
1121
1122 int
1123 roff_node_transparent(struct roff_node *n)
1124 {
1125 if (n == NULL)
1126 return 0;
1127 if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
1128 return 1;
1129 return roff_tok_transparent(n->tok);
1130 }
1131
1132 int
1133 roff_tok_transparent(enum roff_tok tok)
1134 {
1135 switch (tok) {
1136 case ROFF_ft:
1137 case ROFF_ll:
1138 case ROFF_mc:
1139 case ROFF_po:
1140 case ROFF_ta:
1141 case MDOC_Db:
1142 case MDOC_Es:
1143 case MDOC_Sm:
1144 case MDOC_Tg:
1145 case MAN_DT:
1146 case MAN_UC:
1147 case MAN_PD:
1148 case MAN_AT:
1149 return 1;
1150 default:
1151 return 0;
1152 }
1153 }
1154
1155 struct roff_node *
1156 roff_node_child(struct roff_node *n)
1157 {
1158 for (n = n->child; roff_node_transparent(n); n = n->next)
1159 continue;
1160 return n;
1161 }
1162
1163 struct roff_node *
1164 roff_node_prev(struct roff_node *n)
1165 {
1166 do {
1167 n = n->prev;
1168 } while (roff_node_transparent(n));
1169 return n;
1170 }
1171
1172 struct roff_node *
1173 roff_node_next(struct roff_node *n)
1174 {
1175 do {
1176 n = n->next;
1177 } while (roff_node_transparent(n));
1178 return n;
1179 }
1180
1181 void
1182 deroff(char **dest, const struct roff_node *n)
1183 {
1184 char *cp;
1185 size_t sz;
1186
1187 if (n->string == NULL) {
1188 for (n = n->child; n != NULL; n = n->next)
1189 deroff(dest, n);
1190 return;
1191 }
1192
1193 /* Skip leading whitespace. */
1194
1195 for (cp = n->string; *cp != '\0'; cp++) {
1196 if (cp[0] == '\\' && cp[1] != '\0' &&
1197 strchr(" %&0^|~", cp[1]) != NULL)
1198 cp++;
1199 else if ( ! isspace((unsigned char)*cp))
1200 break;
1201 }
1202
1203 /* Skip trailing backslash. */
1204
1205 sz = strlen(cp);
1206 if (sz > 0 && cp[sz - 1] == '\\')
1207 sz--;
1208
1209 /* Skip trailing whitespace. */
1210
1211 for (; sz; sz--)
1212 if ( ! isspace((unsigned char)cp[sz-1]))
1213 break;
1214
1215 /* Skip empty strings. */
1216
1217 if (sz == 0)
1218 return;
1219
1220 if (*dest == NULL) {
1221 *dest = mandoc_strndup(cp, sz);
1222 return;
1223 }
1224
1225 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1226 free(*dest);
1227 *dest = cp;
1228 }
1229
1230 /* --- main functions of the roff parser ---------------------------------- */
1231
/*
 * In the current line, expand escape sequences that produce parsable
 * input text.  Also check the syntax of the remaining escape sequences,
 * which typically produce output glyphs or change formatter state.
 *
 * The arguments are the parser state r, the input buffer buf (which
 * may be reallocated; buf->sz is kept up to date), the input line
 * number ln used in messages, the offset pos in buf->buf where
 * processing starts, and the escape character newesc to look for.
 *
 * Returns ROFF_CONT when processing of the line can continue,
 * ROFF_IGN | ROFF_APPEND when the line ends with a continuation such
 * that more input has to be appended, or ROFF_IGN when the expansion
 * limit or the size limit was exceeded.
 */
static int
roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc)
{
	struct mctx	*ctx;	/* current macro call context */
	char		 ubuf[24]; /* buffer to print the number */
	struct roff_node *n;	/* used for header comments */
	const char	*start;	/* start of the string to process */
	char		*stesc;	/* start of an escape sequence ('\\') */
	const char	*esct;	/* type of escape sequence */
	char		*ep;	/* end of comment string */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy buf->buf to */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	size_t		 asz;	/* length of the replacement */
	size_t		 rsz;	/* length of the rest of the string */
	int		 inaml;	/* length returned from mandoc_escape() */
	int		 expand_count;	/* to avoid infinite loops */
	int		 npos;	/* position in numeric expression */
	int		 arg_complete; /* argument not interrupted by eol */
	int		 quote_args; /* true for \\$@, false for \\$* */
	int		 done;	/* no more input available */
	int		 deftype; /* type of definition to paste */
	int		 rcsid;	/* kind of RCS id seen */
	enum mandocerr	 err;	/* for escape sequence problems */
	char		 sign;	/* increment number register */
	char		 term;	/* character terminating the escape */

	/* Search forward for comments. */

	done = 0;
	start = buf->buf + pos;
	for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
		if (stesc[0] != newesc || stesc[1] == '\0')
			continue;
		stesc++;
		if (*stesc != '"' && *stesc != '#')
			continue;

		/* Comment found, look for RCS id. */

		rcsid = 0;
		if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_OPENBSD;
			cp += 8;
		} else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_NETBSD;
			cp += 7;
		}
		if (cp != NULL &&
		    isalnum((unsigned char)*cp) == 0 &&
		    strchr(cp, '$') != NULL) {
			if (r->man->meta.rcsids & rcsid)
				mandoc_msg(MANDOCERR_RCS_REP, ln,
				    (int)(stesc - buf->buf) + 1,
				    "%s", stesc + 1);
			r->man->meta.rcsids |= rcsid;
		}

		/* Handle trailing whitespace. */

		/*
		 * From here on, ep points to the last byte of the line
		 * and stesc is back on the escape character.
		 */
		ep = strchr(stesc--, '\0') - 1;
		if (*ep == '\n') {
			done = 1;
			ep--;
		}
		if (*ep == ' ' || *ep == '\t')
			mandoc_msg(MANDOCERR_SPACE_EOL,
			    ln, (int)(ep - buf->buf), NULL);

		/*
		 * Save comments preceding the title macro
		 * in the syntax tree.
		 */

		if (newesc != ASCII_ESC && r->options & MPARSE_COMMENT) {
			while (*ep == ' ' || *ep == '\t')
				ep--;
			ep[1] = '\0';
			n = roff_node_alloc(r->man,
			    ln, stesc + 1 - buf->buf,
			    ROFFT_COMMENT, TOKEN_NONE);
			n->string = mandoc_strdup(stesc + 2);
			roff_node_append(r->man, n);
			n->flags |= NODE_VALID | NODE_ENDED;
			r->man->next = ROFF_NEXT_SIBLING;
		}

		/* Line continuation with comment. */

		if (stesc[1] == '#') {
			*stesc = '\0';
			return ROFF_IGN | ROFF_APPEND;
		}

		/* Discard normal comments. */

		/* Also drop blanks before the comment unless escaped. */
		while (stesc > start && stesc[-1] == ' ' &&
		    (stesc == start + 1 || stesc[-2] != '\\'))
			stesc--;
		*stesc = '\0';
		break;
	}
	if (stesc == start)
		return ROFF_CONT;
	stesc--;

	/* Notice the end of the input. */

	if (*stesc == '\n') {
		*stesc-- = '\0';
		done = 1;
	}

	/* Scan backwards from the end of the line for escapes. */

	expand_count = 0;
	while (stesc >= start) {
		if (*stesc != newesc) {

			/*
			 * If we have a non-standard escape character,
			 * escape literal backslashes because all
			 * processing in subsequent functions uses
			 * the standard escaping rules.
			 */

			if (newesc != ASCII_ESC && *stesc == '\\') {
				*stesc = '\0';
				buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
				    buf->buf, stesc + 1) + 1;
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				free(buf->buf);
				buf->buf = nbuf;
			}

			/* Search backwards for the next escape. */

			stesc--;
			continue;
		}

		/* If it is escaped, skip it. */

		/*
		 * After this loop, stesc - cp is one plus the number of
		 * r->escape characters immediately preceding stesc.
		 */
		for (cp = stesc - 1; cp >= start; cp--)
			if (*cp != r->escape)
				break;

		if ((stesc - cp) % 2 == 0) {
			while (stesc > cp)
				*stesc-- = '\\';
			continue;
		} else if (stesc[1] != '\0') {
			*stesc = '\\';
		} else {
			/* Escape character at the very end of the line. */
			*stesc-- = '\0';
			if (done)
				continue;
			else
				return ROFF_IGN | ROFF_APPEND;
		}

		/* Decide whether to expand or to check only. */

		term = '\0';
		cp = stesc + 1;
		while (*cp == 'E')
			cp++;
		esct = cp;
		switch (*esct) {
		case '*':
		case '$':
			res = NULL;
			break;
		case 'B':
		case 'w':
			term = cp[1];
			/* FALLTHROUGH */
		case 'n':
			sign = cp[1];
			if (sign == '+' || sign == '-')
				cp++;
			res = ubuf;
			break;
		default:
			err = MANDOCERR_OK;
			switch(mandoc_escape(&cp, &stnam, &inaml)) {
			case ESCAPE_SPECIAL:
				if (mchars_spec2cp(stnam, inaml) >= 0)
					break;
				/* FALLTHROUGH */
			case ESCAPE_ERROR:
				err = MANDOCERR_ESC_BAD;
				break;
			case ESCAPE_UNDEF:
				err = MANDOCERR_ESC_UNDEF;
				break;
			case ESCAPE_UNSUPP:
				err = MANDOCERR_ESC_UNSUPP;
				break;
			default:
				break;
			}
			if (err != MANDOCERR_OK)
				mandoc_msg(err, ln, (int)(stesc - buf->buf),
				    "%.*s", (int)(cp - stesc), stesc);
			stesc--;
			continue;
		}

		if (EXPAND_LIMIT < ++expand_count) {
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 */

		if (term == '\0') {
			switch (*++cp) {
			case '\0':
				maxl = 0;
				break;
			case '(':
				cp++;
				maxl = 2;
				break;
			case '[':
				cp++;
				term = ']';
				maxl = 0;
				break;
			default:
				maxl = 1;
				break;
			}
		} else {
			cp += 2;
			maxl = 0;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		naml = 0;
		arg_complete = 1;
		while (maxl == 0 || naml < maxl) {
			if (*cp == '\0') {
				mandoc_msg(MANDOCERR_ESC_BAD, ln,
				    (int)(stesc - buf->buf), "%s", stesc);
				arg_complete = 0;
				break;
			}
			if (maxl == 0 && *cp == term) {
				cp++;
				break;
			}
			/* Nested escapes are only parsed inside \w. */
			if (*cp++ != '\\' || *esct != 'w') {
				naml++;
				continue;
			}
			switch (mandoc_escape(&cp, NULL, NULL)) {
			case ESCAPE_SPECIAL:
			case ESCAPE_UNICODE:
			case ESCAPE_NUMBERED:
			case ESCAPE_UNDEF:
			case ESCAPE_OVERSTRIKE:
				naml++;
				break;
			default:
				break;
			}
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		switch (*esct) {
		case '*':
			if (arg_complete) {
				deftype = ROFFDEF_USER | ROFFDEF_PRE;
				res = roff_getstrn(r, stnam, naml, &deftype);

				/*
				 * If not overridden, let \*(.T
				 * through to the formatters.
				 */

				if (res == NULL && naml == 2 &&
				    stnam[0] == '.' && stnam[1] == 'T') {
					roff_setstrn(&r->strtab,
					    ".T", 2, NULL, 0, 0);
					stesc--;
					continue;
				}
			}
			break;
		case '$':
			if (r->mstackpos < 0) {
				mandoc_msg(MANDOCERR_ARG_UNDEF, ln,
				    (int)(stesc - buf->buf), "%.3s", stesc);
				break;
			}
			ctx = r->mstack + r->mstackpos;
			/* \$1 to \$9 select a single argument. */
			npos = esct[1] - '1';
			if (npos >= 0 && npos <= 8) {
				res = npos < ctx->argc ?
				    ctx->argv[npos] : "";
				break;
			}
			if (esct[1] == '*')
				quote_args = 0;
			else if (esct[1] == '@')
				quote_args = 1;
			else {
				mandoc_msg(MANDOCERR_ARG_NONUM, ln,
				    (int)(stesc - buf->buf), "%.3s", stesc);
				break;
			}
			/* Count the bytes needed for all arguments. */
			asz = 0;
			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					asz++;  /* blank */
				if (quote_args)
					asz += 2;  /* quotes */
				asz += strlen(ctx->argv[npos]);
			}
			/*
			 * Resize the buffer such that the three bytes of
			 * the escape sequence are replaced by asz bytes.
			 * When shrinking, move the rest before the
			 * reallocation; when growing, move it afterwards.
			 */
			if (asz != 3) {
				rsz = buf->sz - (stesc - buf->buf) - 3;
				if (asz < 3)
					memmove(stesc + asz, stesc + 3, rsz);
				buf->sz += asz - 3;
				nbuf = mandoc_realloc(buf->buf, buf->sz);
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				buf->buf = nbuf;
				if (asz > 3)
					memmove(stesc + asz, stesc + 3, rsz);
			}
			/* Copy the arguments into the gap. */
			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					*stesc++ = ' ';
				if (quote_args)
					*stesc++ = '"';
				cp = ctx->argv[npos];
				while (*cp != '\0')
					*stesc++ = *cp++;
				if (quote_args)
					*stesc++ = '"';
			}
			continue;
		case 'B':
			npos = 0;
			ubuf[0] = arg_complete &&
			    roff_evalnum(r, ln, stnam, &npos,
			      NULL, ROFFNUM_SCALE) &&
			    stnam + npos + 1 == cp ? '1' : '0';
			ubuf[1] = '\0';
			break;
		case 'n':
			if (arg_complete)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, stnam, naml, sign));
			else
				ubuf[0] = '\0';
			break;
		case 'w':
			/* use even incomplete args */
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
			    24 * (int)naml);
			break;
		}

		if (res == NULL) {
			if (*esct == '*')
				mandoc_msg(MANDOCERR_STR_UNDEF,
				    ln, (int)(stesc - buf->buf),
				    "%.*s", (int)naml, stnam);
			res = "";
		} else if (buf->sz + strlen(res) > SHRT_MAX) {
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/* Replace the escape sequence by the string. */

		*stesc = '\0';
		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
		    buf->buf, res, cp) + 1;

		/* Prepare for the next replacement. */

		start = nbuf + pos;
		stesc = nbuf + (stesc - buf->buf) + strlen(res);
		free(buf->buf);
		buf->buf = nbuf;
	}
	return ROFF_CONT;
}
1644
1645 /*
1646 * Parse a quoted or unquoted roff-style request or macro argument.
1647 * Return a pointer to the parsed argument, which is either the original
1648 * pointer or advanced by one byte in case the argument is quoted.
1649 * NUL-terminate the argument in place.
1650 * Collapse pairs of quotes inside quoted arguments.
1651 * Advance the argument pointer to the next argument,
1652 * or to the NUL byte terminating the argument line.
1653 */
1654 char *
1655 roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
1656 {
1657 struct buf buf;
1658 char *cp, *start;
1659 int newesc, pairs, quoted, white;
1660
1661 /* Quoting can only start with a new word. */
1662 start = *cpp;
1663 quoted = 0;
1664 if ('"' == *start) {
1665 quoted = 1;
1666 start++;
1667 }
1668
1669 newesc = pairs = white = 0;
1670 for (cp = start; '\0' != *cp; cp++) {
1671
1672 /*
1673 * Move the following text left
1674 * after quoted quotes and after "\\" and "\t".
1675 */
1676 if (pairs)
1677 cp[-pairs] = cp[0];
1678
1679 if ('\\' == cp[0]) {
1680 /*
1681 * In copy mode, translate double to single
1682 * backslashes and backslash-t to literal tabs.
1683 */
1684 switch (cp[1]) {
1685 case 'a':
1686 case 't':
1687 cp[-pairs] = '\t';
1688 pairs++;
1689 cp++;
1690 break;
1691 case '\\':
1692 newesc = 1;
1693 cp[-pairs] = ASCII_ESC;
1694 pairs++;
1695 cp++;
1696 break;
1697 case ' ':
1698 /* Skip escaped blanks. */
1699 if (0 == quoted)
1700 cp++;
1701 break;
1702 default:
1703 break;
1704 }
1705 } else if (0 == quoted) {
1706 if (' ' == cp[0]) {
1707 /* Unescaped blanks end unquoted args. */
1708 white = 1;
1709 break;
1710 }
1711 } else if ('"' == cp[0]) {
1712 if ('"' == cp[1]) {
1713 /* Quoted quotes collapse. */
1714 pairs++;
1715 cp++;
1716 } else {
1717 /* Unquoted quotes end quoted args. */
1718 quoted = 2;
1719 break;
1720 }
1721 }
1722 }
1723
1724 /* Quoted argument without a closing quote. */
1725 if (1 == quoted)
1726 mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);
1727
1728 /* NUL-terminate this argument and move to the next one. */
1729 if (pairs)
1730 cp[-pairs] = '\0';
1731 if ('\0' != *cp) {
1732 *cp++ = '\0';
1733 while (' ' == *cp)
1734 cp++;
1735 }
1736 *pos += (int)(cp - start) + (quoted ? 1 : 0);
1737 *cpp = cp;
1738
1739 if ('\0' == *cp && (white || ' ' == cp[-1]))
1740 mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);
1741
1742 start = mandoc_strdup(start);
1743 if (newesc == 0)
1744 return start;
1745
1746 buf.buf = start;
1747 buf.sz = strlen(start) + 1;
1748 buf.next = NULL;
1749 if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) {
1750 free(buf.buf);
1751 buf.buf = mandoc_strdup("");
1752 }
1753 return buf.buf;
1754 }
1755
1756
1757 /*
1758 * Process text streams.
1759 */
1760 static int
1761 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1762 {
1763 size_t sz;
1764 const char *start;
1765 char *p;
1766 int isz;
1767 enum mandoc_esc esc;
1768
1769 /* Spring the input line trap. */
1770
1771 if (roffit_lines == 1) {
1772 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1773 free(buf->buf);
1774 buf->buf = p;
1775 buf->sz = isz + 1;
1776 *offs = 0;
1777 free(roffit_macro);
1778 roffit_lines = 0;
1779 return ROFF_REPARSE;
1780 } else if (roffit_lines > 1)
1781 --roffit_lines;
1782
1783 if (roffce_node != NULL && buf->buf[pos] != '\0') {
1784 if (roffce_lines < 1) {
1785 r->man->last = roffce_node;
1786 r->man->next = ROFF_NEXT_SIBLING;
1787 roffce_lines = 0;
1788 roffce_node = NULL;
1789 } else
1790 roffce_lines--;
1791 }
1792
1793 /* Convert all breakable hyphens into ASCII_HYPH. */
1794
1795 start = p = buf->buf + pos;
1796
1797 while (*p != '\0') {
1798 sz = strcspn(p, "-\\");
1799 p += sz;
1800
1801 if (*p == '\0')
1802 break;
1803
1804 if (*p == '\\') {
1805 /* Skip over escapes. */
1806 p++;
1807 esc = mandoc_escape((const char **)&p, NULL, NULL);
1808 if (esc == ESCAPE_ERROR)
1809 break;
1810 while (*p == '-')
1811 p++;
1812 continue;
1813 } else if (p == start) {
1814 p++;
1815 continue;
1816 }
1817
1818 if (isalpha((unsigned char)p[-1]) &&
1819 isalpha((unsigned char)p[1]))
1820 *p = ASCII_HYPH;
1821 p++;
1822 }
1823 return ROFF_CONT;
1824 }
1825
1826 int
1827 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
1828 {
1829 enum roff_tok t;
1830 int e;
1831 int pos; /* parse point */
1832 int spos; /* saved parse point for messages */
1833 int ppos; /* original offset in buf->buf */
1834 int ctl; /* macro line (boolean) */
1835
1836 ppos = pos = *offs;
1837
1838 if (len > 80 && r->tbl == NULL && r->eqn == NULL &&
1839 (r->man->flags & ROFF_NOFILL) == 0 &&
1840 strchr(" .\\", buf->buf[pos]) == NULL &&
1841 buf->buf[pos] != r->control &&
1842 strcspn(buf->buf, " ") < 80)
1843 mandoc_msg(MANDOCERR_TEXT_LONG, ln, (int)len - 1,
1844 "%.20s...", buf->buf + pos);
1845
1846 /* Handle in-line equation delimiters. */
1847
1848 if (r->tbl == NULL &&
1849 r->last_eqn != NULL && r->last_eqn->delim &&
1850 (r->eqn == NULL || r->eqn_inline)) {
1851 e = roff_eqndelim(r, buf, pos);
1852 if (e == ROFF_REPARSE)
1853 return e;
1854 assert(e == ROFF_CONT);
1855 }
1856
1857 /* Expand some escape sequences. */
1858
1859 e = roff_expand(r, buf, ln, pos, r->escape);
1860 if ((e & ROFF_MASK) == ROFF_IGN)
1861 return e;
1862 assert(e == ROFF_CONT);
1863
1864 ctl = roff_getcontrol(r, buf->buf, &pos);
1865
1866 /*
1867 * First, if a scope is open and we're not a macro, pass the
1868 * text through the macro's filter.
1869 * Equations process all content themselves.
1870 * Tables process almost all content themselves, but we want
1871 * to warn about macros before passing it there.
1872 */
1873
1874 if (r->last != NULL && ! ctl) {
1875 t = r->last->tok;
1876 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1877 if ((e & ROFF_MASK) == ROFF_IGN)
1878 return e;
1879 e &= ~ROFF_MASK;
1880 } else
1881 e = ROFF_IGN;
1882 if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
1883 eqn_read(r->eqn, buf->buf + ppos);
1884 return e;
1885 }
1886 if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
1887 tbl_read(r->tbl, ln, buf->buf, ppos);
1888 roff_addtbl(r->man, ln, r->tbl);
1889 return e;
1890 }
1891 if ( ! ctl) {
1892 r->options &= ~MPARSE_COMMENT;
1893 return roff_parsetext(r, buf, pos, offs) | e;
1894 }
1895
1896 /* Skip empty request lines. */
1897
1898 if (buf->buf[pos] == '"') {
1899 mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
1900 return ROFF_IGN;
1901 } else if (buf->buf[pos] == '\0')
1902 return ROFF_IGN;
1903
1904 /*
1905 * If a scope is open, go to the child handler for that macro,
1906 * as it may want to preprocess before doing anything with it.
1907 * Don't do so if an equation is open.
1908 */
1909
1910 if (r->last) {
1911 t = r->last->tok;
1912 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1913 }
1914
1915 /* No scope is open. This is a new request or macro. */
1916
1917 r->options &= ~MPARSE_COMMENT;
1918 spos = pos;
1919 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1920
1921 /* Tables ignore most macros. */
1922
1923 if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
1924 t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
1925 mandoc_msg(MANDOCERR_TBLMACRO,
1926 ln, pos, "%s", buf->buf + spos);
1927 if (t != TOKEN_NONE)
1928 return ROFF_IGN;
1929 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1930 pos++;
1931 while (buf->buf[pos] == ' ')
1932 pos++;
1933 tbl_read(r->tbl, ln, buf->buf, pos);
1934 roff_addtbl(r->man, ln, r->tbl);
1935 return ROFF_IGN;
1936 }
1937
1938 /* For now, let high level macros abort .ce mode. */
1939
1940 if (ctl && roffce_node != NULL &&
1941 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
1942 t == ROFF_TH || t == ROFF_TS)) {
1943 r->man->last = roffce_node;
1944 r->man->next = ROFF_NEXT_SIBLING;
1945 roffce_lines = 0;
1946 roffce_node = NULL;
1947 }
1948
1949 /*
1950 * This is neither a roff request nor a user-defined macro.
1951 * Let the standard macro set parsers handle it.
1952 */
1953
1954 if (t == TOKEN_NONE)
1955 return ROFF_CONT;
1956
1957 /* Execute a roff request or a user defined macro. */
1958
1959 return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
1960 }
1961
1962 /*
1963 * Internal interface function to tell the roff parser that execution
1964 * of the current macro ended. This is required because macro
1965 * definitions usually do not end with a .return request.
1966 */
1967 void
1968 roff_userret(struct roff *r)
1969 {
1970 struct mctx *ctx;
1971 int i;
1972
1973 assert(r->mstackpos >= 0);
1974 ctx = r->mstack + r->mstackpos;
1975 for (i = 0; i < ctx->argc; i++)
1976 free(ctx->argv[i]);
1977 ctx->argc = 0;
1978 r->mstackpos--;
1979 }
1980
1981 void
1982 roff_endparse(struct roff *r)
1983 {
1984 if (r->last != NULL)
1985 mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
1986 r->last->col, "%s", roff_name[r->last->tok]);
1987
1988 if (r->eqn != NULL) {
1989 mandoc_msg(MANDOCERR_BLK_NOEND,
1990 r->eqn->node->line, r->eqn->node->pos, "EQ");
1991 eqn_parse(r->eqn);
1992 r->eqn = NULL;
1993 }
1994
1995 if (r->tbl != NULL) {
1996 tbl_end(r->tbl, 1);
1997 r->tbl = NULL;
1998 }
1999 }
2000
2001 /*
2002 * Parse a roff node's type from the input buffer. This must be in the
2003 * form of ".foo xxx" in the usual way.
2004 */
2005 static enum roff_tok
2006 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
2007 {
2008 char *cp;
2009 const char *mac;
2010 size_t maclen;
2011 int deftype;
2012 enum roff_tok t;
2013
2014 cp = buf + *pos;
2015
2016 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
2017 return TOKEN_NONE;
2018
2019 mac = cp;
2020 maclen = roff_getname(r, &cp, ln, ppos);
2021
2022 deftype = ROFFDEF_USER | ROFFDEF_REN;
2023 r->current_string = roff_getstrn(r, mac, maclen, &deftype);
2024 switch (deftype) {
2025 case ROFFDEF_USER:
2026 t = ROFF_USERDEF;
2027 break;
2028 case ROFFDEF_REN:
2029 t = ROFF_RENAMED;
2030 break;
2031 default:
2032 t = roffhash_find(r->reqtab, mac, maclen);
2033 break;
2034 }
2035 if (t != TOKEN_NONE)
2036 *pos = cp - buf;
2037 else if (deftype == ROFFDEF_UNDEF) {
2038 /* Using an undefined macro defines it to be empty. */
2039 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
2040 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
2041 }
2042 return t;
2043 }
2044
2045 /* --- handling of request blocks ----------------------------------------- */
2046
2047 /*
2048 * Close a macro definition block or an "ignore" block.
2049 */
2050 static int
2051 roff_cblock(ROFF_ARGS)
2052 {
2053 int rr;
2054
2055 if (r->last == NULL) {
2056 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2057 return ROFF_IGN;
2058 }
2059
2060 switch (r->last->tok) {
2061 case ROFF_am:
2062 case ROFF_ami:
2063 case ROFF_de:
2064 case ROFF_dei:
2065 case ROFF_ig:
2066 break;
2067 case ROFF_am1:
2068 case ROFF_de1:
2069 /* Remapped in roff_block(). */
2070 abort();
2071 default:
2072 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2073 return ROFF_IGN;
2074 }
2075
2076 roffnode_pop(r);
2077 roffnode_cleanscope(r);
2078
2079 /*
2080 * If a conditional block with braces is still open,
2081 * check for "\}" block end markers.
2082 */
2083
2084 if (r->last != NULL && r->last->endspan < 0) {
2085 rr = 1; /* If arguments follow "\}", warn about them. */
2086 roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2087 }
2088
2089 if (buf->buf[pos] != '\0')
2090 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
2091 ".. %s", buf->buf + pos);
2092
2093 return ROFF_IGN;
2094 }
2095
2096 /*
2097 * Pop all nodes ending at the end of the current input line.
2098 * Return the number of loops ended.
2099 */
2100 static int
2101 roffnode_cleanscope(struct roff *r)
2102 {
2103 int inloop;
2104
2105 inloop = 0;
2106 while (r->last != NULL && r->last->endspan > 0) {
2107 if (--r->last->endspan != 0)
2108 break;
2109 inloop += roffnode_pop(r);
2110 }
2111 return inloop;
2112 }
2113
2114 /*
2115 * Handle the closing "\}" of a conditional block.
2116 * Apart from generating warnings, this only pops nodes.
2117 * Return the number of loops ended.
2118 */
2119 static int
2120 roff_ccond(struct roff *r, int ln, int ppos)
2121 {
2122 if (NULL == r->last) {
2123 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2124 return 0;
2125 }
2126
2127 switch (r->last->tok) {
2128 case ROFF_el:
2129 case ROFF_ie:
2130 case ROFF_if:
2131 case ROFF_while:
2132 break;
2133 default:
2134 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2135 return 0;
2136 }
2137
2138 if (r->last->endspan > -1) {
2139 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2140 return 0;
2141 }
2142
2143 return roffnode_pop(r) + roffnode_cleanscope(r);
2144 }
2145
2146 static int
2147 roff_block(ROFF_ARGS)
2148 {
2149 const char *name, *value;
2150 char *call, *cp, *iname, *rname;
2151 size_t csz, namesz, rsz;
2152 int deftype;
2153
2154 /* Ignore groff compatibility mode for now. */
2155
2156 if (tok == ROFF_de1)
2157 tok = ROFF_de;
2158 else if (tok == ROFF_dei1)
2159 tok = ROFF_dei;
2160 else if (tok == ROFF_am1)
2161 tok = ROFF_am;
2162 else if (tok == ROFF_ami1)
2163 tok = ROFF_ami;
2164
2165 /* Parse the macro name argument. */
2166
2167 cp = buf->buf + pos;
2168 if (tok == ROFF_ig) {
2169 iname = NULL;
2170 namesz = 0;
2171 } else {
2172 iname = cp;
2173 namesz = roff_getname(r, &cp, ln, ppos);
2174 iname[namesz] = '\0';
2175 }
2176
2177 /* Resolve the macro name argument if it is indirect. */
2178
2179 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2180 deftype = ROFFDEF_USER;
2181 name = roff_getstrn(r, iname, namesz, &deftype);
2182 if (name == NULL) {
2183 mandoc_msg(MANDOCERR_STR_UNDEF,
2184 ln, (int)(iname - buf->buf),
2185 "%.*s", (int)namesz, iname);
2186 namesz = 0;
2187 } else
2188 namesz = strlen(name);
2189 } else
2190 name = iname;
2191
2192 if (namesz == 0 && tok != ROFF_ig) {
2193 mandoc_msg(MANDOCERR_REQ_EMPTY,
2194 ln, ppos, "%s", roff_name[tok]);
2195 return ROFF_IGN;
2196 }
2197
2198 roffnode_push(r, tok, name, ln, ppos);
2199
2200 /*
2201 * At the beginning of a `de' macro, clear the existing string
2202 * with the same name, if there is one. New content will be
2203 * appended from roff_block_text() in multiline mode.
2204 */
2205
2206 if (tok == ROFF_de || tok == ROFF_dei) {
2207 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
2208 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2209 } else if (tok == ROFF_am || tok == ROFF_ami) {
2210 deftype = ROFFDEF_ANY;
2211 value = roff_getstrn(r, iname, namesz, &deftype);
2212 switch (deftype) { /* Before appending, ... */
2213 case ROFFDEF_PRE: /* copy predefined to user-defined. */
2214 roff_setstrn(&r->strtab, name, namesz,
2215 value, strlen(value), 0);
2216 break;
2217 case ROFFDEF_REN: /* call original standard macro. */
2218 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2219 (int)strlen(value), value);
2220 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2221 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2222 free(call);
2223 break;
2224 case ROFFDEF_STD: /* rename and call standard macro. */
2225 rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
2226 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
2227 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2228 (int)rsz, rname);
2229 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2230 free(call);
2231 free(rname);
2232 break;
2233 default:
2234 break;
2235 }
2236 }
2237
2238 if (*cp == '\0')
2239 return ROFF_IGN;
2240
2241 /* Get the custom end marker. */
2242
2243 iname = cp;
2244 namesz = roff_getname(r, &cp, ln, ppos);
2245
2246 /* Resolve the end marker if it is indirect. */
2247
2248 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2249 deftype = ROFFDEF_USER;
2250 name = roff_getstrn(r, iname, namesz, &deftype);
2251 if (name == NULL) {
2252 mandoc_msg(MANDOCERR_STR_UNDEF,
2253 ln, (int)(iname - buf->buf),
2254 "%.*s", (int)namesz, iname);
2255 namesz = 0;
2256 } else
2257 namesz = strlen(name);
2258 } else
2259 name = iname;
2260
2261 if (namesz)
2262 r->last->end = mandoc_strndup(name, namesz);
2263
2264 if (*cp != '\0')
2265 mandoc_msg(MANDOCERR_ARG_EXCESS,
2266 ln, pos, ".%s ... %s", roff_name[tok], cp);
2267
2268 return ROFF_IGN;
2269 }
2270
2271 static int
2272 roff_block_sub(ROFF_ARGS)
2273 {
2274 enum roff_tok t;
2275 int i, j;
2276
2277 /*
2278 * First check whether a custom macro exists at this level. If
2279 * it does, then check against it. This is some of groff's
2280 * stranger behaviours. If we encountered a custom end-scope
2281 * tag and that tag also happens to be a "real" macro, then we
2282 * need to try interpreting it again as a real macro. If it's
2283 * not, then return ignore. Else continue.
2284 */
2285
2286 if (r->last->end) {
2287 for (i = pos, j = 0; r->last->end[j]; j++, i++)
2288 if (buf->buf[i] != r->last->end[j])
2289 break;
2290
2291 if (r->last->end[j] == '\0' &&
2292 (buf->buf[i] == '\0' ||
2293 buf->buf[i] == ' ' ||
2294 buf->buf[i] == '\t')) {
2295 roffnode_pop(r);
2296 roffnode_cleanscope(r);
2297
2298 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2299 i++;
2300
2301 pos = i;
2302 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2303 TOKEN_NONE)
2304 return ROFF_RERUN;
2305 return ROFF_IGN;
2306 }
2307 }
2308
2309 /*
2310 * If we have no custom end-query or lookup failed, then try
2311 * pulling it out of the hashtable.
2312 */
2313
2314 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2315
2316 if (t != ROFF_cblock) {
2317 if (tok != ROFF_ig)
2318 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2319 return ROFF_IGN;
2320 }
2321
2322 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2323 }
2324
2325 static int
2326 roff_block_text(ROFF_ARGS)
2327 {
2328
2329 if (tok != ROFF_ig)
2330 roff_setstr(r, r->last->name, buf->buf + pos, 2);
2331
2332 return ROFF_IGN;
2333 }
2334
2335 /*
2336 * Check for a closing "\}" and handle it.
2337 * In this function, the final "int *offs" argument is used for
2338 * different purposes than elsewhere:
2339 * Input: *offs == 0: caller wants to discard arguments following \}
2340 * *offs == 1: caller wants to preserve text following \}
2341 * Output: *offs = 0: tell caller to discard input line
2342 * *offs = 1: tell caller to use input line
2343 */
2344 static int
2345 roff_cond_checkend(ROFF_ARGS)
2346 {
2347 char *ep;
2348 int endloop, irc, rr;
2349
2350 irc = ROFF_IGN;
2351 rr = r->last->rule;
2352 endloop = tok != ROFF_while ? ROFF_IGN :
2353 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2354 if (roffnode_cleanscope(r))
2355 irc |= endloop;
2356
2357 /*
2358 * If "\}" occurs on a macro line without a preceding macro or
2359 * a text line contains nothing else, drop the line completely.
2360 */
2361
2362 ep = buf->buf + pos;
2363 if (ep[0] == '\\' && ep[1] == '}' && (ep[2] == '\0' || *offs == 0))
2364 rr = 0;
2365
2366 /*
2367 * The closing delimiter "\}" rewinds the conditional scope
2368 * but is otherwise ignored when interpreting the line.
2369 */
2370
2371 while ((ep = strchr(ep, '\\')) != NULL) {
2372 switch (ep[1]) {
2373 case '}':
2374 if (ep[2] == '\0')
2375 ep[0] = '\0';
2376 else if (rr)
2377 ep[1] = '&';
2378 else
2379 memmove(ep, ep + 2, strlen(ep + 2) + 1);
2380 if (roff_ccond(r, ln, ep - buf->buf))
2381 irc |= endloop;
2382 break;
2383 case '\0':
2384 ++ep;
2385 break;
2386 default:
2387 ep += 2;
2388 break;
2389 }
2390 }
2391 *offs = rr;
2392 return irc;
2393 }
2394
2395 /*
2396 * Parse and process a request or macro line in conditional scope.
2397 */
2398 static int
2399 roff_cond_sub(ROFF_ARGS)
2400 {
2401 struct roffnode *bl;
2402 int irc, rr;
2403 enum roff_tok t;
2404
2405 rr = 0; /* If arguments follow "\}", skip them. */
2406 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2407 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2408
2409 /* For now, let high level macros abort .ce mode. */
2410
2411 if (roffce_node != NULL &&
2412 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
2413 t == ROFF_TH || t == ROFF_TS)) {
2414 r->man->last = roffce_node;
2415 r->man->next = ROFF_NEXT_SIBLING;
2416 roffce_lines = 0;
2417 roffce_node = NULL;
2418 }
2419
2420 /*
2421 * Fully handle known macros when they are structurally
2422 * required or when the conditional evaluated to true.
2423 */
2424
2425 if (t == ROFF_break) {
2426 if (irc & ROFF_LOOPMASK)
2427 irc = ROFF_IGN | ROFF_LOOPEXIT;
2428 else if (rr) {
2429 for (bl = r->last; bl != NULL; bl = bl->parent) {
2430 bl->rule = 0;
2431 if (bl->tok == ROFF_while)
2432 break;
2433 }
2434 }
2435 } else if (t != TOKEN_NONE &&
2436 (rr || roffs[t].flags & ROFFMAC_STRUCT))
2437 irc |= (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2438 else
2439 irc |= rr ? ROFF_CONT : ROFF_IGN;
2440 return irc;
2441 }
2442
2443 /*
2444 * Parse and process a text line in conditional scope.
2445 */
2446 static int
2447 roff_cond_text(ROFF_ARGS)
2448 {
2449 int irc, rr;
2450
2451 rr = 1; /* If arguments follow "\}", preserve them. */
2452 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2453 if (rr)
2454 irc |= ROFF_CONT;
2455 return irc;
2456 }
2457
2458 /* --- handling of numeric and conditional expressions -------------------- */
2459
2460 /*
2461 * Parse a single signed integer number. Stop at the first non-digit.
2462 * If there is at least one digit, return success and advance the
2463 * parse point, else return failure and let the parse point unchanged.
2464 * Ignore overflows, treat them just like the C language.
2465 */
2466 static int
2467 roff_getnum(const char *v, int *pos, int *res, int flags)
2468 {
2469 int myres, scaled, n, p;
2470
2471 if (NULL == res)
2472 res = &myres;
2473
2474 p = *pos;
2475 n = v[p] == '-';
2476 if (n || v[p] == '+')
2477 p++;
2478
2479 if (flags & ROFFNUM_WHITE)
2480 while (isspace((unsigned char)v[p]))
2481 p++;
2482
2483 for (*res = 0; isdigit((unsigned char)v[p]); p++)
2484 *res = 10 * *res + v[p] - '0';
2485 if (p == *pos + n)
2486 return 0;
2487
2488 if (n)
2489 *res = -*res;
2490
2491 /* Each number may be followed by one optional scaling unit. */
2492
2493 switch (v[p]) {
2494 case 'f':
2495 scaled = *res * 65536;
2496 break;
2497 case 'i':
2498 scaled = *res * 240;
2499 break;
2500 case 'c':
2501 scaled = *res * 240 / 2.54;
2502 break;
2503 case 'v':
2504 case 'P':
2505 scaled = *res * 40;
2506 break;
2507 case 'm':
2508 case 'n':
2509 scaled = *res * 24;
2510 break;
2511 case 'p':
2512 scaled = *res * 10 / 3;
2513 break;
2514 case 'u':
2515 scaled = *res;
2516 break;
2517 case 'M':
2518 scaled = *res * 6 / 25;
2519 break;
2520 default:
2521 scaled = *res;
2522 p--;
2523 break;
2524 }
2525 if (flags & ROFFNUM_SCALE)
2526 *res = scaled;
2527
2528 *pos = p + 1;
2529 return 1;
2530 }
2531
/*
 * Evaluate a string comparison condition.
 * The character at the parse point is the delimiter.
 * The condition holds if the text between the first and second
 * occurrence of the delimiter equals the text between the second
 * and third occurrence.
 * The parse point is moved behind the third occurrence or,
 * if there is none, to the end of the line.
 * Returns 1 if the strings match, 0 otherwise.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*lhs, *rhs;
	char		 delim;
	int		 equal;

	delim = v[*pos];
	lhs = v + *pos + 1;		/* cursor in the first string */
	rhs = strchr(lhs, delim);	/* cursor in the second string */
	equal = 0;

	if (rhs != NULL) {
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: skip to the last delimiter. */
				rhs = strchr(rhs, delim);
				break;
			}
			if (*rhs == delim) {
				/* Both strings ended at the same time. */
				equal = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return equal;
}
2574
2575 /*
2576 * Evaluate an optionally negated single character, numerical,
2577 * or string condition.
2578 */
2579 static int
2580 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2581 {
2582 const char *start, *end;
2583 char *cp, *name;
2584 size_t sz;
2585 int deftype, len, number, savepos, istrue, wanttrue;
2586
2587 if ('!' == v[*pos]) {
2588 wanttrue = 0;
2589 (*pos)++;
2590 } else
2591 wanttrue = 1;
2592
2593 switch (v[*pos]) {
2594 case '\0':
2595 return 0;
2596 case 'n':
2597 case 'o':
2598 (*pos)++;
2599 return wanttrue;
2600 case 'e':
2601 case 't':
2602 case 'v':
2603 (*pos)++;
2604 return !wanttrue;
2605 case 'c':
2606 do {
2607 (*pos)++;
2608 } while (v[*pos] == ' ');
2609
2610 /*
2611 * Quirk for groff compatibility:
2612 * The horizontal tab is neither available nor unavailable.
2613 */
2614
2615 if (v[*pos] == '\t') {
2616 (*pos)++;
2617 return 0;
2618 }
2619
2620 /* Printable ASCII characters are available. */
2621
2622 if (v[*pos] != '\\') {
2623 (*pos)++;
2624 return wanttrue;
2625 }
2626
2627 end = v + ++*pos;
2628 switch (mandoc_escape(&end, &start, &len)) {
2629 case ESCAPE_SPECIAL:
2630 istrue = mchars_spec2cp(start, len) != -1;
2631 break;
2632 case ESCAPE_UNICODE:
2633 istrue = 1;
2634 break;
2635 case ESCAPE_NUMBERED:
2636 istrue = mchars_num2char(start, len) != -1;
2637 break;
2638 default:
2639 istrue = !wanttrue;
2640 break;
2641 }
2642 *pos = end - v;
2643 return istrue == wanttrue;
2644 case 'd':
2645 case 'r':
2646 cp = v + *pos + 1;
2647 while (*cp == ' ')
2648 cp++;
2649 name = cp;
2650 sz = roff_getname(r, &cp, ln, cp - v);
2651 if (sz == 0)
2652 istrue = 0;
2653 else if (v[*pos] == 'r')
2654 istrue = roff_hasregn(r, name, sz);
2655 else {
2656 deftype = ROFFDEF_ANY;
2657 roff_getstrn(r, name, sz, &deftype);
2658 istrue = !!deftype;
2659 }
2660 *pos = (name + sz) - v;
2661 return istrue == wanttrue;
2662 default:
2663 break;
2664 }
2665
2666 savepos = *pos;
2667 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2668 return (number > 0) == wanttrue;
2669 else if (*pos == savepos)
2670 return roff_evalstrcond(v, pos) == wanttrue;
2671 else
2672 return 0;
2673 }
2674
2675 static int
2676 roff_line_ignore(ROFF_ARGS)
2677 {
2678
2679 return ROFF_IGN;
2680 }
2681
2682 static int
2683 roff_insec(ROFF_ARGS)
2684 {
2685
2686 mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]);
2687 return ROFF_IGN;
2688 }
2689
2690 static int
2691 roff_unsupp(ROFF_ARGS)
2692 {
2693
2694 mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]);
2695 return ROFF_IGN;
2696 }
2697
2698 static int
2699 roff_cond(ROFF_ARGS)
2700 {
2701 int irc;
2702
2703 roffnode_push(r, tok, NULL, ln, ppos);
2704
2705 /*
2706 * An `.el' has no conditional body: it will consume the value
2707 * of the current rstack entry set in prior `ie' calls or
2708 * defaults to DENY.
2709 *
2710 * If we're not an `el', however, then evaluate the conditional.
2711 */
2712
2713 r->last->rule = tok == ROFF_el ?
2714 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2715 roff_evalcond(r, ln, buf->buf, &pos);
2716
2717 /*
2718 * An if-else will put the NEGATION of the current evaluated
2719 * conditional into the stack of rules.
2720 */
2721
2722 if (tok == ROFF_ie) {
2723 if (r->rstackpos + 1 == r->rstacksz) {
2724 r->rstacksz += 16;
2725 r->rstack = mandoc_reallocarray(r->rstack,
2726 r->rstacksz, sizeof(int));
2727 }
2728 r->rstack[++r->rstackpos] = !r->last->rule;
2729 }
2730
2731 /* If the parent has false as its rule, then so do we. */
2732
2733 if (r->last->parent && !r->last->parent->rule)
2734 r->last->rule = 0;
2735
2736 /*
2737 * Determine scope.
2738 * If there is nothing on the line after the conditional,
2739 * not even whitespace, use next-line scope.
2740 * Except that .while does not support next-line scope.
2741 */
2742
2743 if (buf->buf[pos] == '\0' && tok != ROFF_while) {
2744 r->last->endspan = 2;
2745 goto out;
2746 }
2747
2748 while (buf->buf[pos] == ' ')
2749 pos++;
2750
2751 /* An opening brace requests multiline scope. */
2752
2753 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2754 r->last->endspan = -1;
2755 pos += 2;
2756 while (buf->buf[pos] == ' ')
2757 pos++;
2758 goto out;
2759 }
2760
2761 /*
2762 * Anything else following the conditional causes
2763 * single-line scope. Warn if the scope contains
2764 * nothing but trailing whitespace.
2765 */
2766
2767 if (buf->buf[pos] == '\0')
2768 mandoc_msg(MANDOCERR_COND_EMPTY,
2769 ln, ppos, "%s", roff_name[tok]);
2770
2771 r->last->endspan = 1;
2772
2773 out:
2774 *offs = pos;
2775 irc = ROFF_RERUN;
2776 if (tok == ROFF_while)
2777 irc |= ROFF_WHILE;
2778 return irc;
2779 }
2780
2781 static int
2782 roff_ds(ROFF_ARGS)
2783 {
2784 char *string;
2785 const char *name;
2786 size_t namesz;
2787
2788 /* Ignore groff compatibility mode for now. */
2789
2790 if (tok == ROFF_ds1)
2791 tok = ROFF_ds;
2792 else if (tok == ROFF_as1)
2793 tok = ROFF_as;
2794
2795 /*
2796 * The first word is the name of the string.
2797 * If it is empty or terminated by an escape sequence,
2798 * abort the `ds' request without defining anything.
2799 */
2800
2801 name = string = buf->buf + pos;
2802 if (*name == '\0')
2803 return ROFF_IGN;
2804
2805 namesz = roff_getname(r, &string, ln, pos);
2806 switch (name[namesz]) {
2807 case '\\':
2808 return ROFF_IGN;
2809 case '\t':
2810 string = buf->buf + pos + namesz;
2811 break;
2812 default:
2813 break;
2814 }
2815
2816 /* Read past the initial double-quote, if any. */
2817 if (*string == '"')
2818 string++;
2819
2820 /* The rest is the value. */
2821 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2822 ROFF_as == tok);
2823 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2824 return ROFF_IGN;
2825 }
2826
/*
 * Parse a single operator, one or two characters long.
 * On success, store a one-character operator code in *res,
 * move the parse point behind the operator, and return the code.
 * Two-character operators are encoded as:
 *   "<=" 'l',  "<>" '!',  "<?" 'i',  ">=" 'g',  ">?" 'a',  "==" '='.
 * On failure, return 0 and leave the parse point unchanged;
 * *res then holds the unrecognized character.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	static const struct {
		char	 first;
		char	 second;
		char	 code;
	} twochar[] = {
		{ '<', '=', 'l' },
		{ '<', '>', '!' },
		{ '<', '?', 'i' },
		{ '>', '=', 'g' },
		{ '>', '?', 'a' },
		{ '=', '=', '=' }
	};
	size_t	 i;

	*res = v[*pos];
	if (*res == '\0' || strchr("+-*/%&:<>=", *res) == NULL)
		return 0;

	/* The second character is only inspected after <, >, and =. */

	for (i = 0; i < sizeof(twochar) / sizeof(twochar[0]); i++) {
		if (twochar[i].first == v[*pos] &&
		    twochar[i].second == v[*pos + 1]) {
			*res = twochar[i].code;
			(*pos)++;
			break;
		}
	}
	(*pos)++;

	return *res;
}
2890
2891 /*
2892 * Evaluate either a parenthesized numeric expression
2893 * or a single signed integer number.
2894 */
2895 static int
2896 roff_evalpar(struct roff *r, int ln,
2897 const char *v, int *pos, int *res, int flags)
2898 {
2899
2900 if ('(' != v[*pos])
2901 return roff_getnum(v, pos, res, flags);
2902
2903 (*pos)++;
2904 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2905 return 0;
2906
2907 /*
2908 * Omission of the closing parenthesis
2909 * is an error in validation mode,
2910 * but ignored in evaluation mode.
2911 */
2912
2913 if (')' == v[*pos])
2914 (*pos)++;
2915 else if (NULL == res)
2916 return 0;
2917
2918 return 1;
2919 }
2920
2921 /*
2922 * Evaluate a complete numeric expression.
2923 * Proceed left to right, there is no concept of precedence.
2924 */
2925 static int
2926 roff_evalnum(struct roff *r, int ln, const char *v,
2927 int *pos, int *res, int flags)
2928 {
2929 int mypos, operand2;
2930 char operator;
2931
2932 if (NULL == pos) {
2933 mypos = 0;
2934 pos = &mypos;
2935 }
2936
2937 if (flags & ROFFNUM_WHITE)
2938 while (isspace((unsigned char)v[*pos]))
2939 (*pos)++;
2940
2941 if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2942 return 0;
2943
2944 while (1) {
2945 if (flags & ROFFNUM_WHITE)
2946 while (isspace((unsigned char)v[*pos]))
2947 (*pos)++;
2948
2949 if ( ! roff_getop(v, pos, &operator))
2950 break;
2951
2952 if (flags & ROFFNUM_WHITE)
2953 while (isspace((unsigned char)v[*pos]))
2954 (*pos)++;
2955
2956 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2957 return 0;
2958
2959 if (flags & ROFFNUM_WHITE)
2960 while (isspace((unsigned char)v[*pos]))
2961 (*pos)++;
2962
2963 if (NULL == res)
2964 continue;
2965
2966 switch (operator) {
2967 case '+':
2968 *res += operand2;
2969 break;
2970 case '-':
2971 *res -= operand2;
2972 break;
2973 case '*':
2974 *res *= operand2;
2975 break;
2976 case '/':
2977 if (operand2 == 0) {
2978 mandoc_msg(MANDOCERR_DIVZERO,
2979 ln, *pos, "%s", v);
2980 *res = 0;
2981 break;
2982 }
2983 *res /= operand2;
2984 break;
2985 case '%':
2986 if (operand2 == 0) {
2987 mandoc_msg(MANDOCERR_DIVZERO,
2988 ln, *pos, "%s", v);
2989 *res = 0;
2990 break;
2991 }
2992 *res %= operand2;
2993 break;
2994 case '<':
2995 *res = *res < operand2;
2996 break;
2997 case '>':
2998 *res = *res > operand2;
2999 break;
3000 case 'l':
3001 *res = *res <= operand2;
3002 break;
3003 case 'g':
3004 *res = *res >= operand2;
3005 break;
3006 case '=':
3007 *res = *res == operand2;
3008 break;
3009 case '!':
3010 *res = *res != operand2;
3011 break;
3012 case '&':
3013 *res = *res && operand2;
3014 break;
3015 case ':':
3016 *res = *res || operand2;
3017 break;
3018 case 'i':
3019 if (operand2 < *res)
3020 *res = operand2;
3021 break;
3022 case 'a':
3023 if (operand2 > *res)
3024 *res = operand2;
3025 break;
3026 default:
3027 abort();
3028 }
3029 }
3030 return 1;
3031 }
3032
3033 /* --- register management ------------------------------------------------ */
3034
/*
 * Set, increment, or decrement the register with the given
 * NUL-terminated name, leaving its step value unchanged.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	size_t	 len;

	len = strlen(name);
	roff_setregn(r, name, len, val, sign, INT_MIN);
}
3040
3041 static void
3042 roff_setregn(struct roff *r, const char *name, size_t len,
3043 int val, char sign, int step)
3044 {
3045 struct roffreg *reg;
3046
3047 /* Search for an existing register with the same name. */
3048 reg = r->regtab;
3049
3050 while (reg != NULL && (reg->key.sz != len ||
3051 strncmp(reg->key.p, name, len) != 0))
3052 reg = reg->next;
3053
3054 if (NULL == reg) {
3055 /* Create a new register. */
3056 reg = mandoc_malloc(sizeof(struct roffreg));
3057 reg->key.p = mandoc_strndup(name, len);
3058 reg->key.sz = len;
3059 reg->val = 0;
3060 reg->step = 0;
3061 reg->next = r->regtab;
3062 r->regtab = reg;
3063 }
3064
3065 if ('+' == sign)
3066 reg->val += val;
3067 else if ('-' == sign)
3068 reg->val -= val;
3069 else
3070 reg->val = val;
3071 if (step != INT_MIN)
3072 reg->step = step;
3073 }
3074
3075 /*
3076 * Handle some predefined read-only number registers.
3077 * For now, return -1 if the requested register is not predefined;
3078 * in case a predefined read-only register having the value -1
3079 * were to turn up, another special value would have to be chosen.
3080 */
3081 static int
3082 roff_getregro(const struct roff *r, const char *name)
3083 {
3084
3085 switch (*name) {
3086 case '$': /* Number of arguments of the last macro evaluated. */
3087 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
3088 case 'A': /* ASCII approximation mode is always off. */
3089 return 0;
3090 case 'g': /* Groff compatibility mode is always on. */
3091 return 1;
3092 case 'H': /* Fixed horizontal resolution. */
3093 return 24;
3094 case 'j': /* Always adjust left margin only. */
3095 return 0;
3096 case 'T': /* Some output device is always defined. */
3097 return 1;
3098 case 'V': /* Fixed vertical resolution. */
3099 return 40;
3100 default:
3101 return -1;
3102 }
3103 }
3104
/*
 * Return the value of the register with the given NUL-terminated
 * name without applying any auto-increment or auto-decrement.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	size_t	 len;

	len = strlen(name);
	return roff_getregn(r, name, len, '\0');
}
3110
3111 static int
3112 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
3113 {
3114 struct roffreg *reg;
3115 int val;
3116
3117 if ('.' == name[0] && 2 == len) {
3118 val = roff_getregro(r, name + 1);
3119 if (-1 != val)
3120 return val;
3121 }
3122
3123 for (reg = r->regtab; reg; reg = reg->next) {
3124 if (len == reg->key.sz &&
3125 0 == strncmp(name, reg->key.p, len)) {
3126 switch (sign) {
3127 case '+':
3128 reg->val += reg->step;
3129 break;
3130 case '-':
3131 reg->val -= reg->step;
3132 break;
3133 default:
3134 break;
3135 }
3136 return reg->val;
3137 }
3138 }
3139
3140 roff_setregn(r, name, len, 0, '\0', INT_MIN);
3141 return 0;
3142 }
3143
3144 static int
3145 roff_hasregn(const struct roff *r, const char *name, size_t len)
3146 {
3147 struct roffreg *reg;
3148 int val;
3149
3150 if ('.' == name[0] && 2 == len) {
3151 val = roff_getregro(r, name + 1);
3152 if (-1 != val)
3153 return 1;
3154 }
3155
3156 for (reg = r->regtab; reg; reg = reg->next)
3157 if (len == reg->key.sz &&
3158 0 == strncmp(name, reg->key.p, len))
3159 return 1;
3160
3161 return 0;
3162 }
3163
3164 static void
3165 roff_freereg(struct roffreg *reg)
3166 {
3167 struct roffreg *old_reg;
3168
3169 while (NULL != reg) {
3170 free(reg->key.p);
3171 old_reg = reg;
3172 reg = reg->next;
3173 free(old_reg);
3174 }
3175 }
3176
3177 static int
3178 roff_nr(ROFF_ARGS)
3179 {
3180 char *key, *val, *step;
3181 size_t keysz;
3182 int iv, is, len;
3183 char sign;
3184
3185 key = val = buf->buf + pos;
3186 if (*key == '\0')
3187 return ROFF_IGN;
3188
3189 keysz = roff_getname(r, &val, ln, pos);
3190 if (key[keysz] == '\\' || key[keysz] == '\t')
3191 return ROFF_IGN;
3192
3193 sign = *val;
3194 if (sign == '+' || sign == '-')
3195 val++;
3196
3197 len = 0;
3198 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
3199 return ROFF_IGN;
3200
3201 step = val + len;
3202 while (isspace((unsigned char)*step))
3203 step++;
3204 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
3205 is = INT_MIN;
3206
3207 roff_setregn(r, key, keysz, iv, sign, is);
3208 return ROFF_IGN;
3209 }
3210
3211 static int
3212 roff_rr(ROFF_ARGS)
3213 {
3214 struct roffreg *reg, **prev;
3215 char *name, *cp;
3216 size_t namesz;
3217
3218 name = cp = buf->buf + pos;
3219 if (*name == '\0')
3220 return ROFF_IGN;
3221 namesz = roff_getname(r, &cp, ln, pos);
3222 name[namesz] = '\0';
3223
3224 prev = &r->regtab;
3225 while (1) {
3226 reg = *prev;
3227 if (reg == NULL || !strcmp(name, reg->key.p))
3228 break;
3229 prev = &reg->next;
3230 }
3231 if (reg != NULL) {
3232 *prev = reg->next;
3233 free(reg->key.p);
3234 free(reg);
3235 }
3236 return ROFF_IGN;
3237 }
3238
3239 /* --- handler functions for roff requests -------------------------------- */
3240
3241 static int
3242 roff_rm(ROFF_ARGS)
3243 {
3244 const char *name;
3245 char *cp;
3246 size_t namesz;
3247
3248 cp = buf->buf + pos;
3249 while (*cp != '\0') {
3250 name = cp;
3251 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
3252 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
3253 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3254 if (name[namesz] == '\\' || name[namesz] == '\t')
3255 break;
3256 }
3257 return ROFF_IGN;
3258 }
3259
3260 static int
3261 roff_it(ROFF_ARGS)
3262 {
3263 int iv;
3264
3265 /* Parse the number of lines. */
3266
3267 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
3268 mandoc_msg(MANDOCERR_IT_NONUM,
3269 ln, ppos, "%s", buf->buf + 1);
3270 return ROFF_IGN;
3271 }
3272
3273 while (isspace((unsigned char)buf->buf[pos]))
3274 pos++;
3275
3276 /*
3277 * Arm the input line trap.
3278 * Special-casing "an-trap" is an ugly workaround to cope
3279 * with DocBook stupidly fiddling with man(7) internals.
3280 */
3281
3282 roffit_lines = iv;
3283 roffit_macro = mandoc_strdup(iv != 1 ||
3284 strcmp(buf->buf + pos, "an-trap") ?
3285 buf->buf + pos : "br");
3286 return ROFF_IGN;
3287 }
3288
3289 static int
3290 roff_Dd(ROFF_ARGS)
3291 {
3292 int mask;
3293 enum roff_tok t, te;
3294
3295 switch (tok) {
3296 case ROFF_Dd:
3297 tok = MDOC_Dd;
3298 te = MDOC_MAX;
3299 if (r->format == 0)
3300 r->format = MPARSE_MDOC;
3301 mask = MPARSE_MDOC | MPARSE_QUICK;
3302 break;
3303 case ROFF_TH:
3304 tok = MAN_TH;
3305 te = MAN_MAX;
3306 if (r->format == 0)
3307 r->format = MPARSE_MAN;
3308 mask = MPARSE_QUICK;
3309 break;
3310 default:
3311 abort();
3312 }
3313 if ((r->options & mask) == 0)
3314 for (t = tok; t < te; t++)
3315 roff_setstr(r, roff_name[t], NULL, 0);
3316 return ROFF_CONT;
3317 }
3318
3319 static int
3320 roff_TE(ROFF_ARGS)
3321 {
3322 r->man->flags &= ~ROFF_NONOFILL;
3323 if (r->tbl == NULL) {
3324 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE");
3325 return ROFF_IGN;
3326 }
3327 if (tbl_end(r->tbl, 0) == 0) {
3328 r->tbl = NULL;
3329 free(buf->buf);
3330 buf->buf = mandoc_strdup(".sp");
3331 buf->sz = 4;
3332 *offs = 0;
3333 return ROFF_REPARSE;
3334 }
3335 r->tbl = NULL;
3336 return ROFF_IGN;
3337 }
3338
3339 static int
3340 roff_T_(ROFF_ARGS)
3341 {
3342
3343 if (NULL == r->tbl)
3344 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&");
3345 else
3346 tbl_restart(ln, ppos, r->tbl);
3347
3348 return ROFF_IGN;
3349 }
3350
3351 /*
3352 * Handle in-line equation delimiters.
3353 */
3354 static int
3355 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
3356 {
3357 char *cp1, *cp2;
3358 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
3359
3360 /*
3361 * Outside equations, look for an opening delimiter.
3362 * If we are inside an equation, we already know it is
3363 * in-line, or this function wouldn't have been called;
3364 * so look for a closing delimiter.
3365 */
3366
3367 cp1 = buf->buf + pos;
3368 cp2 = strchr(cp1, r->eqn == NULL ?
3369 r->last_eqn->odelim : r->last_eqn->cdelim);
3370 if (cp2 == NULL)
3371 return ROFF_CONT;
3372
3373 *cp2++ = '\0';
3374 bef_pr = bef_nl = aft_nl = aft_pr = "";
3375
3376 /* Handle preceding text, protecting whitespace. */
3377
3378 if (*buf->buf != '\0') {
3379 if (r->eqn == NULL)
3380 bef_pr = "\\&";
3381 bef_nl = "\n";
3382 }
3383
3384 /*
3385 * Prepare replacing the delimiter with an equation macro
3386 * and drop leading white space from the equation.
3387 */
3388
3389 if (r->eqn == NULL) {
3390 while (*cp2 == ' ')
3391 cp2++;
3392 mac = ".EQ";
3393 } else
3394 mac = ".EN";
3395
3396 /* Handle following text, protecting whitespace. */
3397
3398 if (*cp2 != '\0') {
3399 aft_nl = "\n";
3400 if (r->eqn != NULL)
3401 aft_pr = "\\&";
3402 }
3403
3404 /* Do the actual replacement. */
3405
3406 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
3407 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
3408 free(buf->buf);
3409 buf->buf = cp1;
3410
3411 /* Toggle the in-line state of the eqn subsystem. */
3412
3413 r->eqn_inline = r->eqn == NULL;
3414 return ROFF_REPARSE;
3415 }
3416
3417 static int
3418 roff_EQ(ROFF_ARGS)
3419 {
3420 struct roff_node *n;
3421
3422 if (r->man->meta.macroset == MACROSET_MAN)
3423 man_breakscope(r->man, ROFF_EQ);
3424 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3425 if (ln > r->man->last->line)
3426 n->flags |= NODE_LINE;
3427 n->eqn = eqn_box_new();
3428 roff_node_append(r->man, n);
3429 r->man->next = ROFF_NEXT_SIBLING;
3430
3431 assert(r->eqn == NULL);
3432 if (r->last_eqn == NULL)
3433 r->last_eqn = eqn_alloc();
3434 else
3435 eqn_reset(r->last_eqn);
3436 r->eqn = r->last_eqn;
3437 r->eqn->node = n;
3438
3439 if (buf->buf[pos] != '\0')
3440 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3441 ".EQ %s", buf->buf + pos);
3442
3443 return ROFF_IGN;
3444 }
3445
3446 static int
3447 roff_EN(ROFF_ARGS)
3448 {
3449 if (r->eqn != NULL) {
3450 eqn_parse(r->eqn);
3451 r->eqn = NULL;
3452 } else
3453 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN");
3454 if (buf->buf[pos] != '\0')
3455 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3456 "EN %s", buf->buf + pos);
3457 return ROFF_IGN;
3458 }
3459
3460 static int
3461 roff_TS(ROFF_ARGS)
3462 {
3463 if (r->tbl != NULL) {
3464 mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
3465 tbl_end(r->tbl, 0);
3466 }
3467 r->man->flags |= ROFF_NONOFILL;
3468 r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
3469 if (r->last_tbl == NULL)
3470 r->first_tbl = r->tbl;
3471 r->last_tbl = r->tbl;
3472 return ROFF_IGN;
3473 }
3474
3475 static int
3476 roff_noarg(ROFF_ARGS)
3477 {
3478 if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3479 man_breakscope(r->man, tok);
3480 if (tok == ROFF_brp)
3481 tok = ROFF_br;
3482 roff_elem_alloc(r->man, ln, ppos, tok);
3483 if (buf->buf[pos] != '\0')
3484 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3485 "%s %s", roff_name[tok], buf->buf + pos);
3486 if (tok == ROFF_nf)
3487 r->man->flags |= ROFF_NOFILL;
3488 else if (tok == ROFF_fi)
3489 r->man->flags &= ~ROFF_NOFILL;
3490 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3491 r->man->next = ROFF_NEXT_SIBLING;
3492 return ROFF_IGN;
3493 }
3494
3495 static int
3496 roff_onearg(ROFF_ARGS)
3497 {
3498 struct roff_node *n;
3499 char *cp;
3500 int npos;
3501
3502 if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3503 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3504 tok == ROFF_ti))
3505 man_breakscope(r->man, tok);
3506
3507 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3508 r->man->last = roffce_node;
3509 r->man->next = ROFF_NEXT_SIBLING;
3510 }
3511
3512 roff_elem_alloc(r->man, ln, ppos, tok);
3513 n = r->man->last;
3514
3515 cp = buf->buf + pos;
3516 if (*cp != '\0') {
3517 while (*cp != '\0' && *cp != ' ')
3518 cp++;
3519 while (*cp == ' ')
3520 *cp++ = '\0';
3521 if (*cp != '\0')
3522 mandoc_msg(MANDOCERR_ARG_EXCESS,
3523 ln, (int)(cp - buf->buf),
3524 "%s ... %s", roff_name[tok], cp);
3525 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3526 }
3527
3528 if (tok == ROFF_ce || tok == ROFF_rj) {
3529 if (r->man->last->type == ROFFT_ELEM) {
3530 roff_word_alloc(r->man, ln, pos, "1");
3531 r->man->last->flags |= NODE_NOSRC;
3532 }
3533 npos = 0;
3534 if (roff_evalnum(r, ln, r->man->last->string, &npos,
3535 &roffce_lines, 0) == 0) {
3536 mandoc_msg(MANDOCERR_CE_NONUM,
3537 ln, pos, "ce %s", buf->buf + pos);
3538 roffce_lines = 1;
3539 }
3540 if (roffce_lines < 1) {
3541 r->man->last = r->man->last->parent;
3542 roffce_node = NULL;
3543 roffce_lines = 0;
3544 } else
3545 roffce_node = r->man->last->parent;
3546 } else {
3547 n->flags |= NODE_VALID | NODE_ENDED;
3548 r->man->last = n;
3549 }
3550 n->flags |= NODE_LINE;
3551 r->man->next = ROFF_NEXT_SIBLING;
3552 return ROFF_IGN;
3553 }
3554
3555 static int
3556 roff_manyarg(ROFF_ARGS)
3557 {
3558 struct roff_node *n;
3559 char *sp, *ep;
3560
3561 roff_elem_alloc(r->man, ln, ppos, tok);
3562 n = r->man->last;
3563
3564 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3565 while (*ep != '\0' && *ep != ' ')
3566 ep++;
3567 while (*ep == ' ')
3568 *ep++ = '\0';
3569 roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3570 }
3571
3572 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3573 r->man->last = n;
3574 r->man->next = ROFF_NEXT_SIBLING;
3575 return ROFF_IGN;
3576 }
3577
3578 static int
3579 roff_als(ROFF_ARGS)
3580 {
3581 char *oldn, *newn, *end, *value;
3582 size_t oldsz, newsz, valsz;
3583
3584 newn = oldn = buf->buf + pos;
3585 if (*newn == '\0')
3586 return ROFF_IGN;
3587
3588 newsz = roff_getname(r, &oldn, ln, pos);
3589 if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0')
3590 return ROFF_IGN;
3591
3592 end = oldn;
3593 oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3594 if (oldsz == 0)
3595 return ROFF_IGN;
3596
3597 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3598 (int)oldsz, oldn);
3599 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3600 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3601 free(value);
3602 return ROFF_IGN;
3603 }
3604
3605 /*
3606 * The .break request only makes sense inside conditionals,
3607 * and that case is already handled in roff_cond_sub().
3608 */
3609 static int
3610 roff_break(ROFF_ARGS)
3611 {
3612 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break");
3613 return ROFF_IGN;
3614 }
3615
3616 static int
3617 roff_cc(ROFF_ARGS)
3618 {
3619 const char *p;
3620
3621 p = buf->buf + pos;
3622
3623 if (*p == '\0' || (r->control = *p++) == '.')
3624 r->control = '\0';
3625
3626 if (*p != '\0')
3627 mandoc_msg(MANDOCERR_ARG_EXCESS,
3628 ln, p - buf->buf, "cc ... %s", p);
3629
3630 return ROFF_IGN;
3631 }
3632
3633 static int
3634 roff_char(ROFF_ARGS)
3635 {
3636 const char *p, *kp, *vp;
3637 size_t ksz, vsz;
3638 int font;
3639
3640 /* Parse the character to be replaced. */
3641
3642 kp = buf->buf + pos;
3643 p = kp + 1;
3644 if (*kp == '\0' || (*kp == '\\' &&
3645 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3646 (*p != ' ' && *p != '\0')) {
3647 mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp);
3648 return ROFF_IGN;
3649 }
3650 ksz = p - kp;
3651 while (*p == ' ')
3652 p++;
3653
3654 /*
3655 * If the replacement string contains a font escape sequence,
3656 * we have to restore the font at the end.
3657 */
3658
3659 vp = p;
3660 vsz = strlen(p);
3661 font = 0;
3662 while (*p != '\0') {
3663 if (*p++ != '\\')
3664 continue;
3665 switch (mandoc_escape(&p, NULL, NULL)) {
3666 case ESCAPE_FONT:
3667 case ESCAPE_FONTROMAN:
3668 case ESCAPE_FONTITALIC:
3669 case ESCAPE_FONTBOLD:
3670 case ESCAPE_FONTBI:
3671 case ESCAPE_FONTCR:
3672 case ESCAPE_FONTCB:
3673 case ESCAPE_FONTCI:
3674 case ESCAPE_FONTPREV:
3675 font++;
3676 break;
3677 default:
3678 break;
3679 }
3680 }
3681 if (font > 1)
3682 mandoc_msg(MANDOCERR_CHAR_FONT,
3683 ln, (int)(vp - buf->buf), "%s", vp);
3684
3685 /*
3686 * Approximate the effect of .char using the .tr tables.
3687 * XXX In groff, .char and .tr interact differently.
3688 */
3689
3690 if (ksz == 1) {
3691 if (r->xtab == NULL)
3692 r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3693 assert((unsigned int)*kp < 128);
3694 free(r->xtab[(int)*kp].p);
3695 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3696 "%s%s", vp, font ? "\fP" : "");
3697 } else {
3698 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3699 if (font)
3700 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3701 }
3702 return ROFF_IGN;
3703 }
3704
3705 static int
3706 roff_ec(ROFF_ARGS)
3707 {
3708 const char *p;
3709
3710 p = buf->buf + pos;
3711 if (*p == '\0')
3712 r->escape = '\\';
3713 else {
3714 r->escape = *p;
3715 if (*++p != '\0')
3716 mandoc_msg(MANDOCERR_ARG_EXCESS, ln,
3717 (int)(p - buf->buf), "ec ... %s", p);
3718 }
3719 return ROFF_IGN;
3720 }
3721
3722 static int
3723 roff_eo(ROFF_ARGS)
3724 {
3725 r->escape = '\0';
3726 if (buf->buf[pos] != '\0')
3727 mandoc_msg(MANDOCERR_ARG_SKIP,
3728 ln, pos, "eo %s", buf->buf + pos);
3729 return ROFF_IGN;
3730 }
3731
3732 static int
3733 roff_nop(ROFF_ARGS)
3734 {
3735 while (buf->buf[pos] == ' ')
3736 pos++;
3737 *offs = pos;
3738 return ROFF_RERUN;
3739 }
3740
3741 static int
3742 roff_tr(ROFF_ARGS)
3743 {
3744 const char *p, *first, *second;
3745 size_t fsz, ssz;
3746 enum mandoc_esc esc;
3747
3748 p = buf->buf + pos;
3749
3750 if (*p == '\0') {
3751 mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr");
3752 return ROFF_IGN;
3753 }
3754
3755 while (*p != '\0') {
3756 fsz = ssz = 1;
3757
3758 first = p++;
3759 if (*first == '\\') {
3760 esc = mandoc_escape(&p, NULL, NULL);
3761 if (esc == ESCAPE_ERROR) {
3762 mandoc_msg(MANDOCERR_ESC_BAD, ln,
3763 (int)(p - buf->buf), "%s", first);
3764 return ROFF_IGN;
3765 }
3766 fsz = (size_t)(p - first);
3767 }
3768
3769 second = p++;
3770 if (*second == '\\') {
3771 esc = mandoc_escape(&p, NULL, NULL);
3772 if (esc == ESCAPE_ERROR) {
3773 mandoc_msg(MANDOCERR_ESC_BAD, ln,
3774 (int)(p - buf->buf), "%s", second);
3775 return ROFF_IGN;
3776 }
3777 ssz = (size_t)(p - second);
3778 } else if (*second == '\0') {
3779 mandoc_msg(MANDOCERR_TR_ODD, ln,
3780 (int)(first - buf->buf), "tr %s", first);
3781 second = " ";
3782 p--;
3783 }
3784
3785 if (fsz > 1) {
3786 roff_setstrn(&r->xmbtab, first, fsz,
3787 second, ssz, 0);
3788 continue;
3789 }
3790
3791 if (r->xtab == NULL)
3792 r->xtab = mandoc_calloc(128,
3793 sizeof(struct roffstr));
3794
3795 free(r->xtab[(int)*first].p);
3796 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3797 r->xtab[(int)*first].sz = ssz;
3798 }
3799
3800 return ROFF_IGN;
3801 }
3802
3803 /*
3804 * Implementation of the .return request.
3805 * There is no need to call roff_userret() from here.
3806 * The read module will call that after rewinding the reader stack
3807 * to the place from where the current macro was called.
3808 */
3809 static int
3810 roff_return(ROFF_ARGS)
3811 {
3812 if (r->mstackpos >= 0)
3813 return ROFF_IGN | ROFF_USERRET;
3814
3815 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return");
3816 return ROFF_IGN;
3817 }
3818
3819 static int
3820 roff_rn(ROFF_ARGS)
3821 {
3822 const char *value;
3823 char *oldn, *newn, *end;
3824 size_t oldsz, newsz;
3825 int deftype;
3826
3827 oldn = newn = buf->buf + pos;
3828 if (*oldn == '\0')
3829 return ROFF_IGN;
3830
3831 oldsz = roff_getname(r, &newn, ln, pos);
3832 if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0')
3833 return ROFF_IGN;
3834
3835 end = newn;
3836 newsz = roff_getname(r, &end, ln, newn - buf->buf);
3837 if (newsz == 0)
3838 return ROFF_IGN;
3839
3840 deftype = ROFFDEF_ANY;
3841 value = roff_getstrn(r, oldn, oldsz, &deftype);
3842 switch (deftype) {
3843 case ROFFDEF_USER:
3844 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3845 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3846 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3847 break;
3848 case ROFFDEF_PRE:
3849 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3850 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3851 break;
3852 case ROFFDEF_REN:
3853 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3854 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3855 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3856 break;
3857 case ROFFDEF_STD:
3858 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3859 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3860 break;
3861 default:
3862 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3863 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3864 break;
3865 }
3866 return ROFF_IGN;
3867 }
3868
3869 static int
3870 roff_shift(ROFF_ARGS)
3871 {
3872 struct mctx *ctx;
3873 int argpos, levels, i;
3874
3875 argpos = pos;
3876 levels = 1;
3877 if (buf->buf[pos] != '\0' &&
3878 roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
3879 mandoc_msg(MANDOCERR_CE_NONUM,
3880 ln, pos, "shift %s", buf->buf + pos);
3881 levels = 1;
3882 }
3883 if (r->mstackpos < 0) {
3884 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift");
3885 return ROFF_IGN;
3886 }
3887 ctx = r->mstack + r->mstackpos;
3888 if (levels > ctx->argc) {
3889 mandoc_msg(MANDOCERR_SHIFT,
3890 ln, argpos, "%d, but max is %d", levels, ctx->argc);
3891 levels = ctx->argc;
3892 }
3893 if (levels < 0) {
3894 mandoc_msg(MANDOCERR_ARG_NEG, ln, argpos, "shift %d", levels);
3895 levels = 0;
3896 }
3897 if (levels == 0)
3898 return ROFF_IGN;
3899 for (i = 0; i < levels; i++)
3900 free(ctx->argv[i]);
3901 ctx->argc -= levels;
3902 for (i = 0; i < ctx->argc; i++)
3903 ctx->argv[i] = ctx->argv[i + levels];
3904 return ROFF_IGN;
3905 }
3906
3907 static int
3908 roff_so(ROFF_ARGS)
3909 {
3910 char *name, *cp;
3911
3912 name = buf->buf + pos;
3913 mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name);
3914
3915 /*
3916 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3917 * opening anything that's not in our cwd or anything beneath
3918 * it. Thus, explicitly disallow traversing up the file-system
3919 * or using absolute paths.
3920 */
3921
3922 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3923 mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name);
3924 buf->sz = mandoc_asprintf(&cp,
3925 ".sp\nSee the file %s.\n.sp", name) + 1;
3926 free(buf->buf);
3927 buf->buf = cp;
3928 *offs = 0;
3929 return ROFF_REPARSE;
3930 }
3931
3932 *offs = pos;
3933 return ROFF_SO;
3934 }
3935
3936 /* --- user defined strings and macros ------------------------------------ */
3937
3938 static int
3939 roff_userdef(ROFF_ARGS)
3940 {
3941 struct mctx *ctx;
3942 char *arg, *ap, *dst, *src;
3943 size_t sz;
3944
3945 /* If the macro is empty, ignore it altogether. */
3946
3947 if (*r->current_string == '\0')
3948 return ROFF_IGN;
3949
3950 /* Initialize a new macro stack context. */
3951
3952 if (++r->mstackpos == r->mstacksz) {
3953 r->mstack = mandoc_recallocarray(r->mstack,
3954 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
3955 r->mstacksz += 8;
3956 }
3957 ctx = r->mstack + r->mstackpos;
3958 ctx->argc = 0;
3959
3960 /*
3961 * Collect pointers to macro argument strings,
3962 * NUL-terminating them and escaping quotes.
3963 */
3964
3965 src = buf->buf + pos;
3966 while (*src != '\0') {
3967 if (ctx->argc == ctx->argsz) {
3968 ctx->argsz += 8;
3969 ctx->argv = mandoc_reallocarray(ctx->argv,
3970 ctx->argsz, sizeof(*ctx->argv));
3971 }
3972 arg = roff_getarg(r, &src, ln, &pos);
3973 sz = 1; /* For the terminating NUL. */
3974 for (ap = arg; *ap != '\0'; ap++)
3975 sz += *ap == '"' ? 4 : 1;
3976 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
3977 for (ap = arg; *ap != '\0'; ap++) {
3978 if (*ap == '"') {
3979 memcpy(dst, "\\(dq", 4);
3980 dst += 4;
3981 } else
3982 *dst++ = *ap;
3983 }
3984 *dst = '\0';
3985 free(arg);
3986 }
3987
3988 /* Replace the macro invocation by the macro definition. */
3989
3990 free(buf->buf);
3991 buf->buf = mandoc_strdup(r->current_string);
3992 buf->sz = strlen(buf->buf) + 1;
3993 *offs = 0;
3994
3995 return buf->buf[buf->sz - 2] == '\n' ?
3996 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
3997 }
3998
3999 /*
4000 * Calling a high-level macro that was renamed with .rn.
4001 * r->current_string has already been set up by roff_parse().
4002 */
4003 static int
4004 roff_renamed(ROFF_ARGS)
4005 {
4006 char *nbuf;
4007
4008 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
4009 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
4010 free(buf->buf);
4011 buf->buf = nbuf;
4012 *offs = 0;
4013 return ROFF_CONT;
4014 }
4015
4016 /*
4017 * Measure the length in bytes of the roff identifier at *cpp
4018 * and advance the pointer to the next word.
4019 */
4020 static size_t
4021 roff_getname(struct roff *r, char **cpp, int ln, int pos)
4022 {
4023 char *name, *cp;
4024 size_t namesz;
4025
4026 name = *cpp;
4027 if (*name == '\0')
4028 return 0;
4029
4030 /* Advance cp to the byte after the end of the name. */
4031
4032 for (cp = name; 1; cp++) {
4033 namesz = cp - name;
4034 if (*cp == '\0')
4035 break;
4036 if (*cp == ' ' || *cp == '\t') {
4037 cp++;
4038 break;
4039 }
4040 if (*cp != '\\')
4041 continue;
4042 if (cp[1] == '{' || cp[1] == '}')
4043 break;
4044 if (*++cp == '\\')
4045 continue;
4046 mandoc_msg(MANDOCERR_NAMESC, ln, pos,
4047 "%.*s", (int)(cp - name + 1), name);
4048 mandoc_escape((const char **)&cp, NULL, NULL);
4049 break;
4050 }
4051
4052 /* Read past spaces. */
4053
4054 while (*cp == ' ')
4055 cp++;
4056
4057 *cpp = cp;
4058 return namesz;
4059 }
4060
4061 /*
4062 * Store *string into the user-defined string called *name.
4063 * To clear an existing entry, call with (*r, *name, NULL, 0).
4064 * append == 0: replace mode
4065 * append == 1: single-line append mode
4066 * append == 2: multiline append mode, append '\n' after each call
4067 */
4068 static void
4069 roff_setstr(struct roff *r, const char *name, const char *string,
4070 int append)
4071 {
4072 size_t namesz;
4073
4074 namesz = strlen(name);
4075 roff_setstrn(&r->strtab, name, namesz, string,
4076 string ? strlen(string) : 0, append);
4077 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
4078 }
4079
4080 static void
4081 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
4082 const char *string, size_t stringsz, int append)
4083 {
4084 struct roffkv *n;
4085 char *c;
4086 int i;
4087 size_t oldch, newch;
4088
4089 /* Search for an existing string with the same name. */
4090 n = *r;
4091
4092 while (n && (namesz != n->key.sz ||
4093 strncmp(n->key.p, name, namesz)))
4094 n = n->next;
4095
4096 if (NULL == n) {
4097 /* Create a new string table entry. */
4098 n = mandoc_malloc(sizeof(struct roffkv));
4099 n->key.p = mandoc_strndup(name, namesz);
4100 n->key.sz = namesz;
4101 n->val.p = NULL;
4102 n->val.sz = 0;
4103 n->next = *r;
4104 *r = n;
4105 } else if (0 == append) {
4106 free(n->val.p);
4107 n->val.p = NULL;
4108 n->val.sz = 0;
4109 }
4110
4111 if (NULL == string)
4112 return;
4113
4114 /*
4115 * One additional byte for the '\n' in multiline mode,
4116 * and one for the terminating '\0'.
4117 */
4118 newch = stringsz + (1 < append ? 2u : 1u);
4119
4120 if (NULL == n->val.p) {
4121 n->val.p = mandoc_malloc(newch);
4122 *n->val.p = '\0';
4123 oldch = 0;
4124 } else {
4125 oldch = n->val.sz;
4126 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
4127 }
4128
4129 /* Skip existing content in the destination buffer. */
4130 c = n->val.p + (int)oldch;
4131
4132 /* Append new content to the destination buffer. */
4133 i = 0;
4134 while (i < (int)stringsz) {
4135 /*
4136 * Rudimentary roff copy mode:
4137 * Handle escaped backslashes.
4138 */
4139 if ('\\' == string[i] && '\\' == string[i + 1])
4140 i++;
4141 *c++ = string[i++];
4142 }
4143
4144 /* Append terminating bytes. */
4145 if (1 < append)
4146 *c++ = '\n';
4147
4148 *c = '\0';
4149 n->val.sz = (int)(c - n->val.p);
4150 }
4151
4152 static const char *
4153 roff_getstrn(struct roff *r, const char *name, size_t len,
4154 int *deftype)
4155 {
4156 const struct roffkv *n;
4157 int found, i;
4158 enum roff_tok tok;
4159
4160 found = 0;
4161 for (n = r->strtab; n != NULL; n = n->next) {
4162 if (strncmp(name, n->key.p, len) != 0 ||
4163 n->key.p[len] != '\0' || n->val.p == NULL)
4164 continue;
4165 if (*deftype & ROFFDEF_USER) {
4166 *deftype = ROFFDEF_USER;
4167 return n->val.p;
4168 } else {
4169 found = 1;
4170 break;
4171 }
4172 }
4173 for (n = r->rentab; n != NULL; n = n->next) {
4174 if (strncmp(name, n->key.p, len) != 0 ||
4175 n->key.p[len] != '\0' || n->val.p == NULL)
4176 continue;
4177 if (*deftype & ROFFDEF_REN) {
4178 *deftype = ROFFDEF_REN;
4179 return n->val.p;
4180 } else {
4181 found = 1;
4182 break;
4183 }
4184 }
4185 for (i = 0; i < PREDEFS_MAX; i++) {
4186 if (strncmp(name, predefs[i].name, len) != 0 ||
4187 predefs[i].name[len] != '\0')
4188 continue;
4189 if (*deftype & ROFFDEF_PRE) {
4190 *deftype = ROFFDEF_PRE;
4191 return predefs[i].str;
4192 } else {
4193 found = 1;
4194 break;
4195 }
4196 }
4197 if (r->man->meta.macroset != MACROSET_MAN) {
4198 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
4199 if (strncmp(name, roff_name[tok], len) != 0 ||
4200 roff_name[tok][len] != '\0')
4201 continue;
4202 if (*deftype & ROFFDEF_STD) {
4203 *deftype = ROFFDEF_STD;
4204 return NULL;
4205 } else {
4206 found = 1;
4207 break;
4208 }
4209 }
4210 }
4211 if (r->man->meta.macroset != MACROSET_MDOC) {
4212 for (tok = MAN_TH; tok < MAN_MAX; tok++) {
4213 if (strncmp(name, roff_name[tok], len) != 0 ||
4214 roff_name[tok][len] != '\0')
4215 continue;
4216 if (*deftype & ROFFDEF_STD) {
4217 *deftype = ROFFDEF_STD;
4218 return NULL;
4219 } else {
4220 found = 1;
4221 break;
4222 }
4223 }
4224 }
4225
4226 if (found == 0 && *deftype != ROFFDEF_ANY) {
4227 if (*deftype & ROFFDEF_REN) {
4228 /*
4229 * This might still be a request,
4230 * so do not treat it as undefined yet.
4231 */
4232 *deftype = ROFFDEF_UNDEF;
4233 return NULL;
4234 }
4235
4236 /* Using an undefined string defines it to be empty. */
4237
4238 roff_setstrn(&r->strtab, name, len, "", 0, 0);
4239 roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
4240 }
4241
4242 *deftype = 0;
4243 return NULL;
4244 }
4245
4246 static void
4247 roff_freestr(struct roffkv *r)
4248 {
4249 struct roffkv *n, *nn;
4250
4251 for (n = r; n; n = nn) {
4252 free(n->key.p);
4253 free(n->val.p);
4254 nn = n->next;
4255 free(n);
4256 }
4257 }
4258
4259 /* --- accessors and utility functions ------------------------------------ */
4260
4261 /*
4262 * Duplicate an input string, making the appropriate character
4263 * conversations (as stipulated by `tr') along the way.
4264 * Returns a heap-allocated string with all the replacements made.
4265 */
4266 char *
4267 roff_strdup(const struct roff *r, const char *p)
4268 {
4269 const struct roffkv *cp;
4270 char *res;
4271 const char *pp;
4272 size_t ssz, sz;
4273 enum mandoc_esc esc;
4274
4275 if (NULL == r->xmbtab && NULL == r->xtab)
4276 return mandoc_strdup(p);
4277 else if ('\0' == *p)
4278 return mandoc_strdup("");
4279
4280 /*
4281 * Step through each character looking for term matches
4282 * (remember that a `tr' can be invoked with an escape, which is
4283 * a glyph but the escape is multi-character).
4284 * We only do this if the character hash has been initialised
4285 * and the string is >0 length.
4286 */
4287
4288 res = NULL;
4289 ssz = 0;
4290
4291 while ('\0' != *p) {
4292 assert((unsigned int)*p < 128);
4293 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4294 sz = r->xtab[(int)*p].sz;
4295 res = mandoc_realloc(res, ssz + sz + 1);
4296 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4297 ssz += sz;
4298 p++;
4299 continue;
4300 } else if ('\\' != *p) {
4301 res = mandoc_realloc(res, ssz + 2);
4302 res[ssz++] = *p++;
4303 continue;
4304 }
4305
4306 /* Search for term matches. */
4307 for (cp = r->xmbtab; cp; cp = cp->next)
4308 if (0 == strncmp(p, cp->key.p, cp->key.sz))
4309 break;
4310
4311 if (NULL != cp) {
4312 /*
4313 * A match has been found.
4314 * Append the match to the array and move
4315 * forward by its keysize.
4316 */
4317 res = mandoc_realloc(res,
4318 ssz + cp->val.sz + 1);
4319 memcpy(res + ssz, cp->val.p, cp->val.sz);
4320 ssz += cp->val.sz;
4321 p += (int)cp->key.sz;
4322 continue;
4323 }
4324
4325 /*
4326 * Handle escapes carefully: we need to copy
4327 * over just the escape itself, or else we might
4328 * do replacements within the escape itself.
4329 * Make sure to pass along the bogus string.
4330 */
4331 pp = p++;
4332 esc = mandoc_escape(&p, NULL, NULL);
4333 if (ESCAPE_ERROR == esc) {
4334 sz = strlen(pp);
4335 res = mandoc_realloc(res, ssz + sz + 1);
4336 memcpy(res + ssz, pp, sz);
4337 break;
4338 }
4339 /*
4340 * We bail out on bad escapes.
4341 * No need to warn: we already did so when
4342 * roff_expand() was called.
4343 */
4344 sz = (int)(p - pp);
4345 res = mandoc_realloc(res, ssz + sz + 1);
4346 memcpy(res + ssz, pp, sz);
4347 ssz += sz;
4348 }
4349
4350 res[(int)ssz] = '\0';
4351 return res;
4352 }
4353
4354 int
4355 roff_getformat(const struct roff *r)
4356 {
4357
4358 return r->format;
4359 }
4360
4361 /*
4362 * Find out whether a line is a macro line or not.
4363 * If it is, adjust the current position and return one; if it isn't,
4364 * return zero and don't change the current position.
4365 * If the control character has been set with `.cc', then let that grain
4366 * precedence.
4367 * This is slighly contrary to groff, where using the non-breaking
4368 * control character when `cc' has been invoked will cause the
4369 * non-breaking macro contents to be printed verbatim.
4370 */
4371 int
4372 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4373 {
4374 int pos;
4375
4376 pos = *ppos;
4377
4378 if (r->control != '\0' && cp[pos] == r->control)
4379 pos++;
4380 else if (r->control != '\0')
4381 return 0;
4382 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4383 pos += 2;
4384 else if ('.' == cp[pos] || '\'' == cp[pos])
4385 pos++;
4386 else
4387 return 0;
4388
4389 while (' ' == cp[pos] || '\t' == cp[pos])
4390 pos++;
4391
4392 *ppos = pos;
4393 return 1;
4394 }