]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
7bed94293f04a895bd66738090d6d5106fbdb813
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.384 2022/04/28 16:21:10 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010-2015, 2017-2022 Ingo Schwarze <schwarze@openbsd.org>
4 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 * Implementation of the roff(7) parser for mandoc(1).
19 */
20 #include "config.h"
21
22 #include <sys/types.h>
23
24 #include <assert.h>
25 #include <ctype.h>
26 #include <limits.h>
27 #include <stddef.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32
33 #include "mandoc_aux.h"
34 #include "mandoc_ohash.h"
35 #include "mandoc.h"
36 #include "roff.h"
37 #include "mandoc_parse.h"
38 #include "libmandoc.h"
39 #include "roff_int.h"
40 #include "tbl_parse.h"
41 #include "eqn_parse.h"
42
/*
 * Marker byte handed from roff_getarg() to roff_expand() to flag an
 * escape sequence that resulted from copy-in processing and still
 * needs to be checked or interpolated.  Nothing else uses it, so it
 * lives here rather than in a header file.
 */
#define	ASCII_ESC	27

/* Cap on string expansions per input line, to break infinite loops. */
#define	EXPAND_LIMIT	1000

/* Bit flags describing how a macro or string name is defined. */
#define	ROFFDEF_USER	(1 << 1)	/* User-defined. */
#define	ROFFDEF_PRE	(1 << 2)	/* Predefined. */
#define	ROFFDEF_REN	(1 << 3)	/* Renamed standard macro. */
#define	ROFFDEF_STD	(1 << 4)	/* mdoc(7) or man(7) macro. */
/* Union of all four kinds of definition above. */
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
			 ROFFDEF_REN | ROFFDEF_STD)
#define	ROFFDEF_UNDEF	(1 << 5)	/* Completely undefined. */
62
63 /* --- data types --------------------------------------------------------- */
64
/*
 * Minimal string buffer: a pointer plus its cached length.
 */
struct roffstr {
	char		*p;	/* NUL-terminated buffer */
	size_t		 sz;	/* saved strlen(p) */
};
72
73 /*
74 * A key-value roffstr pair as part of a singly-linked list.
75 */
76 struct roffkv {
77 struct roffstr key;
78 struct roffstr val;
79 struct roffkv *next; /* next in list */
80 };
81
82 /*
83 * A single number register as part of a singly-linked list.
84 */
85 struct roffreg {
86 struct roffstr key;
87 int val;
88 int step;
89 struct roffreg *next;
90 };
91
92 /*
93 * Association of request and macro names with token IDs.
94 */
95 struct roffreq {
96 enum roff_tok tok;
97 char name[];
98 };
99
/*
 * Processing context of one macro call.
 * Nested macro calls need one context each.
 */
struct mctx {
	char		**argv;	/* argument vector of the call */
	int		 argc;	/* number of arguments */
	int		 argsz;	/* presumably the allocated size of argv - confirm */
};
109
/*
 * Complete state of one roff(7) parser instance.
 */
struct roff {
	struct roff_man	*man;		/* mdoc or man parser */
	struct roffnode	*last;		/* leaf of stack */
	struct mctx	*mstack;	/* stack of macro contexts */
	int		*rstack;	/* stack of inverted `ie' values */
	struct ohash	*reqtab;	/* request lookup table */
	struct roffreg	*regtab;	/* number registers */
	struct roffkv	*strtab;	/* user-defined strings & macros */
	struct roffkv	*rentab;	/* renamed strings & macros */
	struct roffkv	*xmbtab;	/* multi-byte trans table (`tr') */
	struct roffstr	*xtab;		/* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl;	/* first table parsed */
	struct tbl_node	*last_tbl;	/* last table parsed */
	struct tbl_node	*tbl;		/* current table being parsed */
	struct eqn_node	*last_eqn;	/* equation parser */
	struct eqn_node	*eqn;		/* active equation parser */
	int		 eqn_inline;	/* current equation is inline */
	int		 options;	/* parse options */
	int		 mstacksz;	/* current size of mstack */
	int		 mstackpos;	/* position in mstack */
	int		 rstacksz;	/* current size limit of rstack */
	int		 rstackpos;	/* position in rstack */
	int		 format;	/* current file in mdoc or man format */
	char		 control;	/* control character */
	char		 escape;	/* escape character */
};
137
138 /*
139 * A macro definition, condition, or ignored block.
140 */
141 struct roffnode {
142 enum roff_tok tok; /* type of node */
143 struct roffnode *parent; /* up one in stack */
144 int line; /* parse line */
145 int col; /* parse col */
146 char *name; /* node name, e.g. macro name */
147 char *end; /* custom end macro of the block */
148 int endspan; /* scope to: 1=eol 2=next line -1=\} */
149 int rule; /* content is: 1=evaluated 0=skipped */
150 };
151
/*
 * Parameter list shared by every request handler, followed by the
 * function pointer type for such a handler.
 */
#define	ROFF_ARGS	struct roff *r,	/* parse ctx */ \
			enum roff_tok tok, /* tok of macro */ \
			struct buf *buf, /* input buffer */ \
			int ln,		/* parse line */ \
			int ppos,	/* original pos in buffer */ \
			int pos,	/* current pos in buffer */ \
			int *offs	/* reset offset of buffer data */

typedef	int (*roffproc)(ROFF_ARGS);
161
162 struct roffmac {
163 roffproc proc; /* process new macro */
164 roffproc text; /* process as child text of macro */
165 roffproc sub; /* process as child of macro */
166 int flags;
167 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
168 };
169
/*
 * One predefined string: an input name and what it is replaced with.
 */
struct predef {
	const char	*name;	/* predefined input name */
	const char	*str;	/* replacement symbol */
};

/* Expands to one struct predef initializer, including the trailing comma. */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
177
178 /* --- function prototypes ------------------------------------------------ */
179
180 static int roffnode_cleanscope(struct roff *);
181 static int roffnode_pop(struct roff *);
182 static void roffnode_push(struct roff *, enum roff_tok,
183 const char *, int, int);
184 static void roff_addtbl(struct roff_man *, int, struct tbl_node *);
185 static int roff_als(ROFF_ARGS);
186 static int roff_block(ROFF_ARGS);
187 static int roff_block_text(ROFF_ARGS);
188 static int roff_block_sub(ROFF_ARGS);
189 static int roff_break(ROFF_ARGS);
190 static int roff_cblock(ROFF_ARGS);
191 static int roff_cc(ROFF_ARGS);
192 static int roff_ccond(struct roff *, int, int);
193 static int roff_char(ROFF_ARGS);
194 static int roff_cond(ROFF_ARGS);
195 static int roff_cond_checkend(ROFF_ARGS);
196 static int roff_cond_text(ROFF_ARGS);
197 static int roff_cond_sub(ROFF_ARGS);
198 static int roff_ds(ROFF_ARGS);
199 static int roff_ec(ROFF_ARGS);
200 static int roff_eo(ROFF_ARGS);
201 static int roff_eqndelim(struct roff *, struct buf *, int);
202 static int roff_evalcond(struct roff *, int, char *, int *);
203 static int roff_evalnum(struct roff *, int,
204 const char *, int *, int *, int);
205 static int roff_evalpar(struct roff *, int,
206 const char *, int *, int *, int);
207 static int roff_evalstrcond(const char *, int *);
208 static int roff_expand(struct roff *, struct buf *,
209 int, int, char);
210 static void roff_free1(struct roff *);
211 static void roff_freereg(struct roffreg *);
212 static void roff_freestr(struct roffkv *);
213 static size_t roff_getname(struct roff *, char **, int, int);
214 static int roff_getnum(const char *, int *, int *, int);
215 static int roff_getop(const char *, int *, char *);
216 static int roff_getregn(struct roff *,
217 const char *, size_t, char);
218 static int roff_getregro(const struct roff *,
219 const char *name);
220 static const char *roff_getstrn(struct roff *,
221 const char *, size_t, int *);
222 static int roff_hasregn(const struct roff *,
223 const char *, size_t);
224 static int roff_insec(ROFF_ARGS);
225 static int roff_it(ROFF_ARGS);
226 static int roff_line_ignore(ROFF_ARGS);
227 static void roff_man_alloc1(struct roff_man *);
228 static void roff_man_free1(struct roff_man *);
229 static int roff_manyarg(ROFF_ARGS);
230 static int roff_mc(ROFF_ARGS);
231 static int roff_noarg(ROFF_ARGS);
232 static int roff_nop(ROFF_ARGS);
233 static int roff_nr(ROFF_ARGS);
234 static int roff_onearg(ROFF_ARGS);
235 static enum roff_tok roff_parse(struct roff *, char *, int *,
236 int, int);
237 static int roff_parsetext(struct roff *, struct buf *,
238 int, int *);
239 static int roff_renamed(ROFF_ARGS);
240 static int roff_return(ROFF_ARGS);
241 static int roff_rm(ROFF_ARGS);
242 static int roff_rn(ROFF_ARGS);
243 static int roff_rr(ROFF_ARGS);
244 static void roff_setregn(struct roff *, const char *,
245 size_t, int, char, int);
246 static void roff_setstr(struct roff *,
247 const char *, const char *, int);
248 static void roff_setstrn(struct roffkv **, const char *,
249 size_t, const char *, size_t, int);
250 static int roff_shift(ROFF_ARGS);
251 static int roff_so(ROFF_ARGS);
252 static int roff_tr(ROFF_ARGS);
253 static int roff_Dd(ROFF_ARGS);
254 static int roff_TE(ROFF_ARGS);
255 static int roff_TS(ROFF_ARGS);
256 static int roff_EQ(ROFF_ARGS);
257 static int roff_EN(ROFF_ARGS);
258 static int roff_T_(ROFF_ARGS);
259 static int roff_unsupp(ROFF_ARGS);
260 static int roff_userdef(ROFF_ARGS);
261
262 /* --- constant data ------------------------------------------------------ */
263
/* Flag bits for the numeric parsing functions. */
#define	ROFFNUM_SCALE	(1 << 0)	/* Honour scaling in roff_getnum(). */
#define	ROFFNUM_WHITE	(1 << 1)	/* Skip whitespace in roff_evalnum(). */
266
267 const char *__roff_name[MAN_MAX + 1] = {
268 "br", "ce", "fi", "ft",
269 "ll", "mc", "nf",
270 "po", "rj", "sp",
271 "ta", "ti", NULL,
272 "ab", "ad", "af", "aln",
273 "als", "am", "am1", "ami",
274 "ami1", "as", "as1", "asciify",
275 "backtrace", "bd", "bleedat", "blm",
276 "box", "boxa", "bp", "BP",
277 "break", "breakchar", "brnl", "brp",
278 "brpnl", "c2", "cc",
279 "cf", "cflags", "ch", "char",
280 "chop", "class", "close", "CL",
281 "color", "composite", "continue", "cp",
282 "cropat", "cs", "cu", "da",
283 "dch", "Dd", "de", "de1",
284 "defcolor", "dei", "dei1", "device",
285 "devicem", "di", "do", "ds",
286 "ds1", "dwh", "dt", "ec",
287 "ecr", "ecs", "el", "em",
288 "EN", "eo", "EP", "EQ",
289 "errprint", "ev", "evc", "ex",
290 "fallback", "fam", "fc", "fchar",
291 "fcolor", "fdeferlig", "feature", "fkern",
292 "fl", "flig", "fp", "fps",
293 "fschar", "fspacewidth", "fspecial", "ftr",
294 "fzoom", "gcolor", "hc", "hcode",
295 "hidechar", "hla", "hlm", "hpf",
296 "hpfa", "hpfcode", "hw", "hy",
297 "hylang", "hylen", "hym", "hypp",
298 "hys", "ie", "if", "ig",
299 "index", "it", "itc", "IX",
300 "kern", "kernafter", "kernbefore", "kernpair",
301 "lc", "lc_ctype", "lds", "length",
302 "letadj", "lf", "lg", "lhang",
303 "linetabs", "lnr", "lnrf", "lpfx",
304 "ls", "lsm", "lt",
305 "mediasize", "minss", "mk", "mso",
306 "na", "ne", "nh", "nhychar",
307 "nm", "nn", "nop", "nr",
308 "nrf", "nroff", "ns", "nx",
309 "open", "opena", "os", "output",
310 "padj", "papersize", "pc", "pev",
311 "pi", "PI", "pl", "pm",
312 "pn", "pnr", "ps",
313 "psbb", "pshape", "pso", "ptr",
314 "pvs", "rchar", "rd", "recursionlimit",
315 "return", "rfschar", "rhang",
316 "rm", "rn", "rnn", "rr",
317 "rs", "rt", "schar", "sentchar",
318 "shc", "shift", "sizes", "so",
319 "spacewidth", "special", "spreadwarn", "ss",
320 "sty", "substring", "sv", "sy",
321 "T&", "tc", "TE",
322 "TH", "tkf", "tl",
323 "tm", "tm1", "tmc", "tr",
324 "track", "transchar", "trf", "trimat",
325 "trin", "trnt", "troff", "TS",
326 "uf", "ul", "unformat", "unwatch",
327 "unwatchn", "vpt", "vs", "warn",
328 "warnscale", "watch", "watchlength", "watchn",
329 "wh", "while", "write", "writec",
330 "writem", "xflag", ".", NULL,
331 NULL, "text",
332 "Dd", "Dt", "Os", "Sh",
333 "Ss", "Pp", "D1", "Dl",
334 "Bd", "Ed", "Bl", "El",
335 "It", "Ad", "An", "Ap",
336 "Ar", "Cd", "Cm", "Dv",
337 "Er", "Ev", "Ex", "Fa",
338 "Fd", "Fl", "Fn", "Ft",
339 "Ic", "In", "Li", "Nd",
340 "Nm", "Op", "Ot", "Pa",
341 "Rv", "St", "Va", "Vt",
342 "Xr", "%A", "%B", "%D",
343 "%I", "%J", "%N", "%O",
344 "%P", "%R", "%T", "%V",
345 "Ac", "Ao", "Aq", "At",
346 "Bc", "Bf", "Bo", "Bq",
347 "Bsx", "Bx", "Db", "Dc",
348 "Do", "Dq", "Ec", "Ef",
349 "Em", "Eo", "Fx", "Ms",
350 "No", "Ns", "Nx", "Ox",
351 "Pc", "Pf", "Po", "Pq",
352 "Qc", "Ql", "Qo", "Qq",
353 "Re", "Rs", "Sc", "So",
354 "Sq", "Sm", "Sx", "Sy",
355 "Tn", "Ux", "Xc", "Xo",
356 "Fo", "Fc", "Oo", "Oc",
357 "Bk", "Ek", "Bt", "Hf",
358 "Fr", "Ud", "Lb", "Lp",
359 "Lk", "Mt", "Brq", "Bro",
360 "Brc", "%C", "Es", "En",
361 "Dx", "%Q", "%U", "Ta",
362 "Tg", NULL,
363 "TH", "SH", "SS", "TP",
364 "TQ",
365 "LP", "PP", "P", "IP",
366 "HP", "SM", "SB", "BI",
367 "IB", "BR", "RB", "R",
368 "B", "I", "IR", "RI",
369 "RE", "RS", "DT", "UC",
370 "PD", "AT", "in",
371 "SY", "YS", "OP",
372 "EX", "EE", "UR",
373 "UE", "MT", "ME", NULL
374 };
375 const char *const *roff_name = __roff_name;
376
377 static struct roffmac roffs[TOKEN_NONE] = {
378 { roff_noarg, NULL, NULL, 0 }, /* br */
379 { roff_onearg, NULL, NULL, 0 }, /* ce */
380 { roff_noarg, NULL, NULL, 0 }, /* fi */
381 { roff_onearg, NULL, NULL, 0 }, /* ft */
382 { roff_onearg, NULL, NULL, 0 }, /* ll */
383 { roff_mc, NULL, NULL, 0 }, /* mc */
384 { roff_noarg, NULL, NULL, 0 }, /* nf */
385 { roff_onearg, NULL, NULL, 0 }, /* po */
386 { roff_onearg, NULL, NULL, 0 }, /* rj */
387 { roff_onearg, NULL, NULL, 0 }, /* sp */
388 { roff_manyarg, NULL, NULL, 0 }, /* ta */
389 { roff_onearg, NULL, NULL, 0 }, /* ti */
390 { NULL, NULL, NULL, 0 }, /* ROFF_MAX */
391 { roff_unsupp, NULL, NULL, 0 }, /* ab */
392 { roff_line_ignore, NULL, NULL, 0 }, /* ad */
393 { roff_line_ignore, NULL, NULL, 0 }, /* af */
394 { roff_unsupp, NULL, NULL, 0 }, /* aln */
395 { roff_als, NULL, NULL, 0 }, /* als */
396 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am */
397 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am1 */
398 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami */
399 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami1 */
400 { roff_ds, NULL, NULL, 0 }, /* as */
401 { roff_ds, NULL, NULL, 0 }, /* as1 */
402 { roff_unsupp, NULL, NULL, 0 }, /* asciify */
403 { roff_line_ignore, NULL, NULL, 0 }, /* backtrace */
404 { roff_line_ignore, NULL, NULL, 0 }, /* bd */
405 { roff_line_ignore, NULL, NULL, 0 }, /* bleedat */
406 { roff_unsupp, NULL, NULL, 0 }, /* blm */
407 { roff_unsupp, NULL, NULL, 0 }, /* box */
408 { roff_unsupp, NULL, NULL, 0 }, /* boxa */
409 { roff_line_ignore, NULL, NULL, 0 }, /* bp */
410 { roff_unsupp, NULL, NULL, 0 }, /* BP */
411 { roff_break, NULL, NULL, 0 }, /* break */
412 { roff_line_ignore, NULL, NULL, 0 }, /* breakchar */
413 { roff_line_ignore, NULL, NULL, 0 }, /* brnl */
414 { roff_noarg, NULL, NULL, 0 }, /* brp */
415 { roff_line_ignore, NULL, NULL, 0 }, /* brpnl */
416 { roff_unsupp, NULL, NULL, 0 }, /* c2 */
417 { roff_cc, NULL, NULL, 0 }, /* cc */
418 { roff_insec, NULL, NULL, 0 }, /* cf */
419 { roff_line_ignore, NULL, NULL, 0 }, /* cflags */
420 { roff_line_ignore, NULL, NULL, 0 }, /* ch */
421 { roff_char, NULL, NULL, 0 }, /* char */
422 { roff_unsupp, NULL, NULL, 0 }, /* chop */
423 { roff_line_ignore, NULL, NULL, 0 }, /* class */
424 { roff_insec, NULL, NULL, 0 }, /* close */
425 { roff_unsupp, NULL, NULL, 0 }, /* CL */
426 { roff_line_ignore, NULL, NULL, 0 }, /* color */
427 { roff_unsupp, NULL, NULL, 0 }, /* composite */
428 { roff_unsupp, NULL, NULL, 0 }, /* continue */
429 { roff_line_ignore, NULL, NULL, 0 }, /* cp */
430 { roff_line_ignore, NULL, NULL, 0 }, /* cropat */
431 { roff_line_ignore, NULL, NULL, 0 }, /* cs */
432 { roff_line_ignore, NULL, NULL, 0 }, /* cu */
433 { roff_unsupp, NULL, NULL, 0 }, /* da */
434 { roff_unsupp, NULL, NULL, 0 }, /* dch */
435 { roff_Dd, NULL, NULL, 0 }, /* Dd */
436 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de */
437 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de1 */
438 { roff_line_ignore, NULL, NULL, 0 }, /* defcolor */
439 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei */
440 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei1 */
441 { roff_unsupp, NULL, NULL, 0 }, /* device */
442 { roff_unsupp, NULL, NULL, 0 }, /* devicem */
443 { roff_unsupp, NULL, NULL, 0 }, /* di */
444 { roff_unsupp, NULL, NULL, 0 }, /* do */
445 { roff_ds, NULL, NULL, 0 }, /* ds */
446 { roff_ds, NULL, NULL, 0 }, /* ds1 */
447 { roff_unsupp, NULL, NULL, 0 }, /* dwh */
448 { roff_unsupp, NULL, NULL, 0 }, /* dt */
449 { roff_ec, NULL, NULL, 0 }, /* ec */
450 { roff_unsupp, NULL, NULL, 0 }, /* ecr */
451 { roff_unsupp, NULL, NULL, 0 }, /* ecs */
452 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* el */
453 { roff_unsupp, NULL, NULL, 0 }, /* em */
454 { roff_EN, NULL, NULL, 0 }, /* EN */
455 { roff_eo, NULL, NULL, 0 }, /* eo */
456 { roff_unsupp, NULL, NULL, 0 }, /* EP */
457 { roff_EQ, NULL, NULL, 0 }, /* EQ */
458 { roff_line_ignore, NULL, NULL, 0 }, /* errprint */
459 { roff_unsupp, NULL, NULL, 0 }, /* ev */
460 { roff_unsupp, NULL, NULL, 0 }, /* evc */
461 { roff_unsupp, NULL, NULL, 0 }, /* ex */
462 { roff_line_ignore, NULL, NULL, 0 }, /* fallback */
463 { roff_line_ignore, NULL, NULL, 0 }, /* fam */
464 { roff_unsupp, NULL, NULL, 0 }, /* fc */
465 { roff_unsupp, NULL, NULL, 0 }, /* fchar */
466 { roff_line_ignore, NULL, NULL, 0 }, /* fcolor */
467 { roff_line_ignore, NULL, NULL, 0 }, /* fdeferlig */
468 { roff_line_ignore, NULL, NULL, 0 }, /* feature */
469 { roff_line_ignore, NULL, NULL, 0 }, /* fkern */
470 { roff_line_ignore, NULL, NULL, 0 }, /* fl */
471 { roff_line_ignore, NULL, NULL, 0 }, /* flig */
472 { roff_line_ignore, NULL, NULL, 0 }, /* fp */
473 { roff_line_ignore, NULL, NULL, 0 }, /* fps */
474 { roff_unsupp, NULL, NULL, 0 }, /* fschar */
475 { roff_line_ignore, NULL, NULL, 0 }, /* fspacewidth */
476 { roff_line_ignore, NULL, NULL, 0 }, /* fspecial */
477 { roff_line_ignore, NULL, NULL, 0 }, /* ftr */
478 { roff_line_ignore, NULL, NULL, 0 }, /* fzoom */
479 { roff_line_ignore, NULL, NULL, 0 }, /* gcolor */
480 { roff_line_ignore, NULL, NULL, 0 }, /* hc */
481 { roff_line_ignore, NULL, NULL, 0 }, /* hcode */
482 { roff_line_ignore, NULL, NULL, 0 }, /* hidechar */
483 { roff_line_ignore, NULL, NULL, 0 }, /* hla */
484 { roff_line_ignore, NULL, NULL, 0 }, /* hlm */
485 { roff_line_ignore, NULL, NULL, 0 }, /* hpf */
486 { roff_line_ignore, NULL, NULL, 0 }, /* hpfa */
487 { roff_line_ignore, NULL, NULL, 0 }, /* hpfcode */
488 { roff_line_ignore, NULL, NULL, 0 }, /* hw */
489 { roff_line_ignore, NULL, NULL, 0 }, /* hy */
490 { roff_line_ignore, NULL, NULL, 0 }, /* hylang */
491 { roff_line_ignore, NULL, NULL, 0 }, /* hylen */
492 { roff_line_ignore, NULL, NULL, 0 }, /* hym */
493 { roff_line_ignore, NULL, NULL, 0 }, /* hypp */
494 { roff_line_ignore, NULL, NULL, 0 }, /* hys */
495 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* ie */
496 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* if */
497 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ig */
498 { roff_unsupp, NULL, NULL, 0 }, /* index */
499 { roff_it, NULL, NULL, 0 }, /* it */
500 { roff_unsupp, NULL, NULL, 0 }, /* itc */
501 { roff_line_ignore, NULL, NULL, 0 }, /* IX */
502 { roff_line_ignore, NULL, NULL, 0 }, /* kern */
503 { roff_line_ignore, NULL, NULL, 0 }, /* kernafter */
504 { roff_line_ignore, NULL, NULL, 0 }, /* kernbefore */
505 { roff_line_ignore, NULL, NULL, 0 }, /* kernpair */
506 { roff_unsupp, NULL, NULL, 0 }, /* lc */
507 { roff_unsupp, NULL, NULL, 0 }, /* lc_ctype */
508 { roff_unsupp, NULL, NULL, 0 }, /* lds */
509 { roff_unsupp, NULL, NULL, 0 }, /* length */
510 { roff_line_ignore, NULL, NULL, 0 }, /* letadj */
511 { roff_insec, NULL, NULL, 0 }, /* lf */
512 { roff_line_ignore, NULL, NULL, 0 }, /* lg */
513 { roff_line_ignore, NULL, NULL, 0 }, /* lhang */
514 { roff_unsupp, NULL, NULL, 0 }, /* linetabs */
515 { roff_unsupp, NULL, NULL, 0 }, /* lnr */
516 { roff_unsupp, NULL, NULL, 0 }, /* lnrf */
517 { roff_unsupp, NULL, NULL, 0 }, /* lpfx */
518 { roff_line_ignore, NULL, NULL, 0 }, /* ls */
519 { roff_unsupp, NULL, NULL, 0 }, /* lsm */
520 { roff_line_ignore, NULL, NULL, 0 }, /* lt */
521 { roff_line_ignore, NULL, NULL, 0 }, /* mediasize */
522 { roff_line_ignore, NULL, NULL, 0 }, /* minss */
523 { roff_line_ignore, NULL, NULL, 0 }, /* mk */
524 { roff_insec, NULL, NULL, 0 }, /* mso */
525 { roff_line_ignore, NULL, NULL, 0 }, /* na */
526 { roff_line_ignore, NULL, NULL, 0 }, /* ne */
527 { roff_line_ignore, NULL, NULL, 0 }, /* nh */
528 { roff_line_ignore, NULL, NULL, 0 }, /* nhychar */
529 { roff_unsupp, NULL, NULL, 0 }, /* nm */
530 { roff_unsupp, NULL, NULL, 0 }, /* nn */
531 { roff_nop, NULL, NULL, 0 }, /* nop */
532 { roff_nr, NULL, NULL, 0 }, /* nr */
533 { roff_unsupp, NULL, NULL, 0 }, /* nrf */
534 { roff_line_ignore, NULL, NULL, 0 }, /* nroff */
535 { roff_line_ignore, NULL, NULL, 0 }, /* ns */
536 { roff_insec, NULL, NULL, 0 }, /* nx */
537 { roff_insec, NULL, NULL, 0 }, /* open */
538 { roff_insec, NULL, NULL, 0 }, /* opena */
539 { roff_line_ignore, NULL, NULL, 0 }, /* os */
540 { roff_unsupp, NULL, NULL, 0 }, /* output */
541 { roff_line_ignore, NULL, NULL, 0 }, /* padj */
542 { roff_line_ignore, NULL, NULL, 0 }, /* papersize */
543 { roff_line_ignore, NULL, NULL, 0 }, /* pc */
544 { roff_line_ignore, NULL, NULL, 0 }, /* pev */
545 { roff_insec, NULL, NULL, 0 }, /* pi */
546 { roff_unsupp, NULL, NULL, 0 }, /* PI */
547 { roff_line_ignore, NULL, NULL, 0 }, /* pl */
548 { roff_line_ignore, NULL, NULL, 0 }, /* pm */
549 { roff_line_ignore, NULL, NULL, 0 }, /* pn */
550 { roff_line_ignore, NULL, NULL, 0 }, /* pnr */
551 { roff_line_ignore, NULL, NULL, 0 }, /* ps */
552 { roff_unsupp, NULL, NULL, 0 }, /* psbb */
553 { roff_unsupp, NULL, NULL, 0 }, /* pshape */
554 { roff_insec, NULL, NULL, 0 }, /* pso */
555 { roff_line_ignore, NULL, NULL, 0 }, /* ptr */
556 { roff_line_ignore, NULL, NULL, 0 }, /* pvs */
557 { roff_unsupp, NULL, NULL, 0 }, /* rchar */
558 { roff_line_ignore, NULL, NULL, 0 }, /* rd */
559 { roff_line_ignore, NULL, NULL, 0 }, /* recursionlimit */
560 { roff_return, NULL, NULL, 0 }, /* return */
561 { roff_unsupp, NULL, NULL, 0 }, /* rfschar */
562 { roff_line_ignore, NULL, NULL, 0 }, /* rhang */
563 { roff_rm, NULL, NULL, 0 }, /* rm */
564 { roff_rn, NULL, NULL, 0 }, /* rn */
565 { roff_unsupp, NULL, NULL, 0 }, /* rnn */
566 { roff_rr, NULL, NULL, 0 }, /* rr */
567 { roff_line_ignore, NULL, NULL, 0 }, /* rs */
568 { roff_line_ignore, NULL, NULL, 0 }, /* rt */
569 { roff_unsupp, NULL, NULL, 0 }, /* schar */
570 { roff_line_ignore, NULL, NULL, 0 }, /* sentchar */
571 { roff_line_ignore, NULL, NULL, 0 }, /* shc */
572 { roff_shift, NULL, NULL, 0 }, /* shift */
573 { roff_line_ignore, NULL, NULL, 0 }, /* sizes */
574 { roff_so, NULL, NULL, 0 }, /* so */
575 { roff_line_ignore, NULL, NULL, 0 }, /* spacewidth */
576 { roff_line_ignore, NULL, NULL, 0 }, /* special */
577 { roff_line_ignore, NULL, NULL, 0 }, /* spreadwarn */
578 { roff_line_ignore, NULL, NULL, 0 }, /* ss */
579 { roff_line_ignore, NULL, NULL, 0 }, /* sty */
580 { roff_unsupp, NULL, NULL, 0 }, /* substring */
581 { roff_line_ignore, NULL, NULL, 0 }, /* sv */
582 { roff_insec, NULL, NULL, 0 }, /* sy */
583 { roff_T_, NULL, NULL, 0 }, /* T& */
584 { roff_unsupp, NULL, NULL, 0 }, /* tc */
585 { roff_TE, NULL, NULL, 0 }, /* TE */
586 { roff_Dd, NULL, NULL, 0 }, /* TH */
587 { roff_line_ignore, NULL, NULL, 0 }, /* tkf */
588 { roff_unsupp, NULL, NULL, 0 }, /* tl */
589 { roff_line_ignore, NULL, NULL, 0 }, /* tm */
590 { roff_line_ignore, NULL, NULL, 0 }, /* tm1 */
591 { roff_line_ignore, NULL, NULL, 0 }, /* tmc */
592 { roff_tr, NULL, NULL, 0 }, /* tr */
593 { roff_line_ignore, NULL, NULL, 0 }, /* track */
594 { roff_line_ignore, NULL, NULL, 0 }, /* transchar */
595 { roff_insec, NULL, NULL, 0 }, /* trf */
596 { roff_line_ignore, NULL, NULL, 0 }, /* trimat */
597 { roff_unsupp, NULL, NULL, 0 }, /* trin */
598 { roff_unsupp, NULL, NULL, 0 }, /* trnt */
599 { roff_line_ignore, NULL, NULL, 0 }, /* troff */
600 { roff_TS, NULL, NULL, 0 }, /* TS */
601 { roff_line_ignore, NULL, NULL, 0 }, /* uf */
602 { roff_line_ignore, NULL, NULL, 0 }, /* ul */
603 { roff_unsupp, NULL, NULL, 0 }, /* unformat */
604 { roff_line_ignore, NULL, NULL, 0 }, /* unwatch */
605 { roff_line_ignore, NULL, NULL, 0 }, /* unwatchn */
606 { roff_line_ignore, NULL, NULL, 0 }, /* vpt */
607 { roff_line_ignore, NULL, NULL, 0 }, /* vs */
608 { roff_line_ignore, NULL, NULL, 0 }, /* warn */
609 { roff_line_ignore, NULL, NULL, 0 }, /* warnscale */
610 { roff_line_ignore, NULL, NULL, 0 }, /* watch */
611 { roff_line_ignore, NULL, NULL, 0 }, /* watchlength */
612 { roff_line_ignore, NULL, NULL, 0 }, /* watchn */
613 { roff_unsupp, NULL, NULL, 0 }, /* wh */
614 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/
615 { roff_insec, NULL, NULL, 0 }, /* write */
616 { roff_insec, NULL, NULL, 0 }, /* writec */
617 { roff_insec, NULL, NULL, 0 }, /* writem */
618 { roff_line_ignore, NULL, NULL, 0 }, /* xflag */
619 { roff_cblock, NULL, NULL, 0 }, /* . */
620 { roff_renamed, NULL, NULL, 0 },
621 { roff_userdef, NULL, NULL, 0 }
622 };
623
624 /* Array of injected predefined strings. */
625 #define PREDEFS_MAX 38
626 static const struct predef predefs[PREDEFS_MAX] = {
627 #include "predefs.in"
628 };
629
/* File-wide state of a pending centering request. */
static	int		 roffce_lines;	/* number of input lines to center */
static	struct roff_node *roffce_node;	/* active request */

/* File-wide state of a pending input line trap. */
static	int		 roffit_lines;	/* number of lines to delay */
static	char		*roffit_macro;	/* nil-terminated macro line */
634
635
636 /* --- request table ------------------------------------------------------ */
637
638 struct ohash *
639 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
640 {
641 struct ohash *htab;
642 struct roffreq *req;
643 enum roff_tok tok;
644 size_t sz;
645 unsigned int slot;
646
647 htab = mandoc_malloc(sizeof(*htab));
648 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
649
650 for (tok = mintok; tok < maxtok; tok++) {
651 if (roff_name[tok] == NULL)
652 continue;
653 sz = strlen(roff_name[tok]);
654 req = mandoc_malloc(sizeof(*req) + sz + 1);
655 req->tok = tok;
656 memcpy(req->name, roff_name[tok], sz + 1);
657 slot = ohash_qlookup(htab, req->name);
658 ohash_insert(htab, slot, req);
659 }
660 return htab;
661 }
662
/*
 * Free a table built by roffhash_alloc(), including all its entries.
 * Passing NULL is allowed and does nothing.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 iter;

	if (htab != NULL) {
		entry = ohash_first(htab, &iter);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &iter);
		}
		ohash_delete(htab);
		free(htab);
	}
}
677
678 enum roff_tok
679 roffhash_find(struct ohash *htab, const char *name, size_t sz)
680 {
681 struct roffreq *req;
682 const char *end;
683
684 if (sz) {
685 end = name + sz;
686 req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
687 } else
688 req = ohash_find(htab, ohash_qlookup(htab, name));
689 return req == NULL ? TOKEN_NONE : req->tok;
690 }
691
692 /* --- stack of request blocks -------------------------------------------- */
693
694 /*
695 * Pop the current node off of the stack of roff instructions currently
696 * pending. Return 1 if it is a loop or 0 otherwise.
697 */
698 static int
699 roffnode_pop(struct roff *r)
700 {
701 struct roffnode *p;
702 int inloop;
703
704 p = r->last;
705 inloop = p->tok == ROFF_while;
706 r->last = p->parent;
707 free(p->name);
708 free(p->end);
709 free(p);
710 return inloop;
711 }
712
713 /*
714 * Push a roff node onto the instruction stack. This must later be
715 * removed with roffnode_pop().
716 */
717 static void
718 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
719 int line, int col)
720 {
721 struct roffnode *p;
722
723 p = mandoc_calloc(1, sizeof(struct roffnode));
724 p->tok = tok;
725 if (name)
726 p->name = mandoc_strdup(name);
727 p->parent = r->last;
728 p->line = line;
729 p->col = col;
730 p->rule = p->parent ? p->parent->rule : 0;
731
732 r->last = p;
733 }
734
735 /* --- roff parser state data management ---------------------------------- */
736
737 static void
738 roff_free1(struct roff *r)
739 {
740 int i;
741
742 tbl_free(r->first_tbl);
743 r->first_tbl = r->last_tbl = r->tbl = NULL;
744
745 eqn_free(r->last_eqn);
746 r->last_eqn = r->eqn = NULL;
747
748 while (r->mstackpos >= 0)
749 roff_userret(r);
750
751 while (r->last)
752 roffnode_pop(r);
753
754 free (r->rstack);
755 r->rstack = NULL;
756 r->rstacksz = 0;
757 r->rstackpos = -1;
758
759 roff_freereg(r->regtab);
760 r->regtab = NULL;
761
762 roff_freestr(r->strtab);
763 roff_freestr(r->rentab);
764 roff_freestr(r->xmbtab);
765 r->strtab = r->rentab = r->xmbtab = NULL;
766
767 if (r->xtab)
768 for (i = 0; i < 128; i++)
769 free(r->xtab[i].p);
770 free(r->xtab);
771 r->xtab = NULL;
772 }
773
774 void
775 roff_reset(struct roff *r)
776 {
777 roff_free1(r);
778 r->options |= MPARSE_COMMENT;
779 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
780 r->control = '\0';
781 r->escape = '\\';
782 roffce_lines = 0;
783 roffce_node = NULL;
784 roffit_lines = 0;
785 roffit_macro = NULL;
786 }
787
788 void
789 roff_free(struct roff *r)
790 {
791 int i;
792
793 roff_free1(r);
794 for (i = 0; i < r->mstacksz; i++)
795 free(r->mstack[i].argv);
796 free(r->mstack);
797 roffhash_free(r->reqtab);
798 free(r);
799 }
800
801 struct roff *
802 roff_alloc(int options)
803 {
804 struct roff *r;
805
806 r = mandoc_calloc(1, sizeof(struct roff));
807 r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
808 r->options = options | MPARSE_COMMENT;
809 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
810 r->mstackpos = -1;
811 r->rstackpos = -1;
812 r->escape = '\\';
813 return r;
814 }
815
816 /* --- syntax tree state data management ---------------------------------- */
817
818 static void
819 roff_man_free1(struct roff_man *man)
820 {
821 if (man->meta.first != NULL)
822 roff_node_delete(man, man->meta.first);
823 free(man->meta.msec);
824 free(man->meta.vol);
825 free(man->meta.os);
826 free(man->meta.arch);
827 free(man->meta.title);
828 free(man->meta.name);
829 free(man->meta.date);
830 free(man->meta.sodest);
831 }
832
833 void
834 roff_state_reset(struct roff_man *man)
835 {
836 man->last = man->meta.first;
837 man->last_es = NULL;
838 man->flags = 0;
839 man->lastsec = man->lastnamed = SEC_NONE;
840 man->next = ROFF_NEXT_CHILD;
841 roff_setreg(man->roff, "nS", 0, '=');
842 }
843
844 static void
845 roff_man_alloc1(struct roff_man *man)
846 {
847 memset(&man->meta, 0, sizeof(man->meta));
848 man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
849 man->meta.first->type = ROFFT_ROOT;
850 man->meta.macroset = MACROSET_NONE;
851 roff_state_reset(man);
852 }
853
/*
 * Discard the current document's tree and metadata,
 * then start over with an empty tree.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);	/* drop the old tree and metadata */
	roff_man_alloc1(man);	/* set up a fresh, empty tree */
}
860
861 void
862 roff_man_free(struct roff_man *man)
863 {
864 roff_man_free1(man);
865 free(man->os_r);
866 free(man);
867 }
868
869 struct roff_man *
870 roff_man_alloc(struct roff *roff, const char *os_s, int quick)
871 {
872 struct roff_man *man;
873
874 man = mandoc_calloc(1, sizeof(*man));
875 man->roff = roff;
876 man->os_s = os_s;
877 man->quick = quick;
878 roff_man_alloc1(man);
879 roff->man = man;
880 return man;
881 }
882
883 /* --- syntax tree handling ----------------------------------------------- */
884
885 struct roff_node *
886 roff_node_alloc(struct roff_man *man, int line, int pos,
887 enum roff_type type, int tok)
888 {
889 struct roff_node *n;
890
891 n = mandoc_calloc(1, sizeof(*n));
892 n->line = line;
893 n->pos = pos;
894 n->tok = tok;
895 n->type = type;
896 n->sec = man->lastsec;
897
898 if (man->flags & MDOC_SYNOPSIS)
899 n->flags |= NODE_SYNPRETTY;
900 else
901 n->flags &= ~NODE_SYNPRETTY;
902 if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL)
903 n->flags |= NODE_NOFILL;
904 else
905 n->flags &= ~NODE_NOFILL;
906 if (man->flags & MDOC_NEWLINE)
907 n->flags |= NODE_LINE;
908 man->flags &= ~MDOC_NEWLINE;
909
910 return n;
911 }
912
913 void
914 roff_node_append(struct roff_man *man, struct roff_node *n)
915 {
916
917 switch (man->next) {
918 case ROFF_NEXT_SIBLING:
919 if (man->last->next != NULL) {
920 n->next = man->last->next;
921 man->last->next->prev = n;
922 } else
923 man->last->parent->last = n;
924 man->last->next = n;
925 n->prev = man->last;
926 n->parent = man->last->parent;
927 break;
928 case ROFF_NEXT_CHILD:
929 if (man->last->child != NULL) {
930 n->next = man->last->child;
931 man->last->child->prev = n;
932 } else
933 man->last->last = n;
934 man->last->child = n;
935 n->parent = man->last;
936 break;
937 default:
938 abort();
939 }
940 man->last = n;
941
942 switch (n->type) {
943 case ROFFT_HEAD:
944 n->parent->head = n;
945 break;
946 case ROFFT_BODY:
947 if (n->end != ENDBODY_NOT)
948 return;
949 n->parent->body = n;
950 break;
951 case ROFFT_TAIL:
952 n->parent->tail = n;
953 break;
954 default:
955 return;
956 }
957
958 /*
959 * Copy over the normalised-data pointer of our parent. Not
960 * everybody has one, but copying a null pointer is fine.
961 */
962
963 n->norm = n->parent->norm;
964 assert(n->parent->type == ROFFT_BLOCK);
965 }
966
967 void
968 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
969 {
970 struct roff_node *n;
971
972 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
973 n->string = roff_strdup(man->roff, word);
974 roff_node_append(man, n);
975 n->flags |= NODE_VALID | NODE_ENDED;
976 man->next = ROFF_NEXT_SIBLING;
977 }
978
979 void
980 roff_word_append(struct roff_man *man, const char *word)
981 {
982 struct roff_node *n;
983 char *addstr, *newstr;
984
985 n = man->last;
986 addstr = roff_strdup(man->roff, word);
987 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
988 free(addstr);
989 free(n->string);
990 n->string = newstr;
991 man->next = ROFF_NEXT_SIBLING;
992 }
993
994 void
995 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
996 {
997 struct roff_node *n;
998
999 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
1000 roff_node_append(man, n);
1001 man->next = ROFF_NEXT_CHILD;
1002 }
1003
1004 struct roff_node *
1005 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
1006 {
1007 struct roff_node *n;
1008
1009 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1010 roff_node_append(man, n);
1011 man->next = ROFF_NEXT_CHILD;
1012 return n;
1013 }
1014
1015 struct roff_node *
1016 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1017 {
1018 struct roff_node *n;
1019
1020 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1021 roff_node_append(man, n);
1022 man->next = ROFF_NEXT_CHILD;
1023 return n;
1024 }
1025
1026 struct roff_node *
1027 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1028 {
1029 struct roff_node *n;
1030
1031 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1032 roff_node_append(man, n);
1033 man->next = ROFF_NEXT_CHILD;
1034 return n;
1035 }
1036
1037 static void
1038 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1039 {
1040 struct roff_node *n;
1041 struct tbl_span *span;
1042
1043 if (man->meta.macroset == MACROSET_MAN)
1044 man_breakscope(man, ROFF_TS);
1045 while ((span = tbl_span(tbl)) != NULL) {
1046 n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1047 n->span = span;
1048 roff_node_append(man, n);
1049 n->flags |= NODE_VALID | NODE_ENDED;
1050 man->next = ROFF_NEXT_SIBLING;
1051 }
1052 }
1053
1054 void
1055 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1056 {
1057
1058 /* Adjust siblings. */
1059
1060 if (n->prev)
1061 n->prev->next = n->next;
1062 if (n->next)
1063 n->next->prev = n->prev;
1064
1065 /* Adjust parent. */
1066
1067 if (n->parent != NULL) {
1068 if (n->parent->child == n)
1069 n->parent->child = n->next;
1070 if (n->parent->last == n)
1071 n->parent->last = n->prev;
1072 }
1073
1074 /* Adjust parse point. */
1075
1076 if (man == NULL)
1077 return;
1078 if (man->last == n) {
1079 if (n->prev == NULL) {
1080 man->last = n->parent;
1081 man->next = ROFF_NEXT_CHILD;
1082 } else {
1083 man->last = n->prev;
1084 man->next = ROFF_NEXT_SIBLING;
1085 }
1086 }
1087 if (man->meta.first == n)
1088 man->meta.first = NULL;
1089 }
1090
1091 void
1092 roff_node_relink(struct roff_man *man, struct roff_node *n)
1093 {
1094 roff_node_unlink(man, n);
1095 n->prev = n->next = NULL;
1096 roff_node_append(man, n);
1097 }
1098
1099 void
1100 roff_node_free(struct roff_node *n)
1101 {
1102
1103 if (n->args != NULL)
1104 mdoc_argv_free(n->args);
1105 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1106 free(n->norm);
1107 eqn_box_free(n->eqn);
1108 free(n->string);
1109 free(n->tag);
1110 free(n);
1111 }
1112
1113 void
1114 roff_node_delete(struct roff_man *man, struct roff_node *n)
1115 {
1116
1117 while (n->child != NULL)
1118 roff_node_delete(man, n->child);
1119 roff_node_unlink(man, n);
1120 roff_node_free(n);
1121 }
1122
1123 int
1124 roff_node_transparent(struct roff_node *n)
1125 {
1126 if (n == NULL)
1127 return 0;
1128 if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
1129 return 1;
1130 return roff_tok_transparent(n->tok);
1131 }
1132
1133 int
1134 roff_tok_transparent(enum roff_tok tok)
1135 {
1136 switch (tok) {
1137 case ROFF_ft:
1138 case ROFF_ll:
1139 case ROFF_mc:
1140 case ROFF_po:
1141 case ROFF_ta:
1142 case MDOC_Db:
1143 case MDOC_Es:
1144 case MDOC_Sm:
1145 case MDOC_Tg:
1146 case MAN_DT:
1147 case MAN_UC:
1148 case MAN_PD:
1149 case MAN_AT:
1150 return 1;
1151 default:
1152 return 0;
1153 }
1154 }
1155
1156 struct roff_node *
1157 roff_node_child(struct roff_node *n)
1158 {
1159 for (n = n->child; roff_node_transparent(n); n = n->next)
1160 continue;
1161 return n;
1162 }
1163
1164 struct roff_node *
1165 roff_node_prev(struct roff_node *n)
1166 {
1167 do {
1168 n = n->prev;
1169 } while (roff_node_transparent(n));
1170 return n;
1171 }
1172
1173 struct roff_node *
1174 roff_node_next(struct roff_node *n)
1175 {
1176 do {
1177 n = n->next;
1178 } while (roff_node_transparent(n));
1179 return n;
1180 }
1181
1182 void
1183 deroff(char **dest, const struct roff_node *n)
1184 {
1185 char *cp;
1186 size_t sz;
1187
1188 if (n->string == NULL) {
1189 for (n = n->child; n != NULL; n = n->next)
1190 deroff(dest, n);
1191 return;
1192 }
1193
1194 /* Skip leading whitespace. */
1195
1196 for (cp = n->string; *cp != '\0'; cp++) {
1197 if (cp[0] == '\\' && cp[1] != '\0' &&
1198 strchr(" %&0^|~", cp[1]) != NULL)
1199 cp++;
1200 else if ( ! isspace((unsigned char)*cp))
1201 break;
1202 }
1203
1204 /* Skip trailing backslash. */
1205
1206 sz = strlen(cp);
1207 if (sz > 0 && cp[sz - 1] == '\\')
1208 sz--;
1209
1210 /* Skip trailing whitespace. */
1211
1212 for (; sz; sz--)
1213 if ( ! isspace((unsigned char)cp[sz-1]))
1214 break;
1215
1216 /* Skip empty strings. */
1217
1218 if (sz == 0)
1219 return;
1220
1221 if (*dest == NULL) {
1222 *dest = mandoc_strndup(cp, sz);
1223 return;
1224 }
1225
1226 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1227 free(*dest);
1228 *dest = cp;
1229 }
1230
1231 /* --- main functions of the roff parser ---------------------------------- */
1232
/*
 * In the current line, expand escape sequences that produce parsable
 * input text.  Also check the syntax of the remaining escape sequences,
 * which typically produce output glyphs or change formatter state.
 *
 * The text to process starts at buf->buf + pos.  The argument newesc
 * is the escape character to look for; roff_getarg() passes ASCII_ESC.
 * The buffer is modified in place and may be replaced by a newly
 * allocated one, in which case buf->buf and buf->sz are updated.
 *
 * Returns ROFF_CONT when the line can be processed further,
 * ROFF_IGN | ROFF_APPEND when the line ends in a continuation and
 * the next input line has to be appended, or ROFF_IGN after
 * reporting MANDOCERR_ROFFLOOP.
 */
static int
roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc)
{
	struct mctx	*ctx;	/* current macro call context */
	char		 ubuf[24]; /* buffer to print the number */
	struct roff_node *n;	/* used for header comments */
	const char	*start;	/* start of the string to process */
	char		*stesc;	/* start of an escape sequence ('\\') */
	const char	*esct;	/* type of escape sequence */
	char		*ep;	/* end of comment string */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy buf->buf to */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	size_t		 asz;	/* length of the replacement */
	size_t		 rsz;	/* length of the rest of the string */
	int		 inaml;	/* length returned from mandoc_escape() */
	int		 expand_count;	/* to avoid infinite loops */
	int		 npos;	/* position in numeric expression */
	int		 arg_complete; /* argument not interrupted by eol */
	int		 quote_args; /* true for \\$@, false for \\$* */
	int		 done;	/* no more input available */
	int		 deftype; /* type of definition to paste */
	int		 rcsid;	/* kind of RCS id seen */
	enum mandocerr	 err;	/* for escape sequence problems */
	char		 sign;	/* increment number register */
	char		 term;	/* character terminating the escape */

	/* Search forward for comments. */

	done = 0;
	start = buf->buf + pos;
	for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
		if (stesc[0] != newesc || stesc[1] == '\0')
			continue;
		/* Look at the character following the escape character. */
		stesc++;
		if (*stesc != '"' && *stesc != '#')
			continue;

		/* Comment found, look for RCS id. */

		rcsid = 0;
		if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_OPENBSD;
			cp += 8;	/* length of the keyword */
		} else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_NETBSD;
			cp += 7;	/* length of the keyword */
		}
		/*
		 * Only accept the keyword if it is not followed by
		 * an alphanumeric character and if a dollar sign
		 * occurs later on the line.
		 */
		if (cp != NULL &&
		    isalnum((unsigned char)*cp) == 0 &&
		    strchr(cp, '$') != NULL) {
			if (r->man->meta.rcsids & rcsid)
				mandoc_msg(MANDOCERR_RCS_REP, ln,
				    (int)(stesc - buf->buf) + 1,
				    "%s", stesc + 1);
			r->man->meta.rcsids |= rcsid;
		}

		/*
		 * Handle trailing whitespace.
		 * The post-decrement moves stesc back
		 * onto the escape character.
		 */

		ep = strchr(stesc--, '\0') - 1;
		if (*ep == '\n') {
			done = 1;
			ep--;
		}
		if (*ep == ' ' || *ep == '\t')
			mandoc_msg(MANDOCERR_SPACE_EOL,
			    ln, (int)(ep - buf->buf), NULL);

		/*
		 * Save comments preceding the title macro
		 * in the syntax tree.
		 */

		if (newesc != ASCII_ESC && r->options & MPARSE_COMMENT) {
			while (*ep == ' ' || *ep == '\t')
				ep--;
			ep[1] = '\0';
			n = roff_node_alloc(r->man,
			    ln, stesc + 1 - buf->buf,
			    ROFFT_COMMENT, TOKEN_NONE);
			/* Skip the escape and the comment character. */
			n->string = mandoc_strdup(stesc + 2);
			roff_node_append(r->man, n);
			n->flags |= NODE_VALID | NODE_ENDED;
			r->man->next = ROFF_NEXT_SIBLING;
		}

		/* Line continuation with comment. */

		if (stesc[1] == '#') {
			*stesc = '\0';
			return ROFF_IGN | ROFF_APPEND;
		}

		/*
		 * Discard normal comments, together with the blanks
		 * preceding them, but keep a blank that is itself
		 * preceded by a backslash.
		 */

		while (stesc > start && stesc[-1] == ' ' &&
		    (stesc == start + 1 || stesc[-2] != '\\'))
			stesc--;
		*stesc = '\0';
		break;
	}
	/* Nothing is left to process. */
	if (stesc == start)
		return ROFF_CONT;
	stesc--;

	/* Notice the end of the input. */

	if (*stesc == '\n') {
		*stesc-- = '\0';
		done = 1;
	}

	/* Now scan backwards from the end of the line for escapes. */

	expand_count = 0;
	while (stesc >= start) {
		if (*stesc != newesc) {

			/*
			 * If we have a non-standard escape character,
			 * escape literal backslashes because all
			 * processing in subsequent functions uses
			 * the standard escaping rules.
			 */

			if (newesc != ASCII_ESC && *stesc == '\\') {
				*stesc = '\0';
				buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
				    buf->buf, stesc + 1) + 1;
				/* Relocate the pointers into the new buffer. */
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				free(buf->buf);
				buf->buf = nbuf;
			}

			/* Search backwards for the next escape. */

			stesc--;
			continue;
		}

		/*
		 * If it is escaped, skip it.  To find out, count
		 * the escape characters immediately preceding it.
		 */

		for (cp = stesc - 1; cp >= start; cp--)
			if (*cp != r->escape)
				break;

		if ((stesc - cp) % 2 == 0) {
			/* Translate the whole run to backslashes. */
			while (stesc > cp)
				*stesc-- = '\\';
			continue;
		} else if (stesc[1] != '\0') {
			*stesc = '\\';
		} else {
			/*
			 * An escape character at the end of the line:
			 * remove it and request the next input line
			 * unless the end of the input was seen.
			 */
			*stesc-- = '\0';
			if (done)
				continue;
			else
				return ROFF_IGN | ROFF_APPEND;
		}

		/* Decide whether to expand or to check only. */

		term = '\0';
		cp = stesc + 1;
		while (*cp == 'E')
			cp++;
		esct = cp;
		switch (*esct) {
		case '*':
		case '$':
			res = NULL;
			break;
		case 'B':
		case 'w':
			/* The argument is enclosed in a delimiter. */
			term = cp[1];
			/* FALLTHROUGH */
		case 'n':
			sign = cp[1];
			if (sign == '+' || sign == '-')
				cp++;
			res = ubuf;
			break;
		default:
			/* Not expanded here, check the syntax only. */
			err = MANDOCERR_OK;
			switch(mandoc_escape(&cp, &stnam, &inaml)) {
			case ESCAPE_SPECIAL:
				if (mchars_spec2cp(stnam, inaml) >= 0)
					break;
				/* FALLTHROUGH */
			case ESCAPE_ERROR:
				err = MANDOCERR_ESC_BAD;
				break;
			case ESCAPE_UNDEF:
				err = MANDOCERR_ESC_UNDEF;
				break;
			case ESCAPE_UNSUPP:
				err = MANDOCERR_ESC_UNSUPP;
				break;
			default:
				break;
			}
			if (err != MANDOCERR_OK)
				mandoc_msg(err, ln, (int)(stesc - buf->buf),
				    "%.*s", (int)(cp - stesc), stesc);
			stesc--;
			continue;
		}

		if (EXPAND_LIMIT < ++expand_count) {
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 * A maxl of zero means that the name extends
		 * up to the terminating character term.
		 */

		if (term == '\0') {
			switch (*++cp) {
			case '\0':
				maxl = 0;
				break;
			case '(':
				cp++;
				maxl = 2;
				break;
			case '[':
				cp++;
				term = ']';
				maxl = 0;
				break;
			default:
				maxl = 1;
				break;
			}
		} else {
			/* Skip the escape letter and the opening delimiter. */
			cp += 2;
			maxl = 0;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		naml = 0;
		arg_complete = 1;
		while (maxl == 0 || naml < maxl) {
			if (*cp == '\0') {
				mandoc_msg(MANDOCERR_ESC_BAD, ln,
				    (int)(stesc - buf->buf), "%s", stesc);
				arg_complete = 0;
				break;
			}
			if (maxl == 0 && *cp == term) {
				cp++;
				break;
			}
			if (*cp++ != '\\' || *esct != 'w') {
				naml++;
				continue;
			}
			/*
			 * Inside the argument of \w, an escape sequence
			 * is counted as one character or not at all,
			 * depending on its type.
			 */
			switch (mandoc_escape(&cp, NULL, NULL)) {
			case ESCAPE_SPECIAL:
			case ESCAPE_UNICODE:
			case ESCAPE_NUMBERED:
			case ESCAPE_UNDEF:
			case ESCAPE_OVERSTRIKE:
				naml++;
				break;
			default:
				break;
			}
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		switch (*esct) {
		case '*':
			if (arg_complete) {
				deftype = ROFFDEF_USER | ROFFDEF_PRE;
				res = roff_getstrn(r, stnam, naml, &deftype);

				/*
				 * If not overridden, let \*(.T
				 * through to the formatters.
				 */

				if (res == NULL && naml == 2 &&
				    stnam[0] == '.' && stnam[1] == 'T') {
					roff_setstrn(&r->strtab,
					    ".T", 2, NULL, 0, 0);
					stesc--;
					continue;
				}
			}
			break;
		case '$':
			/* Macro arguments only exist during macro calls. */
			if (r->mstackpos < 0) {
				mandoc_msg(MANDOCERR_ARG_UNDEF, ln,
				    (int)(stesc - buf->buf), "%.3s", stesc);
				break;
			}
			ctx = r->mstack + r->mstackpos;
			/* The digits 1 to 9 select a single argument. */
			npos = esct[1] - '1';
			if (npos >= 0 && npos <= 8) {
				res = npos < ctx->argc ?
				    ctx->argv[npos] : "";
				break;
			}
			if (esct[1] == '*')
				quote_args = 0;
			else if (esct[1] == '@')
				quote_args = 1;
			else {
				mandoc_msg(MANDOCERR_ARG_NONUM, ln,
				    (int)(stesc - buf->buf), "%.3s", stesc);
				break;
			}
			/* Calculate the total length of all arguments. */
			asz = 0;
			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					asz++;  /* blank */
				if (quote_args)
					asz += 2;  /* quotes */
				asz += strlen(ctx->argv[npos]);
			}
			/*
			 * The arguments replace the three bytes of the
			 * escape sequence.  Unless the lengths agree,
			 * resize the buffer and move the rest of the
			 * line: before shrinking the buffer, after
			 * growing it.
			 */
			if (asz != 3) {
				rsz = buf->sz - (stesc - buf->buf) - 3;
				if (asz < 3)
					memmove(stesc + asz, stesc + 3, rsz);
				buf->sz += asz - 3;
				nbuf = mandoc_realloc(buf->buf, buf->sz);
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				buf->buf = nbuf;
				if (asz > 3)
					memmove(stesc + asz, stesc + 3, rsz);
			}
			/* Copy the arguments into the gap. */
			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					*stesc++ = ' ';
				if (quote_args)
					*stesc++ = '"';
				cp = ctx->argv[npos];
				while (*cp != '\0')
					*stesc++ = *cp++;
				if (quote_args)
					*stesc++ = '"';
			}
			continue;
		case 'B':
			/*
			 * Yield "1" if the complete argument parses
			 * as a numeric expression, or "0" otherwise.
			 */
			npos = 0;
			ubuf[0] = arg_complete &&
			    roff_evalnum(r, ln, stnam, &npos,
			      NULL, ROFFNUM_SCALE) &&
			    stnam + npos + 1 == cp ? '1' : '0';
			ubuf[1] = '\0';
			break;
		case 'n':
			if (arg_complete)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, stnam, naml, sign));
			else
				ubuf[0] = '\0';
			break;
		case 'w':
			/* use even incomplete args */
			/* Each counted character contributes 24 to the result. */
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
			    24 * (int)naml);
			break;
		}

		if (res == NULL) {
			if (*esct == '*')
				mandoc_msg(MANDOCERR_STR_UNDEF,
				    ln, (int)(stesc - buf->buf),
				    "%.*s", (int)naml, stnam);
			res = "";
		} else if (buf->sz + strlen(res) > SHRT_MAX) {
			/* Refuse to let the line grow without bounds. */
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/* Replace the escape sequence by the string. */

		*stesc = '\0';
		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
		    buf->buf, res, cp) + 1;

		/*
		 * Prepare for the next replacement.
		 * Resume the backward scan at the end of the inserted
		 * text, such that escapes inside it are handled, too.
		 */

		start = nbuf + pos;
		stesc = nbuf + (stesc - buf->buf) + strlen(res);
		free(buf->buf);
		buf->buf = nbuf;
	}
	return ROFF_CONT;
}
1645
/*
 * Parse a quoted or unquoted roff-style request or macro argument.
 * The argument is NUL-terminated in place in the input line,
 * and pairs of quotes inside quoted arguments are collapsed.
 * Return a copy of the parsed argument obtained from mandoc_strdup().
 * If the argument contained a double backslash, the copy is passed
 * through roff_expand() before it is returned, and an empty string
 * is returned instead if roff_expand() reports ROFF_IGN.
 * Advance the argument pointer *cpp to the next argument,
 * or to the NUL byte terminating the argument line,
 * and add the number of bytes consumed to *pos.
 */
char *
roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
{
	struct buf	 buf;
	char		*cp, *start;
	int		 newesc, pairs, quoted, white;

	/* Quoting can only start with a new word. */
	start = *cpp;
	quoted = 0;
	if ('"' == *start) {
		quoted = 1;
		start++;
	}

	/*
	 * newesc: a double backslash was seen
	 * pairs:  number of bytes removed so far,
	 *         i.e. the distance to shift text to the left
	 * white:  an unquoted argument was ended by a blank
	 */
	newesc = pairs = white = 0;
	for (cp = start; '\0' != *cp; cp++) {

		/*
		 * Move the following text left
		 * after quoted quotes and after "\\" and "\t".
		 */
		if (pairs)
			cp[-pairs] = cp[0];

		if ('\\' == cp[0]) {
			/*
			 * In copy mode, translate double to single
			 * backslashes and backslash-t to literal tabs.
			 * Backslash-a is translated to a tab as well.
			 */
			switch (cp[1]) {
			case 'a':
			case 't':
				cp[-pairs] = '\t';
				pairs++;
				cp++;
				break;
			case '\\':
				/*
				 * Mark the resulting single escape
				 * character for roff_expand() below.
				 */
				newesc = 1;
				cp[-pairs] = ASCII_ESC;
				pairs++;
				cp++;
				break;
			case ' ':
				/* Skip escaped blanks. */
				if (0 == quoted)
					cp++;
				break;
			default:
				break;
			}
		} else if (0 == quoted) {
			if (' ' == cp[0]) {
				/* Unescaped blanks end unquoted args. */
				white = 1;
				break;
			}
		} else if ('"' == cp[0]) {
			if ('"' == cp[1]) {
				/* Quoted quotes collapse. */
				pairs++;
				cp++;
			} else {
				/* Unquoted quotes end quoted args. */
				quoted = 2;
				break;
			}
		}
	}

	/* Quoted argument without a closing quote. */
	if (1 == quoted)
		mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);

	/* NUL-terminate this argument and move to the next one. */
	if (pairs)
		cp[-pairs] = '\0';
	if ('\0' != *cp) {
		*cp++ = '\0';
		while (' ' == *cp)
			cp++;
	}
	/* Account for the opening quote that start has skipped. */
	*pos += (int)(cp - start) + (quoted ? 1 : 0);
	*cpp = cp;

	if ('\0' == *cp && (white || ' ' == cp[-1]))
		mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);

	start = mandoc_strdup(start);
	if (newesc == 0)
		return start;

	/* Expand the escape sequences marked with ASCII_ESC above. */
	buf.buf = start;
	buf.sz = strlen(start) + 1;
	buf.next = NULL;
	if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) {
		free(buf.buf);
		buf.buf = mandoc_strdup("");
	}
	return buf.buf;
}
1756
1757
1758 /*
1759 * Process text streams.
1760 */
1761 static int
1762 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1763 {
1764 size_t sz;
1765 const char *start;
1766 char *p;
1767 int isz;
1768 enum mandoc_esc esc;
1769
1770 /* Spring the input line trap. */
1771
1772 if (roffit_lines == 1) {
1773 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1774 free(buf->buf);
1775 buf->buf = p;
1776 buf->sz = isz + 1;
1777 *offs = 0;
1778 free(roffit_macro);
1779 roffit_lines = 0;
1780 return ROFF_REPARSE;
1781 } else if (roffit_lines > 1)
1782 --roffit_lines;
1783
1784 if (roffce_node != NULL && buf->buf[pos] != '\0') {
1785 if (roffce_lines < 1) {
1786 r->man->last = roffce_node;
1787 r->man->next = ROFF_NEXT_SIBLING;
1788 roffce_lines = 0;
1789 roffce_node = NULL;
1790 } else
1791 roffce_lines--;
1792 }
1793
1794 /* Convert all breakable hyphens into ASCII_HYPH. */
1795
1796 start = p = buf->buf + pos;
1797
1798 while (*p != '\0') {
1799 sz = strcspn(p, "-\\");
1800 p += sz;
1801
1802 if (*p == '\0')
1803 break;
1804
1805 if (*p == '\\') {
1806 /* Skip over escapes. */
1807 p++;
1808 esc = mandoc_escape((const char **)&p, NULL, NULL);
1809 if (esc == ESCAPE_ERROR)
1810 break;
1811 while (*p == '-')
1812 p++;
1813 continue;
1814 } else if (p == start) {
1815 p++;
1816 continue;
1817 }
1818
1819 if (isalpha((unsigned char)p[-1]) &&
1820 isalpha((unsigned char)p[1]))
1821 *p = ASCII_HYPH;
1822 p++;
1823 }
1824 return ROFF_CONT;
1825 }
1826
/*
 * Parse one input line.
 * The line is in buf->buf and processing starts at the offset *offs;
 * ln is the input line number used for messages and len is the
 * length used for the long-line warning.
 * After handling in-line equation delimiters and expanding escape
 * sequences, the line is handed to the text or sub handler of the
 * innermost open roff block if there is one, to the equation or
 * table parser if one is active, to roff_parsetext() if it is a
 * text line, or to the handler of the request or macro it starts with.
 * Returns a combination of ROFF_* values as returned by these
 * handlers; ROFF_CONT is returned if the line starts with a macro
 * that is neither a roff request nor a user-defined macro.
 */
int
roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
{
	enum roff_tok	 t;
	int		 e;
	int		 pos;	/* parse point */
	int		 spos;	/* saved parse point for messages */
	int		 ppos;	/* original offset in buf->buf */
	int		 ctl;	/* macro line (boolean) */

	ppos = pos = *offs;

	/*
	 * Warn about long text lines, but not inside tables or
	 * equations, not in no-fill mode, not for lines starting
	 * with a blank, dot, backslash, or the control character,
	 * and not if the first blank only occurs at column 80 or later.
	 */

	if (len > 80 && r->tbl == NULL && r->eqn == NULL &&
	    (r->man->flags & ROFF_NOFILL) == 0 &&
	    strchr(" .\\", buf->buf[pos]) == NULL &&
	    buf->buf[pos] != r->control &&
	    strcspn(buf->buf, " ") < 80)
		mandoc_msg(MANDOCERR_TEXT_LONG, ln, (int)len - 1,
		    "%.20s...", buf->buf + pos);

	/* Handle in-line equation delimiters. */

	if (r->tbl == NULL &&
	    r->last_eqn != NULL && r->last_eqn->delim &&
	    (r->eqn == NULL || r->eqn_inline)) {
		e = roff_eqndelim(r, buf, pos);
		if (e == ROFF_REPARSE)
			return e;
		assert(e == ROFF_CONT);
	}

	/* Expand some escape sequences. */

	e = roff_expand(r, buf, ln, pos, r->escape);
	if ((e & ROFF_MASK) == ROFF_IGN)
		return e;
	assert(e == ROFF_CONT);

	/* Find out whether this is a macro line; may advance pos. */

	ctl = roff_getcontrol(r, buf->buf, &pos);

	/*
	 * First, if a scope is open and we're not a macro, pass the
	 * text through the macro's filter.
	 * Equations process all content themselves.
	 * Tables process almost all content themselves, but we want
	 * to warn about macros before passing it there.
	 */

	if (r->last != NULL && ! ctl) {
		t = r->last->tok;
		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
		if ((e & ROFF_MASK) == ROFF_IGN)
			return e;
		/* Only keep the bits outside ROFF_MASK. */
		e &= ~ROFF_MASK;
	} else
		e = ROFF_IGN;
	/* Inside an equation, everything but ".EN" is equation input. */
	if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
		eqn_read(r->eqn, buf->buf + ppos);
		return e;
	}
	/* Inside a table, text lines and empty requests are table input. */
	if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
		tbl_read(r->tbl, ln, buf->buf, ppos);
		roff_addtbl(r->man, ln, r->tbl);
		return e;
	}
	if ( ! ctl) {
		/* Stop saving comments once a text line is seen. */
		r->options &= ~MPARSE_COMMENT;
		return roff_parsetext(r, buf, pos, offs) | e;
	}

	/* Skip empty request lines. */

	if (buf->buf[pos] == '"') {
		mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
		return ROFF_IGN;
	} else if (buf->buf[pos] == '\0')
		return ROFF_IGN;

	/*
	 * If a scope is open, go to the child handler for that macro,
	 * as it may want to preprocess before doing anything with it.
	 * Don't do so if an equation is open.
	 */

	if (r->last) {
		t = r->last->tok;
		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
	}

	/* No scope is open.  This is a new request or macro. */

	r->options &= ~MPARSE_COMMENT;
	spos = pos;
	t = roff_parse(r, buf->buf, &pos, ln, ppos);

	/* Tables ignore most macros. */

	if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
	    t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
		mandoc_msg(MANDOCERR_TBLMACRO,
		    ln, pos, "%s", buf->buf + spos);
		if (t != TOKEN_NONE)
			return ROFF_IGN;
		/*
		 * For a macro unknown to the roff parser, skip the
		 * macro name and pass the rest of the line to the table.
		 */
		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
			pos++;
		while (buf->buf[pos] == ' ')
			pos++;
		tbl_read(r->tbl, ln, buf->buf, pos);
		roff_addtbl(r->man, ln, r->tbl);
		return ROFF_IGN;
	}

	/* For now, let high level macros abort .ce mode. */

	if (ctl && roffce_node != NULL &&
	    (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
	     t == ROFF_TH || t == ROFF_TS)) {
		r->man->last = roffce_node;
		r->man->next = ROFF_NEXT_SIBLING;
		roffce_lines = 0;
		roffce_node = NULL;
	}

	/*
	 * This is neither a roff request nor a user-defined macro.
	 * Let the standard macro set parsers handle it.
	 */

	if (t == TOKEN_NONE)
		return ROFF_CONT;

	/* Execute a roff request or a user defined macro. */

	return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
}
1962
1963 /*
1964 * Internal interface function to tell the roff parser that execution
1965 * of the current macro ended. This is required because macro
1966 * definitions usually do not end with a .return request.
1967 */
1968 void
1969 roff_userret(struct roff *r)
1970 {
1971 struct mctx *ctx;
1972 int i;
1973
1974 assert(r->mstackpos >= 0);
1975 ctx = r->mstack + r->mstackpos;
1976 for (i = 0; i < ctx->argc; i++)
1977 free(ctx->argv[i]);
1978 ctx->argc = 0;
1979 r->mstackpos--;
1980 }
1981
1982 void
1983 roff_endparse(struct roff *r)
1984 {
1985 if (r->last != NULL)
1986 mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
1987 r->last->col, "%s", roff_name[r->last->tok]);
1988
1989 if (r->eqn != NULL) {
1990 mandoc_msg(MANDOCERR_BLK_NOEND,
1991 r->eqn->node->line, r->eqn->node->pos, "EQ");
1992 eqn_parse(r->eqn);
1993 r->eqn = NULL;
1994 }
1995
1996 if (r->tbl != NULL) {
1997 tbl_end(r->tbl, 1);
1998 r->tbl = NULL;
1999 }
2000 }
2001
2002 /*
2003 * Parse a roff node's type from the input buffer. This must be in the
2004 * form of ".foo xxx" in the usual way.
2005 */
2006 static enum roff_tok
2007 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
2008 {
2009 char *cp;
2010 const char *mac;
2011 size_t maclen;
2012 int deftype;
2013 enum roff_tok t;
2014
2015 cp = buf + *pos;
2016
2017 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
2018 return TOKEN_NONE;
2019
2020 mac = cp;
2021 maclen = roff_getname(r, &cp, ln, ppos);
2022
2023 deftype = ROFFDEF_USER | ROFFDEF_REN;
2024 r->current_string = roff_getstrn(r, mac, maclen, &deftype);
2025 switch (deftype) {
2026 case ROFFDEF_USER:
2027 t = ROFF_USERDEF;
2028 break;
2029 case ROFFDEF_REN:
2030 t = ROFF_RENAMED;
2031 break;
2032 default:
2033 t = roffhash_find(r->reqtab, mac, maclen);
2034 break;
2035 }
2036 if (t != TOKEN_NONE)
2037 *pos = cp - buf;
2038 else if (deftype == ROFFDEF_UNDEF) {
2039 /* Using an undefined macro defines it to be empty. */
2040 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
2041 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
2042 }
2043 return t;
2044 }
2045
2046 /* --- handling of request blocks ----------------------------------------- */
2047
2048 /*
2049 * Close a macro definition block or an "ignore" block.
2050 */
2051 static int
2052 roff_cblock(ROFF_ARGS)
2053 {
2054 int rr;
2055
2056 if (r->last == NULL) {
2057 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2058 return ROFF_IGN;
2059 }
2060
2061 switch (r->last->tok) {
2062 case ROFF_am:
2063 case ROFF_ami:
2064 case ROFF_de:
2065 case ROFF_dei:
2066 case ROFF_ig:
2067 break;
2068 case ROFF_am1:
2069 case ROFF_de1:
2070 /* Remapped in roff_block(). */
2071 abort();
2072 default:
2073 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2074 return ROFF_IGN;
2075 }
2076
2077 roffnode_pop(r);
2078 roffnode_cleanscope(r);
2079
2080 /*
2081 * If a conditional block with braces is still open,
2082 * check for "\}" block end markers.
2083 */
2084
2085 if (r->last != NULL && r->last->endspan < 0) {
2086 rr = 1; /* If arguments follow "\}", warn about them. */
2087 roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2088 }
2089
2090 if (buf->buf[pos] != '\0')
2091 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
2092 ".. %s", buf->buf + pos);
2093
2094 return ROFF_IGN;
2095 }
2096
2097 /*
2098 * Pop all nodes ending at the end of the current input line.
2099 * Return the number of loops ended.
2100 */
2101 static int
2102 roffnode_cleanscope(struct roff *r)
2103 {
2104 int inloop;
2105
2106 inloop = 0;
2107 while (r->last != NULL && r->last->endspan > 0) {
2108 if (--r->last->endspan != 0)
2109 break;
2110 inloop += roffnode_pop(r);
2111 }
2112 return inloop;
2113 }
2114
2115 /*
2116 * Handle the closing "\}" of a conditional block.
2117 * Apart from generating warnings, this only pops nodes.
2118 * Return the number of loops ended.
2119 */
2120 static int
2121 roff_ccond(struct roff *r, int ln, int ppos)
2122 {
2123 if (NULL == r->last) {
2124 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2125 return 0;
2126 }
2127
2128 switch (r->last->tok) {
2129 case ROFF_el:
2130 case ROFF_ie:
2131 case ROFF_if:
2132 case ROFF_while:
2133 break;
2134 default:
2135 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2136 return 0;
2137 }
2138
2139 if (r->last->endspan > -1) {
2140 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2141 return 0;
2142 }
2143
2144 return roffnode_pop(r) + roffnode_cleanscope(r);
2145 }
2146
2147 static int
2148 roff_block(ROFF_ARGS)
2149 {
2150 const char *name, *value;
2151 char *call, *cp, *iname, *rname;
2152 size_t csz, namesz, rsz;
2153 int deftype;
2154
2155 /* Ignore groff compatibility mode for now. */
2156
2157 if (tok == ROFF_de1)
2158 tok = ROFF_de;
2159 else if (tok == ROFF_dei1)
2160 tok = ROFF_dei;
2161 else if (tok == ROFF_am1)
2162 tok = ROFF_am;
2163 else if (tok == ROFF_ami1)
2164 tok = ROFF_ami;
2165
2166 /* Parse the macro name argument. */
2167
2168 cp = buf->buf + pos;
2169 if (tok == ROFF_ig) {
2170 iname = NULL;
2171 namesz = 0;
2172 } else {
2173 iname = cp;
2174 namesz = roff_getname(r, &cp, ln, ppos);
2175 iname[namesz] = '\0';
2176 }
2177
2178 /* Resolve the macro name argument if it is indirect. */
2179
2180 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2181 deftype = ROFFDEF_USER;
2182 name = roff_getstrn(r, iname, namesz, &deftype);
2183 if (name == NULL) {
2184 mandoc_msg(MANDOCERR_STR_UNDEF,
2185 ln, (int)(iname - buf->buf),
2186 "%.*s", (int)namesz, iname);
2187 namesz = 0;
2188 } else
2189 namesz = strlen(name);
2190 } else
2191 name = iname;
2192
2193 if (namesz == 0 && tok != ROFF_ig) {
2194 mandoc_msg(MANDOCERR_REQ_EMPTY,
2195 ln, ppos, "%s", roff_name[tok]);
2196 return ROFF_IGN;
2197 }
2198
2199 roffnode_push(r, tok, name, ln, ppos);
2200
2201 /*
2202 * At the beginning of a `de' macro, clear the existing string
2203 * with the same name, if there is one. New content will be
2204 * appended from roff_block_text() in multiline mode.
2205 */
2206
2207 if (tok == ROFF_de || tok == ROFF_dei) {
2208 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
2209 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2210 } else if (tok == ROFF_am || tok == ROFF_ami) {
2211 deftype = ROFFDEF_ANY;
2212 value = roff_getstrn(r, iname, namesz, &deftype);
2213 switch (deftype) { /* Before appending, ... */
2214 case ROFFDEF_PRE: /* copy predefined to user-defined. */
2215 roff_setstrn(&r->strtab, name, namesz,
2216 value, strlen(value), 0);
2217 break;
2218 case ROFFDEF_REN: /* call original standard macro. */
2219 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2220 (int)strlen(value), value);
2221 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2222 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2223 free(call);
2224 break;
2225 case ROFFDEF_STD: /* rename and call standard macro. */
2226 rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
2227 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
2228 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2229 (int)rsz, rname);
2230 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2231 free(call);
2232 free(rname);
2233 break;
2234 default:
2235 break;
2236 }
2237 }
2238
2239 if (*cp == '\0')
2240 return ROFF_IGN;
2241
2242 /* Get the custom end marker. */
2243
2244 iname = cp;
2245 namesz = roff_getname(r, &cp, ln, ppos);
2246
2247 /* Resolve the end marker if it is indirect. */
2248
2249 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2250 deftype = ROFFDEF_USER;
2251 name = roff_getstrn(r, iname, namesz, &deftype);
2252 if (name == NULL) {
2253 mandoc_msg(MANDOCERR_STR_UNDEF,
2254 ln, (int)(iname - buf->buf),
2255 "%.*s", (int)namesz, iname);
2256 namesz = 0;
2257 } else
2258 namesz = strlen(name);
2259 } else
2260 name = iname;
2261
2262 if (namesz)
2263 r->last->end = mandoc_strndup(name, namesz);
2264
2265 if (*cp != '\0')
2266 mandoc_msg(MANDOCERR_ARG_EXCESS,
2267 ln, pos, ".%s ... %s", roff_name[tok], cp);
2268
2269 return ROFF_IGN;
2270 }
2271
2272 static int
2273 roff_block_sub(ROFF_ARGS)
2274 {
2275 enum roff_tok t;
2276 int i, j;
2277
2278 /*
2279 * First check whether a custom macro exists at this level. If
2280 * it does, then check against it. This is some of groff's
2281 * stranger behaviours. If we encountered a custom end-scope
2282 * tag and that tag also happens to be a "real" macro, then we
2283 * need to try interpreting it again as a real macro. If it's
2284 * not, then return ignore. Else continue.
2285 */
2286
2287 if (r->last->end) {
2288 for (i = pos, j = 0; r->last->end[j]; j++, i++)
2289 if (buf->buf[i] != r->last->end[j])
2290 break;
2291
2292 if (r->last->end[j] == '\0' &&
2293 (buf->buf[i] == '\0' ||
2294 buf->buf[i] == ' ' ||
2295 buf->buf[i] == '\t')) {
2296 roffnode_pop(r);
2297 roffnode_cleanscope(r);
2298
2299 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2300 i++;
2301
2302 pos = i;
2303 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2304 TOKEN_NONE)
2305 return ROFF_RERUN;
2306 return ROFF_IGN;
2307 }
2308 }
2309
2310 /*
2311 * If we have no custom end-query or lookup failed, then try
2312 * pulling it out of the hashtable.
2313 */
2314
2315 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2316
2317 if (t != ROFF_cblock) {
2318 if (tok != ROFF_ig)
2319 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2320 return ROFF_IGN;
2321 }
2322
2323 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2324 }
2325
2326 static int
2327 roff_block_text(ROFF_ARGS)
2328 {
2329
2330 if (tok != ROFF_ig)
2331 roff_setstr(r, r->last->name, buf->buf + pos, 2);
2332
2333 return ROFF_IGN;
2334 }
2335
2336 /*
2337 * Check for a closing "\}" and handle it.
2338 * In this function, the final "int *offs" argument is used for
2339 * different purposes than elsewhere:
2340 * Input: *offs == 0: caller wants to discard arguments following \}
2341 * *offs == 1: caller wants to preserve text following \}
2342 * Output: *offs = 0: tell caller to discard input line
2343 * *offs = 1: tell caller to use input line
2344 */
2345 static int
2346 roff_cond_checkend(ROFF_ARGS)
2347 {
2348 char *ep;
2349 int endloop, irc, rr;
2350
2351 irc = ROFF_IGN;
2352 rr = r->last->rule;
2353 endloop = tok != ROFF_while ? ROFF_IGN :
2354 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2355 if (roffnode_cleanscope(r))
2356 irc |= endloop;
2357
2358 /*
2359 * If "\}" occurs on a macro line without a preceding macro or
2360 * a text line contains nothing else, drop the line completely.
2361 */
2362
2363 ep = buf->buf + pos;
2364 if (ep[0] == '\\' && ep[1] == '}' && (ep[2] == '\0' || *offs == 0))
2365 rr = 0;
2366
2367 /*
2368 * The closing delimiter "\}" rewinds the conditional scope
2369 * but is otherwise ignored when interpreting the line.
2370 */
2371
2372 while ((ep = strchr(ep, '\\')) != NULL) {
2373 switch (ep[1]) {
2374 case '}':
2375 if (ep[2] == '\0')
2376 ep[0] = '\0';
2377 else if (rr)
2378 ep[1] = '&';
2379 else
2380 memmove(ep, ep + 2, strlen(ep + 2) + 1);
2381 if (roff_ccond(r, ln, ep - buf->buf))
2382 irc |= endloop;
2383 break;
2384 case '\0':
2385 ++ep;
2386 break;
2387 default:
2388 ep += 2;
2389 break;
2390 }
2391 }
2392 *offs = rr;
2393 return irc;
2394 }
2395
2396 /*
2397 * Parse and process a request or macro line in conditional scope.
2398 */
2399 static int
2400 roff_cond_sub(ROFF_ARGS)
2401 {
2402 struct roffnode *bl;
2403 int irc, rr;
2404 enum roff_tok t;
2405
2406 rr = 0; /* If arguments follow "\}", skip them. */
2407 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2408 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2409
2410 /* For now, let high level macros abort .ce mode. */
2411
2412 if (roffce_node != NULL &&
2413 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
2414 t == ROFF_TH || t == ROFF_TS)) {
2415 r->man->last = roffce_node;
2416 r->man->next = ROFF_NEXT_SIBLING;
2417 roffce_lines = 0;
2418 roffce_node = NULL;
2419 }
2420
2421 /*
2422 * Fully handle known macros when they are structurally
2423 * required or when the conditional evaluated to true.
2424 */
2425
2426 if (t == ROFF_break) {
2427 if (irc & ROFF_LOOPMASK)
2428 irc = ROFF_IGN | ROFF_LOOPEXIT;
2429 else if (rr) {
2430 for (bl = r->last; bl != NULL; bl = bl->parent) {
2431 bl->rule = 0;
2432 if (bl->tok == ROFF_while)
2433 break;
2434 }
2435 }
2436 } else if (t != TOKEN_NONE &&
2437 (rr || roffs[t].flags & ROFFMAC_STRUCT)) {
2438 irc |= (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2439 if (irc & ROFF_WHILE)
2440 irc &= ~(ROFF_LOOPCONT | ROFF_LOOPEXIT);
2441 } else
2442 irc |= rr ? ROFF_CONT : ROFF_IGN;
2443 return irc;
2444 }
2445
2446 /*
2447 * Parse and process a text line in conditional scope.
2448 */
2449 static int
2450 roff_cond_text(ROFF_ARGS)
2451 {
2452 int irc, rr;
2453
2454 rr = 1; /* If arguments follow "\}", preserve them. */
2455 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2456 if (rr)
2457 irc |= ROFF_CONT;
2458 return irc;
2459 }
2460
2461 /* --- handling of numeric and conditional expressions -------------------- */
2462
2463 /*
2464 * Parse a single signed integer number. Stop at the first non-digit.
2465 * If there is at least one digit, return success and advance the
2466 * parse point, else return failure and let the parse point unchanged.
2467 * Ignore overflows, treat them just like the C language.
2468 */
2469 static int
2470 roff_getnum(const char *v, int *pos, int *res, int flags)
2471 {
2472 int myres, scaled, n, p;
2473
2474 if (NULL == res)
2475 res = &myres;
2476
2477 p = *pos;
2478 n = v[p] == '-';
2479 if (n || v[p] == '+')
2480 p++;
2481
2482 if (flags & ROFFNUM_WHITE)
2483 while (isspace((unsigned char)v[p]))
2484 p++;
2485
2486 for (*res = 0; isdigit((unsigned char)v[p]); p++)
2487 *res = 10 * *res + v[p] - '0';
2488 if (p == *pos + n)
2489 return 0;
2490
2491 if (n)
2492 *res = -*res;
2493
2494 /* Each number may be followed by one optional scaling unit. */
2495
2496 switch (v[p]) {
2497 case 'f':
2498 scaled = *res * 65536;
2499 break;
2500 case 'i':
2501 scaled = *res * 240;
2502 break;
2503 case 'c':
2504 scaled = *res * 240 / 2.54;
2505 break;
2506 case 'v':
2507 case 'P':
2508 scaled = *res * 40;
2509 break;
2510 case 'm':
2511 case 'n':
2512 scaled = *res * 24;
2513 break;
2514 case 'p':
2515 scaled = *res * 10 / 3;
2516 break;
2517 case 'u':
2518 scaled = *res;
2519 break;
2520 case 'M':
2521 scaled = *res * 6 / 25;
2522 break;
2523 default:
2524 scaled = *res;
2525 p--;
2526 break;
2527 }
2528 if (flags & ROFFNUM_SCALE)
2529 *res = scaled;
2530
2531 *pos = p + 1;
2532 return 1;
2533 }
2534
/*
 * Evaluate a string comparison condition starting at v[*pos].
 * The first character is the delimiter.
 * Succeed (return 1) if the string up to its second occurrence
 * matches the string up to its third occurrence, else return 0.
 * Advance the cursor after the third occurrence
 * or lacking that, to the end of the line.
 * If there is no delimiter at all because the input is empty,
 * return 0 without moving the cursor.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*s1, *s2, *s3;
	int		 match;

	match = 0;
	s1 = v + *pos;		/* initial delimiter */

	/*
	 * Without this check, scanning would start
	 * beyond the terminating NUL byte.
	 */
	if (*s1 == '\0')
		return 0;

	s2 = s1 + 1;		/* for scanning the first string */
	s3 = strchr(s2, *s1);	/* for scanning the second string */

	if (s3 == NULL)		/* found no middle delimiter */
		goto out;

	while (*++s3 != '\0') {
		if (*s2 != *s3) {  /* mismatch */
			s3 = strchr(s3, *s1);
			break;
		}
		if (*s3 == *s1) {  /* found the final delimiter */
			match = 1;
			break;
		}
		s2++;
	}

out:
	if (s3 == NULL)
		s3 = strchr(s2, '\0');
	else if (*s3 != '\0')
		s3++;
	*pos = s3 - v;
	return match;
}
2577
2578 /*
2579 * Evaluate an optionally negated single character, numerical,
2580 * or string condition.
2581 */
2582 static int
2583 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2584 {
2585 const char *start, *end;
2586 char *cp, *name;
2587 size_t sz;
2588 int deftype, len, number, savepos, istrue, wanttrue;
2589
2590 if ('!' == v[*pos]) {
2591 wanttrue = 0;
2592 (*pos)++;
2593 } else
2594 wanttrue = 1;
2595
2596 switch (v[*pos]) {
2597 case '\0':
2598 return 0;
2599 case 'n':
2600 case 'o':
2601 (*pos)++;
2602 return wanttrue;
2603 case 'e':
2604 case 't':
2605 case 'v':
2606 (*pos)++;
2607 return !wanttrue;
2608 case 'c':
2609 do {
2610 (*pos)++;
2611 } while (v[*pos] == ' ');
2612
2613 /*
2614 * Quirk for groff compatibility:
2615 * The horizontal tab is neither available nor unavailable.
2616 */
2617
2618 if (v[*pos] == '\t') {
2619 (*pos)++;
2620 return 0;
2621 }
2622
2623 /* Printable ASCII characters are available. */
2624
2625 if (v[*pos] != '\\') {
2626 (*pos)++;
2627 return wanttrue;
2628 }
2629
2630 end = v + ++*pos;
2631 switch (mandoc_escape(&end, &start, &len)) {
2632 case ESCAPE_SPECIAL:
2633 istrue = mchars_spec2cp(start, len) != -1;
2634 break;
2635 case ESCAPE_UNICODE:
2636 istrue = 1;
2637 break;
2638 case ESCAPE_NUMBERED:
2639 istrue = mchars_num2char(start, len) != -1;
2640 break;
2641 default:
2642 istrue = !wanttrue;
2643 break;
2644 }
2645 *pos = end - v;
2646 return istrue == wanttrue;
2647 case 'd':
2648 case 'r':
2649 cp = v + *pos + 1;
2650 while (*cp == ' ')
2651 cp++;
2652 name = cp;
2653 sz = roff_getname(r, &cp, ln, cp - v);
2654 if (sz == 0)
2655 istrue = 0;
2656 else if (v[*pos] == 'r')
2657 istrue = roff_hasregn(r, name, sz);
2658 else {
2659 deftype = ROFFDEF_ANY;
2660 roff_getstrn(r, name, sz, &deftype);
2661 istrue = !!deftype;
2662 }
2663 *pos = (name + sz) - v;
2664 return istrue == wanttrue;
2665 default:
2666 break;
2667 }
2668
2669 savepos = *pos;
2670 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2671 return (number > 0) == wanttrue;
2672 else if (*pos == savepos)
2673 return roff_evalstrcond(v, pos) == wanttrue;
2674 else
2675 return 0;
2676 }
2677
2678 static int
2679 roff_line_ignore(ROFF_ARGS)
2680 {
2681
2682 return ROFF_IGN;
2683 }
2684
2685 static int
2686 roff_insec(ROFF_ARGS)
2687 {
2688
2689 mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]);
2690 return ROFF_IGN;
2691 }
2692
2693 static int
2694 roff_unsupp(ROFF_ARGS)
2695 {
2696
2697 mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]);
2698 return ROFF_IGN;
2699 }
2700
2701 static int
2702 roff_cond(ROFF_ARGS)
2703 {
2704 int irc;
2705
2706 roffnode_push(r, tok, NULL, ln, ppos);
2707
2708 /*
2709 * An `.el' has no conditional body: it will consume the value
2710 * of the current rstack entry set in prior `ie' calls or
2711 * defaults to DENY.
2712 *
2713 * If we're not an `el', however, then evaluate the conditional.
2714 */
2715
2716 r->last->rule = tok == ROFF_el ?
2717 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2718 roff_evalcond(r, ln, buf->buf, &pos);
2719
2720 /*
2721 * An if-else will put the NEGATION of the current evaluated
2722 * conditional into the stack of rules.
2723 */
2724
2725 if (tok == ROFF_ie) {
2726 if (r->rstackpos + 1 == r->rstacksz) {
2727 r->rstacksz += 16;
2728 r->rstack = mandoc_reallocarray(r->rstack,
2729 r->rstacksz, sizeof(int));
2730 }
2731 r->rstack[++r->rstackpos] = !r->last->rule;
2732 }
2733
2734 /* If the parent has false as its rule, then so do we. */
2735
2736 if (r->last->parent && !r->last->parent->rule)
2737 r->last->rule = 0;
2738
2739 /*
2740 * Determine scope.
2741 * If there is nothing on the line after the conditional,
2742 * not even whitespace, use next-line scope.
2743 * Except that .while does not support next-line scope.
2744 */
2745
2746 if (buf->buf[pos] == '\0' && tok != ROFF_while) {
2747 r->last->endspan = 2;
2748 goto out;
2749 }
2750
2751 while (buf->buf[pos] == ' ')
2752 pos++;
2753
2754 /* An opening brace requests multiline scope. */
2755
2756 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2757 r->last->endspan = -1;
2758 pos += 2;
2759 while (buf->buf[pos] == ' ')
2760 pos++;
2761 goto out;
2762 }
2763
2764 /*
2765 * Anything else following the conditional causes
2766 * single-line scope. Warn if the scope contains
2767 * nothing but trailing whitespace.
2768 */
2769
2770 if (buf->buf[pos] == '\0')
2771 mandoc_msg(MANDOCERR_COND_EMPTY,
2772 ln, ppos, "%s", roff_name[tok]);
2773
2774 r->last->endspan = 1;
2775
2776 out:
2777 *offs = pos;
2778 irc = ROFF_RERUN;
2779 if (tok == ROFF_while)
2780 irc |= ROFF_WHILE;
2781 return irc;
2782 }
2783
2784 static int
2785 roff_ds(ROFF_ARGS)
2786 {
2787 char *string;
2788 const char *name;
2789 size_t namesz;
2790
2791 /* Ignore groff compatibility mode for now. */
2792
2793 if (tok == ROFF_ds1)
2794 tok = ROFF_ds;
2795 else if (tok == ROFF_as1)
2796 tok = ROFF_as;
2797
2798 /*
2799 * The first word is the name of the string.
2800 * If it is empty or terminated by an escape sequence,
2801 * abort the `ds' request without defining anything.
2802 */
2803
2804 name = string = buf->buf + pos;
2805 if (*name == '\0')
2806 return ROFF_IGN;
2807
2808 namesz = roff_getname(r, &string, ln, pos);
2809 switch (name[namesz]) {
2810 case '\\':
2811 return ROFF_IGN;
2812 case '\t':
2813 string = buf->buf + pos + namesz;
2814 break;
2815 default:
2816 break;
2817 }
2818
2819 /* Read past the initial double-quote, if any. */
2820 if (*string == '"')
2821 string++;
2822
2823 /* The rest is the value. */
2824 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2825 ROFF_as == tok);
2826 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2827 return ROFF_IGN;
2828 }
2829
/*
 * Parse a single operator, one or two characters long, at v[*pos].
 * If the operator is recognized, store its one-character code
 * in *res, advance the parse point, and return that code, which
 * is never zero.  Two-character operators are encoded as follows:
 * "<=" as 'l', "<>" as '!', "<?" as 'i', ">=" as 'g', ">?" as 'a',
 * and "==" as '='.
 * Otherwise, store the offending character in *res, leave the
 * parse point unchanged, and return 0.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	char	 op, next;
	int	 oplen;

	op = v[*pos];
	oplen = 1;

	switch (op) {
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case ':':
		break;
	case '<':
		next = v[*pos + 1];
		if (next == '=') {
			op = 'l';
			oplen = 2;
		} else if (next == '>') {
			op = '!';
			oplen = 2;
		} else if (next == '?') {
			op = 'i';
			oplen = 2;
		}
		break;
	case '>':
		next = v[*pos + 1];
		if (next == '=') {
			op = 'g';
			oplen = 2;
		} else if (next == '?') {
			op = 'a';
			oplen = 2;
		}
		break;
	case '=':
		if (v[*pos + 1] == '=')
			oplen = 2;
		break;
	default:
		*res = op;
		return 0;
	}

	*res = op;
	*pos += oplen;
	return op;
}
2893
2894 /*
2895 * Evaluate either a parenthesized numeric expression
2896 * or a single signed integer number.
2897 */
2898 static int
2899 roff_evalpar(struct roff *r, int ln,
2900 const char *v, int *pos, int *res, int flags)
2901 {
2902
2903 if ('(' != v[*pos])
2904 return roff_getnum(v, pos, res, flags);
2905
2906 (*pos)++;
2907 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2908 return 0;
2909
2910 /*
2911 * Omission of the closing parenthesis
2912 * is an error in validation mode,
2913 * but ignored in evaluation mode.
2914 */
2915
2916 if (')' == v[*pos])
2917 (*pos)++;
2918 else if (NULL == res)
2919 return 0;
2920
2921 return 1;
2922 }
2923
2924 /*
2925 * Evaluate a complete numeric expression.
2926 * Proceed left to right, there is no concept of precedence.
2927 */
2928 static int
2929 roff_evalnum(struct roff *r, int ln, const char *v,
2930 int *pos, int *res, int flags)
2931 {
2932 int mypos, operand2;
2933 char operator;
2934
2935 if (NULL == pos) {
2936 mypos = 0;
2937 pos = &mypos;
2938 }
2939
2940 if (flags & ROFFNUM_WHITE)
2941 while (isspace((unsigned char)v[*pos]))
2942 (*pos)++;
2943
2944 if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2945 return 0;
2946
2947 while (1) {
2948 if (flags & ROFFNUM_WHITE)
2949 while (isspace((unsigned char)v[*pos]))
2950 (*pos)++;
2951
2952 if ( ! roff_getop(v, pos, &operator))
2953 break;
2954
2955 if (flags & ROFFNUM_WHITE)
2956 while (isspace((unsigned char)v[*pos]))
2957 (*pos)++;
2958
2959 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2960 return 0;
2961
2962 if (flags & ROFFNUM_WHITE)
2963 while (isspace((unsigned char)v[*pos]))
2964 (*pos)++;
2965
2966 if (NULL == res)
2967 continue;
2968
2969 switch (operator) {
2970 case '+':
2971 *res += operand2;
2972 break;
2973 case '-':
2974 *res -= operand2;
2975 break;
2976 case '*':
2977 *res *= operand2;
2978 break;
2979 case '/':
2980 if (operand2 == 0) {
2981 mandoc_msg(MANDOCERR_DIVZERO,
2982 ln, *pos, "%s", v);
2983 *res = 0;
2984 break;
2985 }
2986 *res /= operand2;
2987 break;
2988 case '%':
2989 if (operand2 == 0) {
2990 mandoc_msg(MANDOCERR_DIVZERO,
2991 ln, *pos, "%s", v);
2992 *res = 0;
2993 break;
2994 }
2995 *res %= operand2;
2996 break;
2997 case '<':
2998 *res = *res < operand2;
2999 break;
3000 case '>':
3001 *res = *res > operand2;
3002 break;
3003 case 'l':
3004 *res = *res <= operand2;
3005 break;
3006 case 'g':
3007 *res = *res >= operand2;
3008 break;
3009 case '=':
3010 *res = *res == operand2;
3011 break;
3012 case '!':
3013 *res = *res != operand2;
3014 break;
3015 case '&':
3016 *res = *res && operand2;
3017 break;
3018 case ':':
3019 *res = *res || operand2;
3020 break;
3021 case 'i':
3022 if (operand2 < *res)
3023 *res = operand2;
3024 break;
3025 case 'a':
3026 if (operand2 > *res)
3027 *res = operand2;
3028 break;
3029 default:
3030 abort();
3031 }
3032 }
3033 return 1;
3034 }
3035
3036 /* --- register management ------------------------------------------------ */
3037
/*
 * Set the number register with the NUL-terminated name,
 * leaving its auto-increment step alone.
 * See roff_setregn() for the meaning of val and sign.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	size_t	 namesz;

	namesz = strlen(name);
	roff_setregn(r, name, namesz, val, sign, INT_MIN);
}
3043
3044 static void
3045 roff_setregn(struct roff *r, const char *name, size_t len,
3046 int val, char sign, int step)
3047 {
3048 struct roffreg *reg;
3049
3050 /* Search for an existing register with the same name. */
3051 reg = r->regtab;
3052
3053 while (reg != NULL && (reg->key.sz != len ||
3054 strncmp(reg->key.p, name, len) != 0))
3055 reg = reg->next;
3056
3057 if (NULL == reg) {
3058 /* Create a new register. */
3059 reg = mandoc_malloc(sizeof(struct roffreg));
3060 reg->key.p = mandoc_strndup(name, len);
3061 reg->key.sz = len;
3062 reg->val = 0;
3063 reg->step = 0;
3064 reg->next = r->regtab;
3065 r->regtab = reg;
3066 }
3067
3068 if ('+' == sign)
3069 reg->val += val;
3070 else if ('-' == sign)
3071 reg->val -= val;
3072 else
3073 reg->val = val;
3074 if (step != INT_MIN)
3075 reg->step = step;
3076 }
3077
3078 /*
3079 * Handle some predefined read-only number registers.
3080 * For now, return -1 if the requested register is not predefined;
3081 * in case a predefined read-only register having the value -1
3082 * were to turn up, another special value would have to be chosen.
3083 */
3084 static int
3085 roff_getregro(const struct roff *r, const char *name)
3086 {
3087
3088 switch (*name) {
3089 case '$': /* Number of arguments of the last macro evaluated. */
3090 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
3091 case 'A': /* ASCII approximation mode is always off. */
3092 return 0;
3093 case 'g': /* Groff compatibility mode is always on. */
3094 return 1;
3095 case 'H': /* Fixed horizontal resolution. */
3096 return 24;
3097 case 'j': /* Always adjust left margin only. */
3098 return 0;
3099 case 'T': /* Some output device is always defined. */
3100 return 1;
3101 case 'V': /* Fixed vertical resolution. */
3102 return 40;
3103 default:
3104 return -1;
3105 }
3106 }
3107
/*
 * Get the value of the number register with the NUL-terminated
 * name, without applying any auto-increment.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	size_t	 namesz;

	namesz = strlen(name);
	return roff_getregn(r, name, namesz, '\0');
}
3113
3114 static int
3115 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
3116 {
3117 struct roffreg *reg;
3118 int val;
3119
3120 if ('.' == name[0] && 2 == len) {
3121 val = roff_getregro(r, name + 1);
3122 if (-1 != val)
3123 return val;
3124 }
3125
3126 for (reg = r->regtab; reg; reg = reg->next) {
3127 if (len == reg->key.sz &&
3128 0 == strncmp(name, reg->key.p, len)) {
3129 switch (sign) {
3130 case '+':
3131 reg->val += reg->step;
3132 break;
3133 case '-':
3134 reg->val -= reg->step;
3135 break;
3136 default:
3137 break;
3138 }
3139 return reg->val;
3140 }
3141 }
3142
3143 roff_setregn(r, name, len, 0, '\0', INT_MIN);
3144 return 0;
3145 }
3146
3147 static int
3148 roff_hasregn(const struct roff *r, const char *name, size_t len)
3149 {
3150 struct roffreg *reg;
3151 int val;
3152
3153 if ('.' == name[0] && 2 == len) {
3154 val = roff_getregro(r, name + 1);
3155 if (-1 != val)
3156 return 1;
3157 }
3158
3159 for (reg = r->regtab; reg; reg = reg->next)
3160 if (len == reg->key.sz &&
3161 0 == strncmp(name, reg->key.p, len))
3162 return 1;
3163
3164 return 0;
3165 }
3166
3167 static void
3168 roff_freereg(struct roffreg *reg)
3169 {
3170 struct roffreg *old_reg;
3171
3172 while (NULL != reg) {
3173 free(reg->key.p);
3174 old_reg = reg;
3175 reg = reg->next;
3176 free(old_reg);
3177 }
3178 }
3179
3180 static int
3181 roff_nr(ROFF_ARGS)
3182 {
3183 char *key, *val, *step;
3184 size_t keysz;
3185 int iv, is, len;
3186 char sign;
3187
3188 key = val = buf->buf + pos;
3189 if (*key == '\0')
3190 return ROFF_IGN;
3191
3192 keysz = roff_getname(r, &val, ln, pos);
3193 if (key[keysz] == '\\' || key[keysz] == '\t')
3194 return ROFF_IGN;
3195
3196 sign = *val;
3197 if (sign == '+' || sign == '-')
3198 val++;
3199
3200 len = 0;
3201 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
3202 return ROFF_IGN;
3203
3204 step = val + len;
3205 while (isspace((unsigned char)*step))
3206 step++;
3207 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
3208 is = INT_MIN;
3209
3210 roff_setregn(r, key, keysz, iv, sign, is);
3211 return ROFF_IGN;
3212 }
3213
3214 static int
3215 roff_rr(ROFF_ARGS)
3216 {
3217 struct roffreg *reg, **prev;
3218 char *name, *cp;
3219 size_t namesz;
3220
3221 name = cp = buf->buf + pos;
3222 if (*name == '\0')
3223 return ROFF_IGN;
3224 namesz = roff_getname(r, &cp, ln, pos);
3225 name[namesz] = '\0';
3226
3227 prev = &r->regtab;
3228 while (1) {
3229 reg = *prev;
3230 if (reg == NULL || !strcmp(name, reg->key.p))
3231 break;
3232 prev = &reg->next;
3233 }
3234 if (reg != NULL) {
3235 *prev = reg->next;
3236 free(reg->key.p);
3237 free(reg);
3238 }
3239 return ROFF_IGN;
3240 }
3241
3242 /* --- handler functions for roff requests -------------------------------- */
3243
3244 static int
3245 roff_rm(ROFF_ARGS)
3246 {
3247 const char *name;
3248 char *cp;
3249 size_t namesz;
3250
3251 cp = buf->buf + pos;
3252 while (*cp != '\0') {
3253 name = cp;
3254 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
3255 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
3256 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3257 if (name[namesz] == '\\' || name[namesz] == '\t')
3258 break;
3259 }
3260 return ROFF_IGN;
3261 }
3262
3263 static int
3264 roff_it(ROFF_ARGS)
3265 {
3266 int iv;
3267
3268 /* Parse the number of lines. */
3269
3270 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
3271 mandoc_msg(MANDOCERR_IT_NONUM,
3272 ln, ppos, "%s", buf->buf + 1);
3273 return ROFF_IGN;
3274 }
3275
3276 while (isspace((unsigned char)buf->buf[pos]))
3277 pos++;
3278
3279 /*
3280 * Arm the input line trap.
3281 * Special-casing "an-trap" is an ugly workaround to cope
3282 * with DocBook stupidly fiddling with man(7) internals.
3283 */
3284
3285 roffit_lines = iv;
3286 roffit_macro = mandoc_strdup(iv != 1 ||
3287 strcmp(buf->buf + pos, "an-trap") ?
3288 buf->buf + pos : "br");
3289 return ROFF_IGN;
3290 }
3291
3292 static int
3293 roff_Dd(ROFF_ARGS)
3294 {
3295 int mask;
3296 enum roff_tok t, te;
3297
3298 switch (tok) {
3299 case ROFF_Dd:
3300 tok = MDOC_Dd;
3301 te = MDOC_MAX;
3302 if (r->format == 0)
3303 r->format = MPARSE_MDOC;
3304 mask = MPARSE_MDOC | MPARSE_QUICK;
3305 break;
3306 case ROFF_TH:
3307 tok = MAN_TH;
3308 te = MAN_MAX;
3309 if (r->format == 0)
3310 r->format = MPARSE_MAN;
3311 mask = MPARSE_QUICK;
3312 break;
3313 default:
3314 abort();
3315 }
3316 if ((r->options & mask) == 0)
3317 for (t = tok; t < te; t++)
3318 roff_setstr(r, roff_name[t], NULL, 0);
3319 return ROFF_CONT;
3320 }
3321
3322 static int
3323 roff_TE(ROFF_ARGS)
3324 {
3325 r->man->flags &= ~ROFF_NONOFILL;
3326 if (r->tbl == NULL) {
3327 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE");
3328 return ROFF_IGN;
3329 }
3330 if (tbl_end(r->tbl, 0) == 0) {
3331 r->tbl = NULL;
3332 free(buf->buf);
3333 buf->buf = mandoc_strdup(".sp");
3334 buf->sz = 4;
3335 *offs = 0;
3336 return ROFF_REPARSE;
3337 }
3338 r->tbl = NULL;
3339 return ROFF_IGN;
3340 }
3341
3342 static int
3343 roff_T_(ROFF_ARGS)
3344 {
3345
3346 if (NULL == r->tbl)
3347 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&");
3348 else
3349 tbl_restart(ln, ppos, r->tbl);
3350
3351 return ROFF_IGN;
3352 }
3353
3354 /*
3355 * Handle in-line equation delimiters.
3356 */
3357 static int
3358 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
3359 {
3360 char *cp1, *cp2;
3361 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
3362
3363 /*
3364 * Outside equations, look for an opening delimiter.
3365 * If we are inside an equation, we already know it is
3366 * in-line, or this function wouldn't have been called;
3367 * so look for a closing delimiter.
3368 */
3369
3370 cp1 = buf->buf + pos;
3371 cp2 = strchr(cp1, r->eqn == NULL ?
3372 r->last_eqn->odelim : r->last_eqn->cdelim);
3373 if (cp2 == NULL)
3374 return ROFF_CONT;
3375
3376 *cp2++ = '\0';
3377 bef_pr = bef_nl = aft_nl = aft_pr = "";
3378
3379 /* Handle preceding text, protecting whitespace. */
3380
3381 if (*buf->buf != '\0') {
3382 if (r->eqn == NULL)
3383 bef_pr = "\\&";
3384 bef_nl = "\n";
3385 }
3386
3387 /*
3388 * Prepare replacing the delimiter with an equation macro
3389 * and drop leading white space from the equation.
3390 */
3391
3392 if (r->eqn == NULL) {
3393 while (*cp2 == ' ')
3394 cp2++;
3395 mac = ".EQ";
3396 } else
3397 mac = ".EN";
3398
3399 /* Handle following text, protecting whitespace. */
3400
3401 if (*cp2 != '\0') {
3402 aft_nl = "\n";
3403 if (r->eqn != NULL)
3404 aft_pr = "\\&";
3405 }
3406
3407 /* Do the actual replacement. */
3408
3409 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
3410 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
3411 free(buf->buf);
3412 buf->buf = cp1;
3413
3414 /* Toggle the in-line state of the eqn subsystem. */
3415
3416 r->eqn_inline = r->eqn == NULL;
3417 return ROFF_REPARSE;
3418 }
3419
3420 static int
3421 roff_EQ(ROFF_ARGS)
3422 {
3423 struct roff_node *n;
3424
3425 if (r->man->meta.macroset == MACROSET_MAN)
3426 man_breakscope(r->man, ROFF_EQ);
3427 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3428 if (ln > r->man->last->line)
3429 n->flags |= NODE_LINE;
3430 n->eqn = eqn_box_new();
3431 roff_node_append(r->man, n);
3432 r->man->next = ROFF_NEXT_SIBLING;
3433
3434 assert(r->eqn == NULL);
3435 if (r->last_eqn == NULL)
3436 r->last_eqn = eqn_alloc();
3437 else
3438 eqn_reset(r->last_eqn);
3439 r->eqn = r->last_eqn;
3440 r->eqn->node = n;
3441
3442 if (buf->buf[pos] != '\0')
3443 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3444 ".EQ %s", buf->buf + pos);
3445
3446 return ROFF_IGN;
3447 }
3448
3449 static int
3450 roff_EN(ROFF_ARGS)
3451 {
3452 if (r->eqn != NULL) {
3453 eqn_parse(r->eqn);
3454 r->eqn = NULL;
3455 } else
3456 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN");
3457 if (buf->buf[pos] != '\0')
3458 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3459 "EN %s", buf->buf + pos);
3460 return ROFF_IGN;
3461 }
3462
3463 static int
3464 roff_TS(ROFF_ARGS)
3465 {
3466 if (r->tbl != NULL) {
3467 mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
3468 tbl_end(r->tbl, 0);
3469 }
3470 r->man->flags |= ROFF_NONOFILL;
3471 r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
3472 if (r->last_tbl == NULL)
3473 r->first_tbl = r->tbl;
3474 r->last_tbl = r->tbl;
3475 return ROFF_IGN;
3476 }
3477
3478 static int
3479 roff_noarg(ROFF_ARGS)
3480 {
3481 if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3482 man_breakscope(r->man, tok);
3483 if (tok == ROFF_brp)
3484 tok = ROFF_br;
3485 roff_elem_alloc(r->man, ln, ppos, tok);
3486 if (buf->buf[pos] != '\0')
3487 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3488 "%s %s", roff_name[tok], buf->buf + pos);
3489 if (tok == ROFF_nf)
3490 r->man->flags |= ROFF_NOFILL;
3491 else if (tok == ROFF_fi)
3492 r->man->flags &= ~ROFF_NOFILL;
3493 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3494 r->man->next = ROFF_NEXT_SIBLING;
3495 return ROFF_IGN;
3496 }
3497
3498 static int
3499 roff_onearg(ROFF_ARGS)
3500 {
3501 struct roff_node *n;
3502 char *cp;
3503 int npos;
3504
3505 if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3506 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3507 tok == ROFF_ti))
3508 man_breakscope(r->man, tok);
3509
3510 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3511 r->man->last = roffce_node;
3512 r->man->next = ROFF_NEXT_SIBLING;
3513 }
3514
3515 roff_elem_alloc(r->man, ln, ppos, tok);
3516 n = r->man->last;
3517
3518 cp = buf->buf + pos;
3519 if (*cp != '\0') {
3520 while (*cp != '\0' && *cp != ' ')
3521 cp++;
3522 while (*cp == ' ')
3523 *cp++ = '\0';
3524 if (*cp != '\0')
3525 mandoc_msg(MANDOCERR_ARG_EXCESS,
3526 ln, (int)(cp - buf->buf),
3527 "%s ... %s", roff_name[tok], cp);
3528 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3529 }
3530
3531 if (tok == ROFF_ce || tok == ROFF_rj) {
3532 if (r->man->last->type == ROFFT_ELEM) {
3533 roff_word_alloc(r->man, ln, pos, "1");
3534 r->man->last->flags |= NODE_NOSRC;
3535 }
3536 npos = 0;
3537 if (roff_evalnum(r, ln, r->man->last->string, &npos,
3538 &roffce_lines, 0) == 0) {
3539 mandoc_msg(MANDOCERR_CE_NONUM,
3540 ln, pos, "ce %s", buf->buf + pos);
3541 roffce_lines = 1;
3542 }
3543 if (roffce_lines < 1) {
3544 r->man->last = r->man->last->parent;
3545 roffce_node = NULL;
3546 roffce_lines = 0;
3547 } else
3548 roffce_node = r->man->last->parent;
3549 } else {
3550 n->flags |= NODE_VALID | NODE_ENDED;
3551 r->man->last = n;
3552 }
3553 n->flags |= NODE_LINE;
3554 r->man->next = ROFF_NEXT_SIBLING;
3555 return ROFF_IGN;
3556 }
3557
3558 static int
3559 roff_manyarg(ROFF_ARGS)
3560 {
3561 struct roff_node *n;
3562 char *sp, *ep;
3563
3564 roff_elem_alloc(r->man, ln, ppos, tok);
3565 n = r->man->last;
3566
3567 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3568 while (*ep != '\0' && *ep != ' ')
3569 ep++;
3570 while (*ep == ' ')
3571 *ep++ = '\0';
3572 roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3573 }
3574
3575 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3576 r->man->last = n;
3577 r->man->next = ROFF_NEXT_SIBLING;
3578 return ROFF_IGN;
3579 }
3580
3581 static int
3582 roff_als(ROFF_ARGS)
3583 {
3584 char *oldn, *newn, *end, *value;
3585 size_t oldsz, newsz, valsz;
3586
3587 newn = oldn = buf->buf + pos;
3588 if (*newn == '\0')
3589 return ROFF_IGN;
3590
3591 newsz = roff_getname(r, &oldn, ln, pos);
3592 if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0')
3593 return ROFF_IGN;
3594
3595 end = oldn;
3596 oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3597 if (oldsz == 0)
3598 return ROFF_IGN;
3599
3600 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3601 (int)oldsz, oldn);
3602 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3603 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3604 free(value);
3605 return ROFF_IGN;
3606 }
3607
3608 /*
3609 * The .break request only makes sense inside conditionals,
3610 * and that case is already handled in roff_cond_sub().
3611 */
3612 static int
3613 roff_break(ROFF_ARGS)
3614 {
3615 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break");
3616 return ROFF_IGN;
3617 }
3618
3619 static int
3620 roff_cc(ROFF_ARGS)
3621 {
3622 const char *p;
3623
3624 p = buf->buf + pos;
3625
3626 if (*p == '\0' || (r->control = *p++) == '.')
3627 r->control = '\0';
3628
3629 if (*p != '\0')
3630 mandoc_msg(MANDOCERR_ARG_EXCESS,
3631 ln, p - buf->buf, "cc ... %s", p);
3632
3633 return ROFF_IGN;
3634 }
3635
3636 static int
3637 roff_char(ROFF_ARGS)
3638 {
3639 const char *p, *kp, *vp;
3640 size_t ksz, vsz;
3641 int font;
3642
3643 /* Parse the character to be replaced. */
3644
3645 kp = buf->buf + pos;
3646 p = kp + 1;
3647 if (*kp == '\0' || (*kp == '\\' &&
3648 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3649 (*p != ' ' && *p != '\0')) {
3650 mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp);
3651 return ROFF_IGN;
3652 }
3653 ksz = p - kp;
3654 while (*p == ' ')
3655 p++;
3656
3657 /*
3658 * If the replacement string contains a font escape sequence,
3659 * we have to restore the font at the end.
3660 */
3661
3662 vp = p;
3663 vsz = strlen(p);
3664 font = 0;
3665 while (*p != '\0') {
3666 if (*p++ != '\\')
3667 continue;
3668 switch (mandoc_escape(&p, NULL, NULL)) {
3669 case ESCAPE_FONT:
3670 case ESCAPE_FONTROMAN:
3671 case ESCAPE_FONTITALIC:
3672 case ESCAPE_FONTBOLD:
3673 case ESCAPE_FONTBI:
3674 case ESCAPE_FONTCR:
3675 case ESCAPE_FONTCB:
3676 case ESCAPE_FONTCI:
3677 case ESCAPE_FONTPREV:
3678 font++;
3679 break;
3680 default:
3681 break;
3682 }
3683 }
3684 if (font > 1)
3685 mandoc_msg(MANDOCERR_CHAR_FONT,
3686 ln, (int)(vp - buf->buf), "%s", vp);
3687
3688 /*
3689 * Approximate the effect of .char using the .tr tables.
3690 * XXX In groff, .char and .tr interact differently.
3691 */
3692
3693 if (ksz == 1) {
3694 if (r->xtab == NULL)
3695 r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3696 assert((unsigned int)*kp < 128);
3697 free(r->xtab[(int)*kp].p);
3698 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3699 "%s%s", vp, font ? "\fP" : "");
3700 } else {
3701 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3702 if (font)
3703 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3704 }
3705 return ROFF_IGN;
3706 }
3707
3708 static int
3709 roff_ec(ROFF_ARGS)
3710 {
3711 const char *p;
3712
3713 p = buf->buf + pos;
3714 if (*p == '\0')
3715 r->escape = '\\';
3716 else {
3717 r->escape = *p;
3718 if (*++p != '\0')
3719 mandoc_msg(MANDOCERR_ARG_EXCESS, ln,
3720 (int)(p - buf->buf), "ec ... %s", p);
3721 }
3722 return ROFF_IGN;
3723 }
3724
3725 static int
3726 roff_eo(ROFF_ARGS)
3727 {
3728 r->escape = '\0';
3729 if (buf->buf[pos] != '\0')
3730 mandoc_msg(MANDOCERR_ARG_SKIP,
3731 ln, pos, "eo %s", buf->buf + pos);
3732 return ROFF_IGN;
3733 }
3734
3735 static int
3736 roff_mc(ROFF_ARGS)
3737 {
3738 struct roff_node *n;
3739 char *cp;
3740
3741 /* Parse the first argument. */
3742
3743 cp = buf->buf + pos;
3744 if (*cp != '\0')
3745 cp++;
3746 if (buf->buf[pos] == '\\') {
3747 switch (mandoc_escape((const char **)&cp, NULL, NULL)) {
3748 case ESCAPE_SPECIAL:
3749 case ESCAPE_UNICODE:
3750 case ESCAPE_NUMBERED:
3751 break;
3752 default:
3753 *cp = '\0';
3754 mandoc_msg(MANDOCERR_MC_ESC, ln, pos,
3755 "mc %s", buf->buf + pos);
3756 buf->buf[pos] = '\0';
3757 break;
3758 }
3759 }
3760
3761 /* Ignore additional arguments. */
3762
3763 while (*cp == ' ')
3764 *cp++ = '\0';
3765 if (*cp != '\0') {
3766 mandoc_msg(MANDOCERR_MC_DIST, ln, (int)(cp - buf->buf),
3767 "mc ... %s", cp);
3768 *cp = '\0';
3769 }
3770
3771 /* Create the .mc node. */
3772
3773 roff_elem_alloc(r->man, ln, ppos, tok);
3774 n = r->man->last;
3775 if (buf->buf[pos] != '\0')
3776 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3777 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3778 r->man->last = n;
3779 r->man->next = ROFF_NEXT_SIBLING;
3780 return ROFF_IGN;
3781 }
3782
3783 static int
3784 roff_nop(ROFF_ARGS)
3785 {
3786 while (buf->buf[pos] == ' ')
3787 pos++;
3788 *offs = pos;
3789 return ROFF_RERUN;
3790 }
3791
3792 static int
3793 roff_tr(ROFF_ARGS)
3794 {
3795 const char *p, *first, *second;
3796 size_t fsz, ssz;
3797 enum mandoc_esc esc;
3798
3799 p = buf->buf + pos;
3800
3801 if (*p == '\0') {
3802 mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr");
3803 return ROFF_IGN;
3804 }
3805
3806 while (*p != '\0') {
3807 fsz = ssz = 1;
3808
3809 first = p++;
3810 if (*first == '\\') {
3811 esc = mandoc_escape(&p, NULL, NULL);
3812 if (esc == ESCAPE_ERROR) {
3813 mandoc_msg(MANDOCERR_ESC_BAD, ln,
3814 (int)(p - buf->buf), "%s", first);
3815 return ROFF_IGN;
3816 }
3817 fsz = (size_t)(p - first);
3818 }
3819
3820 second = p++;
3821 if (*second == '\\') {
3822 esc = mandoc_escape(&p, NULL, NULL);
3823 if (esc == ESCAPE_ERROR) {
3824 mandoc_msg(MANDOCERR_ESC_BAD, ln,
3825 (int)(p - buf->buf), "%s", second);
3826 return ROFF_IGN;
3827 }
3828 ssz = (size_t)(p - second);
3829 } else if (*second == '\0') {
3830 mandoc_msg(MANDOCERR_TR_ODD, ln,
3831 (int)(first - buf->buf), "tr %s", first);
3832 second = " ";
3833 p--;
3834 }
3835
3836 if (fsz > 1) {
3837 roff_setstrn(&r->xmbtab, first, fsz,
3838 second, ssz, 0);
3839 continue;
3840 }
3841
3842 if (r->xtab == NULL)
3843 r->xtab = mandoc_calloc(128,
3844 sizeof(struct roffstr));
3845
3846 free(r->xtab[(int)*first].p);
3847 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3848 r->xtab[(int)*first].sz = ssz;
3849 }
3850
3851 return ROFF_IGN;
3852 }
3853
3854 /*
3855 * Implementation of the .return request.
3856 * There is no need to call roff_userret() from here.
3857 * The read module will call that after rewinding the reader stack
3858 * to the place from where the current macro was called.
3859 */
3860 static int
3861 roff_return(ROFF_ARGS)
3862 {
3863 if (r->mstackpos >= 0)
3864 return ROFF_IGN | ROFF_USERRET;
3865
3866 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return");
3867 return ROFF_IGN;
3868 }
3869
3870 static int
3871 roff_rn(ROFF_ARGS)
3872 {
3873 const char *value;
3874 char *oldn, *newn, *end;
3875 size_t oldsz, newsz;
3876 int deftype;
3877
3878 oldn = newn = buf->buf + pos;
3879 if (*oldn == '\0')
3880 return ROFF_IGN;
3881
3882 oldsz = roff_getname(r, &newn, ln, pos);
3883 if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0')
3884 return ROFF_IGN;
3885
3886 end = newn;
3887 newsz = roff_getname(r, &end, ln, newn - buf->buf);
3888 if (newsz == 0)
3889 return ROFF_IGN;
3890
3891 deftype = ROFFDEF_ANY;
3892 value = roff_getstrn(r, oldn, oldsz, &deftype);
3893 switch (deftype) {
3894 case ROFFDEF_USER:
3895 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3896 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3897 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3898 break;
3899 case ROFFDEF_PRE:
3900 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3901 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3902 break;
3903 case ROFFDEF_REN:
3904 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3905 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3906 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3907 break;
3908 case ROFFDEF_STD:
3909 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3910 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3911 break;
3912 default:
3913 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3914 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3915 break;
3916 }
3917 return ROFF_IGN;
3918 }
3919
3920 static int
3921 roff_shift(ROFF_ARGS)
3922 {
3923 struct mctx *ctx;
3924 int argpos, levels, i;
3925
3926 argpos = pos;
3927 levels = 1;
3928 if (buf->buf[pos] != '\0' &&
3929 roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
3930 mandoc_msg(MANDOCERR_CE_NONUM,
3931 ln, pos, "shift %s", buf->buf + pos);
3932 levels = 1;
3933 }
3934 if (r->mstackpos < 0) {
3935 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift");
3936 return ROFF_IGN;
3937 }
3938 ctx = r->mstack + r->mstackpos;
3939 if (levels > ctx->argc) {
3940 mandoc_msg(MANDOCERR_SHIFT,
3941 ln, argpos, "%d, but max is %d", levels, ctx->argc);
3942 levels = ctx->argc;
3943 }
3944 if (levels < 0) {
3945 mandoc_msg(MANDOCERR_ARG_NEG, ln, argpos, "shift %d", levels);
3946 levels = 0;
3947 }
3948 if (levels == 0)
3949 return ROFF_IGN;
3950 for (i = 0; i < levels; i++)
3951 free(ctx->argv[i]);
3952 ctx->argc -= levels;
3953 for (i = 0; i < ctx->argc; i++)
3954 ctx->argv[i] = ctx->argv[i + levels];
3955 return ROFF_IGN;
3956 }
3957
3958 static int
3959 roff_so(ROFF_ARGS)
3960 {
3961 char *name, *cp;
3962
3963 name = buf->buf + pos;
3964 mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name);
3965
3966 /*
3967 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3968 * opening anything that's not in our cwd or anything beneath
3969 * it. Thus, explicitly disallow traversing up the file-system
3970 * or using absolute paths.
3971 */
3972
3973 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3974 mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name);
3975 buf->sz = mandoc_asprintf(&cp,
3976 ".sp\nSee the file %s.\n.sp", name) + 1;
3977 free(buf->buf);
3978 buf->buf = cp;
3979 *offs = 0;
3980 return ROFF_REPARSE;
3981 }
3982
3983 *offs = pos;
3984 return ROFF_SO;
3985 }
3986
3987 /* --- user defined strings and macros ------------------------------------ */
3988
3989 static int
3990 roff_userdef(ROFF_ARGS)
3991 {
3992 struct mctx *ctx;
3993 char *arg, *ap, *dst, *src;
3994 size_t sz;
3995
3996 /* If the macro is empty, ignore it altogether. */
3997
3998 if (*r->current_string == '\0')
3999 return ROFF_IGN;
4000
4001 /* Initialize a new macro stack context. */
4002
4003 if (++r->mstackpos == r->mstacksz) {
4004 r->mstack = mandoc_recallocarray(r->mstack,
4005 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
4006 r->mstacksz += 8;
4007 }
4008 ctx = r->mstack + r->mstackpos;
4009 ctx->argc = 0;
4010
4011 /*
4012 * Collect pointers to macro argument strings,
4013 * NUL-terminating them and escaping quotes.
4014 */
4015
4016 src = buf->buf + pos;
4017 while (*src != '\0') {
4018 if (ctx->argc == ctx->argsz) {
4019 ctx->argsz += 8;
4020 ctx->argv = mandoc_reallocarray(ctx->argv,
4021 ctx->argsz, sizeof(*ctx->argv));
4022 }
4023 arg = roff_getarg(r, &src, ln, &pos);
4024 sz = 1; /* For the terminating NUL. */
4025 for (ap = arg; *ap != '\0'; ap++)
4026 sz += *ap == '"' ? 4 : 1;
4027 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
4028 for (ap = arg; *ap != '\0'; ap++) {
4029 if (*ap == '"') {
4030 memcpy(dst, "\\(dq", 4);
4031 dst += 4;
4032 } else
4033 *dst++ = *ap;
4034 }
4035 *dst = '\0';
4036 free(arg);
4037 }
4038
4039 /* Replace the macro invocation by the macro definition. */
4040
4041 free(buf->buf);
4042 buf->buf = mandoc_strdup(r->current_string);
4043 buf->sz = strlen(buf->buf) + 1;
4044 *offs = 0;
4045
4046 return buf->buf[buf->sz - 2] == '\n' ?
4047 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
4048 }
4049
4050 /*
4051 * Calling a high-level macro that was renamed with .rn.
4052 * r->current_string has already been set up by roff_parse().
4053 */
4054 static int
4055 roff_renamed(ROFF_ARGS)
4056 {
4057 char *nbuf;
4058
4059 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
4060 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
4061 free(buf->buf);
4062 buf->buf = nbuf;
4063 *offs = 0;
4064 return ROFF_CONT;
4065 }
4066
4067 /*
4068 * Measure the length in bytes of the roff identifier at *cpp
4069 * and advance the pointer to the next word.
4070 */
4071 static size_t
4072 roff_getname(struct roff *r, char **cpp, int ln, int pos)
4073 {
4074 char *name, *cp;
4075 size_t namesz;
4076
4077 name = *cpp;
4078 if (*name == '\0')
4079 return 0;
4080
4081 /* Advance cp to the byte after the end of the name. */
4082
4083 for (cp = name; 1; cp++) {
4084 namesz = cp - name;
4085 if (*cp == '\0')
4086 break;
4087 if (*cp == ' ' || *cp == '\t') {
4088 cp++;
4089 break;
4090 }
4091 if (*cp != '\\')
4092 continue;
4093 if (cp[1] == '{' || cp[1] == '}')
4094 break;
4095 if (*++cp == '\\')
4096 continue;
4097 mandoc_msg(MANDOCERR_NAMESC, ln, pos,
4098 "%.*s", (int)(cp - name + 1), name);
4099 mandoc_escape((const char **)&cp, NULL, NULL);
4100 break;
4101 }
4102
4103 /* Read past spaces. */
4104
4105 while (*cp == ' ')
4106 cp++;
4107
4108 *cpp = cp;
4109 return namesz;
4110 }
4111
4112 /*
4113 * Store *string into the user-defined string called *name.
4114 * To clear an existing entry, call with (*r, *name, NULL, 0).
4115 * append == 0: replace mode
4116 * append == 1: single-line append mode
4117 * append == 2: multiline append mode, append '\n' after each call
4118 */
4119 static void
4120 roff_setstr(struct roff *r, const char *name, const char *string,
4121 int append)
4122 {
4123 size_t namesz;
4124
4125 namesz = strlen(name);
4126 roff_setstrn(&r->strtab, name, namesz, string,
4127 string ? strlen(string) : 0, append);
4128 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
4129 }
4130
4131 static void
4132 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
4133 const char *string, size_t stringsz, int append)
4134 {
4135 struct roffkv *n;
4136 char *c;
4137 int i;
4138 size_t oldch, newch;
4139
4140 /* Search for an existing string with the same name. */
4141 n = *r;
4142
4143 while (n && (namesz != n->key.sz ||
4144 strncmp(n->key.p, name, namesz)))
4145 n = n->next;
4146
4147 if (NULL == n) {
4148 /* Create a new string table entry. */
4149 n = mandoc_malloc(sizeof(struct roffkv));
4150 n->key.p = mandoc_strndup(name, namesz);
4151 n->key.sz = namesz;
4152 n->val.p = NULL;
4153 n->val.sz = 0;
4154 n->next = *r;
4155 *r = n;
4156 } else if (0 == append) {
4157 free(n->val.p);
4158 n->val.p = NULL;
4159 n->val.sz = 0;
4160 }
4161
4162 if (NULL == string)
4163 return;
4164
4165 /*
4166 * One additional byte for the '\n' in multiline mode,
4167 * and one for the terminating '\0'.
4168 */
4169 newch = stringsz + (1 < append ? 2u : 1u);
4170
4171 if (NULL == n->val.p) {
4172 n->val.p = mandoc_malloc(newch);
4173 *n->val.p = '\0';
4174 oldch = 0;
4175 } else {
4176 oldch = n->val.sz;
4177 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
4178 }
4179
4180 /* Skip existing content in the destination buffer. */
4181 c = n->val.p + (int)oldch;
4182
4183 /* Append new content to the destination buffer. */
4184 i = 0;
4185 while (i < (int)stringsz) {
4186 /*
4187 * Rudimentary roff copy mode:
4188 * Handle escaped backslashes.
4189 */
4190 if ('\\' == string[i] && '\\' == string[i + 1])
4191 i++;
4192 *c++ = string[i++];
4193 }
4194
4195 /* Append terminating bytes. */
4196 if (1 < append)
4197 *c++ = '\n';
4198
4199 *c = '\0';
4200 n->val.sz = (int)(c - n->val.p);
4201 }
4202
4203 static const char *
4204 roff_getstrn(struct roff *r, const char *name, size_t len,
4205 int *deftype)
4206 {
4207 const struct roffkv *n;
4208 int found, i;
4209 enum roff_tok tok;
4210
4211 found = 0;
4212 for (n = r->strtab; n != NULL; n = n->next) {
4213 if (strncmp(name, n->key.p, len) != 0 ||
4214 n->key.p[len] != '\0' || n->val.p == NULL)
4215 continue;
4216 if (*deftype & ROFFDEF_USER) {
4217 *deftype = ROFFDEF_USER;
4218 return n->val.p;
4219 } else {
4220 found = 1;
4221 break;
4222 }
4223 }
4224 for (n = r->rentab; n != NULL; n = n->next) {
4225 if (strncmp(name, n->key.p, len) != 0 ||
4226 n->key.p[len] != '\0' || n->val.p == NULL)
4227 continue;
4228 if (*deftype & ROFFDEF_REN) {
4229 *deftype = ROFFDEF_REN;
4230 return n->val.p;
4231 } else {
4232 found = 1;
4233 break;
4234 }
4235 }
4236 for (i = 0; i < PREDEFS_MAX; i++) {
4237 if (strncmp(name, predefs[i].name, len) != 0 ||
4238 predefs[i].name[len] != '\0')
4239 continue;
4240 if (*deftype & ROFFDEF_PRE) {
4241 *deftype = ROFFDEF_PRE;
4242 return predefs[i].str;
4243 } else {
4244 found = 1;
4245 break;
4246 }
4247 }
4248 if (r->man->meta.macroset != MACROSET_MAN) {
4249 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
4250 if (strncmp(name, roff_name[tok], len) != 0 ||
4251 roff_name[tok][len] != '\0')
4252 continue;
4253 if (*deftype & ROFFDEF_STD) {
4254 *deftype = ROFFDEF_STD;
4255 return NULL;
4256 } else {
4257 found = 1;
4258 break;
4259 }
4260 }
4261 }
4262 if (r->man->meta.macroset != MACROSET_MDOC) {
4263 for (tok = MAN_TH; tok < MAN_MAX; tok++) {
4264 if (strncmp(name, roff_name[tok], len) != 0 ||
4265 roff_name[tok][len] != '\0')
4266 continue;
4267 if (*deftype & ROFFDEF_STD) {
4268 *deftype = ROFFDEF_STD;
4269 return NULL;
4270 } else {
4271 found = 1;
4272 break;
4273 }
4274 }
4275 }
4276
4277 if (found == 0 && *deftype != ROFFDEF_ANY) {
4278 if (*deftype & ROFFDEF_REN) {
4279 /*
4280 * This might still be a request,
4281 * so do not treat it as undefined yet.
4282 */
4283 *deftype = ROFFDEF_UNDEF;
4284 return NULL;
4285 }
4286
4287 /* Using an undefined string defines it to be empty. */
4288
4289 roff_setstrn(&r->strtab, name, len, "", 0, 0);
4290 roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
4291 }
4292
4293 *deftype = 0;
4294 return NULL;
4295 }
4296
4297 static void
4298 roff_freestr(struct roffkv *r)
4299 {
4300 struct roffkv *n, *nn;
4301
4302 for (n = r; n; n = nn) {
4303 free(n->key.p);
4304 free(n->val.p);
4305 nn = n->next;
4306 free(n);
4307 }
4308 }
4309
4310 /* --- accessors and utility functions ------------------------------------ */
4311
4312 /*
4313 * Duplicate an input string, making the appropriate character
4314 * conversations (as stipulated by `tr') along the way.
4315 * Returns a heap-allocated string with all the replacements made.
4316 */
4317 char *
4318 roff_strdup(const struct roff *r, const char *p)
4319 {
4320 const struct roffkv *cp;
4321 char *res;
4322 const char *pp;
4323 size_t ssz, sz;
4324 enum mandoc_esc esc;
4325
4326 if (NULL == r->xmbtab && NULL == r->xtab)
4327 return mandoc_strdup(p);
4328 else if ('\0' == *p)
4329 return mandoc_strdup("");
4330
4331 /*
4332 * Step through each character looking for term matches
4333 * (remember that a `tr' can be invoked with an escape, which is
4334 * a glyph but the escape is multi-character).
4335 * We only do this if the character hash has been initialised
4336 * and the string is >0 length.
4337 */
4338
4339 res = NULL;
4340 ssz = 0;
4341
4342 while ('\0' != *p) {
4343 assert((unsigned int)*p < 128);
4344 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4345 sz = r->xtab[(int)*p].sz;
4346 res = mandoc_realloc(res, ssz + sz + 1);
4347 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4348 ssz += sz;
4349 p++;
4350 continue;
4351 } else if ('\\' != *p) {
4352 res = mandoc_realloc(res, ssz + 2);
4353 res[ssz++] = *p++;
4354 continue;
4355 }
4356
4357 /* Search for term matches. */
4358 for (cp = r->xmbtab; cp; cp = cp->next)
4359 if (0 == strncmp(p, cp->key.p, cp->key.sz))
4360 break;
4361
4362 if (NULL != cp) {
4363 /*
4364 * A match has been found.
4365 * Append the match to the array and move
4366 * forward by its keysize.
4367 */
4368 res = mandoc_realloc(res,
4369 ssz + cp->val.sz + 1);
4370 memcpy(res + ssz, cp->val.p, cp->val.sz);
4371 ssz += cp->val.sz;
4372 p += (int)cp->key.sz;
4373 continue;
4374 }
4375
4376 /*
4377 * Handle escapes carefully: we need to copy
4378 * over just the escape itself, or else we might
4379 * do replacements within the escape itself.
4380 * Make sure to pass along the bogus string.
4381 */
4382 pp = p++;
4383 esc = mandoc_escape(&p, NULL, NULL);
4384 if (ESCAPE_ERROR == esc) {
4385 sz = strlen(pp);
4386 res = mandoc_realloc(res, ssz + sz + 1);
4387 memcpy(res + ssz, pp, sz);
4388 break;
4389 }
4390 /*
4391 * We bail out on bad escapes.
4392 * No need to warn: we already did so when
4393 * roff_expand() was called.
4394 */
4395 sz = (int)(p - pp);
4396 res = mandoc_realloc(res, ssz + sz + 1);
4397 memcpy(res + ssz, pp, sz);
4398 ssz += sz;
4399 }
4400
4401 res[(int)ssz] = '\0';
4402 return res;
4403 }
4404
4405 int
4406 roff_getformat(const struct roff *r)
4407 {
4408
4409 return r->format;
4410 }
4411
4412 /*
4413 * Find out whether a line is a macro line or not.
4414 * If it is, adjust the current position and return one; if it isn't,
4415 * return zero and don't change the current position.
4416 * If the control character has been set with `.cc', then let that grain
4417 * precedence.
4418 * This is slighly contrary to groff, where using the non-breaking
4419 * control character when `cc' has been invoked will cause the
4420 * non-breaking macro contents to be printed verbatim.
4421 */
4422 int
4423 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4424 {
4425 int pos;
4426
4427 pos = *ppos;
4428
4429 if (r->control != '\0' && cp[pos] == r->control)
4430 pos++;
4431 else if (r->control != '\0')
4432 return 0;
4433 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4434 pos += 2;
4435 else if ('.' == cp[pos] || '\'' == cp[pos])
4436 pos++;
4437 else
4438 return 0;
4439
4440 while (' ' == cp[pos] || '\t' == cp[pos])
4441 pos++;
4442
4443 *ppos = pos;
4444 return 1;
4445 }