git.cameronkatri.com Git - mandoc.git/blob - roff.c
Split a new function roff_parse_comment() out of roff_expand() because this
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.387 2022/05/01 16:22:06 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010-2015, 2017-2022 Ingo Schwarze <schwarze@openbsd.org>
4 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 * Implementation of the roff(7) parser for mandoc(1).
19 */
20 #include "config.h"
21
22 #include <sys/types.h>
23
24 #include <assert.h>
25 #include <ctype.h>
26 #include <limits.h>
27 #include <stddef.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32
33 #include "mandoc_aux.h"
34 #include "mandoc_ohash.h"
35 #include "mandoc.h"
36 #include "roff.h"
37 #include "mandoc_parse.h"
38 #include "libmandoc.h"
39 #include "roff_int.h"
40 #include "tbl_parse.h"
41 #include "eqn_parse.h"
42
/*
 * ASCII_ESC is the marker roff_getarg() leaves behind to tell
 * roff_expand() that an escape sequence came out of copy-in
 * processing and still needs to be checked or interpolated.
 * Nothing else uses it, so it lives here instead of in a header.
 */
#define	ASCII_ESC	27

/* Upper bound on string expansions per line; breaks infinite loops. */
#define	EXPAND_LIMIT	1000

/* Kinds of macro and string definitions (bit flags). */
#define	ROFFDEF_USER	(1 << 1)  /* Defined by the user. */
#define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
#define	ROFFDEF_REN	(1 << 3)  /* Standard macro that was renamed. */
#define	ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
			 ROFFDEF_REN | ROFFDEF_STD)
#define	ROFFDEF_UNDEF	(1 << 5)  /* Not defined at all. */
62
63 /* --- data types --------------------------------------------------------- */
64
/*
 * A very simple string buffer: a pointer plus its cached length.
 */
struct roffstr {
	char	*p;	/* NUL-terminated buffer */
	size_t	 sz;	/* cached strlen(p) */
};
72
73 /*
74 * A key-value roffstr pair as part of a singly-linked list.
75 */
76 struct roffkv {
77 struct roffstr key;
78 struct roffstr val;
79 struct roffkv *next; /* next in list */
80 };
81
82 /*
83 * A single number register as part of a singly-linked list.
84 */
85 struct roffreg {
86 struct roffstr key;
87 int val;
88 int step;
89 struct roffreg *next;
90 };
91
92 /*
93 * Association of request and macro names with token IDs.
94 */
95 struct roffreq {
96 enum roff_tok tok;
97 char name[];
98 };
99
/*
 * Context of one macro invocation.
 * Nested macro calls need one context each.
 */
struct mctx {
	char	**argv;
	int	  argc;
	int	  argsz;
};
109
/*
 * Complete state of one roff parser instance.
 */
struct roff {
	struct roff_man	*man;		/* mdoc or man parser */
	struct roffnode	*last;		/* leaf of the block stack */
	struct mctx	*mstack;	/* stack of macro contexts */
	int		*rstack;	/* stack of inverted `ie' values */
	struct ohash	*reqtab;	/* lookup table for requests */
	struct roffreg	*regtab;	/* number registers */
	struct roffkv	*strtab;	/* user-defined strings & macros */
	struct roffkv	*rentab;	/* renamed strings & macros */
	struct roffkv	*xmbtab;	/* multi-byte `tr' translations */
	struct roffstr	*xtab;		/* single-byte `tr' translations */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl;	/* first table parsed */
	struct tbl_node	*last_tbl;	/* last table parsed */
	struct tbl_node	*tbl;		/* table currently being parsed */
	struct eqn_node	*last_eqn;	/* equation parser */
	struct eqn_node	*eqn;		/* active equation parser */
	int		 eqn_inline;	/* current equation is inline */
	int		 options;	/* parse options */
	int		 mstacksz;	/* current size of mstack */
	int		 mstackpos;	/* position in mstack */
	int		 rstacksz;	/* current size limit of rstack */
	int		 rstackpos;	/* position in rstack */
	int		 format;	/* current file in mdoc or man format */
	char		 control;	/* control character */
	char		 escape;	/* escape character */
};
137
138 /*
139 * A macro definition, condition, or ignored block.
140 */
141 struct roffnode {
142 enum roff_tok tok; /* type of node */
143 struct roffnode *parent; /* up one in stack */
144 int line; /* parse line */
145 int col; /* parse col */
146 char *name; /* node name, e.g. macro name */
147 char *end; /* custom end macro of the block */
148 int endspan; /* scope to: 1=eol 2=next line -1=\} */
149 int rule; /* content is: 1=evaluated 0=skipped */
150 };
151
/*
 * Parameter list shared by every request handler,
 * and the matching function pointer type.
 */
#define	ROFF_ARGS	struct roff *r,	   /* parse ctx */ \
			enum roff_tok tok, /* tok of macro */ \
			struct buf *buf,   /* input buffer */ \
			int ln,		   /* parse line */ \
			int ppos,	   /* original pos in buffer */ \
			int pos,	   /* current pos in buffer */ \
			int *offs	   /* reset offset of buffer data */

typedef	int (*roffproc)(ROFF_ARGS);
161
162 struct roffmac {
163 roffproc proc; /* process new macro */
164 roffproc text; /* process as child text of macro */
165 roffproc sub; /* process as child of macro */
166 int flags;
167 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
168 };
169
/*
 * A predefined string, and the macro that builds one initializer
 * entry (including the trailing comma) for an array of them.
 */
struct predef {
	const char	*name;	/* predefined input name */
	const char	*str;	/* replacement symbol */
};

#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
177
178 /* --- function prototypes ------------------------------------------------ */
179
180 static int roffnode_cleanscope(struct roff *);
181 static int roffnode_pop(struct roff *);
182 static void roffnode_push(struct roff *, enum roff_tok,
183 const char *, int, int);
184 static void roff_addtbl(struct roff_man *, int, struct tbl_node *);
185 static int roff_als(ROFF_ARGS);
186 static int roff_block(ROFF_ARGS);
187 static int roff_block_text(ROFF_ARGS);
188 static int roff_block_sub(ROFF_ARGS);
189 static int roff_break(ROFF_ARGS);
190 static int roff_cblock(ROFF_ARGS);
191 static int roff_cc(ROFF_ARGS);
192 static int roff_ccond(struct roff *, int, int);
193 static int roff_char(ROFF_ARGS);
194 static int roff_cond(ROFF_ARGS);
195 static int roff_cond_checkend(ROFF_ARGS);
196 static int roff_cond_text(ROFF_ARGS);
197 static int roff_cond_sub(ROFF_ARGS);
198 static int roff_ds(ROFF_ARGS);
199 static int roff_ec(ROFF_ARGS);
200 static int roff_eo(ROFF_ARGS);
201 static int roff_eqndelim(struct roff *, struct buf *, int);
202 static int roff_evalcond(struct roff *, int, char *, int *);
203 static int roff_evalnum(struct roff *, int,
204 const char *, int *, int *, int);
205 static int roff_evalpar(struct roff *, int,
206 const char *, int *, int *, int);
207 static int roff_evalstrcond(const char *, int *);
208 static int roff_expand(struct roff *, struct buf *,
209 int, int, char);
210 static void roff_free1(struct roff *);
211 static void roff_freereg(struct roffreg *);
212 static void roff_freestr(struct roffkv *);
213 static size_t roff_getname(struct roff *, char **, int, int);
214 static int roff_getnum(const char *, int *, int *, int);
215 static int roff_getop(const char *, int *, char *);
216 static int roff_getregn(struct roff *,
217 const char *, size_t, char);
218 static int roff_getregro(const struct roff *,
219 const char *name);
220 static const char *roff_getstrn(struct roff *,
221 const char *, size_t, int *);
222 static int roff_hasregn(const struct roff *,
223 const char *, size_t);
224 static int roff_insec(ROFF_ARGS);
225 static int roff_it(ROFF_ARGS);
226 static int roff_line_ignore(ROFF_ARGS);
227 static void roff_man_alloc1(struct roff_man *);
228 static void roff_man_free1(struct roff_man *);
229 static int roff_manyarg(ROFF_ARGS);
230 static int roff_mc(ROFF_ARGS);
231 static int roff_noarg(ROFF_ARGS);
232 static int roff_nop(ROFF_ARGS);
233 static int roff_nr(ROFF_ARGS);
234 static int roff_onearg(ROFF_ARGS);
235 static enum roff_tok roff_parse(struct roff *, char *, int *,
236 int, int);
237 static int roff_parse_comment(struct roff *, struct buf *,
238 int, int, char);
239 static int roff_parsetext(struct roff *, struct buf *,
240 int, int *);
241 static int roff_renamed(ROFF_ARGS);
242 static int roff_req_or_macro(ROFF_ARGS);
243 static int roff_return(ROFF_ARGS);
244 static int roff_rm(ROFF_ARGS);
245 static int roff_rn(ROFF_ARGS);
246 static int roff_rr(ROFF_ARGS);
247 static void roff_setregn(struct roff *, const char *,
248 size_t, int, char, int);
249 static void roff_setstr(struct roff *,
250 const char *, const char *, int);
251 static void roff_setstrn(struct roffkv **, const char *,
252 size_t, const char *, size_t, int);
253 static int roff_shift(ROFF_ARGS);
254 static int roff_so(ROFF_ARGS);
255 static int roff_tr(ROFF_ARGS);
256 static int roff_Dd(ROFF_ARGS);
257 static int roff_TE(ROFF_ARGS);
258 static int roff_TS(ROFF_ARGS);
259 static int roff_EQ(ROFF_ARGS);
260 static int roff_EN(ROFF_ARGS);
261 static int roff_T_(ROFF_ARGS);
262 static int roff_unsupp(ROFF_ARGS);
263 static int roff_userdef(ROFF_ARGS);
264
265 /* --- constant data ------------------------------------------------------ */
266
/* Flag bits for numeric parsing. */
#define	ROFFNUM_SCALE	(1 << 0)  /* roff_getnum() honours scaling. */
#define	ROFFNUM_WHITE	(1 << 1)  /* roff_evalnum() skips whitespace. */
269
270 const char *__roff_name[MAN_MAX + 1] = {
271 "br", "ce", "fi", "ft",
272 "ll", "mc", "nf",
273 "po", "rj", "sp",
274 "ta", "ti", NULL,
275 "ab", "ad", "af", "aln",
276 "als", "am", "am1", "ami",
277 "ami1", "as", "as1", "asciify",
278 "backtrace", "bd", "bleedat", "blm",
279 "box", "boxa", "bp", "BP",
280 "break", "breakchar", "brnl", "brp",
281 "brpnl", "c2", "cc",
282 "cf", "cflags", "ch", "char",
283 "chop", "class", "close", "CL",
284 "color", "composite", "continue", "cp",
285 "cropat", "cs", "cu", "da",
286 "dch", "Dd", "de", "de1",
287 "defcolor", "dei", "dei1", "device",
288 "devicem", "di", "do", "ds",
289 "ds1", "dwh", "dt", "ec",
290 "ecr", "ecs", "el", "em",
291 "EN", "eo", "EP", "EQ",
292 "errprint", "ev", "evc", "ex",
293 "fallback", "fam", "fc", "fchar",
294 "fcolor", "fdeferlig", "feature", "fkern",
295 "fl", "flig", "fp", "fps",
296 "fschar", "fspacewidth", "fspecial", "ftr",
297 "fzoom", "gcolor", "hc", "hcode",
298 "hidechar", "hla", "hlm", "hpf",
299 "hpfa", "hpfcode", "hw", "hy",
300 "hylang", "hylen", "hym", "hypp",
301 "hys", "ie", "if", "ig",
302 "index", "it", "itc", "IX",
303 "kern", "kernafter", "kernbefore", "kernpair",
304 "lc", "lc_ctype", "lds", "length",
305 "letadj", "lf", "lg", "lhang",
306 "linetabs", "lnr", "lnrf", "lpfx",
307 "ls", "lsm", "lt",
308 "mediasize", "minss", "mk", "mso",
309 "na", "ne", "nh", "nhychar",
310 "nm", "nn", "nop", "nr",
311 "nrf", "nroff", "ns", "nx",
312 "open", "opena", "os", "output",
313 "padj", "papersize", "pc", "pev",
314 "pi", "PI", "pl", "pm",
315 "pn", "pnr", "ps",
316 "psbb", "pshape", "pso", "ptr",
317 "pvs", "rchar", "rd", "recursionlimit",
318 "return", "rfschar", "rhang",
319 "rm", "rn", "rnn", "rr",
320 "rs", "rt", "schar", "sentchar",
321 "shc", "shift", "sizes", "so",
322 "spacewidth", "special", "spreadwarn", "ss",
323 "sty", "substring", "sv", "sy",
324 "T&", "tc", "TE",
325 "TH", "tkf", "tl",
326 "tm", "tm1", "tmc", "tr",
327 "track", "transchar", "trf", "trimat",
328 "trin", "trnt", "troff", "TS",
329 "uf", "ul", "unformat", "unwatch",
330 "unwatchn", "vpt", "vs", "warn",
331 "warnscale", "watch", "watchlength", "watchn",
332 "wh", "while", "write", "writec",
333 "writem", "xflag", ".", NULL,
334 NULL, "text",
335 "Dd", "Dt", "Os", "Sh",
336 "Ss", "Pp", "D1", "Dl",
337 "Bd", "Ed", "Bl", "El",
338 "It", "Ad", "An", "Ap",
339 "Ar", "Cd", "Cm", "Dv",
340 "Er", "Ev", "Ex", "Fa",
341 "Fd", "Fl", "Fn", "Ft",
342 "Ic", "In", "Li", "Nd",
343 "Nm", "Op", "Ot", "Pa",
344 "Rv", "St", "Va", "Vt",
345 "Xr", "%A", "%B", "%D",
346 "%I", "%J", "%N", "%O",
347 "%P", "%R", "%T", "%V",
348 "Ac", "Ao", "Aq", "At",
349 "Bc", "Bf", "Bo", "Bq",
350 "Bsx", "Bx", "Db", "Dc",
351 "Do", "Dq", "Ec", "Ef",
352 "Em", "Eo", "Fx", "Ms",
353 "No", "Ns", "Nx", "Ox",
354 "Pc", "Pf", "Po", "Pq",
355 "Qc", "Ql", "Qo", "Qq",
356 "Re", "Rs", "Sc", "So",
357 "Sq", "Sm", "Sx", "Sy",
358 "Tn", "Ux", "Xc", "Xo",
359 "Fo", "Fc", "Oo", "Oc",
360 "Bk", "Ek", "Bt", "Hf",
361 "Fr", "Ud", "Lb", "Lp",
362 "Lk", "Mt", "Brq", "Bro",
363 "Brc", "%C", "Es", "En",
364 "Dx", "%Q", "%U", "Ta",
365 "Tg", NULL,
366 "TH", "SH", "SS", "TP",
367 "TQ",
368 "LP", "PP", "P", "IP",
369 "HP", "SM", "SB", "BI",
370 "IB", "BR", "RB", "R",
371 "B", "I", "IR", "RI",
372 "RE", "RS", "DT", "UC",
373 "PD", "AT", "in",
374 "SY", "YS", "OP",
375 "EX", "EE", "UR",
376 "UE", "MT", "ME", NULL
377 };
378 const char *const *roff_name = __roff_name;
379
380 static struct roffmac roffs[TOKEN_NONE] = {
381 { roff_noarg, NULL, NULL, 0 }, /* br */
382 { roff_onearg, NULL, NULL, 0 }, /* ce */
383 { roff_noarg, NULL, NULL, 0 }, /* fi */
384 { roff_onearg, NULL, NULL, 0 }, /* ft */
385 { roff_onearg, NULL, NULL, 0 }, /* ll */
386 { roff_mc, NULL, NULL, 0 }, /* mc */
387 { roff_noarg, NULL, NULL, 0 }, /* nf */
388 { roff_onearg, NULL, NULL, 0 }, /* po */
389 { roff_onearg, NULL, NULL, 0 }, /* rj */
390 { roff_onearg, NULL, NULL, 0 }, /* sp */
391 { roff_manyarg, NULL, NULL, 0 }, /* ta */
392 { roff_onearg, NULL, NULL, 0 }, /* ti */
393 { NULL, NULL, NULL, 0 }, /* ROFF_MAX */
394 { roff_unsupp, NULL, NULL, 0 }, /* ab */
395 { roff_line_ignore, NULL, NULL, 0 }, /* ad */
396 { roff_line_ignore, NULL, NULL, 0 }, /* af */
397 { roff_unsupp, NULL, NULL, 0 }, /* aln */
398 { roff_als, NULL, NULL, 0 }, /* als */
399 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am */
400 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am1 */
401 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami */
402 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami1 */
403 { roff_ds, NULL, NULL, 0 }, /* as */
404 { roff_ds, NULL, NULL, 0 }, /* as1 */
405 { roff_unsupp, NULL, NULL, 0 }, /* asciify */
406 { roff_line_ignore, NULL, NULL, 0 }, /* backtrace */
407 { roff_line_ignore, NULL, NULL, 0 }, /* bd */
408 { roff_line_ignore, NULL, NULL, 0 }, /* bleedat */
409 { roff_unsupp, NULL, NULL, 0 }, /* blm */
410 { roff_unsupp, NULL, NULL, 0 }, /* box */
411 { roff_unsupp, NULL, NULL, 0 }, /* boxa */
412 { roff_line_ignore, NULL, NULL, 0 }, /* bp */
413 { roff_unsupp, NULL, NULL, 0 }, /* BP */
414 { roff_break, NULL, NULL, 0 }, /* break */
415 { roff_line_ignore, NULL, NULL, 0 }, /* breakchar */
416 { roff_line_ignore, NULL, NULL, 0 }, /* brnl */
417 { roff_noarg, NULL, NULL, 0 }, /* brp */
418 { roff_line_ignore, NULL, NULL, 0 }, /* brpnl */
419 { roff_unsupp, NULL, NULL, 0 }, /* c2 */
420 { roff_cc, NULL, NULL, 0 }, /* cc */
421 { roff_insec, NULL, NULL, 0 }, /* cf */
422 { roff_line_ignore, NULL, NULL, 0 }, /* cflags */
423 { roff_line_ignore, NULL, NULL, 0 }, /* ch */
424 { roff_char, NULL, NULL, 0 }, /* char */
425 { roff_unsupp, NULL, NULL, 0 }, /* chop */
426 { roff_line_ignore, NULL, NULL, 0 }, /* class */
427 { roff_insec, NULL, NULL, 0 }, /* close */
428 { roff_unsupp, NULL, NULL, 0 }, /* CL */
429 { roff_line_ignore, NULL, NULL, 0 }, /* color */
430 { roff_unsupp, NULL, NULL, 0 }, /* composite */
431 { roff_unsupp, NULL, NULL, 0 }, /* continue */
432 { roff_line_ignore, NULL, NULL, 0 }, /* cp */
433 { roff_line_ignore, NULL, NULL, 0 }, /* cropat */
434 { roff_line_ignore, NULL, NULL, 0 }, /* cs */
435 { roff_line_ignore, NULL, NULL, 0 }, /* cu */
436 { roff_unsupp, NULL, NULL, 0 }, /* da */
437 { roff_unsupp, NULL, NULL, 0 }, /* dch */
438 { roff_Dd, NULL, NULL, 0 }, /* Dd */
439 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de */
440 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de1 */
441 { roff_line_ignore, NULL, NULL, 0 }, /* defcolor */
442 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei */
443 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei1 */
444 { roff_unsupp, NULL, NULL, 0 }, /* device */
445 { roff_unsupp, NULL, NULL, 0 }, /* devicem */
446 { roff_unsupp, NULL, NULL, 0 }, /* di */
447 { roff_unsupp, NULL, NULL, 0 }, /* do */
448 { roff_ds, NULL, NULL, 0 }, /* ds */
449 { roff_ds, NULL, NULL, 0 }, /* ds1 */
450 { roff_unsupp, NULL, NULL, 0 }, /* dwh */
451 { roff_unsupp, NULL, NULL, 0 }, /* dt */
452 { roff_ec, NULL, NULL, 0 }, /* ec */
453 { roff_unsupp, NULL, NULL, 0 }, /* ecr */
454 { roff_unsupp, NULL, NULL, 0 }, /* ecs */
455 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* el */
456 { roff_unsupp, NULL, NULL, 0 }, /* em */
457 { roff_EN, NULL, NULL, 0 }, /* EN */
458 { roff_eo, NULL, NULL, 0 }, /* eo */
459 { roff_unsupp, NULL, NULL, 0 }, /* EP */
460 { roff_EQ, NULL, NULL, 0 }, /* EQ */
461 { roff_line_ignore, NULL, NULL, 0 }, /* errprint */
462 { roff_unsupp, NULL, NULL, 0 }, /* ev */
463 { roff_unsupp, NULL, NULL, 0 }, /* evc */
464 { roff_unsupp, NULL, NULL, 0 }, /* ex */
465 { roff_line_ignore, NULL, NULL, 0 }, /* fallback */
466 { roff_line_ignore, NULL, NULL, 0 }, /* fam */
467 { roff_unsupp, NULL, NULL, 0 }, /* fc */
468 { roff_unsupp, NULL, NULL, 0 }, /* fchar */
469 { roff_line_ignore, NULL, NULL, 0 }, /* fcolor */
470 { roff_line_ignore, NULL, NULL, 0 }, /* fdeferlig */
471 { roff_line_ignore, NULL, NULL, 0 }, /* feature */
472 { roff_line_ignore, NULL, NULL, 0 }, /* fkern */
473 { roff_line_ignore, NULL, NULL, 0 }, /* fl */
474 { roff_line_ignore, NULL, NULL, 0 }, /* flig */
475 { roff_line_ignore, NULL, NULL, 0 }, /* fp */
476 { roff_line_ignore, NULL, NULL, 0 }, /* fps */
477 { roff_unsupp, NULL, NULL, 0 }, /* fschar */
478 { roff_line_ignore, NULL, NULL, 0 }, /* fspacewidth */
479 { roff_line_ignore, NULL, NULL, 0 }, /* fspecial */
480 { roff_line_ignore, NULL, NULL, 0 }, /* ftr */
481 { roff_line_ignore, NULL, NULL, 0 }, /* fzoom */
482 { roff_line_ignore, NULL, NULL, 0 }, /* gcolor */
483 { roff_line_ignore, NULL, NULL, 0 }, /* hc */
484 { roff_line_ignore, NULL, NULL, 0 }, /* hcode */
485 { roff_line_ignore, NULL, NULL, 0 }, /* hidechar */
486 { roff_line_ignore, NULL, NULL, 0 }, /* hla */
487 { roff_line_ignore, NULL, NULL, 0 }, /* hlm */
488 { roff_line_ignore, NULL, NULL, 0 }, /* hpf */
489 { roff_line_ignore, NULL, NULL, 0 }, /* hpfa */
490 { roff_line_ignore, NULL, NULL, 0 }, /* hpfcode */
491 { roff_line_ignore, NULL, NULL, 0 }, /* hw */
492 { roff_line_ignore, NULL, NULL, 0 }, /* hy */
493 { roff_line_ignore, NULL, NULL, 0 }, /* hylang */
494 { roff_line_ignore, NULL, NULL, 0 }, /* hylen */
495 { roff_line_ignore, NULL, NULL, 0 }, /* hym */
496 { roff_line_ignore, NULL, NULL, 0 }, /* hypp */
497 { roff_line_ignore, NULL, NULL, 0 }, /* hys */
498 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* ie */
499 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* if */
500 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ig */
501 { roff_unsupp, NULL, NULL, 0 }, /* index */
502 { roff_it, NULL, NULL, 0 }, /* it */
503 { roff_unsupp, NULL, NULL, 0 }, /* itc */
504 { roff_line_ignore, NULL, NULL, 0 }, /* IX */
505 { roff_line_ignore, NULL, NULL, 0 }, /* kern */
506 { roff_line_ignore, NULL, NULL, 0 }, /* kernafter */
507 { roff_line_ignore, NULL, NULL, 0 }, /* kernbefore */
508 { roff_line_ignore, NULL, NULL, 0 }, /* kernpair */
509 { roff_unsupp, NULL, NULL, 0 }, /* lc */
510 { roff_unsupp, NULL, NULL, 0 }, /* lc_ctype */
511 { roff_unsupp, NULL, NULL, 0 }, /* lds */
512 { roff_unsupp, NULL, NULL, 0 }, /* length */
513 { roff_line_ignore, NULL, NULL, 0 }, /* letadj */
514 { roff_insec, NULL, NULL, 0 }, /* lf */
515 { roff_line_ignore, NULL, NULL, 0 }, /* lg */
516 { roff_line_ignore, NULL, NULL, 0 }, /* lhang */
517 { roff_unsupp, NULL, NULL, 0 }, /* linetabs */
518 { roff_unsupp, NULL, NULL, 0 }, /* lnr */
519 { roff_unsupp, NULL, NULL, 0 }, /* lnrf */
520 { roff_unsupp, NULL, NULL, 0 }, /* lpfx */
521 { roff_line_ignore, NULL, NULL, 0 }, /* ls */
522 { roff_unsupp, NULL, NULL, 0 }, /* lsm */
523 { roff_line_ignore, NULL, NULL, 0 }, /* lt */
524 { roff_line_ignore, NULL, NULL, 0 }, /* mediasize */
525 { roff_line_ignore, NULL, NULL, 0 }, /* minss */
526 { roff_line_ignore, NULL, NULL, 0 }, /* mk */
527 { roff_insec, NULL, NULL, 0 }, /* mso */
528 { roff_line_ignore, NULL, NULL, 0 }, /* na */
529 { roff_line_ignore, NULL, NULL, 0 }, /* ne */
530 { roff_line_ignore, NULL, NULL, 0 }, /* nh */
531 { roff_line_ignore, NULL, NULL, 0 }, /* nhychar */
532 { roff_unsupp, NULL, NULL, 0 }, /* nm */
533 { roff_unsupp, NULL, NULL, 0 }, /* nn */
534 { roff_nop, NULL, NULL, 0 }, /* nop */
535 { roff_nr, NULL, NULL, 0 }, /* nr */
536 { roff_unsupp, NULL, NULL, 0 }, /* nrf */
537 { roff_line_ignore, NULL, NULL, 0 }, /* nroff */
538 { roff_line_ignore, NULL, NULL, 0 }, /* ns */
539 { roff_insec, NULL, NULL, 0 }, /* nx */
540 { roff_insec, NULL, NULL, 0 }, /* open */
541 { roff_insec, NULL, NULL, 0 }, /* opena */
542 { roff_line_ignore, NULL, NULL, 0 }, /* os */
543 { roff_unsupp, NULL, NULL, 0 }, /* output */
544 { roff_line_ignore, NULL, NULL, 0 }, /* padj */
545 { roff_line_ignore, NULL, NULL, 0 }, /* papersize */
546 { roff_line_ignore, NULL, NULL, 0 }, /* pc */
547 { roff_line_ignore, NULL, NULL, 0 }, /* pev */
548 { roff_insec, NULL, NULL, 0 }, /* pi */
549 { roff_unsupp, NULL, NULL, 0 }, /* PI */
550 { roff_line_ignore, NULL, NULL, 0 }, /* pl */
551 { roff_line_ignore, NULL, NULL, 0 }, /* pm */
552 { roff_line_ignore, NULL, NULL, 0 }, /* pn */
553 { roff_line_ignore, NULL, NULL, 0 }, /* pnr */
554 { roff_line_ignore, NULL, NULL, 0 }, /* ps */
555 { roff_unsupp, NULL, NULL, 0 }, /* psbb */
556 { roff_unsupp, NULL, NULL, 0 }, /* pshape */
557 { roff_insec, NULL, NULL, 0 }, /* pso */
558 { roff_line_ignore, NULL, NULL, 0 }, /* ptr */
559 { roff_line_ignore, NULL, NULL, 0 }, /* pvs */
560 { roff_unsupp, NULL, NULL, 0 }, /* rchar */
561 { roff_line_ignore, NULL, NULL, 0 }, /* rd */
562 { roff_line_ignore, NULL, NULL, 0 }, /* recursionlimit */
563 { roff_return, NULL, NULL, 0 }, /* return */
564 { roff_unsupp, NULL, NULL, 0 }, /* rfschar */
565 { roff_line_ignore, NULL, NULL, 0 }, /* rhang */
566 { roff_rm, NULL, NULL, 0 }, /* rm */
567 { roff_rn, NULL, NULL, 0 }, /* rn */
568 { roff_unsupp, NULL, NULL, 0 }, /* rnn */
569 { roff_rr, NULL, NULL, 0 }, /* rr */
570 { roff_line_ignore, NULL, NULL, 0 }, /* rs */
571 { roff_line_ignore, NULL, NULL, 0 }, /* rt */
572 { roff_unsupp, NULL, NULL, 0 }, /* schar */
573 { roff_line_ignore, NULL, NULL, 0 }, /* sentchar */
574 { roff_line_ignore, NULL, NULL, 0 }, /* shc */
575 { roff_shift, NULL, NULL, 0 }, /* shift */
576 { roff_line_ignore, NULL, NULL, 0 }, /* sizes */
577 { roff_so, NULL, NULL, 0 }, /* so */
578 { roff_line_ignore, NULL, NULL, 0 }, /* spacewidth */
579 { roff_line_ignore, NULL, NULL, 0 }, /* special */
580 { roff_line_ignore, NULL, NULL, 0 }, /* spreadwarn */
581 { roff_line_ignore, NULL, NULL, 0 }, /* ss */
582 { roff_line_ignore, NULL, NULL, 0 }, /* sty */
583 { roff_unsupp, NULL, NULL, 0 }, /* substring */
584 { roff_line_ignore, NULL, NULL, 0 }, /* sv */
585 { roff_insec, NULL, NULL, 0 }, /* sy */
586 { roff_T_, NULL, NULL, 0 }, /* T& */
587 { roff_unsupp, NULL, NULL, 0 }, /* tc */
588 { roff_TE, NULL, NULL, 0 }, /* TE */
589 { roff_Dd, NULL, NULL, 0 }, /* TH */
590 { roff_line_ignore, NULL, NULL, 0 }, /* tkf */
591 { roff_unsupp, NULL, NULL, 0 }, /* tl */
592 { roff_line_ignore, NULL, NULL, 0 }, /* tm */
593 { roff_line_ignore, NULL, NULL, 0 }, /* tm1 */
594 { roff_line_ignore, NULL, NULL, 0 }, /* tmc */
595 { roff_tr, NULL, NULL, 0 }, /* tr */
596 { roff_line_ignore, NULL, NULL, 0 }, /* track */
597 { roff_line_ignore, NULL, NULL, 0 }, /* transchar */
598 { roff_insec, NULL, NULL, 0 }, /* trf */
599 { roff_line_ignore, NULL, NULL, 0 }, /* trimat */
600 { roff_unsupp, NULL, NULL, 0 }, /* trin */
601 { roff_unsupp, NULL, NULL, 0 }, /* trnt */
602 { roff_line_ignore, NULL, NULL, 0 }, /* troff */
603 { roff_TS, NULL, NULL, 0 }, /* TS */
604 { roff_line_ignore, NULL, NULL, 0 }, /* uf */
605 { roff_line_ignore, NULL, NULL, 0 }, /* ul */
606 { roff_unsupp, NULL, NULL, 0 }, /* unformat */
607 { roff_line_ignore, NULL, NULL, 0 }, /* unwatch */
608 { roff_line_ignore, NULL, NULL, 0 }, /* unwatchn */
609 { roff_line_ignore, NULL, NULL, 0 }, /* vpt */
610 { roff_line_ignore, NULL, NULL, 0 }, /* vs */
611 { roff_line_ignore, NULL, NULL, 0 }, /* warn */
612 { roff_line_ignore, NULL, NULL, 0 }, /* warnscale */
613 { roff_line_ignore, NULL, NULL, 0 }, /* watch */
614 { roff_line_ignore, NULL, NULL, 0 }, /* watchlength */
615 { roff_line_ignore, NULL, NULL, 0 }, /* watchn */
616 { roff_unsupp, NULL, NULL, 0 }, /* wh */
617 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/
618 { roff_insec, NULL, NULL, 0 }, /* write */
619 { roff_insec, NULL, NULL, 0 }, /* writec */
620 { roff_insec, NULL, NULL, 0 }, /* writem */
621 { roff_line_ignore, NULL, NULL, 0 }, /* xflag */
622 { roff_cblock, NULL, NULL, 0 }, /* . */
623 { roff_renamed, NULL, NULL, 0 },
624 { roff_userdef, NULL, NULL, 0 }
625 };
626
627 /* Array of injected predefined strings. */
628 #define PREDEFS_MAX 38
629 static const struct predef predefs[PREDEFS_MAX] = {
630 #include "predefs.in"
631 };
632
/* File-scope state of the centering request and the input line trap. */
static	int		 roffce_lines;	/* input lines still to be centered */
static	struct roff_node *roffce_node;	/* request currently active */
static	int		 roffit_lines;	/* lines to wait before the trap */
static	char		*roffit_macro;	/* NUL-terminated macro line */
637
638
639 /* --- request table ------------------------------------------------------ */
640
641 struct ohash *
642 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
643 {
644 struct ohash *htab;
645 struct roffreq *req;
646 enum roff_tok tok;
647 size_t sz;
648 unsigned int slot;
649
650 htab = mandoc_malloc(sizeof(*htab));
651 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
652
653 for (tok = mintok; tok < maxtok; tok++) {
654 if (roff_name[tok] == NULL)
655 continue;
656 sz = strlen(roff_name[tok]);
657 req = mandoc_malloc(sizeof(*req) + sz + 1);
658 req->tok = tok;
659 memcpy(req->name, roff_name[tok], sz + 1);
660 slot = ohash_qlookup(htab, req->name);
661 ohash_insert(htab, slot, req);
662 }
663 return htab;
664 }
665
/*
 * Free every entry of a request hash table, then the table itself.
 * A NULL table is accepted and ignored.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 idx;

	if (htab == NULL)
		return;

	entry = ohash_first(htab, &idx);
	while (entry != NULL) {
		free(entry);
		entry = ohash_next(htab, &idx);
	}
	ohash_delete(htab);
	free(htab);
}
680
681 enum roff_tok
682 roffhash_find(struct ohash *htab, const char *name, size_t sz)
683 {
684 struct roffreq *req;
685 const char *end;
686
687 if (sz) {
688 end = name + sz;
689 req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
690 } else
691 req = ohash_find(htab, ohash_qlookup(htab, name));
692 return req == NULL ? TOKEN_NONE : req->tok;
693 }
694
695 /* --- stack of request blocks -------------------------------------------- */
696
697 /*
698 * Pop the current node off of the stack of roff instructions currently
699 * pending. Return 1 if it is a loop or 0 otherwise.
700 */
701 static int
702 roffnode_pop(struct roff *r)
703 {
704 struct roffnode *p;
705 int inloop;
706
707 p = r->last;
708 inloop = p->tok == ROFF_while;
709 r->last = p->parent;
710 free(p->name);
711 free(p->end);
712 free(p);
713 return inloop;
714 }
715
716 /*
717 * Push a roff node onto the instruction stack. This must later be
718 * removed with roffnode_pop().
719 */
720 static void
721 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
722 int line, int col)
723 {
724 struct roffnode *p;
725
726 p = mandoc_calloc(1, sizeof(struct roffnode));
727 p->tok = tok;
728 if (name)
729 p->name = mandoc_strdup(name);
730 p->parent = r->last;
731 p->line = line;
732 p->col = col;
733 p->rule = p->parent ? p->parent->rule : 0;
734
735 r->last = p;
736 }
737
738 /* --- roff parser state data management ---------------------------------- */
739
740 static void
741 roff_free1(struct roff *r)
742 {
743 int i;
744
745 tbl_free(r->first_tbl);
746 r->first_tbl = r->last_tbl = r->tbl = NULL;
747
748 eqn_free(r->last_eqn);
749 r->last_eqn = r->eqn = NULL;
750
751 while (r->mstackpos >= 0)
752 roff_userret(r);
753
754 while (r->last)
755 roffnode_pop(r);
756
757 free (r->rstack);
758 r->rstack = NULL;
759 r->rstacksz = 0;
760 r->rstackpos = -1;
761
762 roff_freereg(r->regtab);
763 r->regtab = NULL;
764
765 roff_freestr(r->strtab);
766 roff_freestr(r->rentab);
767 roff_freestr(r->xmbtab);
768 r->strtab = r->rentab = r->xmbtab = NULL;
769
770 if (r->xtab)
771 for (i = 0; i < 128; i++)
772 free(r->xtab[i].p);
773 free(r->xtab);
774 r->xtab = NULL;
775 }
776
777 void
778 roff_reset(struct roff *r)
779 {
780 roff_free1(r);
781 r->options |= MPARSE_COMMENT;
782 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
783 r->control = '\0';
784 r->escape = '\\';
785 roffce_lines = 0;
786 roffce_node = NULL;
787 roffit_lines = 0;
788 roffit_macro = NULL;
789 }
790
791 void
792 roff_free(struct roff *r)
793 {
794 int i;
795
796 roff_free1(r);
797 for (i = 0; i < r->mstacksz; i++)
798 free(r->mstack[i].argv);
799 free(r->mstack);
800 roffhash_free(r->reqtab);
801 free(r);
802 }
803
804 struct roff *
805 roff_alloc(int options)
806 {
807 struct roff *r;
808
809 r = mandoc_calloc(1, sizeof(struct roff));
810 r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
811 r->options = options | MPARSE_COMMENT;
812 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
813 r->mstackpos = -1;
814 r->rstackpos = -1;
815 r->escape = '\\';
816 return r;
817 }
818
819 /* --- syntax tree state data management ---------------------------------- */
820
821 static void
822 roff_man_free1(struct roff_man *man)
823 {
824 if (man->meta.first != NULL)
825 roff_node_delete(man, man->meta.first);
826 free(man->meta.msec);
827 free(man->meta.vol);
828 free(man->meta.os);
829 free(man->meta.arch);
830 free(man->meta.title);
831 free(man->meta.name);
832 free(man->meta.date);
833 free(man->meta.sodest);
834 }
835
836 void
837 roff_state_reset(struct roff_man *man)
838 {
839 man->last = man->meta.first;
840 man->last_es = NULL;
841 man->flags = 0;
842 man->lastsec = man->lastnamed = SEC_NONE;
843 man->next = ROFF_NEXT_CHILD;
844 roff_setreg(man->roff, "nS", 0, '=');
845 }
846
847 static void
848 roff_man_alloc1(struct roff_man *man)
849 {
850 memset(&man->meta, 0, sizeof(man->meta));
851 man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
852 man->meta.first->type = ROFFT_ROOT;
853 man->meta.macroset = MACROSET_NONE;
854 roff_state_reset(man);
855 }
856
/*
 * Throw away the current syntax tree and metadata
 * and start over with an empty root node.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);	/* old tree and metadata strings */
	roff_man_alloc1(man);	/* fresh root and state */
}
863
864 void
865 roff_man_free(struct roff_man *man)
866 {
867 roff_man_free1(man);
868 free(man->os_r);
869 free(man);
870 }
871
872 struct roff_man *
873 roff_man_alloc(struct roff *roff, const char *os_s, int quick)
874 {
875 struct roff_man *man;
876
877 man = mandoc_calloc(1, sizeof(*man));
878 man->roff = roff;
879 man->os_s = os_s;
880 man->quick = quick;
881 roff_man_alloc1(man);
882 roff->man = man;
883 return man;
884 }
885
886 /* --- syntax tree handling ----------------------------------------------- */
887
888 struct roff_node *
889 roff_node_alloc(struct roff_man *man, int line, int pos,
890 enum roff_type type, int tok)
891 {
892 struct roff_node *n;
893
894 n = mandoc_calloc(1, sizeof(*n));
895 n->line = line;
896 n->pos = pos;
897 n->tok = tok;
898 n->type = type;
899 n->sec = man->lastsec;
900
901 if (man->flags & MDOC_SYNOPSIS)
902 n->flags |= NODE_SYNPRETTY;
903 else
904 n->flags &= ~NODE_SYNPRETTY;
905 if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL)
906 n->flags |= NODE_NOFILL;
907 else
908 n->flags &= ~NODE_NOFILL;
909 if (man->flags & MDOC_NEWLINE)
910 n->flags |= NODE_LINE;
911 man->flags &= ~MDOC_NEWLINE;
912
913 return n;
914 }
915
916 void
917 roff_node_append(struct roff_man *man, struct roff_node *n)
918 {
919
920 switch (man->next) {
921 case ROFF_NEXT_SIBLING:
922 if (man->last->next != NULL) {
923 n->next = man->last->next;
924 man->last->next->prev = n;
925 } else
926 man->last->parent->last = n;
927 man->last->next = n;
928 n->prev = man->last;
929 n->parent = man->last->parent;
930 break;
931 case ROFF_NEXT_CHILD:
932 if (man->last->child != NULL) {
933 n->next = man->last->child;
934 man->last->child->prev = n;
935 } else
936 man->last->last = n;
937 man->last->child = n;
938 n->parent = man->last;
939 break;
940 default:
941 abort();
942 }
943 man->last = n;
944
945 switch (n->type) {
946 case ROFFT_HEAD:
947 n->parent->head = n;
948 break;
949 case ROFFT_BODY:
950 if (n->end != ENDBODY_NOT)
951 return;
952 n->parent->body = n;
953 break;
954 case ROFFT_TAIL:
955 n->parent->tail = n;
956 break;
957 default:
958 return;
959 }
960
961 /*
962 * Copy over the normalised-data pointer of our parent. Not
963 * everybody has one, but copying a null pointer is fine.
964 */
965
966 n->norm = n->parent->norm;
967 assert(n->parent->type == ROFFT_BLOCK);
968 }
969
970 void
971 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
972 {
973 struct roff_node *n;
974
975 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
976 n->string = roff_strdup(man->roff, word);
977 roff_node_append(man, n);
978 n->flags |= NODE_VALID | NODE_ENDED;
979 man->next = ROFF_NEXT_SIBLING;
980 }
981
982 void
983 roff_word_append(struct roff_man *man, const char *word)
984 {
985 struct roff_node *n;
986 char *addstr, *newstr;
987
988 n = man->last;
989 addstr = roff_strdup(man->roff, word);
990 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
991 free(addstr);
992 free(n->string);
993 n->string = newstr;
994 man->next = ROFF_NEXT_SIBLING;
995 }
996
997 void
998 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
999 {
1000 struct roff_node *n;
1001
1002 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
1003 roff_node_append(man, n);
1004 man->next = ROFF_NEXT_CHILD;
1005 }
1006
1007 struct roff_node *
1008 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
1009 {
1010 struct roff_node *n;
1011
1012 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1013 roff_node_append(man, n);
1014 man->next = ROFF_NEXT_CHILD;
1015 return n;
1016 }
1017
1018 struct roff_node *
1019 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1020 {
1021 struct roff_node *n;
1022
1023 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1024 roff_node_append(man, n);
1025 man->next = ROFF_NEXT_CHILD;
1026 return n;
1027 }
1028
1029 struct roff_node *
1030 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1031 {
1032 struct roff_node *n;
1033
1034 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1035 roff_node_append(man, n);
1036 man->next = ROFF_NEXT_CHILD;
1037 return n;
1038 }
1039
1040 static void
1041 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1042 {
1043 struct roff_node *n;
1044 struct tbl_span *span;
1045
1046 if (man->meta.macroset == MACROSET_MAN)
1047 man_breakscope(man, ROFF_TS);
1048 while ((span = tbl_span(tbl)) != NULL) {
1049 n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1050 n->span = span;
1051 roff_node_append(man, n);
1052 n->flags |= NODE_VALID | NODE_ENDED;
1053 man->next = ROFF_NEXT_SIBLING;
1054 }
1055 }
1056
1057 void
1058 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1059 {
1060
1061 /* Adjust siblings. */
1062
1063 if (n->prev)
1064 n->prev->next = n->next;
1065 if (n->next)
1066 n->next->prev = n->prev;
1067
1068 /* Adjust parent. */
1069
1070 if (n->parent != NULL) {
1071 if (n->parent->child == n)
1072 n->parent->child = n->next;
1073 if (n->parent->last == n)
1074 n->parent->last = n->prev;
1075 }
1076
1077 /* Adjust parse point. */
1078
1079 if (man == NULL)
1080 return;
1081 if (man->last == n) {
1082 if (n->prev == NULL) {
1083 man->last = n->parent;
1084 man->next = ROFF_NEXT_CHILD;
1085 } else {
1086 man->last = n->prev;
1087 man->next = ROFF_NEXT_SIBLING;
1088 }
1089 }
1090 if (man->meta.first == n)
1091 man->meta.first = NULL;
1092 }
1093
1094 void
1095 roff_node_relink(struct roff_man *man, struct roff_node *n)
1096 {
1097 roff_node_unlink(man, n);
1098 n->prev = n->next = NULL;
1099 roff_node_append(man, n);
1100 }
1101
1102 void
1103 roff_node_free(struct roff_node *n)
1104 {
1105
1106 if (n->args != NULL)
1107 mdoc_argv_free(n->args);
1108 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1109 free(n->norm);
1110 eqn_box_free(n->eqn);
1111 free(n->string);
1112 free(n->tag);
1113 free(n);
1114 }
1115
1116 void
1117 roff_node_delete(struct roff_man *man, struct roff_node *n)
1118 {
1119
1120 while (n->child != NULL)
1121 roff_node_delete(man, n->child);
1122 roff_node_unlink(man, n);
1123 roff_node_free(n);
1124 }
1125
1126 int
1127 roff_node_transparent(struct roff_node *n)
1128 {
1129 if (n == NULL)
1130 return 0;
1131 if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
1132 return 1;
1133 return roff_tok_transparent(n->tok);
1134 }
1135
1136 int
1137 roff_tok_transparent(enum roff_tok tok)
1138 {
1139 switch (tok) {
1140 case ROFF_ft:
1141 case ROFF_ll:
1142 case ROFF_mc:
1143 case ROFF_po:
1144 case ROFF_ta:
1145 case MDOC_Db:
1146 case MDOC_Es:
1147 case MDOC_Sm:
1148 case MDOC_Tg:
1149 case MAN_DT:
1150 case MAN_UC:
1151 case MAN_PD:
1152 case MAN_AT:
1153 return 1;
1154 default:
1155 return 0;
1156 }
1157 }
1158
1159 struct roff_node *
1160 roff_node_child(struct roff_node *n)
1161 {
1162 for (n = n->child; roff_node_transparent(n); n = n->next)
1163 continue;
1164 return n;
1165 }
1166
1167 struct roff_node *
1168 roff_node_prev(struct roff_node *n)
1169 {
1170 do {
1171 n = n->prev;
1172 } while (roff_node_transparent(n));
1173 return n;
1174 }
1175
1176 struct roff_node *
1177 roff_node_next(struct roff_node *n)
1178 {
1179 do {
1180 n = n->next;
1181 } while (roff_node_transparent(n));
1182 return n;
1183 }
1184
1185 void
1186 deroff(char **dest, const struct roff_node *n)
1187 {
1188 char *cp;
1189 size_t sz;
1190
1191 if (n->string == NULL) {
1192 for (n = n->child; n != NULL; n = n->next)
1193 deroff(dest, n);
1194 return;
1195 }
1196
1197 /* Skip leading whitespace. */
1198
1199 for (cp = n->string; *cp != '\0'; cp++) {
1200 if (cp[0] == '\\' && cp[1] != '\0' &&
1201 strchr(" %&0^|~", cp[1]) != NULL)
1202 cp++;
1203 else if ( ! isspace((unsigned char)*cp))
1204 break;
1205 }
1206
1207 /* Skip trailing backslash. */
1208
1209 sz = strlen(cp);
1210 if (sz > 0 && cp[sz - 1] == '\\')
1211 sz--;
1212
1213 /* Skip trailing whitespace. */
1214
1215 for (; sz; sz--)
1216 if ( ! isspace((unsigned char)cp[sz-1]))
1217 break;
1218
1219 /* Skip empty strings. */
1220
1221 if (sz == 0)
1222 return;
1223
1224 if (*dest == NULL) {
1225 *dest = mandoc_strndup(cp, sz);
1226 return;
1227 }
1228
1229 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1230 free(*dest);
1231 *dest = cp;
1232 }
1233
1234 /* --- main functions of the roff parser ---------------------------------- */
1235
1236 static int
1237 roff_parse_comment(struct roff *r, struct buf *buf, int ln, int pos,
1238 char newesc)
1239 {
1240 struct roff_node *n; /* used for header comments */
1241 const char *start; /* start of the string to process */
1242 const char *cp; /* for RCS id parsing */
1243 char *stesc; /* start of an escape sequence ('\\') */
1244 char *ep; /* end of comment string */
1245 int rcsid; /* kind of RCS id seen */
1246
1247 for (start = stesc = buf->buf + pos;; stesc++) {
1248 /* The line ends without continuation or comment. */
1249 if (stesc[0] == '\0')
1250 return ROFF_CONT;
1251
1252 /* Unescaped byte: skip it. */
1253 if (stesc[0] != newesc)
1254 continue;
1255
1256 /* Backslash at end of line requests line continuation. */
1257 if (stesc[1] == '\0') {
1258 stesc[0] = '\0';
1259 return ROFF_IGN | ROFF_APPEND;
1260 }
1261
1262 /* Found a comment: process it. */
1263 if (stesc[1] == '"' || stesc[1] == '#')
1264 break;
1265
1266 /* Escaped escape character: skip them both. */
1267 if (stesc[1] == newesc)
1268 stesc++;
1269 }
1270
1271 /* Look for an RCS id in the comment. */
1272
1273 rcsid = 0;
1274 if ((cp = strstr(stesc + 2, "$" "OpenBSD")) != NULL) {
1275 rcsid = 1 << MANDOC_OS_OPENBSD;
1276 cp += 8;
1277 } else if ((cp = strstr(stesc + 2, "$" "NetBSD")) != NULL) {
1278 rcsid = 1 << MANDOC_OS_NETBSD;
1279 cp += 7;
1280 }
1281 if (cp != NULL && isalnum((unsigned char)*cp) == 0 &&
1282 strchr(cp, '$') != NULL) {
1283 if (r->man->meta.rcsids & rcsid)
1284 mandoc_msg(MANDOCERR_RCS_REP, ln,
1285 (int)(stesc - buf->buf) + 2, "%s", stesc + 1);
1286 r->man->meta.rcsids |= rcsid;
1287 }
1288
1289 /* Warn about trailing whitespace at the end of the comment. */
1290
1291 ep = strchr(stesc + 2, '\0') - 1;
1292 if (*ep == '\n')
1293 *ep-- = '\0';
1294 if (*ep == ' ' || *ep == '\t')
1295 mandoc_msg(MANDOCERR_SPACE_EOL,
1296 ln, (int)(ep - buf->buf), NULL);
1297
1298 /* Save comments preceding the title macro in the syntax tree. */
1299
1300 if (r->options & MPARSE_COMMENT) {
1301 while (*ep == ' ' || *ep == '\t')
1302 ep--;
1303 ep[1] = '\0';
1304 n = roff_node_alloc(r->man, ln, stesc + 1 - buf->buf,
1305 ROFFT_COMMENT, TOKEN_NONE);
1306 n->string = mandoc_strdup(stesc + 2);
1307 roff_node_append(r->man, n);
1308 n->flags |= NODE_VALID | NODE_ENDED;
1309 r->man->next = ROFF_NEXT_SIBLING;
1310 }
1311
1312 /* The comment requests line continuation. */
1313
1314 if (stesc[1] == '#') {
1315 *stesc = '\0';
1316 return ROFF_IGN | ROFF_APPEND;
1317 }
1318
1319 /* Discard the comment including preceding whitespace. */
1320
1321 while (stesc > start && stesc[-1] == ' ' &&
1322 (stesc == start + 1 || stesc[-2] != '\\'))
1323 stesc--;
1324 *stesc = '\0';
1325 return ROFF_CONT;
1326 }
1327
1328 /*
1329 * In the current line, expand escape sequences that produce parsable
1330 * input text. Also check the syntax of the remaining escape sequences,
1331 * which typically produce output glyphs or change formatter state.
1332 */
1333 static int
1334 roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc)
1335 {
1336 struct mctx *ctx; /* current macro call context */
1337 char ubuf[24]; /* buffer to print the number */
1338 const char *start; /* start of the string to process */
1339 char *stesc; /* start of an escape sequence ('\\') */
1340 const char *esct; /* type of esccape sequence */
1341 const char *stnam; /* start of the name, after "[(*" */
1342 const char *cp; /* end of the name, e.g. before ']' */
1343 const char *res; /* the string to be substituted */
1344 char *nbuf; /* new buffer to copy buf->buf to */
1345 size_t maxl; /* expected length of the escape name */
1346 size_t naml; /* actual length of the escape name */
1347 size_t asz; /* length of the replacement */
1348 size_t rsz; /* length of the rest of the string */
1349 int inaml; /* length returned from mandoc_escape() */
1350 int expand_count; /* to avoid infinite loops */
1351 int npos; /* position in numeric expression */
1352 int arg_complete; /* argument not interrupted by eol */
1353 int quote_args; /* true for \\$@, false for \\$* */
1354 int deftype; /* type of definition to paste */
1355 enum mandocerr err; /* for escape sequence problems */
1356 char sign; /* increment number register */
1357 char term; /* character terminating the escape */
1358
1359 start = buf->buf + pos;
1360 stesc = strchr(start, '\0') - 1;
1361 if (stesc >= start && *stesc == '\n')
1362 *stesc-- = '\0';
1363
1364 expand_count = 0;
1365 while (stesc >= start) {
1366 if (*stesc != newesc) {
1367
1368 /*
1369 * If we have a non-standard escape character,
1370 * escape literal backslashes because all
1371 * processing in subsequent functions uses
1372 * the standard escaping rules.
1373 */
1374
1375 if (newesc != ASCII_ESC && *stesc == '\\') {
1376 *stesc = '\0';
1377 buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
1378 buf->buf, stesc + 1) + 1;
1379 start = nbuf + pos;
1380 stesc = nbuf + (stesc - buf->buf);
1381 free(buf->buf);
1382 buf->buf = nbuf;
1383 }
1384
1385 /* Search backwards for the next escape. */
1386
1387 stesc--;
1388 continue;
1389 }
1390
1391 /* If it is escaped, skip it. */
1392
1393 for (cp = stesc - 1; cp >= start; cp--)
1394 if (*cp != r->escape)
1395 break;
1396
1397 if ((stesc - cp) % 2 == 0) {
1398 while (stesc > cp)
1399 *stesc-- = '\\';
1400 continue;
1401 } else if (stesc[1] == '\0') {
1402 *stesc-- = '\0';
1403 continue;
1404 } else
1405 *stesc = '\\';
1406
1407 /* Decide whether to expand or to check only. */
1408
1409 term = '\0';
1410 cp = stesc + 1;
1411 while (*cp == 'E')
1412 cp++;
1413 esct = cp;
1414 switch (*esct) {
1415 case '*':
1416 case '$':
1417 res = NULL;
1418 break;
1419 case 'B':
1420 case 'w':
1421 term = cp[1];
1422 /* FALLTHROUGH */
1423 case 'n':
1424 sign = cp[1];
1425 if (sign == '+' || sign == '-')
1426 cp++;
1427 res = ubuf;
1428 break;
1429 default:
1430 err = MANDOCERR_OK;
1431 switch(mandoc_escape(&cp, &stnam, &inaml)) {
1432 case ESCAPE_SPECIAL:
1433 if (mchars_spec2cp(stnam, inaml) >= 0)
1434 break;
1435 /* FALLTHROUGH */
1436 case ESCAPE_ERROR:
1437 err = MANDOCERR_ESC_BAD;
1438 break;
1439 case ESCAPE_UNDEF:
1440 err = MANDOCERR_ESC_UNDEF;
1441 break;
1442 case ESCAPE_UNSUPP:
1443 err = MANDOCERR_ESC_UNSUPP;
1444 break;
1445 default:
1446 break;
1447 }
1448 if (err != MANDOCERR_OK)
1449 mandoc_msg(err, ln, (int)(stesc - buf->buf),
1450 "%.*s", (int)(cp - stesc), stesc);
1451 stesc--;
1452 continue;
1453 }
1454
1455 if (EXPAND_LIMIT < ++expand_count) {
1456 mandoc_msg(MANDOCERR_ROFFLOOP,
1457 ln, (int)(stesc - buf->buf), NULL);
1458 return ROFF_IGN;
1459 }
1460
1461 /*
1462 * The third character decides the length
1463 * of the name of the string or register.
1464 * Save a pointer to the name.
1465 */
1466
1467 if (term == '\0') {
1468 switch (*++cp) {
1469 case '\0':
1470 maxl = 0;
1471 break;
1472 case '(':
1473 cp++;
1474 maxl = 2;
1475 break;
1476 case '[':
1477 cp++;
1478 term = ']';
1479 maxl = 0;
1480 break;
1481 default:
1482 maxl = 1;
1483 break;
1484 }
1485 } else {
1486 cp += 2;
1487 maxl = 0;
1488 }
1489 stnam = cp;
1490
1491 /* Advance to the end of the name. */
1492
1493 naml = 0;
1494 arg_complete = 1;
1495 while (maxl == 0 || naml < maxl) {
1496 if (*cp == '\0') {
1497 mandoc_msg(MANDOCERR_ESC_BAD, ln,
1498 (int)(stesc - buf->buf), "%s", stesc);
1499 arg_complete = 0;
1500 break;
1501 }
1502 if (maxl == 0 && *cp == term) {
1503 cp++;
1504 break;
1505 }
1506 if (*cp++ != '\\' || *esct != 'w') {
1507 naml++;
1508 continue;
1509 }
1510 switch (mandoc_escape(&cp, NULL, NULL)) {
1511 case ESCAPE_SPECIAL:
1512 case ESCAPE_UNICODE:
1513 case ESCAPE_NUMBERED:
1514 case ESCAPE_UNDEF:
1515 case ESCAPE_OVERSTRIKE:
1516 naml++;
1517 break;
1518 default:
1519 break;
1520 }
1521 }
1522
1523 /*
1524 * Retrieve the replacement string; if it is
1525 * undefined, resume searching for escapes.
1526 */
1527
1528 switch (*esct) {
1529 case '*':
1530 if (arg_complete) {
1531 deftype = ROFFDEF_USER | ROFFDEF_PRE;
1532 res = roff_getstrn(r, stnam, naml, &deftype);
1533
1534 /*
1535 * If not overriden, let \*(.T
1536 * through to the formatters.
1537 */
1538
1539 if (res == NULL && naml == 2 &&
1540 stnam[0] == '.' && stnam[1] == 'T') {
1541 roff_setstrn(&r->strtab,
1542 ".T", 2, NULL, 0, 0);
1543 stesc--;
1544 continue;
1545 }
1546 }
1547 break;
1548 case '$':
1549 if (r->mstackpos < 0) {
1550 mandoc_msg(MANDOCERR_ARG_UNDEF, ln,
1551 (int)(stesc - buf->buf), "%.3s", stesc);
1552 break;
1553 }
1554 ctx = r->mstack + r->mstackpos;
1555 npos = esct[1] - '1';
1556 if (npos >= 0 && npos <= 8) {
1557 res = npos < ctx->argc ?
1558 ctx->argv[npos] : "";
1559 break;
1560 }
1561 if (esct[1] == '*')
1562 quote_args = 0;
1563 else if (esct[1] == '@')
1564 quote_args = 1;
1565 else {
1566 mandoc_msg(MANDOCERR_ARG_NONUM, ln,
1567 (int)(stesc - buf->buf), "%.3s", stesc);
1568 break;
1569 }
1570 asz = 0;
1571 for (npos = 0; npos < ctx->argc; npos++) {
1572 if (npos)
1573 asz++; /* blank */
1574 if (quote_args)
1575 asz += 2; /* quotes */
1576 asz += strlen(ctx->argv[npos]);
1577 }
1578 if (asz != 3) {
1579 rsz = buf->sz - (stesc - buf->buf) - 3;
1580 if (asz < 3)
1581 memmove(stesc + asz, stesc + 3, rsz);
1582 buf->sz += asz - 3;
1583 nbuf = mandoc_realloc(buf->buf, buf->sz);
1584 start = nbuf + pos;
1585 stesc = nbuf + (stesc - buf->buf);
1586 buf->buf = nbuf;
1587 if (asz > 3)
1588 memmove(stesc + asz, stesc + 3, rsz);
1589 }
1590 for (npos = 0; npos < ctx->argc; npos++) {
1591 if (npos)
1592 *stesc++ = ' ';
1593 if (quote_args)
1594 *stesc++ = '"';
1595 cp = ctx->argv[npos];
1596 while (*cp != '\0')
1597 *stesc++ = *cp++;
1598 if (quote_args)
1599 *stesc++ = '"';
1600 }
1601 continue;
1602 case 'B':
1603 npos = 0;
1604 ubuf[0] = arg_complete &&
1605 roff_evalnum(r, ln, stnam, &npos,
1606 NULL, ROFFNUM_SCALE) &&
1607 stnam + npos + 1 == cp ? '1' : '0';
1608 ubuf[1] = '\0';
1609 break;
1610 case 'n':
1611 if (arg_complete)
1612 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1613 roff_getregn(r, stnam, naml, sign));
1614 else
1615 ubuf[0] = '\0';
1616 break;
1617 case 'w':
1618 /* use even incomplete args */
1619 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1620 24 * (int)naml);
1621 break;
1622 }
1623
1624 if (res == NULL) {
1625 if (*esct == '*')
1626 mandoc_msg(MANDOCERR_STR_UNDEF,
1627 ln, (int)(stesc - buf->buf),
1628 "%.*s", (int)naml, stnam);
1629 res = "";
1630 } else if (buf->sz + strlen(res) > SHRT_MAX) {
1631 mandoc_msg(MANDOCERR_ROFFLOOP,
1632 ln, (int)(stesc - buf->buf), NULL);
1633 return ROFF_IGN;
1634 }
1635
1636 /* Replace the escape sequence by the string. */
1637
1638 *stesc = '\0';
1639 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
1640 buf->buf, res, cp) + 1;
1641
1642 /* Prepare for the next replacement. */
1643
1644 start = nbuf + pos;
1645 stesc = nbuf + (stesc - buf->buf) + strlen(res);
1646 free(buf->buf);
1647 buf->buf = nbuf;
1648 }
1649 return ROFF_CONT;
1650 }
1651
1652 /*
1653 * Parse a quoted or unquoted roff-style request or macro argument.
1654 * Return a pointer to the parsed argument, which is either the original
1655 * pointer or advanced by one byte in case the argument is quoted.
1656 * NUL-terminate the argument in place.
1657 * Collapse pairs of quotes inside quoted arguments.
1658 * Advance the argument pointer to the next argument,
1659 * or to the NUL byte terminating the argument line.
1660 */
1661 char *
1662 roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
1663 {
1664 struct buf buf;
1665 char *cp, *start;
1666 int newesc, pairs, quoted, white;
1667
1668 /* Quoting can only start with a new word. */
1669 start = *cpp;
1670 quoted = 0;
1671 if ('"' == *start) {
1672 quoted = 1;
1673 start++;
1674 }
1675
1676 newesc = pairs = white = 0;
1677 for (cp = start; '\0' != *cp; cp++) {
1678
1679 /*
1680 * Move the following text left
1681 * after quoted quotes and after "\\" and "\t".
1682 */
1683 if (pairs)
1684 cp[-pairs] = cp[0];
1685
1686 if ('\\' == cp[0]) {
1687 /*
1688 * In copy mode, translate double to single
1689 * backslashes and backslash-t to literal tabs.
1690 */
1691 switch (cp[1]) {
1692 case 'a':
1693 case 't':
1694 cp[-pairs] = '\t';
1695 pairs++;
1696 cp++;
1697 break;
1698 case '\\':
1699 newesc = 1;
1700 cp[-pairs] = ASCII_ESC;
1701 pairs++;
1702 cp++;
1703 break;
1704 case ' ':
1705 /* Skip escaped blanks. */
1706 if (0 == quoted)
1707 cp++;
1708 break;
1709 default:
1710 break;
1711 }
1712 } else if (0 == quoted) {
1713 if (' ' == cp[0]) {
1714 /* Unescaped blanks end unquoted args. */
1715 white = 1;
1716 break;
1717 }
1718 } else if ('"' == cp[0]) {
1719 if ('"' == cp[1]) {
1720 /* Quoted quotes collapse. */
1721 pairs++;
1722 cp++;
1723 } else {
1724 /* Unquoted quotes end quoted args. */
1725 quoted = 2;
1726 break;
1727 }
1728 }
1729 }
1730
1731 /* Quoted argument without a closing quote. */
1732 if (1 == quoted)
1733 mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);
1734
1735 /* NUL-terminate this argument and move to the next one. */
1736 if (pairs)
1737 cp[-pairs] = '\0';
1738 if ('\0' != *cp) {
1739 *cp++ = '\0';
1740 while (' ' == *cp)
1741 cp++;
1742 }
1743 *pos += (int)(cp - start) + (quoted ? 1 : 0);
1744 *cpp = cp;
1745
1746 if ('\0' == *cp && (white || ' ' == cp[-1]))
1747 mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);
1748
1749 start = mandoc_strdup(start);
1750 if (newesc == 0)
1751 return start;
1752
1753 buf.buf = start;
1754 buf.sz = strlen(start) + 1;
1755 buf.next = NULL;
1756 if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) {
1757 free(buf.buf);
1758 buf.buf = mandoc_strdup("");
1759 }
1760 return buf.buf;
1761 }
1762
1763
1764 /*
1765 * Process text streams.
1766 */
1767 static int
1768 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1769 {
1770 size_t sz;
1771 const char *start;
1772 char *p;
1773 int isz;
1774 enum mandoc_esc esc;
1775
1776 /* Spring the input line trap. */
1777
1778 if (roffit_lines == 1) {
1779 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1780 free(buf->buf);
1781 buf->buf = p;
1782 buf->sz = isz + 1;
1783 *offs = 0;
1784 free(roffit_macro);
1785 roffit_lines = 0;
1786 return ROFF_REPARSE;
1787 } else if (roffit_lines > 1)
1788 --roffit_lines;
1789
1790 if (roffce_node != NULL && buf->buf[pos] != '\0') {
1791 if (roffce_lines < 1) {
1792 r->man->last = roffce_node;
1793 r->man->next = ROFF_NEXT_SIBLING;
1794 roffce_lines = 0;
1795 roffce_node = NULL;
1796 } else
1797 roffce_lines--;
1798 }
1799
1800 /* Convert all breakable hyphens into ASCII_HYPH. */
1801
1802 start = p = buf->buf + pos;
1803
1804 while (*p != '\0') {
1805 sz = strcspn(p, "-\\");
1806 p += sz;
1807
1808 if (*p == '\0')
1809 break;
1810
1811 if (*p == '\\') {
1812 /* Skip over escapes. */
1813 p++;
1814 esc = mandoc_escape((const char **)&p, NULL, NULL);
1815 if (esc == ESCAPE_ERROR)
1816 break;
1817 while (*p == '-')
1818 p++;
1819 continue;
1820 } else if (p == start) {
1821 p++;
1822 continue;
1823 }
1824
1825 if (isalpha((unsigned char)p[-1]) &&
1826 isalpha((unsigned char)p[1]))
1827 *p = ASCII_HYPH;
1828 p++;
1829 }
1830 return ROFF_CONT;
1831 }
1832
1833 int
1834 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
1835 {
1836 enum roff_tok t;
1837 int e;
1838 int pos; /* parse point */
1839 int spos; /* saved parse point for messages */
1840 int ppos; /* original offset in buf->buf */
1841 int ctl; /* macro line (boolean) */
1842
1843 ppos = pos = *offs;
1844
1845 if (len > 80 && r->tbl == NULL && r->eqn == NULL &&
1846 (r->man->flags & ROFF_NOFILL) == 0 &&
1847 strchr(" .\\", buf->buf[pos]) == NULL &&
1848 buf->buf[pos] != r->control &&
1849 strcspn(buf->buf, " ") < 80)
1850 mandoc_msg(MANDOCERR_TEXT_LONG, ln, (int)len - 1,
1851 "%.20s...", buf->buf + pos);
1852
1853 /* Handle in-line equation delimiters. */
1854
1855 if (r->tbl == NULL &&
1856 r->last_eqn != NULL && r->last_eqn->delim &&
1857 (r->eqn == NULL || r->eqn_inline)) {
1858 e = roff_eqndelim(r, buf, pos);
1859 if (e == ROFF_REPARSE)
1860 return e;
1861 assert(e == ROFF_CONT);
1862 }
1863
1864 /* Handle comments and escape sequences. */
1865
1866 e = roff_parse_comment(r, buf, ln, pos, r->escape);
1867 if ((e & ROFF_MASK) == ROFF_IGN)
1868 return e;
1869 assert(e == ROFF_CONT);
1870
1871 e = roff_expand(r, buf, ln, pos, r->escape);
1872 if ((e & ROFF_MASK) == ROFF_IGN)
1873 return e;
1874 assert(e == ROFF_CONT);
1875
1876 ctl = roff_getcontrol(r, buf->buf, &pos);
1877
1878 /*
1879 * First, if a scope is open and we're not a macro, pass the
1880 * text through the macro's filter.
1881 * Equations process all content themselves.
1882 * Tables process almost all content themselves, but we want
1883 * to warn about macros before passing it there.
1884 */
1885
1886 if (r->last != NULL && ! ctl) {
1887 t = r->last->tok;
1888 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1889 if ((e & ROFF_MASK) == ROFF_IGN)
1890 return e;
1891 e &= ~ROFF_MASK;
1892 } else
1893 e = ROFF_IGN;
1894 if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
1895 eqn_read(r->eqn, buf->buf + ppos);
1896 return e;
1897 }
1898 if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
1899 tbl_read(r->tbl, ln, buf->buf, ppos);
1900 roff_addtbl(r->man, ln, r->tbl);
1901 return e;
1902 }
1903 if ( ! ctl) {
1904 r->options &= ~MPARSE_COMMENT;
1905 return roff_parsetext(r, buf, pos, offs) | e;
1906 }
1907
1908 /* Skip empty request lines. */
1909
1910 if (buf->buf[pos] == '"') {
1911 mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
1912 return ROFF_IGN;
1913 } else if (buf->buf[pos] == '\0')
1914 return ROFF_IGN;
1915
1916 /*
1917 * If a scope is open, go to the child handler for that macro,
1918 * as it may want to preprocess before doing anything with it.
1919 */
1920
1921 if (r->last) {
1922 t = r->last->tok;
1923 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1924 }
1925
1926 r->options &= ~MPARSE_COMMENT;
1927 spos = pos;
1928 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1929 return roff_req_or_macro(r, t, buf, ln, spos, pos, offs);
1930 }
1931
1932 /*
1933 * Handle a new request or macro.
1934 * May be called outside any scope or from inside a conditional scope.
1935 */
1936 static int
1937 roff_req_or_macro(ROFF_ARGS) {
1938
1939 /* For now, tables ignore most macros and some request. */
1940
1941 if (r->tbl != NULL && (tok == TOKEN_NONE || tok == ROFF_TS ||
1942 tok == ROFF_br || tok == ROFF_ce || tok == ROFF_rj ||
1943 tok == ROFF_sp)) {
1944 mandoc_msg(MANDOCERR_TBLMACRO,
1945 ln, ppos, "%s", buf->buf + ppos);
1946 if (tok != TOKEN_NONE)
1947 return ROFF_IGN;
1948 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1949 pos++;
1950 while (buf->buf[pos] == ' ')
1951 pos++;
1952 tbl_read(r->tbl, ln, buf->buf, pos);
1953 roff_addtbl(r->man, ln, r->tbl);
1954 return ROFF_IGN;
1955 }
1956
1957 /* For now, let high level macros abort .ce mode. */
1958
1959 if (roffce_node != NULL &&
1960 (tok == TOKEN_NONE || tok == ROFF_Dd || tok == ROFF_EQ ||
1961 tok == ROFF_TH || tok == ROFF_TS)) {
1962 r->man->last = roffce_node;
1963 r->man->next = ROFF_NEXT_SIBLING;
1964 roffce_lines = 0;
1965 roffce_node = NULL;
1966 }
1967
1968 /*
1969 * This is neither a roff request nor a user-defined macro.
1970 * Let the standard macro set parsers handle it.
1971 */
1972
1973 if (tok == TOKEN_NONE)
1974 return ROFF_CONT;
1975
1976 /* Execute a roff request or a user-defined macro. */
1977
1978 return (*roffs[tok].proc)(r, tok, buf, ln, ppos, pos, offs);
1979 }
1980
1981 /*
1982 * Internal interface function to tell the roff parser that execution
1983 * of the current macro ended. This is required because macro
1984 * definitions usually do not end with a .return request.
1985 */
1986 void
1987 roff_userret(struct roff *r)
1988 {
1989 struct mctx *ctx;
1990 int i;
1991
1992 assert(r->mstackpos >= 0);
1993 ctx = r->mstack + r->mstackpos;
1994 for (i = 0; i < ctx->argc; i++)
1995 free(ctx->argv[i]);
1996 ctx->argc = 0;
1997 r->mstackpos--;
1998 }
1999
2000 void
2001 roff_endparse(struct roff *r)
2002 {
2003 if (r->last != NULL)
2004 mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
2005 r->last->col, "%s", roff_name[r->last->tok]);
2006
2007 if (r->eqn != NULL) {
2008 mandoc_msg(MANDOCERR_BLK_NOEND,
2009 r->eqn->node->line, r->eqn->node->pos, "EQ");
2010 eqn_parse(r->eqn);
2011 r->eqn = NULL;
2012 }
2013
2014 if (r->tbl != NULL) {
2015 tbl_end(r->tbl, 1);
2016 r->tbl = NULL;
2017 }
2018 }
2019
2020 /*
2021 * Parse the request or macro name at buf[*pos].
2022 * Return ROFF_RENAMED, ROFF_USERDEF, or a ROFF_* token value.
2023 * For empty, undefined, mdoc(7), and man(7) macros, return TOKEN_NONE.
2024 * As a side effect, set r->current_string to the definition or to NULL.
2025 */
2026 static enum roff_tok
2027 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
2028 {
2029 char *cp;
2030 const char *mac;
2031 size_t maclen;
2032 int deftype;
2033 enum roff_tok t;
2034
2035 cp = buf + *pos;
2036
2037 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
2038 return TOKEN_NONE;
2039
2040 mac = cp;
2041 maclen = roff_getname(r, &cp, ln, ppos);
2042
2043 deftype = ROFFDEF_USER | ROFFDEF_REN;
2044 r->current_string = roff_getstrn(r, mac, maclen, &deftype);
2045 switch (deftype) {
2046 case ROFFDEF_USER:
2047 t = ROFF_USERDEF;
2048 break;
2049 case ROFFDEF_REN:
2050 t = ROFF_RENAMED;
2051 break;
2052 default:
2053 t = roffhash_find(r->reqtab, mac, maclen);
2054 break;
2055 }
2056 if (t != TOKEN_NONE)
2057 *pos = cp - buf;
2058 else if (deftype == ROFFDEF_UNDEF) {
2059 /* Using an undefined macro defines it to be empty. */
2060 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
2061 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
2062 }
2063 return t;
2064 }
2065
2066 /* --- handling of request blocks ----------------------------------------- */
2067
2068 /*
2069 * Close a macro definition block or an "ignore" block.
2070 */
2071 static int
2072 roff_cblock(ROFF_ARGS)
2073 {
2074 int rr;
2075
2076 if (r->last == NULL) {
2077 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2078 return ROFF_IGN;
2079 }
2080
2081 switch (r->last->tok) {
2082 case ROFF_am:
2083 case ROFF_ami:
2084 case ROFF_de:
2085 case ROFF_dei:
2086 case ROFF_ig:
2087 break;
2088 case ROFF_am1:
2089 case ROFF_de1:
2090 /* Remapped in roff_block(). */
2091 abort();
2092 default:
2093 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2094 return ROFF_IGN;
2095 }
2096
2097 roffnode_pop(r);
2098 roffnode_cleanscope(r);
2099
2100 /*
2101 * If a conditional block with braces is still open,
2102 * check for "\}" block end markers.
2103 */
2104
2105 if (r->last != NULL && r->last->endspan < 0) {
2106 rr = 1; /* If arguments follow "\}", warn about them. */
2107 roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2108 }
2109
2110 if (buf->buf[pos] != '\0')
2111 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
2112 ".. %s", buf->buf + pos);
2113
2114 return ROFF_IGN;
2115 }
2116
2117 /*
2118 * Pop all nodes ending at the end of the current input line.
2119 * Return the number of loops ended.
2120 */
2121 static int
2122 roffnode_cleanscope(struct roff *r)
2123 {
2124 int inloop;
2125
2126 inloop = 0;
2127 while (r->last != NULL && r->last->endspan > 0) {
2128 if (--r->last->endspan != 0)
2129 break;
2130 inloop += roffnode_pop(r);
2131 }
2132 return inloop;
2133 }
2134
2135 /*
2136 * Handle the closing "\}" of a conditional block.
2137 * Apart from generating warnings, this only pops nodes.
2138 * Return the number of loops ended.
2139 */
2140 static int
2141 roff_ccond(struct roff *r, int ln, int ppos)
2142 {
2143 if (NULL == r->last) {
2144 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2145 return 0;
2146 }
2147
2148 switch (r->last->tok) {
2149 case ROFF_el:
2150 case ROFF_ie:
2151 case ROFF_if:
2152 case ROFF_while:
2153 break;
2154 default:
2155 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2156 return 0;
2157 }
2158
2159 if (r->last->endspan > -1) {
2160 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2161 return 0;
2162 }
2163
2164 return roffnode_pop(r) + roffnode_cleanscope(r);
2165 }
2166
2167 static int
2168 roff_block(ROFF_ARGS)
2169 {
2170 const char *name, *value;
2171 char *call, *cp, *iname, *rname;
2172 size_t csz, namesz, rsz;
2173 int deftype;
2174
2175 /* Ignore groff compatibility mode for now. */
2176
2177 if (tok == ROFF_de1)
2178 tok = ROFF_de;
2179 else if (tok == ROFF_dei1)
2180 tok = ROFF_dei;
2181 else if (tok == ROFF_am1)
2182 tok = ROFF_am;
2183 else if (tok == ROFF_ami1)
2184 tok = ROFF_ami;
2185
2186 /* Parse the macro name argument. */
2187
2188 cp = buf->buf + pos;
2189 if (tok == ROFF_ig) {
2190 iname = NULL;
2191 namesz = 0;
2192 } else {
2193 iname = cp;
2194 namesz = roff_getname(r, &cp, ln, ppos);
2195 iname[namesz] = '\0';
2196 }
2197
2198 /* Resolve the macro name argument if it is indirect. */
2199
2200 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2201 deftype = ROFFDEF_USER;
2202 name = roff_getstrn(r, iname, namesz, &deftype);
2203 if (name == NULL) {
2204 mandoc_msg(MANDOCERR_STR_UNDEF,
2205 ln, (int)(iname - buf->buf),
2206 "%.*s", (int)namesz, iname);
2207 namesz = 0;
2208 } else
2209 namesz = strlen(name);
2210 } else
2211 name = iname;
2212
2213 if (namesz == 0 && tok != ROFF_ig) {
2214 mandoc_msg(MANDOCERR_REQ_EMPTY,
2215 ln, ppos, "%s", roff_name[tok]);
2216 return ROFF_IGN;
2217 }
2218
2219 roffnode_push(r, tok, name, ln, ppos);
2220
2221 /*
2222 * At the beginning of a `de' macro, clear the existing string
2223 * with the same name, if there is one. New content will be
2224 * appended from roff_block_text() in multiline mode.
2225 */
2226
2227 if (tok == ROFF_de || tok == ROFF_dei) {
2228 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
2229 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2230 } else if (tok == ROFF_am || tok == ROFF_ami) {
2231 deftype = ROFFDEF_ANY;
2232 value = roff_getstrn(r, iname, namesz, &deftype);
2233 switch (deftype) { /* Before appending, ... */
2234 case ROFFDEF_PRE: /* copy predefined to user-defined. */
2235 roff_setstrn(&r->strtab, name, namesz,
2236 value, strlen(value), 0);
2237 break;
2238 case ROFFDEF_REN: /* call original standard macro. */
2239 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2240 (int)strlen(value), value);
2241 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2242 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2243 free(call);
2244 break;
2245 case ROFFDEF_STD: /* rename and call standard macro. */
2246 rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
2247 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
2248 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2249 (int)rsz, rname);
2250 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2251 free(call);
2252 free(rname);
2253 break;
2254 default:
2255 break;
2256 }
2257 }
2258
2259 if (*cp == '\0')
2260 return ROFF_IGN;
2261
2262 /* Get the custom end marker. */
2263
2264 iname = cp;
2265 namesz = roff_getname(r, &cp, ln, ppos);
2266
2267 /* Resolve the end marker if it is indirect. */
2268
2269 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2270 deftype = ROFFDEF_USER;
2271 name = roff_getstrn(r, iname, namesz, &deftype);
2272 if (name == NULL) {
2273 mandoc_msg(MANDOCERR_STR_UNDEF,
2274 ln, (int)(iname - buf->buf),
2275 "%.*s", (int)namesz, iname);
2276 namesz = 0;
2277 } else
2278 namesz = strlen(name);
2279 } else
2280 name = iname;
2281
2282 if (namesz)
2283 r->last->end = mandoc_strndup(name, namesz);
2284
2285 if (*cp != '\0')
2286 mandoc_msg(MANDOCERR_ARG_EXCESS,
2287 ln, pos, ".%s ... %s", roff_name[tok], cp);
2288
2289 return ROFF_IGN;
2290 }
2291
2292 static int
2293 roff_block_sub(ROFF_ARGS)
2294 {
2295 enum roff_tok t;
2296 int i, j;
2297
2298 /*
2299 * If a custom end marker is a user-defined or predefined macro
2300 * or a request, interpret it.
2301 */
2302
2303 if (r->last->end) {
2304 for (i = pos, j = 0; r->last->end[j]; j++, i++)
2305 if (buf->buf[i] != r->last->end[j])
2306 break;
2307
2308 if (r->last->end[j] == '\0' &&
2309 (buf->buf[i] == '\0' ||
2310 buf->buf[i] == ' ' ||
2311 buf->buf[i] == '\t')) {
2312 roffnode_pop(r);
2313 roffnode_cleanscope(r);
2314
2315 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2316 i++;
2317
2318 pos = i;
2319 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2320 TOKEN_NONE)
2321 return ROFF_RERUN;
2322 return ROFF_IGN;
2323 }
2324 }
2325
2326 /* Handle the standard end marker. */
2327
2328 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2329 if (t == ROFF_cblock)
2330 return roff_cblock(r, t, buf, ln, ppos, pos, offs);
2331
2332 /* Not an end marker, so append the line to the block. */
2333
2334 if (tok != ROFF_ig)
2335 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2336 return ROFF_IGN;
2337 }
2338
2339 static int
2340 roff_block_text(ROFF_ARGS)
2341 {
2342
2343 if (tok != ROFF_ig)
2344 roff_setstr(r, r->last->name, buf->buf + pos, 2);
2345
2346 return ROFF_IGN;
2347 }
2348
2349 /*
2350 * Check for a closing "\}" and handle it.
2351 * In this function, the final "int *offs" argument is used for
2352 * different purposes than elsewhere:
2353 * Input: *offs == 0: caller wants to discard arguments following \}
2354 * *offs == 1: caller wants to preserve text following \}
2355 * Output: *offs = 0: tell caller to discard input line
2356 * *offs = 1: tell caller to use input line
2357 */
2358 static int
2359 roff_cond_checkend(ROFF_ARGS)
2360 {
2361 char *ep;
2362 int endloop, irc, rr;
2363
2364 irc = ROFF_IGN;
2365 rr = r->last->rule;
2366 endloop = tok != ROFF_while ? ROFF_IGN :
2367 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2368 if (roffnode_cleanscope(r))
2369 irc |= endloop;
2370
2371 /*
2372 * If "\}" occurs on a macro line without a preceding macro or
2373 * a text line contains nothing else, drop the line completely.
2374 */
2375
2376 ep = buf->buf + pos;
2377 if (ep[0] == '\\' && ep[1] == '}' && (ep[2] == '\0' || *offs == 0))
2378 rr = 0;
2379
2380 /*
2381 * The closing delimiter "\}" rewinds the conditional scope
2382 * but is otherwise ignored when interpreting the line.
2383 */
2384
2385 while ((ep = strchr(ep, '\\')) != NULL) {
2386 switch (ep[1]) {
2387 case '}':
2388 if (ep[2] == '\0')
2389 ep[0] = '\0';
2390 else if (rr)
2391 ep[1] = '&';
2392 else
2393 memmove(ep, ep + 2, strlen(ep + 2) + 1);
2394 if (roff_ccond(r, ln, ep - buf->buf))
2395 irc |= endloop;
2396 break;
2397 case '\0':
2398 ++ep;
2399 break;
2400 default:
2401 ep += 2;
2402 break;
2403 }
2404 }
2405 *offs = rr;
2406 return irc;
2407 }
2408
2409 /*
2410 * Parse and process a request or macro line in conditional scope.
2411 */
2412 static int
2413 roff_cond_sub(ROFF_ARGS)
2414 {
2415 struct roffnode *bl;
2416 int irc, rr, spos;
2417 enum roff_tok t;
2418
2419 rr = 0; /* If arguments follow "\}", skip them. */
2420 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2421 spos = pos;
2422 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2423
2424 /*
2425 * Handle requests and macros if the conditional evaluated
2426 * to true or if they are structurally required.
2427 * The .break request is always handled specially.
2428 */
2429
2430 if (t == ROFF_break) {
2431 if (irc & ROFF_LOOPMASK)
2432 irc = ROFF_IGN | ROFF_LOOPEXIT;
2433 else if (rr) {
2434 for (bl = r->last; bl != NULL; bl = bl->parent) {
2435 bl->rule = 0;
2436 if (bl->tok == ROFF_while)
2437 break;
2438 }
2439 }
2440 } else if (rr || (t < TOKEN_NONE && roffs[t].flags & ROFFMAC_STRUCT)) {
2441 irc |= roff_req_or_macro(r, t, buf, ln, spos, pos, offs);
2442 if (irc & ROFF_WHILE)
2443 irc &= ~(ROFF_LOOPCONT | ROFF_LOOPEXIT);
2444 }
2445 return irc;
2446 }
2447
2448 /*
2449 * Parse and process a text line in conditional scope.
2450 */
2451 static int
2452 roff_cond_text(ROFF_ARGS)
2453 {
2454 int irc, rr;
2455
2456 rr = 1; /* If arguments follow "\}", preserve them. */
2457 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2458 if (rr)
2459 irc |= ROFF_CONT;
2460 return irc;
2461 }
2462
2463 /* --- handling of numeric and conditional expressions -------------------- */
2464
2465 /*
2466 * Parse a single signed integer number. Stop at the first non-digit.
2467 * If there is at least one digit, return success and advance the
2468 * parse point, else return failure and let the parse point unchanged.
2469 * Ignore overflows, treat them just like the C language.
2470 */
2471 static int
2472 roff_getnum(const char *v, int *pos, int *res, int flags)
2473 {
2474 int myres, scaled, n, p;
2475
2476 if (NULL == res)
2477 res = &myres;
2478
2479 p = *pos;
2480 n = v[p] == '-';
2481 if (n || v[p] == '+')
2482 p++;
2483
2484 if (flags & ROFFNUM_WHITE)
2485 while (isspace((unsigned char)v[p]))
2486 p++;
2487
2488 for (*res = 0; isdigit((unsigned char)v[p]); p++)
2489 *res = 10 * *res + v[p] - '0';
2490 if (p == *pos + n)
2491 return 0;
2492
2493 if (n)
2494 *res = -*res;
2495
2496 /* Each number may be followed by one optional scaling unit. */
2497
2498 switch (v[p]) {
2499 case 'f':
2500 scaled = *res * 65536;
2501 break;
2502 case 'i':
2503 scaled = *res * 240;
2504 break;
2505 case 'c':
2506 scaled = *res * 240 / 2.54;
2507 break;
2508 case 'v':
2509 case 'P':
2510 scaled = *res * 40;
2511 break;
2512 case 'm':
2513 case 'n':
2514 scaled = *res * 24;
2515 break;
2516 case 'p':
2517 scaled = *res * 10 / 3;
2518 break;
2519 case 'u':
2520 scaled = *res;
2521 break;
2522 case 'M':
2523 scaled = *res * 6 / 25;
2524 break;
2525 default:
2526 scaled = *res;
2527 p--;
2528 break;
2529 }
2530 if (flags & ROFFNUM_SCALE)
2531 *res = scaled;
2532
2533 *pos = p + 1;
2534 return 1;
2535 }
2536
/*
 * Evaluate a string comparison condition.
 * The character at the parse point is the delimiter.
 * Succeed if the string up to its second occurrence
 * matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or, lacking that, to the end of the line.
 * Returns 1 if the two strings are equal, 0 otherwise.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*lhs, *rhs, *stop;
	char		 delim;
	int		 equal;

	equal = 0;
	delim = v[*pos];
	lhs = v + *pos + 1;		/* scans the first string */
	rhs = strchr(lhs, delim);	/* scans the second string */

	if (rhs != NULL) {
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: look for the final delimiter. */
				rhs = strchr(rhs, delim);
				break;
			}
			if (*rhs == delim) {
				/* Both strings ended simultaneously. */
				equal = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		stop = lhs + strlen(lhs);
	else if (*rhs == '\0')
		stop = rhs;
	else
		stop = rhs + 1;

	*pos = stop - v;
	return equal;
}
2579
2580 /*
2581 * Evaluate an optionally negated single character, numerical,
2582 * or string condition.
2583 */
2584 static int
2585 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2586 {
2587 const char *start, *end;
2588 char *cp, *name;
2589 size_t sz;
2590 int deftype, len, number, savepos, istrue, wanttrue;
2591
2592 if ('!' == v[*pos]) {
2593 wanttrue = 0;
2594 (*pos)++;
2595 } else
2596 wanttrue = 1;
2597
2598 switch (v[*pos]) {
2599 case '\0':
2600 return 0;
2601 case 'n':
2602 case 'o':
2603 (*pos)++;
2604 return wanttrue;
2605 case 'e':
2606 case 't':
2607 case 'v':
2608 (*pos)++;
2609 return !wanttrue;
2610 case 'c':
2611 do {
2612 (*pos)++;
2613 } while (v[*pos] == ' ');
2614
2615 /*
2616 * Quirk for groff compatibility:
2617 * The horizontal tab is neither available nor unavailable.
2618 */
2619
2620 if (v[*pos] == '\t') {
2621 (*pos)++;
2622 return 0;
2623 }
2624
2625 /* Printable ASCII characters are available. */
2626
2627 if (v[*pos] != '\\') {
2628 (*pos)++;
2629 return wanttrue;
2630 }
2631
2632 end = v + ++*pos;
2633 switch (mandoc_escape(&end, &start, &len)) {
2634 case ESCAPE_SPECIAL:
2635 istrue = mchars_spec2cp(start, len) != -1;
2636 break;
2637 case ESCAPE_UNICODE:
2638 istrue = 1;
2639 break;
2640 case ESCAPE_NUMBERED:
2641 istrue = mchars_num2char(start, len) != -1;
2642 break;
2643 default:
2644 istrue = !wanttrue;
2645 break;
2646 }
2647 *pos = end - v;
2648 return istrue == wanttrue;
2649 case 'd':
2650 case 'r':
2651 cp = v + *pos + 1;
2652 while (*cp == ' ')
2653 cp++;
2654 name = cp;
2655 sz = roff_getname(r, &cp, ln, cp - v);
2656 if (sz == 0)
2657 istrue = 0;
2658 else if (v[*pos] == 'r')
2659 istrue = roff_hasregn(r, name, sz);
2660 else {
2661 deftype = ROFFDEF_ANY;
2662 roff_getstrn(r, name, sz, &deftype);
2663 istrue = !!deftype;
2664 }
2665 *pos = (name + sz) - v;
2666 return istrue == wanttrue;
2667 default:
2668 break;
2669 }
2670
2671 savepos = *pos;
2672 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2673 return (number > 0) == wanttrue;
2674 else if (*pos == savepos)
2675 return roff_evalstrcond(v, pos) == wanttrue;
2676 else
2677 return 0;
2678 }
2679
2680 static int
2681 roff_line_ignore(ROFF_ARGS)
2682 {
2683
2684 return ROFF_IGN;
2685 }
2686
2687 static int
2688 roff_insec(ROFF_ARGS)
2689 {
2690
2691 mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]);
2692 return ROFF_IGN;
2693 }
2694
2695 static int
2696 roff_unsupp(ROFF_ARGS)
2697 {
2698
2699 mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]);
2700 return ROFF_IGN;
2701 }
2702
2703 static int
2704 roff_cond(ROFF_ARGS)
2705 {
2706 int irc;
2707
2708 roffnode_push(r, tok, NULL, ln, ppos);
2709
2710 /*
2711 * An `.el' has no conditional body: it will consume the value
2712 * of the current rstack entry set in prior `ie' calls or
2713 * defaults to DENY.
2714 *
2715 * If we're not an `el', however, then evaluate the conditional.
2716 */
2717
2718 r->last->rule = tok == ROFF_el ?
2719 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2720 roff_evalcond(r, ln, buf->buf, &pos);
2721
2722 /*
2723 * An if-else will put the NEGATION of the current evaluated
2724 * conditional into the stack of rules.
2725 */
2726
2727 if (tok == ROFF_ie) {
2728 if (r->rstackpos + 1 == r->rstacksz) {
2729 r->rstacksz += 16;
2730 r->rstack = mandoc_reallocarray(r->rstack,
2731 r->rstacksz, sizeof(int));
2732 }
2733 r->rstack[++r->rstackpos] = !r->last->rule;
2734 }
2735
2736 /* If the parent has false as its rule, then so do we. */
2737
2738 if (r->last->parent && !r->last->parent->rule)
2739 r->last->rule = 0;
2740
2741 /*
2742 * Determine scope.
2743 * If there is nothing on the line after the conditional,
2744 * not even whitespace, use next-line scope.
2745 * Except that .while does not support next-line scope.
2746 */
2747
2748 if (buf->buf[pos] == '\0' && tok != ROFF_while) {
2749 r->last->endspan = 2;
2750 goto out;
2751 }
2752
2753 while (buf->buf[pos] == ' ')
2754 pos++;
2755
2756 /* An opening brace requests multiline scope. */
2757
2758 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2759 r->last->endspan = -1;
2760 pos += 2;
2761 while (buf->buf[pos] == ' ')
2762 pos++;
2763 goto out;
2764 }
2765
2766 /*
2767 * Anything else following the conditional causes
2768 * single-line scope. Warn if the scope contains
2769 * nothing but trailing whitespace.
2770 */
2771
2772 if (buf->buf[pos] == '\0')
2773 mandoc_msg(MANDOCERR_COND_EMPTY,
2774 ln, ppos, "%s", roff_name[tok]);
2775
2776 r->last->endspan = 1;
2777
2778 out:
2779 *offs = pos;
2780 irc = ROFF_RERUN;
2781 if (tok == ROFF_while)
2782 irc |= ROFF_WHILE;
2783 return irc;
2784 }
2785
2786 static int
2787 roff_ds(ROFF_ARGS)
2788 {
2789 char *string;
2790 const char *name;
2791 size_t namesz;
2792
2793 /* Ignore groff compatibility mode for now. */
2794
2795 if (tok == ROFF_ds1)
2796 tok = ROFF_ds;
2797 else if (tok == ROFF_as1)
2798 tok = ROFF_as;
2799
2800 /*
2801 * The first word is the name of the string.
2802 * If it is empty or terminated by an escape sequence,
2803 * abort the `ds' request without defining anything.
2804 */
2805
2806 name = string = buf->buf + pos;
2807 if (*name == '\0')
2808 return ROFF_IGN;
2809
2810 namesz = roff_getname(r, &string, ln, pos);
2811 switch (name[namesz]) {
2812 case '\\':
2813 return ROFF_IGN;
2814 case '\t':
2815 string = buf->buf + pos + namesz;
2816 break;
2817 default:
2818 break;
2819 }
2820
2821 /* Read past the initial double-quote, if any. */
2822 if (*string == '"')
2823 string++;
2824
2825 /* The rest is the value. */
2826 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2827 ROFF_as == tok);
2828 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2829 return ROFF_IGN;
2830 }
2831
/*
 * Parse a single operator, one or two characters long.
 * If the operator is recognized, store its one-character code in
 * *res, advance the parse point, and return the code, which is
 * non-zero.  Otherwise, return 0 and let the parse point unchanged;
 * in that case, *res holds the unrecognized character.
 *
 * Two-character operators are encoded as follows:
 * "<=" as 'l', "<>" as '!', "<?" as 'i',
 * ">=" as 'g', ">?" as 'a', "==" as '='.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	static const struct {
		char	 first;
		char	 second;
		char	 code;
	} twochar[] = {
		{ '<', '=', 'l' },
		{ '<', '>', '!' },
		{ '<', '?', 'i' },
		{ '>', '=', 'g' },
		{ '>', '?', 'a' },
		{ '=', '=', '=' },
	};
	size_t	 i;
	char	 c;

	c = v[*pos];
	*res = c;

	/* Guard against strchr(3) matching the terminating NUL. */

	if (c == '\0' || strchr("+-*/%&:<>=", c) == NULL)
		return 0;

	for (i = 0; i < sizeof(twochar) / sizeof(twochar[0]); i++) {
		if (twochar[i].first == c &&
		    twochar[i].second == v[*pos + 1]) {
			*res = twochar[i].code;
			(*pos)++;
			break;
		}
	}
	(*pos)++;

	return *res;
}
2895
2896 /*
2897 * Evaluate either a parenthesized numeric expression
2898 * or a single signed integer number.
2899 */
2900 static int
2901 roff_evalpar(struct roff *r, int ln,
2902 const char *v, int *pos, int *res, int flags)
2903 {
2904
2905 if ('(' != v[*pos])
2906 return roff_getnum(v, pos, res, flags);
2907
2908 (*pos)++;
2909 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2910 return 0;
2911
2912 /*
2913 * Omission of the closing parenthesis
2914 * is an error in validation mode,
2915 * but ignored in evaluation mode.
2916 */
2917
2918 if (')' == v[*pos])
2919 (*pos)++;
2920 else if (NULL == res)
2921 return 0;
2922
2923 return 1;
2924 }
2925
2926 /*
2927 * Evaluate a complete numeric expression.
2928 * Proceed left to right, there is no concept of precedence.
2929 */
2930 static int
2931 roff_evalnum(struct roff *r, int ln, const char *v,
2932 int *pos, int *res, int flags)
2933 {
2934 int mypos, operand2;
2935 char operator;
2936
2937 if (NULL == pos) {
2938 mypos = 0;
2939 pos = &mypos;
2940 }
2941
2942 if (flags & ROFFNUM_WHITE)
2943 while (isspace((unsigned char)v[*pos]))
2944 (*pos)++;
2945
2946 if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2947 return 0;
2948
2949 while (1) {
2950 if (flags & ROFFNUM_WHITE)
2951 while (isspace((unsigned char)v[*pos]))
2952 (*pos)++;
2953
2954 if ( ! roff_getop(v, pos, &operator))
2955 break;
2956
2957 if (flags & ROFFNUM_WHITE)
2958 while (isspace((unsigned char)v[*pos]))
2959 (*pos)++;
2960
2961 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2962 return 0;
2963
2964 if (flags & ROFFNUM_WHITE)
2965 while (isspace((unsigned char)v[*pos]))
2966 (*pos)++;
2967
2968 if (NULL == res)
2969 continue;
2970
2971 switch (operator) {
2972 case '+':
2973 *res += operand2;
2974 break;
2975 case '-':
2976 *res -= operand2;
2977 break;
2978 case '*':
2979 *res *= operand2;
2980 break;
2981 case '/':
2982 if (operand2 == 0) {
2983 mandoc_msg(MANDOCERR_DIVZERO,
2984 ln, *pos, "%s", v);
2985 *res = 0;
2986 break;
2987 }
2988 *res /= operand2;
2989 break;
2990 case '%':
2991 if (operand2 == 0) {
2992 mandoc_msg(MANDOCERR_DIVZERO,
2993 ln, *pos, "%s", v);
2994 *res = 0;
2995 break;
2996 }
2997 *res %= operand2;
2998 break;
2999 case '<':
3000 *res = *res < operand2;
3001 break;
3002 case '>':
3003 *res = *res > operand2;
3004 break;
3005 case 'l':
3006 *res = *res <= operand2;
3007 break;
3008 case 'g':
3009 *res = *res >= operand2;
3010 break;
3011 case '=':
3012 *res = *res == operand2;
3013 break;
3014 case '!':
3015 *res = *res != operand2;
3016 break;
3017 case '&':
3018 *res = *res && operand2;
3019 break;
3020 case ':':
3021 *res = *res || operand2;
3022 break;
3023 case 'i':
3024 if (operand2 < *res)
3025 *res = operand2;
3026 break;
3027 case 'a':
3028 if (operand2 > *res)
3029 *res = operand2;
3030 break;
3031 default:
3032 abort();
3033 }
3034 }
3035 return 1;
3036 }
3037
3038 /* --- register management ------------------------------------------------ */
3039
/*
 * Set, increment, or decrement the number register with the
 * NUL-terminated name, leaving its auto-increment step alone.
 * See roff_setregn() for the meaning of sign.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	const size_t	 namesz = strlen(name);

	roff_setregn(r, name, namesz, val, sign, INT_MIN);
}
3045
3046 static void
3047 roff_setregn(struct roff *r, const char *name, size_t len,
3048 int val, char sign, int step)
3049 {
3050 struct roffreg *reg;
3051
3052 /* Search for an existing register with the same name. */
3053 reg = r->regtab;
3054
3055 while (reg != NULL && (reg->key.sz != len ||
3056 strncmp(reg->key.p, name, len) != 0))
3057 reg = reg->next;
3058
3059 if (NULL == reg) {
3060 /* Create a new register. */
3061 reg = mandoc_malloc(sizeof(struct roffreg));
3062 reg->key.p = mandoc_strndup(name, len);
3063 reg->key.sz = len;
3064 reg->val = 0;
3065 reg->step = 0;
3066 reg->next = r->regtab;
3067 r->regtab = reg;
3068 }
3069
3070 if ('+' == sign)
3071 reg->val += val;
3072 else if ('-' == sign)
3073 reg->val -= val;
3074 else
3075 reg->val = val;
3076 if (step != INT_MIN)
3077 reg->step = step;
3078 }
3079
3080 /*
3081 * Handle some predefined read-only number registers.
3082 * For now, return -1 if the requested register is not predefined;
3083 * in case a predefined read-only register having the value -1
3084 * were to turn up, another special value would have to be chosen.
3085 */
3086 static int
3087 roff_getregro(const struct roff *r, const char *name)
3088 {
3089
3090 switch (*name) {
3091 case '$': /* Number of arguments of the last macro evaluated. */
3092 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
3093 case 'A': /* ASCII approximation mode is always off. */
3094 return 0;
3095 case 'g': /* Groff compatibility mode is always on. */
3096 return 1;
3097 case 'H': /* Fixed horizontal resolution. */
3098 return 24;
3099 case 'j': /* Always adjust left margin only. */
3100 return 0;
3101 case 'T': /* Some output device is always defined. */
3102 return 1;
3103 case 'V': /* Fixed vertical resolution. */
3104 return 40;
3105 default:
3106 return -1;
3107 }
3108 }
3109
/*
 * Return the value of the number register with the NUL-terminated
 * name, without applying any auto-increment.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	const size_t	 namesz = strlen(name);

	return roff_getregn(r, name, namesz, '\0');
}
3115
3116 static int
3117 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
3118 {
3119 struct roffreg *reg;
3120 int val;
3121
3122 if ('.' == name[0] && 2 == len) {
3123 val = roff_getregro(r, name + 1);
3124 if (-1 != val)
3125 return val;
3126 }
3127
3128 for (reg = r->regtab; reg; reg = reg->next) {
3129 if (len == reg->key.sz &&
3130 0 == strncmp(name, reg->key.p, len)) {
3131 switch (sign) {
3132 case '+':
3133 reg->val += reg->step;
3134 break;
3135 case '-':
3136 reg->val -= reg->step;
3137 break;
3138 default:
3139 break;
3140 }
3141 return reg->val;
3142 }
3143 }
3144
3145 roff_setregn(r, name, len, 0, '\0', INT_MIN);
3146 return 0;
3147 }
3148
3149 static int
3150 roff_hasregn(const struct roff *r, const char *name, size_t len)
3151 {
3152 struct roffreg *reg;
3153 int val;
3154
3155 if ('.' == name[0] && 2 == len) {
3156 val = roff_getregro(r, name + 1);
3157 if (-1 != val)
3158 return 1;
3159 }
3160
3161 for (reg = r->regtab; reg; reg = reg->next)
3162 if (len == reg->key.sz &&
3163 0 == strncmp(name, reg->key.p, len))
3164 return 1;
3165
3166 return 0;
3167 }
3168
3169 static void
3170 roff_freereg(struct roffreg *reg)
3171 {
3172 struct roffreg *old_reg;
3173
3174 while (NULL != reg) {
3175 free(reg->key.p);
3176 old_reg = reg;
3177 reg = reg->next;
3178 free(old_reg);
3179 }
3180 }
3181
3182 static int
3183 roff_nr(ROFF_ARGS)
3184 {
3185 char *key, *val, *step;
3186 size_t keysz;
3187 int iv, is, len;
3188 char sign;
3189
3190 key = val = buf->buf + pos;
3191 if (*key == '\0')
3192 return ROFF_IGN;
3193
3194 keysz = roff_getname(r, &val, ln, pos);
3195 if (key[keysz] == '\\' || key[keysz] == '\t')
3196 return ROFF_IGN;
3197
3198 sign = *val;
3199 if (sign == '+' || sign == '-')
3200 val++;
3201
3202 len = 0;
3203 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
3204 return ROFF_IGN;
3205
3206 step = val + len;
3207 while (isspace((unsigned char)*step))
3208 step++;
3209 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
3210 is = INT_MIN;
3211
3212 roff_setregn(r, key, keysz, iv, sign, is);
3213 return ROFF_IGN;
3214 }
3215
3216 static int
3217 roff_rr(ROFF_ARGS)
3218 {
3219 struct roffreg *reg, **prev;
3220 char *name, *cp;
3221 size_t namesz;
3222
3223 name = cp = buf->buf + pos;
3224 if (*name == '\0')
3225 return ROFF_IGN;
3226 namesz = roff_getname(r, &cp, ln, pos);
3227 name[namesz] = '\0';
3228
3229 prev = &r->regtab;
3230 while (1) {
3231 reg = *prev;
3232 if (reg == NULL || !strcmp(name, reg->key.p))
3233 break;
3234 prev = &reg->next;
3235 }
3236 if (reg != NULL) {
3237 *prev = reg->next;
3238 free(reg->key.p);
3239 free(reg);
3240 }
3241 return ROFF_IGN;
3242 }
3243
3244 /* --- handler functions for roff requests -------------------------------- */
3245
3246 static int
3247 roff_rm(ROFF_ARGS)
3248 {
3249 const char *name;
3250 char *cp;
3251 size_t namesz;
3252
3253 cp = buf->buf + pos;
3254 while (*cp != '\0') {
3255 name = cp;
3256 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
3257 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
3258 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3259 if (name[namesz] == '\\' || name[namesz] == '\t')
3260 break;
3261 }
3262 return ROFF_IGN;
3263 }
3264
3265 static int
3266 roff_it(ROFF_ARGS)
3267 {
3268 int iv;
3269
3270 /* Parse the number of lines. */
3271
3272 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
3273 mandoc_msg(MANDOCERR_IT_NONUM,
3274 ln, ppos, "%s", buf->buf + 1);
3275 return ROFF_IGN;
3276 }
3277
3278 while (isspace((unsigned char)buf->buf[pos]))
3279 pos++;
3280
3281 /*
3282 * Arm the input line trap.
3283 * Special-casing "an-trap" is an ugly workaround to cope
3284 * with DocBook stupidly fiddling with man(7) internals.
3285 */
3286
3287 roffit_lines = iv;
3288 roffit_macro = mandoc_strdup(iv != 1 ||
3289 strcmp(buf->buf + pos, "an-trap") ?
3290 buf->buf + pos : "br");
3291 return ROFF_IGN;
3292 }
3293
3294 static int
3295 roff_Dd(ROFF_ARGS)
3296 {
3297 int mask;
3298 enum roff_tok t, te;
3299
3300 switch (tok) {
3301 case ROFF_Dd:
3302 tok = MDOC_Dd;
3303 te = MDOC_MAX;
3304 if (r->format == 0)
3305 r->format = MPARSE_MDOC;
3306 mask = MPARSE_MDOC | MPARSE_QUICK;
3307 break;
3308 case ROFF_TH:
3309 tok = MAN_TH;
3310 te = MAN_MAX;
3311 if (r->format == 0)
3312 r->format = MPARSE_MAN;
3313 mask = MPARSE_QUICK;
3314 break;
3315 default:
3316 abort();
3317 }
3318 if ((r->options & mask) == 0)
3319 for (t = tok; t < te; t++)
3320 roff_setstr(r, roff_name[t], NULL, 0);
3321 return ROFF_CONT;
3322 }
3323
3324 static int
3325 roff_TE(ROFF_ARGS)
3326 {
3327 r->man->flags &= ~ROFF_NONOFILL;
3328 if (r->tbl == NULL) {
3329 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE");
3330 return ROFF_IGN;
3331 }
3332 if (tbl_end(r->tbl, 0) == 0) {
3333 r->tbl = NULL;
3334 free(buf->buf);
3335 buf->buf = mandoc_strdup(".sp");
3336 buf->sz = 4;
3337 *offs = 0;
3338 return ROFF_REPARSE;
3339 }
3340 r->tbl = NULL;
3341 return ROFF_IGN;
3342 }
3343
3344 static int
3345 roff_T_(ROFF_ARGS)
3346 {
3347
3348 if (NULL == r->tbl)
3349 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&");
3350 else
3351 tbl_restart(ln, ppos, r->tbl);
3352
3353 return ROFF_IGN;
3354 }
3355
3356 /*
3357 * Handle in-line equation delimiters.
3358 */
3359 static int
3360 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
3361 {
3362 char *cp1, *cp2;
3363 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
3364
3365 /*
3366 * Outside equations, look for an opening delimiter.
3367 * If we are inside an equation, we already know it is
3368 * in-line, or this function wouldn't have been called;
3369 * so look for a closing delimiter.
3370 */
3371
3372 cp1 = buf->buf + pos;
3373 cp2 = strchr(cp1, r->eqn == NULL ?
3374 r->last_eqn->odelim : r->last_eqn->cdelim);
3375 if (cp2 == NULL)
3376 return ROFF_CONT;
3377
3378 *cp2++ = '\0';
3379 bef_pr = bef_nl = aft_nl = aft_pr = "";
3380
3381 /* Handle preceding text, protecting whitespace. */
3382
3383 if (*buf->buf != '\0') {
3384 if (r->eqn == NULL)
3385 bef_pr = "\\&";
3386 bef_nl = "\n";
3387 }
3388
3389 /*
3390 * Prepare replacing the delimiter with an equation macro
3391 * and drop leading white space from the equation.
3392 */
3393
3394 if (r->eqn == NULL) {
3395 while (*cp2 == ' ')
3396 cp2++;
3397 mac = ".EQ";
3398 } else
3399 mac = ".EN";
3400
3401 /* Handle following text, protecting whitespace. */
3402
3403 if (*cp2 != '\0') {
3404 aft_nl = "\n";
3405 if (r->eqn != NULL)
3406 aft_pr = "\\&";
3407 }
3408
3409 /* Do the actual replacement. */
3410
3411 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
3412 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
3413 free(buf->buf);
3414 buf->buf = cp1;
3415
3416 /* Toggle the in-line state of the eqn subsystem. */
3417
3418 r->eqn_inline = r->eqn == NULL;
3419 return ROFF_REPARSE;
3420 }
3421
3422 static int
3423 roff_EQ(ROFF_ARGS)
3424 {
3425 struct roff_node *n;
3426
3427 if (r->man->meta.macroset == MACROSET_MAN)
3428 man_breakscope(r->man, ROFF_EQ);
3429 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3430 if (ln > r->man->last->line)
3431 n->flags |= NODE_LINE;
3432 n->eqn = eqn_box_new();
3433 roff_node_append(r->man, n);
3434 r->man->next = ROFF_NEXT_SIBLING;
3435
3436 assert(r->eqn == NULL);
3437 if (r->last_eqn == NULL)
3438 r->last_eqn = eqn_alloc();
3439 else
3440 eqn_reset(r->last_eqn);
3441 r->eqn = r->last_eqn;
3442 r->eqn->node = n;
3443
3444 if (buf->buf[pos] != '\0')
3445 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3446 ".EQ %s", buf->buf + pos);
3447
3448 return ROFF_IGN;
3449 }
3450
3451 static int
3452 roff_EN(ROFF_ARGS)
3453 {
3454 if (r->eqn != NULL) {
3455 eqn_parse(r->eqn);
3456 r->eqn = NULL;
3457 } else
3458 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN");
3459 if (buf->buf[pos] != '\0')
3460 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3461 "EN %s", buf->buf + pos);
3462 return ROFF_IGN;
3463 }
3464
3465 static int
3466 roff_TS(ROFF_ARGS)
3467 {
3468 if (r->tbl != NULL) {
3469 mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
3470 tbl_end(r->tbl, 0);
3471 }
3472 r->man->flags |= ROFF_NONOFILL;
3473 r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
3474 if (r->last_tbl == NULL)
3475 r->first_tbl = r->tbl;
3476 r->last_tbl = r->tbl;
3477 return ROFF_IGN;
3478 }
3479
3480 static int
3481 roff_noarg(ROFF_ARGS)
3482 {
3483 if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3484 man_breakscope(r->man, tok);
3485 if (tok == ROFF_brp)
3486 tok = ROFF_br;
3487 roff_elem_alloc(r->man, ln, ppos, tok);
3488 if (buf->buf[pos] != '\0')
3489 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3490 "%s %s", roff_name[tok], buf->buf + pos);
3491 if (tok == ROFF_nf)
3492 r->man->flags |= ROFF_NOFILL;
3493 else if (tok == ROFF_fi)
3494 r->man->flags &= ~ROFF_NOFILL;
3495 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3496 r->man->next = ROFF_NEXT_SIBLING;
3497 return ROFF_IGN;
3498 }
3499
3500 static int
3501 roff_onearg(ROFF_ARGS)
3502 {
3503 struct roff_node *n;
3504 char *cp;
3505 int npos;
3506
3507 if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3508 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3509 tok == ROFF_ti))
3510 man_breakscope(r->man, tok);
3511
3512 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3513 r->man->last = roffce_node;
3514 r->man->next = ROFF_NEXT_SIBLING;
3515 }
3516
3517 roff_elem_alloc(r->man, ln, ppos, tok);
3518 n = r->man->last;
3519
3520 cp = buf->buf + pos;
3521 if (*cp != '\0') {
3522 while (*cp != '\0' && *cp != ' ')
3523 cp++;
3524 while (*cp == ' ')
3525 *cp++ = '\0';
3526 if (*cp != '\0')
3527 mandoc_msg(MANDOCERR_ARG_EXCESS,
3528 ln, (int)(cp - buf->buf),
3529 "%s ... %s", roff_name[tok], cp);
3530 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3531 }
3532
3533 if (tok == ROFF_ce || tok == ROFF_rj) {
3534 if (r->man->last->type == ROFFT_ELEM) {
3535 roff_word_alloc(r->man, ln, pos, "1");
3536 r->man->last->flags |= NODE_NOSRC;
3537 }
3538 npos = 0;
3539 if (roff_evalnum(r, ln, r->man->last->string, &npos,
3540 &roffce_lines, 0) == 0) {
3541 mandoc_msg(MANDOCERR_CE_NONUM,
3542 ln, pos, "ce %s", buf->buf + pos);
3543 roffce_lines = 1;
3544 }
3545 if (roffce_lines < 1) {
3546 r->man->last = r->man->last->parent;
3547 roffce_node = NULL;
3548 roffce_lines = 0;
3549 } else
3550 roffce_node = r->man->last->parent;
3551 } else {
3552 n->flags |= NODE_VALID | NODE_ENDED;
3553 r->man->last = n;
3554 }
3555 n->flags |= NODE_LINE;
3556 r->man->next = ROFF_NEXT_SIBLING;
3557 return ROFF_IGN;
3558 }
3559
3560 static int
3561 roff_manyarg(ROFF_ARGS)
3562 {
3563 struct roff_node *n;
3564 char *sp, *ep;
3565
3566 roff_elem_alloc(r->man, ln, ppos, tok);
3567 n = r->man->last;
3568
3569 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3570 while (*ep != '\0' && *ep != ' ')
3571 ep++;
3572 while (*ep == ' ')
3573 *ep++ = '\0';
3574 roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3575 }
3576
3577 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3578 r->man->last = n;
3579 r->man->next = ROFF_NEXT_SIBLING;
3580 return ROFF_IGN;
3581 }
3582
3583 static int
3584 roff_als(ROFF_ARGS)
3585 {
3586 char *oldn, *newn, *end, *value;
3587 size_t oldsz, newsz, valsz;
3588
3589 newn = oldn = buf->buf + pos;
3590 if (*newn == '\0')
3591 return ROFF_IGN;
3592
3593 newsz = roff_getname(r, &oldn, ln, pos);
3594 if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0')
3595 return ROFF_IGN;
3596
3597 end = oldn;
3598 oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3599 if (oldsz == 0)
3600 return ROFF_IGN;
3601
3602 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3603 (int)oldsz, oldn);
3604 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3605 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3606 free(value);
3607 return ROFF_IGN;
3608 }
3609
3610 /*
3611 * The .break request only makes sense inside conditionals,
3612 * and that case is already handled in roff_cond_sub().
3613 */
3614 static int
3615 roff_break(ROFF_ARGS)
3616 {
3617 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break");
3618 return ROFF_IGN;
3619 }
3620
3621 static int
3622 roff_cc(ROFF_ARGS)
3623 {
3624 const char *p;
3625
3626 p = buf->buf + pos;
3627
3628 if (*p == '\0' || (r->control = *p++) == '.')
3629 r->control = '\0';
3630
3631 if (*p != '\0')
3632 mandoc_msg(MANDOCERR_ARG_EXCESS,
3633 ln, p - buf->buf, "cc ... %s", p);
3634
3635 return ROFF_IGN;
3636 }
3637
3638 static int
3639 roff_char(ROFF_ARGS)
3640 {
3641 const char *p, *kp, *vp;
3642 size_t ksz, vsz;
3643 int font;
3644
3645 /* Parse the character to be replaced. */
3646
3647 kp = buf->buf + pos;
3648 p = kp + 1;
3649 if (*kp == '\0' || (*kp == '\\' &&
3650 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3651 (*p != ' ' && *p != '\0')) {
3652 mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp);
3653 return ROFF_IGN;
3654 }
3655 ksz = p - kp;
3656 while (*p == ' ')
3657 p++;
3658
3659 /*
3660 * If the replacement string contains a font escape sequence,
3661 * we have to restore the font at the end.
3662 */
3663
3664 vp = p;
3665 vsz = strlen(p);
3666 font = 0;
3667 while (*p != '\0') {
3668 if (*p++ != '\\')
3669 continue;
3670 switch (mandoc_escape(&p, NULL, NULL)) {
3671 case ESCAPE_FONT:
3672 case ESCAPE_FONTROMAN:
3673 case ESCAPE_FONTITALIC:
3674 case ESCAPE_FONTBOLD:
3675 case ESCAPE_FONTBI:
3676 case ESCAPE_FONTCR:
3677 case ESCAPE_FONTCB:
3678 case ESCAPE_FONTCI:
3679 case ESCAPE_FONTPREV:
3680 font++;
3681 break;
3682 default:
3683 break;
3684 }
3685 }
3686 if (font > 1)
3687 mandoc_msg(MANDOCERR_CHAR_FONT,
3688 ln, (int)(vp - buf->buf), "%s", vp);
3689
3690 /*
3691 * Approximate the effect of .char using the .tr tables.
3692 * XXX In groff, .char and .tr interact differently.
3693 */
3694
3695 if (ksz == 1) {
3696 if (r->xtab == NULL)
3697 r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3698 assert((unsigned int)*kp < 128);
3699 free(r->xtab[(int)*kp].p);
3700 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3701 "%s%s", vp, font ? "\fP" : "");
3702 } else {
3703 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3704 if (font)
3705 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3706 }
3707 return ROFF_IGN;
3708 }
3709
3710 static int
3711 roff_ec(ROFF_ARGS)
3712 {
3713 const char *p;
3714
3715 p = buf->buf + pos;
3716 if (*p == '\0')
3717 r->escape = '\\';
3718 else {
3719 r->escape = *p;
3720 if (*++p != '\0')
3721 mandoc_msg(MANDOCERR_ARG_EXCESS, ln,
3722 (int)(p - buf->buf), "ec ... %s", p);
3723 }
3724 return ROFF_IGN;
3725 }
3726
3727 static int
3728 roff_eo(ROFF_ARGS)
3729 {
3730 r->escape = '\0';
3731 if (buf->buf[pos] != '\0')
3732 mandoc_msg(MANDOCERR_ARG_SKIP,
3733 ln, pos, "eo %s", buf->buf + pos);
3734 return ROFF_IGN;
3735 }
3736
3737 static int
3738 roff_mc(ROFF_ARGS)
3739 {
3740 struct roff_node *n;
3741 char *cp;
3742
3743 /* Parse the first argument. */
3744
3745 cp = buf->buf + pos;
3746 if (*cp != '\0')
3747 cp++;
3748 if (buf->buf[pos] == '\\') {
3749 switch (mandoc_escape((const char **)&cp, NULL, NULL)) {
3750 case ESCAPE_SPECIAL:
3751 case ESCAPE_UNICODE:
3752 case ESCAPE_NUMBERED:
3753 break;
3754 default:
3755 *cp = '\0';
3756 mandoc_msg(MANDOCERR_MC_ESC, ln, pos,
3757 "mc %s", buf->buf + pos);
3758 buf->buf[pos] = '\0';
3759 break;
3760 }
3761 }
3762
3763 /* Ignore additional arguments. */
3764
3765 while (*cp == ' ')
3766 *cp++ = '\0';
3767 if (*cp != '\0') {
3768 mandoc_msg(MANDOCERR_MC_DIST, ln, (int)(cp - buf->buf),
3769 "mc ... %s", cp);
3770 *cp = '\0';
3771 }
3772
3773 /* Create the .mc node. */
3774
3775 roff_elem_alloc(r->man, ln, ppos, tok);
3776 n = r->man->last;
3777 if (buf->buf[pos] != '\0')
3778 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3779 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3780 r->man->last = n;
3781 r->man->next = ROFF_NEXT_SIBLING;
3782 return ROFF_IGN;
3783 }
3784
3785 static int
3786 roff_nop(ROFF_ARGS)
3787 {
3788 while (buf->buf[pos] == ' ')
3789 pos++;
3790 *offs = pos;
3791 return ROFF_RERUN;
3792 }
3793
3794 static int
3795 roff_tr(ROFF_ARGS)
3796 {
3797 const char *p, *first, *second;
3798 size_t fsz, ssz;
3799 enum mandoc_esc esc;
3800
3801 p = buf->buf + pos;
3802
3803 if (*p == '\0') {
3804 mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr");
3805 return ROFF_IGN;
3806 }
3807
3808 while (*p != '\0') {
3809 fsz = ssz = 1;
3810
3811 first = p++;
3812 if (*first == '\\') {
3813 esc = mandoc_escape(&p, NULL, NULL);
3814 if (esc == ESCAPE_ERROR) {
3815 mandoc_msg(MANDOCERR_ESC_BAD, ln,
3816 (int)(p - buf->buf), "%s", first);
3817 return ROFF_IGN;
3818 }
3819 fsz = (size_t)(p - first);
3820 }
3821
3822 second = p++;
3823 if (*second == '\\') {
3824 esc = mandoc_escape(&p, NULL, NULL);
3825 if (esc == ESCAPE_ERROR) {
3826 mandoc_msg(MANDOCERR_ESC_BAD, ln,
3827 (int)(p - buf->buf), "%s", second);
3828 return ROFF_IGN;
3829 }
3830 ssz = (size_t)(p - second);
3831 } else if (*second == '\0') {
3832 mandoc_msg(MANDOCERR_TR_ODD, ln,
3833 (int)(first - buf->buf), "tr %s", first);
3834 second = " ";
3835 p--;
3836 }
3837
3838 if (fsz > 1) {
3839 roff_setstrn(&r->xmbtab, first, fsz,
3840 second, ssz, 0);
3841 continue;
3842 }
3843
3844 if (r->xtab == NULL)
3845 r->xtab = mandoc_calloc(128,
3846 sizeof(struct roffstr));
3847
3848 free(r->xtab[(int)*first].p);
3849 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3850 r->xtab[(int)*first].sz = ssz;
3851 }
3852
3853 return ROFF_IGN;
3854 }
3855
3856 /*
3857 * Implementation of the .return request.
3858 * There is no need to call roff_userret() from here.
3859 * The read module will call that after rewinding the reader stack
3860 * to the place from where the current macro was called.
3861 */
3862 static int
3863 roff_return(ROFF_ARGS)
3864 {
3865 if (r->mstackpos >= 0)
3866 return ROFF_IGN | ROFF_USERRET;
3867
3868 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return");
3869 return ROFF_IGN;
3870 }
3871
3872 static int
3873 roff_rn(ROFF_ARGS)
3874 {
3875 const char *value;
3876 char *oldn, *newn, *end;
3877 size_t oldsz, newsz;
3878 int deftype;
3879
3880 oldn = newn = buf->buf + pos;
3881 if (*oldn == '\0')
3882 return ROFF_IGN;
3883
3884 oldsz = roff_getname(r, &newn, ln, pos);
3885 if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0')
3886 return ROFF_IGN;
3887
3888 end = newn;
3889 newsz = roff_getname(r, &end, ln, newn - buf->buf);
3890 if (newsz == 0)
3891 return ROFF_IGN;
3892
3893 deftype = ROFFDEF_ANY;
3894 value = roff_getstrn(r, oldn, oldsz, &deftype);
3895 switch (deftype) {
3896 case ROFFDEF_USER:
3897 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3898 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3899 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3900 break;
3901 case ROFFDEF_PRE:
3902 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3903 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3904 break;
3905 case ROFFDEF_REN:
3906 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3907 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3908 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3909 break;
3910 case ROFFDEF_STD:
3911 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3912 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3913 break;
3914 default:
3915 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3916 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3917 break;
3918 }
3919 return ROFF_IGN;
3920 }
3921
3922 static int
3923 roff_shift(ROFF_ARGS)
3924 {
3925 struct mctx *ctx;
3926 int argpos, levels, i;
3927
3928 argpos = pos;
3929 levels = 1;
3930 if (buf->buf[pos] != '\0' &&
3931 roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
3932 mandoc_msg(MANDOCERR_CE_NONUM,
3933 ln, pos, "shift %s", buf->buf + pos);
3934 levels = 1;
3935 }
3936 if (r->mstackpos < 0) {
3937 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift");
3938 return ROFF_IGN;
3939 }
3940 ctx = r->mstack + r->mstackpos;
3941 if (levels > ctx->argc) {
3942 mandoc_msg(MANDOCERR_SHIFT,
3943 ln, argpos, "%d, but max is %d", levels, ctx->argc);
3944 levels = ctx->argc;
3945 }
3946 if (levels < 0) {
3947 mandoc_msg(MANDOCERR_ARG_NEG, ln, argpos, "shift %d", levels);
3948 levels = 0;
3949 }
3950 if (levels == 0)
3951 return ROFF_IGN;
3952 for (i = 0; i < levels; i++)
3953 free(ctx->argv[i]);
3954 ctx->argc -= levels;
3955 for (i = 0; i < ctx->argc; i++)
3956 ctx->argv[i] = ctx->argv[i + levels];
3957 return ROFF_IGN;
3958 }
3959
3960 static int
3961 roff_so(ROFF_ARGS)
3962 {
3963 char *name, *cp;
3964
3965 name = buf->buf + pos;
3966 mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name);
3967
3968 /*
3969 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3970 * opening anything that's not in our cwd or anything beneath
3971 * it. Thus, explicitly disallow traversing up the file-system
3972 * or using absolute paths.
3973 */
3974
3975 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3976 mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name);
3977 buf->sz = mandoc_asprintf(&cp,
3978 ".sp\nSee the file %s.\n.sp", name) + 1;
3979 free(buf->buf);
3980 buf->buf = cp;
3981 *offs = 0;
3982 return ROFF_REPARSE;
3983 }
3984
3985 *offs = pos;
3986 return ROFF_SO;
3987 }
3988
3989 /* --- user defined strings and macros ------------------------------------ */
3990
3991 static int
3992 roff_userdef(ROFF_ARGS)
3993 {
3994 struct mctx *ctx;
3995 char *arg, *ap, *dst, *src;
3996 size_t sz;
3997
3998 /* If the macro is empty, ignore it altogether. */
3999
4000 if (*r->current_string == '\0')
4001 return ROFF_IGN;
4002
4003 /* Initialize a new macro stack context. */
4004
4005 if (++r->mstackpos == r->mstacksz) {
4006 r->mstack = mandoc_recallocarray(r->mstack,
4007 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
4008 r->mstacksz += 8;
4009 }
4010 ctx = r->mstack + r->mstackpos;
4011 ctx->argc = 0;
4012
4013 /*
4014 * Collect pointers to macro argument strings,
4015 * NUL-terminating them and escaping quotes.
4016 */
4017
4018 src = buf->buf + pos;
4019 while (*src != '\0') {
4020 if (ctx->argc == ctx->argsz) {
4021 ctx->argsz += 8;
4022 ctx->argv = mandoc_reallocarray(ctx->argv,
4023 ctx->argsz, sizeof(*ctx->argv));
4024 }
4025 arg = roff_getarg(r, &src, ln, &pos);
4026 sz = 1; /* For the terminating NUL. */
4027 for (ap = arg; *ap != '\0'; ap++)
4028 sz += *ap == '"' ? 4 : 1;
4029 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
4030 for (ap = arg; *ap != '\0'; ap++) {
4031 if (*ap == '"') {
4032 memcpy(dst, "\\(dq", 4);
4033 dst += 4;
4034 } else
4035 *dst++ = *ap;
4036 }
4037 *dst = '\0';
4038 free(arg);
4039 }
4040
4041 /* Replace the macro invocation by the macro definition. */
4042
4043 free(buf->buf);
4044 buf->buf = mandoc_strdup(r->current_string);
4045 buf->sz = strlen(buf->buf) + 1;
4046 *offs = 0;
4047
4048 return buf->buf[buf->sz - 2] == '\n' ?
4049 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
4050 }
4051
4052 /*
4053 * Calling a high-level macro that was renamed with .rn.
4054 * r->current_string has already been set up by roff_parse().
4055 */
4056 static int
4057 roff_renamed(ROFF_ARGS)
4058 {
4059 char *nbuf;
4060
4061 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
4062 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
4063 free(buf->buf);
4064 buf->buf = nbuf;
4065 *offs = 0;
4066 return ROFF_CONT;
4067 }
4068
4069 /*
4070 * Measure the length in bytes of the roff identifier at *cpp
4071 * and advance the pointer to the next word.
4072 */
4073 static size_t
4074 roff_getname(struct roff *r, char **cpp, int ln, int pos)
4075 {
4076 char *name, *cp;
4077 size_t namesz;
4078
4079 name = *cpp;
4080 if (*name == '\0')
4081 return 0;
4082
4083 /* Advance cp to the byte after the end of the name. */
4084
4085 for (cp = name; 1; cp++) {
4086 namesz = cp - name;
4087 if (*cp == '\0')
4088 break;
4089 if (*cp == ' ' || *cp == '\t') {
4090 cp++;
4091 break;
4092 }
4093 if (*cp != '\\')
4094 continue;
4095 if (cp[1] == '{' || cp[1] == '}')
4096 break;
4097 if (*++cp == '\\')
4098 continue;
4099 mandoc_msg(MANDOCERR_NAMESC, ln, pos,
4100 "%.*s", (int)(cp - name + 1), name);
4101 mandoc_escape((const char **)&cp, NULL, NULL);
4102 break;
4103 }
4104
4105 /* Read past spaces. */
4106
4107 while (*cp == ' ')
4108 cp++;
4109
4110 *cpp = cp;
4111 return namesz;
4112 }
4113
4114 /*
4115 * Store *string into the user-defined string called *name.
4116 * To clear an existing entry, call with (*r, *name, NULL, 0).
4117 * append == 0: replace mode
4118 * append == 1: single-line append mode
4119 * append == 2: multiline append mode, append '\n' after each call
4120 */
4121 static void
4122 roff_setstr(struct roff *r, const char *name, const char *string,
4123 int append)
4124 {
4125 size_t namesz;
4126
4127 namesz = strlen(name);
4128 roff_setstrn(&r->strtab, name, namesz, string,
4129 string ? strlen(string) : 0, append);
4130 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
4131 }
4132
4133 static void
4134 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
4135 const char *string, size_t stringsz, int append)
4136 {
4137 struct roffkv *n;
4138 char *c;
4139 int i;
4140 size_t oldch, newch;
4141
4142 /* Search for an existing string with the same name. */
4143 n = *r;
4144
4145 while (n && (namesz != n->key.sz ||
4146 strncmp(n->key.p, name, namesz)))
4147 n = n->next;
4148
4149 if (NULL == n) {
4150 /* Create a new string table entry. */
4151 n = mandoc_malloc(sizeof(struct roffkv));
4152 n->key.p = mandoc_strndup(name, namesz);
4153 n->key.sz = namesz;
4154 n->val.p = NULL;
4155 n->val.sz = 0;
4156 n->next = *r;
4157 *r = n;
4158 } else if (0 == append) {
4159 free(n->val.p);
4160 n->val.p = NULL;
4161 n->val.sz = 0;
4162 }
4163
4164 if (NULL == string)
4165 return;
4166
4167 /*
4168 * One additional byte for the '\n' in multiline mode,
4169 * and one for the terminating '\0'.
4170 */
4171 newch = stringsz + (1 < append ? 2u : 1u);
4172
4173 if (NULL == n->val.p) {
4174 n->val.p = mandoc_malloc(newch);
4175 *n->val.p = '\0';
4176 oldch = 0;
4177 } else {
4178 oldch = n->val.sz;
4179 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
4180 }
4181
4182 /* Skip existing content in the destination buffer. */
4183 c = n->val.p + (int)oldch;
4184
4185 /* Append new content to the destination buffer. */
4186 i = 0;
4187 while (i < (int)stringsz) {
4188 /*
4189 * Rudimentary roff copy mode:
4190 * Handle escaped backslashes.
4191 */
4192 if ('\\' == string[i] && '\\' == string[i + 1])
4193 i++;
4194 *c++ = string[i++];
4195 }
4196
4197 /* Append terminating bytes. */
4198 if (1 < append)
4199 *c++ = '\n';
4200
4201 *c = '\0';
4202 n->val.sz = (int)(c - n->val.p);
4203 }
4204
4205 static const char *
4206 roff_getstrn(struct roff *r, const char *name, size_t len,
4207 int *deftype)
4208 {
4209 const struct roffkv *n;
4210 int found, i;
4211 enum roff_tok tok;
4212
4213 found = 0;
4214 for (n = r->strtab; n != NULL; n = n->next) {
4215 if (strncmp(name, n->key.p, len) != 0 ||
4216 n->key.p[len] != '\0' || n->val.p == NULL)
4217 continue;
4218 if (*deftype & ROFFDEF_USER) {
4219 *deftype = ROFFDEF_USER;
4220 return n->val.p;
4221 } else {
4222 found = 1;
4223 break;
4224 }
4225 }
4226 for (n = r->rentab; n != NULL; n = n->next) {
4227 if (strncmp(name, n->key.p, len) != 0 ||
4228 n->key.p[len] != '\0' || n->val.p == NULL)
4229 continue;
4230 if (*deftype & ROFFDEF_REN) {
4231 *deftype = ROFFDEF_REN;
4232 return n->val.p;
4233 } else {
4234 found = 1;
4235 break;
4236 }
4237 }
4238 for (i = 0; i < PREDEFS_MAX; i++) {
4239 if (strncmp(name, predefs[i].name, len) != 0 ||
4240 predefs[i].name[len] != '\0')
4241 continue;
4242 if (*deftype & ROFFDEF_PRE) {
4243 *deftype = ROFFDEF_PRE;
4244 return predefs[i].str;
4245 } else {
4246 found = 1;
4247 break;
4248 }
4249 }
4250 if (r->man->meta.macroset != MACROSET_MAN) {
4251 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
4252 if (strncmp(name, roff_name[tok], len) != 0 ||
4253 roff_name[tok][len] != '\0')
4254 continue;
4255 if (*deftype & ROFFDEF_STD) {
4256 *deftype = ROFFDEF_STD;
4257 return NULL;
4258 } else {
4259 found = 1;
4260 break;
4261 }
4262 }
4263 }
4264 if (r->man->meta.macroset != MACROSET_MDOC) {
4265 for (tok = MAN_TH; tok < MAN_MAX; tok++) {
4266 if (strncmp(name, roff_name[tok], len) != 0 ||
4267 roff_name[tok][len] != '\0')
4268 continue;
4269 if (*deftype & ROFFDEF_STD) {
4270 *deftype = ROFFDEF_STD;
4271 return NULL;
4272 } else {
4273 found = 1;
4274 break;
4275 }
4276 }
4277 }
4278
4279 if (found == 0 && *deftype != ROFFDEF_ANY) {
4280 if (*deftype & ROFFDEF_REN) {
4281 /*
4282 * This might still be a request,
4283 * so do not treat it as undefined yet.
4284 */
4285 *deftype = ROFFDEF_UNDEF;
4286 return NULL;
4287 }
4288
4289 /* Using an undefined string defines it to be empty. */
4290
4291 roff_setstrn(&r->strtab, name, len, "", 0, 0);
4292 roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
4293 }
4294
4295 *deftype = 0;
4296 return NULL;
4297 }
4298
4299 static void
4300 roff_freestr(struct roffkv *r)
4301 {
4302 struct roffkv *n, *nn;
4303
4304 for (n = r; n; n = nn) {
4305 free(n->key.p);
4306 free(n->val.p);
4307 nn = n->next;
4308 free(n);
4309 }
4310 }
4311
4312 /* --- accessors and utility functions ------------------------------------ */
4313
4314 /*
4315 * Duplicate an input string, making the appropriate character
4316 * conversations (as stipulated by `tr') along the way.
4317 * Returns a heap-allocated string with all the replacements made.
4318 */
4319 char *
4320 roff_strdup(const struct roff *r, const char *p)
4321 {
4322 const struct roffkv *cp;
4323 char *res;
4324 const char *pp;
4325 size_t ssz, sz;
4326 enum mandoc_esc esc;
4327
4328 if (NULL == r->xmbtab && NULL == r->xtab)
4329 return mandoc_strdup(p);
4330 else if ('\0' == *p)
4331 return mandoc_strdup("");
4332
4333 /*
4334 * Step through each character looking for term matches
4335 * (remember that a `tr' can be invoked with an escape, which is
4336 * a glyph but the escape is multi-character).
4337 * We only do this if the character hash has been initialised
4338 * and the string is >0 length.
4339 */
4340
4341 res = NULL;
4342 ssz = 0;
4343
4344 while ('\0' != *p) {
4345 assert((unsigned int)*p < 128);
4346 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4347 sz = r->xtab[(int)*p].sz;
4348 res = mandoc_realloc(res, ssz + sz + 1);
4349 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4350 ssz += sz;
4351 p++;
4352 continue;
4353 } else if ('\\' != *p) {
4354 res = mandoc_realloc(res, ssz + 2);
4355 res[ssz++] = *p++;
4356 continue;
4357 }
4358
4359 /* Search for term matches. */
4360 for (cp = r->xmbtab; cp; cp = cp->next)
4361 if (0 == strncmp(p, cp->key.p, cp->key.sz))
4362 break;
4363
4364 if (NULL != cp) {
4365 /*
4366 * A match has been found.
4367 * Append the match to the array and move
4368 * forward by its keysize.
4369 */
4370 res = mandoc_realloc(res,
4371 ssz + cp->val.sz + 1);
4372 memcpy(res + ssz, cp->val.p, cp->val.sz);
4373 ssz += cp->val.sz;
4374 p += (int)cp->key.sz;
4375 continue;
4376 }
4377
4378 /*
4379 * Handle escapes carefully: we need to copy
4380 * over just the escape itself, or else we might
4381 * do replacements within the escape itself.
4382 * Make sure to pass along the bogus string.
4383 */
4384 pp = p++;
4385 esc = mandoc_escape(&p, NULL, NULL);
4386 if (ESCAPE_ERROR == esc) {
4387 sz = strlen(pp);
4388 res = mandoc_realloc(res, ssz + sz + 1);
4389 memcpy(res + ssz, pp, sz);
4390 break;
4391 }
4392 /*
4393 * We bail out on bad escapes.
4394 * No need to warn: we already did so when
4395 * roff_expand() was called.
4396 */
4397 sz = (int)(p - pp);
4398 res = mandoc_realloc(res, ssz + sz + 1);
4399 memcpy(res + ssz, pp, sz);
4400 ssz += sz;
4401 }
4402
4403 res[(int)ssz] = '\0';
4404 return res;
4405 }
4406
4407 int
4408 roff_getformat(const struct roff *r)
4409 {
4410
4411 return r->format;
4412 }
4413
4414 /*
4415 * Find out whether a line is a macro line or not.
4416 * If it is, adjust the current position and return one; if it isn't,
4417 * return zero and don't change the current position.
4418 * If the control character has been set with `.cc', then let that grain
4419 * precedence.
4420 * This is slighly contrary to groff, where using the non-breaking
4421 * control character when `cc' has been invoked will cause the
4422 * non-breaking macro contents to be printed verbatim.
4423 */
4424 int
4425 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4426 {
4427 int pos;
4428
4429 pos = *ppos;
4430
4431 if (r->control != '\0' && cp[pos] == r->control)
4432 pos++;
4433 else if (r->control != '\0')
4434 return 0;
4435 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4436 pos += 2;
4437 else if ('.' == cp[pos] || '\'' == cp[pos])
4438 pos++;
4439 else
4440 return 0;
4441
4442 while (' ' == cp[pos] || '\t' == cp[pos])
4443 pos++;
4444
4445 *ppos = pos;
4446 return 1;
4447 }