git.cameronkatri.com Git - mandoc.git/blob - roff.c
If a node is tagged explicitly, skip implicit tagging for that node.
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.383 2022/04/24 17:40:22 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010-2015, 2017-2022 Ingo Schwarze <schwarze@openbsd.org>
4 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 * Implementation of the roff(7) parser for mandoc(1).
19 */
20 #include "config.h"
21
22 #include <sys/types.h>
23
24 #include <assert.h>
25 #include <ctype.h>
26 #include <limits.h>
27 #include <stddef.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32
33 #include "mandoc_aux.h"
34 #include "mandoc_ohash.h"
35 #include "mandoc.h"
36 #include "roff.h"
37 #include "mandoc_parse.h"
38 #include "libmandoc.h"
39 #include "roff_int.h"
40 #include "tbl_parse.h"
41 #include "eqn_parse.h"
42
43 /*
44 * ASCII_ESC is used to signal from roff_getarg() to roff_expand()
45 * that an escape sequence resulted from copy-in processing and
46 * needs to be checked or interpolated. As it is used nowhere
47 * else, it is defined here rather than in a header file.
48 */
49 #define ASCII_ESC 27
50
51 /* Maximum number of string expansions per line, to break infinite loops. */
52 #define EXPAND_LIMIT 1000
53
54 /* Types of definitions of macros and strings. */
55 #define ROFFDEF_USER (1 << 1) /* User-defined. */
56 #define ROFFDEF_PRE (1 << 2) /* Predefined. */
57 #define ROFFDEF_REN (1 << 3) /* Renamed standard macro. */
58 #define ROFFDEF_STD (1 << 4) /* mdoc(7) or man(7) macro. */
59 #define ROFFDEF_ANY (ROFFDEF_USER | ROFFDEF_PRE | \
60 ROFFDEF_REN | ROFFDEF_STD)
61 #define ROFFDEF_UNDEF (1 << 5) /* Completely undefined. */
62
63 /* --- data types --------------------------------------------------------- */
64
/*
 * An incredibly-simple string buffer:
 * a character pointer together with the cached length of the string.
 */
struct roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
72
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * struct roff uses such lists for its string, rename,
 * and multi-byte translation tables.
 */
struct roffkv {
	struct roffstr	 key; /* name to look up */
	struct roffstr	 val; /* value associated with the name */
	struct roffkv	*next; /* next in list */
};
81
/*
 * A single number register as part of a singly-linked list.
 */
struct roffreg {
	struct roffstr	 key; /* name of the register */
	int		 val; /* presumably the current value - TODO confirm */
	int		 step; /* presumably the auto-increment step - TODO confirm */
	struct roffreg	*next; /* next in list */
};
91
/*
 * Association of request and macro names with token IDs.
 * These are the entries of the hash tables built by roffhash_alloc();
 * the name is stored inline as a flexible array member, including
 * its terminating NUL byte.
 */
struct roffreq {
	enum roff_tok	 tok; /* token ID the name maps to */
	char		 name[]; /* nil-terminated name, used as the hash key */
};
99
/*
 * A macro processing context.
 * More than one is needed when macro calls are nested.
 * The contexts live in the mstack array of struct roff;
 * roff_free() releases the argv array of each of them.
 */
struct mctx {
	char		**argv; /* heap-allocated argument vector */
	int		 argc; /* presumably the number of arguments in use - TODO confirm */
	int		 argsz; /* presumably the allocated size of argv - TODO confirm */
};
109
/*
 * The state of the roff(7) parser.
 * Allocated by roff_alloc(), returned to its initial state by
 * roff_reset(), and released by roff_free().
 * Both stack positions are -1 while the respective stack is empty.
 */
struct roff {
	struct roff_man	*man; /* mdoc or man parser */
	struct roffnode	*last; /* leaf of stack */
	struct mctx	*mstack; /* stack of macro contexts */
	int		*rstack; /* stack of inverted `ie' values */
	struct ohash	*reqtab; /* request lookup table */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*rentab; /* renamed strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* equation parser */
	struct eqn_node	*eqn; /* active equation parser */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 mstacksz; /* current size of mstack */
	int		 mstackpos; /* position in mstack */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack */
	int		 format; /* current file in mdoc or man format */
	char		 control; /* control character */
	char		 escape; /* escape character */
};
137
/*
 * A macro definition, condition, or ignored block.
 * Nodes form a stack linked through the parent pointer; they are
 * created by roffnode_push() and destroyed by roffnode_pop(),
 * which also frees the name and end strings.
 */
struct roffnode {
	enum roff_tok	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* custom end macro of the block */
	int		 endspan; /* scope to: 1=eol 2=next line -1=\} */
	int		 rule; /* content is: 1=evaluated 0=skipped */
};
151
/*
 * Common parameter list shared by all request handlers,
 * such that the handlers can be called through the
 * roffproc function pointer type defined below.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum roff_tok tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Pointer to a request handler taking the ROFF_ARGS parameters. */
typedef	int (*roffproc)(ROFF_ARGS);
161
/*
 * The set of handlers for one request.
 * The roffs[] array holds one of these for each request token.
 * Requests not opening a block leave text and sub set to NULL.
 */
struct roffmac {
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags; /* zero or ROFFMAC_STRUCT */
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
};
169
/*
 * One predefined string: a name and the text replacing it.
 */
struct predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Expand to one initializer of a struct predef, including the
 * trailing comma, for use inside the predefs[] array initializer.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
177
178 /* --- function prototypes ------------------------------------------------ */
179
/*
 * Functions declared with ROFF_ARGS have the roffproc signature;
 * most of them are request handlers referenced from the roffs[] table.
 */
static	int		 roffnode_cleanscope(struct roff *);
static	int		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum roff_tok,
				const char *, int, int);
static	void		 roff_addtbl(struct roff_man *, int, struct tbl_node *);
static	int		 roff_als(ROFF_ARGS);
static	int		 roff_block(ROFF_ARGS);
static	int		 roff_block_text(ROFF_ARGS);
static	int		 roff_block_sub(ROFF_ARGS);
static	int		 roff_break(ROFF_ARGS);
static	int		 roff_cblock(ROFF_ARGS);
static	int		 roff_cc(ROFF_ARGS);
static	int		 roff_ccond(struct roff *, int, int);
static	int		 roff_char(ROFF_ARGS);
static	int		 roff_cond(ROFF_ARGS);
static	int		 roff_cond_checkend(ROFF_ARGS);
static	int		 roff_cond_text(ROFF_ARGS);
static	int		 roff_cond_sub(ROFF_ARGS);
static	int		 roff_ds(ROFF_ARGS);
static	int		 roff_ec(ROFF_ARGS);
static	int		 roff_eo(ROFF_ARGS);
static	int		 roff_eqndelim(struct roff *, struct buf *, int);
static	int		 roff_evalcond(struct roff *, int, char *, int *);
static	int		 roff_evalnum(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalpar(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalstrcond(const char *, int *);
static	int		 roff_expand(struct roff *, struct buf *,
				int, int, char);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	size_t		 roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *, int);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(struct roff *,
				const char *, size_t, char);
static	int		 roff_getregro(const struct roff *,
				const char *name);
static	const char	*roff_getstrn(struct roff *,
				const char *, size_t, int *);
static	int		 roff_hasregn(const struct roff *,
				const char *, size_t);
static	int		 roff_insec(ROFF_ARGS);
static	int		 roff_it(ROFF_ARGS);
static	int		 roff_line_ignore(ROFF_ARGS);
static	void		 roff_man_alloc1(struct roff_man *);
static	void		 roff_man_free1(struct roff_man *);
static	int		 roff_manyarg(ROFF_ARGS);
static	int		 roff_noarg(ROFF_ARGS);
static	int		 roff_nop(ROFF_ARGS);
static	int		 roff_nr(ROFF_ARGS);
static	int		 roff_onearg(ROFF_ARGS);
static	enum roff_tok	 roff_parse(struct roff *, char *, int *,
				int, int);
static	int		 roff_parsetext(struct roff *, struct buf *,
				int, int *);
static	int		 roff_renamed(ROFF_ARGS);
static	int		 roff_return(ROFF_ARGS);
static	int		 roff_rm(ROFF_ARGS);
static	int		 roff_rn(ROFF_ARGS);
static	int		 roff_rr(ROFF_ARGS);
static	void		 roff_setregn(struct roff *, const char *,
				size_t, int, char, int);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	int		 roff_shift(ROFF_ARGS);
static	int		 roff_so(ROFF_ARGS);
static	int		 roff_tr(ROFF_ARGS);
static	int		 roff_Dd(ROFF_ARGS);
static	int		 roff_TE(ROFF_ARGS);
static	int		 roff_TS(ROFF_ARGS);
static	int		 roff_EQ(ROFF_ARGS);
static	int		 roff_EN(ROFF_ARGS);
static	int		 roff_T_(ROFF_ARGS);
static	int		 roff_unsupp(ROFF_ARGS);
static	int		 roff_userdef(ROFF_ARGS);
260
261 /* --- constant data ------------------------------------------------------ */
262
263 #define ROFFNUM_SCALE (1 << 0) /* Honour scaling in roff_getnum(). */
264 #define ROFFNUM_WHITE (1 << 1) /* Skip whitespace in roff_evalnum(). */
265
/*
 * Names of all requests and macros, indexed by token ID.
 * The order of the entries has to match the order of the entries
 * in the roffs[] array below for all tokens that have handlers.
 * NULL entries mark tokens that do not have a name;
 * roffhash_alloc() skips those.
 */
const char *__roff_name[MAN_MAX + 1] = {
	/* Requests; the NULL after "ti" is the ROFF_MAX slot of roffs[]. */
	"br",		"ce",		"fi",		"ft",
	"ll",		"mc",		"nf",
	"po",		"rj",		"sp",
	"ta",		"ti",		NULL,
	"ab",		"ad",		"af",		"aln",
	"als",		"am",		"am1",		"ami",
	"ami1",		"as",		"as1",		"asciify",
	"backtrace",	"bd",		"bleedat",	"blm",
	"box",		"boxa",		"bp",		"BP",
	"break",	"breakchar",	"brnl",		"brp",
	"brpnl",	"c2",		"cc",
	"cf",		"cflags",	"ch",		"char",
	"chop",		"class",	"close",	"CL",
	"color",	"composite",	"continue",	"cp",
	"cropat",	"cs",		"cu",		"da",
	"dch",		"Dd",		"de",		"de1",
	"defcolor",	"dei",		"dei1",		"device",
	"devicem",	"di",		"do",		"ds",
	"ds1",		"dwh",		"dt",		"ec",
	"ecr",		"ecs",		"el",		"em",
	"EN",		"eo",		"EP",		"EQ",
	"errprint",	"ev",		"evc",		"ex",
	"fallback",	"fam",		"fc",		"fchar",
	"fcolor",	"fdeferlig",	"feature",	"fkern",
	"fl",		"flig",		"fp",		"fps",
	"fschar",	"fspacewidth",	"fspecial",	"ftr",
	"fzoom",	"gcolor",	"hc",		"hcode",
	"hidechar",	"hla",		"hlm",		"hpf",
	"hpfa",		"hpfcode",	"hw",		"hy",
	"hylang",	"hylen",	"hym",		"hypp",
	"hys",		"ie",		"if",		"ig",
	"index",	"it",		"itc",		"IX",
	"kern",		"kernafter",	"kernbefore",	"kernpair",
	"lc",		"lc_ctype",	"lds",		"length",
	"letadj",	"lf",		"lg",		"lhang",
	"linetabs",	"lnr",		"lnrf",		"lpfx",
	"ls",		"lsm",		"lt",
	"mediasize",	"minss",	"mk",		"mso",
	"na",		"ne",		"nh",		"nhychar",
	"nm",		"nn",		"nop",		"nr",
	"nrf",		"nroff",	"ns",		"nx",
	"open",		"opena",	"os",		"output",
	"padj",		"papersize",	"pc",		"pev",
	"pi",		"PI",		"pl",		"pm",
	"pn",		"pnr",		"ps",
	"psbb",		"pshape",	"pso",		"ptr",
	"pvs",		"rchar",	"rd",		"recursionlimit",
	"return",	"rfschar",	"rhang",
	"rm",		"rn",		"rnn",		"rr",
	"rs",		"rt",		"schar",	"sentchar",
	"shc",		"shift",	"sizes",	"so",
	"spacewidth",	"special",	"spreadwarn",	"ss",
	"sty",		"substring",	"sv",		"sy",
	"T&",		"tc",		"TE",
	"TH",		"tkf",		"tl",
	"tm",		"tm1",		"tmc",		"tr",
	"track",	"transchar",	"trf",		"trimat",
	"trin",		"trnt",		"troff",	"TS",
	"uf",		"ul",		"unformat",	"unwatch",
	"unwatchn",	"vpt",		"vs",		"warn",
	"warnscale",	"watch",	"watchlength",	"watchn",
	"wh",		"while",	"write",	"writec",
	/*
	 * The two NULL entries after "." line up with the last two
	 * entries of roffs[], roff_renamed and roff_userdef.
	 */
	"writem",	"xflag",	".",		NULL,
	NULL,		"text",
	/* NOTE(review): presumably the mdoc(7) macros start here - confirm. */
	"Dd",		"Dt",		"Os",		"Sh",
	"Ss",		"Pp",		"D1",		"Dl",
	"Bd",		"Ed",		"Bl",		"El",
	"It",		"Ad",		"An",		"Ap",
	"Ar",		"Cd",		"Cm",		"Dv",
	"Er",		"Ev",		"Ex",		"Fa",
	"Fd",		"Fl",		"Fn",		"Ft",
	"Ic",		"In",		"Li",		"Nd",
	"Nm",		"Op",		"Ot",		"Pa",
	"Rv",		"St",		"Va",		"Vt",
	"Xr",		"%A",		"%B",		"%D",
	"%I",		"%J",		"%N",		"%O",
	"%P",		"%R",		"%T",		"%V",
	"Ac",		"Ao",		"Aq",		"At",
	"Bc",		"Bf",		"Bo",		"Bq",
	"Bsx",		"Bx",		"Db",		"Dc",
	"Do",		"Dq",		"Ec",		"Ef",
	"Em",		"Eo",		"Fx",		"Ms",
	"No",		"Ns",		"Nx",		"Ox",
	"Pc",		"Pf",		"Po",		"Pq",
	"Qc",		"Ql",		"Qo",		"Qq",
	"Re",		"Rs",		"Sc",		"So",
	"Sq",		"Sm",		"Sx",		"Sy",
	"Tn",		"Ux",		"Xc",		"Xo",
	"Fo",		"Fc",		"Oo",		"Oc",
	"Bk",		"Ek",		"Bt",		"Hf",
	"Fr",		"Ud",		"Lb",		"Lp",
	"Lk",		"Mt",		"Brq",		"Bro",
	"Brc",		"%C",		"Es",		"En",
	"Dx",		"%Q",		"%U",		"Ta",
	"Tg",		NULL,
	/* NOTE(review): presumably the man(7) macros start here - confirm. */
	"TH",		"SH",		"SS",		"TP",
	"TQ",
	"LP",		"PP",		"P",		"IP",
	"HP",		"SM",		"SB",		"BI",
	"IB",		"BR",		"RB",		"R",
	"B",		"I",		"IR",		"RI",
	"RE",		"RS",		"DT",		"UC",
	"PD",		"AT",		"in",
	"SY",		"YS",		"OP",
	"EX",		"EE",		"UR",
	"UE",		"MT",		"ME",		NULL
};
/* Read-only view of the name table used by roffhash_alloc(). */
const	char *const *roff_name = __roff_name;
375
/*
 * Handlers for all requests, indexed by token ID.
 * The trailing comment of each entry gives the request name;
 * the order has to match the order of the names in __roff_name[].
 * Only block and conditional requests provide text and sub handlers.
 */
static	struct roffmac	 roffs[TOKEN_NONE] = {
	{ roff_noarg, NULL, NULL, 0 },  /* br */
	{ roff_onearg, NULL, NULL, 0 },  /* ce */
	{ roff_noarg, NULL, NULL, 0 },  /* fi */
	{ roff_onearg, NULL, NULL, 0 },  /* ft */
	{ roff_onearg, NULL, NULL, 0 },  /* ll */
	{ roff_onearg, NULL, NULL, 0 },  /* mc */
	{ roff_noarg, NULL, NULL, 0 },  /* nf */
	{ roff_onearg, NULL, NULL, 0 },  /* po */
	{ roff_onearg, NULL, NULL, 0 },  /* rj */
	{ roff_onearg, NULL, NULL, 0 },  /* sp */
	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
	{ roff_onearg, NULL, NULL, 0 },  /* ti */
	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
	{ roff_als, NULL, NULL, 0 },  /* als */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
	{ roff_ds, NULL, NULL, 0 },  /* as */
	{ roff_ds, NULL, NULL, 0 },  /* as1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
	{ roff_unsupp, NULL, NULL, 0 },  /* box */
	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
	{ roff_break, NULL, NULL, 0 },  /* break */
	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
	{ roff_noarg, NULL, NULL, 0 },  /* brp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
	{ roff_cc, NULL, NULL, 0 },  /* cc */
	{ roff_insec, NULL, NULL, 0 },  /* cf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
	{ roff_char, NULL, NULL, 0 },  /* char */
	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
	{ roff_insec, NULL, NULL, 0 },  /* close */
	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
	{ roff_unsupp, NULL, NULL, 0 },  /* da */
	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* device */
	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
	{ roff_unsupp, NULL, NULL, 0 },  /* di */
	{ roff_unsupp, NULL, NULL, 0 },  /* do */
	{ roff_ds, NULL, NULL, 0 },  /* ds */
	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
	{ roff_ec, NULL, NULL, 0 },  /* ec */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
	{ roff_unsupp, NULL, NULL, 0 },  /* em */
	{ roff_EN, NULL, NULL, 0 },  /* EN */
	{ roff_eo, NULL, NULL, 0 },  /* eo */
	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
	{ roff_unsupp, NULL, NULL, 0 },  /* index */
	{ roff_it, NULL, NULL, 0 },  /* it */
	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
	{ roff_unsupp, NULL, NULL, 0 },  /* length */
	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
	{ roff_insec, NULL, NULL, 0 },  /* lf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
	{ roff_insec, NULL, NULL, 0 },  /* mso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
	{ roff_nop, NULL, NULL, 0 },  /* nop */
	{ roff_nr, NULL, NULL, 0 },  /* nr */
	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
	{ roff_insec, NULL, NULL, 0 },  /* nx */
	{ roff_insec, NULL, NULL, 0 },  /* open */
	{ roff_insec, NULL, NULL, 0 },  /* opena */
	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
	{ roff_unsupp, NULL, NULL, 0 },  /* output */
	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
	{ roff_insec, NULL, NULL, 0 },  /* pi */
	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
	{ roff_insec, NULL, NULL, 0 },  /* pso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
	{ roff_return, NULL, NULL, 0 },  /* return */
	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
	{ roff_rm, NULL, NULL, 0 },  /* rm */
	{ roff_rn, NULL, NULL, 0 },  /* rn */
	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
	{ roff_rr, NULL, NULL, 0 },  /* rr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
	{ roff_shift, NULL, NULL, 0 },  /* shift */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
	{ roff_so, NULL, NULL, 0 },  /* so */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
	{ roff_insec, NULL, NULL, 0 },  /* sy */
	{ roff_T_, NULL, NULL, 0 },  /* T& */
	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
	{ roff_TE, NULL, NULL, 0 },  /* TE */
	{ roff_Dd, NULL, NULL, 0 },  /* TH */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
	{ roff_tr, NULL, NULL, 0 },  /* tr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
	{ roff_insec, NULL, NULL, 0 },  /* trf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
	{ roff_TS, NULL, NULL, 0 },  /* TS */
	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
	{ roff_unsupp, NULL, NULL, 0 },  /* wh */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* while */
	{ roff_insec, NULL, NULL, 0 },  /* write */
	{ roff_insec, NULL, NULL, 0 },  /* writec */
	{ roff_insec, NULL, NULL, 0 },  /* writem */
	{ roff_line_ignore, NULL, NULL, 0 },  /* xflag */
	{ roff_cblock, NULL, NULL, 0 },  /* . */
	/* The last two tokens have no name in __roff_name[]. */
	{ roff_renamed, NULL, NULL, 0 },  /* presumably ROFF_RENAMED - confirm */
	{ roff_userdef, NULL, NULL, 0 }  /* presumably ROFF_USERDEF - confirm */
};
622
/*
 * Array of injected predefined strings.
 * The entries come from predefs.in, which is expected to consist
 * of PREDEF() invocations; PREDEFS_MAX is the size of the array.
 */
#define PREDEFS_MAX	38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};

/*
 * File-scope parser state that is not kept in struct roff.
 * roff_reset() clears all four variables.
 */
static	int	 roffce_lines;	/* number of input lines to center */
static	struct roff_node *roffce_node;  /* active request */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
633
634
635 /* --- request table ------------------------------------------------------ */
636
637 struct ohash *
638 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
639 {
640 struct ohash *htab;
641 struct roffreq *req;
642 enum roff_tok tok;
643 size_t sz;
644 unsigned int slot;
645
646 htab = mandoc_malloc(sizeof(*htab));
647 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
648
649 for (tok = mintok; tok < maxtok; tok++) {
650 if (roff_name[tok] == NULL)
651 continue;
652 sz = strlen(roff_name[tok]);
653 req = mandoc_malloc(sizeof(*req) + sz + 1);
654 req->tok = tok;
655 memcpy(req->name, roff_name[tok], sz + 1);
656 slot = ohash_qlookup(htab, req->name);
657 ohash_insert(htab, slot, req);
658 }
659 return htab;
660 }
661
/*
 * Release a table built by roffhash_alloc(), including all the
 * entries stored in it.  Passing NULL is allowed and does nothing.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 iter;

	if (htab != NULL) {
		entry = ohash_first(htab, &iter);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &iter);
		}
		ohash_delete(htab);
		free(htab);
	}
}
676
677 enum roff_tok
678 roffhash_find(struct ohash *htab, const char *name, size_t sz)
679 {
680 struct roffreq *req;
681 const char *end;
682
683 if (sz) {
684 end = name + sz;
685 req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
686 } else
687 req = ohash_find(htab, ohash_qlookup(htab, name));
688 return req == NULL ? TOKEN_NONE : req->tok;
689 }
690
691 /* --- stack of request blocks -------------------------------------------- */
692
693 /*
694 * Pop the current node off of the stack of roff instructions currently
695 * pending. Return 1 if it is a loop or 0 otherwise.
696 */
697 static int
698 roffnode_pop(struct roff *r)
699 {
700 struct roffnode *p;
701 int inloop;
702
703 p = r->last;
704 inloop = p->tok == ROFF_while;
705 r->last = p->parent;
706 free(p->name);
707 free(p->end);
708 free(p);
709 return inloop;
710 }
711
712 /*
713 * Push a roff node onto the instruction stack. This must later be
714 * removed with roffnode_pop().
715 */
716 static void
717 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
718 int line, int col)
719 {
720 struct roffnode *p;
721
722 p = mandoc_calloc(1, sizeof(struct roffnode));
723 p->tok = tok;
724 if (name)
725 p->name = mandoc_strdup(name);
726 p->parent = r->last;
727 p->line = line;
728 p->col = col;
729 p->rule = p->parent ? p->parent->rule : 0;
730
731 r->last = p;
732 }
733
734 /* --- roff parser state data management ---------------------------------- */
735
736 static void
737 roff_free1(struct roff *r)
738 {
739 int i;
740
741 tbl_free(r->first_tbl);
742 r->first_tbl = r->last_tbl = r->tbl = NULL;
743
744 eqn_free(r->last_eqn);
745 r->last_eqn = r->eqn = NULL;
746
747 while (r->mstackpos >= 0)
748 roff_userret(r);
749
750 while (r->last)
751 roffnode_pop(r);
752
753 free (r->rstack);
754 r->rstack = NULL;
755 r->rstacksz = 0;
756 r->rstackpos = -1;
757
758 roff_freereg(r->regtab);
759 r->regtab = NULL;
760
761 roff_freestr(r->strtab);
762 roff_freestr(r->rentab);
763 roff_freestr(r->xmbtab);
764 r->strtab = r->rentab = r->xmbtab = NULL;
765
766 if (r->xtab)
767 for (i = 0; i < 128; i++)
768 free(r->xtab[i].p);
769 free(r->xtab);
770 r->xtab = NULL;
771 }
772
773 void
774 roff_reset(struct roff *r)
775 {
776 roff_free1(r);
777 r->options |= MPARSE_COMMENT;
778 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
779 r->control = '\0';
780 r->escape = '\\';
781 roffce_lines = 0;
782 roffce_node = NULL;
783 roffit_lines = 0;
784 roffit_macro = NULL;
785 }
786
787 void
788 roff_free(struct roff *r)
789 {
790 int i;
791
792 roff_free1(r);
793 for (i = 0; i < r->mstacksz; i++)
794 free(r->mstack[i].argv);
795 free(r->mstack);
796 roffhash_free(r->reqtab);
797 free(r);
798 }
799
800 struct roff *
801 roff_alloc(int options)
802 {
803 struct roff *r;
804
805 r = mandoc_calloc(1, sizeof(struct roff));
806 r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
807 r->options = options | MPARSE_COMMENT;
808 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
809 r->mstackpos = -1;
810 r->rstackpos = -1;
811 r->escape = '\\';
812 return r;
813 }
814
815 /* --- syntax tree state data management ---------------------------------- */
816
817 static void
818 roff_man_free1(struct roff_man *man)
819 {
820 if (man->meta.first != NULL)
821 roff_node_delete(man, man->meta.first);
822 free(man->meta.msec);
823 free(man->meta.vol);
824 free(man->meta.os);
825 free(man->meta.arch);
826 free(man->meta.title);
827 free(man->meta.name);
828 free(man->meta.date);
829 free(man->meta.sodest);
830 }
831
832 void
833 roff_state_reset(struct roff_man *man)
834 {
835 man->last = man->meta.first;
836 man->last_es = NULL;
837 man->flags = 0;
838 man->lastsec = man->lastnamed = SEC_NONE;
839 man->next = ROFF_NEXT_CHILD;
840 roff_setreg(man->roff, "nS", 0, '=');
841 }
842
843 static void
844 roff_man_alloc1(struct roff_man *man)
845 {
846 memset(&man->meta, 0, sizeof(man->meta));
847 man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
848 man->meta.first->type = ROFFT_ROOT;
849 man->meta.macroset = MACROSET_NONE;
850 roff_state_reset(man);
851 }
852
/*
 * Throw away the syntax tree and the meta data,
 * then set up a new, empty tree for the next parse.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);	/* Drop the old tree and strings. */
	roff_man_alloc1(man);	/* Start over with a fresh root. */
}
859
860 void
861 roff_man_free(struct roff_man *man)
862 {
863 roff_man_free1(man);
864 free(man->os_r);
865 free(man);
866 }
867
868 struct roff_man *
869 roff_man_alloc(struct roff *roff, const char *os_s, int quick)
870 {
871 struct roff_man *man;
872
873 man = mandoc_calloc(1, sizeof(*man));
874 man->roff = roff;
875 man->os_s = os_s;
876 man->quick = quick;
877 roff_man_alloc1(man);
878 roff->man = man;
879 return man;
880 }
881
882 /* --- syntax tree handling ----------------------------------------------- */
883
884 struct roff_node *
885 roff_node_alloc(struct roff_man *man, int line, int pos,
886 enum roff_type type, int tok)
887 {
888 struct roff_node *n;
889
890 n = mandoc_calloc(1, sizeof(*n));
891 n->line = line;
892 n->pos = pos;
893 n->tok = tok;
894 n->type = type;
895 n->sec = man->lastsec;
896
897 if (man->flags & MDOC_SYNOPSIS)
898 n->flags |= NODE_SYNPRETTY;
899 else
900 n->flags &= ~NODE_SYNPRETTY;
901 if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL)
902 n->flags |= NODE_NOFILL;
903 else
904 n->flags &= ~NODE_NOFILL;
905 if (man->flags & MDOC_NEWLINE)
906 n->flags |= NODE_LINE;
907 man->flags &= ~MDOC_NEWLINE;
908
909 return n;
910 }
911
912 void
913 roff_node_append(struct roff_man *man, struct roff_node *n)
914 {
915
916 switch (man->next) {
917 case ROFF_NEXT_SIBLING:
918 if (man->last->next != NULL) {
919 n->next = man->last->next;
920 man->last->next->prev = n;
921 } else
922 man->last->parent->last = n;
923 man->last->next = n;
924 n->prev = man->last;
925 n->parent = man->last->parent;
926 break;
927 case ROFF_NEXT_CHILD:
928 if (man->last->child != NULL) {
929 n->next = man->last->child;
930 man->last->child->prev = n;
931 } else
932 man->last->last = n;
933 man->last->child = n;
934 n->parent = man->last;
935 break;
936 default:
937 abort();
938 }
939 man->last = n;
940
941 switch (n->type) {
942 case ROFFT_HEAD:
943 n->parent->head = n;
944 break;
945 case ROFFT_BODY:
946 if (n->end != ENDBODY_NOT)
947 return;
948 n->parent->body = n;
949 break;
950 case ROFFT_TAIL:
951 n->parent->tail = n;
952 break;
953 default:
954 return;
955 }
956
957 /*
958 * Copy over the normalised-data pointer of our parent. Not
959 * everybody has one, but copying a null pointer is fine.
960 */
961
962 n->norm = n->parent->norm;
963 assert(n->parent->type == ROFFT_BLOCK);
964 }
965
966 void
967 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
968 {
969 struct roff_node *n;
970
971 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
972 n->string = roff_strdup(man->roff, word);
973 roff_node_append(man, n);
974 n->flags |= NODE_VALID | NODE_ENDED;
975 man->next = ROFF_NEXT_SIBLING;
976 }
977
978 void
979 roff_word_append(struct roff_man *man, const char *word)
980 {
981 struct roff_node *n;
982 char *addstr, *newstr;
983
984 n = man->last;
985 addstr = roff_strdup(man->roff, word);
986 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
987 free(addstr);
988 free(n->string);
989 n->string = newstr;
990 man->next = ROFF_NEXT_SIBLING;
991 }
992
993 void
994 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
995 {
996 struct roff_node *n;
997
998 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
999 roff_node_append(man, n);
1000 man->next = ROFF_NEXT_CHILD;
1001 }
1002
1003 struct roff_node *
1004 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
1005 {
1006 struct roff_node *n;
1007
1008 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1009 roff_node_append(man, n);
1010 man->next = ROFF_NEXT_CHILD;
1011 return n;
1012 }
1013
1014 struct roff_node *
1015 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1016 {
1017 struct roff_node *n;
1018
1019 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1020 roff_node_append(man, n);
1021 man->next = ROFF_NEXT_CHILD;
1022 return n;
1023 }
1024
1025 struct roff_node *
1026 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1027 {
1028 struct roff_node *n;
1029
1030 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1031 roff_node_append(man, n);
1032 man->next = ROFF_NEXT_CHILD;
1033 return n;
1034 }
1035
1036 static void
1037 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1038 {
1039 struct roff_node *n;
1040 struct tbl_span *span;
1041
1042 if (man->meta.macroset == MACROSET_MAN)
1043 man_breakscope(man, ROFF_TS);
1044 while ((span = tbl_span(tbl)) != NULL) {
1045 n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1046 n->span = span;
1047 roff_node_append(man, n);
1048 n->flags |= NODE_VALID | NODE_ENDED;
1049 man->next = ROFF_NEXT_SIBLING;
1050 }
1051 }
1052
1053 void
1054 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1055 {
1056
1057 /* Adjust siblings. */
1058
1059 if (n->prev)
1060 n->prev->next = n->next;
1061 if (n->next)
1062 n->next->prev = n->prev;
1063
1064 /* Adjust parent. */
1065
1066 if (n->parent != NULL) {
1067 if (n->parent->child == n)
1068 n->parent->child = n->next;
1069 if (n->parent->last == n)
1070 n->parent->last = n->prev;
1071 }
1072
1073 /* Adjust parse point. */
1074
1075 if (man == NULL)
1076 return;
1077 if (man->last == n) {
1078 if (n->prev == NULL) {
1079 man->last = n->parent;
1080 man->next = ROFF_NEXT_CHILD;
1081 } else {
1082 man->last = n->prev;
1083 man->next = ROFF_NEXT_SIBLING;
1084 }
1085 }
1086 if (man->meta.first == n)
1087 man->meta.first = NULL;
1088 }
1089
1090 void
1091 roff_node_relink(struct roff_man *man, struct roff_node *n)
1092 {
1093 roff_node_unlink(man, n);
1094 n->prev = n->next = NULL;
1095 roff_node_append(man, n);
1096 }
1097
1098 void
1099 roff_node_free(struct roff_node *n)
1100 {
1101
1102 if (n->args != NULL)
1103 mdoc_argv_free(n->args);
1104 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1105 free(n->norm);
1106 eqn_box_free(n->eqn);
1107 free(n->string);
1108 free(n->tag);
1109 free(n);
1110 }
1111
1112 void
1113 roff_node_delete(struct roff_man *man, struct roff_node *n)
1114 {
1115
1116 while (n->child != NULL)
1117 roff_node_delete(man, n->child);
1118 roff_node_unlink(man, n);
1119 roff_node_free(n);
1120 }
1121
1122 int
1123 roff_node_transparent(struct roff_node *n)
1124 {
1125 if (n == NULL)
1126 return 0;
1127 if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
1128 return 1;
1129 return roff_tok_transparent(n->tok);
1130 }
1131
1132 int
1133 roff_tok_transparent(enum roff_tok tok)
1134 {
1135 switch (tok) {
1136 case ROFF_ft:
1137 case ROFF_ll:
1138 case ROFF_mc:
1139 case ROFF_po:
1140 case ROFF_ta:
1141 case MDOC_Db:
1142 case MDOC_Es:
1143 case MDOC_Sm:
1144 case MDOC_Tg:
1145 case MAN_DT:
1146 case MAN_UC:
1147 case MAN_PD:
1148 case MAN_AT:
1149 return 1;
1150 default:
1151 return 0;
1152 }
1153 }
1154
1155 struct roff_node *
1156 roff_node_child(struct roff_node *n)
1157 {
1158 for (n = n->child; roff_node_transparent(n); n = n->next)
1159 continue;
1160 return n;
1161 }
1162
1163 struct roff_node *
1164 roff_node_prev(struct roff_node *n)
1165 {
1166 do {
1167 n = n->prev;
1168 } while (roff_node_transparent(n));
1169 return n;
1170 }
1171
1172 struct roff_node *
1173 roff_node_next(struct roff_node *n)
1174 {
1175 do {
1176 n = n->next;
1177 } while (roff_node_transparent(n));
1178 return n;
1179 }
1180
1181 void
1182 deroff(char **dest, const struct roff_node *n)
1183 {
1184 char *cp;
1185 size_t sz;
1186
1187 if (n->string == NULL) {
1188 for (n = n->child; n != NULL; n = n->next)
1189 deroff(dest, n);
1190 return;
1191 }
1192
1193 /* Skip leading whitespace. */
1194
1195 for (cp = n->string; *cp != '\0'; cp++) {
1196 if (cp[0] == '\\' && cp[1] != '\0' &&
1197 strchr(" %&0^|~", cp[1]) != NULL)
1198 cp++;
1199 else if ( ! isspace((unsigned char)*cp))
1200 break;
1201 }
1202
1203 /* Skip trailing backslash. */
1204
1205 sz = strlen(cp);
1206 if (sz > 0 && cp[sz - 1] == '\\')
1207 sz--;
1208
1209 /* Skip trailing whitespace. */
1210
1211 for (; sz; sz--)
1212 if ( ! isspace((unsigned char)cp[sz-1]))
1213 break;
1214
1215 /* Skip empty strings. */
1216
1217 if (sz == 0)
1218 return;
1219
1220 if (*dest == NULL) {
1221 *dest = mandoc_strndup(cp, sz);
1222 return;
1223 }
1224
1225 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1226 free(*dest);
1227 *dest = cp;
1228 }
1229
1230 /* --- main functions of the roff parser ---------------------------------- */
1231
1232 /*
1233 * In the current line, expand escape sequences that produce parsable
1234 * input text. Also check the syntax of the remaining escape sequences,
1235 * which typically produce output glyphs or change formatter state.
 *
 * The line is taken from buf->buf starting at offset pos; ln is the
 * line number used in messages, and newesc is the character treated
 * as the escape character.  The buffer may be replaced by a newly
 * allocated one, in which case buf->buf and buf->sz are updated.
 *
 * Comments are handled first, scanning forward.  Then the line is
 * scanned backwards, and the escapes \*, \$, \B, \n and \w are
 * replaced by their values.
 *
 * Returns ROFF_CONT when the line is ready for further parsing,
 * ROFF_IGN | ROFF_APPEND when the next input line is to be appended
 * (after a "#" comment, or after a trailing escape character unless
 * a newline character ended the buffer), or ROFF_IGN when the
 * expansion limit or the length limit is exceeded.
1236 */
1237 static int
1238 roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc)
1239 {
1240 struct mctx *ctx; /* current macro call context */
1241 char ubuf[24]; /* buffer to print the number */
1242 struct roff_node *n; /* used for header comments */
1243 const char *start; /* start of the string to process */
1244 char *stesc; /* start of an escape sequence ('\\') */
1245 const char *esct; /* type of escape sequence */
1246 char *ep; /* end of comment string */
1247 const char *stnam; /* start of the name, after "[(*" */
1248 const char *cp; /* end of the name, e.g. before ']' */
1249 const char *res; /* the string to be substituted */
1250 char *nbuf; /* new buffer to copy buf->buf to */
1251 size_t maxl; /* expected length of the escape name */
1252 size_t naml; /* actual length of the escape name */
1253 size_t asz; /* length of the replacement */
1254 size_t rsz; /* length of the rest of the string */
1255 int inaml; /* length returned from mandoc_escape() */
1256 int expand_count; /* to avoid infinite loops */
1257 int npos; /* position in numeric expression */
1258 int arg_complete; /* argument not interrupted by eol */
1259 int quote_args; /* true for \\$@, false for \\$* */
1260 int done; /* no more input available */
1261 int deftype; /* type of definition to paste */
1262 int rcsid; /* kind of RCS id seen */
1263 enum mandocerr err; /* for escape sequence problems */
1264 char sign; /* increment number register */
1265 char term; /* character terminating the escape */
1266
1267 /* Search forward for comments. */
1268
1269 done = 0;
1270 start = buf->buf + pos;
1271 for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
1272 if (stesc[0] != newesc || stesc[1] == '\0')
1273 continue;
1274 stesc++;
1275 if (*stesc != '"' && *stesc != '#')
1276 continue;
1277
1278 /* Comment found, look for RCS id. */
1279
1280 rcsid = 0;
1281 if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
1282 rcsid = 1 << MANDOC_OS_OPENBSD;
1283 cp += 8;
1284 } else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
1285 rcsid = 1 << MANDOC_OS_NETBSD;
1286 cp += 7;
1287 }
1288 if (cp != NULL &&
1289 isalnum((unsigned char)*cp) == 0 &&
1290 strchr(cp, '$') != NULL) {
1291 if (r->man->meta.rcsids & rcsid)
1292 mandoc_msg(MANDOCERR_RCS_REP, ln,
1293 (int)(stesc - buf->buf) + 1,
1294 "%s", stesc + 1);
1295 r->man->meta.rcsids |= rcsid;
1296 }
1297
1298 /* Handle trailing whitespace. */
1299
/*
 * Let ep point to the last byte of the line and move
 * stesc back from the comment character to the escape
 * character in front of it.
 */
1300 ep = strchr(stesc--, '\0') - 1;
1301 if (*ep == '\n') {
1302 done = 1;
1303 ep--;
1304 }
1305 if (*ep == ' ' || *ep == '\t')
1306 mandoc_msg(MANDOCERR_SPACE_EOL,
1307 ln, (int)(ep - buf->buf), NULL);
1308
1309 /*
1310 * Save comments preceding the title macro
1311 * in the syntax tree.
1312 */
1313
1314 if (newesc != ASCII_ESC && r->options & MPARSE_COMMENT) {
1315 while (*ep == ' ' || *ep == '\t')
1316 ep--;
1317 ep[1] = '\0';
1318 n = roff_node_alloc(r->man,
1319 ln, stesc + 1 - buf->buf,
1320 ROFFT_COMMENT, TOKEN_NONE);
1321 n->string = mandoc_strdup(stesc + 2);
1322 roff_node_append(r->man, n);
1323 n->flags |= NODE_VALID | NODE_ENDED;
1324 r->man->next = ROFF_NEXT_SIBLING;
1325 }
1326
1327 /* Line continuation with comment. */
1328
1329 if (stesc[1] == '#') {
1330 *stesc = '\0';
1331 return ROFF_IGN | ROFF_APPEND;
1332 }
1333
1334 /* Discard normal comments. */
1335
/* Also cut blanks before the comment, but not an escaped blank. */
1336 while (stesc > start && stesc[-1] == ' ' &&
1337 (stesc == start + 1 || stesc[-2] != '\\'))
1338 stesc--;
1339 *stesc = '\0';
1340 break;
1341 }
1342 if (stesc == start)
1343 return ROFF_CONT;
1344 stesc--;
1345
1346 /* Notice the end of the input. */
1347
1348 if (*stesc == '\n') {
1349 *stesc-- = '\0';
1350 done = 1;
1351 }
1352
/* Scan backwards from the end of the line to its start. */
1353 expand_count = 0;
1354 while (stesc >= start) {
1355 if (*stesc != newesc) {
1356
1357 /*
1358 * If we have a non-standard escape character,
1359 * escape literal backslashes because all
1360 * processing in subsequent functions uses
1361 * the standard escaping rules.
1362 */
1363
1364 if (newesc != ASCII_ESC && *stesc == '\\') {
1365 *stesc = '\0';
1366 buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
1367 buf->buf, stesc + 1) + 1;
1368 start = nbuf + pos;
1369 stesc = nbuf + (stesc - buf->buf);
1370 free(buf->buf);
1371 buf->buf = nbuf;
1372 }
1373
1374 /* Search backwards for the next escape. */
1375
1376 stesc--;
1377 continue;
1378 }
1379
1380 /* If it is escaped, skip it. */
1381
1382 for (cp = stesc - 1; cp >= start; cp--)
1383 if (*cp != r->escape)
1384 break;
1385
/*
 * An even distance means an even number of escape characters
 * in a row: none of them starts a sequence, so replace them
 * all by backslashes.  Otherwise, stesc starts a sequence,
 * unless it is the last character of the line.
 */
1386 if ((stesc - cp) % 2 == 0) {
1387 while (stesc > cp)
1388 *stesc-- = '\\';
1389 continue;
1390 } else if (stesc[1] != '\0') {
1391 *stesc = '\\';
1392 } else {
1393 *stesc-- = '\0';
1394 if (done)
1395 continue;
1396 else
1397 return ROFF_IGN | ROFF_APPEND;
1398 }
1399
1400 /* Decide whether to expand or to check only. */
1401
1402 term = '\0';
1403 cp = stesc + 1;
1404 while (*cp == 'E')
1405 cp++;
1406 esct = cp;
1407 switch (*esct) {
1408 case '*':
1409 case '$':
1410 res = NULL;
1411 break;
1412 case 'B':
1413 case 'w':
1414 term = cp[1];
1415 /* FALLTHROUGH */
1416 case 'n':
1417 sign = cp[1];
1418 if (sign == '+' || sign == '-')
1419 cp++;
1420 res = ubuf;
1421 break;
1422 default:
1423 err = MANDOCERR_OK;
1424 switch(mandoc_escape(&cp, &stnam, &inaml)) {
1425 case ESCAPE_SPECIAL:
1426 if (mchars_spec2cp(stnam, inaml) >= 0)
1427 break;
1428 /* FALLTHROUGH */
1429 case ESCAPE_ERROR:
1430 err = MANDOCERR_ESC_BAD;
1431 break;
1432 case ESCAPE_UNDEF:
1433 err = MANDOCERR_ESC_UNDEF;
1434 break;
1435 case ESCAPE_UNSUPP:
1436 err = MANDOCERR_ESC_UNSUPP;
1437 break;
1438 default:
1439 break;
1440 }
1441 if (err != MANDOCERR_OK)
1442 mandoc_msg(err, ln, (int)(stesc - buf->buf),
1443 "%.*s", (int)(cp - stesc), stesc);
1444 stesc--;
1445 continue;
1446 }
1447
1448 if (EXPAND_LIMIT < ++expand_count) {
1449 mandoc_msg(MANDOCERR_ROFFLOOP,
1450 ln, (int)(stesc - buf->buf), NULL);
1451 return ROFF_IGN;
1452 }
1453
1454 /*
1455 * The third character decides the length
1456 * of the name of the string or register.
1457 * Save a pointer to the name.
1458 */
1459
1460 if (term == '\0') {
1461 switch (*++cp) {
1462 case '\0':
1463 maxl = 0;
1464 break;
1465 case '(':
1466 cp++;
1467 maxl = 2;
1468 break;
1469 case '[':
1470 cp++;
1471 term = ']';
1472 maxl = 0;
1473 break;
1474 default:
1475 maxl = 1;
1476 break;
1477 }
1478 } else {
1479 cp += 2;
1480 maxl = 0;
1481 }
1482 stnam = cp;
1483
1484 /* Advance to the end of the name. */
1485
1486 naml = 0;
1487 arg_complete = 1;
1488 while (maxl == 0 || naml < maxl) {
1489 if (*cp == '\0') {
1490 mandoc_msg(MANDOCERR_ESC_BAD, ln,
1491 (int)(stesc - buf->buf), "%s", stesc);
1492 arg_complete = 0;
1493 break;
1494 }
1495 if (maxl == 0 && *cp == term) {
1496 cp++;
1497 break;
1498 }
/* Only in \w arguments, an escape sequence counts as one unit at most. */
1499 if (*cp++ != '\\' || *esct != 'w') {
1500 naml++;
1501 continue;
1502 }
1503 switch (mandoc_escape(&cp, NULL, NULL)) {
1504 case ESCAPE_SPECIAL:
1505 case ESCAPE_UNICODE:
1506 case ESCAPE_NUMBERED:
1507 case ESCAPE_UNDEF:
1508 case ESCAPE_OVERSTRIKE:
1509 naml++;
1510 break;
1511 default:
1512 break;
1513 }
1514 }
1515
1516 /*
1517 * Retrieve the replacement string; if it is
1518 * undefined, resume searching for escapes.
1519 */
1520
1521 switch (*esct) {
1522 case '*':
1523 if (arg_complete) {
1524 deftype = ROFFDEF_USER | ROFFDEF_PRE;
1525 res = roff_getstrn(r, stnam, naml, &deftype);
1526
1527 /*
1528 * If not overridden, let \*(.T
1529 * through to the formatters.
1530 */
1531
1532 if (res == NULL && naml == 2 &&
1533 stnam[0] == '.' && stnam[1] == 'T') {
1534 roff_setstrn(&r->strtab,
1535 ".T", 2, NULL, 0, 0);
1536 stesc--;
1537 continue;
1538 }
1539 }
1540 break;
1541 case '$':
1542 if (r->mstackpos < 0) {
1543 mandoc_msg(MANDOCERR_ARG_UNDEF, ln,
1544 (int)(stesc - buf->buf), "%.3s", stesc);
1545 break;
1546 }
1547 ctx = r->mstack + r->mstackpos;
/* \$1 to \$9 select a single argument. */
1548 npos = esct[1] - '1';
1549 if (npos >= 0 && npos <= 8) {
1550 res = npos < ctx->argc ?
1551 ctx->argv[npos] : "";
1552 break;
1553 }
1554 if (esct[1] == '*')
1555 quote_args = 0;
1556 else if (esct[1] == '@')
1557 quote_args = 1;
1558 else {
1559 mandoc_msg(MANDOCERR_ARG_NONUM, ln,
1560 (int)(stesc - buf->buf), "%.3s", stesc);
1561 break;
1562 }
/*
 * Paste all arguments in place of the three-byte
 * escape sequence, resizing the buffer as needed.
 */
1563 asz = 0;
1564 for (npos = 0; npos < ctx->argc; npos++) {
1565 if (npos)
1566 asz++; /* blank */
1567 if (quote_args)
1568 asz += 2; /* quotes */
1569 asz += strlen(ctx->argv[npos]);
1570 }
1571 if (asz != 3) {
1572 rsz = buf->sz - (stesc - buf->buf) - 3;
1573 if (asz < 3)
1574 memmove(stesc + asz, stesc + 3, rsz);
1575 buf->sz += asz - 3;
1576 nbuf = mandoc_realloc(buf->buf, buf->sz);
1577 start = nbuf + pos;
1578 stesc = nbuf + (stesc - buf->buf);
1579 buf->buf = nbuf;
1580 if (asz > 3)
1581 memmove(stesc + asz, stesc + 3, rsz);
1582 }
1583 for (npos = 0; npos < ctx->argc; npos++) {
1584 if (npos)
1585 *stesc++ = ' ';
1586 if (quote_args)
1587 *stesc++ = '"';
1588 cp = ctx->argv[npos];
1589 while (*cp != '\0')
1590 *stesc++ = *cp++;
1591 if (quote_args)
1592 *stesc++ = '"';
1593 }
1594 continue;
1595 case 'B':
1596 npos = 0;
1597 ubuf[0] = arg_complete &&
1598 roff_evalnum(r, ln, stnam, &npos,
1599 NULL, ROFFNUM_SCALE) &&
1600 stnam + npos + 1 == cp ? '1' : '0';
1601 ubuf[1] = '\0';
1602 break;
1603 case 'n':
1604 if (arg_complete)
1605 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1606 roff_getregn(r, stnam, naml, sign));
1607 else
1608 ubuf[0] = '\0';
1609 break;
1610 case 'w':
1611 /* use even incomplete args */
1612 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1613 24 * (int)naml);
1614 break;
1615 }
1616
1617 if (res == NULL) {
1618 if (*esct == '*')
1619 mandoc_msg(MANDOCERR_STR_UNDEF,
1620 ln, (int)(stesc - buf->buf),
1621 "%.*s", (int)naml, stnam);
1622 res = "";
1623 } else if (buf->sz + strlen(res) > SHRT_MAX) {
1624 mandoc_msg(MANDOCERR_ROFFLOOP,
1625 ln, (int)(stesc - buf->buf), NULL);
1626 return ROFF_IGN;
1627 }
1628
1629 /* Replace the escape sequence by the string. */
1630
1631 *stesc = '\0';
1632 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
1633 buf->buf, res, cp) + 1;
1634
1635 /* Prepare for the next replacement. */
1636
/* Resume at the end of the pasted text, such that it is scanned, too. */
1637 start = nbuf + pos;
1638 stesc = nbuf + (stesc - buf->buf) + strlen(res);
1639 free(buf->buf);
1640 buf->buf = nbuf;
1641 }
1642 return ROFF_CONT;
1643 }
1644
1645 /*
1646 * Parse a quoted or unquoted roff-style request or macro argument.
1647 * Return a newly allocated copy of the parsed argument, not including
1648 * the opening quote in case the argument is quoted.
1649 * NUL-terminate the argument in place.
1650 * Collapse pairs of quotes inside quoted arguments.
1651 * Advance the argument pointer to the next argument,
1652 * or to the NUL byte terminating the argument line.
 * Also advance *pos accordingly; ln and *pos are used for messages.
 * If the argument contains a double backslash, the copy is passed
 * through roff_expand() with ASCII_ESC as the escape character,
 * and an empty string is returned if that reports ROFF_IGN.
1653 */
1654 char *
1655 roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
1656 {
1657 struct buf buf;
1658 char *cp, *start;
1659 int newesc, pairs, quoted, white;
1660
1661 /* Quoting can only start with a new word. */
1662 start = *cpp;
1663 quoted = 0;
1664 if ('"' == *start) {
1665 quoted = 1;
1666 start++;
1667 }
1668
/* pairs counts the bytes dropped so far; later bytes move left by that much. */
1669 newesc = pairs = white = 0;
1670 for (cp = start; '\0' != *cp; cp++) {
1671
1672 /*
1673 * Move the following text left
1674 * after quoted quotes and after "\\" and "\t".
1675 */
1676 if (pairs)
1677 cp[-pairs] = cp[0];
1678
1679 if ('\\' == cp[0]) {
1680 /*
1681 * In copy mode, translate double to single
1682 * backslashes and backslash-t to literal tabs.
1683 */
1684 switch (cp[1]) {
1685 case 'a':
1686 case 't':
1687 cp[-pairs] = '\t';
1688 pairs++;
1689 cp++;
1690 break;
1691 case '\\':
/* Mark the result with ASCII_ESC for roff_expand() below. */
1692 newesc = 1;
1693 cp[-pairs] = ASCII_ESC;
1694 pairs++;
1695 cp++;
1696 break;
1697 case ' ':
1698 /* Skip escaped blanks. */
1699 if (0 == quoted)
1700 cp++;
1701 break;
1702 default:
1703 break;
1704 }
1705 } else if (0 == quoted) {
1706 if (' ' == cp[0]) {
1707 /* Unescaped blanks end unquoted args. */
1708 white = 1;
1709 break;
1710 }
1711 } else if ('"' == cp[0]) {
1712 if ('"' == cp[1]) {
1713 /* Quoted quotes collapse. */
1714 pairs++;
1715 cp++;
1716 } else {
1717 /* Unquoted quotes end quoted args. */
1718 quoted = 2;
1719 break;
1720 }
1721 }
1722 }
1723
1724 /* Quoted argument without a closing quote. */
1725 if (1 == quoted)
1726 mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);
1727
1728 /* NUL-terminate this argument and move to the next one. */
1729 if (pairs)
1730 cp[-pairs] = '\0';
1731 if ('\0' != *cp) {
1732 *cp++ = '\0';
1733 while (' ' == *cp)
1734 cp++;
1735 }
1736 *pos += (int)(cp - start) + (quoted ? 1 : 0);
1737 *cpp = cp;
1738
1739 if ('\0' == *cp && (white || ' ' == cp[-1]))
1740 mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);
1741
/* From here on, start refers to a private copy of the argument. */
1742 start = mandoc_strdup(start);
1743 if (newesc == 0)
1744 return start;
1745
1746 buf.buf = start;
1747 buf.sz = strlen(start) + 1;
1748 buf.next = NULL;
1749 if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) {
1750 free(buf.buf);
1751 buf.buf = mandoc_strdup("");
1752 }
1753 return buf.buf;
1754 }
1755
1756
1757 /*
1758 * Process text streams.
1759 */
1760 static int
1761 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1762 {
1763 size_t sz;
1764 const char *start;
1765 char *p;
1766 int isz;
1767 enum mandoc_esc esc;
1768
1769 /* Spring the input line trap. */
1770
1771 if (roffit_lines == 1) {
1772 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1773 free(buf->buf);
1774 buf->buf = p;
1775 buf->sz = isz + 1;
1776 *offs = 0;
1777 free(roffit_macro);
1778 roffit_lines = 0;
1779 return ROFF_REPARSE;
1780 } else if (roffit_lines > 1)
1781 --roffit_lines;
1782
1783 if (roffce_node != NULL && buf->buf[pos] != '\0') {
1784 if (roffce_lines < 1) {
1785 r->man->last = roffce_node;
1786 r->man->next = ROFF_NEXT_SIBLING;
1787 roffce_lines = 0;
1788 roffce_node = NULL;
1789 } else
1790 roffce_lines--;
1791 }
1792
1793 /* Convert all breakable hyphens into ASCII_HYPH. */
1794
1795 start = p = buf->buf + pos;
1796
1797 while (*p != '\0') {
1798 sz = strcspn(p, "-\\");
1799 p += sz;
1800
1801 if (*p == '\0')
1802 break;
1803
1804 if (*p == '\\') {
1805 /* Skip over escapes. */
1806 p++;
1807 esc = mandoc_escape((const char **)&p, NULL, NULL);
1808 if (esc == ESCAPE_ERROR)
1809 break;
1810 while (*p == '-')
1811 p++;
1812 continue;
1813 } else if (p == start) {
1814 p++;
1815 continue;
1816 }
1817
1818 if (isalpha((unsigned char)p[-1]) &&
1819 isalpha((unsigned char)p[1]))
1820 *p = ASCII_HYPH;
1821 p++;
1822 }
1823 return ROFF_CONT;
1824 }
1825
/*
 * Parse one input line.  The line is contained in buf->buf and starts
 * at the offset *offs; ln is the line number used in messages and
 * len is used for the check for overlong text lines.
 *
 * After handling in-line equation delimiters and expanding escape
 * sequences, the line is dispatched as follows:
 * text lines go to the text handler of the open block (if any),
 * then to an open equation, an open table, or roff_parsetext();
 * control lines go to the sub handler of the open block (if any),
 * or else to the proc handler of the request or user-defined macro.
 *
 * Returns ROFF_CONT if the line contains a macro that is neither a
 * roff request nor user-defined, or else whatever the handler in
 * charge returns, for example ROFF_IGN or ROFF_REPARSE.
 */
1826 int
1827 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
1828 {
1829 enum roff_tok t;
1830 int e;
1831 int pos; /* parse point */
1832 int spos; /* saved parse point for messages */
1833 int ppos; /* original offset in buf->buf */
1834 int ctl; /* macro line (boolean) */
1835
1836 ppos = pos = *offs;
1837
/* Warn about overlong text lines outside tables, equations, and no-fill mode. */
1838 if (len > 80 && r->tbl == NULL && r->eqn == NULL &&
1839 (r->man->flags & ROFF_NOFILL) == 0 &&
1840 strchr(" .\\", buf->buf[pos]) == NULL &&
1841 buf->buf[pos] != r->control &&
1842 strcspn(buf->buf, " ") < 80)
1843 mandoc_msg(MANDOCERR_TEXT_LONG, ln, (int)len - 1,
1844 "%.20s...", buf->buf + pos);
1845
1846 /* Handle in-line equation delimiters. */
1847
1848 if (r->tbl == NULL &&
1849 r->last_eqn != NULL && r->last_eqn->delim &&
1850 (r->eqn == NULL || r->eqn_inline)) {
1851 e = roff_eqndelim(r, buf, pos);
1852 if (e == ROFF_REPARSE)
1853 return e;
1854 assert(e == ROFF_CONT);
1855 }
1856
1857 /* Expand some escape sequences. */
1858
1859 e = roff_expand(r, buf, ln, pos, r->escape);
1860 if ((e & ROFF_MASK) == ROFF_IGN)
1861 return e;
1862 assert(e == ROFF_CONT);
1863
1864 ctl = roff_getcontrol(r, buf->buf, &pos);
1865
1866 /*
1867 * First, if a scope is open and we're not a macro, pass the
1868 * text through the macro's filter.
1869 * Equations process all content themselves.
1870 * Tables process almost all content themselves, but we want
1871 * to warn about macros before passing it there.
1872 */
1873
1874 if (r->last != NULL && ! ctl) {
1875 t = r->last->tok;
1876 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1877 if ((e & ROFF_MASK) == ROFF_IGN)
1878 return e;
/* Keep only the bits outside ROFF_MASK; they are merged into the results below. */
1879 e &= ~ROFF_MASK;
1880 } else
1881 e = ROFF_IGN;
/* While an equation is open, any line not starting with ".EN" is equation input. */
1882 if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
1883 eqn_read(r->eqn, buf->buf + ppos);
1884 return e;
1885 }
1886 if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
1887 tbl_read(r->tbl, ln, buf->buf, ppos);
1888 roff_addtbl(r->man, ln, r->tbl);
1889 return e;
1890 }
1891 if ( ! ctl) {
1892 r->options &= ~MPARSE_COMMENT;
1893 return roff_parsetext(r, buf, pos, offs) | e;
1894 }
1895
1896 /* Skip empty request lines. */
1897
1898 if (buf->buf[pos] == '"') {
1899 mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
1900 return ROFF_IGN;
1901 } else if (buf->buf[pos] == '\0')
1902 return ROFF_IGN;
1903
1904 /*
1905 * If a scope is open, go to the child handler for that macro,
1906 * as it may want to preprocess before doing anything with it.
1907 * Don't do so if an equation is open.
1908 */
1909
1910 if (r->last) {
1911 t = r->last->tok;
1912 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1913 }
1914
1915 /* No scope is open. This is a new request or macro. */
1916
1917 r->options &= ~MPARSE_COMMENT;
1918 spos = pos;
1919 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1920
1921 /* Tables ignore most macros. */
1922
1923 if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
1924 t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
1925 mandoc_msg(MANDOCERR_TBLMACRO,
1926 ln, pos, "%s", buf->buf + spos);
1927 if (t != TOKEN_NONE)
1928 return ROFF_IGN;
/* Skip the unknown macro name and pass the rest of the line to the table. */
1929 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1930 pos++;
1931 while (buf->buf[pos] == ' ')
1932 pos++;
1933 tbl_read(r->tbl, ln, buf->buf, pos);
1934 roff_addtbl(r->man, ln, r->tbl);
1935 return ROFF_IGN;
1936 }
1937
1938 /* For now, let high level macros abort .ce mode. */
1939
1940 if (ctl && roffce_node != NULL &&
1941 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
1942 t == ROFF_TH || t == ROFF_TS)) {
1943 r->man->last = roffce_node;
1944 r->man->next = ROFF_NEXT_SIBLING;
1945 roffce_lines = 0;
1946 roffce_node = NULL;
1947 }
1948
1949 /*
1950 * This is neither a roff request nor a user-defined macro.
1951 * Let the standard macro set parsers handle it.
1952 */
1953
1954 if (t == TOKEN_NONE)
1955 return ROFF_CONT;
1956
1957 /* Execute a roff request or a user defined macro. */
1958
1959 return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
1960 }
1961
1962 /*
1963 * Internal interface function to tell the roff parser that execution
1964 * of the current macro ended. This is required because macro
1965 * definitions usually do not end with a .return request.
1966 */
1967 void
1968 roff_userret(struct roff *r)
1969 {
1970 struct mctx *ctx;
1971 int i;
1972
1973 assert(r->mstackpos >= 0);
1974 ctx = r->mstack + r->mstackpos;
1975 for (i = 0; i < ctx->argc; i++)
1976 free(ctx->argv[i]);
1977 ctx->argc = 0;
1978 r->mstackpos--;
1979 }
1980
1981 void
1982 roff_endparse(struct roff *r)
1983 {
1984 if (r->last != NULL)
1985 mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
1986 r->last->col, "%s", roff_name[r->last->tok]);
1987
1988 if (r->eqn != NULL) {
1989 mandoc_msg(MANDOCERR_BLK_NOEND,
1990 r->eqn->node->line, r->eqn->node->pos, "EQ");
1991 eqn_parse(r->eqn);
1992 r->eqn = NULL;
1993 }
1994
1995 if (r->tbl != NULL) {
1996 tbl_end(r->tbl, 1);
1997 r->tbl = NULL;
1998 }
1999 }
2000
2001 /*
2002 * Parse a roff node's type from the input buffer. This must be in the
2003 * form of ".foo xxx" in the usual way.
2004 */
2005 static enum roff_tok
2006 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
2007 {
2008 char *cp;
2009 const char *mac;
2010 size_t maclen;
2011 int deftype;
2012 enum roff_tok t;
2013
2014 cp = buf + *pos;
2015
2016 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
2017 return TOKEN_NONE;
2018
2019 mac = cp;
2020 maclen = roff_getname(r, &cp, ln, ppos);
2021
2022 deftype = ROFFDEF_USER | ROFFDEF_REN;
2023 r->current_string = roff_getstrn(r, mac, maclen, &deftype);
2024 switch (deftype) {
2025 case ROFFDEF_USER:
2026 t = ROFF_USERDEF;
2027 break;
2028 case ROFFDEF_REN:
2029 t = ROFF_RENAMED;
2030 break;
2031 default:
2032 t = roffhash_find(r->reqtab, mac, maclen);
2033 break;
2034 }
2035 if (t != TOKEN_NONE)
2036 *pos = cp - buf;
2037 else if (deftype == ROFFDEF_UNDEF) {
2038 /* Using an undefined macro defines it to be empty. */
2039 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
2040 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
2041 }
2042 return t;
2043 }
2044
2045 /* --- handling of request blocks ----------------------------------------- */
2046
2047 /*
2048 * Close a macro definition block or an "ignore" block.
2049 */
2050 static int
2051 roff_cblock(ROFF_ARGS)
2052 {
2053 int rr;
2054
2055 if (r->last == NULL) {
2056 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2057 return ROFF_IGN;
2058 }
2059
2060 switch (r->last->tok) {
2061 case ROFF_am:
2062 case ROFF_ami:
2063 case ROFF_de:
2064 case ROFF_dei:
2065 case ROFF_ig:
2066 break;
2067 case ROFF_am1:
2068 case ROFF_de1:
2069 /* Remapped in roff_block(). */
2070 abort();
2071 default:
2072 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2073 return ROFF_IGN;
2074 }
2075
2076 roffnode_pop(r);
2077 roffnode_cleanscope(r);
2078
2079 /*
2080 * If a conditional block with braces is still open,
2081 * check for "\}" block end markers.
2082 */
2083
2084 if (r->last != NULL && r->last->endspan < 0) {
2085 rr = 1; /* If arguments follow "\}", warn about them. */
2086 roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2087 }
2088
2089 if (buf->buf[pos] != '\0')
2090 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
2091 ".. %s", buf->buf + pos);
2092
2093 return ROFF_IGN;
2094 }
2095
2096 /*
2097 * Pop all nodes ending at the end of the current input line.
2098 * Return the number of loops ended.
2099 */
2100 static int
2101 roffnode_cleanscope(struct roff *r)
2102 {
2103 int inloop;
2104
2105 inloop = 0;
2106 while (r->last != NULL && r->last->endspan > 0) {
2107 if (--r->last->endspan != 0)
2108 break;
2109 inloop += roffnode_pop(r);
2110 }
2111 return inloop;
2112 }
2113
2114 /*
2115 * Handle the closing "\}" of a conditional block.
2116 * Apart from generating warnings, this only pops nodes.
2117 * Return the number of loops ended.
2118 */
2119 static int
2120 roff_ccond(struct roff *r, int ln, int ppos)
2121 {
2122 if (NULL == r->last) {
2123 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2124 return 0;
2125 }
2126
2127 switch (r->last->tok) {
2128 case ROFF_el:
2129 case ROFF_ie:
2130 case ROFF_if:
2131 case ROFF_while:
2132 break;
2133 default:
2134 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2135 return 0;
2136 }
2137
2138 if (r->last->endspan > -1) {
2139 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2140 return 0;
2141 }
2142
2143 return roffnode_pop(r) + roffnode_cleanscope(r);
2144 }
2145
2146 static int
2147 roff_block(ROFF_ARGS)
2148 {
2149 const char *name, *value;
2150 char *call, *cp, *iname, *rname;
2151 size_t csz, namesz, rsz;
2152 int deftype;
2153
2154 /* Ignore groff compatibility mode for now. */
2155
2156 if (tok == ROFF_de1)
2157 tok = ROFF_de;
2158 else if (tok == ROFF_dei1)
2159 tok = ROFF_dei;
2160 else if (tok == ROFF_am1)
2161 tok = ROFF_am;
2162 else if (tok == ROFF_ami1)
2163 tok = ROFF_ami;
2164
2165 /* Parse the macro name argument. */
2166
2167 cp = buf->buf + pos;
2168 if (tok == ROFF_ig) {
2169 iname = NULL;
2170 namesz = 0;
2171 } else {
2172 iname = cp;
2173 namesz = roff_getname(r, &cp, ln, ppos);
2174 iname[namesz] = '\0';
2175 }
2176
2177 /* Resolve the macro name argument if it is indirect. */
2178
2179 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2180 deftype = ROFFDEF_USER;
2181 name = roff_getstrn(r, iname, namesz, &deftype);
2182 if (name == NULL) {
2183 mandoc_msg(MANDOCERR_STR_UNDEF,
2184 ln, (int)(iname - buf->buf),
2185 "%.*s", (int)namesz, iname);
2186 namesz = 0;
2187 } else
2188 namesz = strlen(name);
2189 } else
2190 name = iname;
2191
2192 if (namesz == 0 && tok != ROFF_ig) {
2193 mandoc_msg(MANDOCERR_REQ_EMPTY,
2194 ln, ppos, "%s", roff_name[tok]);
2195 return ROFF_IGN;
2196 }
2197
2198 roffnode_push(r, tok, name, ln, ppos);
2199
2200 /*
2201 * At the beginning of a `de' macro, clear the existing string
2202 * with the same name, if there is one. New content will be
2203 * appended from roff_block_text() in multiline mode.
2204 */
2205
2206 if (tok == ROFF_de || tok == ROFF_dei) {
2207 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
2208 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2209 } else if (tok == ROFF_am || tok == ROFF_ami) {
2210 deftype = ROFFDEF_ANY;
2211 value = roff_getstrn(r, iname, namesz, &deftype);
2212 switch (deftype) { /* Before appending, ... */
2213 case ROFFDEF_PRE: /* copy predefined to user-defined. */
2214 roff_setstrn(&r->strtab, name, namesz,
2215 value, strlen(value), 0);
2216 break;
2217 case ROFFDEF_REN: /* call original standard macro. */
2218 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2219 (int)strlen(value), value);
2220 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2221 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2222 free(call);
2223 break;
2224 case ROFFDEF_STD: /* rename and call standard macro. */
2225 rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
2226 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
2227 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2228 (int)rsz, rname);
2229 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2230 free(call);
2231 free(rname);
2232 break;
2233 default:
2234 break;
2235 }
2236 }
2237
2238 if (*cp == '\0')
2239 return ROFF_IGN;
2240
2241 /* Get the custom end marker. */
2242
2243 iname = cp;
2244 namesz = roff_getname(r, &cp, ln, ppos);
2245
2246 /* Resolve the end marker if it is indirect. */
2247
2248 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2249 deftype = ROFFDEF_USER;
2250 name = roff_getstrn(r, iname, namesz, &deftype);
2251 if (name == NULL) {
2252 mandoc_msg(MANDOCERR_STR_UNDEF,
2253 ln, (int)(iname - buf->buf),
2254 "%.*s", (int)namesz, iname);
2255 namesz = 0;
2256 } else
2257 namesz = strlen(name);
2258 } else
2259 name = iname;
2260
2261 if (namesz)
2262 r->last->end = mandoc_strndup(name, namesz);
2263
2264 if (*cp != '\0')
2265 mandoc_msg(MANDOCERR_ARG_EXCESS,
2266 ln, pos, ".%s ... %s", roff_name[tok], cp);
2267
2268 return ROFF_IGN;
2269 }
2270
2271 static int
2272 roff_block_sub(ROFF_ARGS)
2273 {
2274 enum roff_tok t;
2275 int i, j;
2276
2277 /*
2278 * First check whether a custom macro exists at this level. If
2279 * it does, then check against it. This is some of groff's
2280 * stranger behaviours. If we encountered a custom end-scope
2281 * tag and that tag also happens to be a "real" macro, then we
2282 * need to try interpreting it again as a real macro. If it's
2283 * not, then return ignore. Else continue.
2284 */
2285
2286 if (r->last->end) {
2287 for (i = pos, j = 0; r->last->end[j]; j++, i++)
2288 if (buf->buf[i] != r->last->end[j])
2289 break;
2290
2291 if (r->last->end[j] == '\0' &&
2292 (buf->buf[i] == '\0' ||
2293 buf->buf[i] == ' ' ||
2294 buf->buf[i] == '\t')) {
2295 roffnode_pop(r);
2296 roffnode_cleanscope(r);
2297
2298 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2299 i++;
2300
2301 pos = i;
2302 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2303 TOKEN_NONE)
2304 return ROFF_RERUN;
2305 return ROFF_IGN;
2306 }
2307 }
2308
2309 /*
2310 * If we have no custom end-query or lookup failed, then try
2311 * pulling it out of the hashtable.
2312 */
2313
2314 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2315
2316 if (t != ROFF_cblock) {
2317 if (tok != ROFF_ig)
2318 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2319 return ROFF_IGN;
2320 }
2321
2322 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2323 }
2324
2325 static int
2326 roff_block_text(ROFF_ARGS)
2327 {
2328
2329 if (tok != ROFF_ig)
2330 roff_setstr(r, r->last->name, buf->buf + pos, 2);
2331
2332 return ROFF_IGN;
2333 }
2334
2335 /*
2336 * Check for a closing "\}" and handle it.
2337 * In this function, the final "int *offs" argument is used for
2338 * different purposes than elsewhere:
2339 * Input: *offs == 0: caller wants to discard arguments following \}
2340 * *offs == 1: caller wants to preserve text following \}
2341 * Output: *offs = 0: tell caller to discard input line
2342 * *offs = 1: tell caller to use input line
2343 */
2344 static int
2345 roff_cond_checkend(ROFF_ARGS)
2346 {
2347 char *ep;
2348 int endloop, irc, rr;
2349
2350 irc = ROFF_IGN;
2351 rr = r->last->rule;
2352 endloop = tok != ROFF_while ? ROFF_IGN :
2353 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2354 if (roffnode_cleanscope(r))
2355 irc |= endloop;
2356
2357 /*
2358 * If "\}" occurs on a macro line without a preceding macro or
2359 * a text line contains nothing else, drop the line completely.
2360 */
2361
2362 ep = buf->buf + pos;
2363 if (ep[0] == '\\' && ep[1] == '}' && (ep[2] == '\0' || *offs == 0))
2364 rr = 0;
2365
2366 /*
2367 * The closing delimiter "\}" rewinds the conditional scope
2368 * but is otherwise ignored when interpreting the line.
2369 */
2370
2371 while ((ep = strchr(ep, '\\')) != NULL) {
2372 switch (ep[1]) {
2373 case '}':
2374 if (ep[2] == '\0')
2375 ep[0] = '\0';
2376 else if (rr)
2377 ep[1] = '&';
2378 else
2379 memmove(ep, ep + 2, strlen(ep + 2) + 1);
2380 if (roff_ccond(r, ln, ep - buf->buf))
2381 irc |= endloop;
2382 break;
2383 case '\0':
2384 ++ep;
2385 break;
2386 default:
2387 ep += 2;
2388 break;
2389 }
2390 }
2391 *offs = rr;
2392 return irc;
2393 }
2394
2395 /*
2396 * Parse and process a request or macro line in conditional scope.
2397 */
2398 static int
2399 roff_cond_sub(ROFF_ARGS)
2400 {
2401 struct roffnode *bl;
2402 int irc, rr;
2403 enum roff_tok t;
2404
2405 rr = 0; /* If arguments follow "\}", skip them. */
2406 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2407 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2408
2409 /* For now, let high level macros abort .ce mode. */
2410
2411 if (roffce_node != NULL &&
2412 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
2413 t == ROFF_TH || t == ROFF_TS)) {
2414 r->man->last = roffce_node;
2415 r->man->next = ROFF_NEXT_SIBLING;
2416 roffce_lines = 0;
2417 roffce_node = NULL;
2418 }
2419
2420 /*
2421 * Fully handle known macros when they are structurally
2422 * required or when the conditional evaluated to true.
2423 */
2424
2425 if (t == ROFF_break) {
2426 if (irc & ROFF_LOOPMASK)
2427 irc = ROFF_IGN | ROFF_LOOPEXIT;
2428 else if (rr) {
2429 for (bl = r->last; bl != NULL; bl = bl->parent) {
2430 bl->rule = 0;
2431 if (bl->tok == ROFF_while)
2432 break;
2433 }
2434 }
2435 } else if (t != TOKEN_NONE &&
2436 (rr || roffs[t].flags & ROFFMAC_STRUCT)) {
2437 irc |= (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2438 if (irc & ROFF_WHILE)
2439 irc &= ~(ROFF_LOOPCONT | ROFF_LOOPEXIT);
2440 } else
2441 irc |= rr ? ROFF_CONT : ROFF_IGN;
2442 return irc;
2443 }
2444
2445 /*
2446 * Parse and process a text line in conditional scope.
2447 */
2448 static int
2449 roff_cond_text(ROFF_ARGS)
2450 {
2451 int irc, rr;
2452
2453 rr = 1; /* If arguments follow "\}", preserve them. */
2454 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2455 if (rr)
2456 irc |= ROFF_CONT;
2457 return irc;
2458 }
2459
2460 /* --- handling of numeric and conditional expressions -------------------- */
2461
2462 /*
2463 * Parse a single signed integer number. Stop at the first non-digit.
2464 * If there is at least one digit, return success and advance the
2465 * parse point, else return failure and let the parse point unchanged.
2466 * Ignore overflows, treat them just like the C language.
2467 */
2468 static int
2469 roff_getnum(const char *v, int *pos, int *res, int flags)
2470 {
2471 int myres, scaled, n, p;
2472
2473 if (NULL == res)
2474 res = &myres;
2475
2476 p = *pos;
2477 n = v[p] == '-';
2478 if (n || v[p] == '+')
2479 p++;
2480
2481 if (flags & ROFFNUM_WHITE)
2482 while (isspace((unsigned char)v[p]))
2483 p++;
2484
2485 for (*res = 0; isdigit((unsigned char)v[p]); p++)
2486 *res = 10 * *res + v[p] - '0';
2487 if (p == *pos + n)
2488 return 0;
2489
2490 if (n)
2491 *res = -*res;
2492
2493 /* Each number may be followed by one optional scaling unit. */
2494
2495 switch (v[p]) {
2496 case 'f':
2497 scaled = *res * 65536;
2498 break;
2499 case 'i':
2500 scaled = *res * 240;
2501 break;
2502 case 'c':
2503 scaled = *res * 240 / 2.54;
2504 break;
2505 case 'v':
2506 case 'P':
2507 scaled = *res * 40;
2508 break;
2509 case 'm':
2510 case 'n':
2511 scaled = *res * 24;
2512 break;
2513 case 'p':
2514 scaled = *res * 10 / 3;
2515 break;
2516 case 'u':
2517 scaled = *res;
2518 break;
2519 case 'M':
2520 scaled = *res * 6 / 25;
2521 break;
2522 default:
2523 scaled = *res;
2524 p--;
2525 break;
2526 }
2527 if (flags & ROFFNUM_SCALE)
2528 *res = scaled;
2529
2530 *pos = p + 1;
2531 return 1;
2532 }
2533
/*
 * Evaluate a string comparison condition.
 * The character at the parse point is the delimiter.
 * Return 1 if the string up to the second occurrence of the
 * delimiter equals the string up to its third occurrence,
 * or 0 otherwise.
 * Advance the parse point after the third occurrence
 * or, lacking that, to the end of the line.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim, *left, *right;
	int		 match;

	match = 0;
	delim = v + *pos;		/* initial delimiter */
	left = delim + 1;		/* scans the first string */
	right = strchr(left, *delim);	/* scans the second string */

	/* Without a middle delimiter, there is nothing to compare. */

	if (right != NULL) {
		for (right++; *right != '\0'; right++, left++) {
			if (*left != *right) {
				/* Mismatch: skip to the final delimiter. */
				right = strchr(right, *delim);
				break;
			}
			if (*right == *delim) {
				/* Both strings ended simultaneously. */
				match = 1;
				break;
			}
		}
	}

	if (right == NULL)
		right = strchr(left, '\0');
	else if (*right != '\0')
		right++;
	*pos = right - v;
	return match;
}
2576
2577 /*
2578 * Evaluate an optionally negated single character, numerical,
2579 * or string condition.
2580 */
2581 static int
2582 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2583 {
2584 const char *start, *end;
2585 char *cp, *name;
2586 size_t sz;
2587 int deftype, len, number, savepos, istrue, wanttrue;
2588
2589 if ('!' == v[*pos]) {
2590 wanttrue = 0;
2591 (*pos)++;
2592 } else
2593 wanttrue = 1;
2594
2595 switch (v[*pos]) {
2596 case '\0':
2597 return 0;
2598 case 'n':
2599 case 'o':
2600 (*pos)++;
2601 return wanttrue;
2602 case 'e':
2603 case 't':
2604 case 'v':
2605 (*pos)++;
2606 return !wanttrue;
2607 case 'c':
2608 do {
2609 (*pos)++;
2610 } while (v[*pos] == ' ');
2611
2612 /*
2613 * Quirk for groff compatibility:
2614 * The horizontal tab is neither available nor unavailable.
2615 */
2616
2617 if (v[*pos] == '\t') {
2618 (*pos)++;
2619 return 0;
2620 }
2621
2622 /* Printable ASCII characters are available. */
2623
2624 if (v[*pos] != '\\') {
2625 (*pos)++;
2626 return wanttrue;
2627 }
2628
2629 end = v + ++*pos;
2630 switch (mandoc_escape(&end, &start, &len)) {
2631 case ESCAPE_SPECIAL:
2632 istrue = mchars_spec2cp(start, len) != -1;
2633 break;
2634 case ESCAPE_UNICODE:
2635 istrue = 1;
2636 break;
2637 case ESCAPE_NUMBERED:
2638 istrue = mchars_num2char(start, len) != -1;
2639 break;
2640 default:
2641 istrue = !wanttrue;
2642 break;
2643 }
2644 *pos = end - v;
2645 return istrue == wanttrue;
2646 case 'd':
2647 case 'r':
2648 cp = v + *pos + 1;
2649 while (*cp == ' ')
2650 cp++;
2651 name = cp;
2652 sz = roff_getname(r, &cp, ln, cp - v);
2653 if (sz == 0)
2654 istrue = 0;
2655 else if (v[*pos] == 'r')
2656 istrue = roff_hasregn(r, name, sz);
2657 else {
2658 deftype = ROFFDEF_ANY;
2659 roff_getstrn(r, name, sz, &deftype);
2660 istrue = !!deftype;
2661 }
2662 *pos = (name + sz) - v;
2663 return istrue == wanttrue;
2664 default:
2665 break;
2666 }
2667
2668 savepos = *pos;
2669 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2670 return (number > 0) == wanttrue;
2671 else if (*pos == savepos)
2672 return roff_evalstrcond(v, pos) == wanttrue;
2673 else
2674 return 0;
2675 }
2676
2677 static int
2678 roff_line_ignore(ROFF_ARGS)
2679 {
2680
2681 return ROFF_IGN;
2682 }
2683
2684 static int
2685 roff_insec(ROFF_ARGS)
2686 {
2687
2688 mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]);
2689 return ROFF_IGN;
2690 }
2691
2692 static int
2693 roff_unsupp(ROFF_ARGS)
2694 {
2695
2696 mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]);
2697 return ROFF_IGN;
2698 }
2699
2700 static int
2701 roff_cond(ROFF_ARGS)
2702 {
2703 int irc;
2704
2705 roffnode_push(r, tok, NULL, ln, ppos);
2706
2707 /*
2708 * An `.el' has no conditional body: it will consume the value
2709 * of the current rstack entry set in prior `ie' calls or
2710 * defaults to DENY.
2711 *
2712 * If we're not an `el', however, then evaluate the conditional.
2713 */
2714
2715 r->last->rule = tok == ROFF_el ?
2716 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2717 roff_evalcond(r, ln, buf->buf, &pos);
2718
2719 /*
2720 * An if-else will put the NEGATION of the current evaluated
2721 * conditional into the stack of rules.
2722 */
2723
2724 if (tok == ROFF_ie) {
2725 if (r->rstackpos + 1 == r->rstacksz) {
2726 r->rstacksz += 16;
2727 r->rstack = mandoc_reallocarray(r->rstack,
2728 r->rstacksz, sizeof(int));
2729 }
2730 r->rstack[++r->rstackpos] = !r->last->rule;
2731 }
2732
2733 /* If the parent has false as its rule, then so do we. */
2734
2735 if (r->last->parent && !r->last->parent->rule)
2736 r->last->rule = 0;
2737
2738 /*
2739 * Determine scope.
2740 * If there is nothing on the line after the conditional,
2741 * not even whitespace, use next-line scope.
2742 * Except that .while does not support next-line scope.
2743 */
2744
2745 if (buf->buf[pos] == '\0' && tok != ROFF_while) {
2746 r->last->endspan = 2;
2747 goto out;
2748 }
2749
2750 while (buf->buf[pos] == ' ')
2751 pos++;
2752
2753 /* An opening brace requests multiline scope. */
2754
2755 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2756 r->last->endspan = -1;
2757 pos += 2;
2758 while (buf->buf[pos] == ' ')
2759 pos++;
2760 goto out;
2761 }
2762
2763 /*
2764 * Anything else following the conditional causes
2765 * single-line scope. Warn if the scope contains
2766 * nothing but trailing whitespace.
2767 */
2768
2769 if (buf->buf[pos] == '\0')
2770 mandoc_msg(MANDOCERR_COND_EMPTY,
2771 ln, ppos, "%s", roff_name[tok]);
2772
2773 r->last->endspan = 1;
2774
2775 out:
2776 *offs = pos;
2777 irc = ROFF_RERUN;
2778 if (tok == ROFF_while)
2779 irc |= ROFF_WHILE;
2780 return irc;
2781 }
2782
2783 static int
2784 roff_ds(ROFF_ARGS)
2785 {
2786 char *string;
2787 const char *name;
2788 size_t namesz;
2789
2790 /* Ignore groff compatibility mode for now. */
2791
2792 if (tok == ROFF_ds1)
2793 tok = ROFF_ds;
2794 else if (tok == ROFF_as1)
2795 tok = ROFF_as;
2796
2797 /*
2798 * The first word is the name of the string.
2799 * If it is empty or terminated by an escape sequence,
2800 * abort the `ds' request without defining anything.
2801 */
2802
2803 name = string = buf->buf + pos;
2804 if (*name == '\0')
2805 return ROFF_IGN;
2806
2807 namesz = roff_getname(r, &string, ln, pos);
2808 switch (name[namesz]) {
2809 case '\\':
2810 return ROFF_IGN;
2811 case '\t':
2812 string = buf->buf + pos + namesz;
2813 break;
2814 default:
2815 break;
2816 }
2817
2818 /* Read past the initial double-quote, if any. */
2819 if (*string == '"')
2820 string++;
2821
2822 /* The rest is the value. */
2823 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2824 ROFF_as == tok);
2825 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2826 return ROFF_IGN;
2827 }
2828
/*
 * Parse a single operator, one or two characters long.
 * If the operator is recognized, store a one-byte code for it
 * into *res, advance the parse point, and return that code;
 * two-byte operators are coded as 'l' for <=, 'g' for >=,
 * '!' for <>, 'i' for <?, 'a' for >?, and '=' for ==.
 * Otherwise, return 0 and leave the parse point unchanged;
 * *res then holds the byte found at the parse point.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	char	 next;
	int	 oplen;

	*res = v[*pos];
	oplen = 1;

	switch (*res) {
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case ':':
		break;
	case '<':
		next = v[*pos + 1];
		if (next == '=') {
			*res = 'l';
			oplen = 2;
		} else if (next == '>') {
			*res = '!';
			oplen = 2;
		} else if (next == '?') {
			*res = 'i';
			oplen = 2;
		}
		break;
	case '>':
		next = v[*pos + 1];
		if (next == '=') {
			*res = 'g';
			oplen = 2;
		} else if (next == '?') {
			*res = 'a';
			oplen = 2;
		}
		break;
	case '=':
		if (v[*pos + 1] == '=')
			oplen = 2;
		break;
	default:
		return 0;
	}

	*pos += oplen;
	return *res;
}
2892
2893 /*
2894 * Evaluate either a parenthesized numeric expression
2895 * or a single signed integer number.
2896 */
2897 static int
2898 roff_evalpar(struct roff *r, int ln,
2899 const char *v, int *pos, int *res, int flags)
2900 {
2901
2902 if ('(' != v[*pos])
2903 return roff_getnum(v, pos, res, flags);
2904
2905 (*pos)++;
2906 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2907 return 0;
2908
2909 /*
2910 * Omission of the closing parenthesis
2911 * is an error in validation mode,
2912 * but ignored in evaluation mode.
2913 */
2914
2915 if (')' == v[*pos])
2916 (*pos)++;
2917 else if (NULL == res)
2918 return 0;
2919
2920 return 1;
2921 }
2922
2923 /*
2924 * Evaluate a complete numeric expression.
2925 * Proceed left to right, there is no concept of precedence.
2926 */
2927 static int
2928 roff_evalnum(struct roff *r, int ln, const char *v,
2929 int *pos, int *res, int flags)
2930 {
2931 int mypos, operand2;
2932 char operator;
2933
2934 if (NULL == pos) {
2935 mypos = 0;
2936 pos = &mypos;
2937 }
2938
2939 if (flags & ROFFNUM_WHITE)
2940 while (isspace((unsigned char)v[*pos]))
2941 (*pos)++;
2942
2943 if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2944 return 0;
2945
2946 while (1) {
2947 if (flags & ROFFNUM_WHITE)
2948 while (isspace((unsigned char)v[*pos]))
2949 (*pos)++;
2950
2951 if ( ! roff_getop(v, pos, &operator))
2952 break;
2953
2954 if (flags & ROFFNUM_WHITE)
2955 while (isspace((unsigned char)v[*pos]))
2956 (*pos)++;
2957
2958 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2959 return 0;
2960
2961 if (flags & ROFFNUM_WHITE)
2962 while (isspace((unsigned char)v[*pos]))
2963 (*pos)++;
2964
2965 if (NULL == res)
2966 continue;
2967
2968 switch (operator) {
2969 case '+':
2970 *res += operand2;
2971 break;
2972 case '-':
2973 *res -= operand2;
2974 break;
2975 case '*':
2976 *res *= operand2;
2977 break;
2978 case '/':
2979 if (operand2 == 0) {
2980 mandoc_msg(MANDOCERR_DIVZERO,
2981 ln, *pos, "%s", v);
2982 *res = 0;
2983 break;
2984 }
2985 *res /= operand2;
2986 break;
2987 case '%':
2988 if (operand2 == 0) {
2989 mandoc_msg(MANDOCERR_DIVZERO,
2990 ln, *pos, "%s", v);
2991 *res = 0;
2992 break;
2993 }
2994 *res %= operand2;
2995 break;
2996 case '<':
2997 *res = *res < operand2;
2998 break;
2999 case '>':
3000 *res = *res > operand2;
3001 break;
3002 case 'l':
3003 *res = *res <= operand2;
3004 break;
3005 case 'g':
3006 *res = *res >= operand2;
3007 break;
3008 case '=':
3009 *res = *res == operand2;
3010 break;
3011 case '!':
3012 *res = *res != operand2;
3013 break;
3014 case '&':
3015 *res = *res && operand2;
3016 break;
3017 case ':':
3018 *res = *res || operand2;
3019 break;
3020 case 'i':
3021 if (operand2 < *res)
3022 *res = operand2;
3023 break;
3024 case 'a':
3025 if (operand2 > *res)
3026 *res = operand2;
3027 break;
3028 default:
3029 abort();
3030 }
3031 }
3032 return 1;
3033 }
3034
3035 /* --- register management ------------------------------------------------ */
3036
/*
 * Set, increment, or decrement the number register with the
 * NUL-terminated name, leaving its step size alone.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	size_t	 namesz;

	namesz = strlen(name);
	roff_setregn(r, name, namesz, val, sign, INT_MIN);
}
3042
3043 static void
3044 roff_setregn(struct roff *r, const char *name, size_t len,
3045 int val, char sign, int step)
3046 {
3047 struct roffreg *reg;
3048
3049 /* Search for an existing register with the same name. */
3050 reg = r->regtab;
3051
3052 while (reg != NULL && (reg->key.sz != len ||
3053 strncmp(reg->key.p, name, len) != 0))
3054 reg = reg->next;
3055
3056 if (NULL == reg) {
3057 /* Create a new register. */
3058 reg = mandoc_malloc(sizeof(struct roffreg));
3059 reg->key.p = mandoc_strndup(name, len);
3060 reg->key.sz = len;
3061 reg->val = 0;
3062 reg->step = 0;
3063 reg->next = r->regtab;
3064 r->regtab = reg;
3065 }
3066
3067 if ('+' == sign)
3068 reg->val += val;
3069 else if ('-' == sign)
3070 reg->val -= val;
3071 else
3072 reg->val = val;
3073 if (step != INT_MIN)
3074 reg->step = step;
3075 }
3076
3077 /*
3078 * Handle some predefined read-only number registers.
3079 * For now, return -1 if the requested register is not predefined;
3080 * in case a predefined read-only register having the value -1
3081 * were to turn up, another special value would have to be chosen.
3082 */
3083 static int
3084 roff_getregro(const struct roff *r, const char *name)
3085 {
3086
3087 switch (*name) {
3088 case '$': /* Number of arguments of the last macro evaluated. */
3089 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
3090 case 'A': /* ASCII approximation mode is always off. */
3091 return 0;
3092 case 'g': /* Groff compatibility mode is always on. */
3093 return 1;
3094 case 'H': /* Fixed horizontal resolution. */
3095 return 24;
3096 case 'j': /* Always adjust left margin only. */
3097 return 0;
3098 case 'T': /* Some output device is always defined. */
3099 return 1;
3100 case 'V': /* Fixed vertical resolution. */
3101 return 40;
3102 default:
3103 return -1;
3104 }
3105 }
3106
/*
 * Return the value of the number register with the
 * NUL-terminated name, without stepping it.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	size_t	 namesz;

	namesz = strlen(name);
	return roff_getregn(r, name, namesz, '\0');
}
3112
3113 static int
3114 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
3115 {
3116 struct roffreg *reg;
3117 int val;
3118
3119 if ('.' == name[0] && 2 == len) {
3120 val = roff_getregro(r, name + 1);
3121 if (-1 != val)
3122 return val;
3123 }
3124
3125 for (reg = r->regtab; reg; reg = reg->next) {
3126 if (len == reg->key.sz &&
3127 0 == strncmp(name, reg->key.p, len)) {
3128 switch (sign) {
3129 case '+':
3130 reg->val += reg->step;
3131 break;
3132 case '-':
3133 reg->val -= reg->step;
3134 break;
3135 default:
3136 break;
3137 }
3138 return reg->val;
3139 }
3140 }
3141
3142 roff_setregn(r, name, len, 0, '\0', INT_MIN);
3143 return 0;
3144 }
3145
3146 static int
3147 roff_hasregn(const struct roff *r, const char *name, size_t len)
3148 {
3149 struct roffreg *reg;
3150 int val;
3151
3152 if ('.' == name[0] && 2 == len) {
3153 val = roff_getregro(r, name + 1);
3154 if (-1 != val)
3155 return 1;
3156 }
3157
3158 for (reg = r->regtab; reg; reg = reg->next)
3159 if (len == reg->key.sz &&
3160 0 == strncmp(name, reg->key.p, len))
3161 return 1;
3162
3163 return 0;
3164 }
3165
3166 static void
3167 roff_freereg(struct roffreg *reg)
3168 {
3169 struct roffreg *old_reg;
3170
3171 while (NULL != reg) {
3172 free(reg->key.p);
3173 old_reg = reg;
3174 reg = reg->next;
3175 free(old_reg);
3176 }
3177 }
3178
3179 static int
3180 roff_nr(ROFF_ARGS)
3181 {
3182 char *key, *val, *step;
3183 size_t keysz;
3184 int iv, is, len;
3185 char sign;
3186
3187 key = val = buf->buf + pos;
3188 if (*key == '\0')
3189 return ROFF_IGN;
3190
3191 keysz = roff_getname(r, &val, ln, pos);
3192 if (key[keysz] == '\\' || key[keysz] == '\t')
3193 return ROFF_IGN;
3194
3195 sign = *val;
3196 if (sign == '+' || sign == '-')
3197 val++;
3198
3199 len = 0;
3200 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
3201 return ROFF_IGN;
3202
3203 step = val + len;
3204 while (isspace((unsigned char)*step))
3205 step++;
3206 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
3207 is = INT_MIN;
3208
3209 roff_setregn(r, key, keysz, iv, sign, is);
3210 return ROFF_IGN;
3211 }
3212
3213 static int
3214 roff_rr(ROFF_ARGS)
3215 {
3216 struct roffreg *reg, **prev;
3217 char *name, *cp;
3218 size_t namesz;
3219
3220 name = cp = buf->buf + pos;
3221 if (*name == '\0')
3222 return ROFF_IGN;
3223 namesz = roff_getname(r, &cp, ln, pos);
3224 name[namesz] = '\0';
3225
3226 prev = &r->regtab;
3227 while (1) {
3228 reg = *prev;
3229 if (reg == NULL || !strcmp(name, reg->key.p))
3230 break;
3231 prev = &reg->next;
3232 }
3233 if (reg != NULL) {
3234 *prev = reg->next;
3235 free(reg->key.p);
3236 free(reg);
3237 }
3238 return ROFF_IGN;
3239 }
3240
3241 /* --- handler functions for roff requests -------------------------------- */
3242
3243 static int
3244 roff_rm(ROFF_ARGS)
3245 {
3246 const char *name;
3247 char *cp;
3248 size_t namesz;
3249
3250 cp = buf->buf + pos;
3251 while (*cp != '\0') {
3252 name = cp;
3253 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
3254 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
3255 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3256 if (name[namesz] == '\\' || name[namesz] == '\t')
3257 break;
3258 }
3259 return ROFF_IGN;
3260 }
3261
3262 static int
3263 roff_it(ROFF_ARGS)
3264 {
3265 int iv;
3266
3267 /* Parse the number of lines. */
3268
3269 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
3270 mandoc_msg(MANDOCERR_IT_NONUM,
3271 ln, ppos, "%s", buf->buf + 1);
3272 return ROFF_IGN;
3273 }
3274
3275 while (isspace((unsigned char)buf->buf[pos]))
3276 pos++;
3277
3278 /*
3279 * Arm the input line trap.
3280 * Special-casing "an-trap" is an ugly workaround to cope
3281 * with DocBook stupidly fiddling with man(7) internals.
3282 */
3283
3284 roffit_lines = iv;
3285 roffit_macro = mandoc_strdup(iv != 1 ||
3286 strcmp(buf->buf + pos, "an-trap") ?
3287 buf->buf + pos : "br");
3288 return ROFF_IGN;
3289 }
3290
3291 static int
3292 roff_Dd(ROFF_ARGS)
3293 {
3294 int mask;
3295 enum roff_tok t, te;
3296
3297 switch (tok) {
3298 case ROFF_Dd:
3299 tok = MDOC_Dd;
3300 te = MDOC_MAX;
3301 if (r->format == 0)
3302 r->format = MPARSE_MDOC;
3303 mask = MPARSE_MDOC | MPARSE_QUICK;
3304 break;
3305 case ROFF_TH:
3306 tok = MAN_TH;
3307 te = MAN_MAX;
3308 if (r->format == 0)
3309 r->format = MPARSE_MAN;
3310 mask = MPARSE_QUICK;
3311 break;
3312 default:
3313 abort();
3314 }
3315 if ((r->options & mask) == 0)
3316 for (t = tok; t < te; t++)
3317 roff_setstr(r, roff_name[t], NULL, 0);
3318 return ROFF_CONT;
3319 }
3320
3321 static int
3322 roff_TE(ROFF_ARGS)
3323 {
3324 r->man->flags &= ~ROFF_NONOFILL;
3325 if (r->tbl == NULL) {
3326 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE");
3327 return ROFF_IGN;
3328 }
3329 if (tbl_end(r->tbl, 0) == 0) {
3330 r->tbl = NULL;
3331 free(buf->buf);
3332 buf->buf = mandoc_strdup(".sp");
3333 buf->sz = 4;
3334 *offs = 0;
3335 return ROFF_REPARSE;
3336 }
3337 r->tbl = NULL;
3338 return ROFF_IGN;
3339 }
3340
3341 static int
3342 roff_T_(ROFF_ARGS)
3343 {
3344
3345 if (NULL == r->tbl)
3346 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&");
3347 else
3348 tbl_restart(ln, ppos, r->tbl);
3349
3350 return ROFF_IGN;
3351 }
3352
3353 /*
3354 * Handle in-line equation delimiters.
3355 */
3356 static int
3357 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
3358 {
3359 char *cp1, *cp2;
3360 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
3361
3362 /*
3363 * Outside equations, look for an opening delimiter.
3364 * If we are inside an equation, we already know it is
3365 * in-line, or this function wouldn't have been called;
3366 * so look for a closing delimiter.
3367 */
3368
3369 cp1 = buf->buf + pos;
3370 cp2 = strchr(cp1, r->eqn == NULL ?
3371 r->last_eqn->odelim : r->last_eqn->cdelim);
3372 if (cp2 == NULL)
3373 return ROFF_CONT;
3374
3375 *cp2++ = '\0';
3376 bef_pr = bef_nl = aft_nl = aft_pr = "";
3377
3378 /* Handle preceding text, protecting whitespace. */
3379
3380 if (*buf->buf != '\0') {
3381 if (r->eqn == NULL)
3382 bef_pr = "\\&";
3383 bef_nl = "\n";
3384 }
3385
3386 /*
3387 * Prepare replacing the delimiter with an equation macro
3388 * and drop leading white space from the equation.
3389 */
3390
3391 if (r->eqn == NULL) {
3392 while (*cp2 == ' ')
3393 cp2++;
3394 mac = ".EQ";
3395 } else
3396 mac = ".EN";
3397
3398 /* Handle following text, protecting whitespace. */
3399
3400 if (*cp2 != '\0') {
3401 aft_nl = "\n";
3402 if (r->eqn != NULL)
3403 aft_pr = "\\&";
3404 }
3405
3406 /* Do the actual replacement. */
3407
3408 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
3409 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
3410 free(buf->buf);
3411 buf->buf = cp1;
3412
3413 /* Toggle the in-line state of the eqn subsystem. */
3414
3415 r->eqn_inline = r->eqn == NULL;
3416 return ROFF_REPARSE;
3417 }
3418
3419 static int
3420 roff_EQ(ROFF_ARGS)
3421 {
3422 struct roff_node *n;
3423
3424 if (r->man->meta.macroset == MACROSET_MAN)
3425 man_breakscope(r->man, ROFF_EQ);
3426 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3427 if (ln > r->man->last->line)
3428 n->flags |= NODE_LINE;
3429 n->eqn = eqn_box_new();
3430 roff_node_append(r->man, n);
3431 r->man->next = ROFF_NEXT_SIBLING;
3432
3433 assert(r->eqn == NULL);
3434 if (r->last_eqn == NULL)
3435 r->last_eqn = eqn_alloc();
3436 else
3437 eqn_reset(r->last_eqn);
3438 r->eqn = r->last_eqn;
3439 r->eqn->node = n;
3440
3441 if (buf->buf[pos] != '\0')
3442 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3443 ".EQ %s", buf->buf + pos);
3444
3445 return ROFF_IGN;
3446 }
3447
3448 static int
3449 roff_EN(ROFF_ARGS)
3450 {
3451 if (r->eqn != NULL) {
3452 eqn_parse(r->eqn);
3453 r->eqn = NULL;
3454 } else
3455 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN");
3456 if (buf->buf[pos] != '\0')
3457 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3458 "EN %s", buf->buf + pos);
3459 return ROFF_IGN;
3460 }
3461
3462 static int
3463 roff_TS(ROFF_ARGS)
3464 {
3465 if (r->tbl != NULL) {
3466 mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
3467 tbl_end(r->tbl, 0);
3468 }
3469 r->man->flags |= ROFF_NONOFILL;
3470 r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
3471 if (r->last_tbl == NULL)
3472 r->first_tbl = r->tbl;
3473 r->last_tbl = r->tbl;
3474 return ROFF_IGN;
3475 }
3476
3477 static int
3478 roff_noarg(ROFF_ARGS)
3479 {
3480 if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3481 man_breakscope(r->man, tok);
3482 if (tok == ROFF_brp)
3483 tok = ROFF_br;
3484 roff_elem_alloc(r->man, ln, ppos, tok);
3485 if (buf->buf[pos] != '\0')
3486 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3487 "%s %s", roff_name[tok], buf->buf + pos);
3488 if (tok == ROFF_nf)
3489 r->man->flags |= ROFF_NOFILL;
3490 else if (tok == ROFF_fi)
3491 r->man->flags &= ~ROFF_NOFILL;
3492 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3493 r->man->next = ROFF_NEXT_SIBLING;
3494 return ROFF_IGN;
3495 }
3496
3497 static int
3498 roff_onearg(ROFF_ARGS)
3499 {
3500 struct roff_node *n;
3501 char *cp;
3502 int npos;
3503
3504 if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3505 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3506 tok == ROFF_ti))
3507 man_breakscope(r->man, tok);
3508
3509 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3510 r->man->last = roffce_node;
3511 r->man->next = ROFF_NEXT_SIBLING;
3512 }
3513
3514 roff_elem_alloc(r->man, ln, ppos, tok);
3515 n = r->man->last;
3516
3517 cp = buf->buf + pos;
3518 if (*cp != '\0') {
3519 while (*cp != '\0' && *cp != ' ')
3520 cp++;
3521 while (*cp == ' ')
3522 *cp++ = '\0';
3523 if (*cp != '\0')
3524 mandoc_msg(MANDOCERR_ARG_EXCESS,
3525 ln, (int)(cp - buf->buf),
3526 "%s ... %s", roff_name[tok], cp);
3527 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3528 }
3529
3530 if (tok == ROFF_ce || tok == ROFF_rj) {
3531 if (r->man->last->type == ROFFT_ELEM) {
3532 roff_word_alloc(r->man, ln, pos, "1");
3533 r->man->last->flags |= NODE_NOSRC;
3534 }
3535 npos = 0;
3536 if (roff_evalnum(r, ln, r->man->last->string, &npos,
3537 &roffce_lines, 0) == 0) {
3538 mandoc_msg(MANDOCERR_CE_NONUM,
3539 ln, pos, "ce %s", buf->buf + pos);
3540 roffce_lines = 1;
3541 }
3542 if (roffce_lines < 1) {
3543 r->man->last = r->man->last->parent;
3544 roffce_node = NULL;
3545 roffce_lines = 0;
3546 } else
3547 roffce_node = r->man->last->parent;
3548 } else {
3549 n->flags |= NODE_VALID | NODE_ENDED;
3550 r->man->last = n;
3551 }
3552 n->flags |= NODE_LINE;
3553 r->man->next = ROFF_NEXT_SIBLING;
3554 return ROFF_IGN;
3555 }
3556
3557 static int
3558 roff_manyarg(ROFF_ARGS)
3559 {
3560 struct roff_node *n;
3561 char *sp, *ep;
3562
3563 roff_elem_alloc(r->man, ln, ppos, tok);
3564 n = r->man->last;
3565
3566 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3567 while (*ep != '\0' && *ep != ' ')
3568 ep++;
3569 while (*ep == ' ')
3570 *ep++ = '\0';
3571 roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3572 }
3573
3574 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3575 r->man->last = n;
3576 r->man->next = ROFF_NEXT_SIBLING;
3577 return ROFF_IGN;
3578 }
3579
3580 static int
3581 roff_als(ROFF_ARGS)
3582 {
3583 char *oldn, *newn, *end, *value;
3584 size_t oldsz, newsz, valsz;
3585
3586 newn = oldn = buf->buf + pos;
3587 if (*newn == '\0')
3588 return ROFF_IGN;
3589
3590 newsz = roff_getname(r, &oldn, ln, pos);
3591 if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0')
3592 return ROFF_IGN;
3593
3594 end = oldn;
3595 oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3596 if (oldsz == 0)
3597 return ROFF_IGN;
3598
3599 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3600 (int)oldsz, oldn);
3601 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3602 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3603 free(value);
3604 return ROFF_IGN;
3605 }
3606
3607 /*
3608 * The .break request only makes sense inside conditionals,
3609 * and that case is already handled in roff_cond_sub().
3610 */
3611 static int
3612 roff_break(ROFF_ARGS)
3613 {
3614 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break");
3615 return ROFF_IGN;
3616 }
3617
3618 static int
3619 roff_cc(ROFF_ARGS)
3620 {
3621 const char *p;
3622
3623 p = buf->buf + pos;
3624
3625 if (*p == '\0' || (r->control = *p++) == '.')
3626 r->control = '\0';
3627
3628 if (*p != '\0')
3629 mandoc_msg(MANDOCERR_ARG_EXCESS,
3630 ln, p - buf->buf, "cc ... %s", p);
3631
3632 return ROFF_IGN;
3633 }
3634
3635 static int
3636 roff_char(ROFF_ARGS)
3637 {
3638 const char *p, *kp, *vp;
3639 size_t ksz, vsz;
3640 int font;
3641
3642 /* Parse the character to be replaced. */
3643
3644 kp = buf->buf + pos;
3645 p = kp + 1;
3646 if (*kp == '\0' || (*kp == '\\' &&
3647 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3648 (*p != ' ' && *p != '\0')) {
3649 mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp);
3650 return ROFF_IGN;
3651 }
3652 ksz = p - kp;
3653 while (*p == ' ')
3654 p++;
3655
3656 /*
3657 * If the replacement string contains a font escape sequence,
3658 * we have to restore the font at the end.
3659 */
3660
3661 vp = p;
3662 vsz = strlen(p);
3663 font = 0;
3664 while (*p != '\0') {
3665 if (*p++ != '\\')
3666 continue;
3667 switch (mandoc_escape(&p, NULL, NULL)) {
3668 case ESCAPE_FONT:
3669 case ESCAPE_FONTROMAN:
3670 case ESCAPE_FONTITALIC:
3671 case ESCAPE_FONTBOLD:
3672 case ESCAPE_FONTBI:
3673 case ESCAPE_FONTCR:
3674 case ESCAPE_FONTCB:
3675 case ESCAPE_FONTCI:
3676 case ESCAPE_FONTPREV:
3677 font++;
3678 break;
3679 default:
3680 break;
3681 }
3682 }
3683 if (font > 1)
3684 mandoc_msg(MANDOCERR_CHAR_FONT,
3685 ln, (int)(vp - buf->buf), "%s", vp);
3686
3687 /*
3688 * Approximate the effect of .char using the .tr tables.
3689 * XXX In groff, .char and .tr interact differently.
3690 */
3691
3692 if (ksz == 1) {
3693 if (r->xtab == NULL)
3694 r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3695 assert((unsigned int)*kp < 128);
3696 free(r->xtab[(int)*kp].p);
3697 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3698 "%s%s", vp, font ? "\fP" : "");
3699 } else {
3700 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3701 if (font)
3702 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3703 }
3704 return ROFF_IGN;
3705 }
3706
3707 static int
3708 roff_ec(ROFF_ARGS)
3709 {
3710 const char *p;
3711
3712 p = buf->buf + pos;
3713 if (*p == '\0')
3714 r->escape = '\\';
3715 else {
3716 r->escape = *p;
3717 if (*++p != '\0')
3718 mandoc_msg(MANDOCERR_ARG_EXCESS, ln,
3719 (int)(p - buf->buf), "ec ... %s", p);
3720 }
3721 return ROFF_IGN;
3722 }
3723
3724 static int
3725 roff_eo(ROFF_ARGS)
3726 {
3727 r->escape = '\0';
3728 if (buf->buf[pos] != '\0')
3729 mandoc_msg(MANDOCERR_ARG_SKIP,
3730 ln, pos, "eo %s", buf->buf + pos);
3731 return ROFF_IGN;
3732 }
3733
3734 static int
3735 roff_nop(ROFF_ARGS)
3736 {
3737 while (buf->buf[pos] == ' ')
3738 pos++;
3739 *offs = pos;
3740 return ROFF_RERUN;
3741 }
3742
3743 static int
3744 roff_tr(ROFF_ARGS)
3745 {
3746 const char *p, *first, *second;
3747 size_t fsz, ssz;
3748 enum mandoc_esc esc;
3749
3750 p = buf->buf + pos;
3751
3752 if (*p == '\0') {
3753 mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr");
3754 return ROFF_IGN;
3755 }
3756
3757 while (*p != '\0') {
3758 fsz = ssz = 1;
3759
3760 first = p++;
3761 if (*first == '\\') {
3762 esc = mandoc_escape(&p, NULL, NULL);
3763 if (esc == ESCAPE_ERROR) {
3764 mandoc_msg(MANDOCERR_ESC_BAD, ln,
3765 (int)(p - buf->buf), "%s", first);
3766 return ROFF_IGN;
3767 }
3768 fsz = (size_t)(p - first);
3769 }
3770
3771 second = p++;
3772 if (*second == '\\') {
3773 esc = mandoc_escape(&p, NULL, NULL);
3774 if (esc == ESCAPE_ERROR) {
3775 mandoc_msg(MANDOCERR_ESC_BAD, ln,
3776 (int)(p - buf->buf), "%s", second);
3777 return ROFF_IGN;
3778 }
3779 ssz = (size_t)(p - second);
3780 } else if (*second == '\0') {
3781 mandoc_msg(MANDOCERR_TR_ODD, ln,
3782 (int)(first - buf->buf), "tr %s", first);
3783 second = " ";
3784 p--;
3785 }
3786
3787 if (fsz > 1) {
3788 roff_setstrn(&r->xmbtab, first, fsz,
3789 second, ssz, 0);
3790 continue;
3791 }
3792
3793 if (r->xtab == NULL)
3794 r->xtab = mandoc_calloc(128,
3795 sizeof(struct roffstr));
3796
3797 free(r->xtab[(int)*first].p);
3798 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3799 r->xtab[(int)*first].sz = ssz;
3800 }
3801
3802 return ROFF_IGN;
3803 }
3804
3805 /*
3806 * Implementation of the .return request.
3807 * There is no need to call roff_userret() from here.
3808 * The read module will call that after rewinding the reader stack
3809 * to the place from where the current macro was called.
3810 */
3811 static int
3812 roff_return(ROFF_ARGS)
3813 {
3814 if (r->mstackpos >= 0)
3815 return ROFF_IGN | ROFF_USERRET;
3816
3817 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return");
3818 return ROFF_IGN;
3819 }
3820
3821 static int
3822 roff_rn(ROFF_ARGS)
3823 {
3824 const char *value;
3825 char *oldn, *newn, *end;
3826 size_t oldsz, newsz;
3827 int deftype;
3828
3829 oldn = newn = buf->buf + pos;
3830 if (*oldn == '\0')
3831 return ROFF_IGN;
3832
3833 oldsz = roff_getname(r, &newn, ln, pos);
3834 if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0')
3835 return ROFF_IGN;
3836
3837 end = newn;
3838 newsz = roff_getname(r, &end, ln, newn - buf->buf);
3839 if (newsz == 0)
3840 return ROFF_IGN;
3841
3842 deftype = ROFFDEF_ANY;
3843 value = roff_getstrn(r, oldn, oldsz, &deftype);
3844 switch (deftype) {
3845 case ROFFDEF_USER:
3846 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3847 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3848 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3849 break;
3850 case ROFFDEF_PRE:
3851 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3852 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3853 break;
3854 case ROFFDEF_REN:
3855 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3856 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3857 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3858 break;
3859 case ROFFDEF_STD:
3860 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3861 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3862 break;
3863 default:
3864 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3865 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3866 break;
3867 }
3868 return ROFF_IGN;
3869 }
3870
3871 static int
3872 roff_shift(ROFF_ARGS)
3873 {
3874 struct mctx *ctx;
3875 int argpos, levels, i;
3876
3877 argpos = pos;
3878 levels = 1;
3879 if (buf->buf[pos] != '\0' &&
3880 roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
3881 mandoc_msg(MANDOCERR_CE_NONUM,
3882 ln, pos, "shift %s", buf->buf + pos);
3883 levels = 1;
3884 }
3885 if (r->mstackpos < 0) {
3886 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift");
3887 return ROFF_IGN;
3888 }
3889 ctx = r->mstack + r->mstackpos;
3890 if (levels > ctx->argc) {
3891 mandoc_msg(MANDOCERR_SHIFT,
3892 ln, argpos, "%d, but max is %d", levels, ctx->argc);
3893 levels = ctx->argc;
3894 }
3895 if (levels < 0) {
3896 mandoc_msg(MANDOCERR_ARG_NEG, ln, argpos, "shift %d", levels);
3897 levels = 0;
3898 }
3899 if (levels == 0)
3900 return ROFF_IGN;
3901 for (i = 0; i < levels; i++)
3902 free(ctx->argv[i]);
3903 ctx->argc -= levels;
3904 for (i = 0; i < ctx->argc; i++)
3905 ctx->argv[i] = ctx->argv[i + levels];
3906 return ROFF_IGN;
3907 }
3908
3909 static int
3910 roff_so(ROFF_ARGS)
3911 {
3912 char *name, *cp;
3913
3914 name = buf->buf + pos;
3915 mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name);
3916
3917 /*
3918 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3919 * opening anything that's not in our cwd or anything beneath
3920 * it. Thus, explicitly disallow traversing up the file-system
3921 * or using absolute paths.
3922 */
3923
3924 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3925 mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name);
3926 buf->sz = mandoc_asprintf(&cp,
3927 ".sp\nSee the file %s.\n.sp", name) + 1;
3928 free(buf->buf);
3929 buf->buf = cp;
3930 *offs = 0;
3931 return ROFF_REPARSE;
3932 }
3933
3934 *offs = pos;
3935 return ROFF_SO;
3936 }
3937
3938 /* --- user defined strings and macros ------------------------------------ */
3939
3940 static int
3941 roff_userdef(ROFF_ARGS)
3942 {
3943 struct mctx *ctx;
3944 char *arg, *ap, *dst, *src;
3945 size_t sz;
3946
3947 /* If the macro is empty, ignore it altogether. */
3948
3949 if (*r->current_string == '\0')
3950 return ROFF_IGN;
3951
3952 /* Initialize a new macro stack context. */
3953
3954 if (++r->mstackpos == r->mstacksz) {
3955 r->mstack = mandoc_recallocarray(r->mstack,
3956 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
3957 r->mstacksz += 8;
3958 }
3959 ctx = r->mstack + r->mstackpos;
3960 ctx->argc = 0;
3961
3962 /*
3963 * Collect pointers to macro argument strings,
3964 * NUL-terminating them and escaping quotes.
3965 */
3966
3967 src = buf->buf + pos;
3968 while (*src != '\0') {
3969 if (ctx->argc == ctx->argsz) {
3970 ctx->argsz += 8;
3971 ctx->argv = mandoc_reallocarray(ctx->argv,
3972 ctx->argsz, sizeof(*ctx->argv));
3973 }
3974 arg = roff_getarg(r, &src, ln, &pos);
3975 sz = 1; /* For the terminating NUL. */
3976 for (ap = arg; *ap != '\0'; ap++)
3977 sz += *ap == '"' ? 4 : 1;
3978 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
3979 for (ap = arg; *ap != '\0'; ap++) {
3980 if (*ap == '"') {
3981 memcpy(dst, "\\(dq", 4);
3982 dst += 4;
3983 } else
3984 *dst++ = *ap;
3985 }
3986 *dst = '\0';
3987 free(arg);
3988 }
3989
3990 /* Replace the macro invocation by the macro definition. */
3991
3992 free(buf->buf);
3993 buf->buf = mandoc_strdup(r->current_string);
3994 buf->sz = strlen(buf->buf) + 1;
3995 *offs = 0;
3996
3997 return buf->buf[buf->sz - 2] == '\n' ?
3998 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
3999 }
4000
4001 /*
4002 * Calling a high-level macro that was renamed with .rn.
4003 * r->current_string has already been set up by roff_parse().
4004 */
4005 static int
4006 roff_renamed(ROFF_ARGS)
4007 {
4008 char *nbuf;
4009
4010 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
4011 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
4012 free(buf->buf);
4013 buf->buf = nbuf;
4014 *offs = 0;
4015 return ROFF_CONT;
4016 }
4017
4018 /*
4019 * Measure the length in bytes of the roff identifier at *cpp
4020 * and advance the pointer to the next word.
4021 */
4022 static size_t
4023 roff_getname(struct roff *r, char **cpp, int ln, int pos)
4024 {
4025 char *name, *cp;
4026 size_t namesz;
4027
4028 name = *cpp;
4029 if (*name == '\0')
4030 return 0;
4031
4032 /* Advance cp to the byte after the end of the name. */
4033
4034 for (cp = name; 1; cp++) {
4035 namesz = cp - name;
4036 if (*cp == '\0')
4037 break;
4038 if (*cp == ' ' || *cp == '\t') {
4039 cp++;
4040 break;
4041 }
4042 if (*cp != '\\')
4043 continue;
4044 if (cp[1] == '{' || cp[1] == '}')
4045 break;
4046 if (*++cp == '\\')
4047 continue;
4048 mandoc_msg(MANDOCERR_NAMESC, ln, pos,
4049 "%.*s", (int)(cp - name + 1), name);
4050 mandoc_escape((const char **)&cp, NULL, NULL);
4051 break;
4052 }
4053
4054 /* Read past spaces. */
4055
4056 while (*cp == ' ')
4057 cp++;
4058
4059 *cpp = cp;
4060 return namesz;
4061 }
4062
4063 /*
4064 * Store *string into the user-defined string called *name.
4065 * To clear an existing entry, call with (*r, *name, NULL, 0).
4066 * append == 0: replace mode
4067 * append == 1: single-line append mode
4068 * append == 2: multiline append mode, append '\n' after each call
4069 */
4070 static void
4071 roff_setstr(struct roff *r, const char *name, const char *string,
4072 int append)
4073 {
4074 size_t namesz;
4075
4076 namesz = strlen(name);
4077 roff_setstrn(&r->strtab, name, namesz, string,
4078 string ? strlen(string) : 0, append);
4079 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
4080 }
4081
4082 static void
4083 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
4084 const char *string, size_t stringsz, int append)
4085 {
4086 struct roffkv *n;
4087 char *c;
4088 int i;
4089 size_t oldch, newch;
4090
4091 /* Search for an existing string with the same name. */
4092 n = *r;
4093
4094 while (n && (namesz != n->key.sz ||
4095 strncmp(n->key.p, name, namesz)))
4096 n = n->next;
4097
4098 if (NULL == n) {
4099 /* Create a new string table entry. */
4100 n = mandoc_malloc(sizeof(struct roffkv));
4101 n->key.p = mandoc_strndup(name, namesz);
4102 n->key.sz = namesz;
4103 n->val.p = NULL;
4104 n->val.sz = 0;
4105 n->next = *r;
4106 *r = n;
4107 } else if (0 == append) {
4108 free(n->val.p);
4109 n->val.p = NULL;
4110 n->val.sz = 0;
4111 }
4112
4113 if (NULL == string)
4114 return;
4115
4116 /*
4117 * One additional byte for the '\n' in multiline mode,
4118 * and one for the terminating '\0'.
4119 */
4120 newch = stringsz + (1 < append ? 2u : 1u);
4121
4122 if (NULL == n->val.p) {
4123 n->val.p = mandoc_malloc(newch);
4124 *n->val.p = '\0';
4125 oldch = 0;
4126 } else {
4127 oldch = n->val.sz;
4128 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
4129 }
4130
4131 /* Skip existing content in the destination buffer. */
4132 c = n->val.p + (int)oldch;
4133
4134 /* Append new content to the destination buffer. */
4135 i = 0;
4136 while (i < (int)stringsz) {
4137 /*
4138 * Rudimentary roff copy mode:
4139 * Handle escaped backslashes.
4140 */
4141 if ('\\' == string[i] && '\\' == string[i + 1])
4142 i++;
4143 *c++ = string[i++];
4144 }
4145
4146 /* Append terminating bytes. */
4147 if (1 < append)
4148 *c++ = '\n';
4149
4150 *c = '\0';
4151 n->val.sz = (int)(c - n->val.p);
4152 }
4153
4154 static const char *
4155 roff_getstrn(struct roff *r, const char *name, size_t len,
4156 int *deftype)
4157 {
4158 const struct roffkv *n;
4159 int found, i;
4160 enum roff_tok tok;
4161
4162 found = 0;
4163 for (n = r->strtab; n != NULL; n = n->next) {
4164 if (strncmp(name, n->key.p, len) != 0 ||
4165 n->key.p[len] != '\0' || n->val.p == NULL)
4166 continue;
4167 if (*deftype & ROFFDEF_USER) {
4168 *deftype = ROFFDEF_USER;
4169 return n->val.p;
4170 } else {
4171 found = 1;
4172 break;
4173 }
4174 }
4175 for (n = r->rentab; n != NULL; n = n->next) {
4176 if (strncmp(name, n->key.p, len) != 0 ||
4177 n->key.p[len] != '\0' || n->val.p == NULL)
4178 continue;
4179 if (*deftype & ROFFDEF_REN) {
4180 *deftype = ROFFDEF_REN;
4181 return n->val.p;
4182 } else {
4183 found = 1;
4184 break;
4185 }
4186 }
4187 for (i = 0; i < PREDEFS_MAX; i++) {
4188 if (strncmp(name, predefs[i].name, len) != 0 ||
4189 predefs[i].name[len] != '\0')
4190 continue;
4191 if (*deftype & ROFFDEF_PRE) {
4192 *deftype = ROFFDEF_PRE;
4193 return predefs[i].str;
4194 } else {
4195 found = 1;
4196 break;
4197 }
4198 }
4199 if (r->man->meta.macroset != MACROSET_MAN) {
4200 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
4201 if (strncmp(name, roff_name[tok], len) != 0 ||
4202 roff_name[tok][len] != '\0')
4203 continue;
4204 if (*deftype & ROFFDEF_STD) {
4205 *deftype = ROFFDEF_STD;
4206 return NULL;
4207 } else {
4208 found = 1;
4209 break;
4210 }
4211 }
4212 }
4213 if (r->man->meta.macroset != MACROSET_MDOC) {
4214 for (tok = MAN_TH; tok < MAN_MAX; tok++) {
4215 if (strncmp(name, roff_name[tok], len) != 0 ||
4216 roff_name[tok][len] != '\0')
4217 continue;
4218 if (*deftype & ROFFDEF_STD) {
4219 *deftype = ROFFDEF_STD;
4220 return NULL;
4221 } else {
4222 found = 1;
4223 break;
4224 }
4225 }
4226 }
4227
4228 if (found == 0 && *deftype != ROFFDEF_ANY) {
4229 if (*deftype & ROFFDEF_REN) {
4230 /*
4231 * This might still be a request,
4232 * so do not treat it as undefined yet.
4233 */
4234 *deftype = ROFFDEF_UNDEF;
4235 return NULL;
4236 }
4237
4238 /* Using an undefined string defines it to be empty. */
4239
4240 roff_setstrn(&r->strtab, name, len, "", 0, 0);
4241 roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
4242 }
4243
4244 *deftype = 0;
4245 return NULL;
4246 }
4247
4248 static void
4249 roff_freestr(struct roffkv *r)
4250 {
4251 struct roffkv *n, *nn;
4252
4253 for (n = r; n; n = nn) {
4254 free(n->key.p);
4255 free(n->val.p);
4256 nn = n->next;
4257 free(n);
4258 }
4259 }
4260
4261 /* --- accessors and utility functions ------------------------------------ */
4262
4263 /*
4264 * Duplicate an input string, making the appropriate character
4265 * conversations (as stipulated by `tr') along the way.
4266 * Returns a heap-allocated string with all the replacements made.
4267 */
4268 char *
4269 roff_strdup(const struct roff *r, const char *p)
4270 {
4271 const struct roffkv *cp;
4272 char *res;
4273 const char *pp;
4274 size_t ssz, sz;
4275 enum mandoc_esc esc;
4276
4277 if (NULL == r->xmbtab && NULL == r->xtab)
4278 return mandoc_strdup(p);
4279 else if ('\0' == *p)
4280 return mandoc_strdup("");
4281
4282 /*
4283 * Step through each character looking for term matches
4284 * (remember that a `tr' can be invoked with an escape, which is
4285 * a glyph but the escape is multi-character).
4286 * We only do this if the character hash has been initialised
4287 * and the string is >0 length.
4288 */
4289
4290 res = NULL;
4291 ssz = 0;
4292
4293 while ('\0' != *p) {
4294 assert((unsigned int)*p < 128);
4295 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4296 sz = r->xtab[(int)*p].sz;
4297 res = mandoc_realloc(res, ssz + sz + 1);
4298 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4299 ssz += sz;
4300 p++;
4301 continue;
4302 } else if ('\\' != *p) {
4303 res = mandoc_realloc(res, ssz + 2);
4304 res[ssz++] = *p++;
4305 continue;
4306 }
4307
4308 /* Search for term matches. */
4309 for (cp = r->xmbtab; cp; cp = cp->next)
4310 if (0 == strncmp(p, cp->key.p, cp->key.sz))
4311 break;
4312
4313 if (NULL != cp) {
4314 /*
4315 * A match has been found.
4316 * Append the match to the array and move
4317 * forward by its keysize.
4318 */
4319 res = mandoc_realloc(res,
4320 ssz + cp->val.sz + 1);
4321 memcpy(res + ssz, cp->val.p, cp->val.sz);
4322 ssz += cp->val.sz;
4323 p += (int)cp->key.sz;
4324 continue;
4325 }
4326
4327 /*
4328 * Handle escapes carefully: we need to copy
4329 * over just the escape itself, or else we might
4330 * do replacements within the escape itself.
4331 * Make sure to pass along the bogus string.
4332 */
4333 pp = p++;
4334 esc = mandoc_escape(&p, NULL, NULL);
4335 if (ESCAPE_ERROR == esc) {
4336 sz = strlen(pp);
4337 res = mandoc_realloc(res, ssz + sz + 1);
4338 memcpy(res + ssz, pp, sz);
4339 break;
4340 }
4341 /*
4342 * We bail out on bad escapes.
4343 * No need to warn: we already did so when
4344 * roff_expand() was called.
4345 */
4346 sz = (int)(p - pp);
4347 res = mandoc_realloc(res, ssz + sz + 1);
4348 memcpy(res + ssz, pp, sz);
4349 ssz += sz;
4350 }
4351
4352 res[(int)ssz] = '\0';
4353 return res;
4354 }
4355
4356 int
4357 roff_getformat(const struct roff *r)
4358 {
4359
4360 return r->format;
4361 }
4362
4363 /*
4364 * Find out whether a line is a macro line or not.
4365 * If it is, adjust the current position and return one; if it isn't,
4366 * return zero and don't change the current position.
4367 * If the control character has been set with `.cc', then let that grain
4368 * precedence.
4369 * This is slighly contrary to groff, where using the non-breaking
4370 * control character when `cc' has been invoked will cause the
4371 * non-breaking macro contents to be printed verbatim.
4372 */
4373 int
4374 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4375 {
4376 int pos;
4377
4378 pos = *ppos;
4379
4380 if (r->control != '\0' && cp[pos] == r->control)
4381 pos++;
4382 else if (r->control != '\0')
4383 return 0;
4384 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4385 pos += 2;
4386 else if ('.' == cp[pos] || '\'' == cp[pos])
4387 pos++;
4388 else
4389 return 0;
4390
4391 while (' ' == cp[pos] || '\t' == cp[pos])
4392 pos++;
4393
4394 *ppos = pos;
4395 return 1;
4396 }