]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
Do not fail an assertion when a high level macro occurs in the body
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.368 2019/12/26 19:51:51 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31 #include "mandoc_aux.h"
32 #include "mandoc_ohash.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "mandoc_parse.h"
36 #include "libmandoc.h"
37 #include "roff_int.h"
38 #include "tbl_parse.h"
39 #include "eqn_parse.h"
40
/*
 * roff_getarg() emits ASCII_ESC to tell roff_expand() that an escape
 * sequence came out of copy-in processing and still has to be checked
 * or interpolated.  Nothing else uses the value, so it is defined in
 * this file and not in a header.
 */
#define	ASCII_ESC	27

/* Cap on string expansions per input line; breaks infinite loops. */
#define	EXPAND_LIMIT	1000

/* Kinds of macro and string definitions; the values can be OR'ed. */
#define	ROFFDEF_USER	(1 << 1)  /* User-defined. */
#define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
#define	ROFFDEF_REN	(1 << 3)  /* Renamed standard macro. */
#define	ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
			 ROFFDEF_REN | ROFFDEF_STD)
#define	ROFFDEF_UNDEF	(1 << 5)  /* Completely undefined. */
60
61 /* --- data types --------------------------------------------------------- */
62
/*
 * Minimal string buffer: a pointer paired with its cached length.
 */
struct roffstr {
	char		*p;	/* NUL-terminated buffer */
	size_t		 sz;	/* cached strlen(p) */
};
70
71 /*
72 * A key-value roffstr pair as part of a singly-linked list.
73 */
74 struct roffkv {
75 struct roffstr key;
76 struct roffstr val;
77 struct roffkv *next; /* next in list */
78 };
79
80 /*
81 * A single number register as part of a singly-linked list.
82 */
83 struct roffreg {
84 struct roffstr key;
85 int val;
86 int step;
87 struct roffreg *next;
88 };
89
90 /*
91 * Association of request and macro names with token IDs.
92 */
93 struct roffreq {
94 enum roff_tok tok;
95 char name[];
96 };
97
98 /*
99 * A macro processing context.
100 * More than one is needed when macro calls are nested.
101 */
102 struct mctx {
103 char **argv;
104 int argc;
105 int argsz;
106 };
107
/*
 * State of a roff parser.
 */
struct roff {
	struct roff_man	*man;		/* mdoc or man parser */
	struct roffnode	*last;		/* leaf of the node stack */
	struct mctx	*mstack;	/* macro context stack */
	int		*rstack;	/* stack of inverted `ie' values */
	struct ohash	*reqtab;	/* lookup table for requests */
	struct roffreg	*regtab;	/* number registers */
	struct roffkv	*strtab;	/* user-defined strings and macros */
	struct roffkv	*rentab;	/* renamed strings and macros */
	struct roffkv	*xmbtab;	/* `tr' table, multi-byte keys */
	struct roffstr	*xtab;		/* `tr' table, single-byte keys */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl;	/* first table parsed */
	struct tbl_node	*last_tbl;	/* last table parsed */
	struct tbl_node	*tbl;		/* table currently being parsed */
	struct eqn_node	*last_eqn;	/* equation parser */
	struct eqn_node	*eqn;		/* active equation parser */
	int		 eqn_inline;	/* current equation is inline */
	int		 options;	/* parse options */
	int		 mstacksz;	/* current size of mstack */
	int		 mstackpos;	/* position in mstack */
	int		 rstacksz;	/* current size limit of rstack */
	int		 rstackpos;	/* position in rstack */
	int		 format;	/* current file is mdoc or man */
	char		 control;	/* control character */
	char		 escape;	/* escape character */
};
135
136 /*
137 * A macro definition, condition, or ignored block.
138 */
139 struct roffnode {
140 enum roff_tok tok; /* type of node */
141 struct roffnode *parent; /* up one in stack */
142 int line; /* parse line */
143 int col; /* parse col */
144 char *name; /* node name, e.g. macro name */
145 char *end; /* custom end macro of the block */
146 int endspan; /* scope to: 1=eol 2=next line -1=\} */
147 int rule; /* content is: 1=evaluated 0=skipped */
148 };
149
/*
 * Parameter list shared by all request handlers,
 * and the matching function pointer type.
 */
#define	ROFF_ARGS	 struct roff *r,	/* parser context */ \
			 enum roff_tok tok,	/* token of the macro */ \
			 struct buf *buf,	/* input buffer */ \
			 int ln,		/* input line number */ \
			 int ppos,		/* original pos in buffer */ \
			 int pos,		/* current pos in buffer */ \
			 int *offs	/* reset offset of buffer data */

typedef	int (*roffproc)(ROFF_ARGS);
159
160 struct roffmac {
161 roffproc proc; /* process new macro */
162 roffproc text; /* process as child text of macro */
163 roffproc sub; /* process as child of macro */
164 int flags;
165 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
166 };
167
/*
 * A predefined string: its input name and the text replacing it.
 */
struct predef {
	const char	*name;	/* name as used in the input */
	const char	*str;	/* replacement symbol */
};

/* Expands to one struct predef initializer plus a trailing comma. */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
175
176 /* --- function prototypes ------------------------------------------------ */
177
178 static int roffnode_cleanscope(struct roff *);
179 static int roffnode_pop(struct roff *);
180 static void roffnode_push(struct roff *, enum roff_tok,
181 const char *, int, int);
182 static void roff_addtbl(struct roff_man *, int, struct tbl_node *);
183 static int roff_als(ROFF_ARGS);
184 static int roff_block(ROFF_ARGS);
185 static int roff_block_text(ROFF_ARGS);
186 static int roff_block_sub(ROFF_ARGS);
187 static int roff_break(ROFF_ARGS);
188 static int roff_cblock(ROFF_ARGS);
189 static int roff_cc(ROFF_ARGS);
190 static int roff_ccond(struct roff *, int, int);
191 static int roff_char(ROFF_ARGS);
192 static int roff_cond(ROFF_ARGS);
193 static int roff_cond_text(ROFF_ARGS);
194 static int roff_cond_sub(ROFF_ARGS);
195 static int roff_ds(ROFF_ARGS);
196 static int roff_ec(ROFF_ARGS);
197 static int roff_eo(ROFF_ARGS);
198 static int roff_eqndelim(struct roff *, struct buf *, int);
199 static int roff_evalcond(struct roff *r, int, char *, int *);
200 static int roff_evalnum(struct roff *, int,
201 const char *, int *, int *, int);
202 static int roff_evalpar(struct roff *, int,
203 const char *, int *, int *, int);
204 static int roff_evalstrcond(const char *, int *);
205 static int roff_expand(struct roff *, struct buf *,
206 int, int, char);
207 static void roff_free1(struct roff *);
208 static void roff_freereg(struct roffreg *);
209 static void roff_freestr(struct roffkv *);
210 static size_t roff_getname(struct roff *, char **, int, int);
211 static int roff_getnum(const char *, int *, int *, int);
212 static int roff_getop(const char *, int *, char *);
213 static int roff_getregn(struct roff *,
214 const char *, size_t, char);
215 static int roff_getregro(const struct roff *,
216 const char *name);
217 static const char *roff_getstrn(struct roff *,
218 const char *, size_t, int *);
219 static int roff_hasregn(const struct roff *,
220 const char *, size_t);
221 static int roff_insec(ROFF_ARGS);
222 static int roff_it(ROFF_ARGS);
223 static int roff_line_ignore(ROFF_ARGS);
224 static void roff_man_alloc1(struct roff_man *);
225 static void roff_man_free1(struct roff_man *);
226 static int roff_manyarg(ROFF_ARGS);
227 static int roff_noarg(ROFF_ARGS);
228 static int roff_nop(ROFF_ARGS);
229 static int roff_nr(ROFF_ARGS);
230 static int roff_onearg(ROFF_ARGS);
231 static enum roff_tok roff_parse(struct roff *, char *, int *,
232 int, int);
233 static int roff_parsetext(struct roff *, struct buf *,
234 int, int *);
235 static int roff_renamed(ROFF_ARGS);
236 static int roff_return(ROFF_ARGS);
237 static int roff_rm(ROFF_ARGS);
238 static int roff_rn(ROFF_ARGS);
239 static int roff_rr(ROFF_ARGS);
240 static void roff_setregn(struct roff *, const char *,
241 size_t, int, char, int);
242 static void roff_setstr(struct roff *,
243 const char *, const char *, int);
244 static void roff_setstrn(struct roffkv **, const char *,
245 size_t, const char *, size_t, int);
246 static int roff_shift(ROFF_ARGS);
247 static int roff_so(ROFF_ARGS);
248 static int roff_tr(ROFF_ARGS);
249 static int roff_Dd(ROFF_ARGS);
250 static int roff_TE(ROFF_ARGS);
251 static int roff_TS(ROFF_ARGS);
252 static int roff_EQ(ROFF_ARGS);
253 static int roff_EN(ROFF_ARGS);
254 static int roff_T_(ROFF_ARGS);
255 static int roff_unsupp(ROFF_ARGS);
256 static int roff_userdef(ROFF_ARGS);
257
258 /* --- constant data ------------------------------------------------------ */
259
/* Flag bits for the number parsing helpers. */
#define	ROFFNUM_SCALE	(1 << 0)  /* roff_getnum() honours scaling. */
#define	ROFFNUM_WHITE	(1 << 1)  /* roff_evalnum() skips whitespace. */
262
/*
 * Names of all requests and macros.  roffhash_alloc() indexes this
 * table with enum roff_tok values and skips the NULL entries, which
 * stand for tokens that have no name of their own.
 * The order of the entries is significant: do not sort or regroup.
 */
263 const char *__roff_name[MAN_MAX + 1] = {
264 "br", "ce", "fi", "ft",
265 "ll", "mc", "nf",
266 "po", "rj", "sp",
267 "ta", "ti", NULL,
268 "ab", "ad", "af", "aln",
269 "als", "am", "am1", "ami",
270 "ami1", "as", "as1", "asciify",
271 "backtrace", "bd", "bleedat", "blm",
272 "box", "boxa", "bp", "BP",
273 "break", "breakchar", "brnl", "brp",
274 "brpnl", "c2", "cc",
275 "cf", "cflags", "ch", "char",
276 "chop", "class", "close", "CL",
277 "color", "composite", "continue", "cp",
278 "cropat", "cs", "cu", "da",
279 "dch", "Dd", "de", "de1",
280 "defcolor", "dei", "dei1", "device",
281 "devicem", "di", "do", "ds",
282 "ds1", "dwh", "dt", "ec",
283 "ecr", "ecs", "el", "em",
284 "EN", "eo", "EP", "EQ",
285 "errprint", "ev", "evc", "ex",
286 "fallback", "fam", "fc", "fchar",
287 "fcolor", "fdeferlig", "feature", "fkern",
288 "fl", "flig", "fp", "fps",
289 "fschar", "fspacewidth", "fspecial", "ftr",
290 "fzoom", "gcolor", "hc", "hcode",
291 "hidechar", "hla", "hlm", "hpf",
292 "hpfa", "hpfcode", "hw", "hy",
293 "hylang", "hylen", "hym", "hypp",
294 "hys", "ie", "if", "ig",
295 "index", "it", "itc", "IX",
296 "kern", "kernafter", "kernbefore", "kernpair",
297 "lc", "lc_ctype", "lds", "length",
298 "letadj", "lf", "lg", "lhang",
299 "linetabs", "lnr", "lnrf", "lpfx",
300 "ls", "lsm", "lt",
301 "mediasize", "minss", "mk", "mso",
302 "na", "ne", "nh", "nhychar",
303 "nm", "nn", "nop", "nr",
304 "nrf", "nroff", "ns", "nx",
305 "open", "opena", "os", "output",
306 "padj", "papersize", "pc", "pev",
307 "pi", "PI", "pl", "pm",
308 "pn", "pnr", "ps",
309 "psbb", "pshape", "pso", "ptr",
310 "pvs", "rchar", "rd", "recursionlimit",
311 "return", "rfschar", "rhang",
312 "rm", "rn", "rnn", "rr",
313 "rs", "rt", "schar", "sentchar",
314 "shc", "shift", "sizes", "so",
315 "spacewidth", "special", "spreadwarn", "ss",
316 "sty", "substring", "sv", "sy",
317 "T&", "tc", "TE",
318 "TH", "tkf", "tl",
319 "tm", "tm1", "tmc", "tr",
320 "track", "transchar", "trf", "trimat",
321 "trin", "trnt", "troff", "TS",
322 "uf", "ul", "unformat", "unwatch",
323 "unwatchn", "vpt", "vs", "warn",
/*
 * NOTE(review): the two adjacent NULL entries after "." presumably
 * belong to the renamed and user-defined pseudo-tokens, and "text"
 * to TOKEN_NONE; confirm against enum roff_tok.
 */
324 "warnscale", "watch", "watchlength", "watchn",
325 "wh", "while", "write", "writec",
326 "writem", "xflag", ".", NULL,
327 NULL, "text",
/* NOTE(review): the names from here on look like mdoc(7) macros. */
328 "Dd", "Dt", "Os", "Sh",
329 "Ss", "Pp", "D1", "Dl",
330 "Bd", "Ed", "Bl", "El",
331 "It", "Ad", "An", "Ap",
332 "Ar", "Cd", "Cm", "Dv",
333 "Er", "Ev", "Ex", "Fa",
334 "Fd", "Fl", "Fn", "Ft",
335 "Ic", "In", "Li", "Nd",
336 "Nm", "Op", "Ot", "Pa",
337 "Rv", "St", "Va", "Vt",
338 "Xr", "%A", "%B", "%D",
339 "%I", "%J", "%N", "%O",
340 "%P", "%R", "%T", "%V",
341 "Ac", "Ao", "Aq", "At",
342 "Bc", "Bf", "Bo", "Bq",
343 "Bsx", "Bx", "Db", "Dc",
344 "Do", "Dq", "Ec", "Ef",
345 "Em", "Eo", "Fx", "Ms",
346 "No", "Ns", "Nx", "Ox",
347 "Pc", "Pf", "Po", "Pq",
348 "Qc", "Ql", "Qo", "Qq",
349 "Re", "Rs", "Sc", "So",
350 "Sq", "Sm", "Sx", "Sy",
351 "Tn", "Ux", "Xc", "Xo",
352 "Fo", "Fc", "Oo", "Oc",
353 "Bk", "Ek", "Bt", "Hf",
354 "Fr", "Ud", "Lb", "Lp",
355 "Lk", "Mt", "Brq", "Bro",
356 "Brc", "%C", "Es", "En",
357 "Dx", "%Q", "%U", "Ta",
358 NULL,
/* NOTE(review): the names from here on look like man(7) macros. */
359 "TH", "SH", "SS", "TP",
360 "TQ",
361 "LP", "PP", "P", "IP",
362 "HP", "SM", "SB", "BI",
363 "IB", "BR", "RB", "R",
364 "B", "I", "IR", "RI",
365 "RE", "RS", "DT", "UC",
366 "PD", "AT", "in",
367 "SY", "YS", "OP",
368 "EX", "EE", "UR",
369 "UE", "MT", "ME", NULL
370 };
/* Read-only view of the name table above. */
371 const char *const *roff_name = __roff_name;
372
/*
 * Handlers of all roff requests.  Each row holds the handler for the
 * request itself, the handler for text inside its scope, the handler
 * for requests inside its scope, and the ROFFMAC_* flags; the comment
 * at the end of a row names the request.
 * NOTE(review): the table is sized TOKEN_NONE and its rows follow the
 * order of the name table, so it is presumably indexed by
 * enum roff_tok.  Keep both tables aligned when adding a request.
 */
373 static struct roffmac roffs[TOKEN_NONE] = {
374 { roff_noarg, NULL, NULL, 0 }, /* br */
375 { roff_onearg, NULL, NULL, 0 }, /* ce */
376 { roff_noarg, NULL, NULL, 0 }, /* fi */
377 { roff_onearg, NULL, NULL, 0 }, /* ft */
378 { roff_onearg, NULL, NULL, 0 }, /* ll */
379 { roff_onearg, NULL, NULL, 0 }, /* mc */
380 { roff_noarg, NULL, NULL, 0 }, /* nf */
381 { roff_onearg, NULL, NULL, 0 }, /* po */
382 { roff_onearg, NULL, NULL, 0 }, /* rj */
383 { roff_onearg, NULL, NULL, 0 }, /* sp */
384 { roff_manyarg, NULL, NULL, 0 }, /* ta */
385 { roff_onearg, NULL, NULL, 0 }, /* ti */
386 { NULL, NULL, NULL, 0 }, /* ROFF_MAX */
387 { roff_unsupp, NULL, NULL, 0 }, /* ab */
388 { roff_line_ignore, NULL, NULL, 0 }, /* ad */
389 { roff_line_ignore, NULL, NULL, 0 }, /* af */
390 { roff_unsupp, NULL, NULL, 0 }, /* aln */
391 { roff_als, NULL, NULL, 0 }, /* als */
392 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am */
393 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am1 */
394 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami */
395 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami1 */
396 { roff_ds, NULL, NULL, 0 }, /* as */
397 { roff_ds, NULL, NULL, 0 }, /* as1 */
398 { roff_unsupp, NULL, NULL, 0 }, /* asciify */
399 { roff_line_ignore, NULL, NULL, 0 }, /* backtrace */
400 { roff_line_ignore, NULL, NULL, 0 }, /* bd */
401 { roff_line_ignore, NULL, NULL, 0 }, /* bleedat */
402 { roff_unsupp, NULL, NULL, 0 }, /* blm */
403 { roff_unsupp, NULL, NULL, 0 }, /* box */
404 { roff_unsupp, NULL, NULL, 0 }, /* boxa */
405 { roff_line_ignore, NULL, NULL, 0 }, /* bp */
406 { roff_unsupp, NULL, NULL, 0 }, /* BP */
407 { roff_break, NULL, NULL, 0 }, /* break */
408 { roff_line_ignore, NULL, NULL, 0 }, /* breakchar */
409 { roff_line_ignore, NULL, NULL, 0 }, /* brnl */
410 { roff_noarg, NULL, NULL, 0 }, /* brp */
411 { roff_line_ignore, NULL, NULL, 0 }, /* brpnl */
412 { roff_unsupp, NULL, NULL, 0 }, /* c2 */
413 { roff_cc, NULL, NULL, 0 }, /* cc */
414 { roff_insec, NULL, NULL, 0 }, /* cf */
415 { roff_line_ignore, NULL, NULL, 0 }, /* cflags */
416 { roff_line_ignore, NULL, NULL, 0 }, /* ch */
417 { roff_char, NULL, NULL, 0 }, /* char */
418 { roff_unsupp, NULL, NULL, 0 }, /* chop */
419 { roff_line_ignore, NULL, NULL, 0 }, /* class */
420 { roff_insec, NULL, NULL, 0 }, /* close */
421 { roff_unsupp, NULL, NULL, 0 }, /* CL */
422 { roff_line_ignore, NULL, NULL, 0 }, /* color */
423 { roff_unsupp, NULL, NULL, 0 }, /* composite */
424 { roff_unsupp, NULL, NULL, 0 }, /* continue */
425 { roff_line_ignore, NULL, NULL, 0 }, /* cp */
426 { roff_line_ignore, NULL, NULL, 0 }, /* cropat */
427 { roff_line_ignore, NULL, NULL, 0 }, /* cs */
428 { roff_line_ignore, NULL, NULL, 0 }, /* cu */
429 { roff_unsupp, NULL, NULL, 0 }, /* da */
430 { roff_unsupp, NULL, NULL, 0 }, /* dch */
431 { roff_Dd, NULL, NULL, 0 }, /* Dd */
432 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de */
433 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de1 */
434 { roff_line_ignore, NULL, NULL, 0 }, /* defcolor */
435 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei */
436 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei1 */
437 { roff_unsupp, NULL, NULL, 0 }, /* device */
438 { roff_unsupp, NULL, NULL, 0 }, /* devicem */
439 { roff_unsupp, NULL, NULL, 0 }, /* di */
440 { roff_unsupp, NULL, NULL, 0 }, /* do */
441 { roff_ds, NULL, NULL, 0 }, /* ds */
442 { roff_ds, NULL, NULL, 0 }, /* ds1 */
443 { roff_unsupp, NULL, NULL, 0 }, /* dwh */
444 { roff_unsupp, NULL, NULL, 0 }, /* dt */
445 { roff_ec, NULL, NULL, 0 }, /* ec */
446 { roff_unsupp, NULL, NULL, 0 }, /* ecr */
447 { roff_unsupp, NULL, NULL, 0 }, /* ecs */
448 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* el */
449 { roff_unsupp, NULL, NULL, 0 }, /* em */
450 { roff_EN, NULL, NULL, 0 }, /* EN */
451 { roff_eo, NULL, NULL, 0 }, /* eo */
452 { roff_unsupp, NULL, NULL, 0 }, /* EP */
453 { roff_EQ, NULL, NULL, 0 }, /* EQ */
454 { roff_line_ignore, NULL, NULL, 0 }, /* errprint */
455 { roff_unsupp, NULL, NULL, 0 }, /* ev */
456 { roff_unsupp, NULL, NULL, 0 }, /* evc */
457 { roff_unsupp, NULL, NULL, 0 }, /* ex */
458 { roff_line_ignore, NULL, NULL, 0 }, /* fallback */
459 { roff_line_ignore, NULL, NULL, 0 }, /* fam */
460 { roff_unsupp, NULL, NULL, 0 }, /* fc */
461 { roff_unsupp, NULL, NULL, 0 }, /* fchar */
462 { roff_line_ignore, NULL, NULL, 0 }, /* fcolor */
463 { roff_line_ignore, NULL, NULL, 0 }, /* fdeferlig */
464 { roff_line_ignore, NULL, NULL, 0 }, /* feature */
465 { roff_line_ignore, NULL, NULL, 0 }, /* fkern */
466 { roff_line_ignore, NULL, NULL, 0 }, /* fl */
467 { roff_line_ignore, NULL, NULL, 0 }, /* flig */
468 { roff_line_ignore, NULL, NULL, 0 }, /* fp */
469 { roff_line_ignore, NULL, NULL, 0 }, /* fps */
470 { roff_unsupp, NULL, NULL, 0 }, /* fschar */
471 { roff_line_ignore, NULL, NULL, 0 }, /* fspacewidth */
472 { roff_line_ignore, NULL, NULL, 0 }, /* fspecial */
473 { roff_line_ignore, NULL, NULL, 0 }, /* ftr */
474 { roff_line_ignore, NULL, NULL, 0 }, /* fzoom */
475 { roff_line_ignore, NULL, NULL, 0 }, /* gcolor */
476 { roff_line_ignore, NULL, NULL, 0 }, /* hc */
477 { roff_line_ignore, NULL, NULL, 0 }, /* hcode */
478 { roff_line_ignore, NULL, NULL, 0 }, /* hidechar */
479 { roff_line_ignore, NULL, NULL, 0 }, /* hla */
480 { roff_line_ignore, NULL, NULL, 0 }, /* hlm */
481 { roff_line_ignore, NULL, NULL, 0 }, /* hpf */
482 { roff_line_ignore, NULL, NULL, 0 }, /* hpfa */
483 { roff_line_ignore, NULL, NULL, 0 }, /* hpfcode */
484 { roff_line_ignore, NULL, NULL, 0 }, /* hw */
485 { roff_line_ignore, NULL, NULL, 0 }, /* hy */
486 { roff_line_ignore, NULL, NULL, 0 }, /* hylang */
487 { roff_line_ignore, NULL, NULL, 0 }, /* hylen */
488 { roff_line_ignore, NULL, NULL, 0 }, /* hym */
489 { roff_line_ignore, NULL, NULL, 0 }, /* hypp */
490 { roff_line_ignore, NULL, NULL, 0 }, /* hys */
491 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* ie */
492 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* if */
493 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ig */
494 { roff_unsupp, NULL, NULL, 0 }, /* index */
495 { roff_it, NULL, NULL, 0 }, /* it */
496 { roff_unsupp, NULL, NULL, 0 }, /* itc */
497 { roff_line_ignore, NULL, NULL, 0 }, /* IX */
498 { roff_line_ignore, NULL, NULL, 0 }, /* kern */
499 { roff_line_ignore, NULL, NULL, 0 }, /* kernafter */
500 { roff_line_ignore, NULL, NULL, 0 }, /* kernbefore */
501 { roff_line_ignore, NULL, NULL, 0 }, /* kernpair */
502 { roff_unsupp, NULL, NULL, 0 }, /* lc */
503 { roff_unsupp, NULL, NULL, 0 }, /* lc_ctype */
504 { roff_unsupp, NULL, NULL, 0 }, /* lds */
505 { roff_unsupp, NULL, NULL, 0 }, /* length */
506 { roff_line_ignore, NULL, NULL, 0 }, /* letadj */
507 { roff_insec, NULL, NULL, 0 }, /* lf */
508 { roff_line_ignore, NULL, NULL, 0 }, /* lg */
509 { roff_line_ignore, NULL, NULL, 0 }, /* lhang */
510 { roff_unsupp, NULL, NULL, 0 }, /* linetabs */
511 { roff_unsupp, NULL, NULL, 0 }, /* lnr */
512 { roff_unsupp, NULL, NULL, 0 }, /* lnrf */
513 { roff_unsupp, NULL, NULL, 0 }, /* lpfx */
514 { roff_line_ignore, NULL, NULL, 0 }, /* ls */
515 { roff_unsupp, NULL, NULL, 0 }, /* lsm */
516 { roff_line_ignore, NULL, NULL, 0 }, /* lt */
517 { roff_line_ignore, NULL, NULL, 0 }, /* mediasize */
518 { roff_line_ignore, NULL, NULL, 0 }, /* minss */
519 { roff_line_ignore, NULL, NULL, 0 }, /* mk */
520 { roff_insec, NULL, NULL, 0 }, /* mso */
521 { roff_line_ignore, NULL, NULL, 0 }, /* na */
522 { roff_line_ignore, NULL, NULL, 0 }, /* ne */
523 { roff_line_ignore, NULL, NULL, 0 }, /* nh */
524 { roff_line_ignore, NULL, NULL, 0 }, /* nhychar */
525 { roff_unsupp, NULL, NULL, 0 }, /* nm */
526 { roff_unsupp, NULL, NULL, 0 }, /* nn */
527 { roff_nop, NULL, NULL, 0 }, /* nop */
528 { roff_nr, NULL, NULL, 0 }, /* nr */
529 { roff_unsupp, NULL, NULL, 0 }, /* nrf */
530 { roff_line_ignore, NULL, NULL, 0 }, /* nroff */
531 { roff_line_ignore, NULL, NULL, 0 }, /* ns */
532 { roff_insec, NULL, NULL, 0 }, /* nx */
533 { roff_insec, NULL, NULL, 0 }, /* open */
534 { roff_insec, NULL, NULL, 0 }, /* opena */
535 { roff_line_ignore, NULL, NULL, 0 }, /* os */
536 { roff_unsupp, NULL, NULL, 0 }, /* output */
537 { roff_line_ignore, NULL, NULL, 0 }, /* padj */
538 { roff_line_ignore, NULL, NULL, 0 }, /* papersize */
539 { roff_line_ignore, NULL, NULL, 0 }, /* pc */
540 { roff_line_ignore, NULL, NULL, 0 }, /* pev */
541 { roff_insec, NULL, NULL, 0 }, /* pi */
542 { roff_unsupp, NULL, NULL, 0 }, /* PI */
543 { roff_line_ignore, NULL, NULL, 0 }, /* pl */
544 { roff_line_ignore, NULL, NULL, 0 }, /* pm */
545 { roff_line_ignore, NULL, NULL, 0 }, /* pn */
546 { roff_line_ignore, NULL, NULL, 0 }, /* pnr */
547 { roff_line_ignore, NULL, NULL, 0 }, /* ps */
548 { roff_unsupp, NULL, NULL, 0 }, /* psbb */
549 { roff_unsupp, NULL, NULL, 0 }, /* pshape */
550 { roff_insec, NULL, NULL, 0 }, /* pso */
551 { roff_line_ignore, NULL, NULL, 0 }, /* ptr */
552 { roff_line_ignore, NULL, NULL, 0 }, /* pvs */
553 { roff_unsupp, NULL, NULL, 0 }, /* rchar */
554 { roff_line_ignore, NULL, NULL, 0 }, /* rd */
555 { roff_line_ignore, NULL, NULL, 0 }, /* recursionlimit */
556 { roff_return, NULL, NULL, 0 }, /* return */
557 { roff_unsupp, NULL, NULL, 0 }, /* rfschar */
558 { roff_line_ignore, NULL, NULL, 0 }, /* rhang */
559 { roff_rm, NULL, NULL, 0 }, /* rm */
560 { roff_rn, NULL, NULL, 0 }, /* rn */
561 { roff_unsupp, NULL, NULL, 0 }, /* rnn */
562 { roff_rr, NULL, NULL, 0 }, /* rr */
563 { roff_line_ignore, NULL, NULL, 0 }, /* rs */
564 { roff_line_ignore, NULL, NULL, 0 }, /* rt */
565 { roff_unsupp, NULL, NULL, 0 }, /* schar */
566 { roff_line_ignore, NULL, NULL, 0 }, /* sentchar */
567 { roff_line_ignore, NULL, NULL, 0 }, /* shc */
568 { roff_shift, NULL, NULL, 0 }, /* shift */
569 { roff_line_ignore, NULL, NULL, 0 }, /* sizes */
570 { roff_so, NULL, NULL, 0 }, /* so */
571 { roff_line_ignore, NULL, NULL, 0 }, /* spacewidth */
572 { roff_line_ignore, NULL, NULL, 0 }, /* special */
573 { roff_line_ignore, NULL, NULL, 0 }, /* spreadwarn */
574 { roff_line_ignore, NULL, NULL, 0 }, /* ss */
575 { roff_line_ignore, NULL, NULL, 0 }, /* sty */
576 { roff_unsupp, NULL, NULL, 0 }, /* substring */
577 { roff_line_ignore, NULL, NULL, 0 }, /* sv */
578 { roff_insec, NULL, NULL, 0 }, /* sy */
579 { roff_T_, NULL, NULL, 0 }, /* T& */
580 { roff_unsupp, NULL, NULL, 0 }, /* tc */
581 { roff_TE, NULL, NULL, 0 }, /* TE */
582 { roff_Dd, NULL, NULL, 0 }, /* TH */
583 { roff_line_ignore, NULL, NULL, 0 }, /* tkf */
584 { roff_unsupp, NULL, NULL, 0 }, /* tl */
585 { roff_line_ignore, NULL, NULL, 0 }, /* tm */
586 { roff_line_ignore, NULL, NULL, 0 }, /* tm1 */
587 { roff_line_ignore, NULL, NULL, 0 }, /* tmc */
588 { roff_tr, NULL, NULL, 0 }, /* tr */
589 { roff_line_ignore, NULL, NULL, 0 }, /* track */
590 { roff_line_ignore, NULL, NULL, 0 }, /* transchar */
591 { roff_insec, NULL, NULL, 0 }, /* trf */
592 { roff_line_ignore, NULL, NULL, 0 }, /* trimat */
593 { roff_unsupp, NULL, NULL, 0 }, /* trin */
594 { roff_unsupp, NULL, NULL, 0 }, /* trnt */
595 { roff_line_ignore, NULL, NULL, 0 }, /* troff */
596 { roff_TS, NULL, NULL, 0 }, /* TS */
597 { roff_line_ignore, NULL, NULL, 0 }, /* uf */
598 { roff_line_ignore, NULL, NULL, 0 }, /* ul */
599 { roff_unsupp, NULL, NULL, 0 }, /* unformat */
600 { roff_line_ignore, NULL, NULL, 0 }, /* unwatch */
601 { roff_line_ignore, NULL, NULL, 0 }, /* unwatchn */
602 { roff_line_ignore, NULL, NULL, 0 }, /* vpt */
603 { roff_line_ignore, NULL, NULL, 0 }, /* vs */
604 { roff_line_ignore, NULL, NULL, 0 }, /* warn */
605 { roff_line_ignore, NULL, NULL, 0 }, /* warnscale */
606 { roff_line_ignore, NULL, NULL, 0 }, /* watch */
607 { roff_line_ignore, NULL, NULL, 0 }, /* watchlength */
608 { roff_line_ignore, NULL, NULL, 0 }, /* watchn */
609 { roff_unsupp, NULL, NULL, 0 }, /* wh */
610 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* while */
611 { roff_insec, NULL, NULL, 0 }, /* write */
612 { roff_insec, NULL, NULL, 0 }, /* writec */
613 { roff_insec, NULL, NULL, 0 }, /* writem */
614 { roff_line_ignore, NULL, NULL, 0 }, /* xflag */
615 { roff_cblock, NULL, NULL, 0 }, /* . */
616 { roff_renamed, NULL, NULL, 0 }, /* presumably ROFF_RENAMED */
617 { roff_userdef, NULL, NULL, 0 } /* presumably ROFF_USERDEF */
618 };
619
620 /* Array of injected predefined strings. */
621 #define PREDEFS_MAX 38
622 static const struct predef predefs[PREDEFS_MAX] = {
623 #include "predefs.in"
624 };
625
/* File-wide state of the pending `ce' and `it' requests. */
static	int		 roffce_lines;	/* input lines left to center */
static	struct roff_node *roffce_node;	/* the active request */
static	int		 roffit_lines;	/* lines left to delay */
static	char		*roffit_macro;	/* NUL-terminated macro line */
630
631
632 /* --- request table ------------------------------------------------------ */
633
634 struct ohash *
635 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
636 {
637 struct ohash *htab;
638 struct roffreq *req;
639 enum roff_tok tok;
640 size_t sz;
641 unsigned int slot;
642
643 htab = mandoc_malloc(sizeof(*htab));
644 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
645
646 for (tok = mintok; tok < maxtok; tok++) {
647 if (roff_name[tok] == NULL)
648 continue;
649 sz = strlen(roff_name[tok]);
650 req = mandoc_malloc(sizeof(*req) + sz + 1);
651 req->tok = tok;
652 memcpy(req->name, roff_name[tok], sz + 1);
653 slot = ohash_qlookup(htab, req->name);
654 ohash_insert(htab, slot, req);
655 }
656 return htab;
657 }
658
/*
 * Free every entry of a table built by roffhash_alloc(),
 * then the table itself.  A NULL table is accepted.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 idx;

	if (htab != NULL) {
		entry = ohash_first(htab, &idx);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &idx);
		}
		ohash_delete(htab);
		free(htab);
	}
}
673
674 enum roff_tok
675 roffhash_find(struct ohash *htab, const char *name, size_t sz)
676 {
677 struct roffreq *req;
678 const char *end;
679
680 if (sz) {
681 end = name + sz;
682 req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
683 } else
684 req = ohash_find(htab, ohash_qlookup(htab, name));
685 return req == NULL ? TOKEN_NONE : req->tok;
686 }
687
688 /* --- stack of request blocks -------------------------------------------- */
689
690 /*
691 * Pop the current node off of the stack of roff instructions currently
692 * pending. Return 1 if it is a loop or 0 otherwise.
693 */
694 static int
695 roffnode_pop(struct roff *r)
696 {
697 struct roffnode *p;
698 int inloop;
699
700 p = r->last;
701 inloop = p->tok == ROFF_while;
702 r->last = p->parent;
703 free(p->name);
704 free(p->end);
705 free(p);
706 return inloop;
707 }
708
709 /*
710 * Push a roff node onto the instruction stack. This must later be
711 * removed with roffnode_pop().
712 */
713 static void
714 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
715 int line, int col)
716 {
717 struct roffnode *p;
718
719 p = mandoc_calloc(1, sizeof(struct roffnode));
720 p->tok = tok;
721 if (name)
722 p->name = mandoc_strdup(name);
723 p->parent = r->last;
724 p->line = line;
725 p->col = col;
726 p->rule = p->parent ? p->parent->rule : 0;
727
728 r->last = p;
729 }
730
731 /* --- roff parser state data management ---------------------------------- */
732
733 static void
734 roff_free1(struct roff *r)
735 {
736 int i;
737
738 tbl_free(r->first_tbl);
739 r->first_tbl = r->last_tbl = r->tbl = NULL;
740
741 eqn_free(r->last_eqn);
742 r->last_eqn = r->eqn = NULL;
743
744 while (r->mstackpos >= 0)
745 roff_userret(r);
746
747 while (r->last)
748 roffnode_pop(r);
749
750 free (r->rstack);
751 r->rstack = NULL;
752 r->rstacksz = 0;
753 r->rstackpos = -1;
754
755 roff_freereg(r->regtab);
756 r->regtab = NULL;
757
758 roff_freestr(r->strtab);
759 roff_freestr(r->rentab);
760 roff_freestr(r->xmbtab);
761 r->strtab = r->rentab = r->xmbtab = NULL;
762
763 if (r->xtab)
764 for (i = 0; i < 128; i++)
765 free(r->xtab[i].p);
766 free(r->xtab);
767 r->xtab = NULL;
768 }
769
770 void
771 roff_reset(struct roff *r)
772 {
773 roff_free1(r);
774 r->options |= MPARSE_COMMENT;
775 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
776 r->control = '\0';
777 r->escape = '\\';
778 roffce_lines = 0;
779 roffce_node = NULL;
780 roffit_lines = 0;
781 roffit_macro = NULL;
782 }
783
784 void
785 roff_free(struct roff *r)
786 {
787 int i;
788
789 roff_free1(r);
790 for (i = 0; i < r->mstacksz; i++)
791 free(r->mstack[i].argv);
792 free(r->mstack);
793 roffhash_free(r->reqtab);
794 free(r);
795 }
796
797 struct roff *
798 roff_alloc(int options)
799 {
800 struct roff *r;
801
802 r = mandoc_calloc(1, sizeof(struct roff));
803 r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
804 r->options = options | MPARSE_COMMENT;
805 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
806 r->mstackpos = -1;
807 r->rstackpos = -1;
808 r->escape = '\\';
809 return r;
810 }
811
812 /* --- syntax tree state data management ---------------------------------- */
813
814 static void
815 roff_man_free1(struct roff_man *man)
816 {
817 if (man->meta.first != NULL)
818 roff_node_delete(man, man->meta.first);
819 free(man->meta.msec);
820 free(man->meta.vol);
821 free(man->meta.os);
822 free(man->meta.arch);
823 free(man->meta.title);
824 free(man->meta.name);
825 free(man->meta.date);
826 free(man->meta.sodest);
827 }
828
829 void
830 roff_state_reset(struct roff_man *man)
831 {
832 man->last = man->meta.first;
833 man->last_es = NULL;
834 man->flags = 0;
835 man->lastsec = man->lastnamed = SEC_NONE;
836 man->next = ROFF_NEXT_CHILD;
837 roff_setreg(man->roff, "nS", 0, '=');
838 }
839
840 static void
841 roff_man_alloc1(struct roff_man *man)
842 {
843 memset(&man->meta, 0, sizeof(man->meta));
844 man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
845 man->meta.first->type = ROFFT_ROOT;
846 man->meta.macroset = MACROSET_NONE;
847 roff_state_reset(man);
848 }
849
/*
 * Throw away the present syntax tree and meta data
 * and begin again with an empty tree.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);	/* release the old contents */
	roff_man_alloc1(man);	/* set up a new root */
}
856
/*
 * Destroy a syntax tree object including all it owns.
 */
void
roff_man_free(struct roff_man *man)
{
	roff_man_free1(man);	/* contents first */
	free(man);		/* then the object itself */
}
863
864 struct roff_man *
865 roff_man_alloc(struct roff *roff, const char *os_s, int quick)
866 {
867 struct roff_man *man;
868
869 man = mandoc_calloc(1, sizeof(*man));
870 man->roff = roff;
871 man->os_s = os_s;
872 man->quick = quick;
873 roff_man_alloc1(man);
874 roff->man = man;
875 return man;
876 }
877
878 /* --- syntax tree handling ----------------------------------------------- */
879
880 struct roff_node *
881 roff_node_alloc(struct roff_man *man, int line, int pos,
882 enum roff_type type, int tok)
883 {
884 struct roff_node *n;
885
886 n = mandoc_calloc(1, sizeof(*n));
887 n->line = line;
888 n->pos = pos;
889 n->tok = tok;
890 n->type = type;
891 n->sec = man->lastsec;
892
893 if (man->flags & MDOC_SYNOPSIS)
894 n->flags |= NODE_SYNPRETTY;
895 else
896 n->flags &= ~NODE_SYNPRETTY;
897 if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL)
898 n->flags |= NODE_NOFILL;
899 else
900 n->flags &= ~NODE_NOFILL;
901 if (man->flags & MDOC_NEWLINE)
902 n->flags |= NODE_LINE;
903 man->flags &= ~MDOC_NEWLINE;
904
905 return n;
906 }
907
908 void
909 roff_node_append(struct roff_man *man, struct roff_node *n)
910 {
911
912 switch (man->next) {
913 case ROFF_NEXT_SIBLING:
914 if (man->last->next != NULL) {
915 n->next = man->last->next;
916 man->last->next->prev = n;
917 } else
918 man->last->parent->last = n;
919 man->last->next = n;
920 n->prev = man->last;
921 n->parent = man->last->parent;
922 break;
923 case ROFF_NEXT_CHILD:
924 if (man->last->child != NULL) {
925 n->next = man->last->child;
926 man->last->child->prev = n;
927 } else
928 man->last->last = n;
929 man->last->child = n;
930 n->parent = man->last;
931 break;
932 default:
933 abort();
934 }
935 man->last = n;
936
937 switch (n->type) {
938 case ROFFT_HEAD:
939 n->parent->head = n;
940 break;
941 case ROFFT_BODY:
942 if (n->end != ENDBODY_NOT)
943 return;
944 n->parent->body = n;
945 break;
946 case ROFFT_TAIL:
947 n->parent->tail = n;
948 break;
949 default:
950 return;
951 }
952
953 /*
954 * Copy over the normalised-data pointer of our parent. Not
955 * everybody has one, but copying a null pointer is fine.
956 */
957
958 n->norm = n->parent->norm;
959 assert(n->parent->type == ROFFT_BLOCK);
960 }
961
962 void
963 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
964 {
965 struct roff_node *n;
966
967 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
968 n->string = roff_strdup(man->roff, word);
969 roff_node_append(man, n);
970 n->flags |= NODE_VALID | NODE_ENDED;
971 man->next = ROFF_NEXT_SIBLING;
972 }
973
974 void
975 roff_word_append(struct roff_man *man, const char *word)
976 {
977 struct roff_node *n;
978 char *addstr, *newstr;
979
980 n = man->last;
981 addstr = roff_strdup(man->roff, word);
982 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
983 free(addstr);
984 free(n->string);
985 n->string = newstr;
986 man->next = ROFF_NEXT_SIBLING;
987 }
988
989 void
990 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
991 {
992 struct roff_node *n;
993
994 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
995 roff_node_append(man, n);
996 man->next = ROFF_NEXT_CHILD;
997 }
998
999 struct roff_node *
1000 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
1001 {
1002 struct roff_node *n;
1003
1004 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1005 roff_node_append(man, n);
1006 man->next = ROFF_NEXT_CHILD;
1007 return n;
1008 }
1009
1010 struct roff_node *
1011 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1012 {
1013 struct roff_node *n;
1014
1015 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1016 roff_node_append(man, n);
1017 man->next = ROFF_NEXT_CHILD;
1018 return n;
1019 }
1020
1021 struct roff_node *
1022 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1023 {
1024 struct roff_node *n;
1025
1026 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1027 roff_node_append(man, n);
1028 man->next = ROFF_NEXT_CHILD;
1029 return n;
1030 }
1031
1032 static void
1033 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1034 {
1035 struct roff_node *n;
1036 struct tbl_span *span;
1037
1038 if (man->meta.macroset == MACROSET_MAN)
1039 man_breakscope(man, ROFF_TS);
1040 while ((span = tbl_span(tbl)) != NULL) {
1041 n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1042 n->span = span;
1043 roff_node_append(man, n);
1044 n->flags |= NODE_VALID | NODE_ENDED;
1045 man->next = ROFF_NEXT_SIBLING;
1046 }
1047 }
1048
1049 void
1050 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1051 {
1052
1053 /* Adjust siblings. */
1054
1055 if (n->prev)
1056 n->prev->next = n->next;
1057 if (n->next)
1058 n->next->prev = n->prev;
1059
1060 /* Adjust parent. */
1061
1062 if (n->parent != NULL) {
1063 if (n->parent->child == n)
1064 n->parent->child = n->next;
1065 if (n->parent->last == n)
1066 n->parent->last = n->prev;
1067 }
1068
1069 /* Adjust parse point. */
1070
1071 if (man == NULL)
1072 return;
1073 if (man->last == n) {
1074 if (n->prev == NULL) {
1075 man->last = n->parent;
1076 man->next = ROFF_NEXT_CHILD;
1077 } else {
1078 man->last = n->prev;
1079 man->next = ROFF_NEXT_SIBLING;
1080 }
1081 }
1082 if (man->meta.first == n)
1083 man->meta.first = NULL;
1084 }
1085
1086 void
1087 roff_node_relink(struct roff_man *man, struct roff_node *n)
1088 {
1089 roff_node_unlink(man, n);
1090 n->prev = n->next = NULL;
1091 roff_node_append(man, n);
1092 }
1093
1094 void
1095 roff_node_free(struct roff_node *n)
1096 {
1097
1098 if (n->args != NULL)
1099 mdoc_argv_free(n->args);
1100 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1101 free(n->norm);
1102 eqn_box_free(n->eqn);
1103 free(n->string);
1104 free(n);
1105 }
1106
1107 void
1108 roff_node_delete(struct roff_man *man, struct roff_node *n)
1109 {
1110
1111 while (n->child != NULL)
1112 roff_node_delete(man, n->child);
1113 roff_node_unlink(man, n);
1114 roff_node_free(n);
1115 }
1116
1117 void
1118 deroff(char **dest, const struct roff_node *n)
1119 {
1120 char *cp;
1121 size_t sz;
1122
1123 if (n->type != ROFFT_TEXT) {
1124 for (n = n->child; n != NULL; n = n->next)
1125 deroff(dest, n);
1126 return;
1127 }
1128
1129 /* Skip leading whitespace. */
1130
1131 for (cp = n->string; *cp != '\0'; cp++) {
1132 if (cp[0] == '\\' && cp[1] != '\0' &&
1133 strchr(" %&0^|~", cp[1]) != NULL)
1134 cp++;
1135 else if ( ! isspace((unsigned char)*cp))
1136 break;
1137 }
1138
1139 /* Skip trailing backslash. */
1140
1141 sz = strlen(cp);
1142 if (sz > 0 && cp[sz - 1] == '\\')
1143 sz--;
1144
1145 /* Skip trailing whitespace. */
1146
1147 for (; sz; sz--)
1148 if ( ! isspace((unsigned char)cp[sz-1]))
1149 break;
1150
1151 /* Skip empty strings. */
1152
1153 if (sz == 0)
1154 return;
1155
1156 if (*dest == NULL) {
1157 *dest = mandoc_strndup(cp, sz);
1158 return;
1159 }
1160
1161 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1162 free(*dest);
1163 *dest = cp;
1164 }
1165
1166 /* --- main functions of the roff parser ---------------------------------- */
1167
1168 /*
1169 * In the current line, expand escape sequences that produce parsable
1170 * input text. Also check the syntax of the remaining escape sequences,
1171 * which typically produce output glyphs or change formatter state.
/*
 * Processing starts at offset pos of buf->buf, and newesc is the
 * escape character to look for.  The buffer may be reallocated,
 * in which case buf->buf and buf->sz are updated.
 *
 * Return values, as visible in this function:
 * ROFF_CONT: the line was processed and can be parsed further.
 * ROFF_IGN | ROFF_APPEND: the line ended with a "\#" comment or with
 * an unescaped escape character before the end of the input was seen.
 * ROFF_IGN: more than EXPAND_LIMIT expansions were needed or the
 * buffer would have grown beyond SHRT_MAX bytes.
 */
1172 */
1173 static int
1174 roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc)
1175 {
1176 struct mctx *ctx; /* current macro call context */
1177 char ubuf[24]; /* buffer to print the number */
1178 struct roff_node *n; /* used for header comments */
1179 const char *start; /* start of the string to process */
1180 char *stesc; /* start of an escape sequence ('\\') */
1181 const char *esct; /* type of escape sequence */
1182 char *ep; /* end of comment string */
1183 const char *stnam; /* start of the name, after "[(*" */
1184 const char *cp; /* end of the name, e.g. before ']' */
1185 const char *res; /* the string to be substituted */
1186 char *nbuf; /* new buffer to copy buf->buf to */
1187 size_t maxl; /* expected length of the escape name */
1188 size_t naml; /* actual length of the escape name */
1189 size_t asz; /* length of the replacement */
1190 size_t rsz; /* length of the rest of the string */
1191 int inaml; /* length returned from mandoc_escape() */
1192 int expand_count; /* to avoid infinite loops */
1193 int npos; /* position in numeric expression */
1194 int arg_complete; /* argument not interrupted by eol */
1195 int quote_args; /* true for \\$@, false for \\$* */
1196 int done; /* no more input available */
1197 int deftype; /* type of definition to paste */
1198 int rcsid; /* kind of RCS id seen */
1199 enum mandocerr err; /* for escape sequence problems */
1200 char sign; /* increment number register */
1201 char term; /* character terminating the escape */
1202
1203 /* Search forward for comments. */
1204
1205 done = 0;
1206 start = buf->buf + pos;
1207 for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
1208 if (stesc[0] != newesc || stesc[1] == '\0')
1209 continue;
1210 stesc++;
1211 if (*stesc != '"' && *stesc != '#')
1212 continue;
1213
1214 /* Comment found, look for RCS id. */
1215
1216 rcsid = 0;
1217 if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
1218 rcsid = 1 << MANDOC_OS_OPENBSD;
1219 cp += 8;
1220 } else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
1221 rcsid = 1 << MANDOC_OS_NETBSD;
1222 cp += 7;
1223 }
1224 if (cp != NULL &&
1225 isalnum((unsigned char)*cp) == 0 &&
1226 strchr(cp, '$') != NULL) {
1227 if (r->man->meta.rcsids & rcsid)
1228 mandoc_msg(MANDOCERR_RCS_REP, ln,
1229 (int)(stesc - buf->buf) + 1,
1230 "%s", stesc + 1);
1231 r->man->meta.rcsids |= rcsid;
1232 }
1233
1234 /* Handle trailing whitespace. */
1235
/* ep: last byte of the line; stesc steps back onto the escape character. */
1236 ep = strchr(stesc--, '\0') - 1;
1237 if (*ep == '\n') {
1238 done = 1;
1239 ep--;
1240 }
1241 if (*ep == ' ' || *ep == '\t')
1242 mandoc_msg(MANDOCERR_SPACE_EOL,
1243 ln, (int)(ep - buf->buf), NULL);
1244
1245 /*
1246 * Save comments preceding the title macro
1247 * in the syntax tree.
1248 */
1249
1250 if (newesc != ASCII_ESC && r->options & MPARSE_COMMENT) {
1251 while (*ep == ' ' || *ep == '\t')
1252 ep--;
1253 ep[1] = '\0';
1254 n = roff_node_alloc(r->man,
1255 ln, stesc + 1 - buf->buf,
1256 ROFFT_COMMENT, TOKEN_NONE);
1257 n->string = mandoc_strdup(stesc + 2);
1258 roff_node_append(r->man, n);
1259 n->flags |= NODE_VALID | NODE_ENDED;
1260 r->man->next = ROFF_NEXT_SIBLING;
1261 }
1262
1263 /* Line continuation with comment. */
1264
1265 if (stesc[1] == '#') {
1266 *stesc = '\0';
1267 return ROFF_IGN | ROFF_APPEND;
1268 }
1269
1270 /* Discard normal comments. */
1271
/* Blanks right before the comment go away too, unless the blank is escaped. */
1272 while (stesc > start && stesc[-1] == ' ' &&
1273 (stesc == start + 1 || stesc[-2] != '\\'))
1274 stesc--;
1275 *stesc = '\0';
1276 break;
1277 }
/* stesc now points to the terminating NUL; nothing to do for empty text. */
1278 if (stesc == start)
1279 return ROFF_CONT;
1280 stesc--;
1281
1282 /* Notice the end of the input. */
1283
1284 if (*stesc == '\n') {
1285 *stesc-- = '\0';
1286 done = 1;
1287 }
1288
/* Scan backwards from the end of the line for escape characters. */
1289 expand_count = 0;
1290 while (stesc >= start) {
1291 if (*stesc != newesc) {
1292
1293 /*
1294 * If we have a non-standard escape character,
1295 * escape literal backslashes because all
1296 * processing in subsequent functions uses
1297 * the standard escaping rules.
1298 */
1299
1300 if (newesc != ASCII_ESC && *stesc == '\\') {
1301 *stesc = '\0';
1302 buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
1303 buf->buf, stesc + 1) + 1;
1304 start = nbuf + pos;
1305 stesc = nbuf + (stesc - buf->buf);
1306 free(buf->buf);
1307 buf->buf = nbuf;
1308 }
1309
1310 /* Search backwards for the next escape. */
1311
1312 stesc--;
1313 continue;
1314 }
1315
1316 /* If it is escaped, skip it. */
1317
/*
 * After this loop, cp points just before the run of r->escape
 * characters that directly precedes stesc.  An even distance means
 * the escape characters pair up, so all of them are converted to
 * backslashes and skipped.
 */
1318 for (cp = stesc - 1; cp >= start; cp--)
1319 if (*cp != r->escape)
1320 break;
1321
1322 if ((stesc - cp) % 2 == 0) {
1323 while (stesc > cp)
1324 *stesc-- = '\\';
1325 continue;
1326 } else if (stesc[1] != '\0') {
1327 *stesc = '\\';
1328 } else {
/*
 * An unescaped escape character at the very end of the line:
 * delete it, and unless the end of the input was already seen,
 * return the same code as for a line continuation with comment.
 */
1329 *stesc-- = '\0';
1330 if (done)
1331 continue;
1332 else
1333 return ROFF_IGN | ROFF_APPEND;
1334 }
1335
1336 /* Decide whether to expand or to check only. */
1337
1338 term = '\0';
1339 cp = stesc + 1;
1340 if (*cp == 'E')
1341 cp++;
1342 esct = cp;
1343 switch (*esct) {
1344 case '*':
1345 case '$':
1346 res = NULL;
1347 break;
1348 case 'B':
1349 case 'w':
1350 term = cp[1];
1351 /* FALLTHROUGH */
1352 case 'n':
1353 sign = cp[1];
1354 if (sign == '+' || sign == '-')
1355 cp++;
1356 res = ubuf;
1357 break;
1358 default:
/* All other escapes are only checked and left in place. */
1359 err = MANDOCERR_OK;
1360 switch(mandoc_escape(&cp, &stnam, &inaml)) {
1361 case ESCAPE_SPECIAL:
1362 if (mchars_spec2cp(stnam, inaml) >= 0)
1363 break;
1364 /* FALLTHROUGH */
1365 case ESCAPE_ERROR:
1366 err = MANDOCERR_ESC_BAD;
1367 break;
1368 case ESCAPE_UNDEF:
1369 err = MANDOCERR_ESC_UNDEF;
1370 break;
1371 case ESCAPE_UNSUPP:
1372 err = MANDOCERR_ESC_UNSUPP;
1373 break;
1374 default:
1375 break;
1376 }
1377 if (err != MANDOCERR_OK)
1378 mandoc_msg(err, ln, (int)(stesc - buf->buf),
1379 "%.*s", (int)(cp - stesc), stesc);
1380 stesc--;
1381 continue;
1382 }
1383
1384 if (EXPAND_LIMIT < ++expand_count) {
1385 mandoc_msg(MANDOCERR_ROFFLOOP,
1386 ln, (int)(stesc - buf->buf), NULL);
1387 return ROFF_IGN;
1388 }
1389
1390 /*
1391 * The third character decides the length
1392 * of the name of the string or register.
1393 * Save a pointer to the name.
1394 */
1395
1396 if (term == '\0') {
1397 switch (*++cp) {
1398 case '\0':
1399 maxl = 0;
1400 break;
1401 case '(':
1402 cp++;
1403 maxl = 2;
1404 break;
1405 case '[':
1406 cp++;
1407 term = ']';
1408 maxl = 0;
1409 break;
1410 default:
1411 maxl = 1;
1412 break;
1413 }
1414 } else {
1415 cp += 2;
1416 maxl = 0;
1417 }
1418 stnam = cp;
1419
1420 /* Advance to the end of the name. */
1421
/* maxl == 0 means: the name extends up to the terminating character. */
1422 naml = 0;
1423 arg_complete = 1;
1424 while (maxl == 0 || naml < maxl) {
1425 if (*cp == '\0') {
1426 mandoc_msg(MANDOCERR_ESC_BAD, ln,
1427 (int)(stesc - buf->buf), "%s", stesc);
1428 arg_complete = 0;
1429 break;
1430 }
1431 if (maxl == 0 && *cp == term) {
1432 cp++;
1433 break;
1434 }
1435 if (*cp++ != '\\' || *esct != 'w') {
1436 naml++;
1437 continue;
1438 }
/* Inside \w, only some kinds of nested escapes are counted. */
1439 switch (mandoc_escape(&cp, NULL, NULL)) {
1440 case ESCAPE_SPECIAL:
1441 case ESCAPE_UNICODE:
1442 case ESCAPE_NUMBERED:
1443 case ESCAPE_UNDEF:
1444 case ESCAPE_OVERSTRIKE:
1445 naml++;
1446 break;
1447 default:
1448 break;
1449 }
1450 }
1451
1452 /*
1453 * Retrieve the replacement string; if it is
1454 * undefined, resume searching for escapes.
1455 */
1456
1457 switch (*esct) {
1458 case '*':
1459 if (arg_complete) {
1460 deftype = ROFFDEF_USER | ROFFDEF_PRE;
1461 res = roff_getstrn(r, stnam, naml, &deftype);
1462
1463 /*
1464 * If not overridden, let \*(.T
1465 * through to the formatters.
1466 */
1467
1468 if (res == NULL && naml == 2 &&
1469 stnam[0] == '.' && stnam[1] == 'T') {
1470 roff_setstrn(&r->strtab,
1471 ".T", 2, NULL, 0, 0);
1472 stesc--;
1473 continue;
1474 }
1475 }
1476 break;
1477 case '$':
1478 if (r->mstackpos < 0) {
1479 mandoc_msg(MANDOCERR_ARG_UNDEF, ln,
1480 (int)(stesc - buf->buf), "%.3s", stesc);
1481 break;
1482 }
1483 ctx = r->mstack + r->mstackpos;
1484 npos = esct[1] - '1';
1485 if (npos >= 0 && npos <= 8) {
1486 res = npos < ctx->argc ?
1487 ctx->argv[npos] : "";
1488 break;
1489 }
1490 if (esct[1] == '*')
1491 quote_args = 0;
1492 else if (esct[1] == '@')
1493 quote_args = 1;
1494 else {
1495 mandoc_msg(MANDOCERR_ARG_NONUM, ln,
1496 (int)(stesc - buf->buf), "%.3s", stesc);
1497 break;
1498 }
/*
 * \$* and \$@: compute the total length of all arguments,
 * joined by blanks and, for \$@, each enclosed in double quotes.
 */
1499 asz = 0;
1500 for (npos = 0; npos < ctx->argc; npos++) {
1501 if (npos)
1502 asz++; /* blank */
1503 if (quote_args)
1504 asz += 2; /* quotes */
1505 asz += strlen(ctx->argv[npos]);
1506 }
/*
 * The escape sequence itself occupies three bytes.  If the
 * replacement has a different length, move the rest of the
 * line and resize the buffer: shrink after moving, grow
 * before moving.
 */
1507 if (asz != 3) {
1508 rsz = buf->sz - (stesc - buf->buf) - 3;
1509 if (asz < 3)
1510 memmove(stesc + asz, stesc + 3, rsz);
1511 buf->sz += asz - 3;
1512 nbuf = mandoc_realloc(buf->buf, buf->sz);
1513 start = nbuf + pos;
1514 stesc = nbuf + (stesc - buf->buf);
1515 buf->buf = nbuf;
1516 if (asz > 3)
1517 memmove(stesc + asz, stesc + 3, rsz);
1518 }
/* Write the arguments over the escape sequence; stesc ends up after them. */
1519 for (npos = 0; npos < ctx->argc; npos++) {
1520 if (npos)
1521 *stesc++ = ' ';
1522 if (quote_args)
1523 *stesc++ = '"';
1524 cp = ctx->argv[npos];
1525 while (*cp != '\0')
1526 *stesc++ = *cp++;
1527 if (quote_args)
1528 *stesc++ = '"';
1529 }
1530 continue;
1531 case 'B':
1532 npos = 0;
1533 ubuf[0] = arg_complete &&
1534 roff_evalnum(r, ln, stnam, &npos,
1535 NULL, ROFFNUM_SCALE) &&
1536 stnam + npos + 1 == cp ? '1' : '0';
1537 ubuf[1] = '\0';
1538 break;
1539 case 'n':
1540 if (arg_complete)
1541 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1542 roff_getregn(r, stnam, naml, sign));
1543 else
1544 ubuf[0] = '\0';
1545 break;
1546 case 'w':
1547 /* use even incomplete args */
/* The result is 24 times the number of characters counted above. */
1548 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1549 24 * (int)naml);
1550 break;
1551 }
1552
1553 if (res == NULL) {
1554 if (*esct == '*')
1555 mandoc_msg(MANDOCERR_STR_UNDEF,
1556 ln, (int)(stesc - buf->buf),
1557 "%.*s", (int)naml, stnam);
1558 res = "";
1559 } else if (buf->sz + strlen(res) > SHRT_MAX) {
1560 mandoc_msg(MANDOCERR_ROFFLOOP,
1561 ln, (int)(stesc - buf->buf), NULL);
1562 return ROFF_IGN;
1563 }
1564
1565 /* Replace the escape sequence by the string. */
1566
1567 *stesc = '\0';
1568 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
1569 buf->buf, res, cp) + 1;
1570
1571 /* Prepare for the next replacement. */
1572
/* Continue at the end of the inserted text, so it gets scanned as well. */
1573 start = nbuf + pos;
1574 stesc = nbuf + (stesc - buf->buf) + strlen(res);
1575 free(buf->buf);
1576 buf->buf = nbuf;
1577 }
1578 return ROFF_CONT;
1579 }
1580
1581 /*
1582 * Parse a quoted or unquoted roff-style request or macro argument.
1583 * Return a pointer to the parsed argument, which is either the original
1584 * pointer or advanced by one byte in case the argument is quoted.
1585 * NUL-terminate the argument in place.
1586 * Collapse pairs of quotes inside quoted arguments.
1587 * Advance the argument pointer to the next argument,
1588 * or to the NUL byte terminating the argument line.
1589 */
1590 char *
1591 roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
1592 {
1593 struct buf buf;
1594 char *cp, *start;
1595 int newesc, pairs, quoted, white;
1596
1597 /* Quoting can only start with a new word. */
1598 start = *cpp;
1599 quoted = 0;
1600 if ('"' == *start) {
1601 quoted = 1;
1602 start++;
1603 }
1604
1605 newesc = pairs = white = 0;
1606 for (cp = start; '\0' != *cp; cp++) {
1607
1608 /*
1609 * Move the following text left
1610 * after quoted quotes and after "\\" and "\t".
1611 */
1612 if (pairs)
1613 cp[-pairs] = cp[0];
1614
1615 if ('\\' == cp[0]) {
1616 /*
1617 * In copy mode, translate double to single
1618 * backslashes and backslash-t to literal tabs.
1619 */
1620 switch (cp[1]) {
1621 case 'a':
1622 case 't':
1623 cp[-pairs] = '\t';
1624 pairs++;
1625 cp++;
1626 break;
1627 case '\\':
1628 newesc = 1;
1629 cp[-pairs] = ASCII_ESC;
1630 pairs++;
1631 cp++;
1632 break;
1633 case ' ':
1634 /* Skip escaped blanks. */
1635 if (0 == quoted)
1636 cp++;
1637 break;
1638 default:
1639 break;
1640 }
1641 } else if (0 == quoted) {
1642 if (' ' == cp[0]) {
1643 /* Unescaped blanks end unquoted args. */
1644 white = 1;
1645 break;
1646 }
1647 } else if ('"' == cp[0]) {
1648 if ('"' == cp[1]) {
1649 /* Quoted quotes collapse. */
1650 pairs++;
1651 cp++;
1652 } else {
1653 /* Unquoted quotes end quoted args. */
1654 quoted = 2;
1655 break;
1656 }
1657 }
1658 }
1659
1660 /* Quoted argument without a closing quote. */
1661 if (1 == quoted)
1662 mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);
1663
1664 /* NUL-terminate this argument and move to the next one. */
1665 if (pairs)
1666 cp[-pairs] = '\0';
1667 if ('\0' != *cp) {
1668 *cp++ = '\0';
1669 while (' ' == *cp)
1670 cp++;
1671 }
1672 *pos += (int)(cp - start) + (quoted ? 1 : 0);
1673 *cpp = cp;
1674
1675 if ('\0' == *cp && (white || ' ' == cp[-1]))
1676 mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);
1677
1678 start = mandoc_strdup(start);
1679 if (newesc == 0)
1680 return start;
1681
1682 buf.buf = start;
1683 buf.sz = strlen(start) + 1;
1684 buf.next = NULL;
1685 if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) {
1686 free(buf.buf);
1687 buf.buf = mandoc_strdup("");
1688 }
1689 return buf.buf;
1690 }
1691
1692
1693 /*
1694 * Process text streams.
1695 */
1696 static int
1697 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1698 {
1699 size_t sz;
1700 const char *start;
1701 char *p;
1702 int isz;
1703 enum mandoc_esc esc;
1704
1705 /* Spring the input line trap. */
1706
1707 if (roffit_lines == 1) {
1708 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1709 free(buf->buf);
1710 buf->buf = p;
1711 buf->sz = isz + 1;
1712 *offs = 0;
1713 free(roffit_macro);
1714 roffit_lines = 0;
1715 return ROFF_REPARSE;
1716 } else if (roffit_lines > 1)
1717 --roffit_lines;
1718
1719 if (roffce_node != NULL && buf->buf[pos] != '\0') {
1720 if (roffce_lines < 1) {
1721 r->man->last = roffce_node;
1722 r->man->next = ROFF_NEXT_SIBLING;
1723 roffce_lines = 0;
1724 roffce_node = NULL;
1725 } else
1726 roffce_lines--;
1727 }
1728
1729 /* Convert all breakable hyphens into ASCII_HYPH. */
1730
1731 start = p = buf->buf + pos;
1732
1733 while (*p != '\0') {
1734 sz = strcspn(p, "-\\");
1735 p += sz;
1736
1737 if (*p == '\0')
1738 break;
1739
1740 if (*p == '\\') {
1741 /* Skip over escapes. */
1742 p++;
1743 esc = mandoc_escape((const char **)&p, NULL, NULL);
1744 if (esc == ESCAPE_ERROR)
1745 break;
1746 while (*p == '-')
1747 p++;
1748 continue;
1749 } else if (p == start) {
1750 p++;
1751 continue;
1752 }
1753
1754 if (isalpha((unsigned char)p[-1]) &&
1755 isalpha((unsigned char)p[1]))
1756 *p = ASCII_HYPH;
1757 p++;
1758 }
1759 return ROFF_CONT;
1760 }
1761
1762 int
1763 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
1764 {
1765 enum roff_tok t;
1766 int e;
1767 int pos; /* parse point */
1768 int spos; /* saved parse point for messages */
1769 int ppos; /* original offset in buf->buf */
1770 int ctl; /* macro line (boolean) */
1771
1772 ppos = pos = *offs;
1773
1774 /* Handle in-line equation delimiters. */
1775
1776 if (r->tbl == NULL &&
1777 r->last_eqn != NULL && r->last_eqn->delim &&
1778 (r->eqn == NULL || r->eqn_inline)) {
1779 e = roff_eqndelim(r, buf, pos);
1780 if (e == ROFF_REPARSE)
1781 return e;
1782 assert(e == ROFF_CONT);
1783 }
1784
1785 /* Expand some escape sequences. */
1786
1787 e = roff_expand(r, buf, ln, pos, r->escape);
1788 if ((e & ROFF_MASK) == ROFF_IGN)
1789 return e;
1790 assert(e == ROFF_CONT);
1791
1792 ctl = roff_getcontrol(r, buf->buf, &pos);
1793
1794 /*
1795 * First, if a scope is open and we're not a macro, pass the
1796 * text through the macro's filter.
1797 * Equations process all content themselves.
1798 * Tables process almost all content themselves, but we want
1799 * to warn about macros before passing it there.
1800 */
1801
1802 if (r->last != NULL && ! ctl) {
1803 t = r->last->tok;
1804 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1805 if ((e & ROFF_MASK) == ROFF_IGN)
1806 return e;
1807 e &= ~ROFF_MASK;
1808 } else
1809 e = ROFF_IGN;
1810 if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
1811 eqn_read(r->eqn, buf->buf + ppos);
1812 return e;
1813 }
1814 if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
1815 tbl_read(r->tbl, ln, buf->buf, ppos);
1816 roff_addtbl(r->man, ln, r->tbl);
1817 return e;
1818 }
1819 if ( ! ctl) {
1820 r->options &= ~MPARSE_COMMENT;
1821 return roff_parsetext(r, buf, pos, offs) | e;
1822 }
1823
1824 /* Skip empty request lines. */
1825
1826 if (buf->buf[pos] == '"') {
1827 mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
1828 return ROFF_IGN;
1829 } else if (buf->buf[pos] == '\0')
1830 return ROFF_IGN;
1831
1832 /*
1833 * If a scope is open, go to the child handler for that macro,
1834 * as it may want to preprocess before doing anything with it.
1835 * Don't do so if an equation is open.
1836 */
1837
1838 if (r->last) {
1839 t = r->last->tok;
1840 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1841 }
1842
1843 /* No scope is open. This is a new request or macro. */
1844
1845 r->options &= ~MPARSE_COMMENT;
1846 spos = pos;
1847 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1848
1849 /* Tables ignore most macros. */
1850
1851 if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
1852 t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
1853 mandoc_msg(MANDOCERR_TBLMACRO,
1854 ln, pos, "%s", buf->buf + spos);
1855 if (t != TOKEN_NONE)
1856 return ROFF_IGN;
1857 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1858 pos++;
1859 while (buf->buf[pos] == ' ')
1860 pos++;
1861 tbl_read(r->tbl, ln, buf->buf, pos);
1862 roff_addtbl(r->man, ln, r->tbl);
1863 return ROFF_IGN;
1864 }
1865
1866 /* For now, let high level macros abort .ce mode. */
1867
1868 if (ctl && roffce_node != NULL &&
1869 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
1870 t == ROFF_TH || t == ROFF_TS)) {
1871 r->man->last = roffce_node;
1872 r->man->next = ROFF_NEXT_SIBLING;
1873 roffce_lines = 0;
1874 roffce_node = NULL;
1875 }
1876
1877 /*
1878 * This is neither a roff request nor a user-defined macro.
1879 * Let the standard macro set parsers handle it.
1880 */
1881
1882 if (t == TOKEN_NONE)
1883 return ROFF_CONT;
1884
1885 /* Execute a roff request or a user defined macro. */
1886
1887 return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
1888 }
1889
1890 /*
1891 * Internal interface function to tell the roff parser that execution
1892 * of the current macro ended. This is required because macro
1893 * definitions usually do not end with a .return request.
1894 */
1895 void
1896 roff_userret(struct roff *r)
1897 {
1898 struct mctx *ctx;
1899 int i;
1900
1901 assert(r->mstackpos >= 0);
1902 ctx = r->mstack + r->mstackpos;
1903 for (i = 0; i < ctx->argc; i++)
1904 free(ctx->argv[i]);
1905 ctx->argc = 0;
1906 r->mstackpos--;
1907 }
1908
1909 void
1910 roff_endparse(struct roff *r)
1911 {
1912 if (r->last != NULL)
1913 mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
1914 r->last->col, "%s", roff_name[r->last->tok]);
1915
1916 if (r->eqn != NULL) {
1917 mandoc_msg(MANDOCERR_BLK_NOEND,
1918 r->eqn->node->line, r->eqn->node->pos, "EQ");
1919 eqn_parse(r->eqn);
1920 r->eqn = NULL;
1921 }
1922
1923 if (r->tbl != NULL) {
1924 tbl_end(r->tbl, 1);
1925 r->tbl = NULL;
1926 }
1927 }
1928
1929 /*
1930 * Parse a roff node's type from the input buffer. This must be in the
1931 * form of ".foo xxx" in the usual way.
1932 */
1933 static enum roff_tok
1934 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1935 {
1936 char *cp;
1937 const char *mac;
1938 size_t maclen;
1939 int deftype;
1940 enum roff_tok t;
1941
1942 cp = buf + *pos;
1943
1944 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1945 return TOKEN_NONE;
1946
1947 mac = cp;
1948 maclen = roff_getname(r, &cp, ln, ppos);
1949
1950 deftype = ROFFDEF_USER | ROFFDEF_REN;
1951 r->current_string = roff_getstrn(r, mac, maclen, &deftype);
1952 switch (deftype) {
1953 case ROFFDEF_USER:
1954 t = ROFF_USERDEF;
1955 break;
1956 case ROFFDEF_REN:
1957 t = ROFF_RENAMED;
1958 break;
1959 default:
1960 t = roffhash_find(r->reqtab, mac, maclen);
1961 break;
1962 }
1963 if (t != TOKEN_NONE)
1964 *pos = cp - buf;
1965 else if (deftype == ROFFDEF_UNDEF) {
1966 /* Using an undefined macro defines it to be empty. */
1967 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
1968 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
1969 }
1970 return t;
1971 }
1972
1973 /* --- handling of request blocks ----------------------------------------- */
1974
1975 static int
1976 roff_cblock(ROFF_ARGS)
1977 {
1978
1979 /*
1980 * A block-close `..' should only be invoked as a child of an
1981 * ignore macro, otherwise raise a warning and just ignore it.
1982 */
1983
1984 if (r->last == NULL) {
1985 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
1986 return ROFF_IGN;
1987 }
1988
1989 switch (r->last->tok) {
1990 case ROFF_am:
1991 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1992 case ROFF_ami:
1993 case ROFF_de:
1994 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1995 case ROFF_dei:
1996 case ROFF_ig:
1997 break;
1998 default:
1999 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2000 return ROFF_IGN;
2001 }
2002
2003 if (buf->buf[pos] != '\0')
2004 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
2005 ".. %s", buf->buf + pos);
2006
2007 roffnode_pop(r);
2008 roffnode_cleanscope(r);
2009 return ROFF_IGN;
2010
2011 }
2012
2013 /*
2014 * Pop all nodes ending at the end of the current input line.
2015 * Return the number of loops ended.
2016 */
2017 static int
2018 roffnode_cleanscope(struct roff *r)
2019 {
2020 int inloop;
2021
2022 inloop = 0;
2023 while (r->last != NULL) {
2024 if (--r->last->endspan != 0)
2025 break;
2026 inloop += roffnode_pop(r);
2027 }
2028 return inloop;
2029 }
2030
2031 /*
2032 * Handle the closing \} of a conditional block.
2033 * Apart from generating warnings, this only pops nodes.
2034 * Return the number of loops ended.
2035 */
2036 static int
2037 roff_ccond(struct roff *r, int ln, int ppos)
2038 {
2039 if (NULL == r->last) {
2040 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2041 return 0;
2042 }
2043
2044 switch (r->last->tok) {
2045 case ROFF_el:
2046 case ROFF_ie:
2047 case ROFF_if:
2048 case ROFF_while:
2049 break;
2050 default:
2051 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2052 return 0;
2053 }
2054
2055 if (r->last->endspan > -1) {
2056 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2057 return 0;
2058 }
2059
2060 return roffnode_pop(r) + roffnode_cleanscope(r);
2061 }
2062
2063 static int
2064 roff_block(ROFF_ARGS)
2065 {
2066 const char *name, *value;
2067 char *call, *cp, *iname, *rname;
2068 size_t csz, namesz, rsz;
2069 int deftype;
2070
2071 /* Ignore groff compatibility mode for now. */
2072
2073 if (tok == ROFF_de1)
2074 tok = ROFF_de;
2075 else if (tok == ROFF_dei1)
2076 tok = ROFF_dei;
2077 else if (tok == ROFF_am1)
2078 tok = ROFF_am;
2079 else if (tok == ROFF_ami1)
2080 tok = ROFF_ami;
2081
2082 /* Parse the macro name argument. */
2083
2084 cp = buf->buf + pos;
2085 if (tok == ROFF_ig) {
2086 iname = NULL;
2087 namesz = 0;
2088 } else {
2089 iname = cp;
2090 namesz = roff_getname(r, &cp, ln, ppos);
2091 iname[namesz] = '\0';
2092 }
2093
2094 /* Resolve the macro name argument if it is indirect. */
2095
2096 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2097 deftype = ROFFDEF_USER;
2098 name = roff_getstrn(r, iname, namesz, &deftype);
2099 if (name == NULL) {
2100 mandoc_msg(MANDOCERR_STR_UNDEF,
2101 ln, (int)(iname - buf->buf),
2102 "%.*s", (int)namesz, iname);
2103 namesz = 0;
2104 } else
2105 namesz = strlen(name);
2106 } else
2107 name = iname;
2108
2109 if (namesz == 0 && tok != ROFF_ig) {
2110 mandoc_msg(MANDOCERR_REQ_EMPTY,
2111 ln, ppos, "%s", roff_name[tok]);
2112 return ROFF_IGN;
2113 }
2114
2115 roffnode_push(r, tok, name, ln, ppos);
2116
2117 /*
2118 * At the beginning of a `de' macro, clear the existing string
2119 * with the same name, if there is one. New content will be
2120 * appended from roff_block_text() in multiline mode.
2121 */
2122
2123 if (tok == ROFF_de || tok == ROFF_dei) {
2124 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
2125 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2126 } else if (tok == ROFF_am || tok == ROFF_ami) {
2127 deftype = ROFFDEF_ANY;
2128 value = roff_getstrn(r, iname, namesz, &deftype);
2129 switch (deftype) { /* Before appending, ... */
2130 case ROFFDEF_PRE: /* copy predefined to user-defined. */
2131 roff_setstrn(&r->strtab, name, namesz,
2132 value, strlen(value), 0);
2133 break;
2134 case ROFFDEF_REN: /* call original standard macro. */
2135 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2136 (int)strlen(value), value);
2137 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2138 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2139 free(call);
2140 break;
2141 case ROFFDEF_STD: /* rename and call standard macro. */
2142 rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
2143 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
2144 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2145 (int)rsz, rname);
2146 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2147 free(call);
2148 free(rname);
2149 break;
2150 default:
2151 break;
2152 }
2153 }
2154
2155 if (*cp == '\0')
2156 return ROFF_IGN;
2157
2158 /* Get the custom end marker. */
2159
2160 iname = cp;
2161 namesz = roff_getname(r, &cp, ln, ppos);
2162
2163 /* Resolve the end marker if it is indirect. */
2164
2165 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2166 deftype = ROFFDEF_USER;
2167 name = roff_getstrn(r, iname, namesz, &deftype);
2168 if (name == NULL) {
2169 mandoc_msg(MANDOCERR_STR_UNDEF,
2170 ln, (int)(iname - buf->buf),
2171 "%.*s", (int)namesz, iname);
2172 namesz = 0;
2173 } else
2174 namesz = strlen(name);
2175 } else
2176 name = iname;
2177
2178 if (namesz)
2179 r->last->end = mandoc_strndup(name, namesz);
2180
2181 if (*cp != '\0')
2182 mandoc_msg(MANDOCERR_ARG_EXCESS,
2183 ln, pos, ".%s ... %s", roff_name[tok], cp);
2184
2185 return ROFF_IGN;
2186 }
2187
2188 static int
2189 roff_block_sub(ROFF_ARGS)
2190 {
2191 enum roff_tok t;
2192 int i, j;
2193
2194 /*
2195 * First check whether a custom macro exists at this level. If
2196 * it does, then check against it. This is some of groff's
2197 * stranger behaviours. If we encountered a custom end-scope
2198 * tag and that tag also happens to be a "real" macro, then we
2199 * need to try interpreting it again as a real macro. If it's
2200 * not, then return ignore. Else continue.
2201 */
2202
2203 if (r->last->end) {
2204 for (i = pos, j = 0; r->last->end[j]; j++, i++)
2205 if (buf->buf[i] != r->last->end[j])
2206 break;
2207
2208 if (r->last->end[j] == '\0' &&
2209 (buf->buf[i] == '\0' ||
2210 buf->buf[i] == ' ' ||
2211 buf->buf[i] == '\t')) {
2212 roffnode_pop(r);
2213 roffnode_cleanscope(r);
2214
2215 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2216 i++;
2217
2218 pos = i;
2219 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2220 TOKEN_NONE)
2221 return ROFF_RERUN;
2222 return ROFF_IGN;
2223 }
2224 }
2225
2226 /*
2227 * If we have no custom end-query or lookup failed, then try
2228 * pulling it out of the hashtable.
2229 */
2230
2231 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2232
2233 if (t != ROFF_cblock) {
2234 if (tok != ROFF_ig)
2235 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2236 return ROFF_IGN;
2237 }
2238
2239 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2240 }
2241
2242 static int
2243 roff_block_text(ROFF_ARGS)
2244 {
2245
2246 if (tok != ROFF_ig)
2247 roff_setstr(r, r->last->name, buf->buf + pos, 2);
2248
2249 return ROFF_IGN;
2250 }
2251
2252 static int
2253 roff_cond_sub(ROFF_ARGS)
2254 {
2255 struct roffnode *bl;
2256 char *ep;
2257 int endloop, irc, rr;
2258 enum roff_tok t;
2259
2260 irc = ROFF_IGN;
2261 rr = r->last->rule;
2262 endloop = tok != ROFF_while ? ROFF_IGN :
2263 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2264 if (roffnode_cleanscope(r))
2265 irc |= endloop;
2266
2267 /*
2268 * If `\}' occurs on a macro line without a preceding macro,
2269 * drop the line completely.
2270 */
2271
2272 ep = buf->buf + pos;
2273 if (ep[0] == '\\' && ep[1] == '}')
2274 rr = 0;
2275
2276 /*
2277 * The closing delimiter `\}' rewinds the conditional scope
2278 * but is otherwise ignored when interpreting the line.
2279 */
2280
2281 while ((ep = strchr(ep, '\\')) != NULL) {
2282 switch (ep[1]) {
2283 case '}':
2284 memmove(ep, ep + 2, strlen(ep + 2) + 1);
2285 if (roff_ccond(r, ln, ep - buf->buf))
2286 irc |= endloop;
2287 break;
2288 case '\0':
2289 ++ep;
2290 break;
2291 default:
2292 ep += 2;
2293 break;
2294 }
2295 }
2296
2297 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2298
2299 /* For now, let high level macros abort .ce mode. */
2300
2301 if (roffce_node != NULL &&
2302 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
2303 t == ROFF_TH || t == ROFF_TS)) {
2304 r->man->last = roffce_node;
2305 r->man->next = ROFF_NEXT_SIBLING;
2306 roffce_lines = 0;
2307 roffce_node = NULL;
2308 }
2309
2310 /*
2311 * Fully handle known macros when they are structurally
2312 * required or when the conditional evaluated to true.
2313 */
2314
2315 if (t == ROFF_break) {
2316 if (irc & ROFF_LOOPMASK)
2317 irc = ROFF_IGN | ROFF_LOOPEXIT;
2318 else if (rr) {
2319 for (bl = r->last; bl != NULL; bl = bl->parent) {
2320 bl->rule = 0;
2321 if (bl->tok == ROFF_while)
2322 break;
2323 }
2324 }
2325 } else if (t != TOKEN_NONE &&
2326 (rr || roffs[t].flags & ROFFMAC_STRUCT))
2327 irc |= (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2328 else
2329 irc |= rr ? ROFF_CONT : ROFF_IGN;
2330 return irc;
2331 }
2332
2333 static int
2334 roff_cond_text(ROFF_ARGS)
2335 {
2336 char *ep;
2337 int endloop, irc, rr;
2338
2339 irc = ROFF_IGN;
2340 rr = r->last->rule;
2341 endloop = tok != ROFF_while ? ROFF_IGN :
2342 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2343 if (roffnode_cleanscope(r))
2344 irc |= endloop;
2345
2346 /*
2347 * If `\}' occurs on a text line with neither preceding
2348 * nor following characters, drop the line completely.
2349 */
2350
2351 ep = buf->buf + pos;
2352 if (strcmp(ep, "\\}") == 0)
2353 rr = 0;
2354
2355 /*
2356 * The closing delimiter `\}' rewinds the conditional scope
2357 * but is otherwise ignored when interpreting the line.
2358 */
2359
2360 while ((ep = strchr(ep, '\\')) != NULL) {
2361 switch (ep[1]) {
2362 case '}':
2363 memmove(ep, ep + 2, strlen(ep + 2) + 1);
2364 if (roff_ccond(r, ln, ep - buf->buf))
2365 irc |= endloop;
2366 break;
2367 case '\0':
2368 ++ep;
2369 break;
2370 default:
2371 ep += 2;
2372 break;
2373 }
2374 }
2375 if (rr)
2376 irc |= ROFF_CONT;
2377 return irc;
2378 }
2379
2380 /* --- handling of numeric and conditional expressions -------------------- */
2381
2382 /*
2383 * Parse a single signed integer number. Stop at the first non-digit.
2384 * If there is at least one digit, return success and advance the
2385 * parse point, else return failure and let the parse point unchanged.
2386 * Ignore overflows, treat them just like the C language.
2387 */
2388 static int
2389 roff_getnum(const char *v, int *pos, int *res, int flags)
2390 {
2391 int myres, scaled, n, p;
2392
2393 if (NULL == res)
2394 res = &myres;
2395
2396 p = *pos;
2397 n = v[p] == '-';
2398 if (n || v[p] == '+')
2399 p++;
2400
2401 if (flags & ROFFNUM_WHITE)
2402 while (isspace((unsigned char)v[p]))
2403 p++;
2404
2405 for (*res = 0; isdigit((unsigned char)v[p]); p++)
2406 *res = 10 * *res + v[p] - '0';
2407 if (p == *pos + n)
2408 return 0;
2409
2410 if (n)
2411 *res = -*res;
2412
2413 /* Each number may be followed by one optional scaling unit. */
2414
2415 switch (v[p]) {
2416 case 'f':
2417 scaled = *res * 65536;
2418 break;
2419 case 'i':
2420 scaled = *res * 240;
2421 break;
2422 case 'c':
2423 scaled = *res * 240 / 2.54;
2424 break;
2425 case 'v':
2426 case 'P':
2427 scaled = *res * 40;
2428 break;
2429 case 'm':
2430 case 'n':
2431 scaled = *res * 24;
2432 break;
2433 case 'p':
2434 scaled = *res * 10 / 3;
2435 break;
2436 case 'u':
2437 scaled = *res;
2438 break;
2439 case 'M':
2440 scaled = *res * 6 / 25;
2441 break;
2442 default:
2443 scaled = *res;
2444 p--;
2445 break;
2446 }
2447 if (flags & ROFFNUM_SCALE)
2448 *res = scaled;
2449
2450 *pos = p + 1;
2451 return 1;
2452 }
2453
/*
 * Evaluate a string comparison condition.
 * The first character is the delimiter.
 * Succeed if the string up to its second occurrence
 * matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or lacking that, to the end of the line.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*lhs, *rhs;
	char		 delim;
	int		 match;

	match = 0;
	delim = v[*pos];		/* initial delimiter */
	lhs = v + *pos + 1;		/* scans the first string */
	rhs = strchr(lhs, delim);	/* scans the second string */

	if (rhs != NULL) {
		/*
		 * Walk both strings in parallel, starting right
		 * after the middle delimiter.  A match requires
		 * both to reach a delimiter at the same time.
		 */
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: skip to the final delimiter. */
				rhs = strchr(rhs, delim);
				break;
			}
			if (*rhs == delim) {
				match = 1;
				break;
			}
		}
	}

	/* Without a delimiter to stop at, consume the whole line. */

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return match;
}
2496
2497 /*
2498 * Evaluate an optionally negated single character, numerical,
2499 * or string condition.
2500 */
2501 static int
2502 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2503 {
2504 const char *start, *end;
2505 char *cp, *name;
2506 size_t sz;
2507 int deftype, len, number, savepos, istrue, wanttrue;
2508
2509 if ('!' == v[*pos]) {
2510 wanttrue = 0;
2511 (*pos)++;
2512 } else
2513 wanttrue = 1;
2514
2515 switch (v[*pos]) {
2516 case '\0':
2517 return 0;
2518 case 'n':
2519 case 'o':
2520 (*pos)++;
2521 return wanttrue;
2522 case 'e':
2523 case 't':
2524 case 'v':
2525 (*pos)++;
2526 return !wanttrue;
2527 case 'c':
2528 do {
2529 (*pos)++;
2530 } while (v[*pos] == ' ');
2531
2532 /*
2533 * Quirk for groff compatibility:
2534 * The horizontal tab is neither available nor unavailable.
2535 */
2536
2537 if (v[*pos] == '\t') {
2538 (*pos)++;
2539 return 0;
2540 }
2541
2542 /* Printable ASCII characters are available. */
2543
2544 if (v[*pos] != '\\') {
2545 (*pos)++;
2546 return wanttrue;
2547 }
2548
2549 end = v + ++*pos;
2550 switch (mandoc_escape(&end, &start, &len)) {
2551 case ESCAPE_SPECIAL:
2552 istrue = mchars_spec2cp(start, len) != -1;
2553 break;
2554 case ESCAPE_UNICODE:
2555 istrue = 1;
2556 break;
2557 case ESCAPE_NUMBERED:
2558 istrue = mchars_num2char(start, len) != -1;
2559 break;
2560 default:
2561 istrue = !wanttrue;
2562 break;
2563 }
2564 *pos = end - v;
2565 return istrue == wanttrue;
2566 case 'd':
2567 case 'r':
2568 cp = v + *pos + 1;
2569 while (*cp == ' ')
2570 cp++;
2571 name = cp;
2572 sz = roff_getname(r, &cp, ln, cp - v);
2573 if (sz == 0)
2574 istrue = 0;
2575 else if (v[*pos] == 'r')
2576 istrue = roff_hasregn(r, name, sz);
2577 else {
2578 deftype = ROFFDEF_ANY;
2579 roff_getstrn(r, name, sz, &deftype);
2580 istrue = !!deftype;
2581 }
2582 *pos = (name + sz) - v;
2583 return istrue == wanttrue;
2584 default:
2585 break;
2586 }
2587
2588 savepos = *pos;
2589 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2590 return (number > 0) == wanttrue;
2591 else if (*pos == savepos)
2592 return roff_evalstrcond(v, pos) == wanttrue;
2593 else
2594 return 0;
2595 }
2596
2597 static int
2598 roff_line_ignore(ROFF_ARGS)
2599 {
2600
2601 return ROFF_IGN;
2602 }
2603
2604 static int
2605 roff_insec(ROFF_ARGS)
2606 {
2607
2608 mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]);
2609 return ROFF_IGN;
2610 }
2611
2612 static int
2613 roff_unsupp(ROFF_ARGS)
2614 {
2615
2616 mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]);
2617 return ROFF_IGN;
2618 }
2619
2620 static int
2621 roff_cond(ROFF_ARGS)
2622 {
2623 int irc;
2624
2625 roffnode_push(r, tok, NULL, ln, ppos);
2626
2627 /*
2628 * An `.el' has no conditional body: it will consume the value
2629 * of the current rstack entry set in prior `ie' calls or
2630 * defaults to DENY.
2631 *
2632 * If we're not an `el', however, then evaluate the conditional.
2633 */
2634
2635 r->last->rule = tok == ROFF_el ?
2636 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2637 roff_evalcond(r, ln, buf->buf, &pos);
2638
2639 /*
2640 * An if-else will put the NEGATION of the current evaluated
2641 * conditional into the stack of rules.
2642 */
2643
2644 if (tok == ROFF_ie) {
2645 if (r->rstackpos + 1 == r->rstacksz) {
2646 r->rstacksz += 16;
2647 r->rstack = mandoc_reallocarray(r->rstack,
2648 r->rstacksz, sizeof(int));
2649 }
2650 r->rstack[++r->rstackpos] = !r->last->rule;
2651 }
2652
2653 /* If the parent has false as its rule, then so do we. */
2654
2655 if (r->last->parent && !r->last->parent->rule)
2656 r->last->rule = 0;
2657
2658 /*
2659 * Determine scope.
2660 * If there is nothing on the line after the conditional,
2661 * not even whitespace, use next-line scope.
2662 * Except that .while does not support next-line scope.
2663 */
2664
2665 if (buf->buf[pos] == '\0' && tok != ROFF_while) {
2666 r->last->endspan = 2;
2667 goto out;
2668 }
2669
2670 while (buf->buf[pos] == ' ')
2671 pos++;
2672
2673 /* An opening brace requests multiline scope. */
2674
2675 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2676 r->last->endspan = -1;
2677 pos += 2;
2678 while (buf->buf[pos] == ' ')
2679 pos++;
2680 goto out;
2681 }
2682
2683 /*
2684 * Anything else following the conditional causes
2685 * single-line scope. Warn if the scope contains
2686 * nothing but trailing whitespace.
2687 */
2688
2689 if (buf->buf[pos] == '\0')
2690 mandoc_msg(MANDOCERR_COND_EMPTY,
2691 ln, ppos, "%s", roff_name[tok]);
2692
2693 r->last->endspan = 1;
2694
2695 out:
2696 *offs = pos;
2697 irc = ROFF_RERUN;
2698 if (tok == ROFF_while)
2699 irc |= ROFF_WHILE;
2700 return irc;
2701 }
2702
2703 static int
2704 roff_ds(ROFF_ARGS)
2705 {
2706 char *string;
2707 const char *name;
2708 size_t namesz;
2709
2710 /* Ignore groff compatibility mode for now. */
2711
2712 if (tok == ROFF_ds1)
2713 tok = ROFF_ds;
2714 else if (tok == ROFF_as1)
2715 tok = ROFF_as;
2716
2717 /*
2718 * The first word is the name of the string.
2719 * If it is empty or terminated by an escape sequence,
2720 * abort the `ds' request without defining anything.
2721 */
2722
2723 name = string = buf->buf + pos;
2724 if (*name == '\0')
2725 return ROFF_IGN;
2726
2727 namesz = roff_getname(r, &string, ln, pos);
2728 switch (name[namesz]) {
2729 case '\\':
2730 return ROFF_IGN;
2731 case '\t':
2732 string = buf->buf + pos + namesz;
2733 break;
2734 default:
2735 break;
2736 }
2737
2738 /* Read past the initial double-quote, if any. */
2739 if (*string == '"')
2740 string++;
2741
2742 /* The rest is the value. */
2743 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2744 ROFF_as == tok);
2745 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2746 return ROFF_IGN;
2747 }
2748
/*
 * Parse a single operator, one or two characters long.
 * If the operator is recognized, return success and advance the
 * parse point, else return failure and let the parse point unchanged.
 *
 * One-character operators are stored in *res as they are.
 * Two-character operators are stored as a single code:
 * "<=" as 'l', "<>" as '!', "<?" as 'i', ">=" as 'g', ">?" as 'a',
 * and "==" as '='.  On success, the stored code is also returned.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*op;
	int		 oplen;

	op = v + *pos;
	*res = op[0];
	oplen = 1;

	switch (op[0]) {
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case ':':
		break;
	case '<':
		if (op[1] == '=') {
			*res = 'l';
			oplen = 2;
		} else if (op[1] == '>') {
			*res = '!';
			oplen = 2;
		} else if (op[1] == '?') {
			*res = 'i';
			oplen = 2;
		}
		break;
	case '>':
		if (op[1] == '=') {
			*res = 'g';
			oplen = 2;
		} else if (op[1] == '?') {
			*res = 'a';
			oplen = 2;
		}
		break;
	case '=':
		if (op[1] == '=')
			oplen = 2;
		break;
	default:
		return 0;
	}

	*pos += oplen;
	return *res;
}
2812
2813 /*
2814 * Evaluate either a parenthesized numeric expression
2815 * or a single signed integer number.
2816 */
2817 static int
2818 roff_evalpar(struct roff *r, int ln,
2819 const char *v, int *pos, int *res, int flags)
2820 {
2821
2822 if ('(' != v[*pos])
2823 return roff_getnum(v, pos, res, flags);
2824
2825 (*pos)++;
2826 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2827 return 0;
2828
2829 /*
2830 * Omission of the closing parenthesis
2831 * is an error in validation mode,
2832 * but ignored in evaluation mode.
2833 */
2834
2835 if (')' == v[*pos])
2836 (*pos)++;
2837 else if (NULL == res)
2838 return 0;
2839
2840 return 1;
2841 }
2842
2843 /*
2844 * Evaluate a complete numeric expression.
2845 * Proceed left to right, there is no concept of precedence.
2846 */
2847 static int
2848 roff_evalnum(struct roff *r, int ln, const char *v,
2849 int *pos, int *res, int flags)
2850 {
2851 int mypos, operand2;
2852 char operator;
2853
2854 if (NULL == pos) {
2855 mypos = 0;
2856 pos = &mypos;
2857 }
2858
2859 if (flags & ROFFNUM_WHITE)
2860 while (isspace((unsigned char)v[*pos]))
2861 (*pos)++;
2862
2863 if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2864 return 0;
2865
2866 while (1) {
2867 if (flags & ROFFNUM_WHITE)
2868 while (isspace((unsigned char)v[*pos]))
2869 (*pos)++;
2870
2871 if ( ! roff_getop(v, pos, &operator))
2872 break;
2873
2874 if (flags & ROFFNUM_WHITE)
2875 while (isspace((unsigned char)v[*pos]))
2876 (*pos)++;
2877
2878 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2879 return 0;
2880
2881 if (flags & ROFFNUM_WHITE)
2882 while (isspace((unsigned char)v[*pos]))
2883 (*pos)++;
2884
2885 if (NULL == res)
2886 continue;
2887
2888 switch (operator) {
2889 case '+':
2890 *res += operand2;
2891 break;
2892 case '-':
2893 *res -= operand2;
2894 break;
2895 case '*':
2896 *res *= operand2;
2897 break;
2898 case '/':
2899 if (operand2 == 0) {
2900 mandoc_msg(MANDOCERR_DIVZERO,
2901 ln, *pos, "%s", v);
2902 *res = 0;
2903 break;
2904 }
2905 *res /= operand2;
2906 break;
2907 case '%':
2908 if (operand2 == 0) {
2909 mandoc_msg(MANDOCERR_DIVZERO,
2910 ln, *pos, "%s", v);
2911 *res = 0;
2912 break;
2913 }
2914 *res %= operand2;
2915 break;
2916 case '<':
2917 *res = *res < operand2;
2918 break;
2919 case '>':
2920 *res = *res > operand2;
2921 break;
2922 case 'l':
2923 *res = *res <= operand2;
2924 break;
2925 case 'g':
2926 *res = *res >= operand2;
2927 break;
2928 case '=':
2929 *res = *res == operand2;
2930 break;
2931 case '!':
2932 *res = *res != operand2;
2933 break;
2934 case '&':
2935 *res = *res && operand2;
2936 break;
2937 case ':':
2938 *res = *res || operand2;
2939 break;
2940 case 'i':
2941 if (operand2 < *res)
2942 *res = operand2;
2943 break;
2944 case 'a':
2945 if (operand2 > *res)
2946 *res = operand2;
2947 break;
2948 default:
2949 abort();
2950 }
2951 }
2952 return 1;
2953 }
2954
2955 /* --- register management ------------------------------------------------ */
2956
/*
 * Set, increment, or decrement the number register with the
 * NUL-terminated name, leaving its auto-increment step alone.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	size_t	 namesz;

	namesz = strlen(name);
	roff_setregn(r, name, namesz, val, sign, INT_MIN);
}
2962
2963 static void
2964 roff_setregn(struct roff *r, const char *name, size_t len,
2965 int val, char sign, int step)
2966 {
2967 struct roffreg *reg;
2968
2969 /* Search for an existing register with the same name. */
2970 reg = r->regtab;
2971
2972 while (reg != NULL && (reg->key.sz != len ||
2973 strncmp(reg->key.p, name, len) != 0))
2974 reg = reg->next;
2975
2976 if (NULL == reg) {
2977 /* Create a new register. */
2978 reg = mandoc_malloc(sizeof(struct roffreg));
2979 reg->key.p = mandoc_strndup(name, len);
2980 reg->key.sz = len;
2981 reg->val = 0;
2982 reg->step = 0;
2983 reg->next = r->regtab;
2984 r->regtab = reg;
2985 }
2986
2987 if ('+' == sign)
2988 reg->val += val;
2989 else if ('-' == sign)
2990 reg->val -= val;
2991 else
2992 reg->val = val;
2993 if (step != INT_MIN)
2994 reg->step = step;
2995 }
2996
2997 /*
2998 * Handle some predefined read-only number registers.
2999 * For now, return -1 if the requested register is not predefined;
3000 * in case a predefined read-only register having the value -1
3001 * were to turn up, another special value would have to be chosen.
3002 */
3003 static int
3004 roff_getregro(const struct roff *r, const char *name)
3005 {
3006
3007 switch (*name) {
3008 case '$': /* Number of arguments of the last macro evaluated. */
3009 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
3010 case 'A': /* ASCII approximation mode is always off. */
3011 return 0;
3012 case 'g': /* Groff compatibility mode is always on. */
3013 return 1;
3014 case 'H': /* Fixed horizontal resolution. */
3015 return 24;
3016 case 'j': /* Always adjust left margin only. */
3017 return 0;
3018 case 'T': /* Some output device is always defined. */
3019 return 1;
3020 case 'V': /* Fixed vertical resolution. */
3021 return 40;
3022 default:
3023 return -1;
3024 }
3025 }
3026
/*
 * Return the value of the number register with the NUL-terminated
 * name, without applying any auto-increment.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	size_t	 namesz;

	namesz = strlen(name);
	return roff_getregn(r, name, namesz, '\0');
}
3032
3033 static int
3034 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
3035 {
3036 struct roffreg *reg;
3037 int val;
3038
3039 if ('.' == name[0] && 2 == len) {
3040 val = roff_getregro(r, name + 1);
3041 if (-1 != val)
3042 return val;
3043 }
3044
3045 for (reg = r->regtab; reg; reg = reg->next) {
3046 if (len == reg->key.sz &&
3047 0 == strncmp(name, reg->key.p, len)) {
3048 switch (sign) {
3049 case '+':
3050 reg->val += reg->step;
3051 break;
3052 case '-':
3053 reg->val -= reg->step;
3054 break;
3055 default:
3056 break;
3057 }
3058 return reg->val;
3059 }
3060 }
3061
3062 roff_setregn(r, name, len, 0, '\0', INT_MIN);
3063 return 0;
3064 }
3065
3066 static int
3067 roff_hasregn(const struct roff *r, const char *name, size_t len)
3068 {
3069 struct roffreg *reg;
3070 int val;
3071
3072 if ('.' == name[0] && 2 == len) {
3073 val = roff_getregro(r, name + 1);
3074 if (-1 != val)
3075 return 1;
3076 }
3077
3078 for (reg = r->regtab; reg; reg = reg->next)
3079 if (len == reg->key.sz &&
3080 0 == strncmp(name, reg->key.p, len))
3081 return 1;
3082
3083 return 0;
3084 }
3085
3086 static void
3087 roff_freereg(struct roffreg *reg)
3088 {
3089 struct roffreg *old_reg;
3090
3091 while (NULL != reg) {
3092 free(reg->key.p);
3093 old_reg = reg;
3094 reg = reg->next;
3095 free(old_reg);
3096 }
3097 }
3098
3099 static int
3100 roff_nr(ROFF_ARGS)
3101 {
3102 char *key, *val, *step;
3103 size_t keysz;
3104 int iv, is, len;
3105 char sign;
3106
3107 key = val = buf->buf + pos;
3108 if (*key == '\0')
3109 return ROFF_IGN;
3110
3111 keysz = roff_getname(r, &val, ln, pos);
3112 if (key[keysz] == '\\' || key[keysz] == '\t')
3113 return ROFF_IGN;
3114
3115 sign = *val;
3116 if (sign == '+' || sign == '-')
3117 val++;
3118
3119 len = 0;
3120 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
3121 return ROFF_IGN;
3122
3123 step = val + len;
3124 while (isspace((unsigned char)*step))
3125 step++;
3126 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
3127 is = INT_MIN;
3128
3129 roff_setregn(r, key, keysz, iv, sign, is);
3130 return ROFF_IGN;
3131 }
3132
3133 static int
3134 roff_rr(ROFF_ARGS)
3135 {
3136 struct roffreg *reg, **prev;
3137 char *name, *cp;
3138 size_t namesz;
3139
3140 name = cp = buf->buf + pos;
3141 if (*name == '\0')
3142 return ROFF_IGN;
3143 namesz = roff_getname(r, &cp, ln, pos);
3144 name[namesz] = '\0';
3145
3146 prev = &r->regtab;
3147 while (1) {
3148 reg = *prev;
3149 if (reg == NULL || !strcmp(name, reg->key.p))
3150 break;
3151 prev = &reg->next;
3152 }
3153 if (reg != NULL) {
3154 *prev = reg->next;
3155 free(reg->key.p);
3156 free(reg);
3157 }
3158 return ROFF_IGN;
3159 }
3160
3161 /* --- handler functions for roff requests -------------------------------- */
3162
3163 static int
3164 roff_rm(ROFF_ARGS)
3165 {
3166 const char *name;
3167 char *cp;
3168 size_t namesz;
3169
3170 cp = buf->buf + pos;
3171 while (*cp != '\0') {
3172 name = cp;
3173 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
3174 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
3175 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3176 if (name[namesz] == '\\' || name[namesz] == '\t')
3177 break;
3178 }
3179 return ROFF_IGN;
3180 }
3181
3182 static int
3183 roff_it(ROFF_ARGS)
3184 {
3185 int iv;
3186
3187 /* Parse the number of lines. */
3188
3189 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
3190 mandoc_msg(MANDOCERR_IT_NONUM,
3191 ln, ppos, "%s", buf->buf + 1);
3192 return ROFF_IGN;
3193 }
3194
3195 while (isspace((unsigned char)buf->buf[pos]))
3196 pos++;
3197
3198 /*
3199 * Arm the input line trap.
3200 * Special-casing "an-trap" is an ugly workaround to cope
3201 * with DocBook stupidly fiddling with man(7) internals.
3202 */
3203
3204 roffit_lines = iv;
3205 roffit_macro = mandoc_strdup(iv != 1 ||
3206 strcmp(buf->buf + pos, "an-trap") ?
3207 buf->buf + pos : "br");
3208 return ROFF_IGN;
3209 }
3210
3211 static int
3212 roff_Dd(ROFF_ARGS)
3213 {
3214 int mask;
3215 enum roff_tok t, te;
3216
3217 switch (tok) {
3218 case ROFF_Dd:
3219 tok = MDOC_Dd;
3220 te = MDOC_MAX;
3221 if (r->format == 0)
3222 r->format = MPARSE_MDOC;
3223 mask = MPARSE_MDOC | MPARSE_QUICK;
3224 break;
3225 case ROFF_TH:
3226 tok = MAN_TH;
3227 te = MAN_MAX;
3228 if (r->format == 0)
3229 r->format = MPARSE_MAN;
3230 mask = MPARSE_QUICK;
3231 break;
3232 default:
3233 abort();
3234 }
3235 if ((r->options & mask) == 0)
3236 for (t = tok; t < te; t++)
3237 roff_setstr(r, roff_name[t], NULL, 0);
3238 return ROFF_CONT;
3239 }
3240
3241 static int
3242 roff_TE(ROFF_ARGS)
3243 {
3244 r->man->flags &= ~ROFF_NONOFILL;
3245 if (r->tbl == NULL) {
3246 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE");
3247 return ROFF_IGN;
3248 }
3249 if (tbl_end(r->tbl, 0) == 0) {
3250 r->tbl = NULL;
3251 free(buf->buf);
3252 buf->buf = mandoc_strdup(".sp");
3253 buf->sz = 4;
3254 *offs = 0;
3255 return ROFF_REPARSE;
3256 }
3257 r->tbl = NULL;
3258 return ROFF_IGN;
3259 }
3260
3261 static int
3262 roff_T_(ROFF_ARGS)
3263 {
3264
3265 if (NULL == r->tbl)
3266 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&");
3267 else
3268 tbl_restart(ln, ppos, r->tbl);
3269
3270 return ROFF_IGN;
3271 }
3272
3273 /*
3274 * Handle in-line equation delimiters.
3275 */
3276 static int
3277 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
3278 {
3279 char *cp1, *cp2;
3280 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
3281
3282 /*
3283 * Outside equations, look for an opening delimiter.
3284 * If we are inside an equation, we already know it is
3285 * in-line, or this function wouldn't have been called;
3286 * so look for a closing delimiter.
3287 */
3288
3289 cp1 = buf->buf + pos;
3290 cp2 = strchr(cp1, r->eqn == NULL ?
3291 r->last_eqn->odelim : r->last_eqn->cdelim);
3292 if (cp2 == NULL)
3293 return ROFF_CONT;
3294
3295 *cp2++ = '\0';
3296 bef_pr = bef_nl = aft_nl = aft_pr = "";
3297
3298 /* Handle preceding text, protecting whitespace. */
3299
3300 if (*buf->buf != '\0') {
3301 if (r->eqn == NULL)
3302 bef_pr = "\\&";
3303 bef_nl = "\n";
3304 }
3305
3306 /*
3307 * Prepare replacing the delimiter with an equation macro
3308 * and drop leading white space from the equation.
3309 */
3310
3311 if (r->eqn == NULL) {
3312 while (*cp2 == ' ')
3313 cp2++;
3314 mac = ".EQ";
3315 } else
3316 mac = ".EN";
3317
3318 /* Handle following text, protecting whitespace. */
3319
3320 if (*cp2 != '\0') {
3321 aft_nl = "\n";
3322 if (r->eqn != NULL)
3323 aft_pr = "\\&";
3324 }
3325
3326 /* Do the actual replacement. */
3327
3328 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
3329 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
3330 free(buf->buf);
3331 buf->buf = cp1;
3332
3333 /* Toggle the in-line state of the eqn subsystem. */
3334
3335 r->eqn_inline = r->eqn == NULL;
3336 return ROFF_REPARSE;
3337 }
3338
3339 static int
3340 roff_EQ(ROFF_ARGS)
3341 {
3342 struct roff_node *n;
3343
3344 if (r->man->meta.macroset == MACROSET_MAN)
3345 man_breakscope(r->man, ROFF_EQ);
3346 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3347 if (ln > r->man->last->line)
3348 n->flags |= NODE_LINE;
3349 n->eqn = eqn_box_new();
3350 roff_node_append(r->man, n);
3351 r->man->next = ROFF_NEXT_SIBLING;
3352
3353 assert(r->eqn == NULL);
3354 if (r->last_eqn == NULL)
3355 r->last_eqn = eqn_alloc();
3356 else
3357 eqn_reset(r->last_eqn);
3358 r->eqn = r->last_eqn;
3359 r->eqn->node = n;
3360
3361 if (buf->buf[pos] != '\0')
3362 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3363 ".EQ %s", buf->buf + pos);
3364
3365 return ROFF_IGN;
3366 }
3367
3368 static int
3369 roff_EN(ROFF_ARGS)
3370 {
3371 if (r->eqn != NULL) {
3372 eqn_parse(r->eqn);
3373 r->eqn = NULL;
3374 } else
3375 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN");
3376 if (buf->buf[pos] != '\0')
3377 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3378 "EN %s", buf->buf + pos);
3379 return ROFF_IGN;
3380 }
3381
3382 static int
3383 roff_TS(ROFF_ARGS)
3384 {
3385 if (r->tbl != NULL) {
3386 mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
3387 tbl_end(r->tbl, 0);
3388 }
3389 r->man->flags |= ROFF_NONOFILL;
3390 r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
3391 if (r->last_tbl == NULL)
3392 r->first_tbl = r->tbl;
3393 r->last_tbl = r->tbl;
3394 return ROFF_IGN;
3395 }
3396
3397 static int
3398 roff_noarg(ROFF_ARGS)
3399 {
3400 if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3401 man_breakscope(r->man, tok);
3402 if (tok == ROFF_brp)
3403 tok = ROFF_br;
3404 roff_elem_alloc(r->man, ln, ppos, tok);
3405 if (buf->buf[pos] != '\0')
3406 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3407 "%s %s", roff_name[tok], buf->buf + pos);
3408 if (tok == ROFF_nf)
3409 r->man->flags |= ROFF_NOFILL;
3410 else if (tok == ROFF_fi)
3411 r->man->flags &= ~ROFF_NOFILL;
3412 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3413 r->man->next = ROFF_NEXT_SIBLING;
3414 return ROFF_IGN;
3415 }
3416
3417 static int
3418 roff_onearg(ROFF_ARGS)
3419 {
3420 struct roff_node *n;
3421 char *cp;
3422 int npos;
3423
3424 if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3425 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3426 tok == ROFF_ti))
3427 man_breakscope(r->man, tok);
3428
3429 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3430 r->man->last = roffce_node;
3431 r->man->next = ROFF_NEXT_SIBLING;
3432 }
3433
3434 roff_elem_alloc(r->man, ln, ppos, tok);
3435 n = r->man->last;
3436
3437 cp = buf->buf + pos;
3438 if (*cp != '\0') {
3439 while (*cp != '\0' && *cp != ' ')
3440 cp++;
3441 while (*cp == ' ')
3442 *cp++ = '\0';
3443 if (*cp != '\0')
3444 mandoc_msg(MANDOCERR_ARG_EXCESS,
3445 ln, (int)(cp - buf->buf),
3446 "%s ... %s", roff_name[tok], cp);
3447 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3448 }
3449
3450 if (tok == ROFF_ce || tok == ROFF_rj) {
3451 if (r->man->last->type == ROFFT_ELEM) {
3452 roff_word_alloc(r->man, ln, pos, "1");
3453 r->man->last->flags |= NODE_NOSRC;
3454 }
3455 npos = 0;
3456 if (roff_evalnum(r, ln, r->man->last->string, &npos,
3457 &roffce_lines, 0) == 0) {
3458 mandoc_msg(MANDOCERR_CE_NONUM,
3459 ln, pos, "ce %s", buf->buf + pos);
3460 roffce_lines = 1;
3461 }
3462 if (roffce_lines < 1) {
3463 r->man->last = r->man->last->parent;
3464 roffce_node = NULL;
3465 roffce_lines = 0;
3466 } else
3467 roffce_node = r->man->last->parent;
3468 } else {
3469 n->flags |= NODE_VALID | NODE_ENDED;
3470 r->man->last = n;
3471 }
3472 n->flags |= NODE_LINE;
3473 r->man->next = ROFF_NEXT_SIBLING;
3474 return ROFF_IGN;
3475 }
3476
3477 static int
3478 roff_manyarg(ROFF_ARGS)
3479 {
3480 struct roff_node *n;
3481 char *sp, *ep;
3482
3483 roff_elem_alloc(r->man, ln, ppos, tok);
3484 n = r->man->last;
3485
3486 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3487 while (*ep != '\0' && *ep != ' ')
3488 ep++;
3489 while (*ep == ' ')
3490 *ep++ = '\0';
3491 roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3492 }
3493
3494 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3495 r->man->last = n;
3496 r->man->next = ROFF_NEXT_SIBLING;
3497 return ROFF_IGN;
3498 }
3499
3500 static int
3501 roff_als(ROFF_ARGS)
3502 {
3503 char *oldn, *newn, *end, *value;
3504 size_t oldsz, newsz, valsz;
3505
3506 newn = oldn = buf->buf + pos;
3507 if (*newn == '\0')
3508 return ROFF_IGN;
3509
3510 newsz = roff_getname(r, &oldn, ln, pos);
3511 if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0')
3512 return ROFF_IGN;
3513
3514 end = oldn;
3515 oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3516 if (oldsz == 0)
3517 return ROFF_IGN;
3518
3519 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3520 (int)oldsz, oldn);
3521 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3522 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3523 free(value);
3524 return ROFF_IGN;
3525 }
3526
3527 /*
3528 * The .break request only makes sense inside conditionals,
3529 * and that case is already handled in roff_cond_sub().
3530 */
3531 static int
3532 roff_break(ROFF_ARGS)
3533 {
3534 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break");
3535 return ROFF_IGN;
3536 }
3537
3538 static int
3539 roff_cc(ROFF_ARGS)
3540 {
3541 const char *p;
3542
3543 p = buf->buf + pos;
3544
3545 if (*p == '\0' || (r->control = *p++) == '.')
3546 r->control = '\0';
3547
3548 if (*p != '\0')
3549 mandoc_msg(MANDOCERR_ARG_EXCESS,
3550 ln, p - buf->buf, "cc ... %s", p);
3551
3552 return ROFF_IGN;
3553 }
3554
3555 static int
3556 roff_char(ROFF_ARGS)
3557 {
3558 const char *p, *kp, *vp;
3559 size_t ksz, vsz;
3560 int font;
3561
3562 /* Parse the character to be replaced. */
3563
3564 kp = buf->buf + pos;
3565 p = kp + 1;
3566 if (*kp == '\0' || (*kp == '\\' &&
3567 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3568 (*p != ' ' && *p != '\0')) {
3569 mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp);
3570 return ROFF_IGN;
3571 }
3572 ksz = p - kp;
3573 while (*p == ' ')
3574 p++;
3575
3576 /*
3577 * If the replacement string contains a font escape sequence,
3578 * we have to restore the font at the end.
3579 */
3580
3581 vp = p;
3582 vsz = strlen(p);
3583 font = 0;
3584 while (*p != '\0') {
3585 if (*p++ != '\\')
3586 continue;
3587 switch (mandoc_escape(&p, NULL, NULL)) {
3588 case ESCAPE_FONT:
3589 case ESCAPE_FONTROMAN:
3590 case ESCAPE_FONTITALIC:
3591 case ESCAPE_FONTBOLD:
3592 case ESCAPE_FONTBI:
3593 case ESCAPE_FONTCW:
3594 case ESCAPE_FONTPREV:
3595 font++;
3596 break;
3597 default:
3598 break;
3599 }
3600 }
3601 if (font > 1)
3602 mandoc_msg(MANDOCERR_CHAR_FONT,
3603 ln, (int)(vp - buf->buf), "%s", vp);
3604
3605 /*
3606 * Approximate the effect of .char using the .tr tables.
3607 * XXX In groff, .char and .tr interact differently.
3608 */
3609
3610 if (ksz == 1) {
3611 if (r->xtab == NULL)
3612 r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3613 assert((unsigned int)*kp < 128);
3614 free(r->xtab[(int)*kp].p);
3615 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3616 "%s%s", vp, font ? "\fP" : "");
3617 } else {
3618 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3619 if (font)
3620 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3621 }
3622 return ROFF_IGN;
3623 }
3624
3625 static int
3626 roff_ec(ROFF_ARGS)
3627 {
3628 const char *p;
3629
3630 p = buf->buf + pos;
3631 if (*p == '\0')
3632 r->escape = '\\';
3633 else {
3634 r->escape = *p;
3635 if (*++p != '\0')
3636 mandoc_msg(MANDOCERR_ARG_EXCESS, ln,
3637 (int)(p - buf->buf), "ec ... %s", p);
3638 }
3639 return ROFF_IGN;
3640 }
3641
3642 static int
3643 roff_eo(ROFF_ARGS)
3644 {
3645 r->escape = '\0';
3646 if (buf->buf[pos] != '\0')
3647 mandoc_msg(MANDOCERR_ARG_SKIP,
3648 ln, pos, "eo %s", buf->buf + pos);
3649 return ROFF_IGN;
3650 }
3651
3652 static int
3653 roff_nop(ROFF_ARGS)
3654 {
3655 while (buf->buf[pos] == ' ')
3656 pos++;
3657 *offs = pos;
3658 return ROFF_RERUN;
3659 }
3660
3661 static int
3662 roff_tr(ROFF_ARGS)
3663 {
3664 const char *p, *first, *second;
3665 size_t fsz, ssz;
3666 enum mandoc_esc esc;
3667
3668 p = buf->buf + pos;
3669
3670 if (*p == '\0') {
3671 mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr");
3672 return ROFF_IGN;
3673 }
3674
3675 while (*p != '\0') {
3676 fsz = ssz = 1;
3677
3678 first = p++;
3679 if (*first == '\\') {
3680 esc = mandoc_escape(&p, NULL, NULL);
3681 if (esc == ESCAPE_ERROR) {
3682 mandoc_msg(MANDOCERR_ESC_BAD, ln,
3683 (int)(p - buf->buf), "%s", first);
3684 return ROFF_IGN;
3685 }
3686 fsz = (size_t)(p - first);
3687 }
3688
3689 second = p++;
3690 if (*second == '\\') {
3691 esc = mandoc_escape(&p, NULL, NULL);
3692 if (esc == ESCAPE_ERROR) {
3693 mandoc_msg(MANDOCERR_ESC_BAD, ln,
3694 (int)(p - buf->buf), "%s", second);
3695 return ROFF_IGN;
3696 }
3697 ssz = (size_t)(p - second);
3698 } else if (*second == '\0') {
3699 mandoc_msg(MANDOCERR_TR_ODD, ln,
3700 (int)(first - buf->buf), "tr %s", first);
3701 second = " ";
3702 p--;
3703 }
3704
3705 if (fsz > 1) {
3706 roff_setstrn(&r->xmbtab, first, fsz,
3707 second, ssz, 0);
3708 continue;
3709 }
3710
3711 if (r->xtab == NULL)
3712 r->xtab = mandoc_calloc(128,
3713 sizeof(struct roffstr));
3714
3715 free(r->xtab[(int)*first].p);
3716 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3717 r->xtab[(int)*first].sz = ssz;
3718 }
3719
3720 return ROFF_IGN;
3721 }
3722
3723 /*
3724 * Implementation of the .return request.
3725 * There is no need to call roff_userret() from here.
3726 * The read module will call that after rewinding the reader stack
3727 * to the place from where the current macro was called.
3728 */
3729 static int
3730 roff_return(ROFF_ARGS)
3731 {
3732 if (r->mstackpos >= 0)
3733 return ROFF_IGN | ROFF_USERRET;
3734
3735 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return");
3736 return ROFF_IGN;
3737 }
3738
3739 static int
3740 roff_rn(ROFF_ARGS)
3741 {
3742 const char *value;
3743 char *oldn, *newn, *end;
3744 size_t oldsz, newsz;
3745 int deftype;
3746
3747 oldn = newn = buf->buf + pos;
3748 if (*oldn == '\0')
3749 return ROFF_IGN;
3750
3751 oldsz = roff_getname(r, &newn, ln, pos);
3752 if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0')
3753 return ROFF_IGN;
3754
3755 end = newn;
3756 newsz = roff_getname(r, &end, ln, newn - buf->buf);
3757 if (newsz == 0)
3758 return ROFF_IGN;
3759
3760 deftype = ROFFDEF_ANY;
3761 value = roff_getstrn(r, oldn, oldsz, &deftype);
3762 switch (deftype) {
3763 case ROFFDEF_USER:
3764 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3765 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3766 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3767 break;
3768 case ROFFDEF_PRE:
3769 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3770 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3771 break;
3772 case ROFFDEF_REN:
3773 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3774 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3775 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3776 break;
3777 case ROFFDEF_STD:
3778 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3779 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3780 break;
3781 default:
3782 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3783 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3784 break;
3785 }
3786 return ROFF_IGN;
3787 }
3788
3789 static int
3790 roff_shift(ROFF_ARGS)
3791 {
3792 struct mctx *ctx;
3793 int levels, i;
3794
3795 levels = 1;
3796 if (buf->buf[pos] != '\0' &&
3797 roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
3798 mandoc_msg(MANDOCERR_CE_NONUM,
3799 ln, pos, "shift %s", buf->buf + pos);
3800 levels = 1;
3801 }
3802 if (r->mstackpos < 0) {
3803 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift");
3804 return ROFF_IGN;
3805 }
3806 ctx = r->mstack + r->mstackpos;
3807 if (levels > ctx->argc) {
3808 mandoc_msg(MANDOCERR_SHIFT,
3809 ln, pos, "%d, but max is %d", levels, ctx->argc);
3810 levels = ctx->argc;
3811 }
3812 if (levels == 0)
3813 return ROFF_IGN;
3814 for (i = 0; i < levels; i++)
3815 free(ctx->argv[i]);
3816 ctx->argc -= levels;
3817 for (i = 0; i < ctx->argc; i++)
3818 ctx->argv[i] = ctx->argv[i + levels];
3819 return ROFF_IGN;
3820 }
3821
3822 static int
3823 roff_so(ROFF_ARGS)
3824 {
3825 char *name, *cp;
3826
3827 name = buf->buf + pos;
3828 mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name);
3829
3830 /*
3831 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3832 * opening anything that's not in our cwd or anything beneath
3833 * it. Thus, explicitly disallow traversing up the file-system
3834 * or using absolute paths.
3835 */
3836
3837 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3838 mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name);
3839 buf->sz = mandoc_asprintf(&cp,
3840 ".sp\nSee the file %s.\n.sp", name) + 1;
3841 free(buf->buf);
3842 buf->buf = cp;
3843 *offs = 0;
3844 return ROFF_REPARSE;
3845 }
3846
3847 *offs = pos;
3848 return ROFF_SO;
3849 }
3850
3851 /* --- user defined strings and macros ------------------------------------ */
3852
3853 static int
3854 roff_userdef(ROFF_ARGS)
3855 {
3856 struct mctx *ctx;
3857 char *arg, *ap, *dst, *src;
3858 size_t sz;
3859
3860 /* If the macro is empty, ignore it altogether. */
3861
3862 if (*r->current_string == '\0')
3863 return ROFF_IGN;
3864
3865 /* Initialize a new macro stack context. */
3866
3867 if (++r->mstackpos == r->mstacksz) {
3868 r->mstack = mandoc_recallocarray(r->mstack,
3869 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
3870 r->mstacksz += 8;
3871 }
3872 ctx = r->mstack + r->mstackpos;
3873 ctx->argsz = 0;
3874 ctx->argc = 0;
3875 ctx->argv = NULL;
3876
3877 /*
3878 * Collect pointers to macro argument strings,
3879 * NUL-terminating them and escaping quotes.
3880 */
3881
3882 src = buf->buf + pos;
3883 while (*src != '\0') {
3884 if (ctx->argc == ctx->argsz) {
3885 ctx->argsz += 8;
3886 ctx->argv = mandoc_reallocarray(ctx->argv,
3887 ctx->argsz, sizeof(*ctx->argv));
3888 }
3889 arg = roff_getarg(r, &src, ln, &pos);
3890 sz = 1; /* For the terminating NUL. */
3891 for (ap = arg; *ap != '\0'; ap++)
3892 sz += *ap == '"' ? 4 : 1;
3893 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
3894 for (ap = arg; *ap != '\0'; ap++) {
3895 if (*ap == '"') {
3896 memcpy(dst, "\\(dq", 4);
3897 dst += 4;
3898 } else
3899 *dst++ = *ap;
3900 }
3901 *dst = '\0';
3902 free(arg);
3903 }
3904
3905 /* Replace the macro invocation by the macro definition. */
3906
3907 free(buf->buf);
3908 buf->buf = mandoc_strdup(r->current_string);
3909 buf->sz = strlen(buf->buf) + 1;
3910 *offs = 0;
3911
3912 return buf->buf[buf->sz - 2] == '\n' ?
3913 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
3914 }
3915
3916 /*
3917 * Calling a high-level macro that was renamed with .rn.
3918 * r->current_string has already been set up by roff_parse().
3919 */
3920 static int
3921 roff_renamed(ROFF_ARGS)
3922 {
3923 char *nbuf;
3924
3925 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
3926 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
3927 free(buf->buf);
3928 buf->buf = nbuf;
3929 *offs = 0;
3930 return ROFF_CONT;
3931 }
3932
3933 /*
3934 * Measure the length in bytes of the roff identifier at *cpp
3935 * and advance the pointer to the next word.
3936 */
3937 static size_t
3938 roff_getname(struct roff *r, char **cpp, int ln, int pos)
3939 {
3940 char *name, *cp;
3941 size_t namesz;
3942
3943 name = *cpp;
3944 if (*name == '\0')
3945 return 0;
3946
3947 /* Advance cp to the byte after the end of the name. */
3948
3949 for (cp = name; 1; cp++) {
3950 namesz = cp - name;
3951 if (*cp == '\0')
3952 break;
3953 if (*cp == ' ' || *cp == '\t') {
3954 cp++;
3955 break;
3956 }
3957 if (*cp != '\\')
3958 continue;
3959 if (cp[1] == '{' || cp[1] == '}')
3960 break;
3961 if (*++cp == '\\')
3962 continue;
3963 mandoc_msg(MANDOCERR_NAMESC, ln, pos,
3964 "%.*s", (int)(cp - name + 1), name);
3965 mandoc_escape((const char **)&cp, NULL, NULL);
3966 break;
3967 }
3968
3969 /* Read past spaces. */
3970
3971 while (*cp == ' ')
3972 cp++;
3973
3974 *cpp = cp;
3975 return namesz;
3976 }
3977
3978 /*
3979 * Store *string into the user-defined string called *name.
3980 * To clear an existing entry, call with (*r, *name, NULL, 0).
3981 * append == 0: replace mode
3982 * append == 1: single-line append mode
3983 * append == 2: multiline append mode, append '\n' after each call
3984 */
3985 static void
3986 roff_setstr(struct roff *r, const char *name, const char *string,
3987 int append)
3988 {
3989 size_t namesz;
3990
3991 namesz = strlen(name);
3992 roff_setstrn(&r->strtab, name, namesz, string,
3993 string ? strlen(string) : 0, append);
3994 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3995 }
3996
3997 static void
3998 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
3999 const char *string, size_t stringsz, int append)
4000 {
4001 struct roffkv *n;
4002 char *c;
4003 int i;
4004 size_t oldch, newch;
4005
4006 /* Search for an existing string with the same name. */
4007 n = *r;
4008
4009 while (n && (namesz != n->key.sz ||
4010 strncmp(n->key.p, name, namesz)))
4011 n = n->next;
4012
4013 if (NULL == n) {
4014 /* Create a new string table entry. */
4015 n = mandoc_malloc(sizeof(struct roffkv));
4016 n->key.p = mandoc_strndup(name, namesz);
4017 n->key.sz = namesz;
4018 n->val.p = NULL;
4019 n->val.sz = 0;
4020 n->next = *r;
4021 *r = n;
4022 } else if (0 == append) {
4023 free(n->val.p);
4024 n->val.p = NULL;
4025 n->val.sz = 0;
4026 }
4027
4028 if (NULL == string)
4029 return;
4030
4031 /*
4032 * One additional byte for the '\n' in multiline mode,
4033 * and one for the terminating '\0'.
4034 */
4035 newch = stringsz + (1 < append ? 2u : 1u);
4036
4037 if (NULL == n->val.p) {
4038 n->val.p = mandoc_malloc(newch);
4039 *n->val.p = '\0';
4040 oldch = 0;
4041 } else {
4042 oldch = n->val.sz;
4043 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
4044 }
4045
4046 /* Skip existing content in the destination buffer. */
4047 c = n->val.p + (int)oldch;
4048
4049 /* Append new content to the destination buffer. */
4050 i = 0;
4051 while (i < (int)stringsz) {
4052 /*
4053 * Rudimentary roff copy mode:
4054 * Handle escaped backslashes.
4055 */
4056 if ('\\' == string[i] && '\\' == string[i + 1])
4057 i++;
4058 *c++ = string[i++];
4059 }
4060
4061 /* Append terminating bytes. */
4062 if (1 < append)
4063 *c++ = '\n';
4064
4065 *c = '\0';
4066 n->val.sz = (int)(c - n->val.p);
4067 }
4068
4069 static const char *
4070 roff_getstrn(struct roff *r, const char *name, size_t len,
4071 int *deftype)
4072 {
4073 const struct roffkv *n;
4074 int found, i;
4075 enum roff_tok tok;
4076
4077 found = 0;
4078 for (n = r->strtab; n != NULL; n = n->next) {
4079 if (strncmp(name, n->key.p, len) != 0 ||
4080 n->key.p[len] != '\0' || n->val.p == NULL)
4081 continue;
4082 if (*deftype & ROFFDEF_USER) {
4083 *deftype = ROFFDEF_USER;
4084 return n->val.p;
4085 } else {
4086 found = 1;
4087 break;
4088 }
4089 }
4090 for (n = r->rentab; n != NULL; n = n->next) {
4091 if (strncmp(name, n->key.p, len) != 0 ||
4092 n->key.p[len] != '\0' || n->val.p == NULL)
4093 continue;
4094 if (*deftype & ROFFDEF_REN) {
4095 *deftype = ROFFDEF_REN;
4096 return n->val.p;
4097 } else {
4098 found = 1;
4099 break;
4100 }
4101 }
4102 for (i = 0; i < PREDEFS_MAX; i++) {
4103 if (strncmp(name, predefs[i].name, len) != 0 ||
4104 predefs[i].name[len] != '\0')
4105 continue;
4106 if (*deftype & ROFFDEF_PRE) {
4107 *deftype = ROFFDEF_PRE;
4108 return predefs[i].str;
4109 } else {
4110 found = 1;
4111 break;
4112 }
4113 }
4114 if (r->man->meta.macroset != MACROSET_MAN) {
4115 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
4116 if (strncmp(name, roff_name[tok], len) != 0 ||
4117 roff_name[tok][len] != '\0')
4118 continue;
4119 if (*deftype & ROFFDEF_STD) {
4120 *deftype = ROFFDEF_STD;
4121 return NULL;
4122 } else {
4123 found = 1;
4124 break;
4125 }
4126 }
4127 }
4128 if (r->man->meta.macroset != MACROSET_MDOC) {
4129 for (tok = MAN_TH; tok < MAN_MAX; tok++) {
4130 if (strncmp(name, roff_name[tok], len) != 0 ||
4131 roff_name[tok][len] != '\0')
4132 continue;
4133 if (*deftype & ROFFDEF_STD) {
4134 *deftype = ROFFDEF_STD;
4135 return NULL;
4136 } else {
4137 found = 1;
4138 break;
4139 }
4140 }
4141 }
4142
4143 if (found == 0 && *deftype != ROFFDEF_ANY) {
4144 if (*deftype & ROFFDEF_REN) {
4145 /*
4146 * This might still be a request,
4147 * so do not treat it as undefined yet.
4148 */
4149 *deftype = ROFFDEF_UNDEF;
4150 return NULL;
4151 }
4152
4153 /* Using an undefined string defines it to be empty. */
4154
4155 roff_setstrn(&r->strtab, name, len, "", 0, 0);
4156 roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
4157 }
4158
4159 *deftype = 0;
4160 return NULL;
4161 }
4162
4163 static void
4164 roff_freestr(struct roffkv *r)
4165 {
4166 struct roffkv *n, *nn;
4167
4168 for (n = r; n; n = nn) {
4169 free(n->key.p);
4170 free(n->val.p);
4171 nn = n->next;
4172 free(n);
4173 }
4174 }
4175
4176 /* --- accessors and utility functions ------------------------------------ */
4177
4178 /*
4179 * Duplicate an input string, making the appropriate character
4180 * conversations (as stipulated by `tr') along the way.
4181 * Returns a heap-allocated string with all the replacements made.
4182 */
4183 char *
4184 roff_strdup(const struct roff *r, const char *p)
4185 {
4186 const struct roffkv *cp;
4187 char *res;
4188 const char *pp;
4189 size_t ssz, sz;
4190 enum mandoc_esc esc;
4191
4192 if (NULL == r->xmbtab && NULL == r->xtab)
4193 return mandoc_strdup(p);
4194 else if ('\0' == *p)
4195 return mandoc_strdup("");
4196
4197 /*
4198 * Step through each character looking for term matches
4199 * (remember that a `tr' can be invoked with an escape, which is
4200 * a glyph but the escape is multi-character).
4201 * We only do this if the character hash has been initialised
4202 * and the string is >0 length.
4203 */
4204
4205 res = NULL;
4206 ssz = 0;
4207
4208 while ('\0' != *p) {
4209 assert((unsigned int)*p < 128);
4210 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4211 sz = r->xtab[(int)*p].sz;
4212 res = mandoc_realloc(res, ssz + sz + 1);
4213 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4214 ssz += sz;
4215 p++;
4216 continue;
4217 } else if ('\\' != *p) {
4218 res = mandoc_realloc(res, ssz + 2);
4219 res[ssz++] = *p++;
4220 continue;
4221 }
4222
4223 /* Search for term matches. */
4224 for (cp = r->xmbtab; cp; cp = cp->next)
4225 if (0 == strncmp(p, cp->key.p, cp->key.sz))
4226 break;
4227
4228 if (NULL != cp) {
4229 /*
4230 * A match has been found.
4231 * Append the match to the array and move
4232 * forward by its keysize.
4233 */
4234 res = mandoc_realloc(res,
4235 ssz + cp->val.sz + 1);
4236 memcpy(res + ssz, cp->val.p, cp->val.sz);
4237 ssz += cp->val.sz;
4238 p += (int)cp->key.sz;
4239 continue;
4240 }
4241
4242 /*
4243 * Handle escapes carefully: we need to copy
4244 * over just the escape itself, or else we might
4245 * do replacements within the escape itself.
4246 * Make sure to pass along the bogus string.
4247 */
4248 pp = p++;
4249 esc = mandoc_escape(&p, NULL, NULL);
4250 if (ESCAPE_ERROR == esc) {
4251 sz = strlen(pp);
4252 res = mandoc_realloc(res, ssz + sz + 1);
4253 memcpy(res + ssz, pp, sz);
4254 break;
4255 }
4256 /*
4257 * We bail out on bad escapes.
4258 * No need to warn: we already did so when
4259 * roff_expand() was called.
4260 */
4261 sz = (int)(p - pp);
4262 res = mandoc_realloc(res, ssz + sz + 1);
4263 memcpy(res + ssz, pp, sz);
4264 ssz += sz;
4265 }
4266
4267 res[(int)ssz] = '\0';
4268 return res;
4269 }
4270
4271 int
4272 roff_getformat(const struct roff *r)
4273 {
4274
4275 return r->format;
4276 }
4277
4278 /*
4279 * Find out whether a line is a macro line or not.
4280 * If it is, adjust the current position and return one; if it isn't,
4281 * return zero and don't change the current position.
4282 * If the control character has been set with `.cc', then let that grain
4283 * precedence.
4284 * This is slighly contrary to groff, where using the non-breaking
4285 * control character when `cc' has been invoked will cause the
4286 * non-breaking macro contents to be printed verbatim.
4287 */
4288 int
4289 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4290 {
4291 int pos;
4292
4293 pos = *ppos;
4294
4295 if (r->control != '\0' && cp[pos] == r->control)
4296 pos++;
4297 else if (r->control != '\0')
4298 return 0;
4299 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4300 pos += 2;
4301 else if ('.' == cp[pos] || '\'' == cp[pos])
4302 pos++;
4303 else
4304 return 0;
4305
4306 while (' ' == cp[pos] || '\t' == cp[pos])
4307 pos++;
4308
4309 *ppos = pos;
4310 return 1;
4311 }