]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
The non-standard .EX/.EE macro pair was invented for Version 9 AT&T UNIX
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.366 2019/07/01 22:56:24 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31 #include "mandoc_aux.h"
32 #include "mandoc_ohash.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "mandoc_parse.h"
36 #include "libmandoc.h"
37 #include "roff_int.h"
38 #include "tbl_parse.h"
39 #include "eqn_parse.h"
40
41 /*
42 * ASCII_ESC is used to signal from roff_getarg() to roff_expand()
43 * that an escape sequence resulted from copy-in processing and
44 * needs to be checked or interpolated. As it is used nowhere
45 * else, it is defined here rather than in a header file.
46 */
47 #define ASCII_ESC 27
48
49 /* Maximum number of string expansions per line, to break infinite loops. */
50 #define EXPAND_LIMIT 1000
51
52 /* Types of definitions of macros and strings. */
53 #define ROFFDEF_USER (1 << 1) /* User-defined. */
54 #define ROFFDEF_PRE (1 << 2) /* Predefined. */
55 #define ROFFDEF_REN (1 << 3) /* Renamed standard macro. */
56 #define ROFFDEF_STD (1 << 4) /* mdoc(7) or man(7) macro. */
57 #define ROFFDEF_ANY (ROFFDEF_USER | ROFFDEF_PRE | \
58 ROFFDEF_REN | ROFFDEF_STD)
59 #define ROFFDEF_UNDEF (1 << 5) /* Completely undefined. */
60
61 /* --- data types --------------------------------------------------------- */
62
/*
 * A minimal string buffer: a pointer together with its cached length.
 */
struct	roffstr {
	char		*p;	/* NUL-terminated buffer */
	size_t		 sz;	/* cached strlen(p) */
};
70
71 /*
72 * A key-value roffstr pair as part of a singly-linked list.
73 */
74 struct roffkv {
75 struct roffstr key;
76 struct roffstr val;
77 struct roffkv *next; /* next in list */
78 };
79
80 /*
81 * A single number register as part of a singly-linked list.
82 */
83 struct roffreg {
84 struct roffstr key;
85 int val;
86 int step;
87 struct roffreg *next;
88 };
89
90 /*
91 * Association of request and macro names with token IDs.
92 */
93 struct roffreq {
94 enum roff_tok tok;
95 char name[];
96 };
97
/*
 * The context of one macro invocation.
 * Nested macro calls require one context per nesting level.
 */
struct	mctx {
	char		**argv;	/* argument vector */
	int		  argc;	/* number of arguments */
	int		  argsz;	/* size of the argument vector */
};
107
/*
 * The complete state of one roff parser instance.
 */
struct	roff {
	struct roff_man	*man;		/* mdoc or man parser */
	struct roffnode	*last;		/* leaf of stack */
	struct mctx	*mstack;	/* stack of macro contexts */
	int		*rstack;	/* stack of inverted `ie' values */
	struct ohash	*reqtab;	/* request lookup table */
	struct roffreg	*regtab;	/* number registers */
	struct roffkv	*strtab;	/* user-defined strings & macros */
	struct roffkv	*rentab;	/* renamed strings & macros */
	struct roffkv	*xmbtab;	/* multi-byte trans table (`tr') */
	struct roffstr	*xtab;		/* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl;	/* first table parsed */
	struct tbl_node	*last_tbl;	/* last table parsed */
	struct tbl_node	*tbl;		/* current table being parsed */
	struct eqn_node	*last_eqn;	/* equation parser */
	struct eqn_node	*eqn;		/* active equation parser */
	int		 eqn_inline;	/* current equation is inline */
	int		 options;	/* parse options */
	int		 mstacksz;	/* current size of mstack */
	int		 mstackpos;	/* position in mstack */
	int		 rstacksz;	/* current size limit of rstack */
	int		 rstackpos;	/* position in rstack */
	int		 format;	/* current file in mdoc or man format */
	char		 control;	/* control character */
	char		 escape;	/* escape character */
};
135
136 /*
137 * A macro definition, condition, or ignored block.
138 */
139 struct roffnode {
140 enum roff_tok tok; /* type of node */
141 struct roffnode *parent; /* up one in stack */
142 int line; /* parse line */
143 int col; /* parse col */
144 char *name; /* node name, e.g. macro name */
145 char *end; /* custom end macro of the block */
146 int endspan; /* scope to: 1=eol 2=next line -1=\} */
147 int rule; /* content is: 1=evaluated 0=skipped */
148 };
149
/*
 * Parameter list shared by all request handlers,
 * and the function pointer type for such handlers.
 */
#define	ROFF_ARGS \
	struct roff *r,		/* parse ctx */ \
	enum roff_tok tok,	/* tok of macro */ \
	struct buf *buf,	/* input buffer */ \
	int ln,			/* parse line */ \
	int ppos,		/* original pos in buffer */ \
	int pos,		/* current pos in buffer */ \
	int *offs		/* reset offset of buffer data */

typedef	int (*roffproc)(ROFF_ARGS);
159
160 struct roffmac {
161 roffproc proc; /* process new macro */
162 roffproc text; /* process as child text of macro */
163 roffproc sub; /* process as child of macro */
164 int flags;
165 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
166 };
167
/*
 * A predefined string: its input name and the symbol replacing it.
 */
struct	predef {
	const char	*name;	/* predefined input name */
	const char	*str;	/* replacement symbol */
};

/* Expands to one struct predef initializer, including the trailing comma. */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
175
176 /* --- function prototypes ------------------------------------------------ */
177
178 static int roffnode_cleanscope(struct roff *);
179 static int roffnode_pop(struct roff *);
180 static void roffnode_push(struct roff *, enum roff_tok,
181 const char *, int, int);
182 static void roff_addtbl(struct roff_man *, int, struct tbl_node *);
183 static int roff_als(ROFF_ARGS);
184 static int roff_block(ROFF_ARGS);
185 static int roff_block_text(ROFF_ARGS);
186 static int roff_block_sub(ROFF_ARGS);
187 static int roff_break(ROFF_ARGS);
188 static int roff_cblock(ROFF_ARGS);
189 static int roff_cc(ROFF_ARGS);
190 static int roff_ccond(struct roff *, int, int);
191 static int roff_char(ROFF_ARGS);
192 static int roff_cond(ROFF_ARGS);
193 static int roff_cond_text(ROFF_ARGS);
194 static int roff_cond_sub(ROFF_ARGS);
195 static int roff_ds(ROFF_ARGS);
196 static int roff_ec(ROFF_ARGS);
197 static int roff_eo(ROFF_ARGS);
198 static int roff_eqndelim(struct roff *, struct buf *, int);
199 static int roff_evalcond(struct roff *r, int, char *, int *);
200 static int roff_evalnum(struct roff *, int,
201 const char *, int *, int *, int);
202 static int roff_evalpar(struct roff *, int,
203 const char *, int *, int *, int);
204 static int roff_evalstrcond(const char *, int *);
205 static int roff_expand(struct roff *, struct buf *,
206 int, int, char);
207 static void roff_free1(struct roff *);
208 static void roff_freereg(struct roffreg *);
209 static void roff_freestr(struct roffkv *);
210 static size_t roff_getname(struct roff *, char **, int, int);
211 static int roff_getnum(const char *, int *, int *, int);
212 static int roff_getop(const char *, int *, char *);
213 static int roff_getregn(struct roff *,
214 const char *, size_t, char);
215 static int roff_getregro(const struct roff *,
216 const char *name);
217 static const char *roff_getstrn(struct roff *,
218 const char *, size_t, int *);
219 static int roff_hasregn(const struct roff *,
220 const char *, size_t);
221 static int roff_insec(ROFF_ARGS);
222 static int roff_it(ROFF_ARGS);
223 static int roff_line_ignore(ROFF_ARGS);
224 static void roff_man_alloc1(struct roff_man *);
225 static void roff_man_free1(struct roff_man *);
226 static int roff_manyarg(ROFF_ARGS);
227 static int roff_noarg(ROFF_ARGS);
228 static int roff_nop(ROFF_ARGS);
229 static int roff_nr(ROFF_ARGS);
230 static int roff_onearg(ROFF_ARGS);
231 static enum roff_tok roff_parse(struct roff *, char *, int *,
232 int, int);
233 static int roff_parsetext(struct roff *, struct buf *,
234 int, int *);
235 static int roff_renamed(ROFF_ARGS);
236 static int roff_return(ROFF_ARGS);
237 static int roff_rm(ROFF_ARGS);
238 static int roff_rn(ROFF_ARGS);
239 static int roff_rr(ROFF_ARGS);
240 static void roff_setregn(struct roff *, const char *,
241 size_t, int, char, int);
242 static void roff_setstr(struct roff *,
243 const char *, const char *, int);
244 static void roff_setstrn(struct roffkv **, const char *,
245 size_t, const char *, size_t, int);
246 static int roff_shift(ROFF_ARGS);
247 static int roff_so(ROFF_ARGS);
248 static int roff_tr(ROFF_ARGS);
249 static int roff_Dd(ROFF_ARGS);
250 static int roff_TE(ROFF_ARGS);
251 static int roff_TS(ROFF_ARGS);
252 static int roff_EQ(ROFF_ARGS);
253 static int roff_EN(ROFF_ARGS);
254 static int roff_T_(ROFF_ARGS);
255 static int roff_unsupp(ROFF_ARGS);
256 static int roff_userdef(ROFF_ARGS);
257
258 /* --- constant data ------------------------------------------------------ */
259
260 #define ROFFNUM_SCALE (1 << 0) /* Honour scaling in roff_getnum(). */
261 #define ROFFNUM_WHITE (1 << 1) /* Skip whitespace in roff_evalnum(). */
262
263 const char *__roff_name[MAN_MAX + 1] = {
264 "br", "ce", "fi", "ft",
265 "ll", "mc", "nf",
266 "po", "rj", "sp",
267 "ta", "ti", NULL,
268 "ab", "ad", "af", "aln",
269 "als", "am", "am1", "ami",
270 "ami1", "as", "as1", "asciify",
271 "backtrace", "bd", "bleedat", "blm",
272 "box", "boxa", "bp", "BP",
273 "break", "breakchar", "brnl", "brp",
274 "brpnl", "c2", "cc",
275 "cf", "cflags", "ch", "char",
276 "chop", "class", "close", "CL",
277 "color", "composite", "continue", "cp",
278 "cropat", "cs", "cu", "da",
279 "dch", "Dd", "de", "de1",
280 "defcolor", "dei", "dei1", "device",
281 "devicem", "di", "do", "ds",
282 "ds1", "dwh", "dt", "ec",
283 "ecr", "ecs", "el", "em",
284 "EN", "eo", "EP", "EQ",
285 "errprint", "ev", "evc", "ex",
286 "fallback", "fam", "fc", "fchar",
287 "fcolor", "fdeferlig", "feature", "fkern",
288 "fl", "flig", "fp", "fps",
289 "fschar", "fspacewidth", "fspecial", "ftr",
290 "fzoom", "gcolor", "hc", "hcode",
291 "hidechar", "hla", "hlm", "hpf",
292 "hpfa", "hpfcode", "hw", "hy",
293 "hylang", "hylen", "hym", "hypp",
294 "hys", "ie", "if", "ig",
295 "index", "it", "itc", "IX",
296 "kern", "kernafter", "kernbefore", "kernpair",
297 "lc", "lc_ctype", "lds", "length",
298 "letadj", "lf", "lg", "lhang",
299 "linetabs", "lnr", "lnrf", "lpfx",
300 "ls", "lsm", "lt",
301 "mediasize", "minss", "mk", "mso",
302 "na", "ne", "nh", "nhychar",
303 "nm", "nn", "nop", "nr",
304 "nrf", "nroff", "ns", "nx",
305 "open", "opena", "os", "output",
306 "padj", "papersize", "pc", "pev",
307 "pi", "PI", "pl", "pm",
308 "pn", "pnr", "ps",
309 "psbb", "pshape", "pso", "ptr",
310 "pvs", "rchar", "rd", "recursionlimit",
311 "return", "rfschar", "rhang",
312 "rm", "rn", "rnn", "rr",
313 "rs", "rt", "schar", "sentchar",
314 "shc", "shift", "sizes", "so",
315 "spacewidth", "special", "spreadwarn", "ss",
316 "sty", "substring", "sv", "sy",
317 "T&", "tc", "TE",
318 "TH", "tkf", "tl",
319 "tm", "tm1", "tmc", "tr",
320 "track", "transchar", "trf", "trimat",
321 "trin", "trnt", "troff", "TS",
322 "uf", "ul", "unformat", "unwatch",
323 "unwatchn", "vpt", "vs", "warn",
324 "warnscale", "watch", "watchlength", "watchn",
325 "wh", "while", "write", "writec",
326 "writem", "xflag", ".", NULL,
327 NULL, "text",
328 "Dd", "Dt", "Os", "Sh",
329 "Ss", "Pp", "D1", "Dl",
330 "Bd", "Ed", "Bl", "El",
331 "It", "Ad", "An", "Ap",
332 "Ar", "Cd", "Cm", "Dv",
333 "Er", "Ev", "Ex", "Fa",
334 "Fd", "Fl", "Fn", "Ft",
335 "Ic", "In", "Li", "Nd",
336 "Nm", "Op", "Ot", "Pa",
337 "Rv", "St", "Va", "Vt",
338 "Xr", "%A", "%B", "%D",
339 "%I", "%J", "%N", "%O",
340 "%P", "%R", "%T", "%V",
341 "Ac", "Ao", "Aq", "At",
342 "Bc", "Bf", "Bo", "Bq",
343 "Bsx", "Bx", "Db", "Dc",
344 "Do", "Dq", "Ec", "Ef",
345 "Em", "Eo", "Fx", "Ms",
346 "No", "Ns", "Nx", "Ox",
347 "Pc", "Pf", "Po", "Pq",
348 "Qc", "Ql", "Qo", "Qq",
349 "Re", "Rs", "Sc", "So",
350 "Sq", "Sm", "Sx", "Sy",
351 "Tn", "Ux", "Xc", "Xo",
352 "Fo", "Fc", "Oo", "Oc",
353 "Bk", "Ek", "Bt", "Hf",
354 "Fr", "Ud", "Lb", "Lp",
355 "Lk", "Mt", "Brq", "Bro",
356 "Brc", "%C", "Es", "En",
357 "Dx", "%Q", "%U", "Ta",
358 NULL,
359 "TH", "SH", "SS", "TP",
360 "TQ",
361 "LP", "PP", "P", "IP",
362 "HP", "SM", "SB", "BI",
363 "IB", "BR", "RB", "R",
364 "B", "I", "IR", "RI",
365 "RE", "RS", "DT", "UC",
366 "PD", "AT", "in",
367 "SY", "YS", "OP",
368 "EX", "EE", "UR",
369 "UE", "MT", "ME", NULL
370 };
371 const char *const *roff_name = __roff_name;
372
373 static struct roffmac roffs[TOKEN_NONE] = {
374 { roff_noarg, NULL, NULL, 0 }, /* br */
375 { roff_onearg, NULL, NULL, 0 }, /* ce */
376 { roff_noarg, NULL, NULL, 0 }, /* fi */
377 { roff_onearg, NULL, NULL, 0 }, /* ft */
378 { roff_onearg, NULL, NULL, 0 }, /* ll */
379 { roff_onearg, NULL, NULL, 0 }, /* mc */
380 { roff_noarg, NULL, NULL, 0 }, /* nf */
381 { roff_onearg, NULL, NULL, 0 }, /* po */
382 { roff_onearg, NULL, NULL, 0 }, /* rj */
383 { roff_onearg, NULL, NULL, 0 }, /* sp */
384 { roff_manyarg, NULL, NULL, 0 }, /* ta */
385 { roff_onearg, NULL, NULL, 0 }, /* ti */
386 { NULL, NULL, NULL, 0 }, /* ROFF_MAX */
387 { roff_unsupp, NULL, NULL, 0 }, /* ab */
388 { roff_line_ignore, NULL, NULL, 0 }, /* ad */
389 { roff_line_ignore, NULL, NULL, 0 }, /* af */
390 { roff_unsupp, NULL, NULL, 0 }, /* aln */
391 { roff_als, NULL, NULL, 0 }, /* als */
392 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am */
393 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am1 */
394 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami */
395 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami1 */
396 { roff_ds, NULL, NULL, 0 }, /* as */
397 { roff_ds, NULL, NULL, 0 }, /* as1 */
398 { roff_unsupp, NULL, NULL, 0 }, /* asciify */
399 { roff_line_ignore, NULL, NULL, 0 }, /* backtrace */
400 { roff_line_ignore, NULL, NULL, 0 }, /* bd */
401 { roff_line_ignore, NULL, NULL, 0 }, /* bleedat */
402 { roff_unsupp, NULL, NULL, 0 }, /* blm */
403 { roff_unsupp, NULL, NULL, 0 }, /* box */
404 { roff_unsupp, NULL, NULL, 0 }, /* boxa */
405 { roff_line_ignore, NULL, NULL, 0 }, /* bp */
406 { roff_unsupp, NULL, NULL, 0 }, /* BP */
407 { roff_break, NULL, NULL, 0 }, /* break */
408 { roff_line_ignore, NULL, NULL, 0 }, /* breakchar */
409 { roff_line_ignore, NULL, NULL, 0 }, /* brnl */
410 { roff_noarg, NULL, NULL, 0 }, /* brp */
411 { roff_line_ignore, NULL, NULL, 0 }, /* brpnl */
412 { roff_unsupp, NULL, NULL, 0 }, /* c2 */
413 { roff_cc, NULL, NULL, 0 }, /* cc */
414 { roff_insec, NULL, NULL, 0 }, /* cf */
415 { roff_line_ignore, NULL, NULL, 0 }, /* cflags */
416 { roff_line_ignore, NULL, NULL, 0 }, /* ch */
417 { roff_char, NULL, NULL, 0 }, /* char */
418 { roff_unsupp, NULL, NULL, 0 }, /* chop */
419 { roff_line_ignore, NULL, NULL, 0 }, /* class */
420 { roff_insec, NULL, NULL, 0 }, /* close */
421 { roff_unsupp, NULL, NULL, 0 }, /* CL */
422 { roff_line_ignore, NULL, NULL, 0 }, /* color */
423 { roff_unsupp, NULL, NULL, 0 }, /* composite */
424 { roff_unsupp, NULL, NULL, 0 }, /* continue */
425 { roff_line_ignore, NULL, NULL, 0 }, /* cp */
426 { roff_line_ignore, NULL, NULL, 0 }, /* cropat */
427 { roff_line_ignore, NULL, NULL, 0 }, /* cs */
428 { roff_line_ignore, NULL, NULL, 0 }, /* cu */
429 { roff_unsupp, NULL, NULL, 0 }, /* da */
430 { roff_unsupp, NULL, NULL, 0 }, /* dch */
431 { roff_Dd, NULL, NULL, 0 }, /* Dd */
432 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de */
433 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de1 */
434 { roff_line_ignore, NULL, NULL, 0 }, /* defcolor */
435 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei */
436 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei1 */
437 { roff_unsupp, NULL, NULL, 0 }, /* device */
438 { roff_unsupp, NULL, NULL, 0 }, /* devicem */
439 { roff_unsupp, NULL, NULL, 0 }, /* di */
440 { roff_unsupp, NULL, NULL, 0 }, /* do */
441 { roff_ds, NULL, NULL, 0 }, /* ds */
442 { roff_ds, NULL, NULL, 0 }, /* ds1 */
443 { roff_unsupp, NULL, NULL, 0 }, /* dwh */
444 { roff_unsupp, NULL, NULL, 0 }, /* dt */
445 { roff_ec, NULL, NULL, 0 }, /* ec */
446 { roff_unsupp, NULL, NULL, 0 }, /* ecr */
447 { roff_unsupp, NULL, NULL, 0 }, /* ecs */
448 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* el */
449 { roff_unsupp, NULL, NULL, 0 }, /* em */
450 { roff_EN, NULL, NULL, 0 }, /* EN */
451 { roff_eo, NULL, NULL, 0 }, /* eo */
452 { roff_unsupp, NULL, NULL, 0 }, /* EP */
453 { roff_EQ, NULL, NULL, 0 }, /* EQ */
454 { roff_line_ignore, NULL, NULL, 0 }, /* errprint */
455 { roff_unsupp, NULL, NULL, 0 }, /* ev */
456 { roff_unsupp, NULL, NULL, 0 }, /* evc */
457 { roff_unsupp, NULL, NULL, 0 }, /* ex */
458 { roff_line_ignore, NULL, NULL, 0 }, /* fallback */
459 { roff_line_ignore, NULL, NULL, 0 }, /* fam */
460 { roff_unsupp, NULL, NULL, 0 }, /* fc */
461 { roff_unsupp, NULL, NULL, 0 }, /* fchar */
462 { roff_line_ignore, NULL, NULL, 0 }, /* fcolor */
463 { roff_line_ignore, NULL, NULL, 0 }, /* fdeferlig */
464 { roff_line_ignore, NULL, NULL, 0 }, /* feature */
465 { roff_line_ignore, NULL, NULL, 0 }, /* fkern */
466 { roff_line_ignore, NULL, NULL, 0 }, /* fl */
467 { roff_line_ignore, NULL, NULL, 0 }, /* flig */
468 { roff_line_ignore, NULL, NULL, 0 }, /* fp */
469 { roff_line_ignore, NULL, NULL, 0 }, /* fps */
470 { roff_unsupp, NULL, NULL, 0 }, /* fschar */
471 { roff_line_ignore, NULL, NULL, 0 }, /* fspacewidth */
472 { roff_line_ignore, NULL, NULL, 0 }, /* fspecial */
473 { roff_line_ignore, NULL, NULL, 0 }, /* ftr */
474 { roff_line_ignore, NULL, NULL, 0 }, /* fzoom */
475 { roff_line_ignore, NULL, NULL, 0 }, /* gcolor */
476 { roff_line_ignore, NULL, NULL, 0 }, /* hc */
477 { roff_line_ignore, NULL, NULL, 0 }, /* hcode */
478 { roff_line_ignore, NULL, NULL, 0 }, /* hidechar */
479 { roff_line_ignore, NULL, NULL, 0 }, /* hla */
480 { roff_line_ignore, NULL, NULL, 0 }, /* hlm */
481 { roff_line_ignore, NULL, NULL, 0 }, /* hpf */
482 { roff_line_ignore, NULL, NULL, 0 }, /* hpfa */
483 { roff_line_ignore, NULL, NULL, 0 }, /* hpfcode */
484 { roff_line_ignore, NULL, NULL, 0 }, /* hw */
485 { roff_line_ignore, NULL, NULL, 0 }, /* hy */
486 { roff_line_ignore, NULL, NULL, 0 }, /* hylang */
487 { roff_line_ignore, NULL, NULL, 0 }, /* hylen */
488 { roff_line_ignore, NULL, NULL, 0 }, /* hym */
489 { roff_line_ignore, NULL, NULL, 0 }, /* hypp */
490 { roff_line_ignore, NULL, NULL, 0 }, /* hys */
491 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* ie */
492 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* if */
493 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ig */
494 { roff_unsupp, NULL, NULL, 0 }, /* index */
495 { roff_it, NULL, NULL, 0 }, /* it */
496 { roff_unsupp, NULL, NULL, 0 }, /* itc */
497 { roff_line_ignore, NULL, NULL, 0 }, /* IX */
498 { roff_line_ignore, NULL, NULL, 0 }, /* kern */
499 { roff_line_ignore, NULL, NULL, 0 }, /* kernafter */
500 { roff_line_ignore, NULL, NULL, 0 }, /* kernbefore */
501 { roff_line_ignore, NULL, NULL, 0 }, /* kernpair */
502 { roff_unsupp, NULL, NULL, 0 }, /* lc */
503 { roff_unsupp, NULL, NULL, 0 }, /* lc_ctype */
504 { roff_unsupp, NULL, NULL, 0 }, /* lds */
505 { roff_unsupp, NULL, NULL, 0 }, /* length */
506 { roff_line_ignore, NULL, NULL, 0 }, /* letadj */
507 { roff_insec, NULL, NULL, 0 }, /* lf */
508 { roff_line_ignore, NULL, NULL, 0 }, /* lg */
509 { roff_line_ignore, NULL, NULL, 0 }, /* lhang */
510 { roff_unsupp, NULL, NULL, 0 }, /* linetabs */
511 { roff_unsupp, NULL, NULL, 0 }, /* lnr */
512 { roff_unsupp, NULL, NULL, 0 }, /* lnrf */
513 { roff_unsupp, NULL, NULL, 0 }, /* lpfx */
514 { roff_line_ignore, NULL, NULL, 0 }, /* ls */
515 { roff_unsupp, NULL, NULL, 0 }, /* lsm */
516 { roff_line_ignore, NULL, NULL, 0 }, /* lt */
517 { roff_line_ignore, NULL, NULL, 0 }, /* mediasize */
518 { roff_line_ignore, NULL, NULL, 0 }, /* minss */
519 { roff_line_ignore, NULL, NULL, 0 }, /* mk */
520 { roff_insec, NULL, NULL, 0 }, /* mso */
521 { roff_line_ignore, NULL, NULL, 0 }, /* na */
522 { roff_line_ignore, NULL, NULL, 0 }, /* ne */
523 { roff_line_ignore, NULL, NULL, 0 }, /* nh */
524 { roff_line_ignore, NULL, NULL, 0 }, /* nhychar */
525 { roff_unsupp, NULL, NULL, 0 }, /* nm */
526 { roff_unsupp, NULL, NULL, 0 }, /* nn */
527 { roff_nop, NULL, NULL, 0 }, /* nop */
528 { roff_nr, NULL, NULL, 0 }, /* nr */
529 { roff_unsupp, NULL, NULL, 0 }, /* nrf */
530 { roff_line_ignore, NULL, NULL, 0 }, /* nroff */
531 { roff_line_ignore, NULL, NULL, 0 }, /* ns */
532 { roff_insec, NULL, NULL, 0 }, /* nx */
533 { roff_insec, NULL, NULL, 0 }, /* open */
534 { roff_insec, NULL, NULL, 0 }, /* opena */
535 { roff_line_ignore, NULL, NULL, 0 }, /* os */
536 { roff_unsupp, NULL, NULL, 0 }, /* output */
537 { roff_line_ignore, NULL, NULL, 0 }, /* padj */
538 { roff_line_ignore, NULL, NULL, 0 }, /* papersize */
539 { roff_line_ignore, NULL, NULL, 0 }, /* pc */
540 { roff_line_ignore, NULL, NULL, 0 }, /* pev */
541 { roff_insec, NULL, NULL, 0 }, /* pi */
542 { roff_unsupp, NULL, NULL, 0 }, /* PI */
543 { roff_line_ignore, NULL, NULL, 0 }, /* pl */
544 { roff_line_ignore, NULL, NULL, 0 }, /* pm */
545 { roff_line_ignore, NULL, NULL, 0 }, /* pn */
546 { roff_line_ignore, NULL, NULL, 0 }, /* pnr */
547 { roff_line_ignore, NULL, NULL, 0 }, /* ps */
548 { roff_unsupp, NULL, NULL, 0 }, /* psbb */
549 { roff_unsupp, NULL, NULL, 0 }, /* pshape */
550 { roff_insec, NULL, NULL, 0 }, /* pso */
551 { roff_line_ignore, NULL, NULL, 0 }, /* ptr */
552 { roff_line_ignore, NULL, NULL, 0 }, /* pvs */
553 { roff_unsupp, NULL, NULL, 0 }, /* rchar */
554 { roff_line_ignore, NULL, NULL, 0 }, /* rd */
555 { roff_line_ignore, NULL, NULL, 0 }, /* recursionlimit */
556 { roff_return, NULL, NULL, 0 }, /* return */
557 { roff_unsupp, NULL, NULL, 0 }, /* rfschar */
558 { roff_line_ignore, NULL, NULL, 0 }, /* rhang */
559 { roff_rm, NULL, NULL, 0 }, /* rm */
560 { roff_rn, NULL, NULL, 0 }, /* rn */
561 { roff_unsupp, NULL, NULL, 0 }, /* rnn */
562 { roff_rr, NULL, NULL, 0 }, /* rr */
563 { roff_line_ignore, NULL, NULL, 0 }, /* rs */
564 { roff_line_ignore, NULL, NULL, 0 }, /* rt */
565 { roff_unsupp, NULL, NULL, 0 }, /* schar */
566 { roff_line_ignore, NULL, NULL, 0 }, /* sentchar */
567 { roff_line_ignore, NULL, NULL, 0 }, /* shc */
568 { roff_shift, NULL, NULL, 0 }, /* shift */
569 { roff_line_ignore, NULL, NULL, 0 }, /* sizes */
570 { roff_so, NULL, NULL, 0 }, /* so */
571 { roff_line_ignore, NULL, NULL, 0 }, /* spacewidth */
572 { roff_line_ignore, NULL, NULL, 0 }, /* special */
573 { roff_line_ignore, NULL, NULL, 0 }, /* spreadwarn */
574 { roff_line_ignore, NULL, NULL, 0 }, /* ss */
575 { roff_line_ignore, NULL, NULL, 0 }, /* sty */
576 { roff_unsupp, NULL, NULL, 0 }, /* substring */
577 { roff_line_ignore, NULL, NULL, 0 }, /* sv */
578 { roff_insec, NULL, NULL, 0 }, /* sy */
579 { roff_T_, NULL, NULL, 0 }, /* T& */
580 { roff_unsupp, NULL, NULL, 0 }, /* tc */
581 { roff_TE, NULL, NULL, 0 }, /* TE */
582 { roff_Dd, NULL, NULL, 0 }, /* TH */
583 { roff_line_ignore, NULL, NULL, 0 }, /* tkf */
584 { roff_unsupp, NULL, NULL, 0 }, /* tl */
585 { roff_line_ignore, NULL, NULL, 0 }, /* tm */
586 { roff_line_ignore, NULL, NULL, 0 }, /* tm1 */
587 { roff_line_ignore, NULL, NULL, 0 }, /* tmc */
588 { roff_tr, NULL, NULL, 0 }, /* tr */
589 { roff_line_ignore, NULL, NULL, 0 }, /* track */
590 { roff_line_ignore, NULL, NULL, 0 }, /* transchar */
591 { roff_insec, NULL, NULL, 0 }, /* trf */
592 { roff_line_ignore, NULL, NULL, 0 }, /* trimat */
593 { roff_unsupp, NULL, NULL, 0 }, /* trin */
594 { roff_unsupp, NULL, NULL, 0 }, /* trnt */
595 { roff_line_ignore, NULL, NULL, 0 }, /* troff */
596 { roff_TS, NULL, NULL, 0 }, /* TS */
597 { roff_line_ignore, NULL, NULL, 0 }, /* uf */
598 { roff_line_ignore, NULL, NULL, 0 }, /* ul */
599 { roff_unsupp, NULL, NULL, 0 }, /* unformat */
600 { roff_line_ignore, NULL, NULL, 0 }, /* unwatch */
601 { roff_line_ignore, NULL, NULL, 0 }, /* unwatchn */
602 { roff_line_ignore, NULL, NULL, 0 }, /* vpt */
603 { roff_line_ignore, NULL, NULL, 0 }, /* vs */
604 { roff_line_ignore, NULL, NULL, 0 }, /* warn */
605 { roff_line_ignore, NULL, NULL, 0 }, /* warnscale */
606 { roff_line_ignore, NULL, NULL, 0 }, /* watch */
607 { roff_line_ignore, NULL, NULL, 0 }, /* watchlength */
608 { roff_line_ignore, NULL, NULL, 0 }, /* watchn */
609 { roff_unsupp, NULL, NULL, 0 }, /* wh */
610 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/
611 { roff_insec, NULL, NULL, 0 }, /* write */
612 { roff_insec, NULL, NULL, 0 }, /* writec */
613 { roff_insec, NULL, NULL, 0 }, /* writem */
614 { roff_line_ignore, NULL, NULL, 0 }, /* xflag */
615 { roff_cblock, NULL, NULL, 0 }, /* . */
616 { roff_renamed, NULL, NULL, 0 },
617 { roff_userdef, NULL, NULL, 0 }
618 };
619
620 /* Array of injected predefined strings. */
621 #define PREDEFS_MAX 38
622 static const struct predef predefs[PREDEFS_MAX] = {
623 #include "predefs.in"
624 };
625
/* File-scope parser state; all four variables are cleared by roff_reset(). */
static	int			 roffce_lines;	/* number of input lines to center */
static	struct roff_node	*roffce_node;	/* active request */
static	int			 roffit_lines;	/* number of lines to delay */
static	char			*roffit_macro;	/* nil-terminated macro line */
630
631
632 /* --- request table ------------------------------------------------------ */
633
634 struct ohash *
635 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
636 {
637 struct ohash *htab;
638 struct roffreq *req;
639 enum roff_tok tok;
640 size_t sz;
641 unsigned int slot;
642
643 htab = mandoc_malloc(sizeof(*htab));
644 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
645
646 for (tok = mintok; tok < maxtok; tok++) {
647 if (roff_name[tok] == NULL)
648 continue;
649 sz = strlen(roff_name[tok]);
650 req = mandoc_malloc(sizeof(*req) + sz + 1);
651 req->tok = tok;
652 memcpy(req->name, roff_name[tok], sz + 1);
653 slot = ohash_qlookup(htab, req->name);
654 ohash_insert(htab, slot, req);
655 }
656 return htab;
657 }
658
/*
 * Free a table created by roffhash_alloc(), including all its entries.
 * Passing NULL is allowed and does nothing.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 iter;

	if (htab == NULL)
		return;

	entry = ohash_first(htab, &iter);
	while (entry != NULL) {
		free(entry);
		entry = ohash_next(htab, &iter);
	}
	ohash_delete(htab);
	free(htab);
}
673
674 enum roff_tok
675 roffhash_find(struct ohash *htab, const char *name, size_t sz)
676 {
677 struct roffreq *req;
678 const char *end;
679
680 if (sz) {
681 end = name + sz;
682 req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
683 } else
684 req = ohash_find(htab, ohash_qlookup(htab, name));
685 return req == NULL ? TOKEN_NONE : req->tok;
686 }
687
688 /* --- stack of request blocks -------------------------------------------- */
689
690 /*
691 * Pop the current node off of the stack of roff instructions currently
692 * pending. Return 1 if it is a loop or 0 otherwise.
693 */
694 static int
695 roffnode_pop(struct roff *r)
696 {
697 struct roffnode *p;
698 int inloop;
699
700 p = r->last;
701 inloop = p->tok == ROFF_while;
702 r->last = p->parent;
703 free(p->name);
704 free(p->end);
705 free(p);
706 return inloop;
707 }
708
709 /*
710 * Push a roff node onto the instruction stack. This must later be
711 * removed with roffnode_pop().
712 */
713 static void
714 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
715 int line, int col)
716 {
717 struct roffnode *p;
718
719 p = mandoc_calloc(1, sizeof(struct roffnode));
720 p->tok = tok;
721 if (name)
722 p->name = mandoc_strdup(name);
723 p->parent = r->last;
724 p->line = line;
725 p->col = col;
726 p->rule = p->parent ? p->parent->rule : 0;
727
728 r->last = p;
729 }
730
731 /* --- roff parser state data management ---------------------------------- */
732
733 static void
734 roff_free1(struct roff *r)
735 {
736 int i;
737
738 tbl_free(r->first_tbl);
739 r->first_tbl = r->last_tbl = r->tbl = NULL;
740
741 eqn_free(r->last_eqn);
742 r->last_eqn = r->eqn = NULL;
743
744 while (r->mstackpos >= 0)
745 roff_userret(r);
746
747 while (r->last)
748 roffnode_pop(r);
749
750 free (r->rstack);
751 r->rstack = NULL;
752 r->rstacksz = 0;
753 r->rstackpos = -1;
754
755 roff_freereg(r->regtab);
756 r->regtab = NULL;
757
758 roff_freestr(r->strtab);
759 roff_freestr(r->rentab);
760 roff_freestr(r->xmbtab);
761 r->strtab = r->rentab = r->xmbtab = NULL;
762
763 if (r->xtab)
764 for (i = 0; i < 128; i++)
765 free(r->xtab[i].p);
766 free(r->xtab);
767 r->xtab = NULL;
768 }
769
770 void
771 roff_reset(struct roff *r)
772 {
773 roff_free1(r);
774 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
775 r->control = '\0';
776 r->escape = '\\';
777 roffce_lines = 0;
778 roffce_node = NULL;
779 roffit_lines = 0;
780 roffit_macro = NULL;
781 }
782
783 void
784 roff_free(struct roff *r)
785 {
786 int i;
787
788 roff_free1(r);
789 for (i = 0; i < r->mstacksz; i++)
790 free(r->mstack[i].argv);
791 free(r->mstack);
792 roffhash_free(r->reqtab);
793 free(r);
794 }
795
796 struct roff *
797 roff_alloc(int options)
798 {
799 struct roff *r;
800
801 r = mandoc_calloc(1, sizeof(struct roff));
802 r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
803 r->options = options;
804 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
805 r->mstackpos = -1;
806 r->rstackpos = -1;
807 r->escape = '\\';
808 return r;
809 }
810
811 /* --- syntax tree state data management ---------------------------------- */
812
813 static void
814 roff_man_free1(struct roff_man *man)
815 {
816 if (man->meta.first != NULL)
817 roff_node_delete(man, man->meta.first);
818 free(man->meta.msec);
819 free(man->meta.vol);
820 free(man->meta.os);
821 free(man->meta.arch);
822 free(man->meta.title);
823 free(man->meta.name);
824 free(man->meta.date);
825 free(man->meta.sodest);
826 }
827
828 void
829 roff_state_reset(struct roff_man *man)
830 {
831 man->last = man->meta.first;
832 man->last_es = NULL;
833 man->flags = 0;
834 man->lastsec = man->lastnamed = SEC_NONE;
835 man->next = ROFF_NEXT_CHILD;
836 roff_setreg(man->roff, "nS", 0, '=');
837 }
838
839 static void
840 roff_man_alloc1(struct roff_man *man)
841 {
842 memset(&man->meta, 0, sizeof(man->meta));
843 man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
844 man->meta.first->type = ROFFT_ROOT;
845 man->meta.macroset = MACROSET_NONE;
846 roff_state_reset(man);
847 }
848
/*
 * Discard the current syntax tree and metadata of man
 * and start over with an empty tree.
 */
void
roff_man_reset(struct roff_man *man)
{
	/* Free the old content before the metadata gets zeroed. */
	roff_man_free1(man);
	roff_man_alloc1(man);
}
855
/*
 * Destroy man: free the tree, the metadata, and the structure itself.
 */
void
roff_man_free(struct roff_man *man)
{
	/* The content has to go before the container. */
	roff_man_free1(man);
	free(man);
}
862
863 struct roff_man *
864 roff_man_alloc(struct roff *roff, const char *os_s, int quick)
865 {
866 struct roff_man *man;
867
868 man = mandoc_calloc(1, sizeof(*man));
869 man->roff = roff;
870 man->os_s = os_s;
871 man->quick = quick;
872 roff_man_alloc1(man);
873 roff->man = man;
874 return man;
875 }
876
877 /* --- syntax tree handling ----------------------------------------------- */
878
879 struct roff_node *
880 roff_node_alloc(struct roff_man *man, int line, int pos,
881 enum roff_type type, int tok)
882 {
883 struct roff_node *n;
884
885 n = mandoc_calloc(1, sizeof(*n));
886 n->line = line;
887 n->pos = pos;
888 n->tok = tok;
889 n->type = type;
890 n->sec = man->lastsec;
891
892 if (man->flags & MDOC_SYNOPSIS)
893 n->flags |= NODE_SYNPRETTY;
894 else
895 n->flags &= ~NODE_SYNPRETTY;
896 if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL)
897 n->flags |= NODE_NOFILL;
898 else
899 n->flags &= ~NODE_NOFILL;
900 if (man->flags & MDOC_NEWLINE)
901 n->flags |= NODE_LINE;
902 man->flags &= ~MDOC_NEWLINE;
903
904 return n;
905 }
906
907 void
908 roff_node_append(struct roff_man *man, struct roff_node *n)
909 {
910
911 switch (man->next) {
912 case ROFF_NEXT_SIBLING:
913 if (man->last->next != NULL) {
914 n->next = man->last->next;
915 man->last->next->prev = n;
916 } else
917 man->last->parent->last = n;
918 man->last->next = n;
919 n->prev = man->last;
920 n->parent = man->last->parent;
921 break;
922 case ROFF_NEXT_CHILD:
923 if (man->last->child != NULL) {
924 n->next = man->last->child;
925 man->last->child->prev = n;
926 } else
927 man->last->last = n;
928 man->last->child = n;
929 n->parent = man->last;
930 break;
931 default:
932 abort();
933 }
934 man->last = n;
935
936 switch (n->type) {
937 case ROFFT_HEAD:
938 n->parent->head = n;
939 break;
940 case ROFFT_BODY:
941 if (n->end != ENDBODY_NOT)
942 return;
943 n->parent->body = n;
944 break;
945 case ROFFT_TAIL:
946 n->parent->tail = n;
947 break;
948 default:
949 return;
950 }
951
952 /*
953 * Copy over the normalised-data pointer of our parent. Not
954 * everybody has one, but copying a null pointer is fine.
955 */
956
957 n->norm = n->parent->norm;
958 assert(n->parent->type == ROFFT_BLOCK);
959 }
960
961 void
962 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
963 {
964 struct roff_node *n;
965
966 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
967 n->string = roff_strdup(man->roff, word);
968 roff_node_append(man, n);
969 n->flags |= NODE_VALID | NODE_ENDED;
970 man->next = ROFF_NEXT_SIBLING;
971 }
972
973 void
974 roff_word_append(struct roff_man *man, const char *word)
975 {
976 struct roff_node *n;
977 char *addstr, *newstr;
978
979 n = man->last;
980 addstr = roff_strdup(man->roff, word);
981 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
982 free(addstr);
983 free(n->string);
984 n->string = newstr;
985 man->next = ROFF_NEXT_SIBLING;
986 }
987
988 void
989 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
990 {
991 struct roff_node *n;
992
993 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
994 roff_node_append(man, n);
995 man->next = ROFF_NEXT_CHILD;
996 }
997
998 struct roff_node *
999 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
1000 {
1001 struct roff_node *n;
1002
1003 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1004 roff_node_append(man, n);
1005 man->next = ROFF_NEXT_CHILD;
1006 return n;
1007 }
1008
1009 struct roff_node *
1010 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1011 {
1012 struct roff_node *n;
1013
1014 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1015 roff_node_append(man, n);
1016 man->next = ROFF_NEXT_CHILD;
1017 return n;
1018 }
1019
1020 struct roff_node *
1021 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1022 {
1023 struct roff_node *n;
1024
1025 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1026 roff_node_append(man, n);
1027 man->next = ROFF_NEXT_CHILD;
1028 return n;
1029 }
1030
1031 static void
1032 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1033 {
1034 struct roff_node *n;
1035 struct tbl_span *span;
1036
1037 if (man->meta.macroset == MACROSET_MAN)
1038 man_breakscope(man, ROFF_TS);
1039 while ((span = tbl_span(tbl)) != NULL) {
1040 n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1041 n->span = span;
1042 roff_node_append(man, n);
1043 n->flags |= NODE_VALID | NODE_ENDED;
1044 man->next = ROFF_NEXT_SIBLING;
1045 }
1046 }
1047
1048 void
1049 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1050 {
1051
1052 /* Adjust siblings. */
1053
1054 if (n->prev)
1055 n->prev->next = n->next;
1056 if (n->next)
1057 n->next->prev = n->prev;
1058
1059 /* Adjust parent. */
1060
1061 if (n->parent != NULL) {
1062 if (n->parent->child == n)
1063 n->parent->child = n->next;
1064 if (n->parent->last == n)
1065 n->parent->last = n->prev;
1066 }
1067
1068 /* Adjust parse point. */
1069
1070 if (man == NULL)
1071 return;
1072 if (man->last == n) {
1073 if (n->prev == NULL) {
1074 man->last = n->parent;
1075 man->next = ROFF_NEXT_CHILD;
1076 } else {
1077 man->last = n->prev;
1078 man->next = ROFF_NEXT_SIBLING;
1079 }
1080 }
1081 if (man->meta.first == n)
1082 man->meta.first = NULL;
1083 }
1084
1085 void
1086 roff_node_relink(struct roff_man *man, struct roff_node *n)
1087 {
1088 roff_node_unlink(man, n);
1089 n->prev = n->next = NULL;
1090 roff_node_append(man, n);
1091 }
1092
1093 void
1094 roff_node_free(struct roff_node *n)
1095 {
1096
1097 if (n->args != NULL)
1098 mdoc_argv_free(n->args);
1099 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1100 free(n->norm);
1101 eqn_box_free(n->eqn);
1102 free(n->string);
1103 free(n);
1104 }
1105
1106 void
1107 roff_node_delete(struct roff_man *man, struct roff_node *n)
1108 {
1109
1110 while (n->child != NULL)
1111 roff_node_delete(man, n->child);
1112 roff_node_unlink(man, n);
1113 roff_node_free(n);
1114 }
1115
1116 void
1117 deroff(char **dest, const struct roff_node *n)
1118 {
1119 char *cp;
1120 size_t sz;
1121
1122 if (n->type != ROFFT_TEXT) {
1123 for (n = n->child; n != NULL; n = n->next)
1124 deroff(dest, n);
1125 return;
1126 }
1127
1128 /* Skip leading whitespace. */
1129
1130 for (cp = n->string; *cp != '\0'; cp++) {
1131 if (cp[0] == '\\' && cp[1] != '\0' &&
1132 strchr(" %&0^|~", cp[1]) != NULL)
1133 cp++;
1134 else if ( ! isspace((unsigned char)*cp))
1135 break;
1136 }
1137
1138 /* Skip trailing backslash. */
1139
1140 sz = strlen(cp);
1141 if (sz > 0 && cp[sz - 1] == '\\')
1142 sz--;
1143
1144 /* Skip trailing whitespace. */
1145
1146 for (; sz; sz--)
1147 if ( ! isspace((unsigned char)cp[sz-1]))
1148 break;
1149
1150 /* Skip empty strings. */
1151
1152 if (sz == 0)
1153 return;
1154
1155 if (*dest == NULL) {
1156 *dest = mandoc_strndup(cp, sz);
1157 return;
1158 }
1159
1160 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1161 free(*dest);
1162 *dest = cp;
1163 }
1164
1165 /* --- main functions of the roff parser ---------------------------------- */
1166
/*
 * In the current line, expand escape sequences that produce parsable
 * input text.  Also check the syntax of the remaining escape sequences,
 * which typically produce output glyphs or change formatter state.
 *
 * Processing starts at offset pos in buf->buf, and newesc is the
 * escape character to look for.  The buffer may be reallocated;
 * buf->buf and buf->sz are updated accordingly.
 *
 * Returns ROFF_CONT when the line is ready for further parsing,
 * ROFF_IGN | ROFF_APPEND when the line ends in a continuation,
 * or ROFF_IGN when expansion was given up because the expansion
 * limit or the maximum line length was exceeded.
 */
static int
roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc)
{
	struct mctx	*ctx;	/* current macro call context */
	char		 ubuf[24]; /* buffer to print the number */
	struct roff_node *n;	/* used for header comments */
	const char	*start;	/* start of the string to process */
	char		*stesc;	/* start of an escape sequence ('\\') */
	const char	*esct;	/* type of escape sequence */
	char		*ep;	/* end of comment string */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy buf->buf to */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	size_t		 asz;	/* length of the replacement */
	size_t		 rsz;	/* length of the rest of the string */
	int		 inaml;	/* length returned from mandoc_escape() */
	int		 expand_count;	/* to avoid infinite loops */
	int		 npos;	/* position in numeric expression */
	int		 arg_complete; /* argument not interrupted by eol */
	int		 quote_args; /* true for \\$@, false for \\$* */
	int		 done;	/* no more input available */
	int		 deftype; /* type of definition to paste */
	int		 rcsid;	/* kind of RCS id seen */
	enum mandocerr	 err;	/* for escape sequence problems */
	char		 sign;	/* increment number register */
	char		 term;	/* character terminating the escape */

	/* Search forward for comments. */

	done = 0;
	start = buf->buf + pos;
	for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
		if (stesc[0] != newesc || stesc[1] == '\0')
			continue;
		stesc++;
		if (*stesc != '"' && *stesc != '#')
			continue;

		/* Comment found, look for RCS id. */

		rcsid = 0;
		if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_OPENBSD;
			cp += 8;
		} else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_NETBSD;
			cp += 7;
		}
		if (cp != NULL &&
		    isalnum((unsigned char)*cp) == 0 &&
		    strchr(cp, '$') != NULL) {
			if (r->man->meta.rcsids & rcsid)
				mandoc_msg(MANDOCERR_RCS_REP, ln,
				    (int)(stesc - buf->buf) + 1,
				    "%s", stesc + 1);
			r->man->meta.rcsids |= rcsid;
		}

		/*
		 * Handle trailing whitespace.
		 * Let ep point to the last byte of the line and
		 * move stesc back onto the escape character.
		 */

		ep = strchr(stesc--, '\0') - 1;
		if (*ep == '\n') {
			done = 1;
			ep--;
		}
		if (*ep == ' ' || *ep == '\t')
			mandoc_msg(MANDOCERR_SPACE_EOL,
			    ln, (int)(ep - buf->buf), NULL);

		/*
		 * Save comments preceding the title macro
		 * in the syntax tree.
		 */

		if (newesc != ASCII_ESC && r->format == 0) {
			while (*ep == ' ' || *ep == '\t')
				ep--;
			ep[1] = '\0';
			n = roff_node_alloc(r->man,
			    ln, stesc + 1 - buf->buf,
			    ROFFT_COMMENT, TOKEN_NONE);
			n->string = mandoc_strdup(stesc + 2);
			roff_node_append(r->man, n);
			n->flags |= NODE_VALID | NODE_ENDED;
			r->man->next = ROFF_NEXT_SIBLING;
		}

		/* Line continuation with comment. */

		if (stesc[1] == '#') {
			*stesc = '\0';
			return ROFF_IGN | ROFF_APPEND;
		}

		/*
		 * Discard normal comments, together with blanks
		 * preceding them unless those are escaped.
		 */

		while (stesc > start && stesc[-1] == ' ' &&
		    (stesc == start + 1 || stesc[-2] != '\\'))
			stesc--;
		*stesc = '\0';
		break;
	}

	/* Nothing left to process: stesc is at the terminating NUL. */

	if (stesc == start)
		return ROFF_CONT;
	stesc--;

	/* Notice the end of the input. */

	if (*stesc == '\n') {
		*stesc-- = '\0';
		done = 1;
	}

	/* Scan backwards from the end of the line for escapes. */

	expand_count = 0;
	while (stesc >= start) {
		if (*stesc != newesc) {

			/*
			 * If we have a non-standard escape character,
			 * escape literal backslashes because all
			 * processing in subsequent functions uses
			 * the standard escaping rules.
			 */

			if (newesc != ASCII_ESC && *stesc == '\\') {
				*stesc = '\0';
				buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
				    buf->buf, stesc + 1) + 1;
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				free(buf->buf);
				buf->buf = nbuf;
			}

			/* Search backwards for the next escape. */

			stesc--;
			continue;
		}

		/*
		 * If it is escaped, skip it.
		 * After the loop, cp points to the last byte before
		 * the run of escape characters ending at stesc, so
		 * stesc - cp is the length of that run.
		 */

		for (cp = stesc - 1; cp >= start; cp--)
			if (*cp != r->escape)
				break;

		if ((stesc - cp) % 2 == 0) {
			/* Even run: normalize all of it to backslashes. */
			while (stesc > cp)
				*stesc-- = '\\';
			continue;
		} else if (stesc[1] != '\0') {
			*stesc = '\\';
		} else {
			/* Escape character at the very end of the line. */
			*stesc-- = '\0';
			if (done)
				continue;
			else
				return ROFF_IGN | ROFF_APPEND;
		}

		/* Decide whether to expand or to check only. */

		term = '\0';
		cp = stesc + 1;
		if (*cp == 'E')
			cp++;
		esct = cp;
		switch (*esct) {
		case '*':
		case '$':
			res = NULL;
			break;
		case 'B':
		case 'w':
			term = cp[1];
			/* FALLTHROUGH */
		case 'n':
			sign = cp[1];
			if (sign == '+' || sign == '-')
				cp++;
			res = ubuf;
			break;
		default:
			/* Not expanded here; only validate the syntax. */
			err = MANDOCERR_OK;
			switch(mandoc_escape(&cp, &stnam, &inaml)) {
			case ESCAPE_SPECIAL:
				if (mchars_spec2cp(stnam, inaml) >= 0)
					break;
				/* FALLTHROUGH */
			case ESCAPE_ERROR:
				err = MANDOCERR_ESC_BAD;
				break;
			case ESCAPE_UNDEF:
				err = MANDOCERR_ESC_UNDEF;
				break;
			case ESCAPE_UNSUPP:
				err = MANDOCERR_ESC_UNSUPP;
				break;
			default:
				break;
			}
			if (err != MANDOCERR_OK)
				mandoc_msg(err, ln, (int)(stesc - buf->buf),
				    "%.*s", (int)(cp - stesc), stesc);
			stesc--;
			continue;
		}

		if (EXPAND_LIMIT < ++expand_count) {
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 * A maxl of 0 means the name has no fixed length
		 * and extends up to the terminating character.
		 */

		if (term == '\0') {
			switch (*++cp) {
			case '\0':
				maxl = 0;
				break;
			case '(':
				cp++;
				maxl = 2;
				break;
			case '[':
				cp++;
				term = ']';
				maxl = 0;
				break;
			default:
				maxl = 1;
				break;
			}
		} else {
			cp += 2;
			maxl = 0;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		naml = 0;
		arg_complete = 1;
		while (maxl == 0 || naml < maxl) {
			if (*cp == '\0') {
				mandoc_msg(MANDOCERR_ESC_BAD, ln,
				    (int)(stesc - buf->buf), "%s", stesc);
				arg_complete = 0;
				break;
			}
			if (maxl == 0 && *cp == term) {
				cp++;
				break;
			}
			if (*cp++ != '\\' || *esct != 'w') {
				naml++;
				continue;
			}

			/*
			 * Inside \w, an escape sequence of one of the
			 * classes listed below counts as one character;
			 * all others count as nothing.
			 */

			switch (mandoc_escape(&cp, NULL, NULL)) {
			case ESCAPE_SPECIAL:
			case ESCAPE_UNICODE:
			case ESCAPE_NUMBERED:
			case ESCAPE_UNDEF:
			case ESCAPE_OVERSTRIKE:
				naml++;
				break;
			default:
				break;
			}
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		switch (*esct) {
		case '*':
			if (arg_complete) {
				deftype = ROFFDEF_USER | ROFFDEF_PRE;
				res = roff_getstrn(r, stnam, naml, &deftype);

				/*
				 * If not overridden, let \*(.T
				 * through to the formatters.
				 */

				if (res == NULL && naml == 2 &&
				    stnam[0] == '.' && stnam[1] == 'T') {
					roff_setstrn(&r->strtab,
					    ".T", 2, NULL, 0, 0);
					stesc--;
					continue;
				}
			}
			break;
		case '$':
			if (r->mstackpos < 0) {
				mandoc_msg(MANDOCERR_ARG_UNDEF, ln,
				    (int)(stesc - buf->buf), "%.3s", stesc);
				break;
			}
			ctx = r->mstack + r->mstackpos;

			/* \$1 to \$9 select a single argument. */

			npos = esct[1] - '1';
			if (npos >= 0 && npos <= 8) {
				res = npos < ctx->argc ?
				    ctx->argv[npos] : "";
				break;
			}
			if (esct[1] == '*')
				quote_args = 0;
			else if (esct[1] == '@')
				quote_args = 1;
			else {
				mandoc_msg(MANDOCERR_ARG_NONUM, ln,
				    (int)(stesc - buf->buf), "%.3s", stesc);
				break;
			}

			/* Compute the length of all arguments joined. */

			asz = 0;
			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					asz++;  /* blank */
				if (quote_args)
					asz += 2;  /* quotes */
				asz += strlen(ctx->argv[npos]);
			}

			/*
			 * Resize the buffer such that the arguments
			 * fit in place of the three-byte escape sequence.
			 * When shrinking, move the rest of the line
			 * before reallocating; when growing, afterwards.
			 */

			if (asz != 3) {
				rsz = buf->sz - (stesc - buf->buf) - 3;
				if (asz < 3)
					memmove(stesc + asz, stesc + 3, rsz);
				buf->sz += asz - 3;
				nbuf = mandoc_realloc(buf->buf, buf->sz);
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				buf->buf = nbuf;
				if (asz > 3)
					memmove(stesc + asz, stesc + 3, rsz);
			}

			/* Copy the arguments into the gap. */

			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					*stesc++ = ' ';
				if (quote_args)
					*stesc++ = '"';
				cp = ctx->argv[npos];
				while (*cp != '\0')
					*stesc++ = *cp++;
				if (quote_args)
					*stesc++ = '"';
			}
			continue;
		case 'B':
			/*
			 * Yield '1' if the whole argument parses
			 * as a numeric expression, or '0' otherwise.
			 */
			npos = 0;
			ubuf[0] = arg_complete &&
			    roff_evalnum(r, ln, stnam, &npos,
			      NULL, ROFFNUM_SCALE) &&
			    stnam + npos + 1 == cp ? '1' : '0';
			ubuf[1] = '\0';
			break;
		case 'n':
			if (arg_complete)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, stnam, naml, sign));
			else
				ubuf[0] = '\0';
			break;
		case 'w':
			/* use even incomplete args */
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
			    24 * (int)naml);
			break;
		}

		if (res == NULL) {
			if (*esct == '*')
				mandoc_msg(MANDOCERR_STR_UNDEF,
				    ln, (int)(stesc - buf->buf),
				    "%.*s", (int)naml, stnam);
			res = "";
		} else if (buf->sz + strlen(res) > SHRT_MAX) {
			/* Refuse to grow the line beyond SHRT_MAX bytes. */
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/* Replace the escape sequence by the string. */

		*stesc = '\0';
		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
		    buf->buf, res, cp) + 1;

		/*
		 * Prepare for the next replacement.
		 * Continue at the end of the pasted text, such that
		 * escape sequences contained in it are processed, too.
		 */

		start = nbuf + pos;
		stesc = nbuf + (stesc - buf->buf) + strlen(res);
		free(buf->buf);
		buf->buf = nbuf;
	}
	return ROFF_CONT;
}
1579
/*
 * Parse a quoted or unquoted roff-style request or macro argument.
 * The argument is NUL-terminated in place in the input buffer,
 * pairs of quotes inside quoted arguments are collapsed,
 * and "\\", "\t", and "\a" are translated as in copy mode.
 * Advance the argument pointer *cpp to the next argument,
 * or to the NUL byte terminating the argument line,
 * and advance *pos by the same distance.
 *
 * Return a copy of the parsed argument made with mandoc_strdup().
 * If the argument contained a double backslash, the copy is passed
 * through roff_expand() first; if that function asks to ignore
 * the result, an empty string is returned instead.
 */
char *
roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
{
	struct buf	 buf;
	char		*cp, *start;
	int		 newesc, pairs, quoted, white;

	/* Quoting can only start with a new word. */
	start = *cpp;
	quoted = 0;
	if ('"' == *start) {
		quoted = 1;
		start++;
	}

	/*
	 * newesc: a double backslash was seen
	 * pairs: number of bytes dropped so far; text is shifted
	 *	left by this amount
	 * white: the argument was ended by an unescaped blank
	 */
	newesc = pairs = white = 0;
	for (cp = start; '\0' != *cp; cp++) {

		/*
		 * Move the following text left
		 * after quoted quotes and after "\\" and "\t".
		 */
		if (pairs)
			cp[-pairs] = cp[0];

		if ('\\' == cp[0]) {
			/*
			 * In copy mode, translate double to single
			 * backslashes and backslash-t to literal tabs.
			 */
			switch (cp[1]) {
			case 'a':
			case 't':
				cp[-pairs] = '\t';
				pairs++;
				cp++;
				break;
			case '\\':
				/*
				 * Mark the resulting escape character
				 * for roff_expand() with ASCII_ESC.
				 */
				newesc = 1;
				cp[-pairs] = ASCII_ESC;
				pairs++;
				cp++;
				break;
			case ' ':
				/* Skip escaped blanks. */
				if (0 == quoted)
					cp++;
				break;
			default:
				break;
			}
		} else if (0 == quoted) {
			if (' ' == cp[0]) {
				/* Unescaped blanks end unquoted args. */
				white = 1;
				break;
			}
		} else if ('"' == cp[0]) {
			if ('"' == cp[1]) {
				/* Quoted quotes collapse. */
				pairs++;
				cp++;
			} else {
				/* Unquoted quotes end quoted args. */
				quoted = 2;
				break;
			}
		}
	}

	/* Quoted argument without a closing quote. */
	if (1 == quoted)
		mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);

	/* NUL-terminate this argument and move to the next one. */
	if (pairs)
		cp[-pairs] = '\0';
	if ('\0' != *cp) {
		*cp++ = '\0';
		while (' ' == *cp)
			cp++;
	}
	*pos += (int)(cp - start) + (quoted ? 1 : 0);
	*cpp = cp;

	if ('\0' == *cp && (white || ' ' == cp[-1]))
		mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);

	/* Hand out a copy rather than a pointer into the input. */
	start = mandoc_strdup(start);
	if (newesc == 0)
		return start;

	/* Expand escape sequences that resulted from "\\". */
	buf.buf = start;
	buf.sz = strlen(start) + 1;
	buf.next = NULL;
	if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) {
		free(buf.buf);
		buf.buf = mandoc_strdup("");
	}
	return buf.buf;
}
1690
1691
1692 /*
1693 * Process text streams.
1694 */
1695 static int
1696 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1697 {
1698 size_t sz;
1699 const char *start;
1700 char *p;
1701 int isz;
1702 enum mandoc_esc esc;
1703
1704 /* Spring the input line trap. */
1705
1706 if (roffit_lines == 1) {
1707 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1708 free(buf->buf);
1709 buf->buf = p;
1710 buf->sz = isz + 1;
1711 *offs = 0;
1712 free(roffit_macro);
1713 roffit_lines = 0;
1714 return ROFF_REPARSE;
1715 } else if (roffit_lines > 1)
1716 --roffit_lines;
1717
1718 if (roffce_node != NULL && buf->buf[pos] != '\0') {
1719 if (roffce_lines < 1) {
1720 r->man->last = roffce_node;
1721 r->man->next = ROFF_NEXT_SIBLING;
1722 roffce_lines = 0;
1723 roffce_node = NULL;
1724 } else
1725 roffce_lines--;
1726 }
1727
1728 /* Convert all breakable hyphens into ASCII_HYPH. */
1729
1730 start = p = buf->buf + pos;
1731
1732 while (*p != '\0') {
1733 sz = strcspn(p, "-\\");
1734 p += sz;
1735
1736 if (*p == '\0')
1737 break;
1738
1739 if (*p == '\\') {
1740 /* Skip over escapes. */
1741 p++;
1742 esc = mandoc_escape((const char **)&p, NULL, NULL);
1743 if (esc == ESCAPE_ERROR)
1744 break;
1745 while (*p == '-')
1746 p++;
1747 continue;
1748 } else if (p == start) {
1749 p++;
1750 continue;
1751 }
1752
1753 if (isalpha((unsigned char)p[-1]) &&
1754 isalpha((unsigned char)p[1]))
1755 *p = ASCII_HYPH;
1756 p++;
1757 }
1758 return ROFF_CONT;
1759 }
1760
/*
 * Parse one input line, starting at offset *offs in buf->buf.
 * Inline equation delimiters and escape sequences are handled first.
 * Then the line is dispatched to the handler of the open roff scope
 * if there is one, to the equation or table parser while one is
 * active, to roff_parsetext() for text lines, or to the handler of
 * the request or user-defined macro the line starts with.
 * Returns a ROFF_* code, possibly combined with flag bits, telling
 * the caller how to continue with the line.
 */
int
roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
{
	enum roff_tok	 t;
	int		 e;
	int		 pos;	/* parse point */
	int		 spos;	/* saved parse point for messages */
	int		 ppos;	/* original offset in buf->buf */
	int		 ctl;	/* macro line (boolean) */

	ppos = pos = *offs;

	/* Handle in-line equation delimiters. */

	if (r->tbl == NULL &&
	    r->last_eqn != NULL && r->last_eqn->delim &&
	    (r->eqn == NULL || r->eqn_inline)) {
		e = roff_eqndelim(r, buf, pos);
		if (e == ROFF_REPARSE)
			return e;
		assert(e == ROFF_CONT);
	}

	/* Expand some escape sequences. */

	e = roff_expand(r, buf, ln, pos, r->escape);
	if ((e & ROFF_MASK) == ROFF_IGN)
		return e;
	assert(e == ROFF_CONT);

	/* Advances pos beyond the control character, if any. */

	ctl = roff_getcontrol(r, buf->buf, &pos);

	/*
	 * First, if a scope is open and we're not a macro, pass the
	 * text through the macro's filter.
	 * Equations process all content themselves.
	 * Tables process almost all content themselves, but we want
	 * to warn about macros before passing it there.
	 */

	if (r->last != NULL && ! ctl) {
		t = r->last->tok;
		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
		if ((e & ROFF_MASK) == ROFF_IGN)
			return e;
		/* Keep only the bits outside ROFF_MASK for later. */
		e &= ~ROFF_MASK;
	} else
		e = ROFF_IGN;
	if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
		eqn_read(r->eqn, buf->buf + ppos);
		return e;
	}
	if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
		tbl_read(r->tbl, ln, buf->buf, ppos);
		roff_addtbl(r->man, ln, r->tbl);
		return e;
	}
	if ( ! ctl)
		return roff_parsetext(r, buf, pos, offs) | e;

	/* Skip empty request lines. */

	if (buf->buf[pos] == '"') {
		mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
		return ROFF_IGN;
	} else if (buf->buf[pos] == '\0')
		return ROFF_IGN;

	/*
	 * If a scope is open, go to the child handler for that macro,
	 * as it may want to preprocess before doing anything with it.
	 * Don't do so if an equation is open.
	 */

	if (r->last) {
		t = r->last->tok;
		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
	}

	/* No scope is open.  This is a new request or macro. */

	spos = pos;
	t = roff_parse(r, buf->buf, &pos, ln, ppos);

	/* Tables ignore most macros. */

	if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
	    t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
		mandoc_msg(MANDOCERR_TBLMACRO,
		    ln, pos, "%s", buf->buf + spos);
		if (t != TOKEN_NONE)
			return ROFF_IGN;

		/*
		 * For an unknown macro, skip its name
		 * and treat the rest of the line as table data.
		 */

		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
			pos++;
		while (buf->buf[pos] == ' ')
			pos++;
		tbl_read(r->tbl, ln, buf->buf, pos);
		roff_addtbl(r->man, ln, r->tbl);
		return ROFF_IGN;
	}

	/* For now, let high level macros abort .ce mode. */

	if (ctl && roffce_node != NULL &&
	    (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
	     t == ROFF_TH || t == ROFF_TS)) {
		r->man->last = roffce_node;
		r->man->next = ROFF_NEXT_SIBLING;
		roffce_lines = 0;
		roffce_node = NULL;
	}

	/*
	 * This is neither a roff request nor a user-defined macro.
	 * Let the standard macro set parsers handle it.
	 */

	if (t == TOKEN_NONE)
		return ROFF_CONT;

	/* Execute a roff request or a user defined macro. */

	return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
}
1885
1886 /*
1887 * Internal interface function to tell the roff parser that execution
1888 * of the current macro ended. This is required because macro
1889 * definitions usually do not end with a .return request.
1890 */
1891 void
1892 roff_userret(struct roff *r)
1893 {
1894 struct mctx *ctx;
1895 int i;
1896
1897 assert(r->mstackpos >= 0);
1898 ctx = r->mstack + r->mstackpos;
1899 for (i = 0; i < ctx->argc; i++)
1900 free(ctx->argv[i]);
1901 ctx->argc = 0;
1902 r->mstackpos--;
1903 }
1904
1905 void
1906 roff_endparse(struct roff *r)
1907 {
1908 if (r->last != NULL)
1909 mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
1910 r->last->col, "%s", roff_name[r->last->tok]);
1911
1912 if (r->eqn != NULL) {
1913 mandoc_msg(MANDOCERR_BLK_NOEND,
1914 r->eqn->node->line, r->eqn->node->pos, "EQ");
1915 eqn_parse(r->eqn);
1916 r->eqn = NULL;
1917 }
1918
1919 if (r->tbl != NULL) {
1920 tbl_end(r->tbl, 1);
1921 r->tbl = NULL;
1922 }
1923 }
1924
1925 /*
1926 * Parse a roff node's type from the input buffer. This must be in the
1927 * form of ".foo xxx" in the usual way.
1928 */
1929 static enum roff_tok
1930 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1931 {
1932 char *cp;
1933 const char *mac;
1934 size_t maclen;
1935 int deftype;
1936 enum roff_tok t;
1937
1938 cp = buf + *pos;
1939
1940 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1941 return TOKEN_NONE;
1942
1943 mac = cp;
1944 maclen = roff_getname(r, &cp, ln, ppos);
1945
1946 deftype = ROFFDEF_USER | ROFFDEF_REN;
1947 r->current_string = roff_getstrn(r, mac, maclen, &deftype);
1948 switch (deftype) {
1949 case ROFFDEF_USER:
1950 t = ROFF_USERDEF;
1951 break;
1952 case ROFFDEF_REN:
1953 t = ROFF_RENAMED;
1954 break;
1955 default:
1956 t = roffhash_find(r->reqtab, mac, maclen);
1957 break;
1958 }
1959 if (t != TOKEN_NONE)
1960 *pos = cp - buf;
1961 else if (deftype == ROFFDEF_UNDEF) {
1962 /* Using an undefined macro defines it to be empty. */
1963 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
1964 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
1965 }
1966 return t;
1967 }
1968
1969 /* --- handling of request blocks ----------------------------------------- */
1970
1971 static int
1972 roff_cblock(ROFF_ARGS)
1973 {
1974
1975 /*
1976 * A block-close `..' should only be invoked as a child of an
1977 * ignore macro, otherwise raise a warning and just ignore it.
1978 */
1979
1980 if (r->last == NULL) {
1981 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
1982 return ROFF_IGN;
1983 }
1984
1985 switch (r->last->tok) {
1986 case ROFF_am:
1987 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1988 case ROFF_ami:
1989 case ROFF_de:
1990 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1991 case ROFF_dei:
1992 case ROFF_ig:
1993 break;
1994 default:
1995 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
1996 return ROFF_IGN;
1997 }
1998
1999 if (buf->buf[pos] != '\0')
2000 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
2001 ".. %s", buf->buf + pos);
2002
2003 roffnode_pop(r);
2004 roffnode_cleanscope(r);
2005 return ROFF_IGN;
2006
2007 }
2008
2009 /*
2010 * Pop all nodes ending at the end of the current input line.
2011 * Return the number of loops ended.
2012 */
2013 static int
2014 roffnode_cleanscope(struct roff *r)
2015 {
2016 int inloop;
2017
2018 inloop = 0;
2019 while (r->last != NULL) {
2020 if (--r->last->endspan != 0)
2021 break;
2022 inloop += roffnode_pop(r);
2023 }
2024 return inloop;
2025 }
2026
2027 /*
2028 * Handle the closing \} of a conditional block.
2029 * Apart from generating warnings, this only pops nodes.
2030 * Return the number of loops ended.
2031 */
2032 static int
2033 roff_ccond(struct roff *r, int ln, int ppos)
2034 {
2035 if (NULL == r->last) {
2036 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2037 return 0;
2038 }
2039
2040 switch (r->last->tok) {
2041 case ROFF_el:
2042 case ROFF_ie:
2043 case ROFF_if:
2044 case ROFF_while:
2045 break;
2046 default:
2047 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2048 return 0;
2049 }
2050
2051 if (r->last->endspan > -1) {
2052 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2053 return 0;
2054 }
2055
2056 return roffnode_pop(r) + roffnode_cleanscope(r);
2057 }
2058
2059 static int
2060 roff_block(ROFF_ARGS)
2061 {
2062 const char *name, *value;
2063 char *call, *cp, *iname, *rname;
2064 size_t csz, namesz, rsz;
2065 int deftype;
2066
2067 /* Ignore groff compatibility mode for now. */
2068
2069 if (tok == ROFF_de1)
2070 tok = ROFF_de;
2071 else if (tok == ROFF_dei1)
2072 tok = ROFF_dei;
2073 else if (tok == ROFF_am1)
2074 tok = ROFF_am;
2075 else if (tok == ROFF_ami1)
2076 tok = ROFF_ami;
2077
2078 /* Parse the macro name argument. */
2079
2080 cp = buf->buf + pos;
2081 if (tok == ROFF_ig) {
2082 iname = NULL;
2083 namesz = 0;
2084 } else {
2085 iname = cp;
2086 namesz = roff_getname(r, &cp, ln, ppos);
2087 iname[namesz] = '\0';
2088 }
2089
2090 /* Resolve the macro name argument if it is indirect. */
2091
2092 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2093 deftype = ROFFDEF_USER;
2094 name = roff_getstrn(r, iname, namesz, &deftype);
2095 if (name == NULL) {
2096 mandoc_msg(MANDOCERR_STR_UNDEF,
2097 ln, (int)(iname - buf->buf),
2098 "%.*s", (int)namesz, iname);
2099 namesz = 0;
2100 } else
2101 namesz = strlen(name);
2102 } else
2103 name = iname;
2104
2105 if (namesz == 0 && tok != ROFF_ig) {
2106 mandoc_msg(MANDOCERR_REQ_EMPTY,
2107 ln, ppos, "%s", roff_name[tok]);
2108 return ROFF_IGN;
2109 }
2110
2111 roffnode_push(r, tok, name, ln, ppos);
2112
2113 /*
2114 * At the beginning of a `de' macro, clear the existing string
2115 * with the same name, if there is one. New content will be
2116 * appended from roff_block_text() in multiline mode.
2117 */
2118
2119 if (tok == ROFF_de || tok == ROFF_dei) {
2120 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
2121 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2122 } else if (tok == ROFF_am || tok == ROFF_ami) {
2123 deftype = ROFFDEF_ANY;
2124 value = roff_getstrn(r, iname, namesz, &deftype);
2125 switch (deftype) { /* Before appending, ... */
2126 case ROFFDEF_PRE: /* copy predefined to user-defined. */
2127 roff_setstrn(&r->strtab, name, namesz,
2128 value, strlen(value), 0);
2129 break;
2130 case ROFFDEF_REN: /* call original standard macro. */
2131 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2132 (int)strlen(value), value);
2133 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2134 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2135 free(call);
2136 break;
2137 case ROFFDEF_STD: /* rename and call standard macro. */
2138 rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
2139 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
2140 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2141 (int)rsz, rname);
2142 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2143 free(call);
2144 free(rname);
2145 break;
2146 default:
2147 break;
2148 }
2149 }
2150
2151 if (*cp == '\0')
2152 return ROFF_IGN;
2153
2154 /* Get the custom end marker. */
2155
2156 iname = cp;
2157 namesz = roff_getname(r, &cp, ln, ppos);
2158
2159 /* Resolve the end marker if it is indirect. */
2160
2161 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2162 deftype = ROFFDEF_USER;
2163 name = roff_getstrn(r, iname, namesz, &deftype);
2164 if (name == NULL) {
2165 mandoc_msg(MANDOCERR_STR_UNDEF,
2166 ln, (int)(iname - buf->buf),
2167 "%.*s", (int)namesz, iname);
2168 namesz = 0;
2169 } else
2170 namesz = strlen(name);
2171 } else
2172 name = iname;
2173
2174 if (namesz)
2175 r->last->end = mandoc_strndup(name, namesz);
2176
2177 if (*cp != '\0')
2178 mandoc_msg(MANDOCERR_ARG_EXCESS,
2179 ln, pos, ".%s ... %s", roff_name[tok], cp);
2180
2181 return ROFF_IGN;
2182 }
2183
2184 static int
2185 roff_block_sub(ROFF_ARGS)
2186 {
2187 enum roff_tok t;
2188 int i, j;
2189
2190 /*
2191 * First check whether a custom macro exists at this level. If
2192 * it does, then check against it. This is some of groff's
2193 * stranger behaviours. If we encountered a custom end-scope
2194 * tag and that tag also happens to be a "real" macro, then we
2195 * need to try interpreting it again as a real macro. If it's
2196 * not, then return ignore. Else continue.
2197 */
2198
2199 if (r->last->end) {
2200 for (i = pos, j = 0; r->last->end[j]; j++, i++)
2201 if (buf->buf[i] != r->last->end[j])
2202 break;
2203
2204 if (r->last->end[j] == '\0' &&
2205 (buf->buf[i] == '\0' ||
2206 buf->buf[i] == ' ' ||
2207 buf->buf[i] == '\t')) {
2208 roffnode_pop(r);
2209 roffnode_cleanscope(r);
2210
2211 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2212 i++;
2213
2214 pos = i;
2215 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2216 TOKEN_NONE)
2217 return ROFF_RERUN;
2218 return ROFF_IGN;
2219 }
2220 }
2221
2222 /*
2223 * If we have no custom end-query or lookup failed, then try
2224 * pulling it out of the hashtable.
2225 */
2226
2227 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2228
2229 if (t != ROFF_cblock) {
2230 if (tok != ROFF_ig)
2231 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2232 return ROFF_IGN;
2233 }
2234
2235 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2236 }
2237
2238 static int
2239 roff_block_text(ROFF_ARGS)
2240 {
2241
2242 if (tok != ROFF_ig)
2243 roff_setstr(r, r->last->name, buf->buf + pos, 2);
2244
2245 return ROFF_IGN;
2246 }
2247
/*
 * Handle a macro line inside the scope of a conditional request;
 * tok is the request that owns the scope.
 *
 * Closing delimiters `\}' are cut out of the line and reported to
 * roff_ccond().  The return value is a combination of ROFF_* bits.
 * For ROFF_while scopes, ROFF_LOOPCONT or ROFF_LOOPEXIT (chosen
 * according to the rule in effect on entry) is added whenever
 * roffnode_cleanscope() or roff_ccond() returns non-zero.
 */
static int
roff_cond_sub(ROFF_ARGS)
{
	struct roffnode	*bl;
	char		*ep;
	int		 endloop, irc, rr;
	enum roff_tok	 t;

	irc = ROFF_IGN;
	/* Remember the rule before any scope handling happens below. */
	rr = r->last->rule;
	endloop = tok != ROFF_while ? ROFF_IGN :
	    rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
	if (roffnode_cleanscope(r))
		irc |= endloop;

	/*
	 * If `\}' occurs on a macro line without a preceding macro,
	 * drop the line completely.
	 */

	ep = buf->buf + pos;
	if (ep[0] == '\\' && ep[1] == '}')
		rr = 0;

	/*
	 * The closing delimiter `\}' rewinds the conditional scope
	 * but is otherwise ignored when interpreting the line.
	 */

	while ((ep = strchr(ep, '\\')) != NULL) {
		switch (ep[1]) {
		case '}':
			/* Delete the two bytes, then rescan from here. */
			memmove(ep, ep + 2, strlen(ep + 2) + 1);
			if (roff_ccond(r, ln, ep - buf->buf))
				irc |= endloop;
			break;
		case '\0':
			/* Lone backslash at the end of the line. */
			++ep;
			break;
		default:
			/* Skip the backslash and the byte after it. */
			ep += 2;
			break;
		}
	}

	/*
	 * Fully handle known macros when they are structurally
	 * required or when the conditional evaluated to true.
	 */

	t = roff_parse(r, buf->buf, &pos, ln, ppos);
	if (t == ROFF_break) {
		if (irc & ROFF_LOOPMASK)
			irc = ROFF_IGN | ROFF_LOOPEXIT;
		else if (rr) {
			/*
			 * Clear the rule of every node from the current
			 * one up to and including the nearest ROFF_while.
			 */
			for (bl = r->last; bl != NULL; bl = bl->parent) {
				bl->rule = 0;
				if (bl->tok == ROFF_while)
					break;
			}
		}
	} else if (t != TOKEN_NONE &&
	    (rr || roffs[t].flags & ROFFMAC_STRUCT))
		irc |= (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
	else
		irc |= rr ? ROFF_CONT : ROFF_IGN;
	return irc;
}
2316
2317 static int
2318 roff_cond_text(ROFF_ARGS)
2319 {
2320 char *ep;
2321 int endloop, irc, rr;
2322
2323 irc = ROFF_IGN;
2324 rr = r->last->rule;
2325 endloop = tok != ROFF_while ? ROFF_IGN :
2326 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2327 if (roffnode_cleanscope(r))
2328 irc |= endloop;
2329
2330 /*
2331 * If `\}' occurs on a text line with neither preceding
2332 * nor following characters, drop the line completely.
2333 */
2334
2335 ep = buf->buf + pos;
2336 if (strcmp(ep, "\\}") == 0)
2337 rr = 0;
2338
2339 /*
2340 * The closing delimiter `\}' rewinds the conditional scope
2341 * but is otherwise ignored when interpreting the line.
2342 */
2343
2344 while ((ep = strchr(ep, '\\')) != NULL) {
2345 switch (ep[1]) {
2346 case '}':
2347 memmove(ep, ep + 2, strlen(ep + 2) + 1);
2348 if (roff_ccond(r, ln, ep - buf->buf))
2349 irc |= endloop;
2350 break;
2351 case '\0':
2352 ++ep;
2353 break;
2354 default:
2355 ep += 2;
2356 break;
2357 }
2358 }
2359 if (rr)
2360 irc |= ROFF_CONT;
2361 return irc;
2362 }
2363
2364 /* --- handling of numeric and conditional expressions -------------------- */
2365
2366 /*
2367 * Parse a single signed integer number. Stop at the first non-digit.
2368 * If there is at least one digit, return success and advance the
2369 * parse point, else return failure and let the parse point unchanged.
2370 * Ignore overflows, treat them just like the C language.
2371 */
2372 static int
2373 roff_getnum(const char *v, int *pos, int *res, int flags)
2374 {
2375 int myres, scaled, n, p;
2376
2377 if (NULL == res)
2378 res = &myres;
2379
2380 p = *pos;
2381 n = v[p] == '-';
2382 if (n || v[p] == '+')
2383 p++;
2384
2385 if (flags & ROFFNUM_WHITE)
2386 while (isspace((unsigned char)v[p]))
2387 p++;
2388
2389 for (*res = 0; isdigit((unsigned char)v[p]); p++)
2390 *res = 10 * *res + v[p] - '0';
2391 if (p == *pos + n)
2392 return 0;
2393
2394 if (n)
2395 *res = -*res;
2396
2397 /* Each number may be followed by one optional scaling unit. */
2398
2399 switch (v[p]) {
2400 case 'f':
2401 scaled = *res * 65536;
2402 break;
2403 case 'i':
2404 scaled = *res * 240;
2405 break;
2406 case 'c':
2407 scaled = *res * 240 / 2.54;
2408 break;
2409 case 'v':
2410 case 'P':
2411 scaled = *res * 40;
2412 break;
2413 case 'm':
2414 case 'n':
2415 scaled = *res * 24;
2416 break;
2417 case 'p':
2418 scaled = *res * 10 / 3;
2419 break;
2420 case 'u':
2421 scaled = *res;
2422 break;
2423 case 'M':
2424 scaled = *res * 6 / 25;
2425 break;
2426 default:
2427 scaled = *res;
2428 p--;
2429 break;
2430 }
2431 if (flags & ROFFNUM_SCALE)
2432 *res = scaled;
2433
2434 *pos = p + 1;
2435 return 1;
2436 }
2437
/*
 * Evaluate a string comparison condition.
 * The character at v[*pos] is the delimiter.
 * Succeed if the string between its first and second occurrence
 * equals the string between its second and third occurrence.
 * Move *pos behind the third occurrence or, lacking that,
 * to the end of the line.
 * Return 1 if the strings match, 0 otherwise.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim, *lhs, *rhs;
	int		 equal;

	equal = 0;
	delim = v + *pos;		/* initial delimiter */
	lhs = delim + 1;		/* cursor in the first string */
	rhs = strchr(lhs, *delim);	/* middle delimiter, if any */

	if (rhs != NULL) {
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: skip to the final delimiter. */
				rhs = strchr(rhs, *delim);
				break;
			}
			if (*rhs == *delim) {
				/* Both strings ended simultaneously. */
				equal = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return equal;
}
2480
2481 /*
2482 * Evaluate an optionally negated single character, numerical,
2483 * or string condition.
2484 */
2485 static int
2486 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2487 {
2488 const char *start, *end;
2489 char *cp, *name;
2490 size_t sz;
2491 int deftype, len, number, savepos, istrue, wanttrue;
2492
2493 if ('!' == v[*pos]) {
2494 wanttrue = 0;
2495 (*pos)++;
2496 } else
2497 wanttrue = 1;
2498
2499 switch (v[*pos]) {
2500 case '\0':
2501 return 0;
2502 case 'n':
2503 case 'o':
2504 (*pos)++;
2505 return wanttrue;
2506 case 'e':
2507 case 't':
2508 case 'v':
2509 (*pos)++;
2510 return !wanttrue;
2511 case 'c':
2512 do {
2513 (*pos)++;
2514 } while (v[*pos] == ' ');
2515
2516 /*
2517 * Quirk for groff compatibility:
2518 * The horizontal tab is neither available nor unavailable.
2519 */
2520
2521 if (v[*pos] == '\t') {
2522 (*pos)++;
2523 return 0;
2524 }
2525
2526 /* Printable ASCII characters are available. */
2527
2528 if (v[*pos] != '\\') {
2529 (*pos)++;
2530 return wanttrue;
2531 }
2532
2533 end = v + ++*pos;
2534 switch (mandoc_escape(&end, &start, &len)) {
2535 case ESCAPE_SPECIAL:
2536 istrue = mchars_spec2cp(start, len) != -1;
2537 break;
2538 case ESCAPE_UNICODE:
2539 istrue = 1;
2540 break;
2541 case ESCAPE_NUMBERED:
2542 istrue = mchars_num2char(start, len) != -1;
2543 break;
2544 default:
2545 istrue = !wanttrue;
2546 break;
2547 }
2548 *pos = end - v;
2549 return istrue == wanttrue;
2550 case 'd':
2551 case 'r':
2552 cp = v + *pos + 1;
2553 while (*cp == ' ')
2554 cp++;
2555 name = cp;
2556 sz = roff_getname(r, &cp, ln, cp - v);
2557 if (sz == 0)
2558 istrue = 0;
2559 else if (v[*pos] == 'r')
2560 istrue = roff_hasregn(r, name, sz);
2561 else {
2562 deftype = ROFFDEF_ANY;
2563 roff_getstrn(r, name, sz, &deftype);
2564 istrue = !!deftype;
2565 }
2566 *pos = (name + sz) - v;
2567 return istrue == wanttrue;
2568 default:
2569 break;
2570 }
2571
2572 savepos = *pos;
2573 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2574 return (number > 0) == wanttrue;
2575 else if (*pos == savepos)
2576 return roff_evalstrcond(v, pos) == wanttrue;
2577 else
2578 return 0;
2579 }
2580
2581 static int
2582 roff_line_ignore(ROFF_ARGS)
2583 {
2584
2585 return ROFF_IGN;
2586 }
2587
2588 static int
2589 roff_insec(ROFF_ARGS)
2590 {
2591
2592 mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]);
2593 return ROFF_IGN;
2594 }
2595
2596 static int
2597 roff_unsupp(ROFF_ARGS)
2598 {
2599
2600 mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]);
2601 return ROFF_IGN;
2602 }
2603
2604 static int
2605 roff_cond(ROFF_ARGS)
2606 {
2607 int irc;
2608
2609 roffnode_push(r, tok, NULL, ln, ppos);
2610
2611 /*
2612 * An `.el' has no conditional body: it will consume the value
2613 * of the current rstack entry set in prior `ie' calls or
2614 * defaults to DENY.
2615 *
2616 * If we're not an `el', however, then evaluate the conditional.
2617 */
2618
2619 r->last->rule = tok == ROFF_el ?
2620 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2621 roff_evalcond(r, ln, buf->buf, &pos);
2622
2623 /*
2624 * An if-else will put the NEGATION of the current evaluated
2625 * conditional into the stack of rules.
2626 */
2627
2628 if (tok == ROFF_ie) {
2629 if (r->rstackpos + 1 == r->rstacksz) {
2630 r->rstacksz += 16;
2631 r->rstack = mandoc_reallocarray(r->rstack,
2632 r->rstacksz, sizeof(int));
2633 }
2634 r->rstack[++r->rstackpos] = !r->last->rule;
2635 }
2636
2637 /* If the parent has false as its rule, then so do we. */
2638
2639 if (r->last->parent && !r->last->parent->rule)
2640 r->last->rule = 0;
2641
2642 /*
2643 * Determine scope.
2644 * If there is nothing on the line after the conditional,
2645 * not even whitespace, use next-line scope.
2646 * Except that .while does not support next-line scope.
2647 */
2648
2649 if (buf->buf[pos] == '\0' && tok != ROFF_while) {
2650 r->last->endspan = 2;
2651 goto out;
2652 }
2653
2654 while (buf->buf[pos] == ' ')
2655 pos++;
2656
2657 /* An opening brace requests multiline scope. */
2658
2659 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2660 r->last->endspan = -1;
2661 pos += 2;
2662 while (buf->buf[pos] == ' ')
2663 pos++;
2664 goto out;
2665 }
2666
2667 /*
2668 * Anything else following the conditional causes
2669 * single-line scope. Warn if the scope contains
2670 * nothing but trailing whitespace.
2671 */
2672
2673 if (buf->buf[pos] == '\0')
2674 mandoc_msg(MANDOCERR_COND_EMPTY,
2675 ln, ppos, "%s", roff_name[tok]);
2676
2677 r->last->endspan = 1;
2678
2679 out:
2680 *offs = pos;
2681 irc = ROFF_RERUN;
2682 if (tok == ROFF_while)
2683 irc |= ROFF_WHILE;
2684 return irc;
2685 }
2686
2687 static int
2688 roff_ds(ROFF_ARGS)
2689 {
2690 char *string;
2691 const char *name;
2692 size_t namesz;
2693
2694 /* Ignore groff compatibility mode for now. */
2695
2696 if (tok == ROFF_ds1)
2697 tok = ROFF_ds;
2698 else if (tok == ROFF_as1)
2699 tok = ROFF_as;
2700
2701 /*
2702 * The first word is the name of the string.
2703 * If it is empty or terminated by an escape sequence,
2704 * abort the `ds' request without defining anything.
2705 */
2706
2707 name = string = buf->buf + pos;
2708 if (*name == '\0')
2709 return ROFF_IGN;
2710
2711 namesz = roff_getname(r, &string, ln, pos);
2712 switch (name[namesz]) {
2713 case '\\':
2714 return ROFF_IGN;
2715 case '\t':
2716 string = buf->buf + pos + namesz;
2717 break;
2718 default:
2719 break;
2720 }
2721
2722 /* Read past the initial double-quote, if any. */
2723 if (*string == '"')
2724 string++;
2725
2726 /* The rest is the value. */
2727 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2728 ROFF_as == tok);
2729 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2730 return ROFF_IGN;
2731 }
2732
/*
 * Parse a single operator, one or two characters long, at v[*pos].
 * If the operator is recognized, store its code in *res, move *pos
 * behind it, and return the (non-zero) code.  One-character
 * operators are their own code; the two-character operators are
 * encoded as: <= 'l', >= 'g', <> '!', <? 'i', >? 'a', == '='.
 * Otherwise, return 0 and leave *pos unchanged; *res then holds
 * the character found at v[*pos].
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*cp;
	int		 width;

	cp = v + *pos;
	width = 1;
	*res = *cp;

	/* Only look at cp[1] when cp[0] can start a longer operator. */

	switch (*cp) {
	case '<':
		if (cp[1] == '=') {
			*res = 'l';
			width = 2;
		} else if (cp[1] == '>') {
			*res = '!';
			width = 2;
		} else if (cp[1] == '?') {
			*res = 'i';
			width = 2;
		}
		break;
	case '>':
		if (cp[1] == '=') {
			*res = 'g';
			width = 2;
		} else if (cp[1] == '?') {
			*res = 'a';
			width = 2;
		}
		break;
	case '=':
		if (cp[1] == '=')
			width = 2;
		break;
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case ':':
		break;
	default:
		return 0;
	}

	*pos += width;
	return *res;
}
2796
2797 /*
2798 * Evaluate either a parenthesized numeric expression
2799 * or a single signed integer number.
2800 */
2801 static int
2802 roff_evalpar(struct roff *r, int ln,
2803 const char *v, int *pos, int *res, int flags)
2804 {
2805
2806 if ('(' != v[*pos])
2807 return roff_getnum(v, pos, res, flags);
2808
2809 (*pos)++;
2810 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2811 return 0;
2812
2813 /*
2814 * Omission of the closing parenthesis
2815 * is an error in validation mode,
2816 * but ignored in evaluation mode.
2817 */
2818
2819 if (')' == v[*pos])
2820 (*pos)++;
2821 else if (NULL == res)
2822 return 0;
2823
2824 return 1;
2825 }
2826
2827 /*
2828 * Evaluate a complete numeric expression.
2829 * Proceed left to right, there is no concept of precedence.
2830 */
2831 static int
2832 roff_evalnum(struct roff *r, int ln, const char *v,
2833 int *pos, int *res, int flags)
2834 {
2835 int mypos, operand2;
2836 char operator;
2837
2838 if (NULL == pos) {
2839 mypos = 0;
2840 pos = &mypos;
2841 }
2842
2843 if (flags & ROFFNUM_WHITE)
2844 while (isspace((unsigned char)v[*pos]))
2845 (*pos)++;
2846
2847 if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2848 return 0;
2849
2850 while (1) {
2851 if (flags & ROFFNUM_WHITE)
2852 while (isspace((unsigned char)v[*pos]))
2853 (*pos)++;
2854
2855 if ( ! roff_getop(v, pos, &operator))
2856 break;
2857
2858 if (flags & ROFFNUM_WHITE)
2859 while (isspace((unsigned char)v[*pos]))
2860 (*pos)++;
2861
2862 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2863 return 0;
2864
2865 if (flags & ROFFNUM_WHITE)
2866 while (isspace((unsigned char)v[*pos]))
2867 (*pos)++;
2868
2869 if (NULL == res)
2870 continue;
2871
2872 switch (operator) {
2873 case '+':
2874 *res += operand2;
2875 break;
2876 case '-':
2877 *res -= operand2;
2878 break;
2879 case '*':
2880 *res *= operand2;
2881 break;
2882 case '/':
2883 if (operand2 == 0) {
2884 mandoc_msg(MANDOCERR_DIVZERO,
2885 ln, *pos, "%s", v);
2886 *res = 0;
2887 break;
2888 }
2889 *res /= operand2;
2890 break;
2891 case '%':
2892 if (operand2 == 0) {
2893 mandoc_msg(MANDOCERR_DIVZERO,
2894 ln, *pos, "%s", v);
2895 *res = 0;
2896 break;
2897 }
2898 *res %= operand2;
2899 break;
2900 case '<':
2901 *res = *res < operand2;
2902 break;
2903 case '>':
2904 *res = *res > operand2;
2905 break;
2906 case 'l':
2907 *res = *res <= operand2;
2908 break;
2909 case 'g':
2910 *res = *res >= operand2;
2911 break;
2912 case '=':
2913 *res = *res == operand2;
2914 break;
2915 case '!':
2916 *res = *res != operand2;
2917 break;
2918 case '&':
2919 *res = *res && operand2;
2920 break;
2921 case ':':
2922 *res = *res || operand2;
2923 break;
2924 case 'i':
2925 if (operand2 < *res)
2926 *res = operand2;
2927 break;
2928 case 'a':
2929 if (operand2 > *res)
2930 *res = operand2;
2931 break;
2932 default:
2933 abort();
2934 }
2935 }
2936 return 1;
2937 }
2938
2939 /* --- register management ------------------------------------------------ */
2940
/*
 * Set, increment ('+'), or decrement ('-') the number register
 * with the NUL-terminated name, leaving its step size alone.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	const size_t	 namesz = strlen(name);

	/* INT_MIN as the step means: keep the current step. */

	roff_setregn(r, name, namesz, val, sign, INT_MIN);
}
2946
2947 static void
2948 roff_setregn(struct roff *r, const char *name, size_t len,
2949 int val, char sign, int step)
2950 {
2951 struct roffreg *reg;
2952
2953 /* Search for an existing register with the same name. */
2954 reg = r->regtab;
2955
2956 while (reg != NULL && (reg->key.sz != len ||
2957 strncmp(reg->key.p, name, len) != 0))
2958 reg = reg->next;
2959
2960 if (NULL == reg) {
2961 /* Create a new register. */
2962 reg = mandoc_malloc(sizeof(struct roffreg));
2963 reg->key.p = mandoc_strndup(name, len);
2964 reg->key.sz = len;
2965 reg->val = 0;
2966 reg->step = 0;
2967 reg->next = r->regtab;
2968 r->regtab = reg;
2969 }
2970
2971 if ('+' == sign)
2972 reg->val += val;
2973 else if ('-' == sign)
2974 reg->val -= val;
2975 else
2976 reg->val = val;
2977 if (step != INT_MIN)
2978 reg->step = step;
2979 }
2980
2981 /*
2982 * Handle some predefined read-only number registers.
2983 * For now, return -1 if the requested register is not predefined;
2984 * in case a predefined read-only register having the value -1
2985 * were to turn up, another special value would have to be chosen.
2986 */
2987 static int
2988 roff_getregro(const struct roff *r, const char *name)
2989 {
2990
2991 switch (*name) {
2992 case '$': /* Number of arguments of the last macro evaluated. */
2993 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
2994 case 'A': /* ASCII approximation mode is always off. */
2995 return 0;
2996 case 'g': /* Groff compatibility mode is always on. */
2997 return 1;
2998 case 'H': /* Fixed horizontal resolution. */
2999 return 24;
3000 case 'j': /* Always adjust left margin only. */
3001 return 0;
3002 case 'T': /* Some output device is always defined. */
3003 return 1;
3004 case 'V': /* Fixed vertical resolution. */
3005 return 40;
3006 default:
3007 return -1;
3008 }
3009 }
3010
/*
 * Return the value of the number register with the
 * NUL-terminated name, without stepping it.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	const size_t	 namesz = strlen(name);

	return roff_getregn(r, name, namesz, '\0');
}
3016
3017 static int
3018 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
3019 {
3020 struct roffreg *reg;
3021 int val;
3022
3023 if ('.' == name[0] && 2 == len) {
3024 val = roff_getregro(r, name + 1);
3025 if (-1 != val)
3026 return val;
3027 }
3028
3029 for (reg = r->regtab; reg; reg = reg->next) {
3030 if (len == reg->key.sz &&
3031 0 == strncmp(name, reg->key.p, len)) {
3032 switch (sign) {
3033 case '+':
3034 reg->val += reg->step;
3035 break;
3036 case '-':
3037 reg->val -= reg->step;
3038 break;
3039 default:
3040 break;
3041 }
3042 return reg->val;
3043 }
3044 }
3045
3046 roff_setregn(r, name, len, 0, '\0', INT_MIN);
3047 return 0;
3048 }
3049
3050 static int
3051 roff_hasregn(const struct roff *r, const char *name, size_t len)
3052 {
3053 struct roffreg *reg;
3054 int val;
3055
3056 if ('.' == name[0] && 2 == len) {
3057 val = roff_getregro(r, name + 1);
3058 if (-1 != val)
3059 return 1;
3060 }
3061
3062 for (reg = r->regtab; reg; reg = reg->next)
3063 if (len == reg->key.sz &&
3064 0 == strncmp(name, reg->key.p, len))
3065 return 1;
3066
3067 return 0;
3068 }
3069
3070 static void
3071 roff_freereg(struct roffreg *reg)
3072 {
3073 struct roffreg *old_reg;
3074
3075 while (NULL != reg) {
3076 free(reg->key.p);
3077 old_reg = reg;
3078 reg = reg->next;
3079 free(old_reg);
3080 }
3081 }
3082
3083 static int
3084 roff_nr(ROFF_ARGS)
3085 {
3086 char *key, *val, *step;
3087 size_t keysz;
3088 int iv, is, len;
3089 char sign;
3090
3091 key = val = buf->buf + pos;
3092 if (*key == '\0')
3093 return ROFF_IGN;
3094
3095 keysz = roff_getname(r, &val, ln, pos);
3096 if (key[keysz] == '\\' || key[keysz] == '\t')
3097 return ROFF_IGN;
3098
3099 sign = *val;
3100 if (sign == '+' || sign == '-')
3101 val++;
3102
3103 len = 0;
3104 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
3105 return ROFF_IGN;
3106
3107 step = val + len;
3108 while (isspace((unsigned char)*step))
3109 step++;
3110 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
3111 is = INT_MIN;
3112
3113 roff_setregn(r, key, keysz, iv, sign, is);
3114 return ROFF_IGN;
3115 }
3116
3117 static int
3118 roff_rr(ROFF_ARGS)
3119 {
3120 struct roffreg *reg, **prev;
3121 char *name, *cp;
3122 size_t namesz;
3123
3124 name = cp = buf->buf + pos;
3125 if (*name == '\0')
3126 return ROFF_IGN;
3127 namesz = roff_getname(r, &cp, ln, pos);
3128 name[namesz] = '\0';
3129
3130 prev = &r->regtab;
3131 while (1) {
3132 reg = *prev;
3133 if (reg == NULL || !strcmp(name, reg->key.p))
3134 break;
3135 prev = &reg->next;
3136 }
3137 if (reg != NULL) {
3138 *prev = reg->next;
3139 free(reg->key.p);
3140 free(reg);
3141 }
3142 return ROFF_IGN;
3143 }
3144
3145 /* --- handler functions for roff requests -------------------------------- */
3146
3147 static int
3148 roff_rm(ROFF_ARGS)
3149 {
3150 const char *name;
3151 char *cp;
3152 size_t namesz;
3153
3154 cp = buf->buf + pos;
3155 while (*cp != '\0') {
3156 name = cp;
3157 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
3158 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
3159 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3160 if (name[namesz] == '\\' || name[namesz] == '\t')
3161 break;
3162 }
3163 return ROFF_IGN;
3164 }
3165
3166 static int
3167 roff_it(ROFF_ARGS)
3168 {
3169 int iv;
3170
3171 /* Parse the number of lines. */
3172
3173 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
3174 mandoc_msg(MANDOCERR_IT_NONUM,
3175 ln, ppos, "%s", buf->buf + 1);
3176 return ROFF_IGN;
3177 }
3178
3179 while (isspace((unsigned char)buf->buf[pos]))
3180 pos++;
3181
3182 /*
3183 * Arm the input line trap.
3184 * Special-casing "an-trap" is an ugly workaround to cope
3185 * with DocBook stupidly fiddling with man(7) internals.
3186 */
3187
3188 roffit_lines = iv;
3189 roffit_macro = mandoc_strdup(iv != 1 ||
3190 strcmp(buf->buf + pos, "an-trap") ?
3191 buf->buf + pos : "br");
3192 return ROFF_IGN;
3193 }
3194
3195 static int
3196 roff_Dd(ROFF_ARGS)
3197 {
3198 int mask;
3199 enum roff_tok t, te;
3200
3201 switch (tok) {
3202 case ROFF_Dd:
3203 tok = MDOC_Dd;
3204 te = MDOC_MAX;
3205 if (r->format == 0)
3206 r->format = MPARSE_MDOC;
3207 mask = MPARSE_MDOC | MPARSE_QUICK;
3208 break;
3209 case ROFF_TH:
3210 tok = MAN_TH;
3211 te = MAN_MAX;
3212 if (r->format == 0)
3213 r->format = MPARSE_MAN;
3214 mask = MPARSE_QUICK;
3215 break;
3216 default:
3217 abort();
3218 }
3219 if ((r->options & mask) == 0)
3220 for (t = tok; t < te; t++)
3221 roff_setstr(r, roff_name[t], NULL, 0);
3222 return ROFF_CONT;
3223 }
3224
3225 static int
3226 roff_TE(ROFF_ARGS)
3227 {
3228 r->man->flags &= ~ROFF_NONOFILL;
3229 if (r->tbl == NULL) {
3230 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE");
3231 return ROFF_IGN;
3232 }
3233 if (tbl_end(r->tbl, 0) == 0) {
3234 r->tbl = NULL;
3235 free(buf->buf);
3236 buf->buf = mandoc_strdup(".sp");
3237 buf->sz = 4;
3238 *offs = 0;
3239 return ROFF_REPARSE;
3240 }
3241 r->tbl = NULL;
3242 return ROFF_IGN;
3243 }
3244
3245 static int
3246 roff_T_(ROFF_ARGS)
3247 {
3248
3249 if (NULL == r->tbl)
3250 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&");
3251 else
3252 tbl_restart(ln, ppos, r->tbl);
3253
3254 return ROFF_IGN;
3255 }
3256
3257 /*
3258 * Handle in-line equation delimiters.
3259 */
3260 static int
3261 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
3262 {
3263 char *cp1, *cp2;
3264 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
3265
3266 /*
3267 * Outside equations, look for an opening delimiter.
3268 * If we are inside an equation, we already know it is
3269 * in-line, or this function wouldn't have been called;
3270 * so look for a closing delimiter.
3271 */
3272
3273 cp1 = buf->buf + pos;
3274 cp2 = strchr(cp1, r->eqn == NULL ?
3275 r->last_eqn->odelim : r->last_eqn->cdelim);
3276 if (cp2 == NULL)
3277 return ROFF_CONT;
3278
3279 *cp2++ = '\0';
3280 bef_pr = bef_nl = aft_nl = aft_pr = "";
3281
3282 /* Handle preceding text, protecting whitespace. */
3283
3284 if (*buf->buf != '\0') {
3285 if (r->eqn == NULL)
3286 bef_pr = "\\&";
3287 bef_nl = "\n";
3288 }
3289
3290 /*
3291 * Prepare replacing the delimiter with an equation macro
3292 * and drop leading white space from the equation.
3293 */
3294
3295 if (r->eqn == NULL) {
3296 while (*cp2 == ' ')
3297 cp2++;
3298 mac = ".EQ";
3299 } else
3300 mac = ".EN";
3301
3302 /* Handle following text, protecting whitespace. */
3303
3304 if (*cp2 != '\0') {
3305 aft_nl = "\n";
3306 if (r->eqn != NULL)
3307 aft_pr = "\\&";
3308 }
3309
3310 /* Do the actual replacement. */
3311
3312 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
3313 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
3314 free(buf->buf);
3315 buf->buf = cp1;
3316
3317 /* Toggle the in-line state of the eqn subsystem. */
3318
3319 r->eqn_inline = r->eqn == NULL;
3320 return ROFF_REPARSE;
3321 }
3322
3323 static int
3324 roff_EQ(ROFF_ARGS)
3325 {
3326 struct roff_node *n;
3327
3328 if (r->man->meta.macroset == MACROSET_MAN)
3329 man_breakscope(r->man, ROFF_EQ);
3330 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3331 if (ln > r->man->last->line)
3332 n->flags |= NODE_LINE;
3333 n->eqn = eqn_box_new();
3334 roff_node_append(r->man, n);
3335 r->man->next = ROFF_NEXT_SIBLING;
3336
3337 assert(r->eqn == NULL);
3338 if (r->last_eqn == NULL)
3339 r->last_eqn = eqn_alloc();
3340 else
3341 eqn_reset(r->last_eqn);
3342 r->eqn = r->last_eqn;
3343 r->eqn->node = n;
3344
3345 if (buf->buf[pos] != '\0')
3346 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3347 ".EQ %s", buf->buf + pos);
3348
3349 return ROFF_IGN;
3350 }
3351
3352 static int
3353 roff_EN(ROFF_ARGS)
3354 {
3355 if (r->eqn != NULL) {
3356 eqn_parse(r->eqn);
3357 r->eqn = NULL;
3358 } else
3359 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN");
3360 if (buf->buf[pos] != '\0')
3361 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3362 "EN %s", buf->buf + pos);
3363 return ROFF_IGN;
3364 }
3365
3366 static int
3367 roff_TS(ROFF_ARGS)
3368 {
3369 if (r->tbl != NULL) {
3370 mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
3371 tbl_end(r->tbl, 0);
3372 }
3373 r->man->flags |= ROFF_NONOFILL;
3374 r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
3375 if (r->last_tbl == NULL)
3376 r->first_tbl = r->tbl;
3377 r->last_tbl = r->tbl;
3378 return ROFF_IGN;
3379 }
3380
3381 static int
3382 roff_noarg(ROFF_ARGS)
3383 {
3384 if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3385 man_breakscope(r->man, tok);
3386 if (tok == ROFF_brp)
3387 tok = ROFF_br;
3388 roff_elem_alloc(r->man, ln, ppos, tok);
3389 if (buf->buf[pos] != '\0')
3390 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3391 "%s %s", roff_name[tok], buf->buf + pos);
3392 if (tok == ROFF_nf)
3393 r->man->flags |= ROFF_NOFILL;
3394 else if (tok == ROFF_fi)
3395 r->man->flags &= ~ROFF_NOFILL;
3396 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3397 r->man->next = ROFF_NEXT_SIBLING;
3398 return ROFF_IGN;
3399 }
3400
3401 static int
3402 roff_onearg(ROFF_ARGS)
3403 {
3404 struct roff_node *n;
3405 char *cp;
3406 int npos;
3407
3408 if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3409 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3410 tok == ROFF_ti))
3411 man_breakscope(r->man, tok);
3412
3413 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3414 r->man->last = roffce_node;
3415 r->man->next = ROFF_NEXT_SIBLING;
3416 }
3417
3418 roff_elem_alloc(r->man, ln, ppos, tok);
3419 n = r->man->last;
3420
3421 cp = buf->buf + pos;
3422 if (*cp != '\0') {
3423 while (*cp != '\0' && *cp != ' ')
3424 cp++;
3425 while (*cp == ' ')
3426 *cp++ = '\0';
3427 if (*cp != '\0')
3428 mandoc_msg(MANDOCERR_ARG_EXCESS,
3429 ln, (int)(cp - buf->buf),
3430 "%s ... %s", roff_name[tok], cp);
3431 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3432 }
3433
3434 if (tok == ROFF_ce || tok == ROFF_rj) {
3435 if (r->man->last->type == ROFFT_ELEM) {
3436 roff_word_alloc(r->man, ln, pos, "1");
3437 r->man->last->flags |= NODE_NOSRC;
3438 }
3439 npos = 0;
3440 if (roff_evalnum(r, ln, r->man->last->string, &npos,
3441 &roffce_lines, 0) == 0) {
3442 mandoc_msg(MANDOCERR_CE_NONUM,
3443 ln, pos, "ce %s", buf->buf + pos);
3444 roffce_lines = 1;
3445 }
3446 if (roffce_lines < 1) {
3447 r->man->last = r->man->last->parent;
3448 roffce_node = NULL;
3449 roffce_lines = 0;
3450 } else
3451 roffce_node = r->man->last->parent;
3452 } else {
3453 n->flags |= NODE_VALID | NODE_ENDED;
3454 r->man->last = n;
3455 }
3456 n->flags |= NODE_LINE;
3457 r->man->next = ROFF_NEXT_SIBLING;
3458 return ROFF_IGN;
3459 }
3460
3461 static int
3462 roff_manyarg(ROFF_ARGS)
3463 {
3464 struct roff_node *n;
3465 char *sp, *ep;
3466
3467 roff_elem_alloc(r->man, ln, ppos, tok);
3468 n = r->man->last;
3469
3470 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3471 while (*ep != '\0' && *ep != ' ')
3472 ep++;
3473 while (*ep == ' ')
3474 *ep++ = '\0';
3475 roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3476 }
3477
3478 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3479 r->man->last = n;
3480 r->man->next = ROFF_NEXT_SIBLING;
3481 return ROFF_IGN;
3482 }
3483
3484 static int
3485 roff_als(ROFF_ARGS)
3486 {
3487 char *oldn, *newn, *end, *value;
3488 size_t oldsz, newsz, valsz;
3489
3490 newn = oldn = buf->buf + pos;
3491 if (*newn == '\0')
3492 return ROFF_IGN;
3493
3494 newsz = roff_getname(r, &oldn, ln, pos);
3495 if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0')
3496 return ROFF_IGN;
3497
3498 end = oldn;
3499 oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3500 if (oldsz == 0)
3501 return ROFF_IGN;
3502
3503 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3504 (int)oldsz, oldn);
3505 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3506 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3507 free(value);
3508 return ROFF_IGN;
3509 }
3510
3511 /*
3512 * The .break request only makes sense inside conditionals,
3513 * and that case is already handled in roff_cond_sub().
3514 */
3515 static int
3516 roff_break(ROFF_ARGS)
3517 {
3518 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break");
3519 return ROFF_IGN;
3520 }
3521
3522 static int
3523 roff_cc(ROFF_ARGS)
3524 {
3525 const char *p;
3526
3527 p = buf->buf + pos;
3528
3529 if (*p == '\0' || (r->control = *p++) == '.')
3530 r->control = '\0';
3531
3532 if (*p != '\0')
3533 mandoc_msg(MANDOCERR_ARG_EXCESS,
3534 ln, p - buf->buf, "cc ... %s", p);
3535
3536 return ROFF_IGN;
3537 }
3538
3539 static int
3540 roff_char(ROFF_ARGS)
3541 {
3542 const char *p, *kp, *vp;
3543 size_t ksz, vsz;
3544 int font;
3545
3546 /* Parse the character to be replaced. */
3547
3548 kp = buf->buf + pos;
3549 p = kp + 1;
3550 if (*kp == '\0' || (*kp == '\\' &&
3551 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3552 (*p != ' ' && *p != '\0')) {
3553 mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp);
3554 return ROFF_IGN;
3555 }
3556 ksz = p - kp;
3557 while (*p == ' ')
3558 p++;
3559
3560 /*
3561 * If the replacement string contains a font escape sequence,
3562 * we have to restore the font at the end.
3563 */
3564
3565 vp = p;
3566 vsz = strlen(p);
3567 font = 0;
3568 while (*p != '\0') {
3569 if (*p++ != '\\')
3570 continue;
3571 switch (mandoc_escape(&p, NULL, NULL)) {
3572 case ESCAPE_FONT:
3573 case ESCAPE_FONTROMAN:
3574 case ESCAPE_FONTITALIC:
3575 case ESCAPE_FONTBOLD:
3576 case ESCAPE_FONTBI:
3577 case ESCAPE_FONTCW:
3578 case ESCAPE_FONTPREV:
3579 font++;
3580 break;
3581 default:
3582 break;
3583 }
3584 }
3585 if (font > 1)
3586 mandoc_msg(MANDOCERR_CHAR_FONT,
3587 ln, (int)(vp - buf->buf), "%s", vp);
3588
3589 /*
3590 * Approximate the effect of .char using the .tr tables.
3591 * XXX In groff, .char and .tr interact differently.
3592 */
3593
3594 if (ksz == 1) {
3595 if (r->xtab == NULL)
3596 r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3597 assert((unsigned int)*kp < 128);
3598 free(r->xtab[(int)*kp].p);
3599 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3600 "%s%s", vp, font ? "\fP" : "");
3601 } else {
3602 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3603 if (font)
3604 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3605 }
3606 return ROFF_IGN;
3607 }
3608
3609 static int
3610 roff_ec(ROFF_ARGS)
3611 {
3612 const char *p;
3613
3614 p = buf->buf + pos;
3615 if (*p == '\0')
3616 r->escape = '\\';
3617 else {
3618 r->escape = *p;
3619 if (*++p != '\0')
3620 mandoc_msg(MANDOCERR_ARG_EXCESS, ln,
3621 (int)(p - buf->buf), "ec ... %s", p);
3622 }
3623 return ROFF_IGN;
3624 }
3625
3626 static int
3627 roff_eo(ROFF_ARGS)
3628 {
3629 r->escape = '\0';
3630 if (buf->buf[pos] != '\0')
3631 mandoc_msg(MANDOCERR_ARG_SKIP,
3632 ln, pos, "eo %s", buf->buf + pos);
3633 return ROFF_IGN;
3634 }
3635
3636 static int
3637 roff_nop(ROFF_ARGS)
3638 {
3639 while (buf->buf[pos] == ' ')
3640 pos++;
3641 *offs = pos;
3642 return ROFF_RERUN;
3643 }
3644
3645 static int
3646 roff_tr(ROFF_ARGS)
3647 {
3648 const char *p, *first, *second;
3649 size_t fsz, ssz;
3650 enum mandoc_esc esc;
3651
3652 p = buf->buf + pos;
3653
3654 if (*p == '\0') {
3655 mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr");
3656 return ROFF_IGN;
3657 }
3658
3659 while (*p != '\0') {
3660 fsz = ssz = 1;
3661
3662 first = p++;
3663 if (*first == '\\') {
3664 esc = mandoc_escape(&p, NULL, NULL);
3665 if (esc == ESCAPE_ERROR) {
3666 mandoc_msg(MANDOCERR_ESC_BAD, ln,
3667 (int)(p - buf->buf), "%s", first);
3668 return ROFF_IGN;
3669 }
3670 fsz = (size_t)(p - first);
3671 }
3672
3673 second = p++;
3674 if (*second == '\\') {
3675 esc = mandoc_escape(&p, NULL, NULL);
3676 if (esc == ESCAPE_ERROR) {
3677 mandoc_msg(MANDOCERR_ESC_BAD, ln,
3678 (int)(p - buf->buf), "%s", second);
3679 return ROFF_IGN;
3680 }
3681 ssz = (size_t)(p - second);
3682 } else if (*second == '\0') {
3683 mandoc_msg(MANDOCERR_TR_ODD, ln,
3684 (int)(first - buf->buf), "tr %s", first);
3685 second = " ";
3686 p--;
3687 }
3688
3689 if (fsz > 1) {
3690 roff_setstrn(&r->xmbtab, first, fsz,
3691 second, ssz, 0);
3692 continue;
3693 }
3694
3695 if (r->xtab == NULL)
3696 r->xtab = mandoc_calloc(128,
3697 sizeof(struct roffstr));
3698
3699 free(r->xtab[(int)*first].p);
3700 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3701 r->xtab[(int)*first].sz = ssz;
3702 }
3703
3704 return ROFF_IGN;
3705 }
3706
3707 /*
3708 * Implementation of the .return request.
3709 * There is no need to call roff_userret() from here.
3710 * The read module will call that after rewinding the reader stack
3711 * to the place from where the current macro was called.
3712 */
3713 static int
3714 roff_return(ROFF_ARGS)
3715 {
3716 if (r->mstackpos >= 0)
3717 return ROFF_IGN | ROFF_USERRET;
3718
3719 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return");
3720 return ROFF_IGN;
3721 }
3722
3723 static int
3724 roff_rn(ROFF_ARGS)
3725 {
3726 const char *value;
3727 char *oldn, *newn, *end;
3728 size_t oldsz, newsz;
3729 int deftype;
3730
3731 oldn = newn = buf->buf + pos;
3732 if (*oldn == '\0')
3733 return ROFF_IGN;
3734
3735 oldsz = roff_getname(r, &newn, ln, pos);
3736 if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0')
3737 return ROFF_IGN;
3738
3739 end = newn;
3740 newsz = roff_getname(r, &end, ln, newn - buf->buf);
3741 if (newsz == 0)
3742 return ROFF_IGN;
3743
3744 deftype = ROFFDEF_ANY;
3745 value = roff_getstrn(r, oldn, oldsz, &deftype);
3746 switch (deftype) {
3747 case ROFFDEF_USER:
3748 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3749 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3750 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3751 break;
3752 case ROFFDEF_PRE:
3753 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3754 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3755 break;
3756 case ROFFDEF_REN:
3757 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3758 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3759 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3760 break;
3761 case ROFFDEF_STD:
3762 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3763 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3764 break;
3765 default:
3766 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3767 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3768 break;
3769 }
3770 return ROFF_IGN;
3771 }
3772
3773 static int
3774 roff_shift(ROFF_ARGS)
3775 {
3776 struct mctx *ctx;
3777 int levels, i;
3778
3779 levels = 1;
3780 if (buf->buf[pos] != '\0' &&
3781 roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
3782 mandoc_msg(MANDOCERR_CE_NONUM,
3783 ln, pos, "shift %s", buf->buf + pos);
3784 levels = 1;
3785 }
3786 if (r->mstackpos < 0) {
3787 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift");
3788 return ROFF_IGN;
3789 }
3790 ctx = r->mstack + r->mstackpos;
3791 if (levels > ctx->argc) {
3792 mandoc_msg(MANDOCERR_SHIFT,
3793 ln, pos, "%d, but max is %d", levels, ctx->argc);
3794 levels = ctx->argc;
3795 }
3796 if (levels == 0)
3797 return ROFF_IGN;
3798 for (i = 0; i < levels; i++)
3799 free(ctx->argv[i]);
3800 ctx->argc -= levels;
3801 for (i = 0; i < ctx->argc; i++)
3802 ctx->argv[i] = ctx->argv[i + levels];
3803 return ROFF_IGN;
3804 }
3805
3806 static int
3807 roff_so(ROFF_ARGS)
3808 {
3809 char *name, *cp;
3810
3811 name = buf->buf + pos;
3812 mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name);
3813
3814 /*
3815 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3816 * opening anything that's not in our cwd or anything beneath
3817 * it. Thus, explicitly disallow traversing up the file-system
3818 * or using absolute paths.
3819 */
3820
3821 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3822 mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name);
3823 buf->sz = mandoc_asprintf(&cp,
3824 ".sp\nSee the file %s.\n.sp", name) + 1;
3825 free(buf->buf);
3826 buf->buf = cp;
3827 *offs = 0;
3828 return ROFF_REPARSE;
3829 }
3830
3831 *offs = pos;
3832 return ROFF_SO;
3833 }
3834
3835 /* --- user defined strings and macros ------------------------------------ */
3836
3837 static int
3838 roff_userdef(ROFF_ARGS)
3839 {
3840 struct mctx *ctx;
3841 char *arg, *ap, *dst, *src;
3842 size_t sz;
3843
3844 /* If the macro is empty, ignore it altogether. */
3845
3846 if (*r->current_string == '\0')
3847 return ROFF_IGN;
3848
3849 /* Initialize a new macro stack context. */
3850
3851 if (++r->mstackpos == r->mstacksz) {
3852 r->mstack = mandoc_recallocarray(r->mstack,
3853 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
3854 r->mstacksz += 8;
3855 }
3856 ctx = r->mstack + r->mstackpos;
3857 ctx->argsz = 0;
3858 ctx->argc = 0;
3859 ctx->argv = NULL;
3860
3861 /*
3862 * Collect pointers to macro argument strings,
3863 * NUL-terminating them and escaping quotes.
3864 */
3865
3866 src = buf->buf + pos;
3867 while (*src != '\0') {
3868 if (ctx->argc == ctx->argsz) {
3869 ctx->argsz += 8;
3870 ctx->argv = mandoc_reallocarray(ctx->argv,
3871 ctx->argsz, sizeof(*ctx->argv));
3872 }
3873 arg = roff_getarg(r, &src, ln, &pos);
3874 sz = 1; /* For the terminating NUL. */
3875 for (ap = arg; *ap != '\0'; ap++)
3876 sz += *ap == '"' ? 4 : 1;
3877 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
3878 for (ap = arg; *ap != '\0'; ap++) {
3879 if (*ap == '"') {
3880 memcpy(dst, "\\(dq", 4);
3881 dst += 4;
3882 } else
3883 *dst++ = *ap;
3884 }
3885 *dst = '\0';
3886 free(arg);
3887 }
3888
3889 /* Replace the macro invocation by the macro definition. */
3890
3891 free(buf->buf);
3892 buf->buf = mandoc_strdup(r->current_string);
3893 buf->sz = strlen(buf->buf) + 1;
3894 *offs = 0;
3895
3896 return buf->buf[buf->sz - 2] == '\n' ?
3897 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
3898 }
3899
3900 /*
3901 * Calling a high-level macro that was renamed with .rn.
3902 * r->current_string has already been set up by roff_parse().
3903 */
3904 static int
3905 roff_renamed(ROFF_ARGS)
3906 {
3907 char *nbuf;
3908
3909 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
3910 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
3911 free(buf->buf);
3912 buf->buf = nbuf;
3913 *offs = 0;
3914 return ROFF_CONT;
3915 }
3916
3917 /*
3918 * Measure the length in bytes of the roff identifier at *cpp
3919 * and advance the pointer to the next word.
3920 */
3921 static size_t
3922 roff_getname(struct roff *r, char **cpp, int ln, int pos)
3923 {
3924 char *name, *cp;
3925 size_t namesz;
3926
3927 name = *cpp;
3928 if (*name == '\0')
3929 return 0;
3930
3931 /* Advance cp to the byte after the end of the name. */
3932
3933 for (cp = name; 1; cp++) {
3934 namesz = cp - name;
3935 if (*cp == '\0')
3936 break;
3937 if (*cp == ' ' || *cp == '\t') {
3938 cp++;
3939 break;
3940 }
3941 if (*cp != '\\')
3942 continue;
3943 if (cp[1] == '{' || cp[1] == '}')
3944 break;
3945 if (*++cp == '\\')
3946 continue;
3947 mandoc_msg(MANDOCERR_NAMESC, ln, pos,
3948 "%.*s", (int)(cp - name + 1), name);
3949 mandoc_escape((const char **)&cp, NULL, NULL);
3950 break;
3951 }
3952
3953 /* Read past spaces. */
3954
3955 while (*cp == ' ')
3956 cp++;
3957
3958 *cpp = cp;
3959 return namesz;
3960 }
3961
3962 /*
3963 * Store *string into the user-defined string called *name.
3964 * To clear an existing entry, call with (*r, *name, NULL, 0).
3965 * append == 0: replace mode
3966 * append == 1: single-line append mode
3967 * append == 2: multiline append mode, append '\n' after each call
3968 */
3969 static void
3970 roff_setstr(struct roff *r, const char *name, const char *string,
3971 int append)
3972 {
3973 size_t namesz;
3974
3975 namesz = strlen(name);
3976 roff_setstrn(&r->strtab, name, namesz, string,
3977 string ? strlen(string) : 0, append);
3978 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3979 }
3980
3981 static void
3982 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
3983 const char *string, size_t stringsz, int append)
3984 {
3985 struct roffkv *n;
3986 char *c;
3987 int i;
3988 size_t oldch, newch;
3989
3990 /* Search for an existing string with the same name. */
3991 n = *r;
3992
3993 while (n && (namesz != n->key.sz ||
3994 strncmp(n->key.p, name, namesz)))
3995 n = n->next;
3996
3997 if (NULL == n) {
3998 /* Create a new string table entry. */
3999 n = mandoc_malloc(sizeof(struct roffkv));
4000 n->key.p = mandoc_strndup(name, namesz);
4001 n->key.sz = namesz;
4002 n->val.p = NULL;
4003 n->val.sz = 0;
4004 n->next = *r;
4005 *r = n;
4006 } else if (0 == append) {
4007 free(n->val.p);
4008 n->val.p = NULL;
4009 n->val.sz = 0;
4010 }
4011
4012 if (NULL == string)
4013 return;
4014
4015 /*
4016 * One additional byte for the '\n' in multiline mode,
4017 * and one for the terminating '\0'.
4018 */
4019 newch = stringsz + (1 < append ? 2u : 1u);
4020
4021 if (NULL == n->val.p) {
4022 n->val.p = mandoc_malloc(newch);
4023 *n->val.p = '\0';
4024 oldch = 0;
4025 } else {
4026 oldch = n->val.sz;
4027 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
4028 }
4029
4030 /* Skip existing content in the destination buffer. */
4031 c = n->val.p + (int)oldch;
4032
4033 /* Append new content to the destination buffer. */
4034 i = 0;
4035 while (i < (int)stringsz) {
4036 /*
4037 * Rudimentary roff copy mode:
4038 * Handle escaped backslashes.
4039 */
4040 if ('\\' == string[i] && '\\' == string[i + 1])
4041 i++;
4042 *c++ = string[i++];
4043 }
4044
4045 /* Append terminating bytes. */
4046 if (1 < append)
4047 *c++ = '\n';
4048
4049 *c = '\0';
4050 n->val.sz = (int)(c - n->val.p);
4051 }
4052
4053 static const char *
4054 roff_getstrn(struct roff *r, const char *name, size_t len,
4055 int *deftype)
4056 {
4057 const struct roffkv *n;
4058 int found, i;
4059 enum roff_tok tok;
4060
4061 found = 0;
4062 for (n = r->strtab; n != NULL; n = n->next) {
4063 if (strncmp(name, n->key.p, len) != 0 ||
4064 n->key.p[len] != '\0' || n->val.p == NULL)
4065 continue;
4066 if (*deftype & ROFFDEF_USER) {
4067 *deftype = ROFFDEF_USER;
4068 return n->val.p;
4069 } else {
4070 found = 1;
4071 break;
4072 }
4073 }
4074 for (n = r->rentab; n != NULL; n = n->next) {
4075 if (strncmp(name, n->key.p, len) != 0 ||
4076 n->key.p[len] != '\0' || n->val.p == NULL)
4077 continue;
4078 if (*deftype & ROFFDEF_REN) {
4079 *deftype = ROFFDEF_REN;
4080 return n->val.p;
4081 } else {
4082 found = 1;
4083 break;
4084 }
4085 }
4086 for (i = 0; i < PREDEFS_MAX; i++) {
4087 if (strncmp(name, predefs[i].name, len) != 0 ||
4088 predefs[i].name[len] != '\0')
4089 continue;
4090 if (*deftype & ROFFDEF_PRE) {
4091 *deftype = ROFFDEF_PRE;
4092 return predefs[i].str;
4093 } else {
4094 found = 1;
4095 break;
4096 }
4097 }
4098 if (r->man->meta.macroset != MACROSET_MAN) {
4099 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
4100 if (strncmp(name, roff_name[tok], len) != 0 ||
4101 roff_name[tok][len] != '\0')
4102 continue;
4103 if (*deftype & ROFFDEF_STD) {
4104 *deftype = ROFFDEF_STD;
4105 return NULL;
4106 } else {
4107 found = 1;
4108 break;
4109 }
4110 }
4111 }
4112 if (r->man->meta.macroset != MACROSET_MDOC) {
4113 for (tok = MAN_TH; tok < MAN_MAX; tok++) {
4114 if (strncmp(name, roff_name[tok], len) != 0 ||
4115 roff_name[tok][len] != '\0')
4116 continue;
4117 if (*deftype & ROFFDEF_STD) {
4118 *deftype = ROFFDEF_STD;
4119 return NULL;
4120 } else {
4121 found = 1;
4122 break;
4123 }
4124 }
4125 }
4126
4127 if (found == 0 && *deftype != ROFFDEF_ANY) {
4128 if (*deftype & ROFFDEF_REN) {
4129 /*
4130 * This might still be a request,
4131 * so do not treat it as undefined yet.
4132 */
4133 *deftype = ROFFDEF_UNDEF;
4134 return NULL;
4135 }
4136
4137 /* Using an undefined string defines it to be empty. */
4138
4139 roff_setstrn(&r->strtab, name, len, "", 0, 0);
4140 roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
4141 }
4142
4143 *deftype = 0;
4144 return NULL;
4145 }
4146
4147 static void
4148 roff_freestr(struct roffkv *r)
4149 {
4150 struct roffkv *n, *nn;
4151
4152 for (n = r; n; n = nn) {
4153 free(n->key.p);
4154 free(n->val.p);
4155 nn = n->next;
4156 free(n);
4157 }
4158 }
4159
4160 /* --- accessors and utility functions ------------------------------------ */
4161
4162 /*
4163 * Duplicate an input string, making the appropriate character
4164 * conversations (as stipulated by `tr') along the way.
4165 * Returns a heap-allocated string with all the replacements made.
4166 */
4167 char *
4168 roff_strdup(const struct roff *r, const char *p)
4169 {
4170 const struct roffkv *cp;
4171 char *res;
4172 const char *pp;
4173 size_t ssz, sz;
4174 enum mandoc_esc esc;
4175
4176 if (NULL == r->xmbtab && NULL == r->xtab)
4177 return mandoc_strdup(p);
4178 else if ('\0' == *p)
4179 return mandoc_strdup("");
4180
4181 /*
4182 * Step through each character looking for term matches
4183 * (remember that a `tr' can be invoked with an escape, which is
4184 * a glyph but the escape is multi-character).
4185 * We only do this if the character hash has been initialised
4186 * and the string is >0 length.
4187 */
4188
4189 res = NULL;
4190 ssz = 0;
4191
4192 while ('\0' != *p) {
4193 assert((unsigned int)*p < 128);
4194 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4195 sz = r->xtab[(int)*p].sz;
4196 res = mandoc_realloc(res, ssz + sz + 1);
4197 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4198 ssz += sz;
4199 p++;
4200 continue;
4201 } else if ('\\' != *p) {
4202 res = mandoc_realloc(res, ssz + 2);
4203 res[ssz++] = *p++;
4204 continue;
4205 }
4206
4207 /* Search for term matches. */
4208 for (cp = r->xmbtab; cp; cp = cp->next)
4209 if (0 == strncmp(p, cp->key.p, cp->key.sz))
4210 break;
4211
4212 if (NULL != cp) {
4213 /*
4214 * A match has been found.
4215 * Append the match to the array and move
4216 * forward by its keysize.
4217 */
4218 res = mandoc_realloc(res,
4219 ssz + cp->val.sz + 1);
4220 memcpy(res + ssz, cp->val.p, cp->val.sz);
4221 ssz += cp->val.sz;
4222 p += (int)cp->key.sz;
4223 continue;
4224 }
4225
4226 /*
4227 * Handle escapes carefully: we need to copy
4228 * over just the escape itself, or else we might
4229 * do replacements within the escape itself.
4230 * Make sure to pass along the bogus string.
4231 */
4232 pp = p++;
4233 esc = mandoc_escape(&p, NULL, NULL);
4234 if (ESCAPE_ERROR == esc) {
4235 sz = strlen(pp);
4236 res = mandoc_realloc(res, ssz + sz + 1);
4237 memcpy(res + ssz, pp, sz);
4238 break;
4239 }
4240 /*
4241 * We bail out on bad escapes.
4242 * No need to warn: we already did so when
4243 * roff_expand() was called.
4244 */
4245 sz = (int)(p - pp);
4246 res = mandoc_realloc(res, ssz + sz + 1);
4247 memcpy(res + ssz, pp, sz);
4248 ssz += sz;
4249 }
4250
4251 res[(int)ssz] = '\0';
4252 return res;
4253 }
4254
4255 int
4256 roff_getformat(const struct roff *r)
4257 {
4258
4259 return r->format;
4260 }
4261
4262 /*
4263 * Find out whether a line is a macro line or not.
4264 * If it is, adjust the current position and return one; if it isn't,
4265 * return zero and don't change the current position.
4266 * If the control character has been set with `.cc', then let that grain
4267 * precedence.
4268 * This is slighly contrary to groff, where using the non-breaking
4269 * control character when `cc' has been invoked will cause the
4270 * non-breaking macro contents to be printed verbatim.
4271 */
4272 int
4273 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4274 {
4275 int pos;
4276
4277 pos = *ppos;
4278
4279 if (r->control != '\0' && cp[pos] == r->control)
4280 pos++;
4281 else if (r->control != '\0')
4282 return 0;
4283 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4284 pos += 2;
4285 else if ('.' == cp[pos] || '\'' == cp[pos])
4286 pos++;
4287 else
4288 return 0;
4289
4290 while (' ' == cp[pos] || '\t' == cp[pos])
4291 pos++;
4292
4293 *ppos = pos;
4294 return 1;
4295 }