]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
Make roff_expand() parse left-to-right rather than right-to-left.
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.388 2022/05/19 15:37:47 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010-2015, 2017-2022 Ingo Schwarze <schwarze@openbsd.org>
4 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 * Implementation of the roff(7) parser for mandoc(1).
19 */
20 #include "config.h"
21
22 #include <sys/types.h>
23
24 #include <assert.h>
25 #include <ctype.h>
26 #include <limits.h>
27 #include <stddef.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32
33 #include "mandoc_aux.h"
34 #include "mandoc_ohash.h"
35 #include "mandoc.h"
36 #include "roff.h"
37 #include "mandoc_parse.h"
38 #include "libmandoc.h"
39 #include "roff_int.h"
40 #include "tbl_parse.h"
41 #include "eqn_parse.h"
42
/*
 * ASCII_ESC is used to signal from roff_getarg() to roff_expand()
 * that an escape sequence resulted from copy-in processing and
 * needs to be checked or interpolated.  As it is used nowhere
 * else, it is defined here rather than in a header file.
 */
#define	ASCII_ESC	27

/* Maximum number of string expansions per line, to break infinite loops. */
#define	EXPAND_LIMIT	1000

/*
 * Types of definitions of macros and strings.
 * Every type occupies its own bit, so several types can be combined
 * into one mask; ROFFDEF_ANY is the union of the four "defined" types.
 * Bit 0 is not assigned by any of these constants.
 */
#define	ROFFDEF_USER	(1 << 1)  /* User-defined. */
#define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
#define	ROFFDEF_REN	(1 << 3)  /* Renamed standard macro. */
#define	ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
			 ROFFDEF_REN | ROFFDEF_STD)
#define	ROFFDEF_UNDEF	(1 << 5)  /* Completely undefined. */
62
63 /* --- data types --------------------------------------------------------- */
64
/*
 * An incredibly-simple string buffer:
 * a pointer to the bytes together with their cached length.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
72
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * The string, rename and multi-byte translation tables of struct roff
 * are all lists of this type.
 */
struct	roffkv {
	struct roffstr	 key; /* name to look up */
	struct roffstr	 val; /* value stored under that name */
	struct roffkv	*next; /* next in list */
};
81
/*
 * A single number register as part of a singly-linked list.
 * The list head is the regtab member of struct roff.
 */
struct	roffreg {
	struct roffstr	 key; /* register name */
	int		 val; /* current value */
	int		 step; /* NOTE(review): presumably the auto-increment
				  step of the register - confirm */
	struct roffreg	*next; /* next in list */
};
91
/*
 * Association of request and macro names with token IDs.
 * These are the entries stored in the hash tables built by
 * roffhash_alloc(); the name is a flexible array member, so each
 * entry is allocated with room for the name and its terminating NUL.
 */
struct	roffreq {
	enum roff_tok	 tok; /* token the name maps to */
	char		 name[]; /* nil-terminated name, used as hash key */
};
99
/*
 * A macro processing context.
 * More than one is needed when macro calls are nested.
 * The contexts live in the mstack array of struct roff;
 * roff_free() releases the argv array of every context.
 */
struct	mctx {
	char		**argv; /* argument vector (heap-allocated) */
	int		 argc; /* NOTE(review): presumably the number of
				  arguments in use - confirm */
	int		 argsz; /* NOTE(review): presumably the allocated
				  capacity of argv - confirm */
};
109
/*
 * The complete state of one roff parser.
 * Allocated by roff_alloc(), cleared for reuse by roff_reset(),
 * and destroyed by roff_free().
 */
struct	roff {
	struct roff_man	*man; /* mdoc or man parser */
	struct roffnode	*last; /* leaf of stack */
	struct mctx	*mstack; /* stack of macro contexts */
	int		*rstack; /* stack of inverted `ie' values */
	struct ohash	*reqtab; /* request lookup table */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*rentab; /* renamed strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr');
				  128 entries when allocated */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* equation parser */
	struct eqn_node	*eqn; /* active equation parser */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 mstacksz; /* current size of mstack */
	int		 mstackpos; /* position in mstack; -1 when empty */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack; -1 when empty */
	int		 format; /* current file in mdoc or man format */
	char		 control; /* control character */
	char		 escape; /* escape character */
};
137
/*
 * A macro definition, condition, or ignored block.
 * Nodes form a stack linked through the parent pointer; they are
 * created by roffnode_push() and destroyed by roffnode_pop().
 */
struct	roffnode {
	enum roff_tok	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* custom end macro of the block */
	int		 endspan; /* scope to: 1=eol 2=next line -1=\} */
	int		 rule; /* content is: 1=evaluated 0=skipped */
};
151
/*
 * The parameter list shared by all request handlers.
 * Using a macro keeps the many handler prototypes and definitions
 * in this file in sync with the roffproc function pointer type.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum roff_tok tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Pointer to a request handler taking the ROFF_ARGS parameter list. */
typedef	int (*roffproc)(ROFF_ARGS);
161
/*
 * The handlers and flags of one request.
 * The roffs[] table holds one of these per request token;
 * handlers that do not apply to a request are NULL there.
 */
struct	roffmac {
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags; /* bitwise OR of the ROFFMAC_* flags below */
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
};
169
/*
 * One predefined string: the name used in the input
 * and the text it is replaced with.
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Expand one PREDEF() entry into an initializer for struct predef,
 * including the trailing comma, so that entries can simply be listed
 * one after another inside an array initializer.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
177
178 /* --- function prototypes ------------------------------------------------ */
179
/*
 * Forward declarations of all file-local functions.
 * Declarations using ROFF_ARGS are request handlers matching the
 * roffproc type; most of them are referenced from the roffs[] table.
 */
static	int		 roffnode_cleanscope(struct roff *);
static	int		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum roff_tok,
				const char *, int, int);
static	void		 roff_addtbl(struct roff_man *, int, struct tbl_node *);
static	int		 roff_als(ROFF_ARGS);
static	int		 roff_block(ROFF_ARGS);
static	int		 roff_block_text(ROFF_ARGS);
static	int		 roff_block_sub(ROFF_ARGS);
static	int		 roff_break(ROFF_ARGS);
static	int		 roff_cblock(ROFF_ARGS);
static	int		 roff_cc(ROFF_ARGS);
static	int		 roff_ccond(struct roff *, int, int);
static	int		 roff_char(ROFF_ARGS);
static	int		 roff_cond(ROFF_ARGS);
static	int		 roff_cond_checkend(ROFF_ARGS);
static	int		 roff_cond_text(ROFF_ARGS);
static	int		 roff_cond_sub(ROFF_ARGS);
static	int		 roff_ds(ROFF_ARGS);
static	int		 roff_ec(ROFF_ARGS);
static	int		 roff_eo(ROFF_ARGS);
static	int		 roff_eqndelim(struct roff *, struct buf *, int);
static	int		 roff_evalcond(struct roff *, int, char *, int *);
static	int		 roff_evalnum(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalpar(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalstrcond(const char *, int *);
static	int		 roff_expand(struct roff *, struct buf *,
				int, int, char);
static	void		 roff_expand_patch(struct buf *, int,
				const char *, int);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	size_t		 roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *, int);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(struct roff *,
				const char *, size_t, char);
static	int		 roff_getregro(const struct roff *,
				const char *name);
static	const char	*roff_getstrn(struct roff *,
				const char *, size_t, int *);
static	int		 roff_hasregn(const struct roff *,
				const char *, size_t);
static	int		 roff_insec(ROFF_ARGS);
static	int		 roff_it(ROFF_ARGS);
static	int		 roff_line_ignore(ROFF_ARGS);
static	void		 roff_man_alloc1(struct roff_man *);
static	void		 roff_man_free1(struct roff_man *);
static	int		 roff_manyarg(ROFF_ARGS);
static	int		 roff_mc(ROFF_ARGS);
static	int		 roff_noarg(ROFF_ARGS);
static	int		 roff_nop(ROFF_ARGS);
static	int		 roff_nr(ROFF_ARGS);
static	int		 roff_onearg(ROFF_ARGS);
static	enum roff_tok	 roff_parse(struct roff *, char *, int *,
				int, int);
static	int		 roff_parse_comment(struct roff *, struct buf *,
				int, int, char);
static	int		 roff_parsetext(struct roff *, struct buf *,
				int, int *);
static	int		 roff_renamed(ROFF_ARGS);
static	int		 roff_req_or_macro(ROFF_ARGS);
static	int		 roff_return(ROFF_ARGS);
static	int		 roff_rm(ROFF_ARGS);
static	int		 roff_rn(ROFF_ARGS);
static	int		 roff_rr(ROFF_ARGS);
static	void		 roff_setregn(struct roff *, const char *,
				size_t, int, char, int);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	int		 roff_shift(ROFF_ARGS);
static	int		 roff_so(ROFF_ARGS);
static	int		 roff_tr(ROFF_ARGS);
static	int		 roff_Dd(ROFF_ARGS);
static	int		 roff_TE(ROFF_ARGS);
static	int		 roff_TS(ROFF_ARGS);
static	int		 roff_EQ(ROFF_ARGS);
static	int		 roff_EN(ROFF_ARGS);
static	int		 roff_T_(ROFF_ARGS);
static	int		 roff_unsupp(ROFF_ARGS);
static	int		 roff_userdef(ROFF_ARGS);
266
267 /* --- constant data ------------------------------------------------------ */
268
/* Flag bits controlling the numeric parsing helpers; may be combined. */
#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */
271
/*
 * Names of all requests and macros, indexed by token value:
 * roffhash_alloc() reads roff_name[tok] for each token in its range.
 * The order of the entries is therefore significant.
 * NULL entries mark token values that have no name; roffhash_alloc()
 * leaves them out of the hash table.
 */
const char *__roff_name[MAN_MAX + 1] = {
	"br",		"ce",		"fi",		"ft",
	"ll",		"mc",		"nf",
	"po",		"rj",		"sp",
	"ta",		"ti",		NULL,
	"ab",		"ad",		"af",		"aln",
	"als",		"am",		"am1",		"ami",
	"ami1",		"as",		"as1",		"asciify",
	"backtrace",	"bd",		"bleedat",	"blm",
	"box",		"boxa",		"bp",		"BP",
	"break",	"breakchar",	"brnl",		"brp",
	"brpnl",	"c2",		"cc",
	"cf",		"cflags",	"ch",		"char",
	"chop",		"class",	"close",	"CL",
	"color",	"composite",	"continue",	"cp",
	"cropat",	"cs",		"cu",		"da",
	"dch",		"Dd",		"de",		"de1",
	"defcolor",	"dei",		"dei1",		"device",
	"devicem",	"di",		"do",		"ds",
	"ds1",		"dwh",		"dt",		"ec",
	"ecr",		"ecs",		"el",		"em",
	"EN",		"eo",		"EP",		"EQ",
	"errprint",	"ev",		"evc",		"ex",
	"fallback",	"fam",		"fc",		"fchar",
	"fcolor",	"fdeferlig",	"feature",	"fkern",
	"fl",		"flig",		"fp",		"fps",
	"fschar",	"fspacewidth",	"fspecial",	"ftr",
	"fzoom",	"gcolor",	"hc",		"hcode",
	"hidechar",	"hla",		"hlm",		"hpf",
	"hpfa",		"hpfcode",	"hw",		"hy",
	"hylang",	"hylen",	"hym",		"hypp",
	"hys",		"ie",		"if",		"ig",
	"index",	"it",		"itc",		"IX",
	"kern",		"kernafter",	"kernbefore",	"kernpair",
	"lc",		"lc_ctype",	"lds",		"length",
	"letadj",	"lf",		"lg",		"lhang",
	"linetabs",	"lnr",		"lnrf",		"lpfx",
	"ls",		"lsm",		"lt",
	"mediasize",	"minss",	"mk",		"mso",
	"na",		"ne",		"nh",		"nhychar",
	"nm",		"nn",		"nop",		"nr",
	"nrf",		"nroff",	"ns",		"nx",
	"open",		"opena",	"os",		"output",
	"padj",		"papersize",	"pc",		"pev",
	"pi",		"PI",		"pl",		"pm",
	"pn",		"pnr",		"ps",
	"psbb",		"pshape",	"pso",		"ptr",
	"pvs",		"rchar",	"rd",		"recursionlimit",
	"return",	"rfschar",	"rhang",
	"rm",		"rn",		"rnn",		"rr",
	"rs",		"rt",		"schar",	"sentchar",
	"shc",		"shift",	"sizes",	"so",
	"spacewidth",	"special",	"spreadwarn",	"ss",
	"sty",		"substring",	"sv",		"sy",
	"T&",		"tc",		"TE",
	"TH",		"tkf",		"tl",
	"tm",		"tm1",		"tmc",		"tr",
	"track",	"transchar",	"trf",		"trimat",
	"trin",		"trnt",		"troff",	"TS",
	"uf",		"ul",		"unformat",	"unwatch",
	"unwatchn",	"vpt",		"vs",		"warn",
	"warnscale",	"watch",	"watchlength",	"watchn",
	"wh",		"while",	"write",	"writec",
	"writem",	"xflag",	".",		NULL,
	NULL,		"text",
	"Dd",		"Dt",		"Os",		"Sh",
	"Ss",		"Pp",		"D1",		"Dl",
	"Bd",		"Ed",		"Bl",		"El",
	"It",		"Ad",		"An",		"Ap",
	"Ar",		"Cd",		"Cm",		"Dv",
	"Er",		"Ev",		"Ex",		"Fa",
	"Fd",		"Fl",		"Fn",		"Ft",
	"Ic",		"In",		"Li",		"Nd",
	"Nm",		"Op",		"Ot",		"Pa",
	"Rv",		"St",		"Va",		"Vt",
	"Xr",		"%A",		"%B",		"%D",
	"%I",		"%J",		"%N",		"%O",
	"%P",		"%R",		"%T",		"%V",
	"Ac",		"Ao",		"Aq",		"At",
	"Bc",		"Bf",		"Bo",		"Bq",
	"Bsx",		"Bx",		"Db",		"Dc",
	"Do",		"Dq",		"Ec",		"Ef",
	"Em",		"Eo",		"Fx",		"Ms",
	"No",		"Ns",		"Nx",		"Ox",
	"Pc",		"Pf",		"Po",		"Pq",
	"Qc",		"Ql",		"Qo",		"Qq",
	"Re",		"Rs",		"Sc",		"So",
	"Sq",		"Sm",		"Sx",		"Sy",
	"Tn",		"Ux",		"Xc",		"Xo",
	"Fo",		"Fc",		"Oo",		"Oc",
	"Bk",		"Ek",		"Bt",		"Hf",
	"Fr",		"Ud",		"Lb",		"Lp",
	"Lk",		"Mt",		"Brq",		"Bro",
	"Brc",		"%C",		"Es",		"En",
	"Dx",		"%Q",		"%U",		"Ta",
	"Tg",		NULL,
	"TH",		"SH",		"SS",		"TP",
	"TQ",
	"LP",		"PP",		"P",		"IP",
	"HP",		"SM",		"SB",		"BI",
	"IB",		"BR",		"RB",		"R",
	"B",		"I",		"IR",		"RI",
	"RE",		"RS",		"DT",		"UC",
	"PD",		"AT",		"in",
	"SY",		"YS",		"OP",
	"EX",		"EE",		"UR",
	"UE",		"MT",		"ME",		NULL
};
/* Read-only view of the table above; this is what the code in this file uses. */
const	char *const *roff_name = __roff_name;
381
/*
 * Handlers for every request token below TOKEN_NONE.
 * The entries appear in the same order as the request names at the
 * start of the name table above, as the trailing comments show, so
 * the order must not be changed independently of that table.
 * Each entry lists the proc, text, and sub handlers and the flags.
 */
static	struct roffmac	 roffs[TOKEN_NONE] = {
	{ roff_noarg, NULL, NULL, 0 },  /* br */
	{ roff_onearg, NULL, NULL, 0 },  /* ce */
	{ roff_noarg, NULL, NULL, 0 },  /* fi */
	{ roff_onearg, NULL, NULL, 0 },  /* ft */
	{ roff_onearg, NULL, NULL, 0 },  /* ll */
	{ roff_mc, NULL, NULL, 0 },  /* mc */
	{ roff_noarg, NULL, NULL, 0 },  /* nf */
	{ roff_onearg, NULL, NULL, 0 },  /* po */
	{ roff_onearg, NULL, NULL, 0 },  /* rj */
	{ roff_onearg, NULL, NULL, 0 },  /* sp */
	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
	{ roff_onearg, NULL, NULL, 0 },  /* ti */
	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
	{ roff_als, NULL, NULL, 0 },  /* als */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
	{ roff_ds, NULL, NULL, 0 },  /* as */
	{ roff_ds, NULL, NULL, 0 },  /* as1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
	{ roff_unsupp, NULL, NULL, 0 },  /* box */
	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
	{ roff_break, NULL, NULL, 0 },  /* break */
	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
	{ roff_noarg, NULL, NULL, 0 },  /* brp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
	{ roff_cc, NULL, NULL, 0 },  /* cc */
	{ roff_insec, NULL, NULL, 0 },  /* cf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
	{ roff_char, NULL, NULL, 0 },  /* char */
	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
	{ roff_insec, NULL, NULL, 0 },  /* close */
	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
	{ roff_unsupp, NULL, NULL, 0 },  /* da */
	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* device */
	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
	{ roff_unsupp, NULL, NULL, 0 },  /* di */
	{ roff_unsupp, NULL, NULL, 0 },  /* do */
	{ roff_ds, NULL, NULL, 0 },  /* ds */
	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
	{ roff_ec, NULL, NULL, 0 },  /* ec */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
	{ roff_unsupp, NULL, NULL, 0 },  /* em */
	{ roff_EN, NULL, NULL, 0 },  /* EN */
	{ roff_eo, NULL, NULL, 0 },  /* eo */
	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
	{ roff_unsupp, NULL, NULL, 0 },  /* index */
	{ roff_it, NULL, NULL, 0 },  /* it */
	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
	{ roff_unsupp, NULL, NULL, 0 },  /* length */
	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
	{ roff_insec, NULL, NULL, 0 },  /* lf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
	{ roff_insec, NULL, NULL, 0 },  /* mso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
	{ roff_nop, NULL, NULL, 0 },  /* nop */
	{ roff_nr, NULL, NULL, 0 },  /* nr */
	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
	{ roff_insec, NULL, NULL, 0 },  /* nx */
	{ roff_insec, NULL, NULL, 0 },  /* open */
	{ roff_insec, NULL, NULL, 0 },  /* opena */
	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
	{ roff_unsupp, NULL, NULL, 0 },  /* output */
	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
	{ roff_insec, NULL, NULL, 0 },  /* pi */
	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
	{ roff_insec, NULL, NULL, 0 },  /* pso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
	{ roff_return, NULL, NULL, 0 },  /* return */
	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
	{ roff_rm, NULL, NULL, 0 },  /* rm */
	{ roff_rn, NULL, NULL, 0 },  /* rn */
	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
	{ roff_rr, NULL, NULL, 0 },  /* rr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
	{ roff_shift, NULL, NULL, 0 },  /* shift */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
	{ roff_so, NULL, NULL, 0 },  /* so */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
	{ roff_insec, NULL, NULL, 0 },  /* sy */
	{ roff_T_, NULL, NULL, 0 },  /* T& */
	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
	{ roff_TE, NULL, NULL, 0 },  /* TE */
	{ roff_Dd, NULL, NULL, 0 },  /* TH */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
	{ roff_tr, NULL, NULL, 0 },  /* tr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
	{ roff_insec, NULL, NULL, 0 },  /* trf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
	{ roff_TS, NULL, NULL, 0 },  /* TS */
	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
	{ roff_unsupp, NULL, NULL, 0 },  /* wh */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/
	{ roff_insec, NULL, NULL, 0 },  /* write */
	{ roff_insec, NULL, NULL, 0 },  /* writec */
	{ roff_insec, NULL, NULL, 0 },  /* writem */
	{ roff_line_ignore, NULL, NULL, 0 },  /* xflag */
	{ roff_cblock, NULL, NULL, 0 },  /* . */
	/*
	 * NOTE(review): the last two entries presumably belong to the
	 * ROFF_RENAMED and ROFF_USERDEF tokens - confirm against roff.h.
	 */
	{ roff_renamed, NULL, NULL, 0 },
	{ roff_userdef, NULL, NULL, 0 }
};
628
/*
 * Array of injected predefined strings.
 * The entries come from predefs.in, which is expected to consist of
 * PREDEF() invocations expanding to struct predef initializers.
 * NOTE(review): PREDEFS_MAX presumably has to match the number of
 * entries in predefs.in - confirm when editing that file.
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
634
/*
 * File-scope parser state that is not part of struct roff.
 * roff_reset() returns all four variables to zero or NULL.
 */
static	int	 roffce_lines;	/* number of input lines to center */
static	struct roff_node *roffce_node;  /* active request */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
639
640
641 /* --- request table ------------------------------------------------------ */
642
643 struct ohash *
644 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
645 {
646 struct ohash *htab;
647 struct roffreq *req;
648 enum roff_tok tok;
649 size_t sz;
650 unsigned int slot;
651
652 htab = mandoc_malloc(sizeof(*htab));
653 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
654
655 for (tok = mintok; tok < maxtok; tok++) {
656 if (roff_name[tok] == NULL)
657 continue;
658 sz = strlen(roff_name[tok]);
659 req = mandoc_malloc(sizeof(*req) + sz + 1);
660 req->tok = tok;
661 memcpy(req->name, roff_name[tok], sz + 1);
662 slot = ohash_qlookup(htab, req->name);
663 ohash_insert(htab, slot, req);
664 }
665 return htab;
666 }
667
/*
 * Release a hash table built by roffhash_alloc(), including all of
 * its entries.  Passing NULL is allowed and does nothing.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 idx;

	if (htab != NULL) {
		entry = ohash_first(htab, &idx);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &idx);
		}
		ohash_delete(htab);
		free(htab);
	}
}
682
683 enum roff_tok
684 roffhash_find(struct ohash *htab, const char *name, size_t sz)
685 {
686 struct roffreq *req;
687 const char *end;
688
689 if (sz) {
690 end = name + sz;
691 req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
692 } else
693 req = ohash_find(htab, ohash_qlookup(htab, name));
694 return req == NULL ? TOKEN_NONE : req->tok;
695 }
696
697 /* --- stack of request blocks -------------------------------------------- */
698
699 /*
700 * Pop the current node off of the stack of roff instructions currently
701 * pending. Return 1 if it is a loop or 0 otherwise.
702 */
703 static int
704 roffnode_pop(struct roff *r)
705 {
706 struct roffnode *p;
707 int inloop;
708
709 p = r->last;
710 inloop = p->tok == ROFF_while;
711 r->last = p->parent;
712 free(p->name);
713 free(p->end);
714 free(p);
715 return inloop;
716 }
717
718 /*
719 * Push a roff node onto the instruction stack. This must later be
720 * removed with roffnode_pop().
721 */
722 static void
723 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
724 int line, int col)
725 {
726 struct roffnode *p;
727
728 p = mandoc_calloc(1, sizeof(struct roffnode));
729 p->tok = tok;
730 if (name)
731 p->name = mandoc_strdup(name);
732 p->parent = r->last;
733 p->line = line;
734 p->col = col;
735 p->rule = p->parent ? p->parent->rule : 0;
736
737 r->last = p;
738 }
739
740 /* --- roff parser state data management ---------------------------------- */
741
742 static void
743 roff_free1(struct roff *r)
744 {
745 int i;
746
747 tbl_free(r->first_tbl);
748 r->first_tbl = r->last_tbl = r->tbl = NULL;
749
750 eqn_free(r->last_eqn);
751 r->last_eqn = r->eqn = NULL;
752
753 while (r->mstackpos >= 0)
754 roff_userret(r);
755
756 while (r->last)
757 roffnode_pop(r);
758
759 free (r->rstack);
760 r->rstack = NULL;
761 r->rstacksz = 0;
762 r->rstackpos = -1;
763
764 roff_freereg(r->regtab);
765 r->regtab = NULL;
766
767 roff_freestr(r->strtab);
768 roff_freestr(r->rentab);
769 roff_freestr(r->xmbtab);
770 r->strtab = r->rentab = r->xmbtab = NULL;
771
772 if (r->xtab)
773 for (i = 0; i < 128; i++)
774 free(r->xtab[i].p);
775 free(r->xtab);
776 r->xtab = NULL;
777 }
778
779 void
780 roff_reset(struct roff *r)
781 {
782 roff_free1(r);
783 r->options |= MPARSE_COMMENT;
784 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
785 r->control = '\0';
786 r->escape = '\\';
787 roffce_lines = 0;
788 roffce_node = NULL;
789 roffit_lines = 0;
790 roffit_macro = NULL;
791 }
792
793 void
794 roff_free(struct roff *r)
795 {
796 int i;
797
798 roff_free1(r);
799 for (i = 0; i < r->mstacksz; i++)
800 free(r->mstack[i].argv);
801 free(r->mstack);
802 roffhash_free(r->reqtab);
803 free(r);
804 }
805
806 struct roff *
807 roff_alloc(int options)
808 {
809 struct roff *r;
810
811 r = mandoc_calloc(1, sizeof(struct roff));
812 r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
813 r->options = options | MPARSE_COMMENT;
814 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
815 r->mstackpos = -1;
816 r->rstackpos = -1;
817 r->escape = '\\';
818 return r;
819 }
820
821 /* --- syntax tree state data management ---------------------------------- */
822
823 static void
824 roff_man_free1(struct roff_man *man)
825 {
826 if (man->meta.first != NULL)
827 roff_node_delete(man, man->meta.first);
828 free(man->meta.msec);
829 free(man->meta.vol);
830 free(man->meta.os);
831 free(man->meta.arch);
832 free(man->meta.title);
833 free(man->meta.name);
834 free(man->meta.date);
835 free(man->meta.sodest);
836 }
837
838 void
839 roff_state_reset(struct roff_man *man)
840 {
841 man->last = man->meta.first;
842 man->last_es = NULL;
843 man->flags = 0;
844 man->lastsec = man->lastnamed = SEC_NONE;
845 man->next = ROFF_NEXT_CHILD;
846 roff_setreg(man->roff, "nS", 0, '=');
847 }
848
849 static void
850 roff_man_alloc1(struct roff_man *man)
851 {
852 memset(&man->meta, 0, sizeof(man->meta));
853 man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
854 man->meta.first->type = ROFFT_ROOT;
855 man->meta.macroset = MACROSET_NONE;
856 roff_state_reset(man);
857 }
858
/*
 * Throw away the syntax tree and metadata of the current document
 * and set up an empty one in the same struct roff_man.
 */
void
roff_man_reset(struct roff_man *man)
{
	/* Release the old tree and metadata strings ... */
	roff_man_free1(man);

	/* ... and start over with a fresh root node. */
	roff_man_alloc1(man);
}
865
866 void
867 roff_man_free(struct roff_man *man)
868 {
869 roff_man_free1(man);
870 free(man->os_r);
871 free(man);
872 }
873
874 struct roff_man *
875 roff_man_alloc(struct roff *roff, const char *os_s, int quick)
876 {
877 struct roff_man *man;
878
879 man = mandoc_calloc(1, sizeof(*man));
880 man->roff = roff;
881 man->os_s = os_s;
882 man->quick = quick;
883 roff_man_alloc1(man);
884 roff->man = man;
885 return man;
886 }
887
888 /* --- syntax tree handling ----------------------------------------------- */
889
890 struct roff_node *
891 roff_node_alloc(struct roff_man *man, int line, int pos,
892 enum roff_type type, int tok)
893 {
894 struct roff_node *n;
895
896 n = mandoc_calloc(1, sizeof(*n));
897 n->line = line;
898 n->pos = pos;
899 n->tok = tok;
900 n->type = type;
901 n->sec = man->lastsec;
902
903 if (man->flags & MDOC_SYNOPSIS)
904 n->flags |= NODE_SYNPRETTY;
905 else
906 n->flags &= ~NODE_SYNPRETTY;
907 if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL)
908 n->flags |= NODE_NOFILL;
909 else
910 n->flags &= ~NODE_NOFILL;
911 if (man->flags & MDOC_NEWLINE)
912 n->flags |= NODE_LINE;
913 man->flags &= ~MDOC_NEWLINE;
914
915 return n;
916 }
917
918 void
919 roff_node_append(struct roff_man *man, struct roff_node *n)
920 {
921
922 switch (man->next) {
923 case ROFF_NEXT_SIBLING:
924 if (man->last->next != NULL) {
925 n->next = man->last->next;
926 man->last->next->prev = n;
927 } else
928 man->last->parent->last = n;
929 man->last->next = n;
930 n->prev = man->last;
931 n->parent = man->last->parent;
932 break;
933 case ROFF_NEXT_CHILD:
934 if (man->last->child != NULL) {
935 n->next = man->last->child;
936 man->last->child->prev = n;
937 } else
938 man->last->last = n;
939 man->last->child = n;
940 n->parent = man->last;
941 break;
942 default:
943 abort();
944 }
945 man->last = n;
946
947 switch (n->type) {
948 case ROFFT_HEAD:
949 n->parent->head = n;
950 break;
951 case ROFFT_BODY:
952 if (n->end != ENDBODY_NOT)
953 return;
954 n->parent->body = n;
955 break;
956 case ROFFT_TAIL:
957 n->parent->tail = n;
958 break;
959 default:
960 return;
961 }
962
963 /*
964 * Copy over the normalised-data pointer of our parent. Not
965 * everybody has one, but copying a null pointer is fine.
966 */
967
968 n->norm = n->parent->norm;
969 assert(n->parent->type == ROFFT_BLOCK);
970 }
971
972 void
973 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
974 {
975 struct roff_node *n;
976
977 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
978 n->string = roff_strdup(man->roff, word);
979 roff_node_append(man, n);
980 n->flags |= NODE_VALID | NODE_ENDED;
981 man->next = ROFF_NEXT_SIBLING;
982 }
983
984 void
985 roff_word_append(struct roff_man *man, const char *word)
986 {
987 struct roff_node *n;
988 char *addstr, *newstr;
989
990 n = man->last;
991 addstr = roff_strdup(man->roff, word);
992 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
993 free(addstr);
994 free(n->string);
995 n->string = newstr;
996 man->next = ROFF_NEXT_SIBLING;
997 }
998
999 void
1000 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
1001 {
1002 struct roff_node *n;
1003
1004 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
1005 roff_node_append(man, n);
1006 man->next = ROFF_NEXT_CHILD;
1007 }
1008
1009 struct roff_node *
1010 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
1011 {
1012 struct roff_node *n;
1013
1014 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1015 roff_node_append(man, n);
1016 man->next = ROFF_NEXT_CHILD;
1017 return n;
1018 }
1019
1020 struct roff_node *
1021 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1022 {
1023 struct roff_node *n;
1024
1025 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1026 roff_node_append(man, n);
1027 man->next = ROFF_NEXT_CHILD;
1028 return n;
1029 }
1030
1031 struct roff_node *
1032 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1033 {
1034 struct roff_node *n;
1035
1036 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1037 roff_node_append(man, n);
1038 man->next = ROFF_NEXT_CHILD;
1039 return n;
1040 }
1041
1042 static void
1043 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1044 {
1045 struct roff_node *n;
1046 struct tbl_span *span;
1047
1048 if (man->meta.macroset == MACROSET_MAN)
1049 man_breakscope(man, ROFF_TS);
1050 while ((span = tbl_span(tbl)) != NULL) {
1051 n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1052 n->span = span;
1053 roff_node_append(man, n);
1054 n->flags |= NODE_VALID | NODE_ENDED;
1055 man->next = ROFF_NEXT_SIBLING;
1056 }
1057 }
1058
1059 void
1060 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1061 {
1062
1063 /* Adjust siblings. */
1064
1065 if (n->prev)
1066 n->prev->next = n->next;
1067 if (n->next)
1068 n->next->prev = n->prev;
1069
1070 /* Adjust parent. */
1071
1072 if (n->parent != NULL) {
1073 if (n->parent->child == n)
1074 n->parent->child = n->next;
1075 if (n->parent->last == n)
1076 n->parent->last = n->prev;
1077 }
1078
1079 /* Adjust parse point. */
1080
1081 if (man == NULL)
1082 return;
1083 if (man->last == n) {
1084 if (n->prev == NULL) {
1085 man->last = n->parent;
1086 man->next = ROFF_NEXT_CHILD;
1087 } else {
1088 man->last = n->prev;
1089 man->next = ROFF_NEXT_SIBLING;
1090 }
1091 }
1092 if (man->meta.first == n)
1093 man->meta.first = NULL;
1094 }
1095
1096 void
1097 roff_node_relink(struct roff_man *man, struct roff_node *n)
1098 {
1099 roff_node_unlink(man, n);
1100 n->prev = n->next = NULL;
1101 roff_node_append(man, n);
1102 }
1103
1104 void
1105 roff_node_free(struct roff_node *n)
1106 {
1107
1108 if (n->args != NULL)
1109 mdoc_argv_free(n->args);
1110 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1111 free(n->norm);
1112 eqn_box_free(n->eqn);
1113 free(n->string);
1114 free(n->tag);
1115 free(n);
1116 }
1117
1118 void
1119 roff_node_delete(struct roff_man *man, struct roff_node *n)
1120 {
1121
1122 while (n->child != NULL)
1123 roff_node_delete(man, n->child);
1124 roff_node_unlink(man, n);
1125 roff_node_free(n);
1126 }
1127
1128 int
1129 roff_node_transparent(struct roff_node *n)
1130 {
1131 if (n == NULL)
1132 return 0;
1133 if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
1134 return 1;
1135 return roff_tok_transparent(n->tok);
1136 }
1137
1138 int
1139 roff_tok_transparent(enum roff_tok tok)
1140 {
1141 switch (tok) {
1142 case ROFF_ft:
1143 case ROFF_ll:
1144 case ROFF_mc:
1145 case ROFF_po:
1146 case ROFF_ta:
1147 case MDOC_Db:
1148 case MDOC_Es:
1149 case MDOC_Sm:
1150 case MDOC_Tg:
1151 case MAN_DT:
1152 case MAN_UC:
1153 case MAN_PD:
1154 case MAN_AT:
1155 return 1;
1156 default:
1157 return 0;
1158 }
1159 }
1160
1161 struct roff_node *
1162 roff_node_child(struct roff_node *n)
1163 {
1164 for (n = n->child; roff_node_transparent(n); n = n->next)
1165 continue;
1166 return n;
1167 }
1168
1169 struct roff_node *
1170 roff_node_prev(struct roff_node *n)
1171 {
1172 do {
1173 n = n->prev;
1174 } while (roff_node_transparent(n));
1175 return n;
1176 }
1177
1178 struct roff_node *
1179 roff_node_next(struct roff_node *n)
1180 {
1181 do {
1182 n = n->next;
1183 } while (roff_node_transparent(n));
1184 return n;
1185 }
1186
1187 void
1188 deroff(char **dest, const struct roff_node *n)
1189 {
1190 char *cp;
1191 size_t sz;
1192
1193 if (n->string == NULL) {
1194 for (n = n->child; n != NULL; n = n->next)
1195 deroff(dest, n);
1196 return;
1197 }
1198
1199 /* Skip leading whitespace. */
1200
1201 for (cp = n->string; *cp != '\0'; cp++) {
1202 if (cp[0] == '\\' && cp[1] != '\0' &&
1203 strchr(" %&0^|~", cp[1]) != NULL)
1204 cp++;
1205 else if ( ! isspace((unsigned char)*cp))
1206 break;
1207 }
1208
1209 /* Skip trailing backslash. */
1210
1211 sz = strlen(cp);
1212 if (sz > 0 && cp[sz - 1] == '\\')
1213 sz--;
1214
1215 /* Skip trailing whitespace. */
1216
1217 for (; sz; sz--)
1218 if ( ! isspace((unsigned char)cp[sz-1]))
1219 break;
1220
1221 /* Skip empty strings. */
1222
1223 if (sz == 0)
1224 return;
1225
1226 if (*dest == NULL) {
1227 *dest = mandoc_strndup(cp, sz);
1228 return;
1229 }
1230
1231 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1232 free(*dest);
1233 *dest = cp;
1234 }
1235
1236 /* --- main functions of the roff parser ---------------------------------- */
1237
1238 /*
1239 * Save comments preceding the title macro, for example in order to
1240 * preserve Copyright and license headers in HTML output,
1241 * provide diagnostics about RCS ids and trailing whitespace in comments,
1242 * then discard comments including preceding whitespace.
1243 * This function also handles input line continuation.
1244 */
1245 static int
1246 roff_parse_comment(struct roff *r, struct buf *buf, int ln, int pos, char ec)
1247 {
1248 struct roff_node *n; /* used for header comments */
1249 const char *start; /* start of the string to process */
1250 const char *cp; /* for RCS id parsing */
1251 char *stesc; /* start of an escape sequence ('\\') */
1252 char *ep; /* end of comment string */
1253 int rcsid; /* kind of RCS id seen */
1254
1255 for (start = stesc = buf->buf + pos;; stesc++) {
1256 /*
1257 * XXX Ugly hack: Remove the newline character that
1258 * mparse_buf_r() appended to mark the end of input
1259 * if it is not preceded by an escape character.
1260 */
1261 if (stesc[0] == '\n') {
1262 assert(stesc[1] == '\0');
1263 stesc[0] = '\0';
1264 }
1265
1266 /* The line ends without continuation or comment. */
1267 if (stesc[0] == '\0')
1268 return ROFF_CONT;
1269
1270 /* Unescaped byte: skip it. */
1271 if (stesc[0] != ec)
1272 continue;
1273
1274 /*
1275 * XXX Ugly hack: Do not attempt to append another line
1276 * if the function mparse_buf_r() appended a newline
1277 * character to indicate the end of input.
1278 */
1279 if (stesc[1] == '\n') {
1280 assert(stesc[2] == '\0');
1281 stesc[0] = '\0';
1282 return ROFF_CONT;
1283 }
1284
1285 /*
1286 * An escape character at the end of an input line
1287 * requests line continuation.
1288 */
1289 if (stesc[1] == '\0') {
1290 stesc[0] = '\0';
1291 return ROFF_IGN | ROFF_APPEND;
1292 }
1293
1294 /* Found a comment: process it. */
1295 if (stesc[1] == '"' || stesc[1] == '#')
1296 break;
1297
1298 /* Escaped escape character: skip them both. */
1299 if (stesc[1] == ec)
1300 stesc++;
1301 }
1302
1303 /* Look for an RCS id in the comment. */
1304
1305 rcsid = 0;
1306 if ((cp = strstr(stesc + 2, "$" "OpenBSD")) != NULL) {
1307 rcsid = 1 << MANDOC_OS_OPENBSD;
1308 cp += 8;
1309 } else if ((cp = strstr(stesc + 2, "$" "NetBSD")) != NULL) {
1310 rcsid = 1 << MANDOC_OS_NETBSD;
1311 cp += 7;
1312 }
1313 if (cp != NULL && isalnum((unsigned char)*cp) == 0 &&
1314 strchr(cp, '$') != NULL) {
1315 if (r->man->meta.rcsids & rcsid)
1316 mandoc_msg(MANDOCERR_RCS_REP, ln,
1317 (int)(stesc - buf->buf) + 2, "%s", stesc + 1);
1318 r->man->meta.rcsids |= rcsid;
1319 }
1320
1321 /* Warn about trailing whitespace at the end of the comment. */
1322
1323 ep = strchr(stesc + 2, '\0') - 1;
1324 if (*ep == '\n')
1325 *ep-- = '\0';
1326 if (*ep == ' ' || *ep == '\t')
1327 mandoc_msg(MANDOCERR_SPACE_EOL,
1328 ln, (int)(ep - buf->buf), NULL);
1329
1330 /* Save comments preceding the title macro in the syntax tree. */
1331
1332 if (r->options & MPARSE_COMMENT) {
1333 while (*ep == ' ' || *ep == '\t')
1334 ep--;
1335 ep[1] = '\0';
1336 n = roff_node_alloc(r->man, ln, stesc + 1 - buf->buf,
1337 ROFFT_COMMENT, TOKEN_NONE);
1338 n->string = mandoc_strdup(stesc + 2);
1339 roff_node_append(r->man, n);
1340 n->flags |= NODE_VALID | NODE_ENDED;
1341 r->man->next = ROFF_NEXT_SIBLING;
1342 }
1343
1344 /* The comment requests line continuation. */
1345
1346 if (stesc[1] == '#') {
1347 *stesc = '\0';
1348 return ROFF_IGN | ROFF_APPEND;
1349 }
1350
1351 /* Discard the comment including preceding whitespace. */
1352
1353 while (stesc > start && stesc[-1] == ' ' &&
1354 (stesc == start + 1 || stesc[-2] != '\\'))
1355 stesc--;
1356 *stesc = '\0';
1357 return ROFF_CONT;
1358 }
1359
1360 /*
1361 * In the current line, expand escape sequences that produce parsable
1362 * input text. Also check the syntax of the remaining escape sequences,
1363 * which typically produce output glyphs or change formatter state.
1364 */
1365 static int
1366 roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char ec)
1367 {
1368 char ubuf[24]; /* buffer to print a number */
1369 struct mctx *ctx; /* current macro call context */
1370 const char *res; /* the string to be pasted */
1371 const char *src; /* source for copying */
1372 char *dst; /* destination for copying */
1373 int iesc; /* index of leading escape char */
1374 int inam; /* index of the escape name */
1375 int iarg; /* index beginning the argument */
1376 int iendarg; /* index right after the argument */
1377 int iend; /* index right after the sequence */
1378 int deftype; /* type of definition to paste */
1379 int argi; /* macro argument index */
1380 int quote_args; /* true for \\$@, false for \\$* */
1381 int asz; /* length of the replacement */
1382 int rsz; /* length of the rest of the string */
1383 int npos; /* position in numeric expression */
1384 int expand_count; /* to avoid infinite loops */
1385
1386 expand_count = 0;
1387 while (buf->buf[pos] != '\0') {
1388
1389 /*
1390 * Skip plain ASCII characters.
1391 * If we have a non-standard escape character,
1392 * escape literal backslashes because all processing in
1393 * subsequent functions uses the standard escaping rules.
1394 */
1395
1396 if (buf->buf[pos] != ec) {
1397 if (ec != ASCII_ESC && buf->buf[pos] == '\\') {
1398 roff_expand_patch(buf, pos, "\\e", pos + 1);
1399 pos++;
1400 }
1401 pos++;
1402 continue;
1403 }
1404
1405 /*
1406 * Parse escape sequences,
1407 * issue diagnostic messages when appropriate,
1408 * and skip sequences that do not need expansion.
1409 * If we have a non-standard escape character, translate
1410 * it to backslashes and translate backslashes to \e.
1411 */
1412
1413 if (roff_escape(buf->buf, ln, pos,
1414 &iesc, &iarg, &iendarg, &iend) != ESCAPE_EXPAND) {
1415 while (pos < iend) {
1416 if (buf->buf[pos] == ec) {
1417 buf->buf[pos] = '\\';
1418 if (pos + 1 < iend)
1419 pos++;
1420 } else if (buf->buf[pos] == '\\') {
1421 roff_expand_patch(buf,
1422 pos, "\\e", pos + 1);
1423 pos++;
1424 iend++;
1425 }
1426 pos++;
1427 }
1428 continue;
1429 }
1430
1431 /*
1432 * Treat "\E" just like "\";
1433 * it only makes a difference in copy mode.
1434 */
1435
1436 inam = iesc + 1;
1437 while (buf->buf[inam] == 'E')
1438 inam++;
1439
1440 /* Handle expansion. */
1441
1442 res = NULL;
1443 switch (buf->buf[inam]) {
1444 case '*':
1445 if (iendarg == iarg)
1446 break;
1447 deftype = ROFFDEF_USER | ROFFDEF_PRE;
1448 if ((res = roff_getstrn(r, buf->buf + iarg,
1449 iendarg - iarg, &deftype)) != NULL)
1450 break;
1451
1452 /*
1453 * If not overriden,
1454 * let \*(.T through to the formatters.
1455 */
1456
1457 if (iendarg - iarg == 2 &&
1458 buf->buf[iarg] == '.' &&
1459 buf->buf[iarg + 1] == 'T') {
1460 roff_setstrn(&r->strtab, ".T", 2, NULL, 0, 0);
1461 pos = iend;
1462 continue;
1463 }
1464
1465 mandoc_msg(MANDOCERR_STR_UNDEF, ln, iesc,
1466 "%.*s", iendarg - iarg, buf->buf + iarg);
1467 break;
1468
1469 case '$':
1470 if (r->mstackpos < 0) {
1471 mandoc_msg(MANDOCERR_ARG_UNDEF, ln, iesc,
1472 "%.*s", iend - iesc, buf->buf + iesc);
1473 break;
1474 }
1475 ctx = r->mstack + r->mstackpos;
1476 argi = buf->buf[iarg] - '1';
1477 if (argi >= 0 && argi <= 8) {
1478 if (argi < ctx->argc)
1479 res = ctx->argv[argi];
1480 break;
1481 }
1482 if (buf->buf[iarg] == '*')
1483 quote_args = 0;
1484 else if (buf->buf[iarg] == '@')
1485 quote_args = 1;
1486 else {
1487 mandoc_msg(MANDOCERR_ARG_NONUM, ln, iesc,
1488 "%.*s", iend - iesc, buf->buf + iesc);
1489 break;
1490 }
1491 asz = 0;
1492 for (argi = 0; argi < ctx->argc; argi++) {
1493 if (argi)
1494 asz++; /* blank */
1495 if (quote_args)
1496 asz += 2; /* quotes */
1497 asz += strlen(ctx->argv[argi]);
1498 }
1499 if (asz != iend - iesc) {
1500 rsz = buf->sz - iend;
1501 if (asz < iend - iesc)
1502 memmove(buf->buf + iesc + asz,
1503 buf->buf + iend, rsz);
1504 buf->sz = iesc + asz + rsz;
1505 buf->buf = mandoc_realloc(buf->buf, buf->sz);
1506 if (asz > iend - iesc)
1507 memmove(buf->buf + iesc + asz,
1508 buf->buf + iend, rsz);
1509 }
1510 dst = buf->buf + iesc;
1511 for (argi = 0; argi < ctx->argc; argi++) {
1512 if (argi)
1513 *dst++ = ' ';
1514 if (quote_args)
1515 *dst++ = '"';
1516 src = ctx->argv[argi];
1517 while (*src != '\0')
1518 *dst++ = *src++;
1519 if (quote_args)
1520 *dst++ = '"';
1521 }
1522 continue;
1523 case 'B':
1524 npos = 0;
1525 ubuf[0] = iendarg > iarg && iend > iendarg &&
1526 roff_evalnum(r, ln, buf->buf + iarg, &npos,
1527 NULL, ROFFNUM_SCALE) &&
1528 npos == iendarg - iarg ? '1' : '0';
1529 ubuf[1] = '\0';
1530 res = ubuf;
1531 break;
1532 case 'n':
1533 if (iendarg > iarg)
1534 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1535 roff_getregn(r, buf->buf + iarg,
1536 iendarg - iarg, buf->buf[inam + 1]));
1537 else
1538 ubuf[0] = '\0';
1539 res = ubuf;
1540 break;
1541 case 'w':
1542 (void)snprintf(ubuf, sizeof(ubuf),
1543 "%d", (iendarg - iarg) * 24);
1544 res = ubuf;
1545 break;
1546 default:
1547 break;
1548 }
1549 if (res == NULL)
1550 res = "";
1551 if (++expand_count > EXPAND_LIMIT ||
1552 buf->sz + strlen(res) > SHRT_MAX) {
1553 mandoc_msg(MANDOCERR_ROFFLOOP, ln, iesc, NULL);
1554 return ROFF_IGN;
1555 }
1556 roff_expand_patch(buf, iesc, res, iend);
1557 }
1558 return ROFF_CONT;
1559 }
1560
1561 /*
1562 * Replace the substring from the start position (inclusive)
1563 * to end position (exclusive) with the repl(acement) string.
1564 */
1565 static void
1566 roff_expand_patch(struct buf *buf, int start, const char *repl, int end)
1567 {
1568 char *nbuf;
1569
1570 buf->buf[start] = '\0';
1571 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s", buf->buf, repl,
1572 buf->buf + end) + 1;
1573 free(buf->buf);
1574 buf->buf = nbuf;
1575 }
1576
1577 /*
1578 * Parse a quoted or unquoted roff-style request or macro argument.
1579 * Return a pointer to the parsed argument, which is either the original
1580 * pointer or advanced by one byte in case the argument is quoted.
1581 * NUL-terminate the argument in place.
1582 * Collapse pairs of quotes inside quoted arguments.
1583 * Advance the argument pointer to the next argument,
1584 * or to the NUL byte terminating the argument line.
1585 */
1586 char *
1587 roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
1588 {
1589 struct buf buf;
1590 char *cp, *start;
1591 int newesc, pairs, quoted, white;
1592
1593 /* Quoting can only start with a new word. */
1594 start = *cpp;
1595 quoted = 0;
1596 if ('"' == *start) {
1597 quoted = 1;
1598 start++;
1599 }
1600
1601 newesc = pairs = white = 0;
1602 for (cp = start; '\0' != *cp; cp++) {
1603
1604 /*
1605 * Move the following text left
1606 * after quoted quotes and after "\\" and "\t".
1607 */
1608 if (pairs)
1609 cp[-pairs] = cp[0];
1610
1611 if ('\\' == cp[0]) {
1612 /*
1613 * In copy mode, translate double to single
1614 * backslashes and backslash-t to literal tabs.
1615 */
1616 switch (cp[1]) {
1617 case 'a':
1618 case 't':
1619 cp[-pairs] = '\t';
1620 pairs++;
1621 cp++;
1622 break;
1623 case '\\':
1624 newesc = 1;
1625 cp[-pairs] = ASCII_ESC;
1626 pairs++;
1627 cp++;
1628 break;
1629 case ' ':
1630 /* Skip escaped blanks. */
1631 if (0 == quoted)
1632 cp++;
1633 break;
1634 default:
1635 break;
1636 }
1637 } else if (0 == quoted) {
1638 if (' ' == cp[0]) {
1639 /* Unescaped blanks end unquoted args. */
1640 white = 1;
1641 break;
1642 }
1643 } else if ('"' == cp[0]) {
1644 if ('"' == cp[1]) {
1645 /* Quoted quotes collapse. */
1646 pairs++;
1647 cp++;
1648 } else {
1649 /* Unquoted quotes end quoted args. */
1650 quoted = 2;
1651 break;
1652 }
1653 }
1654 }
1655
1656 /* Quoted argument without a closing quote. */
1657 if (1 == quoted)
1658 mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);
1659
1660 /* NUL-terminate this argument and move to the next one. */
1661 if (pairs)
1662 cp[-pairs] = '\0';
1663 if ('\0' != *cp) {
1664 *cp++ = '\0';
1665 while (' ' == *cp)
1666 cp++;
1667 }
1668 *pos += (int)(cp - start) + (quoted ? 1 : 0);
1669 *cpp = cp;
1670
1671 if ('\0' == *cp && (white || ' ' == cp[-1]))
1672 mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);
1673
1674 start = mandoc_strdup(start);
1675 if (newesc == 0)
1676 return start;
1677
1678 buf.buf = start;
1679 buf.sz = strlen(start) + 1;
1680 buf.next = NULL;
1681 if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) {
1682 free(buf.buf);
1683 buf.buf = mandoc_strdup("");
1684 }
1685 return buf.buf;
1686 }
1687
1688
1689 /*
1690 * Process text streams.
1691 */
1692 static int
1693 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1694 {
1695 size_t sz;
1696 const char *start;
1697 char *p;
1698 int isz;
1699 enum mandoc_esc esc;
1700
1701 /* Spring the input line trap. */
1702
1703 if (roffit_lines == 1) {
1704 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1705 free(buf->buf);
1706 buf->buf = p;
1707 buf->sz = isz + 1;
1708 *offs = 0;
1709 free(roffit_macro);
1710 roffit_lines = 0;
1711 return ROFF_REPARSE;
1712 } else if (roffit_lines > 1)
1713 --roffit_lines;
1714
1715 if (roffce_node != NULL && buf->buf[pos] != '\0') {
1716 if (roffce_lines < 1) {
1717 r->man->last = roffce_node;
1718 r->man->next = ROFF_NEXT_SIBLING;
1719 roffce_lines = 0;
1720 roffce_node = NULL;
1721 } else
1722 roffce_lines--;
1723 }
1724
1725 /* Convert all breakable hyphens into ASCII_HYPH. */
1726
1727 start = p = buf->buf + pos;
1728
1729 while (*p != '\0') {
1730 sz = strcspn(p, "-\\");
1731 p += sz;
1732
1733 if (*p == '\0')
1734 break;
1735
1736 if (*p == '\\') {
1737 /* Skip over escapes. */
1738 p++;
1739 esc = mandoc_escape((const char **)&p, NULL, NULL);
1740 if (esc == ESCAPE_ERROR)
1741 break;
1742 while (*p == '-')
1743 p++;
1744 continue;
1745 } else if (p == start) {
1746 p++;
1747 continue;
1748 }
1749
1750 if (isalpha((unsigned char)p[-1]) &&
1751 isalpha((unsigned char)p[1]))
1752 *p = ASCII_HYPH;
1753 p++;
1754 }
1755 return ROFF_CONT;
1756 }
1757
1758 int
1759 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
1760 {
1761 enum roff_tok t;
1762 int e;
1763 int pos; /* parse point */
1764 int spos; /* saved parse point for messages */
1765 int ppos; /* original offset in buf->buf */
1766 int ctl; /* macro line (boolean) */
1767
1768 ppos = pos = *offs;
1769
1770 if (len > 80 && r->tbl == NULL && r->eqn == NULL &&
1771 (r->man->flags & ROFF_NOFILL) == 0 &&
1772 strchr(" .\\", buf->buf[pos]) == NULL &&
1773 buf->buf[pos] != r->control &&
1774 strcspn(buf->buf, " ") < 80)
1775 mandoc_msg(MANDOCERR_TEXT_LONG, ln, (int)len - 1,
1776 "%.20s...", buf->buf + pos);
1777
1778 /* Handle in-line equation delimiters. */
1779
1780 if (r->tbl == NULL &&
1781 r->last_eqn != NULL && r->last_eqn->delim &&
1782 (r->eqn == NULL || r->eqn_inline)) {
1783 e = roff_eqndelim(r, buf, pos);
1784 if (e == ROFF_REPARSE)
1785 return e;
1786 assert(e == ROFF_CONT);
1787 }
1788
1789 /* Handle comments and escape sequences. */
1790
1791 e = roff_parse_comment(r, buf, ln, pos, r->escape);
1792 if ((e & ROFF_MASK) == ROFF_IGN)
1793 return e;
1794 assert(e == ROFF_CONT);
1795
1796 e = roff_expand(r, buf, ln, pos, r->escape);
1797 if ((e & ROFF_MASK) == ROFF_IGN)
1798 return e;
1799 assert(e == ROFF_CONT);
1800
1801 ctl = roff_getcontrol(r, buf->buf, &pos);
1802
1803 /*
1804 * First, if a scope is open and we're not a macro, pass the
1805 * text through the macro's filter.
1806 * Equations process all content themselves.
1807 * Tables process almost all content themselves, but we want
1808 * to warn about macros before passing it there.
1809 */
1810
1811 if (r->last != NULL && ! ctl) {
1812 t = r->last->tok;
1813 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1814 if ((e & ROFF_MASK) == ROFF_IGN)
1815 return e;
1816 e &= ~ROFF_MASK;
1817 } else
1818 e = ROFF_IGN;
1819 if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
1820 eqn_read(r->eqn, buf->buf + ppos);
1821 return e;
1822 }
1823 if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
1824 tbl_read(r->tbl, ln, buf->buf, ppos);
1825 roff_addtbl(r->man, ln, r->tbl);
1826 return e;
1827 }
1828 if ( ! ctl) {
1829 r->options &= ~MPARSE_COMMENT;
1830 return roff_parsetext(r, buf, pos, offs) | e;
1831 }
1832
1833 /* Skip empty request lines. */
1834
1835 if (buf->buf[pos] == '"') {
1836 mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
1837 return ROFF_IGN;
1838 } else if (buf->buf[pos] == '\0')
1839 return ROFF_IGN;
1840
1841 /*
1842 * If a scope is open, go to the child handler for that macro,
1843 * as it may want to preprocess before doing anything with it.
1844 */
1845
1846 if (r->last) {
1847 t = r->last->tok;
1848 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1849 }
1850
1851 r->options &= ~MPARSE_COMMENT;
1852 spos = pos;
1853 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1854 return roff_req_or_macro(r, t, buf, ln, spos, pos, offs);
1855 }
1856
1857 /*
1858 * Handle a new request or macro.
1859 * May be called outside any scope or from inside a conditional scope.
1860 */
1861 static int
1862 roff_req_or_macro(ROFF_ARGS) {
1863
1864 /* For now, tables ignore most macros and some request. */
1865
1866 if (r->tbl != NULL && (tok == TOKEN_NONE || tok == ROFF_TS ||
1867 tok == ROFF_br || tok == ROFF_ce || tok == ROFF_rj ||
1868 tok == ROFF_sp)) {
1869 mandoc_msg(MANDOCERR_TBLMACRO,
1870 ln, ppos, "%s", buf->buf + ppos);
1871 if (tok != TOKEN_NONE)
1872 return ROFF_IGN;
1873 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1874 pos++;
1875 while (buf->buf[pos] == ' ')
1876 pos++;
1877 tbl_read(r->tbl, ln, buf->buf, pos);
1878 roff_addtbl(r->man, ln, r->tbl);
1879 return ROFF_IGN;
1880 }
1881
1882 /* For now, let high level macros abort .ce mode. */
1883
1884 if (roffce_node != NULL &&
1885 (tok == TOKEN_NONE || tok == ROFF_Dd || tok == ROFF_EQ ||
1886 tok == ROFF_TH || tok == ROFF_TS)) {
1887 r->man->last = roffce_node;
1888 r->man->next = ROFF_NEXT_SIBLING;
1889 roffce_lines = 0;
1890 roffce_node = NULL;
1891 }
1892
1893 /*
1894 * This is neither a roff request nor a user-defined macro.
1895 * Let the standard macro set parsers handle it.
1896 */
1897
1898 if (tok == TOKEN_NONE)
1899 return ROFF_CONT;
1900
1901 /* Execute a roff request or a user-defined macro. */
1902
1903 return (*roffs[tok].proc)(r, tok, buf, ln, ppos, pos, offs);
1904 }
1905
1906 /*
1907 * Internal interface function to tell the roff parser that execution
1908 * of the current macro ended. This is required because macro
1909 * definitions usually do not end with a .return request.
1910 */
1911 void
1912 roff_userret(struct roff *r)
1913 {
1914 struct mctx *ctx;
1915 int i;
1916
1917 assert(r->mstackpos >= 0);
1918 ctx = r->mstack + r->mstackpos;
1919 for (i = 0; i < ctx->argc; i++)
1920 free(ctx->argv[i]);
1921 ctx->argc = 0;
1922 r->mstackpos--;
1923 }
1924
1925 void
1926 roff_endparse(struct roff *r)
1927 {
1928 if (r->last != NULL)
1929 mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
1930 r->last->col, "%s", roff_name[r->last->tok]);
1931
1932 if (r->eqn != NULL) {
1933 mandoc_msg(MANDOCERR_BLK_NOEND,
1934 r->eqn->node->line, r->eqn->node->pos, "EQ");
1935 eqn_parse(r->eqn);
1936 r->eqn = NULL;
1937 }
1938
1939 if (r->tbl != NULL) {
1940 tbl_end(r->tbl, 1);
1941 r->tbl = NULL;
1942 }
1943 }
1944
1945 /*
1946 * Parse the request or macro name at buf[*pos].
1947 * Return ROFF_RENAMED, ROFF_USERDEF, or a ROFF_* token value.
1948 * For empty, undefined, mdoc(7), and man(7) macros, return TOKEN_NONE.
1949 * As a side effect, set r->current_string to the definition or to NULL.
1950 */
1951 static enum roff_tok
1952 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1953 {
1954 char *cp;
1955 const char *mac;
1956 size_t maclen;
1957 int deftype;
1958 enum roff_tok t;
1959
1960 cp = buf + *pos;
1961
1962 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1963 return TOKEN_NONE;
1964
1965 mac = cp;
1966 maclen = roff_getname(r, &cp, ln, ppos);
1967
1968 deftype = ROFFDEF_USER | ROFFDEF_REN;
1969 r->current_string = roff_getstrn(r, mac, maclen, &deftype);
1970 switch (deftype) {
1971 case ROFFDEF_USER:
1972 t = ROFF_USERDEF;
1973 break;
1974 case ROFFDEF_REN:
1975 t = ROFF_RENAMED;
1976 break;
1977 default:
1978 t = roffhash_find(r->reqtab, mac, maclen);
1979 break;
1980 }
1981 if (t != TOKEN_NONE)
1982 *pos = cp - buf;
1983 else if (deftype == ROFFDEF_UNDEF) {
1984 /* Using an undefined macro defines it to be empty. */
1985 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
1986 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
1987 }
1988 return t;
1989 }
1990
1991 /* --- handling of request blocks ----------------------------------------- */
1992
1993 /*
1994 * Close a macro definition block or an "ignore" block.
1995 */
1996 static int
1997 roff_cblock(ROFF_ARGS)
1998 {
1999 int rr;
2000
2001 if (r->last == NULL) {
2002 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2003 return ROFF_IGN;
2004 }
2005
2006 switch (r->last->tok) {
2007 case ROFF_am:
2008 case ROFF_ami:
2009 case ROFF_de:
2010 case ROFF_dei:
2011 case ROFF_ig:
2012 break;
2013 case ROFF_am1:
2014 case ROFF_de1:
2015 /* Remapped in roff_block(). */
2016 abort();
2017 default:
2018 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2019 return ROFF_IGN;
2020 }
2021
2022 roffnode_pop(r);
2023 roffnode_cleanscope(r);
2024
2025 /*
2026 * If a conditional block with braces is still open,
2027 * check for "\}" block end markers.
2028 */
2029
2030 if (r->last != NULL && r->last->endspan < 0) {
2031 rr = 1; /* If arguments follow "\}", warn about them. */
2032 roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2033 }
2034
2035 if (buf->buf[pos] != '\0')
2036 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
2037 ".. %s", buf->buf + pos);
2038
2039 return ROFF_IGN;
2040 }
2041
2042 /*
2043 * Pop all nodes ending at the end of the current input line.
2044 * Return the number of loops ended.
2045 */
2046 static int
2047 roffnode_cleanscope(struct roff *r)
2048 {
2049 int inloop;
2050
2051 inloop = 0;
2052 while (r->last != NULL && r->last->endspan > 0) {
2053 if (--r->last->endspan != 0)
2054 break;
2055 inloop += roffnode_pop(r);
2056 }
2057 return inloop;
2058 }
2059
2060 /*
2061 * Handle the closing "\}" of a conditional block.
2062 * Apart from generating warnings, this only pops nodes.
2063 * Return the number of loops ended.
2064 */
2065 static int
2066 roff_ccond(struct roff *r, int ln, int ppos)
2067 {
2068 if (NULL == r->last) {
2069 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2070 return 0;
2071 }
2072
2073 switch (r->last->tok) {
2074 case ROFF_el:
2075 case ROFF_ie:
2076 case ROFF_if:
2077 case ROFF_while:
2078 break;
2079 default:
2080 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2081 return 0;
2082 }
2083
2084 if (r->last->endspan > -1) {
2085 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2086 return 0;
2087 }
2088
2089 return roffnode_pop(r) + roffnode_cleanscope(r);
2090 }
2091
2092 static int
2093 roff_block(ROFF_ARGS)
2094 {
2095 const char *name, *value;
2096 char *call, *cp, *iname, *rname;
2097 size_t csz, namesz, rsz;
2098 int deftype;
2099
2100 /* Ignore groff compatibility mode for now. */
2101
2102 if (tok == ROFF_de1)
2103 tok = ROFF_de;
2104 else if (tok == ROFF_dei1)
2105 tok = ROFF_dei;
2106 else if (tok == ROFF_am1)
2107 tok = ROFF_am;
2108 else if (tok == ROFF_ami1)
2109 tok = ROFF_ami;
2110
2111 /* Parse the macro name argument. */
2112
2113 cp = buf->buf + pos;
2114 if (tok == ROFF_ig) {
2115 iname = NULL;
2116 namesz = 0;
2117 } else {
2118 iname = cp;
2119 namesz = roff_getname(r, &cp, ln, ppos);
2120 iname[namesz] = '\0';
2121 }
2122
2123 /* Resolve the macro name argument if it is indirect. */
2124
2125 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2126 deftype = ROFFDEF_USER;
2127 name = roff_getstrn(r, iname, namesz, &deftype);
2128 if (name == NULL) {
2129 mandoc_msg(MANDOCERR_STR_UNDEF,
2130 ln, (int)(iname - buf->buf),
2131 "%.*s", (int)namesz, iname);
2132 namesz = 0;
2133 } else
2134 namesz = strlen(name);
2135 } else
2136 name = iname;
2137
2138 if (namesz == 0 && tok != ROFF_ig) {
2139 mandoc_msg(MANDOCERR_REQ_EMPTY,
2140 ln, ppos, "%s", roff_name[tok]);
2141 return ROFF_IGN;
2142 }
2143
2144 roffnode_push(r, tok, name, ln, ppos);
2145
2146 /*
2147 * At the beginning of a `de' macro, clear the existing string
2148 * with the same name, if there is one. New content will be
2149 * appended from roff_block_text() in multiline mode.
2150 */
2151
2152 if (tok == ROFF_de || tok == ROFF_dei) {
2153 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
2154 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2155 } else if (tok == ROFF_am || tok == ROFF_ami) {
2156 deftype = ROFFDEF_ANY;
2157 value = roff_getstrn(r, iname, namesz, &deftype);
2158 switch (deftype) { /* Before appending, ... */
2159 case ROFFDEF_PRE: /* copy predefined to user-defined. */
2160 roff_setstrn(&r->strtab, name, namesz,
2161 value, strlen(value), 0);
2162 break;
2163 case ROFFDEF_REN: /* call original standard macro. */
2164 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2165 (int)strlen(value), value);
2166 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2167 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2168 free(call);
2169 break;
2170 case ROFFDEF_STD: /* rename and call standard macro. */
2171 rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
2172 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
2173 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2174 (int)rsz, rname);
2175 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2176 free(call);
2177 free(rname);
2178 break;
2179 default:
2180 break;
2181 }
2182 }
2183
2184 if (*cp == '\0')
2185 return ROFF_IGN;
2186
2187 /* Get the custom end marker. */
2188
2189 iname = cp;
2190 namesz = roff_getname(r, &cp, ln, ppos);
2191
2192 /* Resolve the end marker if it is indirect. */
2193
2194 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2195 deftype = ROFFDEF_USER;
2196 name = roff_getstrn(r, iname, namesz, &deftype);
2197 if (name == NULL) {
2198 mandoc_msg(MANDOCERR_STR_UNDEF,
2199 ln, (int)(iname - buf->buf),
2200 "%.*s", (int)namesz, iname);
2201 namesz = 0;
2202 } else
2203 namesz = strlen(name);
2204 } else
2205 name = iname;
2206
2207 if (namesz)
2208 r->last->end = mandoc_strndup(name, namesz);
2209
2210 if (*cp != '\0')
2211 mandoc_msg(MANDOCERR_ARG_EXCESS,
2212 ln, pos, ".%s ... %s", roff_name[tok], cp);
2213
2214 return ROFF_IGN;
2215 }
2216
2217 static int
2218 roff_block_sub(ROFF_ARGS)
2219 {
2220 enum roff_tok t;
2221 int i, j;
2222
2223 /*
2224 * If a custom end marker is a user-defined or predefined macro
2225 * or a request, interpret it.
2226 */
2227
2228 if (r->last->end) {
2229 for (i = pos, j = 0; r->last->end[j]; j++, i++)
2230 if (buf->buf[i] != r->last->end[j])
2231 break;
2232
2233 if (r->last->end[j] == '\0' &&
2234 (buf->buf[i] == '\0' ||
2235 buf->buf[i] == ' ' ||
2236 buf->buf[i] == '\t')) {
2237 roffnode_pop(r);
2238 roffnode_cleanscope(r);
2239
2240 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2241 i++;
2242
2243 pos = i;
2244 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2245 TOKEN_NONE)
2246 return ROFF_RERUN;
2247 return ROFF_IGN;
2248 }
2249 }
2250
2251 /* Handle the standard end marker. */
2252
2253 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2254 if (t == ROFF_cblock)
2255 return roff_cblock(r, t, buf, ln, ppos, pos, offs);
2256
2257 /* Not an end marker, so append the line to the block. */
2258
2259 if (tok != ROFF_ig)
2260 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2261 return ROFF_IGN;
2262 }
2263
2264 static int
2265 roff_block_text(ROFF_ARGS)
2266 {
2267
2268 if (tok != ROFF_ig)
2269 roff_setstr(r, r->last->name, buf->buf + pos, 2);
2270
2271 return ROFF_IGN;
2272 }
2273
2274 /*
2275 * Check for a closing "\}" and handle it.
2276 * In this function, the final "int *offs" argument is used for
2277 * different purposes than elsewhere:
2278 * Input: *offs == 0: caller wants to discard arguments following \}
2279 * *offs == 1: caller wants to preserve text following \}
2280 * Output: *offs = 0: tell caller to discard input line
2281 * *offs = 1: tell caller to use input line
2282 */
2283 static int
2284 roff_cond_checkend(ROFF_ARGS)
2285 {
2286 char *ep;
2287 int endloop, irc, rr;
2288
2289 irc = ROFF_IGN;
2290 rr = r->last->rule;
2291 endloop = tok != ROFF_while ? ROFF_IGN :
2292 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2293 if (roffnode_cleanscope(r))
2294 irc |= endloop;
2295
2296 /*
2297 * If "\}" occurs on a macro line without a preceding macro or
2298 * a text line contains nothing else, drop the line completely.
2299 */
2300
2301 ep = buf->buf + pos;
2302 if (ep[0] == '\\' && ep[1] == '}' && (ep[2] == '\0' || *offs == 0))
2303 rr = 0;
2304
2305 /*
2306 * The closing delimiter "\}" rewinds the conditional scope
2307 * but is otherwise ignored when interpreting the line.
2308 */
2309
2310 while ((ep = strchr(ep, '\\')) != NULL) {
2311 switch (ep[1]) {
2312 case '}':
2313 if (ep[2] == '\0')
2314 ep[0] = '\0';
2315 else if (rr)
2316 ep[1] = '&';
2317 else
2318 memmove(ep, ep + 2, strlen(ep + 2) + 1);
2319 if (roff_ccond(r, ln, ep - buf->buf))
2320 irc |= endloop;
2321 break;
2322 case '\0':
2323 ++ep;
2324 break;
2325 default:
2326 ep += 2;
2327 break;
2328 }
2329 }
2330 *offs = rr;
2331 return irc;
2332 }
2333
2334 /*
2335 * Parse and process a request or macro line in conditional scope.
2336 */
2337 static int
2338 roff_cond_sub(ROFF_ARGS)
2339 {
2340 struct roffnode *bl;
2341 int irc, rr, spos;
2342 enum roff_tok t;
2343
2344 rr = 0; /* If arguments follow "\}", skip them. */
2345 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2346 spos = pos;
2347 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2348
2349 /*
2350 * Handle requests and macros if the conditional evaluated
2351 * to true or if they are structurally required.
2352 * The .break request is always handled specially.
2353 */
2354
2355 if (t == ROFF_break) {
2356 if (irc & ROFF_LOOPMASK)
2357 irc = ROFF_IGN | ROFF_LOOPEXIT;
2358 else if (rr) {
2359 for (bl = r->last; bl != NULL; bl = bl->parent) {
2360 bl->rule = 0;
2361 if (bl->tok == ROFF_while)
2362 break;
2363 }
2364 }
2365 } else if (rr || (t < TOKEN_NONE && roffs[t].flags & ROFFMAC_STRUCT)) {
2366 irc |= roff_req_or_macro(r, t, buf, ln, spos, pos, offs);
2367 if (irc & ROFF_WHILE)
2368 irc &= ~(ROFF_LOOPCONT | ROFF_LOOPEXIT);
2369 }
2370 return irc;
2371 }
2372
2373 /*
2374 * Parse and process a text line in conditional scope.
2375 */
2376 static int
2377 roff_cond_text(ROFF_ARGS)
2378 {
2379 int irc, rr;
2380
2381 rr = 1; /* If arguments follow "\}", preserve them. */
2382 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2383 if (rr)
2384 irc |= ROFF_CONT;
2385 return irc;
2386 }
2387
2388 /* --- handling of numeric and conditional expressions -------------------- */
2389
2390 /*
2391 * Parse a single signed integer number. Stop at the first non-digit.
2392 * If there is at least one digit, return success and advance the
2393 * parse point, else return failure and let the parse point unchanged.
2394 * Ignore overflows, treat them just like the C language.
2395 */
2396 static int
2397 roff_getnum(const char *v, int *pos, int *res, int flags)
2398 {
2399 int myres, scaled, n, p;
2400
2401 if (NULL == res)
2402 res = &myres;
2403
2404 p = *pos;
2405 n = v[p] == '-';
2406 if (n || v[p] == '+')
2407 p++;
2408
2409 if (flags & ROFFNUM_WHITE)
2410 while (isspace((unsigned char)v[p]))
2411 p++;
2412
2413 for (*res = 0; isdigit((unsigned char)v[p]); p++)
2414 *res = 10 * *res + v[p] - '0';
2415 if (p == *pos + n)
2416 return 0;
2417
2418 if (n)
2419 *res = -*res;
2420
2421 /* Each number may be followed by one optional scaling unit. */
2422
2423 switch (v[p]) {
2424 case 'f':
2425 scaled = *res * 65536;
2426 break;
2427 case 'i':
2428 scaled = *res * 240;
2429 break;
2430 case 'c':
2431 scaled = *res * 240 / 2.54;
2432 break;
2433 case 'v':
2434 case 'P':
2435 scaled = *res * 40;
2436 break;
2437 case 'm':
2438 case 'n':
2439 scaled = *res * 24;
2440 break;
2441 case 'p':
2442 scaled = *res * 10 / 3;
2443 break;
2444 case 'u':
2445 scaled = *res;
2446 break;
2447 case 'M':
2448 scaled = *res * 6 / 25;
2449 break;
2450 default:
2451 scaled = *res;
2452 p--;
2453 break;
2454 }
2455 if (flags & ROFFNUM_SCALE)
2456 *res = scaled;
2457
2458 *pos = p + 1;
2459 return 1;
2460 }
2461
/*
 * Evaluate a string comparison condition.
 * The character at the parse point is the delimiter.
 * Succeed if the string between its first and second occurrence
 * equals the string between its second and third occurrence.
 * Advance the parse point after the third occurrence
 * or, lacking that, to the end of the line.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim, *lhs, *rhs;
	int		 match;

	match = 0;
	delim = v + *pos;		/* initial delimiter */
	lhs = delim + 1;		/* scans the first string */
	rhs = strchr(lhs, *delim);	/* scans the second string */

	/* Compare only if a middle delimiter exists. */

	if (rhs != NULL) {
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: skip to the final delimiter. */
				rhs = strchr(rhs, *delim);
				break;
			}
			if (*rhs == *delim) {
				/* Both strings end at the same time. */
				match = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return match;
}
2504
2505 /*
2506 * Evaluate an optionally negated single character, numerical,
2507 * or string condition.
2508 */
2509 static int
2510 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2511 {
2512 const char *start, *end;
2513 char *cp, *name;
2514 size_t sz;
2515 int deftype, len, number, savepos, istrue, wanttrue;
2516
2517 if ('!' == v[*pos]) {
2518 wanttrue = 0;
2519 (*pos)++;
2520 } else
2521 wanttrue = 1;
2522
2523 switch (v[*pos]) {
2524 case '\0':
2525 return 0;
2526 case 'n':
2527 case 'o':
2528 (*pos)++;
2529 return wanttrue;
2530 case 'e':
2531 case 't':
2532 case 'v':
2533 (*pos)++;
2534 return !wanttrue;
2535 case 'c':
2536 do {
2537 (*pos)++;
2538 } while (v[*pos] == ' ');
2539
2540 /*
2541 * Quirk for groff compatibility:
2542 * The horizontal tab is neither available nor unavailable.
2543 */
2544
2545 if (v[*pos] == '\t') {
2546 (*pos)++;
2547 return 0;
2548 }
2549
2550 /* Printable ASCII characters are available. */
2551
2552 if (v[*pos] != '\\') {
2553 (*pos)++;
2554 return wanttrue;
2555 }
2556
2557 end = v + ++*pos;
2558 switch (mandoc_escape(&end, &start, &len)) {
2559 case ESCAPE_SPECIAL:
2560 istrue = mchars_spec2cp(start, len) != -1;
2561 break;
2562 case ESCAPE_UNICODE:
2563 istrue = 1;
2564 break;
2565 case ESCAPE_NUMBERED:
2566 istrue = mchars_num2char(start, len) != -1;
2567 break;
2568 default:
2569 istrue = !wanttrue;
2570 break;
2571 }
2572 *pos = end - v;
2573 return istrue == wanttrue;
2574 case 'd':
2575 case 'r':
2576 cp = v + *pos + 1;
2577 while (*cp == ' ')
2578 cp++;
2579 name = cp;
2580 sz = roff_getname(r, &cp, ln, cp - v);
2581 if (sz == 0)
2582 istrue = 0;
2583 else if (v[*pos] == 'r')
2584 istrue = roff_hasregn(r, name, sz);
2585 else {
2586 deftype = ROFFDEF_ANY;
2587 roff_getstrn(r, name, sz, &deftype);
2588 istrue = !!deftype;
2589 }
2590 *pos = (name + sz) - v;
2591 return istrue == wanttrue;
2592 default:
2593 break;
2594 }
2595
2596 savepos = *pos;
2597 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2598 return (number > 0) == wanttrue;
2599 else if (*pos == savepos)
2600 return roff_evalstrcond(v, pos) == wanttrue;
2601 else
2602 return 0;
2603 }
2604
2605 static int
2606 roff_line_ignore(ROFF_ARGS)
2607 {
2608
2609 return ROFF_IGN;
2610 }
2611
2612 static int
2613 roff_insec(ROFF_ARGS)
2614 {
2615
2616 mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]);
2617 return ROFF_IGN;
2618 }
2619
2620 static int
2621 roff_unsupp(ROFF_ARGS)
2622 {
2623
2624 mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]);
2625 return ROFF_IGN;
2626 }
2627
2628 static int
2629 roff_cond(ROFF_ARGS)
2630 {
2631 int irc;
2632
2633 roffnode_push(r, tok, NULL, ln, ppos);
2634
2635 /*
2636 * An `.el' has no conditional body: it will consume the value
2637 * of the current rstack entry set in prior `ie' calls or
2638 * defaults to DENY.
2639 *
2640 * If we're not an `el', however, then evaluate the conditional.
2641 */
2642
2643 r->last->rule = tok == ROFF_el ?
2644 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2645 roff_evalcond(r, ln, buf->buf, &pos);
2646
2647 /*
2648 * An if-else will put the NEGATION of the current evaluated
2649 * conditional into the stack of rules.
2650 */
2651
2652 if (tok == ROFF_ie) {
2653 if (r->rstackpos + 1 == r->rstacksz) {
2654 r->rstacksz += 16;
2655 r->rstack = mandoc_reallocarray(r->rstack,
2656 r->rstacksz, sizeof(int));
2657 }
2658 r->rstack[++r->rstackpos] = !r->last->rule;
2659 }
2660
2661 /* If the parent has false as its rule, then so do we. */
2662
2663 if (r->last->parent && !r->last->parent->rule)
2664 r->last->rule = 0;
2665
2666 /*
2667 * Determine scope.
2668 * If there is nothing on the line after the conditional,
2669 * not even whitespace, use next-line scope.
2670 * Except that .while does not support next-line scope.
2671 */
2672
2673 if (buf->buf[pos] == '\0' && tok != ROFF_while) {
2674 r->last->endspan = 2;
2675 goto out;
2676 }
2677
2678 while (buf->buf[pos] == ' ')
2679 pos++;
2680
2681 /* An opening brace requests multiline scope. */
2682
2683 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2684 r->last->endspan = -1;
2685 pos += 2;
2686 while (buf->buf[pos] == ' ')
2687 pos++;
2688 goto out;
2689 }
2690
2691 /*
2692 * Anything else following the conditional causes
2693 * single-line scope. Warn if the scope contains
2694 * nothing but trailing whitespace.
2695 */
2696
2697 if (buf->buf[pos] == '\0')
2698 mandoc_msg(MANDOCERR_COND_EMPTY,
2699 ln, ppos, "%s", roff_name[tok]);
2700
2701 r->last->endspan = 1;
2702
2703 out:
2704 *offs = pos;
2705 irc = ROFF_RERUN;
2706 if (tok == ROFF_while)
2707 irc |= ROFF_WHILE;
2708 return irc;
2709 }
2710
2711 static int
2712 roff_ds(ROFF_ARGS)
2713 {
2714 char *string;
2715 const char *name;
2716 size_t namesz;
2717
2718 /* Ignore groff compatibility mode for now. */
2719
2720 if (tok == ROFF_ds1)
2721 tok = ROFF_ds;
2722 else if (tok == ROFF_as1)
2723 tok = ROFF_as;
2724
2725 /*
2726 * The first word is the name of the string.
2727 * If it is empty or terminated by an escape sequence,
2728 * abort the `ds' request without defining anything.
2729 */
2730
2731 name = string = buf->buf + pos;
2732 if (*name == '\0')
2733 return ROFF_IGN;
2734
2735 namesz = roff_getname(r, &string, ln, pos);
2736 switch (name[namesz]) {
2737 case '\\':
2738 return ROFF_IGN;
2739 case '\t':
2740 string = buf->buf + pos + namesz;
2741 break;
2742 default:
2743 break;
2744 }
2745
2746 /* Read past the initial double-quote, if any. */
2747 if (*string == '"')
2748 string++;
2749
2750 /* The rest is the value. */
2751 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2752 ROFF_as == tok);
2753 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2754 return ROFF_IGN;
2755 }
2756
/*
 * Parse a single operator, one or two characters long.
 * If the operator is recognized, store its one-character code
 * in *res, advance the parse point, and return the code;
 * else return 0 and let the parse point unchanged.
 * In either case, *res is first set to the character at the
 * parse point.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	/* Two-character operators and the codes standing for them. */
	static const struct {
		char	 first;
		char	 second;
		char	 code;
	} two[] = {
		{ '<', '=', 'l' },
		{ '<', '>', '!' },
		{ '<', '?', 'i' },
		{ '>', '=', 'g' },
		{ '>', '?', 'a' },
		{ '=', '=', '=' }
	};
	size_t	 i;
	int	 width;
	char	 c;

	c = v[*pos];
	*res = c;

	/* Every operator starts with one of these characters. */

	if (c == '\0' || strchr("+-*/%&:<>=", c) == NULL)
		return 0;

	/* Prefer a two-character operator if one matches. */

	width = 1;
	for (i = 0; i < sizeof(two) / sizeof(two[0]); i++) {
		if (two[i].first == c && two[i].second == v[*pos + 1]) {
			*res = two[i].code;
			width = 2;
			break;
		}
	}
	*pos += width;

	return *res;
}
2820
2821 /*
2822 * Evaluate either a parenthesized numeric expression
2823 * or a single signed integer number.
2824 */
2825 static int
2826 roff_evalpar(struct roff *r, int ln,
2827 const char *v, int *pos, int *res, int flags)
2828 {
2829
2830 if ('(' != v[*pos])
2831 return roff_getnum(v, pos, res, flags);
2832
2833 (*pos)++;
2834 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2835 return 0;
2836
2837 /*
2838 * Omission of the closing parenthesis
2839 * is an error in validation mode,
2840 * but ignored in evaluation mode.
2841 */
2842
2843 if (')' == v[*pos])
2844 (*pos)++;
2845 else if (NULL == res)
2846 return 0;
2847
2848 return 1;
2849 }
2850
2851 /*
2852 * Evaluate a complete numeric expression.
2853 * Proceed left to right, there is no concept of precedence.
2854 */
2855 static int
2856 roff_evalnum(struct roff *r, int ln, const char *v,
2857 int *pos, int *res, int flags)
2858 {
2859 int mypos, operand2;
2860 char operator;
2861
2862 if (NULL == pos) {
2863 mypos = 0;
2864 pos = &mypos;
2865 }
2866
2867 if (flags & ROFFNUM_WHITE)
2868 while (isspace((unsigned char)v[*pos]))
2869 (*pos)++;
2870
2871 if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2872 return 0;
2873
2874 while (1) {
2875 if (flags & ROFFNUM_WHITE)
2876 while (isspace((unsigned char)v[*pos]))
2877 (*pos)++;
2878
2879 if ( ! roff_getop(v, pos, &operator))
2880 break;
2881
2882 if (flags & ROFFNUM_WHITE)
2883 while (isspace((unsigned char)v[*pos]))
2884 (*pos)++;
2885
2886 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2887 return 0;
2888
2889 if (flags & ROFFNUM_WHITE)
2890 while (isspace((unsigned char)v[*pos]))
2891 (*pos)++;
2892
2893 if (NULL == res)
2894 continue;
2895
2896 switch (operator) {
2897 case '+':
2898 *res += operand2;
2899 break;
2900 case '-':
2901 *res -= operand2;
2902 break;
2903 case '*':
2904 *res *= operand2;
2905 break;
2906 case '/':
2907 if (operand2 == 0) {
2908 mandoc_msg(MANDOCERR_DIVZERO,
2909 ln, *pos, "%s", v);
2910 *res = 0;
2911 break;
2912 }
2913 *res /= operand2;
2914 break;
2915 case '%':
2916 if (operand2 == 0) {
2917 mandoc_msg(MANDOCERR_DIVZERO,
2918 ln, *pos, "%s", v);
2919 *res = 0;
2920 break;
2921 }
2922 *res %= operand2;
2923 break;
2924 case '<':
2925 *res = *res < operand2;
2926 break;
2927 case '>':
2928 *res = *res > operand2;
2929 break;
2930 case 'l':
2931 *res = *res <= operand2;
2932 break;
2933 case 'g':
2934 *res = *res >= operand2;
2935 break;
2936 case '=':
2937 *res = *res == operand2;
2938 break;
2939 case '!':
2940 *res = *res != operand2;
2941 break;
2942 case '&':
2943 *res = *res && operand2;
2944 break;
2945 case ':':
2946 *res = *res || operand2;
2947 break;
2948 case 'i':
2949 if (operand2 < *res)
2950 *res = operand2;
2951 break;
2952 case 'a':
2953 if (operand2 > *res)
2954 *res = operand2;
2955 break;
2956 default:
2957 abort();
2958 }
2959 }
2960 return 1;
2961 }
2962
2963 /* --- register management ------------------------------------------------ */
2964
/*
 * Set a number register given a NUL-terminated name.
 * The increment step of the register is not changed.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	size_t	 len;

	len = strlen(name);
	roff_setregn(r, name, len, val, sign, INT_MIN);
}
2970
2971 static void
2972 roff_setregn(struct roff *r, const char *name, size_t len,
2973 int val, char sign, int step)
2974 {
2975 struct roffreg *reg;
2976
2977 /* Search for an existing register with the same name. */
2978 reg = r->regtab;
2979
2980 while (reg != NULL && (reg->key.sz != len ||
2981 strncmp(reg->key.p, name, len) != 0))
2982 reg = reg->next;
2983
2984 if (NULL == reg) {
2985 /* Create a new register. */
2986 reg = mandoc_malloc(sizeof(struct roffreg));
2987 reg->key.p = mandoc_strndup(name, len);
2988 reg->key.sz = len;
2989 reg->val = 0;
2990 reg->step = 0;
2991 reg->next = r->regtab;
2992 r->regtab = reg;
2993 }
2994
2995 if ('+' == sign)
2996 reg->val += val;
2997 else if ('-' == sign)
2998 reg->val -= val;
2999 else
3000 reg->val = val;
3001 if (step != INT_MIN)
3002 reg->step = step;
3003 }
3004
3005 /*
3006 * Handle some predefined read-only number registers.
3007 * For now, return -1 if the requested register is not predefined;
3008 * in case a predefined read-only register having the value -1
3009 * were to turn up, another special value would have to be chosen.
3010 */
3011 static int
3012 roff_getregro(const struct roff *r, const char *name)
3013 {
3014
3015 switch (*name) {
3016 case '$': /* Number of arguments of the last macro evaluated. */
3017 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
3018 case 'A': /* ASCII approximation mode is always off. */
3019 return 0;
3020 case 'g': /* Groff compatibility mode is always on. */
3021 return 1;
3022 case 'H': /* Fixed horizontal resolution. */
3023 return 24;
3024 case 'j': /* Always adjust left margin only. */
3025 return 0;
3026 case 'T': /* Some output device is always defined. */
3027 return 1;
3028 case 'V': /* Fixed vertical resolution. */
3029 return 40;
3030 default:
3031 return -1;
3032 }
3033 }
3034
/*
 * Get the value of a number register given a NUL-terminated name,
 * without applying its increment step.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	size_t	 len;

	len = strlen(name);
	return roff_getregn(r, name, len, '\0');
}
3040
3041 static int
3042 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
3043 {
3044 struct roffreg *reg;
3045 int val;
3046
3047 if ('.' == name[0] && 2 == len) {
3048 val = roff_getregro(r, name + 1);
3049 if (-1 != val)
3050 return val;
3051 }
3052
3053 for (reg = r->regtab; reg; reg = reg->next) {
3054 if (len == reg->key.sz &&
3055 0 == strncmp(name, reg->key.p, len)) {
3056 switch (sign) {
3057 case '+':
3058 reg->val += reg->step;
3059 break;
3060 case '-':
3061 reg->val -= reg->step;
3062 break;
3063 default:
3064 break;
3065 }
3066 return reg->val;
3067 }
3068 }
3069
3070 roff_setregn(r, name, len, 0, '\0', INT_MIN);
3071 return 0;
3072 }
3073
3074 static int
3075 roff_hasregn(const struct roff *r, const char *name, size_t len)
3076 {
3077 struct roffreg *reg;
3078 int val;
3079
3080 if ('.' == name[0] && 2 == len) {
3081 val = roff_getregro(r, name + 1);
3082 if (-1 != val)
3083 return 1;
3084 }
3085
3086 for (reg = r->regtab; reg; reg = reg->next)
3087 if (len == reg->key.sz &&
3088 0 == strncmp(name, reg->key.p, len))
3089 return 1;
3090
3091 return 0;
3092 }
3093
3094 static void
3095 roff_freereg(struct roffreg *reg)
3096 {
3097 struct roffreg *old_reg;
3098
3099 while (NULL != reg) {
3100 free(reg->key.p);
3101 old_reg = reg;
3102 reg = reg->next;
3103 free(old_reg);
3104 }
3105 }
3106
3107 static int
3108 roff_nr(ROFF_ARGS)
3109 {
3110 char *key, *val, *step;
3111 size_t keysz;
3112 int iv, is, len;
3113 char sign;
3114
3115 key = val = buf->buf + pos;
3116 if (*key == '\0')
3117 return ROFF_IGN;
3118
3119 keysz = roff_getname(r, &val, ln, pos);
3120 if (key[keysz] == '\\' || key[keysz] == '\t')
3121 return ROFF_IGN;
3122
3123 sign = *val;
3124 if (sign == '+' || sign == '-')
3125 val++;
3126
3127 len = 0;
3128 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
3129 return ROFF_IGN;
3130
3131 step = val + len;
3132 while (isspace((unsigned char)*step))
3133 step++;
3134 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
3135 is = INT_MIN;
3136
3137 roff_setregn(r, key, keysz, iv, sign, is);
3138 return ROFF_IGN;
3139 }
3140
3141 static int
3142 roff_rr(ROFF_ARGS)
3143 {
3144 struct roffreg *reg, **prev;
3145 char *name, *cp;
3146 size_t namesz;
3147
3148 name = cp = buf->buf + pos;
3149 if (*name == '\0')
3150 return ROFF_IGN;
3151 namesz = roff_getname(r, &cp, ln, pos);
3152 name[namesz] = '\0';
3153
3154 prev = &r->regtab;
3155 while (1) {
3156 reg = *prev;
3157 if (reg == NULL || !strcmp(name, reg->key.p))
3158 break;
3159 prev = &reg->next;
3160 }
3161 if (reg != NULL) {
3162 *prev = reg->next;
3163 free(reg->key.p);
3164 free(reg);
3165 }
3166 return ROFF_IGN;
3167 }
3168
3169 /* --- handler functions for roff requests -------------------------------- */
3170
3171 static int
3172 roff_rm(ROFF_ARGS)
3173 {
3174 const char *name;
3175 char *cp;
3176 size_t namesz;
3177
3178 cp = buf->buf + pos;
3179 while (*cp != '\0') {
3180 name = cp;
3181 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
3182 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
3183 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3184 if (name[namesz] == '\\' || name[namesz] == '\t')
3185 break;
3186 }
3187 return ROFF_IGN;
3188 }
3189
3190 static int
3191 roff_it(ROFF_ARGS)
3192 {
3193 int iv;
3194
3195 /* Parse the number of lines. */
3196
3197 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
3198 mandoc_msg(MANDOCERR_IT_NONUM,
3199 ln, ppos, "%s", buf->buf + 1);
3200 return ROFF_IGN;
3201 }
3202
3203 while (isspace((unsigned char)buf->buf[pos]))
3204 pos++;
3205
3206 /*
3207 * Arm the input line trap.
3208 * Special-casing "an-trap" is an ugly workaround to cope
3209 * with DocBook stupidly fiddling with man(7) internals.
3210 */
3211
3212 roffit_lines = iv;
3213 roffit_macro = mandoc_strdup(iv != 1 ||
3214 strcmp(buf->buf + pos, "an-trap") ?
3215 buf->buf + pos : "br");
3216 return ROFF_IGN;
3217 }
3218
3219 static int
3220 roff_Dd(ROFF_ARGS)
3221 {
3222 int mask;
3223 enum roff_tok t, te;
3224
3225 switch (tok) {
3226 case ROFF_Dd:
3227 tok = MDOC_Dd;
3228 te = MDOC_MAX;
3229 if (r->format == 0)
3230 r->format = MPARSE_MDOC;
3231 mask = MPARSE_MDOC | MPARSE_QUICK;
3232 break;
3233 case ROFF_TH:
3234 tok = MAN_TH;
3235 te = MAN_MAX;
3236 if (r->format == 0)
3237 r->format = MPARSE_MAN;
3238 mask = MPARSE_QUICK;
3239 break;
3240 default:
3241 abort();
3242 }
3243 if ((r->options & mask) == 0)
3244 for (t = tok; t < te; t++)
3245 roff_setstr(r, roff_name[t], NULL, 0);
3246 return ROFF_CONT;
3247 }
3248
3249 static int
3250 roff_TE(ROFF_ARGS)
3251 {
3252 r->man->flags &= ~ROFF_NONOFILL;
3253 if (r->tbl == NULL) {
3254 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE");
3255 return ROFF_IGN;
3256 }
3257 if (tbl_end(r->tbl, 0) == 0) {
3258 r->tbl = NULL;
3259 free(buf->buf);
3260 buf->buf = mandoc_strdup(".sp");
3261 buf->sz = 4;
3262 *offs = 0;
3263 return ROFF_REPARSE;
3264 }
3265 r->tbl = NULL;
3266 return ROFF_IGN;
3267 }
3268
3269 static int
3270 roff_T_(ROFF_ARGS)
3271 {
3272
3273 if (NULL == r->tbl)
3274 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&");
3275 else
3276 tbl_restart(ln, ppos, r->tbl);
3277
3278 return ROFF_IGN;
3279 }
3280
3281 /*
3282 * Handle in-line equation delimiters.
3283 */
3284 static int
3285 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
3286 {
3287 char *cp1, *cp2;
3288 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
3289
3290 /*
3291 * Outside equations, look for an opening delimiter.
3292 * If we are inside an equation, we already know it is
3293 * in-line, or this function wouldn't have been called;
3294 * so look for a closing delimiter.
3295 */
3296
3297 cp1 = buf->buf + pos;
3298 cp2 = strchr(cp1, r->eqn == NULL ?
3299 r->last_eqn->odelim : r->last_eqn->cdelim);
3300 if (cp2 == NULL)
3301 return ROFF_CONT;
3302
3303 *cp2++ = '\0';
3304 bef_pr = bef_nl = aft_nl = aft_pr = "";
3305
3306 /* Handle preceding text, protecting whitespace. */
3307
3308 if (*buf->buf != '\0') {
3309 if (r->eqn == NULL)
3310 bef_pr = "\\&";
3311 bef_nl = "\n";
3312 }
3313
3314 /*
3315 * Prepare replacing the delimiter with an equation macro
3316 * and drop leading white space from the equation.
3317 */
3318
3319 if (r->eqn == NULL) {
3320 while (*cp2 == ' ')
3321 cp2++;
3322 mac = ".EQ";
3323 } else
3324 mac = ".EN";
3325
3326 /* Handle following text, protecting whitespace. */
3327
3328 if (*cp2 != '\0') {
3329 aft_nl = "\n";
3330 if (r->eqn != NULL)
3331 aft_pr = "\\&";
3332 }
3333
3334 /* Do the actual replacement. */
3335
3336 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
3337 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
3338 free(buf->buf);
3339 buf->buf = cp1;
3340
3341 /* Toggle the in-line state of the eqn subsystem. */
3342
3343 r->eqn_inline = r->eqn == NULL;
3344 return ROFF_REPARSE;
3345 }
3346
3347 static int
3348 roff_EQ(ROFF_ARGS)
3349 {
3350 struct roff_node *n;
3351
3352 if (r->man->meta.macroset == MACROSET_MAN)
3353 man_breakscope(r->man, ROFF_EQ);
3354 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3355 if (ln > r->man->last->line)
3356 n->flags |= NODE_LINE;
3357 n->eqn = eqn_box_new();
3358 roff_node_append(r->man, n);
3359 r->man->next = ROFF_NEXT_SIBLING;
3360
3361 assert(r->eqn == NULL);
3362 if (r->last_eqn == NULL)
3363 r->last_eqn = eqn_alloc();
3364 else
3365 eqn_reset(r->last_eqn);
3366 r->eqn = r->last_eqn;
3367 r->eqn->node = n;
3368
3369 if (buf->buf[pos] != '\0')
3370 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3371 ".EQ %s", buf->buf + pos);
3372
3373 return ROFF_IGN;
3374 }
3375
3376 static int
3377 roff_EN(ROFF_ARGS)
3378 {
3379 if (r->eqn != NULL) {
3380 eqn_parse(r->eqn);
3381 r->eqn = NULL;
3382 } else
3383 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN");
3384 if (buf->buf[pos] != '\0')
3385 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3386 "EN %s", buf->buf + pos);
3387 return ROFF_IGN;
3388 }
3389
3390 static int
3391 roff_TS(ROFF_ARGS)
3392 {
3393 if (r->tbl != NULL) {
3394 mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
3395 tbl_end(r->tbl, 0);
3396 }
3397 r->man->flags |= ROFF_NONOFILL;
3398 r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
3399 if (r->last_tbl == NULL)
3400 r->first_tbl = r->tbl;
3401 r->last_tbl = r->tbl;
3402 return ROFF_IGN;
3403 }
3404
3405 static int
3406 roff_noarg(ROFF_ARGS)
3407 {
3408 if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3409 man_breakscope(r->man, tok);
3410 if (tok == ROFF_brp)
3411 tok = ROFF_br;
3412 roff_elem_alloc(r->man, ln, ppos, tok);
3413 if (buf->buf[pos] != '\0')
3414 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3415 "%s %s", roff_name[tok], buf->buf + pos);
3416 if (tok == ROFF_nf)
3417 r->man->flags |= ROFF_NOFILL;
3418 else if (tok == ROFF_fi)
3419 r->man->flags &= ~ROFF_NOFILL;
3420 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3421 r->man->next = ROFF_NEXT_SIBLING;
3422 return ROFF_IGN;
3423 }
3424
3425 static int
3426 roff_onearg(ROFF_ARGS)
3427 {
3428 struct roff_node *n;
3429 char *cp;
3430 int npos;
3431
3432 if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3433 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3434 tok == ROFF_ti))
3435 man_breakscope(r->man, tok);
3436
3437 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3438 r->man->last = roffce_node;
3439 r->man->next = ROFF_NEXT_SIBLING;
3440 }
3441
3442 roff_elem_alloc(r->man, ln, ppos, tok);
3443 n = r->man->last;
3444
3445 cp = buf->buf + pos;
3446 if (*cp != '\0') {
3447 while (*cp != '\0' && *cp != ' ')
3448 cp++;
3449 while (*cp == ' ')
3450 *cp++ = '\0';
3451 if (*cp != '\0')
3452 mandoc_msg(MANDOCERR_ARG_EXCESS,
3453 ln, (int)(cp - buf->buf),
3454 "%s ... %s", roff_name[tok], cp);
3455 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3456 }
3457
3458 if (tok == ROFF_ce || tok == ROFF_rj) {
3459 if (r->man->last->type == ROFFT_ELEM) {
3460 roff_word_alloc(r->man, ln, pos, "1");
3461 r->man->last->flags |= NODE_NOSRC;
3462 }
3463 npos = 0;
3464 if (roff_evalnum(r, ln, r->man->last->string, &npos,
3465 &roffce_lines, 0) == 0) {
3466 mandoc_msg(MANDOCERR_CE_NONUM,
3467 ln, pos, "ce %s", buf->buf + pos);
3468 roffce_lines = 1;
3469 }
3470 if (roffce_lines < 1) {
3471 r->man->last = r->man->last->parent;
3472 roffce_node = NULL;
3473 roffce_lines = 0;
3474 } else
3475 roffce_node = r->man->last->parent;
3476 } else {
3477 n->flags |= NODE_VALID | NODE_ENDED;
3478 r->man->last = n;
3479 }
3480 n->flags |= NODE_LINE;
3481 r->man->next = ROFF_NEXT_SIBLING;
3482 return ROFF_IGN;
3483 }
3484
3485 static int
3486 roff_manyarg(ROFF_ARGS)
3487 {
3488 struct roff_node *n;
3489 char *sp, *ep;
3490
3491 roff_elem_alloc(r->man, ln, ppos, tok);
3492 n = r->man->last;
3493
3494 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3495 while (*ep != '\0' && *ep != ' ')
3496 ep++;
3497 while (*ep == ' ')
3498 *ep++ = '\0';
3499 roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3500 }
3501
3502 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3503 r->man->last = n;
3504 r->man->next = ROFF_NEXT_SIBLING;
3505 return ROFF_IGN;
3506 }
3507
3508 static int
3509 roff_als(ROFF_ARGS)
3510 {
3511 char *oldn, *newn, *end, *value;
3512 size_t oldsz, newsz, valsz;
3513
3514 newn = oldn = buf->buf + pos;
3515 if (*newn == '\0')
3516 return ROFF_IGN;
3517
3518 newsz = roff_getname(r, &oldn, ln, pos);
3519 if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0')
3520 return ROFF_IGN;
3521
3522 end = oldn;
3523 oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3524 if (oldsz == 0)
3525 return ROFF_IGN;
3526
3527 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3528 (int)oldsz, oldn);
3529 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3530 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3531 free(value);
3532 return ROFF_IGN;
3533 }
3534
3535 /*
3536 * The .break request only makes sense inside conditionals,
3537 * and that case is already handled in roff_cond_sub().
3538 */
3539 static int
3540 roff_break(ROFF_ARGS)
3541 {
3542 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break");
3543 return ROFF_IGN;
3544 }
3545
3546 static int
3547 roff_cc(ROFF_ARGS)
3548 {
3549 const char *p;
3550
3551 p = buf->buf + pos;
3552
3553 if (*p == '\0' || (r->control = *p++) == '.')
3554 r->control = '\0';
3555
3556 if (*p != '\0')
3557 mandoc_msg(MANDOCERR_ARG_EXCESS,
3558 ln, p - buf->buf, "cc ... %s", p);
3559
3560 return ROFF_IGN;
3561 }
3562
3563 static int
3564 roff_char(ROFF_ARGS)
3565 {
3566 const char *p, *kp, *vp;
3567 size_t ksz, vsz;
3568 int font;
3569
3570 /* Parse the character to be replaced. */
3571
3572 kp = buf->buf + pos;
3573 p = kp + 1;
3574 if (*kp == '\0' || (*kp == '\\' &&
3575 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3576 (*p != ' ' && *p != '\0')) {
3577 mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp);
3578 return ROFF_IGN;
3579 }
3580 ksz = p - kp;
3581 while (*p == ' ')
3582 p++;
3583
3584 /*
3585 * If the replacement string contains a font escape sequence,
3586 * we have to restore the font at the end.
3587 */
3588
3589 vp = p;
3590 vsz = strlen(p);
3591 font = 0;
3592 while (*p != '\0') {
3593 if (*p++ != '\\')
3594 continue;
3595 switch (mandoc_escape(&p, NULL, NULL)) {
3596 case ESCAPE_FONT:
3597 case ESCAPE_FONTROMAN:
3598 case ESCAPE_FONTITALIC:
3599 case ESCAPE_FONTBOLD:
3600 case ESCAPE_FONTBI:
3601 case ESCAPE_FONTCR:
3602 case ESCAPE_FONTCB:
3603 case ESCAPE_FONTCI:
3604 case ESCAPE_FONTPREV:
3605 font++;
3606 break;
3607 default:
3608 break;
3609 }
3610 }
3611 if (font > 1)
3612 mandoc_msg(MANDOCERR_CHAR_FONT,
3613 ln, (int)(vp - buf->buf), "%s", vp);
3614
3615 /*
3616 * Approximate the effect of .char using the .tr tables.
3617 * XXX In groff, .char and .tr interact differently.
3618 */
3619
3620 if (ksz == 1) {
3621 if (r->xtab == NULL)
3622 r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3623 assert((unsigned int)*kp < 128);
3624 free(r->xtab[(int)*kp].p);
3625 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3626 "%s%s", vp, font ? "\fP" : "");
3627 } else {
3628 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3629 if (font)
3630 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3631 }
3632 return ROFF_IGN;
3633 }
3634
3635 static int
3636 roff_ec(ROFF_ARGS)
3637 {
3638 const char *p;
3639
3640 p = buf->buf + pos;
3641 if (*p == '\0')
3642 r->escape = '\\';
3643 else {
3644 r->escape = *p;
3645 if (*++p != '\0')
3646 mandoc_msg(MANDOCERR_ARG_EXCESS, ln,
3647 (int)(p - buf->buf), "ec ... %s", p);
3648 }
3649 return ROFF_IGN;
3650 }
3651
3652 static int
3653 roff_eo(ROFF_ARGS)
3654 {
3655 r->escape = '\0';
3656 if (buf->buf[pos] != '\0')
3657 mandoc_msg(MANDOCERR_ARG_SKIP,
3658 ln, pos, "eo %s", buf->buf + pos);
3659 return ROFF_IGN;
3660 }
3661
3662 static int
3663 roff_mc(ROFF_ARGS)
3664 {
3665 struct roff_node *n;
3666 char *cp;
3667
3668 /* Parse the first argument. */
3669
3670 cp = buf->buf + pos;
3671 if (*cp != '\0')
3672 cp++;
3673 if (buf->buf[pos] == '\\') {
3674 switch (mandoc_escape((const char **)&cp, NULL, NULL)) {
3675 case ESCAPE_SPECIAL:
3676 case ESCAPE_UNICODE:
3677 case ESCAPE_NUMBERED:
3678 break;
3679 default:
3680 *cp = '\0';
3681 mandoc_msg(MANDOCERR_MC_ESC, ln, pos,
3682 "mc %s", buf->buf + pos);
3683 buf->buf[pos] = '\0';
3684 break;
3685 }
3686 }
3687
3688 /* Ignore additional arguments. */
3689
3690 while (*cp == ' ')
3691 *cp++ = '\0';
3692 if (*cp != '\0') {
3693 mandoc_msg(MANDOCERR_MC_DIST, ln, (int)(cp - buf->buf),
3694 "mc ... %s", cp);
3695 *cp = '\0';
3696 }
3697
3698 /* Create the .mc node. */
3699
3700 roff_elem_alloc(r->man, ln, ppos, tok);
3701 n = r->man->last;
3702 if (buf->buf[pos] != '\0')
3703 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3704 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3705 r->man->last = n;
3706 r->man->next = ROFF_NEXT_SIBLING;
3707 return ROFF_IGN;
3708 }
3709
3710 static int
3711 roff_nop(ROFF_ARGS)
3712 {
3713 while (buf->buf[pos] == ' ')
3714 pos++;
3715 *offs = pos;
3716 return ROFF_RERUN;
3717 }
3718
3719 static int
3720 roff_tr(ROFF_ARGS)
3721 {
3722 const char *p, *first, *second;
3723 size_t fsz, ssz;
3724 enum mandoc_esc esc;
3725
3726 p = buf->buf + pos;
3727
3728 if (*p == '\0') {
3729 mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr");
3730 return ROFF_IGN;
3731 }
3732
3733 while (*p != '\0') {
3734 fsz = ssz = 1;
3735
3736 first = p++;
3737 if (*first == '\\') {
3738 esc = mandoc_escape(&p, NULL, NULL);
3739 if (esc == ESCAPE_ERROR) {
3740 mandoc_msg(MANDOCERR_ESC_BAD, ln,
3741 (int)(p - buf->buf), "%s", first);
3742 return ROFF_IGN;
3743 }
3744 fsz = (size_t)(p - first);
3745 }
3746
3747 second = p++;
3748 if (*second == '\\') {
3749 esc = mandoc_escape(&p, NULL, NULL);
3750 if (esc == ESCAPE_ERROR) {
3751 mandoc_msg(MANDOCERR_ESC_BAD, ln,
3752 (int)(p - buf->buf), "%s", second);
3753 return ROFF_IGN;
3754 }
3755 ssz = (size_t)(p - second);
3756 } else if (*second == '\0') {
3757 mandoc_msg(MANDOCERR_TR_ODD, ln,
3758 (int)(first - buf->buf), "tr %s", first);
3759 second = " ";
3760 p--;
3761 }
3762
3763 if (fsz > 1) {
3764 roff_setstrn(&r->xmbtab, first, fsz,
3765 second, ssz, 0);
3766 continue;
3767 }
3768
3769 if (r->xtab == NULL)
3770 r->xtab = mandoc_calloc(128,
3771 sizeof(struct roffstr));
3772
3773 free(r->xtab[(int)*first].p);
3774 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3775 r->xtab[(int)*first].sz = ssz;
3776 }
3777
3778 return ROFF_IGN;
3779 }
3780
3781 /*
3782 * Implementation of the .return request.
3783 * There is no need to call roff_userret() from here.
3784 * The read module will call that after rewinding the reader stack
3785 * to the place from where the current macro was called.
3786 */
3787 static int
3788 roff_return(ROFF_ARGS)
3789 {
3790 if (r->mstackpos >= 0)
3791 return ROFF_IGN | ROFF_USERRET;
3792
3793 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return");
3794 return ROFF_IGN;
3795 }
3796
3797 static int
3798 roff_rn(ROFF_ARGS)
3799 {
3800 const char *value;
3801 char *oldn, *newn, *end;
3802 size_t oldsz, newsz;
3803 int deftype;
3804
3805 oldn = newn = buf->buf + pos;
3806 if (*oldn == '\0')
3807 return ROFF_IGN;
3808
3809 oldsz = roff_getname(r, &newn, ln, pos);
3810 if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0')
3811 return ROFF_IGN;
3812
3813 end = newn;
3814 newsz = roff_getname(r, &end, ln, newn - buf->buf);
3815 if (newsz == 0)
3816 return ROFF_IGN;
3817
3818 deftype = ROFFDEF_ANY;
3819 value = roff_getstrn(r, oldn, oldsz, &deftype);
3820 switch (deftype) {
3821 case ROFFDEF_USER:
3822 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3823 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3824 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3825 break;
3826 case ROFFDEF_PRE:
3827 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3828 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3829 break;
3830 case ROFFDEF_REN:
3831 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3832 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3833 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3834 break;
3835 case ROFFDEF_STD:
3836 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3837 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3838 break;
3839 default:
3840 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3841 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3842 break;
3843 }
3844 return ROFF_IGN;
3845 }
3846
3847 static int
3848 roff_shift(ROFF_ARGS)
3849 {
3850 struct mctx *ctx;
3851 int argpos, levels, i;
3852
3853 argpos = pos;
3854 levels = 1;
3855 if (buf->buf[pos] != '\0' &&
3856 roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
3857 mandoc_msg(MANDOCERR_CE_NONUM,
3858 ln, pos, "shift %s", buf->buf + pos);
3859 levels = 1;
3860 }
3861 if (r->mstackpos < 0) {
3862 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift");
3863 return ROFF_IGN;
3864 }
3865 ctx = r->mstack + r->mstackpos;
3866 if (levels > ctx->argc) {
3867 mandoc_msg(MANDOCERR_SHIFT,
3868 ln, argpos, "%d, but max is %d", levels, ctx->argc);
3869 levels = ctx->argc;
3870 }
3871 if (levels < 0) {
3872 mandoc_msg(MANDOCERR_ARG_NEG, ln, argpos, "shift %d", levels);
3873 levels = 0;
3874 }
3875 if (levels == 0)
3876 return ROFF_IGN;
3877 for (i = 0; i < levels; i++)
3878 free(ctx->argv[i]);
3879 ctx->argc -= levels;
3880 for (i = 0; i < ctx->argc; i++)
3881 ctx->argv[i] = ctx->argv[i + levels];
3882 return ROFF_IGN;
3883 }
3884
3885 static int
3886 roff_so(ROFF_ARGS)
3887 {
3888 char *name, *cp;
3889
3890 name = buf->buf + pos;
3891 mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name);
3892
3893 /*
3894 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3895 * opening anything that's not in our cwd or anything beneath
3896 * it. Thus, explicitly disallow traversing up the file-system
3897 * or using absolute paths.
3898 */
3899
3900 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3901 mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name);
3902 buf->sz = mandoc_asprintf(&cp,
3903 ".sp\nSee the file %s.\n.sp", name) + 1;
3904 free(buf->buf);
3905 buf->buf = cp;
3906 *offs = 0;
3907 return ROFF_REPARSE;
3908 }
3909
3910 *offs = pos;
3911 return ROFF_SO;
3912 }
3913
3914 /* --- user defined strings and macros ------------------------------------ */
3915
3916 static int
3917 roff_userdef(ROFF_ARGS)
3918 {
3919 struct mctx *ctx;
3920 char *arg, *ap, *dst, *src;
3921 size_t sz;
3922
3923 /* If the macro is empty, ignore it altogether. */
3924
3925 if (*r->current_string == '\0')
3926 return ROFF_IGN;
3927
3928 /* Initialize a new macro stack context. */
3929
3930 if (++r->mstackpos == r->mstacksz) {
3931 r->mstack = mandoc_recallocarray(r->mstack,
3932 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
3933 r->mstacksz += 8;
3934 }
3935 ctx = r->mstack + r->mstackpos;
3936 ctx->argc = 0;
3937
3938 /*
3939 * Collect pointers to macro argument strings,
3940 * NUL-terminating them and escaping quotes.
3941 */
3942
3943 src = buf->buf + pos;
3944 while (*src != '\0') {
3945 if (ctx->argc == ctx->argsz) {
3946 ctx->argsz += 8;
3947 ctx->argv = mandoc_reallocarray(ctx->argv,
3948 ctx->argsz, sizeof(*ctx->argv));
3949 }
3950 arg = roff_getarg(r, &src, ln, &pos);
3951 sz = 1; /* For the terminating NUL. */
3952 for (ap = arg; *ap != '\0'; ap++)
3953 sz += *ap == '"' ? 4 : 1;
3954 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
3955 for (ap = arg; *ap != '\0'; ap++) {
3956 if (*ap == '"') {
3957 memcpy(dst, "\\(dq", 4);
3958 dst += 4;
3959 } else
3960 *dst++ = *ap;
3961 }
3962 *dst = '\0';
3963 free(arg);
3964 }
3965
3966 /* Replace the macro invocation by the macro definition. */
3967
3968 free(buf->buf);
3969 buf->buf = mandoc_strdup(r->current_string);
3970 buf->sz = strlen(buf->buf) + 1;
3971 *offs = 0;
3972
3973 return buf->buf[buf->sz - 2] == '\n' ?
3974 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
3975 }
3976
3977 /*
3978 * Calling a high-level macro that was renamed with .rn.
3979 * r->current_string has already been set up by roff_parse().
3980 */
3981 static int
3982 roff_renamed(ROFF_ARGS)
3983 {
3984 char *nbuf;
3985
3986 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
3987 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
3988 free(buf->buf);
3989 buf->buf = nbuf;
3990 *offs = 0;
3991 return ROFF_CONT;
3992 }
3993
3994 /*
3995 * Measure the length in bytes of the roff identifier at *cpp
3996 * and advance the pointer to the next word.
3997 */
3998 static size_t
3999 roff_getname(struct roff *r, char **cpp, int ln, int pos)
4000 {
4001 char *name, *cp;
4002 size_t namesz;
4003
4004 name = *cpp;
4005 if (*name == '\0')
4006 return 0;
4007
4008 /* Advance cp to the byte after the end of the name. */
4009
4010 for (cp = name; 1; cp++) {
4011 namesz = cp - name;
4012 if (*cp == '\0')
4013 break;
4014 if (*cp == ' ' || *cp == '\t') {
4015 cp++;
4016 break;
4017 }
4018 if (*cp != '\\')
4019 continue;
4020 if (cp[1] == '{' || cp[1] == '}')
4021 break;
4022 if (*++cp == '\\')
4023 continue;
4024 mandoc_msg(MANDOCERR_NAMESC, ln, pos,
4025 "%.*s", (int)(cp - name + 1), name);
4026 mandoc_escape((const char **)&cp, NULL, NULL);
4027 break;
4028 }
4029
4030 /* Read past spaces. */
4031
4032 while (*cp == ' ')
4033 cp++;
4034
4035 *cpp = cp;
4036 return namesz;
4037 }
4038
4039 /*
4040 * Store *string into the user-defined string called *name.
4041 * To clear an existing entry, call with (*r, *name, NULL, 0).
4042 * append == 0: replace mode
4043 * append == 1: single-line append mode
4044 * append == 2: multiline append mode, append '\n' after each call
4045 */
4046 static void
4047 roff_setstr(struct roff *r, const char *name, const char *string,
4048 int append)
4049 {
4050 size_t namesz;
4051
4052 namesz = strlen(name);
4053 roff_setstrn(&r->strtab, name, namesz, string,
4054 string ? strlen(string) : 0, append);
4055 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
4056 }
4057
4058 static void
4059 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
4060 const char *string, size_t stringsz, int append)
4061 {
4062 struct roffkv *n;
4063 char *c;
4064 int i;
4065 size_t oldch, newch;
4066
4067 /* Search for an existing string with the same name. */
4068 n = *r;
4069
4070 while (n && (namesz != n->key.sz ||
4071 strncmp(n->key.p, name, namesz)))
4072 n = n->next;
4073
4074 if (NULL == n) {
4075 /* Create a new string table entry. */
4076 n = mandoc_malloc(sizeof(struct roffkv));
4077 n->key.p = mandoc_strndup(name, namesz);
4078 n->key.sz = namesz;
4079 n->val.p = NULL;
4080 n->val.sz = 0;
4081 n->next = *r;
4082 *r = n;
4083 } else if (0 == append) {
4084 free(n->val.p);
4085 n->val.p = NULL;
4086 n->val.sz = 0;
4087 }
4088
4089 if (NULL == string)
4090 return;
4091
4092 /*
4093 * One additional byte for the '\n' in multiline mode,
4094 * and one for the terminating '\0'.
4095 */
4096 newch = stringsz + (1 < append ? 2u : 1u);
4097
4098 if (NULL == n->val.p) {
4099 n->val.p = mandoc_malloc(newch);
4100 *n->val.p = '\0';
4101 oldch = 0;
4102 } else {
4103 oldch = n->val.sz;
4104 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
4105 }
4106
4107 /* Skip existing content in the destination buffer. */
4108 c = n->val.p + (int)oldch;
4109
4110 /* Append new content to the destination buffer. */
4111 i = 0;
4112 while (i < (int)stringsz) {
4113 /*
4114 * Rudimentary roff copy mode:
4115 * Handle escaped backslashes.
4116 */
4117 if ('\\' == string[i] && '\\' == string[i + 1])
4118 i++;
4119 *c++ = string[i++];
4120 }
4121
4122 /* Append terminating bytes. */
4123 if (1 < append)
4124 *c++ = '\n';
4125
4126 *c = '\0';
4127 n->val.sz = (int)(c - n->val.p);
4128 }
4129
4130 static const char *
4131 roff_getstrn(struct roff *r, const char *name, size_t len,
4132 int *deftype)
4133 {
4134 const struct roffkv *n;
4135 int found, i;
4136 enum roff_tok tok;
4137
4138 found = 0;
4139 for (n = r->strtab; n != NULL; n = n->next) {
4140 if (strncmp(name, n->key.p, len) != 0 ||
4141 n->key.p[len] != '\0' || n->val.p == NULL)
4142 continue;
4143 if (*deftype & ROFFDEF_USER) {
4144 *deftype = ROFFDEF_USER;
4145 return n->val.p;
4146 } else {
4147 found = 1;
4148 break;
4149 }
4150 }
4151 for (n = r->rentab; n != NULL; n = n->next) {
4152 if (strncmp(name, n->key.p, len) != 0 ||
4153 n->key.p[len] != '\0' || n->val.p == NULL)
4154 continue;
4155 if (*deftype & ROFFDEF_REN) {
4156 *deftype = ROFFDEF_REN;
4157 return n->val.p;
4158 } else {
4159 found = 1;
4160 break;
4161 }
4162 }
4163 for (i = 0; i < PREDEFS_MAX; i++) {
4164 if (strncmp(name, predefs[i].name, len) != 0 ||
4165 predefs[i].name[len] != '\0')
4166 continue;
4167 if (*deftype & ROFFDEF_PRE) {
4168 *deftype = ROFFDEF_PRE;
4169 return predefs[i].str;
4170 } else {
4171 found = 1;
4172 break;
4173 }
4174 }
4175 if (r->man->meta.macroset != MACROSET_MAN) {
4176 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
4177 if (strncmp(name, roff_name[tok], len) != 0 ||
4178 roff_name[tok][len] != '\0')
4179 continue;
4180 if (*deftype & ROFFDEF_STD) {
4181 *deftype = ROFFDEF_STD;
4182 return NULL;
4183 } else {
4184 found = 1;
4185 break;
4186 }
4187 }
4188 }
4189 if (r->man->meta.macroset != MACROSET_MDOC) {
4190 for (tok = MAN_TH; tok < MAN_MAX; tok++) {
4191 if (strncmp(name, roff_name[tok], len) != 0 ||
4192 roff_name[tok][len] != '\0')
4193 continue;
4194 if (*deftype & ROFFDEF_STD) {
4195 *deftype = ROFFDEF_STD;
4196 return NULL;
4197 } else {
4198 found = 1;
4199 break;
4200 }
4201 }
4202 }
4203
4204 if (found == 0 && *deftype != ROFFDEF_ANY) {
4205 if (*deftype & ROFFDEF_REN) {
4206 /*
4207 * This might still be a request,
4208 * so do not treat it as undefined yet.
4209 */
4210 *deftype = ROFFDEF_UNDEF;
4211 return NULL;
4212 }
4213
4214 /* Using an undefined string defines it to be empty. */
4215
4216 roff_setstrn(&r->strtab, name, len, "", 0, 0);
4217 roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
4218 }
4219
4220 *deftype = 0;
4221 return NULL;
4222 }
4223
4224 static void
4225 roff_freestr(struct roffkv *r)
4226 {
4227 struct roffkv *n, *nn;
4228
4229 for (n = r; n; n = nn) {
4230 free(n->key.p);
4231 free(n->val.p);
4232 nn = n->next;
4233 free(n);
4234 }
4235 }
4236
4237 /* --- accessors and utility functions ------------------------------------ */
4238
4239 /*
4240 * Duplicate an input string, making the appropriate character
4241 * conversations (as stipulated by `tr') along the way.
4242 * Returns a heap-allocated string with all the replacements made.
4243 */
4244 char *
4245 roff_strdup(const struct roff *r, const char *p)
4246 {
4247 const struct roffkv *cp;
4248 char *res;
4249 const char *pp;
4250 size_t ssz, sz;
4251 enum mandoc_esc esc;
4252
4253 if (NULL == r->xmbtab && NULL == r->xtab)
4254 return mandoc_strdup(p);
4255 else if ('\0' == *p)
4256 return mandoc_strdup("");
4257
4258 /*
4259 * Step through each character looking for term matches
4260 * (remember that a `tr' can be invoked with an escape, which is
4261 * a glyph but the escape is multi-character).
4262 * We only do this if the character hash has been initialised
4263 * and the string is >0 length.
4264 */
4265
4266 res = NULL;
4267 ssz = 0;
4268
4269 while ('\0' != *p) {
4270 assert((unsigned int)*p < 128);
4271 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4272 sz = r->xtab[(int)*p].sz;
4273 res = mandoc_realloc(res, ssz + sz + 1);
4274 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4275 ssz += sz;
4276 p++;
4277 continue;
4278 } else if ('\\' != *p) {
4279 res = mandoc_realloc(res, ssz + 2);
4280 res[ssz++] = *p++;
4281 continue;
4282 }
4283
4284 /* Search for term matches. */
4285 for (cp = r->xmbtab; cp; cp = cp->next)
4286 if (0 == strncmp(p, cp->key.p, cp->key.sz))
4287 break;
4288
4289 if (NULL != cp) {
4290 /*
4291 * A match has been found.
4292 * Append the match to the array and move
4293 * forward by its keysize.
4294 */
4295 res = mandoc_realloc(res,
4296 ssz + cp->val.sz + 1);
4297 memcpy(res + ssz, cp->val.p, cp->val.sz);
4298 ssz += cp->val.sz;
4299 p += (int)cp->key.sz;
4300 continue;
4301 }
4302
4303 /*
4304 * Handle escapes carefully: we need to copy
4305 * over just the escape itself, or else we might
4306 * do replacements within the escape itself.
4307 * Make sure to pass along the bogus string.
4308 */
4309 pp = p++;
4310 esc = mandoc_escape(&p, NULL, NULL);
4311 if (ESCAPE_ERROR == esc) {
4312 sz = strlen(pp);
4313 res = mandoc_realloc(res, ssz + sz + 1);
4314 memcpy(res + ssz, pp, sz);
4315 break;
4316 }
4317 /*
4318 * We bail out on bad escapes.
4319 * No need to warn: we already did so when
4320 * roff_expand() was called.
4321 */
4322 sz = (int)(p - pp);
4323 res = mandoc_realloc(res, ssz + sz + 1);
4324 memcpy(res + ssz, pp, sz);
4325 ssz += sz;
4326 }
4327
4328 res[(int)ssz] = '\0';
4329 return res;
4330 }
4331
4332 int
4333 roff_getformat(const struct roff *r)
4334 {
4335
4336 return r->format;
4337 }
4338
4339 /*
4340 * Find out whether a line is a macro line or not.
4341 * If it is, adjust the current position and return one; if it isn't,
4342 * return zero and don't change the current position.
4343 * If the control character has been set with `.cc', then let that grain
4344 * precedence.
4345 * This is slighly contrary to groff, where using the non-breaking
4346 * control character when `cc' has been invoked will cause the
4347 * non-breaking macro contents to be printed verbatim.
4348 */
4349 int
4350 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4351 {
4352 int pos;
4353
4354 pos = *ppos;
4355
4356 if (r->control != '\0' && cp[pos] == r->control)
4357 pos++;
4358 else if (r->control != '\0')
4359 return 0;
4360 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4361 pos += 2;
4362 else if ('.' == cp[pos] || '\'' == cp[pos])
4363 pos++;
4364 else
4365 return 0;
4366
4367 while (' ' == cp[pos] || '\t' == cp[pos])
4368 pos++;
4369
4370 *ppos = pos;
4371 return 1;
4372 }