]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
Provide a new function roff_req_or_macro() to parse and handle a request
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.386 2022/04/30 18:51:36 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010-2015, 2017-2022 Ingo Schwarze <schwarze@openbsd.org>
4 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 * Implementation of the roff(7) parser for mandoc(1).
19 */
20 #include "config.h"
21
22 #include <sys/types.h>
23
24 #include <assert.h>
25 #include <ctype.h>
26 #include <limits.h>
27 #include <stddef.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32
33 #include "mandoc_aux.h"
34 #include "mandoc_ohash.h"
35 #include "mandoc.h"
36 #include "roff.h"
37 #include "mandoc_parse.h"
38 #include "libmandoc.h"
39 #include "roff_int.h"
40 #include "tbl_parse.h"
41 #include "eqn_parse.h"
42
43 /*
 * ASCII_ESC (27, the ASCII escape control character) is used to
 * signal from roff_getarg() to roff_expand() that an escape sequence
 * resulted from copy-in processing and needs to be checked or
 * interpolated.  As it is used nowhere else, it is defined here
 * rather than in a header file.
 */
49 #define ASCII_ESC 27
50
51 /* Maximum number of string expansions per line, to break infinite loops. */
52 #define EXPAND_LIMIT 1000
53
54 /*
 * Types of definitions of macros and strings.
 * These are single-bit flags so that they can be combined with
 * bitwise OR; bit 0 is not used by any of them.
 */
55 #define ROFFDEF_USER (1 << 1) /* User-defined. */
56 #define ROFFDEF_PRE (1 << 2) /* Predefined. */
57 #define ROFFDEF_REN (1 << 3) /* Renamed standard macro. */
58 #define ROFFDEF_STD (1 << 4) /* mdoc(7) or man(7) macro. */
/* Union of the four kinds above; ROFFDEF_UNDEF is not part of it. */
59 #define ROFFDEF_ANY (ROFFDEF_USER | ROFFDEF_PRE | \
60 ROFFDEF_REN | ROFFDEF_STD)
61 #define ROFFDEF_UNDEF (1 << 5) /* Completely undefined. */
62
63 /* --- data types --------------------------------------------------------- */
64
65 /*
 * An incredibly simple string buffer: a pointer to the text plus
 * its cached length.  The xtab member of struct roff is an array of
 * these, and roff_free1() releases each p with free(3).
 */
68 struct roffstr {
69 char *p; /* NUL-terminated buffer */
70 size_t sz; /* saved strlen(p), terminator not counted */
71 };
72
73 /*
 * A key-value roffstr pair as part of a singly-linked list.
 * struct roff keeps three such lists: strtab, rentab, and xmbtab.
 */
76 struct roffkv {
77 struct roffstr key; /* lookup key */
78 struct roffstr val; /* value associated with the key */
79 struct roffkv *next; /* next in list */
80 };
81
82 /*
 * A single number register as part of a singly-linked list.
 * The head of the list is the regtab member of struct roff.
 */
85 struct roffreg {
86 struct roffstr key; /* presumably the register name -- TODO confirm */
87 int val; /* presumably the current value -- TODO confirm */
88 int step; /* presumably the auto-increment step -- TODO confirm */
89 struct roffreg *next; /* next in list */
90 };
91
92 /*
 * Association of request and macro names with token IDs.
 * These are the objects stored in the hash tables built by
 * roffhash_alloc(), which allocates each one with room for the
 * name including its terminating NUL byte and registers
 * offsetof(struct roffreq, name) as the key offset.
 */
95 struct roffreq {
96 enum roff_tok tok; /* token ID the name maps to */
97 char name[]; /* flexible array member: NUL-terminated name */
98 };
99
100 /*
 * A macro processing context.
 * More than one is needed when macro calls are nested.
 * The contexts form the mstack array of struct roff; roff_free()
 * releases the argv array of each of its mstacksz elements.
 */
104 struct mctx {
105 char **argv; /* heap-allocated argument vector */
106 int argc; /* presumably number of arguments in use -- TODO confirm */
107 int argsz; /* presumably allocated capacity of argv -- TODO confirm */
108 };
109
/*
 * The complete state of one roff parser.
 * Allocated by roff_alloc(), cleared for reuse by roff_reset(),
 * and destroyed by roff_free().
 */
110 struct roff {
111 struct roff_man *man; /* mdoc or man parser, set by roff_man_alloc() */
112 struct roffnode *last; /* leaf of stack */
113 struct mctx *mstack; /* stack of macro contexts */
114 int *rstack; /* stack of inverted `ie' values */
115 struct ohash *reqtab; /* request lookup table */
116 struct roffreg *regtab; /* number registers */
117 struct roffkv *strtab; /* user-defined strings & macros */
118 struct roffkv *rentab; /* renamed strings & macros */
119 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */
120 struct roffstr *xtab; /* single-byte trans table (`tr'), 128 entries */
121 const char *current_string; /* value of last called user macro */
122 struct tbl_node *first_tbl; /* first table parsed */
123 struct tbl_node *last_tbl; /* last table parsed */
124 struct tbl_node *tbl; /* current table being parsed */
125 struct eqn_node *last_eqn; /* equation parser */
126 struct eqn_node *eqn; /* active equation parser */
127 int eqn_inline; /* current equation is inline */
128 int options; /* parse options; MPARSE_COMMENT is always set */
129 int mstacksz; /* current size of mstack */
130 int mstackpos; /* position in mstack, -1 when empty */
131 int rstacksz; /* current size limit of rstack */
132 int rstackpos; /* position in rstack, -1 when empty */
133 int format; /* current file in mdoc or man format */
134 char control; /* control character; '\0' after roff_reset() */
135 char escape; /* escape character; a backslash by default */
136 };
137
138 /*
 * A macro definition, condition, or ignored block.
 * Nodes form a stack linked through parent; they are created by
 * roffnode_push() and destroyed by roffnode_pop(), which also
 * frees name and end.
 */
141 struct roffnode {
142 enum roff_tok tok; /* type of node */
143 struct roffnode *parent; /* up one in stack */
144 int line; /* parse line */
145 int col; /* parse col */
146 char *name; /* node name, e.g. macro name (private copy) */
147 char *end; /* custom end macro of the block */
148 int endspan; /* scope to: 1=eol 2=next line -1=\} */
149 int rule; /* content is: 1=evaluated 0=skipped; inherited on push */
150 };
151
/*
 * The common parameter list shared by all request handlers.
 * It is spelled as a macro so that the many handler prototypes
 * and definitions in this file stay in sync.
 */
152 #define ROFF_ARGS struct roff *r, /* parse ctx */ \
153 enum roff_tok tok, /* tok of macro */ \
154 struct buf *buf, /* input buffer */ \
155 int ln, /* parse line */ \
156 int ppos, /* original pos in buffer */ \
157 int pos, /* current pos in buffer */ \
158 int *offs /* reset offset of buffer data */
159
/* Pointer to a request handler taking the ROFF_ARGS parameters. */
160 typedef int (*roffproc)(ROFF_ARGS);
161
/*
 * The handlers for one request; the roffs[] array holds one of
 * these for every token below TOKEN_NONE.  In roffs[], text and sub
 * are NULL for all entries except those handled by roff_block or
 * roff_cond.
 */
162 struct roffmac {
163 roffproc proc; /* process new macro */
164 roffproc text; /* process as child text of macro */
165 roffproc sub; /* process as child of macro */
166 int flags; /* zero or ROFFMAC_STRUCT */
167 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
168 };
169
/* One entry of the predefs[] array of predefined strings. */
170 struct predef {
171 const char *name; /* predefined input name */
172 const char *str; /* replacement symbol */
173 };
174
/*
 * Expands to one struct predef initializer followed by a comma.
 * Presumably invoked once per line by "predefs.in", which is
 * included inside the initializer of predefs[] -- TODO confirm.
 */
175 #define PREDEF(__name, __str) \
176 { (__name), (__str) },
177
178 /* --- function prototypes ------------------------------------------------ */
179
180 static int roffnode_cleanscope(struct roff *);
181 static int roffnode_pop(struct roff *);
182 static void roffnode_push(struct roff *, enum roff_tok,
183 const char *, int, int);
184 static void roff_addtbl(struct roff_man *, int, struct tbl_node *);
185 static int roff_als(ROFF_ARGS);
186 static int roff_block(ROFF_ARGS);
187 static int roff_block_text(ROFF_ARGS);
188 static int roff_block_sub(ROFF_ARGS);
189 static int roff_break(ROFF_ARGS);
190 static int roff_cblock(ROFF_ARGS);
191 static int roff_cc(ROFF_ARGS);
192 static int roff_ccond(struct roff *, int, int);
193 static int roff_char(ROFF_ARGS);
194 static int roff_cond(ROFF_ARGS);
195 static int roff_cond_checkend(ROFF_ARGS);
196 static int roff_cond_text(ROFF_ARGS);
197 static int roff_cond_sub(ROFF_ARGS);
198 static int roff_ds(ROFF_ARGS);
199 static int roff_ec(ROFF_ARGS);
200 static int roff_eo(ROFF_ARGS);
201 static int roff_eqndelim(struct roff *, struct buf *, int);
202 static int roff_evalcond(struct roff *, int, char *, int *);
203 static int roff_evalnum(struct roff *, int,
204 const char *, int *, int *, int);
205 static int roff_evalpar(struct roff *, int,
206 const char *, int *, int *, int);
207 static int roff_evalstrcond(const char *, int *);
208 static int roff_expand(struct roff *, struct buf *,
209 int, int, char);
210 static void roff_free1(struct roff *);
211 static void roff_freereg(struct roffreg *);
212 static void roff_freestr(struct roffkv *);
213 static size_t roff_getname(struct roff *, char **, int, int);
214 static int roff_getnum(const char *, int *, int *, int);
215 static int roff_getop(const char *, int *, char *);
216 static int roff_getregn(struct roff *,
217 const char *, size_t, char);
218 static int roff_getregro(const struct roff *,
219 const char *name);
220 static const char *roff_getstrn(struct roff *,
221 const char *, size_t, int *);
222 static int roff_hasregn(const struct roff *,
223 const char *, size_t);
224 static int roff_insec(ROFF_ARGS);
225 static int roff_it(ROFF_ARGS);
226 static int roff_line_ignore(ROFF_ARGS);
227 static void roff_man_alloc1(struct roff_man *);
228 static void roff_man_free1(struct roff_man *);
229 static int roff_manyarg(ROFF_ARGS);
230 static int roff_mc(ROFF_ARGS);
231 static int roff_noarg(ROFF_ARGS);
232 static int roff_nop(ROFF_ARGS);
233 static int roff_nr(ROFF_ARGS);
234 static int roff_onearg(ROFF_ARGS);
235 static enum roff_tok roff_parse(struct roff *, char *, int *,
236 int, int);
237 static int roff_parsetext(struct roff *, struct buf *,
238 int, int *);
239 static int roff_renamed(ROFF_ARGS);
240 static int roff_req_or_macro(ROFF_ARGS);
241 static int roff_return(ROFF_ARGS);
242 static int roff_rm(ROFF_ARGS);
243 static int roff_rn(ROFF_ARGS);
244 static int roff_rr(ROFF_ARGS);
245 static void roff_setregn(struct roff *, const char *,
246 size_t, int, char, int);
247 static void roff_setstr(struct roff *,
248 const char *, const char *, int);
249 static void roff_setstrn(struct roffkv **, const char *,
250 size_t, const char *, size_t, int);
251 static int roff_shift(ROFF_ARGS);
252 static int roff_so(ROFF_ARGS);
253 static int roff_tr(ROFF_ARGS);
254 static int roff_Dd(ROFF_ARGS);
255 static int roff_TE(ROFF_ARGS);
256 static int roff_TS(ROFF_ARGS);
257 static int roff_EQ(ROFF_ARGS);
258 static int roff_EN(ROFF_ARGS);
259 static int roff_T_(ROFF_ARGS);
260 static int roff_unsupp(ROFF_ARGS);
261 static int roff_userdef(ROFF_ARGS);
262
263 /* --- constant data ------------------------------------------------------ */
264
265 #define ROFFNUM_SCALE (1 << 0) /* Honour scaling in roff_getnum(). */
266 #define ROFFNUM_WHITE (1 << 1) /* Skip whitespace in roff_evalnum(). */
267
/*
 * Names of all requests and macros, indexed by token ID:
 * roffhash_alloc() reads roff_name[tok] for each enum roff_tok value
 * in the requested range and skips the NULL slots, which therefore
 * never become searchable names.
 *
 * The first part, up to and including ".", lists the roff requests
 * in the same order as the roffs[] dispatch table below; its NULL
 * at index 12 corresponds to the slot labelled ROFF_MAX there.
 * The groups after "text" are presumably the mdoc(7) macros
 * ("Dd" to "Tg") and the man(7) macros ("TH" to "ME"), each
 * followed by a NULL slot -- TODO confirm against enum roff_tok.
 */
268 const char *__roff_name[MAN_MAX + 1] = {
269 "br", "ce", "fi", "ft",
270 "ll", "mc", "nf",
271 "po", "rj", "sp",
272 "ta", "ti", NULL,
273 "ab", "ad", "af", "aln",
274 "als", "am", "am1", "ami",
275 "ami1", "as", "as1", "asciify",
276 "backtrace", "bd", "bleedat", "blm",
277 "box", "boxa", "bp", "BP",
278 "break", "breakchar", "brnl", "brp",
279 "brpnl", "c2", "cc",
280 "cf", "cflags", "ch", "char",
281 "chop", "class", "close", "CL",
282 "color", "composite", "continue", "cp",
283 "cropat", "cs", "cu", "da",
284 "dch", "Dd", "de", "de1",
285 "defcolor", "dei", "dei1", "device",
286 "devicem", "di", "do", "ds",
287 "ds1", "dwh", "dt", "ec",
288 "ecr", "ecs", "el", "em",
289 "EN", "eo", "EP", "EQ",
290 "errprint", "ev", "evc", "ex",
291 "fallback", "fam", "fc", "fchar",
292 "fcolor", "fdeferlig", "feature", "fkern",
293 "fl", "flig", "fp", "fps",
294 "fschar", "fspacewidth", "fspecial", "ftr",
295 "fzoom", "gcolor", "hc", "hcode",
296 "hidechar", "hla", "hlm", "hpf",
297 "hpfa", "hpfcode", "hw", "hy",
298 "hylang", "hylen", "hym", "hypp",
299 "hys", "ie", "if", "ig",
300 "index", "it", "itc", "IX",
301 "kern", "kernafter", "kernbefore", "kernpair",
302 "lc", "lc_ctype", "lds", "length",
303 "letadj", "lf", "lg", "lhang",
304 "linetabs", "lnr", "lnrf", "lpfx",
305 "ls", "lsm", "lt",
306 "mediasize", "minss", "mk", "mso",
307 "na", "ne", "nh", "nhychar",
308 "nm", "nn", "nop", "nr",
309 "nrf", "nroff", "ns", "nx",
310 "open", "opena", "os", "output",
311 "padj", "papersize", "pc", "pev",
312 "pi", "PI", "pl", "pm",
313 "pn", "pnr", "ps",
314 "psbb", "pshape", "pso", "ptr",
315 "pvs", "rchar", "rd", "recursionlimit",
316 "return", "rfschar", "rhang",
317 "rm", "rn", "rnn", "rr",
318 "rs", "rt", "schar", "sentchar",
319 "shc", "shift", "sizes", "so",
320 "spacewidth", "special", "spreadwarn", "ss",
321 "sty", "substring", "sv", "sy",
322 "T&", "tc", "TE",
323 "TH", "tkf", "tl",
324 "tm", "tm1", "tmc", "tr",
325 "track", "transchar", "trf", "trimat",
326 "trin", "trnt", "troff", "TS",
327 "uf", "ul", "unformat", "unwatch",
328 "unwatchn", "vpt", "vs", "warn",
329 "warnscale", "watch", "watchlength", "watchn",
330 "wh", "while", "write", "writec",
331 "writem", "xflag", ".", NULL,
332 NULL, "text",
333 "Dd", "Dt", "Os", "Sh",
334 "Ss", "Pp", "D1", "Dl",
335 "Bd", "Ed", "Bl", "El",
336 "It", "Ad", "An", "Ap",
337 "Ar", "Cd", "Cm", "Dv",
338 "Er", "Ev", "Ex", "Fa",
339 "Fd", "Fl", "Fn", "Ft",
340 "Ic", "In", "Li", "Nd",
341 "Nm", "Op", "Ot", "Pa",
342 "Rv", "St", "Va", "Vt",
343 "Xr", "%A", "%B", "%D",
344 "%I", "%J", "%N", "%O",
345 "%P", "%R", "%T", "%V",
346 "Ac", "Ao", "Aq", "At",
347 "Bc", "Bf", "Bo", "Bq",
348 "Bsx", "Bx", "Db", "Dc",
349 "Do", "Dq", "Ec", "Ef",
350 "Em", "Eo", "Fx", "Ms",
351 "No", "Ns", "Nx", "Ox",
352 "Pc", "Pf", "Po", "Pq",
353 "Qc", "Ql", "Qo", "Qq",
354 "Re", "Rs", "Sc", "So",
355 "Sq", "Sm", "Sx", "Sy",
356 "Tn", "Ux", "Xc", "Xo",
357 "Fo", "Fc", "Oo", "Oc",
358 "Bk", "Ek", "Bt", "Hf",
359 "Fr", "Ud", "Lb", "Lp",
360 "Lk", "Mt", "Brq", "Bro",
361 "Brc", "%C", "Es", "En",
362 "Dx", "%Q", "%U", "Ta",
363 "Tg", NULL,
364 "TH", "SH", "SS", "TP",
365 "TQ",
366 "LP", "PP", "P", "IP",
367 "HP", "SM", "SB", "BI",
368 "IB", "BR", "RB", "R",
369 "B", "I", "IR", "RI",
370 "RE", "RS", "DT", "UC",
371 "PD", "AT", "in",
372 "SY", "YS", "OP",
373 "EX", "EE", "UR",
374 "UE", "MT", "ME", NULL
375 };
/* Read-only view of the table above; this is what the code uses. */
376 const char *const *roff_name = __roff_name;
377
/*
 * Dispatch table of request handlers with one entry per token
 * below TOKEN_NONE, listed in the same order as the request names
 * at the start of __roff_name[].  See struct roffmac for the
 * meaning of the three handler slots and of ROFFMAC_STRUCT.
 */
378 static struct roffmac roffs[TOKEN_NONE] = {
379 { roff_noarg, NULL, NULL, 0 }, /* br */
380 { roff_onearg, NULL, NULL, 0 }, /* ce */
381 { roff_noarg, NULL, NULL, 0 }, /* fi */
382 { roff_onearg, NULL, NULL, 0 }, /* ft */
383 { roff_onearg, NULL, NULL, 0 }, /* ll */
384 { roff_mc, NULL, NULL, 0 }, /* mc */
385 { roff_noarg, NULL, NULL, 0 }, /* nf */
386 { roff_onearg, NULL, NULL, 0 }, /* po */
387 { roff_onearg, NULL, NULL, 0 }, /* rj */
388 { roff_onearg, NULL, NULL, 0 }, /* sp */
389 { roff_manyarg, NULL, NULL, 0 }, /* ta */
390 { roff_onearg, NULL, NULL, 0 }, /* ti */
391 { NULL, NULL, NULL, 0 }, /* ROFF_MAX */
392 { roff_unsupp, NULL, NULL, 0 }, /* ab */
393 { roff_line_ignore, NULL, NULL, 0 }, /* ad */
394 { roff_line_ignore, NULL, NULL, 0 }, /* af */
395 { roff_unsupp, NULL, NULL, 0 }, /* aln */
396 { roff_als, NULL, NULL, 0 }, /* als */
397 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am */
398 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am1 */
399 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami */
400 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami1 */
401 { roff_ds, NULL, NULL, 0 }, /* as */
402 { roff_ds, NULL, NULL, 0 }, /* as1 */
403 { roff_unsupp, NULL, NULL, 0 }, /* asciify */
404 { roff_line_ignore, NULL, NULL, 0 }, /* backtrace */
405 { roff_line_ignore, NULL, NULL, 0 }, /* bd */
406 { roff_line_ignore, NULL, NULL, 0 }, /* bleedat */
407 { roff_unsupp, NULL, NULL, 0 }, /* blm */
408 { roff_unsupp, NULL, NULL, 0 }, /* box */
409 { roff_unsupp, NULL, NULL, 0 }, /* boxa */
410 { roff_line_ignore, NULL, NULL, 0 }, /* bp */
411 { roff_unsupp, NULL, NULL, 0 }, /* BP */
412 { roff_break, NULL, NULL, 0 }, /* break */
413 { roff_line_ignore, NULL, NULL, 0 }, /* breakchar */
414 { roff_line_ignore, NULL, NULL, 0 }, /* brnl */
415 { roff_noarg, NULL, NULL, 0 }, /* brp */
416 { roff_line_ignore, NULL, NULL, 0 }, /* brpnl */
417 { roff_unsupp, NULL, NULL, 0 }, /* c2 */
418 { roff_cc, NULL, NULL, 0 }, /* cc */
419 { roff_insec, NULL, NULL, 0 }, /* cf */
420 { roff_line_ignore, NULL, NULL, 0 }, /* cflags */
421 { roff_line_ignore, NULL, NULL, 0 }, /* ch */
422 { roff_char, NULL, NULL, 0 }, /* char */
423 { roff_unsupp, NULL, NULL, 0 }, /* chop */
424 { roff_line_ignore, NULL, NULL, 0 }, /* class */
425 { roff_insec, NULL, NULL, 0 }, /* close */
426 { roff_unsupp, NULL, NULL, 0 }, /* CL */
427 { roff_line_ignore, NULL, NULL, 0 }, /* color */
428 { roff_unsupp, NULL, NULL, 0 }, /* composite */
429 { roff_unsupp, NULL, NULL, 0 }, /* continue */
430 { roff_line_ignore, NULL, NULL, 0 }, /* cp */
431 { roff_line_ignore, NULL, NULL, 0 }, /* cropat */
432 { roff_line_ignore, NULL, NULL, 0 }, /* cs */
433 { roff_line_ignore, NULL, NULL, 0 }, /* cu */
434 { roff_unsupp, NULL, NULL, 0 }, /* da */
435 { roff_unsupp, NULL, NULL, 0 }, /* dch */
436 { roff_Dd, NULL, NULL, 0 }, /* Dd */
437 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de */
438 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de1 */
439 { roff_line_ignore, NULL, NULL, 0 }, /* defcolor */
440 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei */
441 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei1 */
442 { roff_unsupp, NULL, NULL, 0 }, /* device */
443 { roff_unsupp, NULL, NULL, 0 }, /* devicem */
444 { roff_unsupp, NULL, NULL, 0 }, /* di */
445 { roff_unsupp, NULL, NULL, 0 }, /* do */
446 { roff_ds, NULL, NULL, 0 }, /* ds */
447 { roff_ds, NULL, NULL, 0 }, /* ds1 */
448 { roff_unsupp, NULL, NULL, 0 }, /* dwh */
449 { roff_unsupp, NULL, NULL, 0 }, /* dt */
450 { roff_ec, NULL, NULL, 0 }, /* ec */
451 { roff_unsupp, NULL, NULL, 0 }, /* ecr */
452 { roff_unsupp, NULL, NULL, 0 }, /* ecs */
453 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* el */
454 { roff_unsupp, NULL, NULL, 0 }, /* em */
455 { roff_EN, NULL, NULL, 0 }, /* EN */
456 { roff_eo, NULL, NULL, 0 }, /* eo */
457 { roff_unsupp, NULL, NULL, 0 }, /* EP */
458 { roff_EQ, NULL, NULL, 0 }, /* EQ */
459 { roff_line_ignore, NULL, NULL, 0 }, /* errprint */
460 { roff_unsupp, NULL, NULL, 0 }, /* ev */
461 { roff_unsupp, NULL, NULL, 0 }, /* evc */
462 { roff_unsupp, NULL, NULL, 0 }, /* ex */
463 { roff_line_ignore, NULL, NULL, 0 }, /* fallback */
464 { roff_line_ignore, NULL, NULL, 0 }, /* fam */
465 { roff_unsupp, NULL, NULL, 0 }, /* fc */
466 { roff_unsupp, NULL, NULL, 0 }, /* fchar */
467 { roff_line_ignore, NULL, NULL, 0 }, /* fcolor */
468 { roff_line_ignore, NULL, NULL, 0 }, /* fdeferlig */
469 { roff_line_ignore, NULL, NULL, 0 }, /* feature */
470 { roff_line_ignore, NULL, NULL, 0 }, /* fkern */
471 { roff_line_ignore, NULL, NULL, 0 }, /* fl */
472 { roff_line_ignore, NULL, NULL, 0 }, /* flig */
473 { roff_line_ignore, NULL, NULL, 0 }, /* fp */
474 { roff_line_ignore, NULL, NULL, 0 }, /* fps */
475 { roff_unsupp, NULL, NULL, 0 }, /* fschar */
476 { roff_line_ignore, NULL, NULL, 0 }, /* fspacewidth */
477 { roff_line_ignore, NULL, NULL, 0 }, /* fspecial */
478 { roff_line_ignore, NULL, NULL, 0 }, /* ftr */
479 { roff_line_ignore, NULL, NULL, 0 }, /* fzoom */
480 { roff_line_ignore, NULL, NULL, 0 }, /* gcolor */
481 { roff_line_ignore, NULL, NULL, 0 }, /* hc */
482 { roff_line_ignore, NULL, NULL, 0 }, /* hcode */
483 { roff_line_ignore, NULL, NULL, 0 }, /* hidechar */
484 { roff_line_ignore, NULL, NULL, 0 }, /* hla */
485 { roff_line_ignore, NULL, NULL, 0 }, /* hlm */
486 { roff_line_ignore, NULL, NULL, 0 }, /* hpf */
487 { roff_line_ignore, NULL, NULL, 0 }, /* hpfa */
488 { roff_line_ignore, NULL, NULL, 0 }, /* hpfcode */
489 { roff_line_ignore, NULL, NULL, 0 }, /* hw */
490 { roff_line_ignore, NULL, NULL, 0 }, /* hy */
491 { roff_line_ignore, NULL, NULL, 0 }, /* hylang */
492 { roff_line_ignore, NULL, NULL, 0 }, /* hylen */
493 { roff_line_ignore, NULL, NULL, 0 }, /* hym */
494 { roff_line_ignore, NULL, NULL, 0 }, /* hypp */
495 { roff_line_ignore, NULL, NULL, 0 }, /* hys */
496 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* ie */
497 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* if */
498 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ig */
499 { roff_unsupp, NULL, NULL, 0 }, /* index */
500 { roff_it, NULL, NULL, 0 }, /* it */
501 { roff_unsupp, NULL, NULL, 0 }, /* itc */
502 { roff_line_ignore, NULL, NULL, 0 }, /* IX */
503 { roff_line_ignore, NULL, NULL, 0 }, /* kern */
504 { roff_line_ignore, NULL, NULL, 0 }, /* kernafter */
505 { roff_line_ignore, NULL, NULL, 0 }, /* kernbefore */
506 { roff_line_ignore, NULL, NULL, 0 }, /* kernpair */
507 { roff_unsupp, NULL, NULL, 0 }, /* lc */
508 { roff_unsupp, NULL, NULL, 0 }, /* lc_ctype */
509 { roff_unsupp, NULL, NULL, 0 }, /* lds */
510 { roff_unsupp, NULL, NULL, 0 }, /* length */
511 { roff_line_ignore, NULL, NULL, 0 }, /* letadj */
512 { roff_insec, NULL, NULL, 0 }, /* lf */
513 { roff_line_ignore, NULL, NULL, 0 }, /* lg */
514 { roff_line_ignore, NULL, NULL, 0 }, /* lhang */
515 { roff_unsupp, NULL, NULL, 0 }, /* linetabs */
516 { roff_unsupp, NULL, NULL, 0 }, /* lnr */
517 { roff_unsupp, NULL, NULL, 0 }, /* lnrf */
518 { roff_unsupp, NULL, NULL, 0 }, /* lpfx */
519 { roff_line_ignore, NULL, NULL, 0 }, /* ls */
520 { roff_unsupp, NULL, NULL, 0 }, /* lsm */
521 { roff_line_ignore, NULL, NULL, 0 }, /* lt */
522 { roff_line_ignore, NULL, NULL, 0 }, /* mediasize */
523 { roff_line_ignore, NULL, NULL, 0 }, /* minss */
524 { roff_line_ignore, NULL, NULL, 0 }, /* mk */
525 { roff_insec, NULL, NULL, 0 }, /* mso */
526 { roff_line_ignore, NULL, NULL, 0 }, /* na */
527 { roff_line_ignore, NULL, NULL, 0 }, /* ne */
528 { roff_line_ignore, NULL, NULL, 0 }, /* nh */
529 { roff_line_ignore, NULL, NULL, 0 }, /* nhychar */
530 { roff_unsupp, NULL, NULL, 0 }, /* nm */
531 { roff_unsupp, NULL, NULL, 0 }, /* nn */
532 { roff_nop, NULL, NULL, 0 }, /* nop */
533 { roff_nr, NULL, NULL, 0 }, /* nr */
534 { roff_unsupp, NULL, NULL, 0 }, /* nrf */
535 { roff_line_ignore, NULL, NULL, 0 }, /* nroff */
536 { roff_line_ignore, NULL, NULL, 0 }, /* ns */
537 { roff_insec, NULL, NULL, 0 }, /* nx */
538 { roff_insec, NULL, NULL, 0 }, /* open */
539 { roff_insec, NULL, NULL, 0 }, /* opena */
540 { roff_line_ignore, NULL, NULL, 0 }, /* os */
541 { roff_unsupp, NULL, NULL, 0 }, /* output */
542 { roff_line_ignore, NULL, NULL, 0 }, /* padj */
543 { roff_line_ignore, NULL, NULL, 0 }, /* papersize */
544 { roff_line_ignore, NULL, NULL, 0 }, /* pc */
545 { roff_line_ignore, NULL, NULL, 0 }, /* pev */
546 { roff_insec, NULL, NULL, 0 }, /* pi */
547 { roff_unsupp, NULL, NULL, 0 }, /* PI */
548 { roff_line_ignore, NULL, NULL, 0 }, /* pl */
549 { roff_line_ignore, NULL, NULL, 0 }, /* pm */
550 { roff_line_ignore, NULL, NULL, 0 }, /* pn */
551 { roff_line_ignore, NULL, NULL, 0 }, /* pnr */
552 { roff_line_ignore, NULL, NULL, 0 }, /* ps */
553 { roff_unsupp, NULL, NULL, 0 }, /* psbb */
554 { roff_unsupp, NULL, NULL, 0 }, /* pshape */
555 { roff_insec, NULL, NULL, 0 }, /* pso */
556 { roff_line_ignore, NULL, NULL, 0 }, /* ptr */
557 { roff_line_ignore, NULL, NULL, 0 }, /* pvs */
558 { roff_unsupp, NULL, NULL, 0 }, /* rchar */
559 { roff_line_ignore, NULL, NULL, 0 }, /* rd */
560 { roff_line_ignore, NULL, NULL, 0 }, /* recursionlimit */
561 { roff_return, NULL, NULL, 0 }, /* return */
562 { roff_unsupp, NULL, NULL, 0 }, /* rfschar */
563 { roff_line_ignore, NULL, NULL, 0 }, /* rhang */
564 { roff_rm, NULL, NULL, 0 }, /* rm */
565 { roff_rn, NULL, NULL, 0 }, /* rn */
566 { roff_unsupp, NULL, NULL, 0 }, /* rnn */
567 { roff_rr, NULL, NULL, 0 }, /* rr */
568 { roff_line_ignore, NULL, NULL, 0 }, /* rs */
569 { roff_line_ignore, NULL, NULL, 0 }, /* rt */
570 { roff_unsupp, NULL, NULL, 0 }, /* schar */
571 { roff_line_ignore, NULL, NULL, 0 }, /* sentchar */
572 { roff_line_ignore, NULL, NULL, 0 }, /* shc */
573 { roff_shift, NULL, NULL, 0 }, /* shift */
574 { roff_line_ignore, NULL, NULL, 0 }, /* sizes */
575 { roff_so, NULL, NULL, 0 }, /* so */
576 { roff_line_ignore, NULL, NULL, 0 }, /* spacewidth */
577 { roff_line_ignore, NULL, NULL, 0 }, /* special */
578 { roff_line_ignore, NULL, NULL, 0 }, /* spreadwarn */
579 { roff_line_ignore, NULL, NULL, 0 }, /* ss */
580 { roff_line_ignore, NULL, NULL, 0 }, /* sty */
581 { roff_unsupp, NULL, NULL, 0 }, /* substring */
582 { roff_line_ignore, NULL, NULL, 0 }, /* sv */
583 { roff_insec, NULL, NULL, 0 }, /* sy */
584 { roff_T_, NULL, NULL, 0 }, /* T& */
585 { roff_unsupp, NULL, NULL, 0 }, /* tc */
586 { roff_TE, NULL, NULL, 0 }, /* TE */
587 { roff_Dd, NULL, NULL, 0 }, /* TH */
588 { roff_line_ignore, NULL, NULL, 0 }, /* tkf */
589 { roff_unsupp, NULL, NULL, 0 }, /* tl */
590 { roff_line_ignore, NULL, NULL, 0 }, /* tm */
591 { roff_line_ignore, NULL, NULL, 0 }, /* tm1 */
592 { roff_line_ignore, NULL, NULL, 0 }, /* tmc */
593 { roff_tr, NULL, NULL, 0 }, /* tr */
594 { roff_line_ignore, NULL, NULL, 0 }, /* track */
595 { roff_line_ignore, NULL, NULL, 0 }, /* transchar */
596 { roff_insec, NULL, NULL, 0 }, /* trf */
597 { roff_line_ignore, NULL, NULL, 0 }, /* trimat */
598 { roff_unsupp, NULL, NULL, 0 }, /* trin */
599 { roff_unsupp, NULL, NULL, 0 }, /* trnt */
600 { roff_line_ignore, NULL, NULL, 0 }, /* troff */
601 { roff_TS, NULL, NULL, 0 }, /* TS */
602 { roff_line_ignore, NULL, NULL, 0 }, /* uf */
603 { roff_line_ignore, NULL, NULL, 0 }, /* ul */
604 { roff_unsupp, NULL, NULL, 0 }, /* unformat */
605 { roff_line_ignore, NULL, NULL, 0 }, /* unwatch */
606 { roff_line_ignore, NULL, NULL, 0 }, /* unwatchn */
607 { roff_line_ignore, NULL, NULL, 0 }, /* vpt */
608 { roff_line_ignore, NULL, NULL, 0 }, /* vs */
609 { roff_line_ignore, NULL, NULL, 0 }, /* warn */
610 { roff_line_ignore, NULL, NULL, 0 }, /* warnscale */
611 { roff_line_ignore, NULL, NULL, 0 }, /* watch */
612 { roff_line_ignore, NULL, NULL, 0 }, /* watchlength */
613 { roff_line_ignore, NULL, NULL, 0 }, /* watchn */
614 { roff_unsupp, NULL, NULL, 0 }, /* wh */
615 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* while */
616 { roff_insec, NULL, NULL, 0 }, /* write */
617 { roff_insec, NULL, NULL, 0 }, /* writec */
618 { roff_insec, NULL, NULL, 0 }, /* writem */
619 { roff_line_ignore, NULL, NULL, 0 }, /* xflag */
620 { roff_cblock, NULL, NULL, 0 }, /* . */
621 { roff_renamed, NULL, NULL, 0 }, /* presumably ROFF_RENAMED -- TODO confirm */
622 { roff_userdef, NULL, NULL, 0 } /* presumably ROFF_USERDEF -- TODO confirm */
623 };
624
625 /*
 * Array of injected predefined strings.
 * The initializers come from "predefs.in", which presumably
 * consists of PREDEF() invocations -- TODO confirm.
 */
626 #define PREDEFS_MAX 38
627 static const struct predef predefs[PREDEFS_MAX] = {
628 #include "predefs.in"
629 };
630
/*
 * File-scope parser state.  These variables are not members of
 * struct roff; roff_reset() clears all four of them.
 */
631 static int roffce_lines; /* number of input lines to center */
632 static struct roff_node *roffce_node; /* active request */
633 static int roffit_lines; /* number of lines to delay */
634 static char *roffit_macro; /* NUL-terminated macro line */
635
636
637 /* --- request table ------------------------------------------------------ */
638
639 struct ohash *
640 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
641 {
642 struct ohash *htab;
643 struct roffreq *req;
644 enum roff_tok tok;
645 size_t sz;
646 unsigned int slot;
647
648 htab = mandoc_malloc(sizeof(*htab));
649 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
650
651 for (tok = mintok; tok < maxtok; tok++) {
652 if (roff_name[tok] == NULL)
653 continue;
654 sz = strlen(roff_name[tok]);
655 req = mandoc_malloc(sizeof(*req) + sz + 1);
656 req->tok = tok;
657 memcpy(req->name, roff_name[tok], sz + 1);
658 slot = ohash_qlookup(htab, req->name);
659 ohash_insert(htab, slot, req);
660 }
661 return htab;
662 }
663
/*
 * Release a table created by roffhash_alloc(): every entry,
 * the table internals, and the table object itself.
 * Passing NULL is allowed and does nothing.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 iter;

	if (htab != NULL) {
		entry = ohash_first(htab, &iter);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &iter);
		}
		ohash_delete(htab);
		free(htab);
	}
}
678
679 enum roff_tok
680 roffhash_find(struct ohash *htab, const char *name, size_t sz)
681 {
682 struct roffreq *req;
683 const char *end;
684
685 if (sz) {
686 end = name + sz;
687 req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
688 } else
689 req = ohash_find(htab, ohash_qlookup(htab, name));
690 return req == NULL ? TOKEN_NONE : req->tok;
691 }
692
693 /* --- stack of request blocks -------------------------------------------- */
694
695 /*
696 * Pop the current node off of the stack of roff instructions currently
697 * pending. Return 1 if it is a loop or 0 otherwise.
698 */
699 static int
700 roffnode_pop(struct roff *r)
701 {
702 struct roffnode *p;
703 int inloop;
704
705 p = r->last;
706 inloop = p->tok == ROFF_while;
707 r->last = p->parent;
708 free(p->name);
709 free(p->end);
710 free(p);
711 return inloop;
712 }
713
714 /*
715 * Push a roff node onto the instruction stack. This must later be
716 * removed with roffnode_pop().
717 */
718 static void
719 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
720 int line, int col)
721 {
722 struct roffnode *p;
723
724 p = mandoc_calloc(1, sizeof(struct roffnode));
725 p->tok = tok;
726 if (name)
727 p->name = mandoc_strdup(name);
728 p->parent = r->last;
729 p->line = line;
730 p->col = col;
731 p->rule = p->parent ? p->parent->rule : 0;
732
733 r->last = p;
734 }
735
736 /* --- roff parser state data management ---------------------------------- */
737
738 static void
739 roff_free1(struct roff *r)
740 {
741 int i;
742
743 tbl_free(r->first_tbl);
744 r->first_tbl = r->last_tbl = r->tbl = NULL;
745
746 eqn_free(r->last_eqn);
747 r->last_eqn = r->eqn = NULL;
748
749 while (r->mstackpos >= 0)
750 roff_userret(r);
751
752 while (r->last)
753 roffnode_pop(r);
754
755 free (r->rstack);
756 r->rstack = NULL;
757 r->rstacksz = 0;
758 r->rstackpos = -1;
759
760 roff_freereg(r->regtab);
761 r->regtab = NULL;
762
763 roff_freestr(r->strtab);
764 roff_freestr(r->rentab);
765 roff_freestr(r->xmbtab);
766 r->strtab = r->rentab = r->xmbtab = NULL;
767
768 if (r->xtab)
769 for (i = 0; i < 128; i++)
770 free(r->xtab[i].p);
771 free(r->xtab);
772 r->xtab = NULL;
773 }
774
775 void
776 roff_reset(struct roff *r)
777 {
778 roff_free1(r);
779 r->options |= MPARSE_COMMENT;
780 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
781 r->control = '\0';
782 r->escape = '\\';
783 roffce_lines = 0;
784 roffce_node = NULL;
785 roffit_lines = 0;
786 roffit_macro = NULL;
787 }
788
789 void
790 roff_free(struct roff *r)
791 {
792 int i;
793
794 roff_free1(r);
795 for (i = 0; i < r->mstacksz; i++)
796 free(r->mstack[i].argv);
797 free(r->mstack);
798 roffhash_free(r->reqtab);
799 free(r);
800 }
801
802 struct roff *
803 roff_alloc(int options)
804 {
805 struct roff *r;
806
807 r = mandoc_calloc(1, sizeof(struct roff));
808 r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
809 r->options = options | MPARSE_COMMENT;
810 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
811 r->mstackpos = -1;
812 r->rstackpos = -1;
813 r->escape = '\\';
814 return r;
815 }
816
817 /* --- syntax tree state data management ---------------------------------- */
818
819 static void
820 roff_man_free1(struct roff_man *man)
821 {
822 if (man->meta.first != NULL)
823 roff_node_delete(man, man->meta.first);
824 free(man->meta.msec);
825 free(man->meta.vol);
826 free(man->meta.os);
827 free(man->meta.arch);
828 free(man->meta.title);
829 free(man->meta.name);
830 free(man->meta.date);
831 free(man->meta.sodest);
832 }
833
834 void
835 roff_state_reset(struct roff_man *man)
836 {
837 man->last = man->meta.first;
838 man->last_es = NULL;
839 man->flags = 0;
840 man->lastsec = man->lastnamed = SEC_NONE;
841 man->next = ROFF_NEXT_CHILD;
842 roff_setreg(man->roff, "nS", 0, '=');
843 }
844
845 static void
846 roff_man_alloc1(struct roff_man *man)
847 {
848 memset(&man->meta, 0, sizeof(man->meta));
849 man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
850 man->meta.first->type = ROFFT_ROOT;
851 man->meta.macroset = MACROSET_NONE;
852 roff_state_reset(man);
853 }
854
/*
 * Throw away the syntax tree and the meta data of man
 * and start over with an empty tree.
 */
void
roff_man_reset(struct roff_man *man)
{
	/* Release the old content first, then build the empty state. */
	roff_man_free1(man);
	roff_man_alloc1(man);
}
861
862 void
863 roff_man_free(struct roff_man *man)
864 {
865 roff_man_free1(man);
866 free(man->os_r);
867 free(man);
868 }
869
870 struct roff_man *
871 roff_man_alloc(struct roff *roff, const char *os_s, int quick)
872 {
873 struct roff_man *man;
874
875 man = mandoc_calloc(1, sizeof(*man));
876 man->roff = roff;
877 man->os_s = os_s;
878 man->quick = quick;
879 roff_man_alloc1(man);
880 roff->man = man;
881 return man;
882 }
883
884 /* --- syntax tree handling ----------------------------------------------- */
885
886 struct roff_node *
887 roff_node_alloc(struct roff_man *man, int line, int pos,
888 enum roff_type type, int tok)
889 {
890 struct roff_node *n;
891
892 n = mandoc_calloc(1, sizeof(*n));
893 n->line = line;
894 n->pos = pos;
895 n->tok = tok;
896 n->type = type;
897 n->sec = man->lastsec;
898
899 if (man->flags & MDOC_SYNOPSIS)
900 n->flags |= NODE_SYNPRETTY;
901 else
902 n->flags &= ~NODE_SYNPRETTY;
903 if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL)
904 n->flags |= NODE_NOFILL;
905 else
906 n->flags &= ~NODE_NOFILL;
907 if (man->flags & MDOC_NEWLINE)
908 n->flags |= NODE_LINE;
909 man->flags &= ~MDOC_NEWLINE;
910
911 return n;
912 }
913
914 void
915 roff_node_append(struct roff_man *man, struct roff_node *n)
916 {
917
918 switch (man->next) {
919 case ROFF_NEXT_SIBLING:
920 if (man->last->next != NULL) {
921 n->next = man->last->next;
922 man->last->next->prev = n;
923 } else
924 man->last->parent->last = n;
925 man->last->next = n;
926 n->prev = man->last;
927 n->parent = man->last->parent;
928 break;
929 case ROFF_NEXT_CHILD:
930 if (man->last->child != NULL) {
931 n->next = man->last->child;
932 man->last->child->prev = n;
933 } else
934 man->last->last = n;
935 man->last->child = n;
936 n->parent = man->last;
937 break;
938 default:
939 abort();
940 }
941 man->last = n;
942
943 switch (n->type) {
944 case ROFFT_HEAD:
945 n->parent->head = n;
946 break;
947 case ROFFT_BODY:
948 if (n->end != ENDBODY_NOT)
949 return;
950 n->parent->body = n;
951 break;
952 case ROFFT_TAIL:
953 n->parent->tail = n;
954 break;
955 default:
956 return;
957 }
958
959 /*
960 * Copy over the normalised-data pointer of our parent. Not
961 * everybody has one, but copying a null pointer is fine.
962 */
963
964 n->norm = n->parent->norm;
965 assert(n->parent->type == ROFFT_BLOCK);
966 }
967
968 void
969 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
970 {
971 struct roff_node *n;
972
973 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
974 n->string = roff_strdup(man->roff, word);
975 roff_node_append(man, n);
976 n->flags |= NODE_VALID | NODE_ENDED;
977 man->next = ROFF_NEXT_SIBLING;
978 }
979
980 void
981 roff_word_append(struct roff_man *man, const char *word)
982 {
983 struct roff_node *n;
984 char *addstr, *newstr;
985
986 n = man->last;
987 addstr = roff_strdup(man->roff, word);
988 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
989 free(addstr);
990 free(n->string);
991 n->string = newstr;
992 man->next = ROFF_NEXT_SIBLING;
993 }
994
995 void
996 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
997 {
998 struct roff_node *n;
999
1000 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
1001 roff_node_append(man, n);
1002 man->next = ROFF_NEXT_CHILD;
1003 }
1004
1005 struct roff_node *
1006 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
1007 {
1008 struct roff_node *n;
1009
1010 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1011 roff_node_append(man, n);
1012 man->next = ROFF_NEXT_CHILD;
1013 return n;
1014 }
1015
1016 struct roff_node *
1017 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1018 {
1019 struct roff_node *n;
1020
1021 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1022 roff_node_append(man, n);
1023 man->next = ROFF_NEXT_CHILD;
1024 return n;
1025 }
1026
1027 struct roff_node *
1028 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1029 {
1030 struct roff_node *n;
1031
1032 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1033 roff_node_append(man, n);
1034 man->next = ROFF_NEXT_CHILD;
1035 return n;
1036 }
1037
1038 static void
1039 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1040 {
1041 struct roff_node *n;
1042 struct tbl_span *span;
1043
1044 if (man->meta.macroset == MACROSET_MAN)
1045 man_breakscope(man, ROFF_TS);
1046 while ((span = tbl_span(tbl)) != NULL) {
1047 n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1048 n->span = span;
1049 roff_node_append(man, n);
1050 n->flags |= NODE_VALID | NODE_ENDED;
1051 man->next = ROFF_NEXT_SIBLING;
1052 }
1053 }
1054
1055 void
1056 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1057 {
1058
1059 /* Adjust siblings. */
1060
1061 if (n->prev)
1062 n->prev->next = n->next;
1063 if (n->next)
1064 n->next->prev = n->prev;
1065
1066 /* Adjust parent. */
1067
1068 if (n->parent != NULL) {
1069 if (n->parent->child == n)
1070 n->parent->child = n->next;
1071 if (n->parent->last == n)
1072 n->parent->last = n->prev;
1073 }
1074
1075 /* Adjust parse point. */
1076
1077 if (man == NULL)
1078 return;
1079 if (man->last == n) {
1080 if (n->prev == NULL) {
1081 man->last = n->parent;
1082 man->next = ROFF_NEXT_CHILD;
1083 } else {
1084 man->last = n->prev;
1085 man->next = ROFF_NEXT_SIBLING;
1086 }
1087 }
1088 if (man->meta.first == n)
1089 man->meta.first = NULL;
1090 }
1091
1092 void
1093 roff_node_relink(struct roff_man *man, struct roff_node *n)
1094 {
1095 roff_node_unlink(man, n);
1096 n->prev = n->next = NULL;
1097 roff_node_append(man, n);
1098 }
1099
1100 void
1101 roff_node_free(struct roff_node *n)
1102 {
1103
1104 if (n->args != NULL)
1105 mdoc_argv_free(n->args);
1106 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1107 free(n->norm);
1108 eqn_box_free(n->eqn);
1109 free(n->string);
1110 free(n->tag);
1111 free(n);
1112 }
1113
1114 void
1115 roff_node_delete(struct roff_man *man, struct roff_node *n)
1116 {
1117
1118 while (n->child != NULL)
1119 roff_node_delete(man, n->child);
1120 roff_node_unlink(man, n);
1121 roff_node_free(n);
1122 }
1123
1124 int
1125 roff_node_transparent(struct roff_node *n)
1126 {
1127 if (n == NULL)
1128 return 0;
1129 if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
1130 return 1;
1131 return roff_tok_transparent(n->tok);
1132 }
1133
1134 int
1135 roff_tok_transparent(enum roff_tok tok)
1136 {
1137 switch (tok) {
1138 case ROFF_ft:
1139 case ROFF_ll:
1140 case ROFF_mc:
1141 case ROFF_po:
1142 case ROFF_ta:
1143 case MDOC_Db:
1144 case MDOC_Es:
1145 case MDOC_Sm:
1146 case MDOC_Tg:
1147 case MAN_DT:
1148 case MAN_UC:
1149 case MAN_PD:
1150 case MAN_AT:
1151 return 1;
1152 default:
1153 return 0;
1154 }
1155 }
1156
1157 struct roff_node *
1158 roff_node_child(struct roff_node *n)
1159 {
1160 for (n = n->child; roff_node_transparent(n); n = n->next)
1161 continue;
1162 return n;
1163 }
1164
1165 struct roff_node *
1166 roff_node_prev(struct roff_node *n)
1167 {
1168 do {
1169 n = n->prev;
1170 } while (roff_node_transparent(n));
1171 return n;
1172 }
1173
1174 struct roff_node *
1175 roff_node_next(struct roff_node *n)
1176 {
1177 do {
1178 n = n->next;
1179 } while (roff_node_transparent(n));
1180 return n;
1181 }
1182
1183 void
1184 deroff(char **dest, const struct roff_node *n)
1185 {
1186 char *cp;
1187 size_t sz;
1188
1189 if (n->string == NULL) {
1190 for (n = n->child; n != NULL; n = n->next)
1191 deroff(dest, n);
1192 return;
1193 }
1194
1195 /* Skip leading whitespace. */
1196
1197 for (cp = n->string; *cp != '\0'; cp++) {
1198 if (cp[0] == '\\' && cp[1] != '\0' &&
1199 strchr(" %&0^|~", cp[1]) != NULL)
1200 cp++;
1201 else if ( ! isspace((unsigned char)*cp))
1202 break;
1203 }
1204
1205 /* Skip trailing backslash. */
1206
1207 sz = strlen(cp);
1208 if (sz > 0 && cp[sz - 1] == '\\')
1209 sz--;
1210
1211 /* Skip trailing whitespace. */
1212
1213 for (; sz; sz--)
1214 if ( ! isspace((unsigned char)cp[sz-1]))
1215 break;
1216
1217 /* Skip empty strings. */
1218
1219 if (sz == 0)
1220 return;
1221
1222 if (*dest == NULL) {
1223 *dest = mandoc_strndup(cp, sz);
1224 return;
1225 }
1226
1227 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1228 free(*dest);
1229 *dest = cp;
1230 }
1231
1232 /* --- main functions of the roff parser ---------------------------------- */
1233
/*
 * In the current line, expand escape sequences that produce parsable
 * input text.  Also check the syntax of the remaining escape sequences,
 * which typically produce output glyphs or change formatter state.
 *
 * Arguments:
 *   r       roff parser state
 *   buf     the line buffer; buf->buf may be replaced by a newly
 *           allocated buffer and buf->sz updated
 *   ln      input line number, used for messages
 *   pos     offset in buf->buf where processing starts
 *   newesc  the character to treat as the escape character
 *
 * Returns ROFF_CONT when the line is ready for further parsing,
 * ROFF_IGN | ROFF_APPEND when the line ends with a continuation and
 * more input is needed, or ROFF_IGN when the expansion limit or the
 * size limit is exceeded.
 */
static int
roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc)
{
	struct mctx	*ctx;	/* current macro call context */
	char		 ubuf[24]; /* buffer to print the number */
	struct roff_node *n;	/* used for header comments */
	const char	*start;	/* start of the string to process */
	char		*stesc;	/* start of an escape sequence ('\\') */
	const char	*esct;	/* type of escape sequence */
	char		*ep;	/* end of comment string */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy buf->buf to */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	size_t		 asz;	/* length of the replacement */
	size_t		 rsz;	/* length of the rest of the string */
	int		 inaml;	/* length returned from mandoc_escape() */
	int		 expand_count;	/* to avoid infinite loops */
	int		 npos;	/* position in numeric expression */
	int		 arg_complete; /* argument not interrupted by eol */
	int		 quote_args; /* true for \\$@, false for \\$* */
	int		 done;	/* no more input available */
	int		 deftype; /* type of definition to paste */
	int		 rcsid;	/* kind of RCS id seen */
	enum mandocerr	 err;	/* for escape sequence problems */
	char		 sign;	/* increment number register */
	char		 term;	/* character terminating the escape */

	/* Search forward for comments. */

	done = 0;
	start = buf->buf + pos;
	for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
		if (stesc[0] != newesc || stesc[1] == '\0')
			continue;
		stesc++;
		if (*stesc != '"' && *stesc != '#')
			continue;

		/* Comment found, look for RCS id. */

		rcsid = 0;
		if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_OPENBSD;
			cp += 8;
		} else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_NETBSD;
			cp += 7;
		}
		if (cp != NULL &&
		    isalnum((unsigned char)*cp) == 0 &&
		    strchr(cp, '$') != NULL) {
			if (r->man->meta.rcsids & rcsid)
				mandoc_msg(MANDOCERR_RCS_REP, ln,
				    (int)(stesc - buf->buf) + 1,
				    "%s", stesc + 1);
			r->man->meta.rcsids |= rcsid;
		}

		/*
		 * Handle trailing whitespace.
		 * The post-decrement moves stesc back onto the
		 * escape character that starts the comment.
		 */

		ep = strchr(stesc--, '\0') - 1;
		if (*ep == '\n') {
			done = 1;
			ep--;
		}
		if (*ep == ' ' || *ep == '\t')
			mandoc_msg(MANDOCERR_SPACE_EOL,
			    ln, (int)(ep - buf->buf), NULL);

		/*
		 * Save comments preceding the title macro
		 * in the syntax tree.
		 */

		if (newesc != ASCII_ESC && r->options & MPARSE_COMMENT) {
			while (*ep == ' ' || *ep == '\t')
				ep--;
			ep[1] = '\0';
			n = roff_node_alloc(r->man,
			    ln, stesc + 1 - buf->buf,
			    ROFFT_COMMENT, TOKEN_NONE);
			n->string = mandoc_strdup(stesc + 2);
			roff_node_append(r->man, n);
			n->flags |= NODE_VALID | NODE_ENDED;
			r->man->next = ROFF_NEXT_SIBLING;
		}

		/* Line continuation with comment. */

		if (stesc[1] == '#') {
			*stesc = '\0';
			return ROFF_IGN | ROFF_APPEND;
		}

		/*
		 * Discard normal comments, together with blanks
		 * in front of them unless those are escaped.
		 */

		while (stesc > start && stesc[-1] == ' ' &&
		    (stesc == start + 1 || stesc[-2] != '\\'))
			stesc--;
		*stesc = '\0';
		break;
	}

	/* Nothing is left to process; stesc now is at the end. */

	if (stesc == start)
		return ROFF_CONT;
	stesc--;

	/* Notice the end of the input. */

	if (*stesc == '\n') {
		*stesc-- = '\0';
		done = 1;
	}

	/* Scan backwards from the end of the line for escapes. */

	expand_count = 0;
	while (stesc >= start) {
		if (*stesc != newesc) {

			/*
			 * If we have a non-standard escape character,
			 * escape literal backslashes because all
			 * processing in subsequent functions uses
			 * the standard escaping rules.
			 */

			if (newesc != ASCII_ESC && *stesc == '\\') {
				*stesc = '\0';
				buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
				    buf->buf, stesc + 1) + 1;
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				free(buf->buf);
				buf->buf = nbuf;
			}

			/* Search backwards for the next escape. */

			stesc--;
			continue;
		}

		/*
		 * If it is escaped, skip it.
		 * An even length of the run of escape characters
		 * ending here means that this one is itself escaped.
		 */

		for (cp = stesc - 1; cp >= start; cp--)
			if (*cp != r->escape)
				break;

		if ((stesc - cp) % 2 == 0) {
			while (stesc > cp)
				*stesc-- = '\\';
			continue;
		} else if (stesc[1] != '\0') {
			*stesc = '\\';
		} else {
			/* An escape character at the end of the line. */
			*stesc-- = '\0';
			if (done)
				continue;
			else
				return ROFF_IGN | ROFF_APPEND;
		}

		/* Decide whether to expand or to check only. */

		term = '\0';
		cp = stesc + 1;
		while (*cp == 'E')
			cp++;
		esct = cp;
		switch (*esct) {
		case '*':
		case '$':
			res = NULL;
			break;
		case 'B':
		case 'w':
			term = cp[1];
			/* FALLTHROUGH */
		case 'n':
			sign = cp[1];
			if (sign == '+' || sign == '-')
				cp++;
			res = ubuf;
			break;
		default:
			/* Not expanded here: check the syntax only. */
			err = MANDOCERR_OK;
			switch(mandoc_escape(&cp, &stnam, &inaml)) {
			case ESCAPE_SPECIAL:
				if (mchars_spec2cp(stnam, inaml) >= 0)
					break;
				/* FALLTHROUGH */
			case ESCAPE_ERROR:
				err = MANDOCERR_ESC_BAD;
				break;
			case ESCAPE_UNDEF:
				err = MANDOCERR_ESC_UNDEF;
				break;
			case ESCAPE_UNSUPP:
				err = MANDOCERR_ESC_UNSUPP;
				break;
			default:
				break;
			}
			if (err != MANDOCERR_OK)
				mandoc_msg(err, ln, (int)(stesc - buf->buf),
				    "%.*s", (int)(cp - stesc), stesc);
			stesc--;
			continue;
		}

		if (EXPAND_LIMIT < ++expand_count) {
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 */

		if (term == '\0') {
			switch (*++cp) {
			case '\0':
				maxl = 0;
				break;
			case '(':
				cp++;
				maxl = 2;
				break;
			case '[':
				cp++;
				term = ']';
				maxl = 0;
				break;
			default:
				maxl = 1;
				break;
			}
		} else {
			cp += 2;
			maxl = 0;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		naml = 0;
		arg_complete = 1;
		while (maxl == 0 || naml < maxl) {
			if (*cp == '\0') {
				mandoc_msg(MANDOCERR_ESC_BAD, ln,
				    (int)(stesc - buf->buf), "%s", stesc);
				arg_complete = 0;
				break;
			}
			if (maxl == 0 && *cp == term) {
				cp++;
				break;
			}
			if (*cp++ != '\\' || *esct != 'w') {
				naml++;
				continue;
			}
			/* Inside \w, only some escapes count as one. */
			switch (mandoc_escape(&cp, NULL, NULL)) {
			case ESCAPE_SPECIAL:
			case ESCAPE_UNICODE:
			case ESCAPE_NUMBERED:
			case ESCAPE_UNDEF:
			case ESCAPE_OVERSTRIKE:
				naml++;
				break;
			default:
				break;
			}
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		switch (*esct) {
		case '*':
			if (arg_complete) {
				deftype = ROFFDEF_USER | ROFFDEF_PRE;
				res = roff_getstrn(r, stnam, naml, &deftype);

				/*
				 * If not overridden, let \*(.T
				 * through to the formatters.
				 */

				if (res == NULL && naml == 2 &&
				    stnam[0] == '.' && stnam[1] == 'T') {
					roff_setstrn(&r->strtab,
					    ".T", 2, NULL, 0, 0);
					stesc--;
					continue;
				}
			}
			break;
		case '$':
			if (r->mstackpos < 0) {
				mandoc_msg(MANDOCERR_ARG_UNDEF, ln,
				    (int)(stesc - buf->buf), "%.3s", stesc);
				break;
			}
			ctx = r->mstack + r->mstackpos;
			npos = esct[1] - '1';
			if (npos >= 0 && npos <= 8) {
				res = npos < ctx->argc ?
				    ctx->argv[npos] : "";
				break;
			}
			if (esct[1] == '*')
				quote_args = 0;
			else if (esct[1] == '@')
				quote_args = 1;
			else {
				mandoc_msg(MANDOCERR_ARG_NONUM, ln,
				    (int)(stesc - buf->buf), "%.3s", stesc);
				break;
			}

			/* Calculate the total length of all arguments. */

			asz = 0;
			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					asz++;  /* blank */
				if (quote_args)
					asz += 2;  /* quotes */
				asz += strlen(ctx->argv[npos]);
			}

			/*
			 * Resize the buffer such that the arguments
			 * fit in place of the three-byte escape sequence.
			 * When shrinking, move the rest before the
			 * reallocation; when growing, move it afterwards.
			 */

			if (asz != 3) {
				rsz = buf->sz - (stesc - buf->buf) - 3;
				if (asz < 3)
					memmove(stesc + asz, stesc + 3, rsz);
				buf->sz += asz - 3;
				nbuf = mandoc_realloc(buf->buf, buf->sz);
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				buf->buf = nbuf;
				if (asz > 3)
					memmove(stesc + asz, stesc + 3, rsz);
			}

			/* Copy the arguments into the gap. */

			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					*stesc++ = ' ';
				if (quote_args)
					*stesc++ = '"';
				cp = ctx->argv[npos];
				while (*cp != '\0')
					*stesc++ = *cp++;
				if (quote_args)
					*stesc++ = '"';
			}
			continue;
		case 'B':
			npos = 0;
			ubuf[0] = arg_complete &&
			    roff_evalnum(r, ln, stnam, &npos,
			      NULL, ROFFNUM_SCALE) &&
			    stnam + npos + 1 == cp ? '1' : '0';
			ubuf[1] = '\0';
			break;
		case 'n':
			if (arg_complete)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, stnam, naml, sign));
			else
				ubuf[0] = '\0';
			break;
		case 'w':
			/* use even incomplete args */
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
			    24 * (int)naml);
			break;
		}

		if (res == NULL) {
			if (*esct == '*')
				mandoc_msg(MANDOCERR_STR_UNDEF,
				    ln, (int)(stesc - buf->buf),
				    "%.*s", (int)naml, stnam);
			res = "";
		} else if (buf->sz + strlen(res) > SHRT_MAX) {
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/* Replace the escape sequence by the string. */

		*stesc = '\0';
		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
		    buf->buf, res, cp) + 1;

		/*
		 * Prepare for the next replacement.
		 * Resuming at the end of the inserted string means
		 * that the replacement text is scanned as well.
		 */

		start = nbuf + pos;
		stesc = nbuf + (stesc - buf->buf) + strlen(res);
		free(buf->buf);
		buf->buf = nbuf;
	}
	return ROFF_CONT;
}
1646
/*
 * Parse a quoted or unquoted roff-style request or macro argument.
 * Return a copy of the parsed argument made with mandoc_strdup(),
 * not including the opening quote in case the argument is quoted.
 * If the argument contained a double backslash, the copy is also
 * passed through roff_expand() with ASCII_ESC as the escape
 * character, and if that reports ROFF_IGN, a copy of the empty
 * string is returned instead.
 * NOTE(review): presumably the caller has to free the result.
 * NUL-terminate the argument in place.
 * Collapse pairs of quotes inside quoted arguments.
 * Advance the argument pointer *cpp to the next argument,
 * or to the NUL byte terminating the argument line,
 * and advance *pos by the same amount.
 */
char *
roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
{
	struct buf	 buf;
	char		*cp, *start;
	int		 newesc, pairs, quoted, white;

	/* Quoting can only start with a new word. */
	start = *cpp;
	quoted = 0;
	if ('"' == *start) {
		quoted = 1;
		start++;
	}

	/* The variable pairs counts the bytes dropped so far. */
	newesc = pairs = white = 0;
	for (cp = start; '\0' != *cp; cp++) {

		/*
		 * Move the following text left
		 * after quoted quotes and after "\\" and "\t".
		 */
		if (pairs)
			cp[-pairs] = cp[0];

		if ('\\' == cp[0]) {
			/*
			 * In copy mode, translate double to single
			 * backslashes and backslash-t to literal tabs.
			 */
			switch (cp[1]) {
			case 'a':
			case 't':
				cp[-pairs] = '\t';
				pairs++;
				cp++;
				break;
			case '\\':
				/* Mark the escape for roff_expand() below. */
				newesc = 1;
				cp[-pairs] = ASCII_ESC;
				pairs++;
				cp++;
				break;
			case ' ':
				/* Skip escaped blanks. */
				if (0 == quoted)
					cp++;
				break;
			default:
				break;
			}
		} else if (0 == quoted) {
			if (' ' == cp[0]) {
				/* Unescaped blanks end unquoted args. */
				white = 1;
				break;
			}
		} else if ('"' == cp[0]) {
			if ('"' == cp[1]) {
				/* Quoted quotes collapse. */
				pairs++;
				cp++;
			} else {
				/* Unquoted quotes end quoted args. */
				quoted = 2;
				break;
			}
		}
	}

	/* Quoted argument without a closing quote. */
	if (1 == quoted)
		mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);

	/* NUL-terminate this argument and move to the next one. */
	if (pairs)
		cp[-pairs] = '\0';
	if ('\0' != *cp) {
		*cp++ = '\0';
		while (' ' == *cp)
			cp++;
	}
	*pos += (int)(cp - start) + (quoted ? 1 : 0);
	*cpp = cp;

	if ('\0' == *cp && (white || ' ' == cp[-1]))
		mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);

	/* Without double backslashes, the plain copy is the result. */
	start = mandoc_strdup(start);
	if (newesc == 0)
		return start;

	/* Otherwise, check or interpolate the marked escapes. */
	buf.buf = start;
	buf.sz = strlen(start) + 1;
	buf.next = NULL;
	if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) {
		free(buf.buf);
		buf.buf = mandoc_strdup("");
	}
	return buf.buf;
}
1757
1758
1759 /*
1760 * Process text streams.
1761 */
1762 static int
1763 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1764 {
1765 size_t sz;
1766 const char *start;
1767 char *p;
1768 int isz;
1769 enum mandoc_esc esc;
1770
1771 /* Spring the input line trap. */
1772
1773 if (roffit_lines == 1) {
1774 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1775 free(buf->buf);
1776 buf->buf = p;
1777 buf->sz = isz + 1;
1778 *offs = 0;
1779 free(roffit_macro);
1780 roffit_lines = 0;
1781 return ROFF_REPARSE;
1782 } else if (roffit_lines > 1)
1783 --roffit_lines;
1784
1785 if (roffce_node != NULL && buf->buf[pos] != '\0') {
1786 if (roffce_lines < 1) {
1787 r->man->last = roffce_node;
1788 r->man->next = ROFF_NEXT_SIBLING;
1789 roffce_lines = 0;
1790 roffce_node = NULL;
1791 } else
1792 roffce_lines--;
1793 }
1794
1795 /* Convert all breakable hyphens into ASCII_HYPH. */
1796
1797 start = p = buf->buf + pos;
1798
1799 while (*p != '\0') {
1800 sz = strcspn(p, "-\\");
1801 p += sz;
1802
1803 if (*p == '\0')
1804 break;
1805
1806 if (*p == '\\') {
1807 /* Skip over escapes. */
1808 p++;
1809 esc = mandoc_escape((const char **)&p, NULL, NULL);
1810 if (esc == ESCAPE_ERROR)
1811 break;
1812 while (*p == '-')
1813 p++;
1814 continue;
1815 } else if (p == start) {
1816 p++;
1817 continue;
1818 }
1819
1820 if (isalpha((unsigned char)p[-1]) &&
1821 isalpha((unsigned char)p[1]))
1822 *p = ASCII_HYPH;
1823 p++;
1824 }
1825 return ROFF_CONT;
1826 }
1827
/*
 * Parse one input line.
 *
 * Arguments:
 *   r     roff parser state
 *   ln    input line number, used for messages and handlers
 *   buf   the line buffer; roff_expand() may replace buf->buf
 *   offs  on entry, the offset of the line in buf->buf;
 *         passed on to the text, sub, and request handlers
 *   len   only used for the warning about long text lines
 *
 * Returns a ROFF_* status code: the result of roff_eqndelim(),
 * roff_expand(), or one of the handlers if they end processing
 * of the line, ROFF_IGN for lines consumed here, or the result
 * of roff_parsetext() or roff_req_or_macro() otherwise.
 */
int
roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
{
	enum roff_tok	 t;
	int		 e;
	int		 pos;	/* parse point */
	int		 spos;	/* saved parse point for messages */
	int		 ppos;	/* original offset in buf->buf */
	int		 ctl;	/* macro line (boolean) */

	ppos = pos = *offs;

	/* Warn about long text lines outside tables, equations, no-fill. */

	if (len > 80 && r->tbl == NULL && r->eqn == NULL &&
	    (r->man->flags & ROFF_NOFILL) == 0 &&
	    strchr(" .\\", buf->buf[pos]) == NULL &&
	    buf->buf[pos] != r->control &&
	    strcspn(buf->buf, " ") < 80)
		mandoc_msg(MANDOCERR_TEXT_LONG, ln, (int)len - 1,
		    "%.20s...", buf->buf + pos);

	/* Handle in-line equation delimiters. */

	if (r->tbl == NULL &&
	    r->last_eqn != NULL && r->last_eqn->delim &&
	    (r->eqn == NULL || r->eqn_inline)) {
		e = roff_eqndelim(r, buf, pos);
		if (e == ROFF_REPARSE)
			return e;
		assert(e == ROFF_CONT);
	}

	/* Expand some escape sequences. */

	e = roff_expand(r, buf, ln, pos, r->escape);
	if ((e & ROFF_MASK) == ROFF_IGN)
		return e;
	assert(e == ROFF_CONT);

	/* This may advance pos; ppos keeps the start of the line. */

	ctl = roff_getcontrol(r, buf->buf, &pos);

	/*
	 * First, if a scope is open and we're not a macro, pass the
	 * text through the macro's filter.
	 * Equations process all content themselves.
	 * Tables process almost all content themselves, but we want
	 * to warn about macros before passing it there.
	 */

	if (r->last != NULL && ! ctl) {
		t = r->last->tok;
		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
		if ((e & ROFF_MASK) == ROFF_IGN)
			return e;
		/* Keep only the bits outside ROFF_MASK for later. */
		e &= ~ROFF_MASK;
	} else
		e = ROFF_IGN;
	/* Inside an equation, everything but ".EN" is equation input. */
	if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
		eqn_read(r->eqn, buf->buf + ppos);
		return e;
	}
	if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
		tbl_read(r->tbl, ln, buf->buf, ppos);
		roff_addtbl(r->man, ln, r->tbl);
		return e;
	}
	if ( ! ctl) {
		/* Text ends the saving of header comments. */
		r->options &= ~MPARSE_COMMENT;
		return roff_parsetext(r, buf, pos, offs) | e;
	}

	/* Skip empty request lines. */

	if (buf->buf[pos] == '"') {
		mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
		return ROFF_IGN;
	} else if (buf->buf[pos] == '\0')
		return ROFF_IGN;

	/*
	 * If a scope is open, go to the child handler for that macro,
	 * as it may want to preprocess before doing anything with it.
	 */

	if (r->last) {
		t = r->last->tok;
		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
	}

	/*
	 * No scope is open: parse the request or macro name.
	 * Keep the position of the name for messages because
	 * roff_parse() may advance pos beyond it.
	 */

	r->options &= ~MPARSE_COMMENT;
	spos = pos;
	t = roff_parse(r, buf->buf, &pos, ln, ppos);
	return roff_req_or_macro(r, t, buf, ln, spos, pos, offs);
}
1921
1922 /*
1923 * Handle a new request or macro.
1924 * May be called outside any scope or from inside a conditional scope.
1925 */
1926 static int
1927 roff_req_or_macro(ROFF_ARGS) {
1928
1929 /* For now, tables ignore most macros and some request. */
1930
1931 if (r->tbl != NULL && (tok == TOKEN_NONE || tok == ROFF_TS ||
1932 tok == ROFF_br || tok == ROFF_ce || tok == ROFF_rj ||
1933 tok == ROFF_sp)) {
1934 mandoc_msg(MANDOCERR_TBLMACRO,
1935 ln, ppos, "%s", buf->buf + ppos);
1936 if (tok != TOKEN_NONE)
1937 return ROFF_IGN;
1938 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1939 pos++;
1940 while (buf->buf[pos] == ' ')
1941 pos++;
1942 tbl_read(r->tbl, ln, buf->buf, pos);
1943 roff_addtbl(r->man, ln, r->tbl);
1944 return ROFF_IGN;
1945 }
1946
1947 /* For now, let high level macros abort .ce mode. */
1948
1949 if (roffce_node != NULL &&
1950 (tok == TOKEN_NONE || tok == ROFF_Dd || tok == ROFF_EQ ||
1951 tok == ROFF_TH || tok == ROFF_TS)) {
1952 r->man->last = roffce_node;
1953 r->man->next = ROFF_NEXT_SIBLING;
1954 roffce_lines = 0;
1955 roffce_node = NULL;
1956 }
1957
1958 /*
1959 * This is neither a roff request nor a user-defined macro.
1960 * Let the standard macro set parsers handle it.
1961 */
1962
1963 if (tok == TOKEN_NONE)
1964 return ROFF_CONT;
1965
1966 /* Execute a roff request or a user-defined macro. */
1967
1968 return (*roffs[tok].proc)(r, tok, buf, ln, ppos, pos, offs);
1969 }
1970
1971 /*
1972 * Internal interface function to tell the roff parser that execution
1973 * of the current macro ended. This is required because macro
1974 * definitions usually do not end with a .return request.
1975 */
1976 void
1977 roff_userret(struct roff *r)
1978 {
1979 struct mctx *ctx;
1980 int i;
1981
1982 assert(r->mstackpos >= 0);
1983 ctx = r->mstack + r->mstackpos;
1984 for (i = 0; i < ctx->argc; i++)
1985 free(ctx->argv[i]);
1986 ctx->argc = 0;
1987 r->mstackpos--;
1988 }
1989
1990 void
1991 roff_endparse(struct roff *r)
1992 {
1993 if (r->last != NULL)
1994 mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
1995 r->last->col, "%s", roff_name[r->last->tok]);
1996
1997 if (r->eqn != NULL) {
1998 mandoc_msg(MANDOCERR_BLK_NOEND,
1999 r->eqn->node->line, r->eqn->node->pos, "EQ");
2000 eqn_parse(r->eqn);
2001 r->eqn = NULL;
2002 }
2003
2004 if (r->tbl != NULL) {
2005 tbl_end(r->tbl, 1);
2006 r->tbl = NULL;
2007 }
2008 }
2009
2010 /*
2011 * Parse the request or macro name at buf[*pos].
2012 * Return ROFF_RENAMED, ROFF_USERDEF, or a ROFF_* token value.
2013 * For empty, undefined, mdoc(7), and man(7) macros, return TOKEN_NONE.
2014 * As a side effect, set r->current_string to the definition or to NULL.
2015 */
2016 static enum roff_tok
2017 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
2018 {
2019 char *cp;
2020 const char *mac;
2021 size_t maclen;
2022 int deftype;
2023 enum roff_tok t;
2024
2025 cp = buf + *pos;
2026
2027 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
2028 return TOKEN_NONE;
2029
2030 mac = cp;
2031 maclen = roff_getname(r, &cp, ln, ppos);
2032
2033 deftype = ROFFDEF_USER | ROFFDEF_REN;
2034 r->current_string = roff_getstrn(r, mac, maclen, &deftype);
2035 switch (deftype) {
2036 case ROFFDEF_USER:
2037 t = ROFF_USERDEF;
2038 break;
2039 case ROFFDEF_REN:
2040 t = ROFF_RENAMED;
2041 break;
2042 default:
2043 t = roffhash_find(r->reqtab, mac, maclen);
2044 break;
2045 }
2046 if (t != TOKEN_NONE)
2047 *pos = cp - buf;
2048 else if (deftype == ROFFDEF_UNDEF) {
2049 /* Using an undefined macro defines it to be empty. */
2050 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
2051 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
2052 }
2053 return t;
2054 }
2055
2056 /* --- handling of request blocks ----------------------------------------- */
2057
2058 /*
2059 * Close a macro definition block or an "ignore" block.
2060 */
2061 static int
2062 roff_cblock(ROFF_ARGS)
2063 {
2064 int rr;
2065
2066 if (r->last == NULL) {
2067 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2068 return ROFF_IGN;
2069 }
2070
2071 switch (r->last->tok) {
2072 case ROFF_am:
2073 case ROFF_ami:
2074 case ROFF_de:
2075 case ROFF_dei:
2076 case ROFF_ig:
2077 break;
2078 case ROFF_am1:
2079 case ROFF_de1:
2080 /* Remapped in roff_block(). */
2081 abort();
2082 default:
2083 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2084 return ROFF_IGN;
2085 }
2086
2087 roffnode_pop(r);
2088 roffnode_cleanscope(r);
2089
2090 /*
2091 * If a conditional block with braces is still open,
2092 * check for "\}" block end markers.
2093 */
2094
2095 if (r->last != NULL && r->last->endspan < 0) {
2096 rr = 1; /* If arguments follow "\}", warn about them. */
2097 roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2098 }
2099
2100 if (buf->buf[pos] != '\0')
2101 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
2102 ".. %s", buf->buf + pos);
2103
2104 return ROFF_IGN;
2105 }
2106
2107 /*
2108 * Pop all nodes ending at the end of the current input line.
2109 * Return the number of loops ended.
2110 */
2111 static int
2112 roffnode_cleanscope(struct roff *r)
2113 {
2114 int inloop;
2115
2116 inloop = 0;
2117 while (r->last != NULL && r->last->endspan > 0) {
2118 if (--r->last->endspan != 0)
2119 break;
2120 inloop += roffnode_pop(r);
2121 }
2122 return inloop;
2123 }
2124
2125 /*
2126 * Handle the closing "\}" of a conditional block.
2127 * Apart from generating warnings, this only pops nodes.
2128 * Return the number of loops ended.
2129 */
2130 static int
2131 roff_ccond(struct roff *r, int ln, int ppos)
2132 {
2133 if (NULL == r->last) {
2134 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2135 return 0;
2136 }
2137
2138 switch (r->last->tok) {
2139 case ROFF_el:
2140 case ROFF_ie:
2141 case ROFF_if:
2142 case ROFF_while:
2143 break;
2144 default:
2145 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2146 return 0;
2147 }
2148
2149 if (r->last->endspan > -1) {
2150 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2151 return 0;
2152 }
2153
2154 return roffnode_pop(r) + roffnode_cleanscope(r);
2155 }
2156
2157 static int
2158 roff_block(ROFF_ARGS)
2159 {
2160 const char *name, *value;
2161 char *call, *cp, *iname, *rname;
2162 size_t csz, namesz, rsz;
2163 int deftype;
2164
2165 /* Ignore groff compatibility mode for now. */
2166
2167 if (tok == ROFF_de1)
2168 tok = ROFF_de;
2169 else if (tok == ROFF_dei1)
2170 tok = ROFF_dei;
2171 else if (tok == ROFF_am1)
2172 tok = ROFF_am;
2173 else if (tok == ROFF_ami1)
2174 tok = ROFF_ami;
2175
2176 /* Parse the macro name argument. */
2177
2178 cp = buf->buf + pos;
2179 if (tok == ROFF_ig) {
2180 iname = NULL;
2181 namesz = 0;
2182 } else {
2183 iname = cp;
2184 namesz = roff_getname(r, &cp, ln, ppos);
2185 iname[namesz] = '\0';
2186 }
2187
2188 /* Resolve the macro name argument if it is indirect. */
2189
2190 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2191 deftype = ROFFDEF_USER;
2192 name = roff_getstrn(r, iname, namesz, &deftype);
2193 if (name == NULL) {
2194 mandoc_msg(MANDOCERR_STR_UNDEF,
2195 ln, (int)(iname - buf->buf),
2196 "%.*s", (int)namesz, iname);
2197 namesz = 0;
2198 } else
2199 namesz = strlen(name);
2200 } else
2201 name = iname;
2202
2203 if (namesz == 0 && tok != ROFF_ig) {
2204 mandoc_msg(MANDOCERR_REQ_EMPTY,
2205 ln, ppos, "%s", roff_name[tok]);
2206 return ROFF_IGN;
2207 }
2208
2209 roffnode_push(r, tok, name, ln, ppos);
2210
2211 /*
2212 * At the beginning of a `de' macro, clear the existing string
2213 * with the same name, if there is one. New content will be
2214 * appended from roff_block_text() in multiline mode.
2215 */
2216
2217 if (tok == ROFF_de || tok == ROFF_dei) {
2218 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
2219 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2220 } else if (tok == ROFF_am || tok == ROFF_ami) {
2221 deftype = ROFFDEF_ANY;
2222 value = roff_getstrn(r, iname, namesz, &deftype);
2223 switch (deftype) { /* Before appending, ... */
2224 case ROFFDEF_PRE: /* copy predefined to user-defined. */
2225 roff_setstrn(&r->strtab, name, namesz,
2226 value, strlen(value), 0);
2227 break;
2228 case ROFFDEF_REN: /* call original standard macro. */
2229 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2230 (int)strlen(value), value);
2231 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2232 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2233 free(call);
2234 break;
2235 case ROFFDEF_STD: /* rename and call standard macro. */
2236 rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
2237 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
2238 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2239 (int)rsz, rname);
2240 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2241 free(call);
2242 free(rname);
2243 break;
2244 default:
2245 break;
2246 }
2247 }
2248
2249 if (*cp == '\0')
2250 return ROFF_IGN;
2251
2252 /* Get the custom end marker. */
2253
2254 iname = cp;
2255 namesz = roff_getname(r, &cp, ln, ppos);
2256
2257 /* Resolve the end marker if it is indirect. */
2258
2259 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2260 deftype = ROFFDEF_USER;
2261 name = roff_getstrn(r, iname, namesz, &deftype);
2262 if (name == NULL) {
2263 mandoc_msg(MANDOCERR_STR_UNDEF,
2264 ln, (int)(iname - buf->buf),
2265 "%.*s", (int)namesz, iname);
2266 namesz = 0;
2267 } else
2268 namesz = strlen(name);
2269 } else
2270 name = iname;
2271
2272 if (namesz)
2273 r->last->end = mandoc_strndup(name, namesz);
2274
2275 if (*cp != '\0')
2276 mandoc_msg(MANDOCERR_ARG_EXCESS,
2277 ln, pos, ".%s ... %s", roff_name[tok], cp);
2278
2279 return ROFF_IGN;
2280 }
2281
2282 static int
2283 roff_block_sub(ROFF_ARGS)
2284 {
2285 enum roff_tok t;
2286 int i, j;
2287
2288 /*
2289 * If a custom end marker is a user-defined or predefined macro
2290 * or a request, interpret it.
2291 */
2292
2293 if (r->last->end) {
2294 for (i = pos, j = 0; r->last->end[j]; j++, i++)
2295 if (buf->buf[i] != r->last->end[j])
2296 break;
2297
2298 if (r->last->end[j] == '\0' &&
2299 (buf->buf[i] == '\0' ||
2300 buf->buf[i] == ' ' ||
2301 buf->buf[i] == '\t')) {
2302 roffnode_pop(r);
2303 roffnode_cleanscope(r);
2304
2305 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2306 i++;
2307
2308 pos = i;
2309 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2310 TOKEN_NONE)
2311 return ROFF_RERUN;
2312 return ROFF_IGN;
2313 }
2314 }
2315
2316 /* Handle the standard end marker. */
2317
2318 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2319 if (t == ROFF_cblock)
2320 return roff_cblock(r, t, buf, ln, ppos, pos, offs);
2321
2322 /* Not an end marker, so append the line to the block. */
2323
2324 if (tok != ROFF_ig)
2325 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2326 return ROFF_IGN;
2327 }
2328
2329 static int
2330 roff_block_text(ROFF_ARGS)
2331 {
2332
2333 if (tok != ROFF_ig)
2334 roff_setstr(r, r->last->name, buf->buf + pos, 2);
2335
2336 return ROFF_IGN;
2337 }
2338
2339 /*
2340 * Check for a closing "\}" and handle it.
2341 * In this function, the final "int *offs" argument is used for
2342 * different purposes than elsewhere:
2343 * Input: *offs == 0: caller wants to discard arguments following \}
2344 * *offs == 1: caller wants to preserve text following \}
2345 * Output: *offs = 0: tell caller to discard input line
2346 * *offs = 1: tell caller to use input line
2347 */
2348 static int
2349 roff_cond_checkend(ROFF_ARGS)
2350 {
2351 char *ep;
2352 int endloop, irc, rr;
2353
2354 irc = ROFF_IGN;
2355 rr = r->last->rule;
2356 endloop = tok != ROFF_while ? ROFF_IGN :
2357 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2358 if (roffnode_cleanscope(r))
2359 irc |= endloop;
2360
2361 /*
2362 * If "\}" occurs on a macro line without a preceding macro or
2363 * a text line contains nothing else, drop the line completely.
2364 */
2365
2366 ep = buf->buf + pos;
2367 if (ep[0] == '\\' && ep[1] == '}' && (ep[2] == '\0' || *offs == 0))
2368 rr = 0;
2369
2370 /*
2371 * The closing delimiter "\}" rewinds the conditional scope
2372 * but is otherwise ignored when interpreting the line.
2373 */
2374
2375 while ((ep = strchr(ep, '\\')) != NULL) {
2376 switch (ep[1]) {
2377 case '}':
2378 if (ep[2] == '\0')
2379 ep[0] = '\0';
2380 else if (rr)
2381 ep[1] = '&';
2382 else
2383 memmove(ep, ep + 2, strlen(ep + 2) + 1);
2384 if (roff_ccond(r, ln, ep - buf->buf))
2385 irc |= endloop;
2386 break;
2387 case '\0':
2388 ++ep;
2389 break;
2390 default:
2391 ep += 2;
2392 break;
2393 }
2394 }
2395 *offs = rr;
2396 return irc;
2397 }
2398
2399 /*
2400 * Parse and process a request or macro line in conditional scope.
2401 */
2402 static int
2403 roff_cond_sub(ROFF_ARGS)
2404 {
2405 struct roffnode *bl;
2406 int irc, rr, spos;
2407 enum roff_tok t;
2408
2409 rr = 0; /* If arguments follow "\}", skip them. */
2410 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2411 spos = pos;
2412 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2413
2414 /*
2415 * Handle requests and macros if the conditional evaluated
2416 * to true or if they are structurally required.
2417 * The .break request is always handled specially.
2418 */
2419
2420 if (t == ROFF_break) {
2421 if (irc & ROFF_LOOPMASK)
2422 irc = ROFF_IGN | ROFF_LOOPEXIT;
2423 else if (rr) {
2424 for (bl = r->last; bl != NULL; bl = bl->parent) {
2425 bl->rule = 0;
2426 if (bl->tok == ROFF_while)
2427 break;
2428 }
2429 }
2430 } else if (rr || (t < TOKEN_NONE && roffs[t].flags & ROFFMAC_STRUCT)) {
2431 irc |= roff_req_or_macro(r, t, buf, ln, spos, pos, offs);
2432 if (irc & ROFF_WHILE)
2433 irc &= ~(ROFF_LOOPCONT | ROFF_LOOPEXIT);
2434 }
2435 return irc;
2436 }
2437
2438 /*
2439 * Parse and process a text line in conditional scope.
2440 */
2441 static int
2442 roff_cond_text(ROFF_ARGS)
2443 {
2444 int irc, rr;
2445
2446 rr = 1; /* If arguments follow "\}", preserve them. */
2447 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2448 if (rr)
2449 irc |= ROFF_CONT;
2450 return irc;
2451 }
2452
2453 /* --- handling of numeric and conditional expressions -------------------- */
2454
2455 /*
2456 * Parse a single signed integer number. Stop at the first non-digit.
2457 * If there is at least one digit, return success and advance the
2458 * parse point, else return failure and let the parse point unchanged.
2459 * Ignore overflows, treat them just like the C language.
2460 */
2461 static int
2462 roff_getnum(const char *v, int *pos, int *res, int flags)
2463 {
2464 int myres, scaled, n, p;
2465
2466 if (NULL == res)
2467 res = &myres;
2468
2469 p = *pos;
2470 n = v[p] == '-';
2471 if (n || v[p] == '+')
2472 p++;
2473
2474 if (flags & ROFFNUM_WHITE)
2475 while (isspace((unsigned char)v[p]))
2476 p++;
2477
2478 for (*res = 0; isdigit((unsigned char)v[p]); p++)
2479 *res = 10 * *res + v[p] - '0';
2480 if (p == *pos + n)
2481 return 0;
2482
2483 if (n)
2484 *res = -*res;
2485
2486 /* Each number may be followed by one optional scaling unit. */
2487
2488 switch (v[p]) {
2489 case 'f':
2490 scaled = *res * 65536;
2491 break;
2492 case 'i':
2493 scaled = *res * 240;
2494 break;
2495 case 'c':
2496 scaled = *res * 240 / 2.54;
2497 break;
2498 case 'v':
2499 case 'P':
2500 scaled = *res * 40;
2501 break;
2502 case 'm':
2503 case 'n':
2504 scaled = *res * 24;
2505 break;
2506 case 'p':
2507 scaled = *res * 10 / 3;
2508 break;
2509 case 'u':
2510 scaled = *res;
2511 break;
2512 case 'M':
2513 scaled = *res * 6 / 25;
2514 break;
2515 default:
2516 scaled = *res;
2517 p--;
2518 break;
2519 }
2520 if (flags & ROFFNUM_SCALE)
2521 *res = scaled;
2522
2523 *pos = p + 1;
2524 return 1;
2525 }
2526
/*
 * Evaluate a string comparison condition.
 * The first character is the delimiter.
 * Succeed if the string up to its second occurrence
 * matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or lacking that, to the end of the line.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim, *lhs, *rhs;
	int		 equal;

	equal = 0;
	delim = v + *pos;		/* initial delimiter */
	lhs = delim + 1;		/* cursor in the first string */
	rhs = strchr(lhs, *delim);	/* middle delimiter, if any */

	if (rhs != NULL) {
		/* Walk both strings in parallel. */
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: look for the final delimiter. */
				rhs = strchr(rhs, *delim);
				break;
			}
			if (*rhs == *delim) {
				/* Both strings ended simultaneously. */
				equal = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = (int)(rhs - v);
	return equal;
}
2569
2570 /*
2571 * Evaluate an optionally negated single character, numerical,
2572 * or string condition.
2573 */
2574 static int
2575 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2576 {
2577 const char *start, *end;
2578 char *cp, *name;
2579 size_t sz;
2580 int deftype, len, number, savepos, istrue, wanttrue;
2581
2582 if ('!' == v[*pos]) {
2583 wanttrue = 0;
2584 (*pos)++;
2585 } else
2586 wanttrue = 1;
2587
2588 switch (v[*pos]) {
2589 case '\0':
2590 return 0;
2591 case 'n':
2592 case 'o':
2593 (*pos)++;
2594 return wanttrue;
2595 case 'e':
2596 case 't':
2597 case 'v':
2598 (*pos)++;
2599 return !wanttrue;
2600 case 'c':
2601 do {
2602 (*pos)++;
2603 } while (v[*pos] == ' ');
2604
2605 /*
2606 * Quirk for groff compatibility:
2607 * The horizontal tab is neither available nor unavailable.
2608 */
2609
2610 if (v[*pos] == '\t') {
2611 (*pos)++;
2612 return 0;
2613 }
2614
2615 /* Printable ASCII characters are available. */
2616
2617 if (v[*pos] != '\\') {
2618 (*pos)++;
2619 return wanttrue;
2620 }
2621
2622 end = v + ++*pos;
2623 switch (mandoc_escape(&end, &start, &len)) {
2624 case ESCAPE_SPECIAL:
2625 istrue = mchars_spec2cp(start, len) != -1;
2626 break;
2627 case ESCAPE_UNICODE:
2628 istrue = 1;
2629 break;
2630 case ESCAPE_NUMBERED:
2631 istrue = mchars_num2char(start, len) != -1;
2632 break;
2633 default:
2634 istrue = !wanttrue;
2635 break;
2636 }
2637 *pos = end - v;
2638 return istrue == wanttrue;
2639 case 'd':
2640 case 'r':
2641 cp = v + *pos + 1;
2642 while (*cp == ' ')
2643 cp++;
2644 name = cp;
2645 sz = roff_getname(r, &cp, ln, cp - v);
2646 if (sz == 0)
2647 istrue = 0;
2648 else if (v[*pos] == 'r')
2649 istrue = roff_hasregn(r, name, sz);
2650 else {
2651 deftype = ROFFDEF_ANY;
2652 roff_getstrn(r, name, sz, &deftype);
2653 istrue = !!deftype;
2654 }
2655 *pos = (name + sz) - v;
2656 return istrue == wanttrue;
2657 default:
2658 break;
2659 }
2660
2661 savepos = *pos;
2662 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2663 return (number > 0) == wanttrue;
2664 else if (*pos == savepos)
2665 return roff_evalstrcond(v, pos) == wanttrue;
2666 else
2667 return 0;
2668 }
2669
2670 static int
2671 roff_line_ignore(ROFF_ARGS)
2672 {
2673
2674 return ROFF_IGN;
2675 }
2676
2677 static int
2678 roff_insec(ROFF_ARGS)
2679 {
2680
2681 mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]);
2682 return ROFF_IGN;
2683 }
2684
2685 static int
2686 roff_unsupp(ROFF_ARGS)
2687 {
2688
2689 mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]);
2690 return ROFF_IGN;
2691 }
2692
2693 static int
2694 roff_cond(ROFF_ARGS)
2695 {
2696 int irc;
2697
2698 roffnode_push(r, tok, NULL, ln, ppos);
2699
2700 /*
2701 * An `.el' has no conditional body: it will consume the value
2702 * of the current rstack entry set in prior `ie' calls or
2703 * defaults to DENY.
2704 *
2705 * If we're not an `el', however, then evaluate the conditional.
2706 */
2707
2708 r->last->rule = tok == ROFF_el ?
2709 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2710 roff_evalcond(r, ln, buf->buf, &pos);
2711
2712 /*
2713 * An if-else will put the NEGATION of the current evaluated
2714 * conditional into the stack of rules.
2715 */
2716
2717 if (tok == ROFF_ie) {
2718 if (r->rstackpos + 1 == r->rstacksz) {
2719 r->rstacksz += 16;
2720 r->rstack = mandoc_reallocarray(r->rstack,
2721 r->rstacksz, sizeof(int));
2722 }
2723 r->rstack[++r->rstackpos] = !r->last->rule;
2724 }
2725
2726 /* If the parent has false as its rule, then so do we. */
2727
2728 if (r->last->parent && !r->last->parent->rule)
2729 r->last->rule = 0;
2730
2731 /*
2732 * Determine scope.
2733 * If there is nothing on the line after the conditional,
2734 * not even whitespace, use next-line scope.
2735 * Except that .while does not support next-line scope.
2736 */
2737
2738 if (buf->buf[pos] == '\0' && tok != ROFF_while) {
2739 r->last->endspan = 2;
2740 goto out;
2741 }
2742
2743 while (buf->buf[pos] == ' ')
2744 pos++;
2745
2746 /* An opening brace requests multiline scope. */
2747
2748 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2749 r->last->endspan = -1;
2750 pos += 2;
2751 while (buf->buf[pos] == ' ')
2752 pos++;
2753 goto out;
2754 }
2755
2756 /*
2757 * Anything else following the conditional causes
2758 * single-line scope. Warn if the scope contains
2759 * nothing but trailing whitespace.
2760 */
2761
2762 if (buf->buf[pos] == '\0')
2763 mandoc_msg(MANDOCERR_COND_EMPTY,
2764 ln, ppos, "%s", roff_name[tok]);
2765
2766 r->last->endspan = 1;
2767
2768 out:
2769 *offs = pos;
2770 irc = ROFF_RERUN;
2771 if (tok == ROFF_while)
2772 irc |= ROFF_WHILE;
2773 return irc;
2774 }
2775
2776 static int
2777 roff_ds(ROFF_ARGS)
2778 {
2779 char *string;
2780 const char *name;
2781 size_t namesz;
2782
2783 /* Ignore groff compatibility mode for now. */
2784
2785 if (tok == ROFF_ds1)
2786 tok = ROFF_ds;
2787 else if (tok == ROFF_as1)
2788 tok = ROFF_as;
2789
2790 /*
2791 * The first word is the name of the string.
2792 * If it is empty or terminated by an escape sequence,
2793 * abort the `ds' request without defining anything.
2794 */
2795
2796 name = string = buf->buf + pos;
2797 if (*name == '\0')
2798 return ROFF_IGN;
2799
2800 namesz = roff_getname(r, &string, ln, pos);
2801 switch (name[namesz]) {
2802 case '\\':
2803 return ROFF_IGN;
2804 case '\t':
2805 string = buf->buf + pos + namesz;
2806 break;
2807 default:
2808 break;
2809 }
2810
2811 /* Read past the initial double-quote, if any. */
2812 if (*string == '"')
2813 string++;
2814
2815 /* The rest is the value. */
2816 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2817 ROFF_as == tok);
2818 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2819 return ROFF_IGN;
2820 }
2821
/*
 * Parse a single operator, one or two characters long.
 * If the operator is recognized, return success and advance the
 * parse point, else return failure and leave the parse point unchanged.
 * Two-character operators are reported in *res as single letters:
 * 'l' for <=, 'g' for >=, '!' for <>, 'i' for <?, 'a' for >?,
 * and '=' for ==.
 * The first character is stored in *res even on failure.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	char	 op, next;
	int	 width;

	op = v[*pos];
	*res = op;
	width = 1;

	if (op == '<') {
		next = v[*pos + 1];
		if (next == '=') {
			*res = 'l';
			width = 2;
		} else if (next == '>') {
			*res = '!';
			width = 2;
		} else if (next == '?') {
			*res = 'i';
			width = 2;
		}
	} else if (op == '>') {
		next = v[*pos + 1];
		if (next == '=') {
			*res = 'g';
			width = 2;
		} else if (next == '?') {
			*res = 'a';
			width = 2;
		}
	} else if (op == '=') {
		if (v[*pos + 1] == '=')
			width = 2;
	} else if (op != '+' && op != '-' && op != '*' && op != '/' &&
	    op != '%' && op != '&' && op != ':')
		return 0;

	*pos += width;
	return *res;
}
2885
2886 /*
2887 * Evaluate either a parenthesized numeric expression
2888 * or a single signed integer number.
2889 */
2890 static int
2891 roff_evalpar(struct roff *r, int ln,
2892 const char *v, int *pos, int *res, int flags)
2893 {
2894
2895 if ('(' != v[*pos])
2896 return roff_getnum(v, pos, res, flags);
2897
2898 (*pos)++;
2899 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2900 return 0;
2901
2902 /*
2903 * Omission of the closing parenthesis
2904 * is an error in validation mode,
2905 * but ignored in evaluation mode.
2906 */
2907
2908 if (')' == v[*pos])
2909 (*pos)++;
2910 else if (NULL == res)
2911 return 0;
2912
2913 return 1;
2914 }
2915
2916 /*
2917 * Evaluate a complete numeric expression.
2918 * Proceed left to right, there is no concept of precedence.
2919 */
2920 static int
2921 roff_evalnum(struct roff *r, int ln, const char *v,
2922 int *pos, int *res, int flags)
2923 {
2924 int mypos, operand2;
2925 char operator;
2926
2927 if (NULL == pos) {
2928 mypos = 0;
2929 pos = &mypos;
2930 }
2931
2932 if (flags & ROFFNUM_WHITE)
2933 while (isspace((unsigned char)v[*pos]))
2934 (*pos)++;
2935
2936 if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2937 return 0;
2938
2939 while (1) {
2940 if (flags & ROFFNUM_WHITE)
2941 while (isspace((unsigned char)v[*pos]))
2942 (*pos)++;
2943
2944 if ( ! roff_getop(v, pos, &operator))
2945 break;
2946
2947 if (flags & ROFFNUM_WHITE)
2948 while (isspace((unsigned char)v[*pos]))
2949 (*pos)++;
2950
2951 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2952 return 0;
2953
2954 if (flags & ROFFNUM_WHITE)
2955 while (isspace((unsigned char)v[*pos]))
2956 (*pos)++;
2957
2958 if (NULL == res)
2959 continue;
2960
2961 switch (operator) {
2962 case '+':
2963 *res += operand2;
2964 break;
2965 case '-':
2966 *res -= operand2;
2967 break;
2968 case '*':
2969 *res *= operand2;
2970 break;
2971 case '/':
2972 if (operand2 == 0) {
2973 mandoc_msg(MANDOCERR_DIVZERO,
2974 ln, *pos, "%s", v);
2975 *res = 0;
2976 break;
2977 }
2978 *res /= operand2;
2979 break;
2980 case '%':
2981 if (operand2 == 0) {
2982 mandoc_msg(MANDOCERR_DIVZERO,
2983 ln, *pos, "%s", v);
2984 *res = 0;
2985 break;
2986 }
2987 *res %= operand2;
2988 break;
2989 case '<':
2990 *res = *res < operand2;
2991 break;
2992 case '>':
2993 *res = *res > operand2;
2994 break;
2995 case 'l':
2996 *res = *res <= operand2;
2997 break;
2998 case 'g':
2999 *res = *res >= operand2;
3000 break;
3001 case '=':
3002 *res = *res == operand2;
3003 break;
3004 case '!':
3005 *res = *res != operand2;
3006 break;
3007 case '&':
3008 *res = *res && operand2;
3009 break;
3010 case ':':
3011 *res = *res || operand2;
3012 break;
3013 case 'i':
3014 if (operand2 < *res)
3015 *res = operand2;
3016 break;
3017 case 'a':
3018 if (operand2 > *res)
3019 *res = operand2;
3020 break;
3021 default:
3022 abort();
3023 }
3024 }
3025 return 1;
3026 }
3027
3028 /* --- register management ------------------------------------------------ */
3029
/*
 * Set, increment, or decrement the number register with the
 * given NUL-terminated name, leaving its step size alone.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	const size_t	 len = strlen(name);

	roff_setregn(r, name, len, val, sign, INT_MIN);
}
3035
3036 static void
3037 roff_setregn(struct roff *r, const char *name, size_t len,
3038 int val, char sign, int step)
3039 {
3040 struct roffreg *reg;
3041
3042 /* Search for an existing register with the same name. */
3043 reg = r->regtab;
3044
3045 while (reg != NULL && (reg->key.sz != len ||
3046 strncmp(reg->key.p, name, len) != 0))
3047 reg = reg->next;
3048
3049 if (NULL == reg) {
3050 /* Create a new register. */
3051 reg = mandoc_malloc(sizeof(struct roffreg));
3052 reg->key.p = mandoc_strndup(name, len);
3053 reg->key.sz = len;
3054 reg->val = 0;
3055 reg->step = 0;
3056 reg->next = r->regtab;
3057 r->regtab = reg;
3058 }
3059
3060 if ('+' == sign)
3061 reg->val += val;
3062 else if ('-' == sign)
3063 reg->val -= val;
3064 else
3065 reg->val = val;
3066 if (step != INT_MIN)
3067 reg->step = step;
3068 }
3069
3070 /*
3071 * Handle some predefined read-only number registers.
3072 * For now, return -1 if the requested register is not predefined;
3073 * in case a predefined read-only register having the value -1
3074 * were to turn up, another special value would have to be chosen.
3075 */
3076 static int
3077 roff_getregro(const struct roff *r, const char *name)
3078 {
3079
3080 switch (*name) {
3081 case '$': /* Number of arguments of the last macro evaluated. */
3082 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
3083 case 'A': /* ASCII approximation mode is always off. */
3084 return 0;
3085 case 'g': /* Groff compatibility mode is always on. */
3086 return 1;
3087 case 'H': /* Fixed horizontal resolution. */
3088 return 24;
3089 case 'j': /* Always adjust left margin only. */
3090 return 0;
3091 case 'T': /* Some output device is always defined. */
3092 return 1;
3093 case 'V': /* Fixed vertical resolution. */
3094 return 40;
3095 default:
3096 return -1;
3097 }
3098 }
3099
/*
 * Return the value of the number register with the
 * given NUL-terminated name, without stepping it.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	const size_t	 len = strlen(name);

	return roff_getregn(r, name, len, '\0');
}
3105
3106 static int
3107 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
3108 {
3109 struct roffreg *reg;
3110 int val;
3111
3112 if ('.' == name[0] && 2 == len) {
3113 val = roff_getregro(r, name + 1);
3114 if (-1 != val)
3115 return val;
3116 }
3117
3118 for (reg = r->regtab; reg; reg = reg->next) {
3119 if (len == reg->key.sz &&
3120 0 == strncmp(name, reg->key.p, len)) {
3121 switch (sign) {
3122 case '+':
3123 reg->val += reg->step;
3124 break;
3125 case '-':
3126 reg->val -= reg->step;
3127 break;
3128 default:
3129 break;
3130 }
3131 return reg->val;
3132 }
3133 }
3134
3135 roff_setregn(r, name, len, 0, '\0', INT_MIN);
3136 return 0;
3137 }
3138
3139 static int
3140 roff_hasregn(const struct roff *r, const char *name, size_t len)
3141 {
3142 struct roffreg *reg;
3143 int val;
3144
3145 if ('.' == name[0] && 2 == len) {
3146 val = roff_getregro(r, name + 1);
3147 if (-1 != val)
3148 return 1;
3149 }
3150
3151 for (reg = r->regtab; reg; reg = reg->next)
3152 if (len == reg->key.sz &&
3153 0 == strncmp(name, reg->key.p, len))
3154 return 1;
3155
3156 return 0;
3157 }
3158
3159 static void
3160 roff_freereg(struct roffreg *reg)
3161 {
3162 struct roffreg *old_reg;
3163
3164 while (NULL != reg) {
3165 free(reg->key.p);
3166 old_reg = reg;
3167 reg = reg->next;
3168 free(old_reg);
3169 }
3170 }
3171
3172 static int
3173 roff_nr(ROFF_ARGS)
3174 {
3175 char *key, *val, *step;
3176 size_t keysz;
3177 int iv, is, len;
3178 char sign;
3179
3180 key = val = buf->buf + pos;
3181 if (*key == '\0')
3182 return ROFF_IGN;
3183
3184 keysz = roff_getname(r, &val, ln, pos);
3185 if (key[keysz] == '\\' || key[keysz] == '\t')
3186 return ROFF_IGN;
3187
3188 sign = *val;
3189 if (sign == '+' || sign == '-')
3190 val++;
3191
3192 len = 0;
3193 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
3194 return ROFF_IGN;
3195
3196 step = val + len;
3197 while (isspace((unsigned char)*step))
3198 step++;
3199 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
3200 is = INT_MIN;
3201
3202 roff_setregn(r, key, keysz, iv, sign, is);
3203 return ROFF_IGN;
3204 }
3205
3206 static int
3207 roff_rr(ROFF_ARGS)
3208 {
3209 struct roffreg *reg, **prev;
3210 char *name, *cp;
3211 size_t namesz;
3212
3213 name = cp = buf->buf + pos;
3214 if (*name == '\0')
3215 return ROFF_IGN;
3216 namesz = roff_getname(r, &cp, ln, pos);
3217 name[namesz] = '\0';
3218
3219 prev = &r->regtab;
3220 while (1) {
3221 reg = *prev;
3222 if (reg == NULL || !strcmp(name, reg->key.p))
3223 break;
3224 prev = &reg->next;
3225 }
3226 if (reg != NULL) {
3227 *prev = reg->next;
3228 free(reg->key.p);
3229 free(reg);
3230 }
3231 return ROFF_IGN;
3232 }
3233
3234 /* --- handler functions for roff requests -------------------------------- */
3235
3236 static int
3237 roff_rm(ROFF_ARGS)
3238 {
3239 const char *name;
3240 char *cp;
3241 size_t namesz;
3242
3243 cp = buf->buf + pos;
3244 while (*cp != '\0') {
3245 name = cp;
3246 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
3247 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
3248 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3249 if (name[namesz] == '\\' || name[namesz] == '\t')
3250 break;
3251 }
3252 return ROFF_IGN;
3253 }
3254
3255 static int
3256 roff_it(ROFF_ARGS)
3257 {
3258 int iv;
3259
3260 /* Parse the number of lines. */
3261
3262 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
3263 mandoc_msg(MANDOCERR_IT_NONUM,
3264 ln, ppos, "%s", buf->buf + 1);
3265 return ROFF_IGN;
3266 }
3267
3268 while (isspace((unsigned char)buf->buf[pos]))
3269 pos++;
3270
3271 /*
3272 * Arm the input line trap.
3273 * Special-casing "an-trap" is an ugly workaround to cope
3274 * with DocBook stupidly fiddling with man(7) internals.
3275 */
3276
3277 roffit_lines = iv;
3278 roffit_macro = mandoc_strdup(iv != 1 ||
3279 strcmp(buf->buf + pos, "an-trap") ?
3280 buf->buf + pos : "br");
3281 return ROFF_IGN;
3282 }
3283
3284 static int
3285 roff_Dd(ROFF_ARGS)
3286 {
3287 int mask;
3288 enum roff_tok t, te;
3289
3290 switch (tok) {
3291 case ROFF_Dd:
3292 tok = MDOC_Dd;
3293 te = MDOC_MAX;
3294 if (r->format == 0)
3295 r->format = MPARSE_MDOC;
3296 mask = MPARSE_MDOC | MPARSE_QUICK;
3297 break;
3298 case ROFF_TH:
3299 tok = MAN_TH;
3300 te = MAN_MAX;
3301 if (r->format == 0)
3302 r->format = MPARSE_MAN;
3303 mask = MPARSE_QUICK;
3304 break;
3305 default:
3306 abort();
3307 }
3308 if ((r->options & mask) == 0)
3309 for (t = tok; t < te; t++)
3310 roff_setstr(r, roff_name[t], NULL, 0);
3311 return ROFF_CONT;
3312 }
3313
3314 static int
3315 roff_TE(ROFF_ARGS)
3316 {
3317 r->man->flags &= ~ROFF_NONOFILL;
3318 if (r->tbl == NULL) {
3319 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE");
3320 return ROFF_IGN;
3321 }
3322 if (tbl_end(r->tbl, 0) == 0) {
3323 r->tbl = NULL;
3324 free(buf->buf);
3325 buf->buf = mandoc_strdup(".sp");
3326 buf->sz = 4;
3327 *offs = 0;
3328 return ROFF_REPARSE;
3329 }
3330 r->tbl = NULL;
3331 return ROFF_IGN;
3332 }
3333
3334 static int
3335 roff_T_(ROFF_ARGS)
3336 {
3337
3338 if (NULL == r->tbl)
3339 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&");
3340 else
3341 tbl_restart(ln, ppos, r->tbl);
3342
3343 return ROFF_IGN;
3344 }
3345
3346 /*
3347 * Handle in-line equation delimiters.
3348 */
3349 static int
3350 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
3351 {
3352 char *cp1, *cp2;
3353 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
3354
3355 /*
3356 * Outside equations, look for an opening delimiter.
3357 * If we are inside an equation, we already know it is
3358 * in-line, or this function wouldn't have been called;
3359 * so look for a closing delimiter.
3360 */
3361
3362 cp1 = buf->buf + pos;
3363 cp2 = strchr(cp1, r->eqn == NULL ?
3364 r->last_eqn->odelim : r->last_eqn->cdelim);
3365 if (cp2 == NULL)
3366 return ROFF_CONT;
3367
3368 *cp2++ = '\0';
3369 bef_pr = bef_nl = aft_nl = aft_pr = "";
3370
3371 /* Handle preceding text, protecting whitespace. */
3372
3373 if (*buf->buf != '\0') {
3374 if (r->eqn == NULL)
3375 bef_pr = "\\&";
3376 bef_nl = "\n";
3377 }
3378
3379 /*
3380 * Prepare replacing the delimiter with an equation macro
3381 * and drop leading white space from the equation.
3382 */
3383
3384 if (r->eqn == NULL) {
3385 while (*cp2 == ' ')
3386 cp2++;
3387 mac = ".EQ";
3388 } else
3389 mac = ".EN";
3390
3391 /* Handle following text, protecting whitespace. */
3392
3393 if (*cp2 != '\0') {
3394 aft_nl = "\n";
3395 if (r->eqn != NULL)
3396 aft_pr = "\\&";
3397 }
3398
3399 /* Do the actual replacement. */
3400
3401 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
3402 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
3403 free(buf->buf);
3404 buf->buf = cp1;
3405
3406 /* Toggle the in-line state of the eqn subsystem. */
3407
3408 r->eqn_inline = r->eqn == NULL;
3409 return ROFF_REPARSE;
3410 }
3411
3412 static int
3413 roff_EQ(ROFF_ARGS)
3414 {
3415 struct roff_node *n;
3416
3417 if (r->man->meta.macroset == MACROSET_MAN)
3418 man_breakscope(r->man, ROFF_EQ);
3419 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3420 if (ln > r->man->last->line)
3421 n->flags |= NODE_LINE;
3422 n->eqn = eqn_box_new();
3423 roff_node_append(r->man, n);
3424 r->man->next = ROFF_NEXT_SIBLING;
3425
3426 assert(r->eqn == NULL);
3427 if (r->last_eqn == NULL)
3428 r->last_eqn = eqn_alloc();
3429 else
3430 eqn_reset(r->last_eqn);
3431 r->eqn = r->last_eqn;
3432 r->eqn->node = n;
3433
3434 if (buf->buf[pos] != '\0')
3435 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3436 ".EQ %s", buf->buf + pos);
3437
3438 return ROFF_IGN;
3439 }
3440
3441 static int
3442 roff_EN(ROFF_ARGS)
3443 {
3444 if (r->eqn != NULL) {
3445 eqn_parse(r->eqn);
3446 r->eqn = NULL;
3447 } else
3448 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN");
3449 if (buf->buf[pos] != '\0')
3450 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3451 "EN %s", buf->buf + pos);
3452 return ROFF_IGN;
3453 }
3454
3455 static int
3456 roff_TS(ROFF_ARGS)
3457 {
3458 if (r->tbl != NULL) {
3459 mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
3460 tbl_end(r->tbl, 0);
3461 }
3462 r->man->flags |= ROFF_NONOFILL;
3463 r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
3464 if (r->last_tbl == NULL)
3465 r->first_tbl = r->tbl;
3466 r->last_tbl = r->tbl;
3467 return ROFF_IGN;
3468 }
3469
3470 static int
3471 roff_noarg(ROFF_ARGS)
3472 {
3473 if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3474 man_breakscope(r->man, tok);
3475 if (tok == ROFF_brp)
3476 tok = ROFF_br;
3477 roff_elem_alloc(r->man, ln, ppos, tok);
3478 if (buf->buf[pos] != '\0')
3479 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3480 "%s %s", roff_name[tok], buf->buf + pos);
3481 if (tok == ROFF_nf)
3482 r->man->flags |= ROFF_NOFILL;
3483 else if (tok == ROFF_fi)
3484 r->man->flags &= ~ROFF_NOFILL;
3485 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3486 r->man->next = ROFF_NEXT_SIBLING;
3487 return ROFF_IGN;
3488 }
3489
3490 static int
3491 roff_onearg(ROFF_ARGS)
3492 {
3493 struct roff_node *n;
3494 char *cp;
3495 int npos;
3496
3497 if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3498 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3499 tok == ROFF_ti))
3500 man_breakscope(r->man, tok);
3501
3502 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3503 r->man->last = roffce_node;
3504 r->man->next = ROFF_NEXT_SIBLING;
3505 }
3506
3507 roff_elem_alloc(r->man, ln, ppos, tok);
3508 n = r->man->last;
3509
3510 cp = buf->buf + pos;
3511 if (*cp != '\0') {
3512 while (*cp != '\0' && *cp != ' ')
3513 cp++;
3514 while (*cp == ' ')
3515 *cp++ = '\0';
3516 if (*cp != '\0')
3517 mandoc_msg(MANDOCERR_ARG_EXCESS,
3518 ln, (int)(cp - buf->buf),
3519 "%s ... %s", roff_name[tok], cp);
3520 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3521 }
3522
3523 if (tok == ROFF_ce || tok == ROFF_rj) {
3524 if (r->man->last->type == ROFFT_ELEM) {
3525 roff_word_alloc(r->man, ln, pos, "1");
3526 r->man->last->flags |= NODE_NOSRC;
3527 }
3528 npos = 0;
3529 if (roff_evalnum(r, ln, r->man->last->string, &npos,
3530 &roffce_lines, 0) == 0) {
3531 mandoc_msg(MANDOCERR_CE_NONUM,
3532 ln, pos, "ce %s", buf->buf + pos);
3533 roffce_lines = 1;
3534 }
3535 if (roffce_lines < 1) {
3536 r->man->last = r->man->last->parent;
3537 roffce_node = NULL;
3538 roffce_lines = 0;
3539 } else
3540 roffce_node = r->man->last->parent;
3541 } else {
3542 n->flags |= NODE_VALID | NODE_ENDED;
3543 r->man->last = n;
3544 }
3545 n->flags |= NODE_LINE;
3546 r->man->next = ROFF_NEXT_SIBLING;
3547 return ROFF_IGN;
3548 }
3549
3550 static int
3551 roff_manyarg(ROFF_ARGS)
3552 {
3553 struct roff_node *n;
3554 char *sp, *ep;
3555
3556 roff_elem_alloc(r->man, ln, ppos, tok);
3557 n = r->man->last;
3558
3559 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3560 while (*ep != '\0' && *ep != ' ')
3561 ep++;
3562 while (*ep == ' ')
3563 *ep++ = '\0';
3564 roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3565 }
3566
3567 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3568 r->man->last = n;
3569 r->man->next = ROFF_NEXT_SIBLING;
3570 return ROFF_IGN;
3571 }
3572
3573 static int
3574 roff_als(ROFF_ARGS)
3575 {
3576 char *oldn, *newn, *end, *value;
3577 size_t oldsz, newsz, valsz;
3578
3579 newn = oldn = buf->buf + pos;
3580 if (*newn == '\0')
3581 return ROFF_IGN;
3582
3583 newsz = roff_getname(r, &oldn, ln, pos);
3584 if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0')
3585 return ROFF_IGN;
3586
3587 end = oldn;
3588 oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3589 if (oldsz == 0)
3590 return ROFF_IGN;
3591
3592 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3593 (int)oldsz, oldn);
3594 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3595 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3596 free(value);
3597 return ROFF_IGN;
3598 }
3599
3600 /*
3601 * The .break request only makes sense inside conditionals,
3602 * and that case is already handled in roff_cond_sub().
3603 */
3604 static int
3605 roff_break(ROFF_ARGS)
3606 {
3607 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break");
3608 return ROFF_IGN;
3609 }
3610
3611 static int
3612 roff_cc(ROFF_ARGS)
3613 {
3614 const char *p;
3615
3616 p = buf->buf + pos;
3617
3618 if (*p == '\0' || (r->control = *p++) == '.')
3619 r->control = '\0';
3620
3621 if (*p != '\0')
3622 mandoc_msg(MANDOCERR_ARG_EXCESS,
3623 ln, p - buf->buf, "cc ... %s", p);
3624
3625 return ROFF_IGN;
3626 }
3627
3628 static int
3629 roff_char(ROFF_ARGS)
3630 {
3631 const char *p, *kp, *vp;
3632 size_t ksz, vsz;
3633 int font;
3634
3635 /* Parse the character to be replaced. */
3636
3637 kp = buf->buf + pos;
3638 p = kp + 1;
3639 if (*kp == '\0' || (*kp == '\\' &&
3640 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3641 (*p != ' ' && *p != '\0')) {
3642 mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp);
3643 return ROFF_IGN;
3644 }
3645 ksz = p - kp;
3646 while (*p == ' ')
3647 p++;
3648
3649 /*
3650 * If the replacement string contains a font escape sequence,
3651 * we have to restore the font at the end.
3652 */
3653
3654 vp = p;
3655 vsz = strlen(p);
3656 font = 0;
3657 while (*p != '\0') {
3658 if (*p++ != '\\')
3659 continue;
3660 switch (mandoc_escape(&p, NULL, NULL)) {
3661 case ESCAPE_FONT:
3662 case ESCAPE_FONTROMAN:
3663 case ESCAPE_FONTITALIC:
3664 case ESCAPE_FONTBOLD:
3665 case ESCAPE_FONTBI:
3666 case ESCAPE_FONTCR:
3667 case ESCAPE_FONTCB:
3668 case ESCAPE_FONTCI:
3669 case ESCAPE_FONTPREV:
3670 font++;
3671 break;
3672 default:
3673 break;
3674 }
3675 }
3676 if (font > 1)
3677 mandoc_msg(MANDOCERR_CHAR_FONT,
3678 ln, (int)(vp - buf->buf), "%s", vp);
3679
3680 /*
3681 * Approximate the effect of .char using the .tr tables.
3682 * XXX In groff, .char and .tr interact differently.
3683 */
3684
3685 if (ksz == 1) {
3686 if (r->xtab == NULL)
3687 r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3688 assert((unsigned int)*kp < 128);
3689 free(r->xtab[(int)*kp].p);
3690 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3691 "%s%s", vp, font ? "\fP" : "");
3692 } else {
3693 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3694 if (font)
3695 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3696 }
3697 return ROFF_IGN;
3698 }
3699
3700 static int
3701 roff_ec(ROFF_ARGS)
3702 {
3703 const char *p;
3704
3705 p = buf->buf + pos;
3706 if (*p == '\0')
3707 r->escape = '\\';
3708 else {
3709 r->escape = *p;
3710 if (*++p != '\0')
3711 mandoc_msg(MANDOCERR_ARG_EXCESS, ln,
3712 (int)(p - buf->buf), "ec ... %s", p);
3713 }
3714 return ROFF_IGN;
3715 }
3716
3717 static int
3718 roff_eo(ROFF_ARGS)
3719 {
3720 r->escape = '\0';
3721 if (buf->buf[pos] != '\0')
3722 mandoc_msg(MANDOCERR_ARG_SKIP,
3723 ln, pos, "eo %s", buf->buf + pos);
3724 return ROFF_IGN;
3725 }
3726
3727 static int
3728 roff_mc(ROFF_ARGS)
3729 {
3730 struct roff_node *n;
3731 char *cp;
3732
3733 /* Parse the first argument. */
3734
3735 cp = buf->buf + pos;
3736 if (*cp != '\0')
3737 cp++;
3738 if (buf->buf[pos] == '\\') {
3739 switch (mandoc_escape((const char **)&cp, NULL, NULL)) {
3740 case ESCAPE_SPECIAL:
3741 case ESCAPE_UNICODE:
3742 case ESCAPE_NUMBERED:
3743 break;
3744 default:
3745 *cp = '\0';
3746 mandoc_msg(MANDOCERR_MC_ESC, ln, pos,
3747 "mc %s", buf->buf + pos);
3748 buf->buf[pos] = '\0';
3749 break;
3750 }
3751 }
3752
3753 /* Ignore additional arguments. */
3754
3755 while (*cp == ' ')
3756 *cp++ = '\0';
3757 if (*cp != '\0') {
3758 mandoc_msg(MANDOCERR_MC_DIST, ln, (int)(cp - buf->buf),
3759 "mc ... %s", cp);
3760 *cp = '\0';
3761 }
3762
3763 /* Create the .mc node. */
3764
3765 roff_elem_alloc(r->man, ln, ppos, tok);
3766 n = r->man->last;
3767 if (buf->buf[pos] != '\0')
3768 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3769 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3770 r->man->last = n;
3771 r->man->next = ROFF_NEXT_SIBLING;
3772 return ROFF_IGN;
3773 }
3774
3775 static int
3776 roff_nop(ROFF_ARGS)
3777 {
3778 while (buf->buf[pos] == ' ')
3779 pos++;
3780 *offs = pos;
3781 return ROFF_RERUN;
3782 }
3783
3784 static int
3785 roff_tr(ROFF_ARGS)
3786 {
3787 const char *p, *first, *second;
3788 size_t fsz, ssz;
3789 enum mandoc_esc esc;
3790
3791 p = buf->buf + pos;
3792
3793 if (*p == '\0') {
3794 mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr");
3795 return ROFF_IGN;
3796 }
3797
3798 while (*p != '\0') {
3799 fsz = ssz = 1;
3800
3801 first = p++;
3802 if (*first == '\\') {
3803 esc = mandoc_escape(&p, NULL, NULL);
3804 if (esc == ESCAPE_ERROR) {
3805 mandoc_msg(MANDOCERR_ESC_BAD, ln,
3806 (int)(p - buf->buf), "%s", first);
3807 return ROFF_IGN;
3808 }
3809 fsz = (size_t)(p - first);
3810 }
3811
3812 second = p++;
3813 if (*second == '\\') {
3814 esc = mandoc_escape(&p, NULL, NULL);
3815 if (esc == ESCAPE_ERROR) {
3816 mandoc_msg(MANDOCERR_ESC_BAD, ln,
3817 (int)(p - buf->buf), "%s", second);
3818 return ROFF_IGN;
3819 }
3820 ssz = (size_t)(p - second);
3821 } else if (*second == '\0') {
3822 mandoc_msg(MANDOCERR_TR_ODD, ln,
3823 (int)(first - buf->buf), "tr %s", first);
3824 second = " ";
3825 p--;
3826 }
3827
3828 if (fsz > 1) {
3829 roff_setstrn(&r->xmbtab, first, fsz,
3830 second, ssz, 0);
3831 continue;
3832 }
3833
3834 if (r->xtab == NULL)
3835 r->xtab = mandoc_calloc(128,
3836 sizeof(struct roffstr));
3837
3838 free(r->xtab[(int)*first].p);
3839 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3840 r->xtab[(int)*first].sz = ssz;
3841 }
3842
3843 return ROFF_IGN;
3844 }
3845
3846 /*
3847 * Implementation of the .return request.
3848 * There is no need to call roff_userret() from here.
3849 * The read module will call that after rewinding the reader stack
3850 * to the place from where the current macro was called.
3851 */
3852 static int
3853 roff_return(ROFF_ARGS)
3854 {
3855 if (r->mstackpos >= 0)
3856 return ROFF_IGN | ROFF_USERRET;
3857
3858 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return");
3859 return ROFF_IGN;
3860 }
3861
3862 static int
3863 roff_rn(ROFF_ARGS)
3864 {
3865 const char *value;
3866 char *oldn, *newn, *end;
3867 size_t oldsz, newsz;
3868 int deftype;
3869
3870 oldn = newn = buf->buf + pos;
3871 if (*oldn == '\0')
3872 return ROFF_IGN;
3873
3874 oldsz = roff_getname(r, &newn, ln, pos);
3875 if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0')
3876 return ROFF_IGN;
3877
3878 end = newn;
3879 newsz = roff_getname(r, &end, ln, newn - buf->buf);
3880 if (newsz == 0)
3881 return ROFF_IGN;
3882
3883 deftype = ROFFDEF_ANY;
3884 value = roff_getstrn(r, oldn, oldsz, &deftype);
3885 switch (deftype) {
3886 case ROFFDEF_USER:
3887 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3888 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3889 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3890 break;
3891 case ROFFDEF_PRE:
3892 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3893 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3894 break;
3895 case ROFFDEF_REN:
3896 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3897 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3898 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3899 break;
3900 case ROFFDEF_STD:
3901 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3902 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3903 break;
3904 default:
3905 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3906 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3907 break;
3908 }
3909 return ROFF_IGN;
3910 }
3911
3912 static int
3913 roff_shift(ROFF_ARGS)
3914 {
3915 struct mctx *ctx;
3916 int argpos, levels, i;
3917
3918 argpos = pos;
3919 levels = 1;
3920 if (buf->buf[pos] != '\0' &&
3921 roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
3922 mandoc_msg(MANDOCERR_CE_NONUM,
3923 ln, pos, "shift %s", buf->buf + pos);
3924 levels = 1;
3925 }
3926 if (r->mstackpos < 0) {
3927 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift");
3928 return ROFF_IGN;
3929 }
3930 ctx = r->mstack + r->mstackpos;
3931 if (levels > ctx->argc) {
3932 mandoc_msg(MANDOCERR_SHIFT,
3933 ln, argpos, "%d, but max is %d", levels, ctx->argc);
3934 levels = ctx->argc;
3935 }
3936 if (levels < 0) {
3937 mandoc_msg(MANDOCERR_ARG_NEG, ln, argpos, "shift %d", levels);
3938 levels = 0;
3939 }
3940 if (levels == 0)
3941 return ROFF_IGN;
3942 for (i = 0; i < levels; i++)
3943 free(ctx->argv[i]);
3944 ctx->argc -= levels;
3945 for (i = 0; i < ctx->argc; i++)
3946 ctx->argv[i] = ctx->argv[i + levels];
3947 return ROFF_IGN;
3948 }
3949
3950 static int
3951 roff_so(ROFF_ARGS)
3952 {
3953 char *name, *cp;
3954
3955 name = buf->buf + pos;
3956 mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name);
3957
3958 /*
3959 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3960 * opening anything that's not in our cwd or anything beneath
3961 * it. Thus, explicitly disallow traversing up the file-system
3962 * or using absolute paths.
3963 */
3964
3965 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3966 mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name);
3967 buf->sz = mandoc_asprintf(&cp,
3968 ".sp\nSee the file %s.\n.sp", name) + 1;
3969 free(buf->buf);
3970 buf->buf = cp;
3971 *offs = 0;
3972 return ROFF_REPARSE;
3973 }
3974
3975 *offs = pos;
3976 return ROFF_SO;
3977 }
3978
3979 /* --- user defined strings and macros ------------------------------------ */
3980
3981 static int
3982 roff_userdef(ROFF_ARGS)
3983 {
3984 struct mctx *ctx;
3985 char *arg, *ap, *dst, *src;
3986 size_t sz;
3987
3988 /* If the macro is empty, ignore it altogether. */
3989
3990 if (*r->current_string == '\0')
3991 return ROFF_IGN;
3992
3993 /* Initialize a new macro stack context. */
3994
3995 if (++r->mstackpos == r->mstacksz) {
3996 r->mstack = mandoc_recallocarray(r->mstack,
3997 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
3998 r->mstacksz += 8;
3999 }
4000 ctx = r->mstack + r->mstackpos;
4001 ctx->argc = 0;
4002
4003 /*
4004 * Collect pointers to macro argument strings,
4005 * NUL-terminating them and escaping quotes.
4006 */
4007
4008 src = buf->buf + pos;
4009 while (*src != '\0') {
4010 if (ctx->argc == ctx->argsz) {
4011 ctx->argsz += 8;
4012 ctx->argv = mandoc_reallocarray(ctx->argv,
4013 ctx->argsz, sizeof(*ctx->argv));
4014 }
4015 arg = roff_getarg(r, &src, ln, &pos);
4016 sz = 1; /* For the terminating NUL. */
4017 for (ap = arg; *ap != '\0'; ap++)
4018 sz += *ap == '"' ? 4 : 1;
4019 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
4020 for (ap = arg; *ap != '\0'; ap++) {
4021 if (*ap == '"') {
4022 memcpy(dst, "\\(dq", 4);
4023 dst += 4;
4024 } else
4025 *dst++ = *ap;
4026 }
4027 *dst = '\0';
4028 free(arg);
4029 }
4030
4031 /* Replace the macro invocation by the macro definition. */
4032
4033 free(buf->buf);
4034 buf->buf = mandoc_strdup(r->current_string);
4035 buf->sz = strlen(buf->buf) + 1;
4036 *offs = 0;
4037
4038 return buf->buf[buf->sz - 2] == '\n' ?
4039 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
4040 }
4041
4042 /*
4043 * Calling a high-level macro that was renamed with .rn.
4044 * r->current_string has already been set up by roff_parse().
4045 */
4046 static int
4047 roff_renamed(ROFF_ARGS)
4048 {
4049 char *nbuf;
4050
4051 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
4052 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
4053 free(buf->buf);
4054 buf->buf = nbuf;
4055 *offs = 0;
4056 return ROFF_CONT;
4057 }
4058
4059 /*
4060 * Measure the length in bytes of the roff identifier at *cpp
4061 * and advance the pointer to the next word.
4062 */
4063 static size_t
4064 roff_getname(struct roff *r, char **cpp, int ln, int pos)
4065 {
4066 char *name, *cp;
4067 size_t namesz;
4068
4069 name = *cpp;
4070 if (*name == '\0')
4071 return 0;
4072
4073 /* Advance cp to the byte after the end of the name. */
4074
4075 for (cp = name; 1; cp++) {
4076 namesz = cp - name;
4077 if (*cp == '\0')
4078 break;
4079 if (*cp == ' ' || *cp == '\t') {
4080 cp++;
4081 break;
4082 }
4083 if (*cp != '\\')
4084 continue;
4085 if (cp[1] == '{' || cp[1] == '}')
4086 break;
4087 if (*++cp == '\\')
4088 continue;
4089 mandoc_msg(MANDOCERR_NAMESC, ln, pos,
4090 "%.*s", (int)(cp - name + 1), name);
4091 mandoc_escape((const char **)&cp, NULL, NULL);
4092 break;
4093 }
4094
4095 /* Read past spaces. */
4096
4097 while (*cp == ' ')
4098 cp++;
4099
4100 *cpp = cp;
4101 return namesz;
4102 }
4103
4104 /*
4105 * Store *string into the user-defined string called *name.
4106 * To clear an existing entry, call with (*r, *name, NULL, 0).
4107 * append == 0: replace mode
4108 * append == 1: single-line append mode
4109 * append == 2: multiline append mode, append '\n' after each call
4110 */
4111 static void
4112 roff_setstr(struct roff *r, const char *name, const char *string,
4113 int append)
4114 {
4115 size_t namesz;
4116
4117 namesz = strlen(name);
4118 roff_setstrn(&r->strtab, name, namesz, string,
4119 string ? strlen(string) : 0, append);
4120 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
4121 }
4122
4123 static void
4124 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
4125 const char *string, size_t stringsz, int append)
4126 {
4127 struct roffkv *n;
4128 char *c;
4129 int i;
4130 size_t oldch, newch;
4131
4132 /* Search for an existing string with the same name. */
4133 n = *r;
4134
4135 while (n && (namesz != n->key.sz ||
4136 strncmp(n->key.p, name, namesz)))
4137 n = n->next;
4138
4139 if (NULL == n) {
4140 /* Create a new string table entry. */
4141 n = mandoc_malloc(sizeof(struct roffkv));
4142 n->key.p = mandoc_strndup(name, namesz);
4143 n->key.sz = namesz;
4144 n->val.p = NULL;
4145 n->val.sz = 0;
4146 n->next = *r;
4147 *r = n;
4148 } else if (0 == append) {
4149 free(n->val.p);
4150 n->val.p = NULL;
4151 n->val.sz = 0;
4152 }
4153
4154 if (NULL == string)
4155 return;
4156
4157 /*
4158 * One additional byte for the '\n' in multiline mode,
4159 * and one for the terminating '\0'.
4160 */
4161 newch = stringsz + (1 < append ? 2u : 1u);
4162
4163 if (NULL == n->val.p) {
4164 n->val.p = mandoc_malloc(newch);
4165 *n->val.p = '\0';
4166 oldch = 0;
4167 } else {
4168 oldch = n->val.sz;
4169 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
4170 }
4171
4172 /* Skip existing content in the destination buffer. */
4173 c = n->val.p + (int)oldch;
4174
4175 /* Append new content to the destination buffer. */
4176 i = 0;
4177 while (i < (int)stringsz) {
4178 /*
4179 * Rudimentary roff copy mode:
4180 * Handle escaped backslashes.
4181 */
4182 if ('\\' == string[i] && '\\' == string[i + 1])
4183 i++;
4184 *c++ = string[i++];
4185 }
4186
4187 /* Append terminating bytes. */
4188 if (1 < append)
4189 *c++ = '\n';
4190
4191 *c = '\0';
4192 n->val.sz = (int)(c - n->val.p);
4193 }
4194
4195 static const char *
4196 roff_getstrn(struct roff *r, const char *name, size_t len,
4197 int *deftype)
4198 {
4199 const struct roffkv *n;
4200 int found, i;
4201 enum roff_tok tok;
4202
4203 found = 0;
4204 for (n = r->strtab; n != NULL; n = n->next) {
4205 if (strncmp(name, n->key.p, len) != 0 ||
4206 n->key.p[len] != '\0' || n->val.p == NULL)
4207 continue;
4208 if (*deftype & ROFFDEF_USER) {
4209 *deftype = ROFFDEF_USER;
4210 return n->val.p;
4211 } else {
4212 found = 1;
4213 break;
4214 }
4215 }
4216 for (n = r->rentab; n != NULL; n = n->next) {
4217 if (strncmp(name, n->key.p, len) != 0 ||
4218 n->key.p[len] != '\0' || n->val.p == NULL)
4219 continue;
4220 if (*deftype & ROFFDEF_REN) {
4221 *deftype = ROFFDEF_REN;
4222 return n->val.p;
4223 } else {
4224 found = 1;
4225 break;
4226 }
4227 }
4228 for (i = 0; i < PREDEFS_MAX; i++) {
4229 if (strncmp(name, predefs[i].name, len) != 0 ||
4230 predefs[i].name[len] != '\0')
4231 continue;
4232 if (*deftype & ROFFDEF_PRE) {
4233 *deftype = ROFFDEF_PRE;
4234 return predefs[i].str;
4235 } else {
4236 found = 1;
4237 break;
4238 }
4239 }
4240 if (r->man->meta.macroset != MACROSET_MAN) {
4241 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
4242 if (strncmp(name, roff_name[tok], len) != 0 ||
4243 roff_name[tok][len] != '\0')
4244 continue;
4245 if (*deftype & ROFFDEF_STD) {
4246 *deftype = ROFFDEF_STD;
4247 return NULL;
4248 } else {
4249 found = 1;
4250 break;
4251 }
4252 }
4253 }
4254 if (r->man->meta.macroset != MACROSET_MDOC) {
4255 for (tok = MAN_TH; tok < MAN_MAX; tok++) {
4256 if (strncmp(name, roff_name[tok], len) != 0 ||
4257 roff_name[tok][len] != '\0')
4258 continue;
4259 if (*deftype & ROFFDEF_STD) {
4260 *deftype = ROFFDEF_STD;
4261 return NULL;
4262 } else {
4263 found = 1;
4264 break;
4265 }
4266 }
4267 }
4268
4269 if (found == 0 && *deftype != ROFFDEF_ANY) {
4270 if (*deftype & ROFFDEF_REN) {
4271 /*
4272 * This might still be a request,
4273 * so do not treat it as undefined yet.
4274 */
4275 *deftype = ROFFDEF_UNDEF;
4276 return NULL;
4277 }
4278
4279 /* Using an undefined string defines it to be empty. */
4280
4281 roff_setstrn(&r->strtab, name, len, "", 0, 0);
4282 roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
4283 }
4284
4285 *deftype = 0;
4286 return NULL;
4287 }
4288
4289 static void
4290 roff_freestr(struct roffkv *r)
4291 {
4292 struct roffkv *n, *nn;
4293
4294 for (n = r; n; n = nn) {
4295 free(n->key.p);
4296 free(n->val.p);
4297 nn = n->next;
4298 free(n);
4299 }
4300 }
4301
4302 /* --- accessors and utility functions ------------------------------------ */
4303
4304 /*
4305 * Duplicate an input string, making the appropriate character
4306 * conversations (as stipulated by `tr') along the way.
4307 * Returns a heap-allocated string with all the replacements made.
4308 */
4309 char *
4310 roff_strdup(const struct roff *r, const char *p)
4311 {
4312 const struct roffkv *cp;
4313 char *res;
4314 const char *pp;
4315 size_t ssz, sz;
4316 enum mandoc_esc esc;
4317
4318 if (NULL == r->xmbtab && NULL == r->xtab)
4319 return mandoc_strdup(p);
4320 else if ('\0' == *p)
4321 return mandoc_strdup("");
4322
4323 /*
4324 * Step through each character looking for term matches
4325 * (remember that a `tr' can be invoked with an escape, which is
4326 * a glyph but the escape is multi-character).
4327 * We only do this if the character hash has been initialised
4328 * and the string is >0 length.
4329 */
4330
4331 res = NULL;
4332 ssz = 0;
4333
4334 while ('\0' != *p) {
4335 assert((unsigned int)*p < 128);
4336 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4337 sz = r->xtab[(int)*p].sz;
4338 res = mandoc_realloc(res, ssz + sz + 1);
4339 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4340 ssz += sz;
4341 p++;
4342 continue;
4343 } else if ('\\' != *p) {
4344 res = mandoc_realloc(res, ssz + 2);
4345 res[ssz++] = *p++;
4346 continue;
4347 }
4348
4349 /* Search for term matches. */
4350 for (cp = r->xmbtab; cp; cp = cp->next)
4351 if (0 == strncmp(p, cp->key.p, cp->key.sz))
4352 break;
4353
4354 if (NULL != cp) {
4355 /*
4356 * A match has been found.
4357 * Append the match to the array and move
4358 * forward by its keysize.
4359 */
4360 res = mandoc_realloc(res,
4361 ssz + cp->val.sz + 1);
4362 memcpy(res + ssz, cp->val.p, cp->val.sz);
4363 ssz += cp->val.sz;
4364 p += (int)cp->key.sz;
4365 continue;
4366 }
4367
4368 /*
4369 * Handle escapes carefully: we need to copy
4370 * over just the escape itself, or else we might
4371 * do replacements within the escape itself.
4372 * Make sure to pass along the bogus string.
4373 */
4374 pp = p++;
4375 esc = mandoc_escape(&p, NULL, NULL);
4376 if (ESCAPE_ERROR == esc) {
4377 sz = strlen(pp);
4378 res = mandoc_realloc(res, ssz + sz + 1);
4379 memcpy(res + ssz, pp, sz);
4380 break;
4381 }
4382 /*
4383 * We bail out on bad escapes.
4384 * No need to warn: we already did so when
4385 * roff_expand() was called.
4386 */
4387 sz = (int)(p - pp);
4388 res = mandoc_realloc(res, ssz + sz + 1);
4389 memcpy(res + ssz, pp, sz);
4390 ssz += sz;
4391 }
4392
4393 res[(int)ssz] = '\0';
4394 return res;
4395 }
4396
4397 int
4398 roff_getformat(const struct roff *r)
4399 {
4400
4401 return r->format;
4402 }
4403
4404 /*
4405 * Find out whether a line is a macro line or not.
4406 * If it is, adjust the current position and return one; if it isn't,
4407 * return zero and don't change the current position.
4408 * If the control character has been set with `.cc', then let that grain
4409 * precedence.
4410 * This is slighly contrary to groff, where using the non-breaking
4411 * control character when `cc' has been invoked will cause the
4412 * non-breaking macro contents to be printed verbatim.
4413 */
4414 int
4415 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4416 {
4417 int pos;
4418
4419 pos = *ppos;
4420
4421 if (r->control != '\0' && cp[pos] == r->control)
4422 pos++;
4423 else if (r->control != '\0')
4424 return 0;
4425 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4426 pos += 2;
4427 else if ('.' == cp[pos] || '\'' == cp[pos])
4428 pos++;
4429 else
4430 return 0;
4431
4432 while (' ' == cp[pos] || '\t' == cp[pos])
4433 pos++;
4434
4435 *ppos = pos;
4436 return 1;
4437 }