]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
warn about time machines; suggested by Thomas Klausner <wiz @ NetBSD>
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.317 2017/06/25 11:42:02 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31 #include "mandoc.h"
32 #include "mandoc_aux.h"
33 #include "mandoc_ohash.h"
34 #include "roff.h"
35 #include "libmandoc.h"
36 #include "roff_int.h"
37 #include "libroff.h"
38
/*
 * Upper bound on the number of string expansions performed on a
 * single input line; used to break infinite expansion loops.
 */
#define	EXPAND_LIMIT	1000

/*
 * Bit flags telling how a macro or string came to be defined.
 */
#define	ROFFDEF_USER	(1 << 1)	/* defined by the user */
#define	ROFFDEF_PRE	(1 << 2)	/* predefined */
#define	ROFFDEF_REN	(1 << 3)	/* standard macro that was renamed */
#define	ROFFDEF_STD	(1 << 4)	/* mdoc(7) or man(7) macro */
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
			 ROFFDEF_REN | ROFFDEF_STD)
49
50 /* --- data types --------------------------------------------------------- */
51
/*
 * Minimal string buffer: a pointer together with its cached length.
 */
struct roffstr {
	char		*p;	/* buffer, nil-terminated */
	size_t		 sz;	/* cached strlen(p) */
};
59
60 /*
61 * A key-value roffstr pair as part of a singly-linked list.
62 */
63 struct roffkv {
64 struct roffstr key;
65 struct roffstr val;
66 struct roffkv *next; /* next in list */
67 };
68
69 /*
70 * A single number register as part of a singly-linked list.
71 */
72 struct roffreg {
73 struct roffstr key;
74 int val;
75 struct roffreg *next;
76 };
77
78 /*
79 * Association of request and macro names with token IDs.
80 */
81 struct roffreq {
82 enum roff_tok tok;
83 char name[];
84 };
85
/*
 * State of the roff parser.
 */
struct roff {
	struct mparse	*parse;		/* parse point */
	struct roff_man	*man;		/* mdoc or man parser */
	struct roffnode	*last;		/* leaf of the request block stack */
	int		*rstack;	/* stack of inverted `ie' values */
	struct ohash	*reqtab;	/* lookup table for requests */
	struct roffreg	*regtab;	/* list of number registers */
	struct roffkv	*strtab;	/* user-defined strings and macros */
	struct roffkv	*rentab;	/* renamed strings and macros */
	struct roffkv	*xmbtab;	/* `tr' table, multi-byte */
	struct roffstr	*xtab;		/* `tr' table, single-byte */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl;	/* first table parsed */
	struct tbl_node	*last_tbl;	/* last table parsed */
	struct tbl_node	*tbl;		/* table currently being parsed */
	struct eqn_node	*last_eqn;	/* last equation parsed */
	struct eqn_node	*first_eqn;	/* first equation parsed */
	struct eqn_node	*eqn;		/* equation currently being parsed */
	int		 eqn_inline;	/* current equation is inline */
	int		 options;	/* parse options */
	int		 rstacksz;	/* current size limit of rstack */
	int		 rstackpos;	/* position in rstack */
	int		 format;	/* current file in mdoc or man format */
	int		 argc;		/* number of args of the last macro */
	char		 control;	/* control character */
	char		 escape;	/* escape character */
};
113
114 struct roffnode {
115 enum roff_tok tok; /* type of node */
116 struct roffnode *parent; /* up one in stack */
117 int line; /* parse line */
118 int col; /* parse col */
119 char *name; /* node name, e.g. macro name */
120 char *end; /* end-rules: custom token */
121 int endspan; /* end-rules: next-line or infty */
122 int rule; /* current evaluation rule */
123 };
124
/*
 * Parameter list shared by all request handlers:
 * r (parse context), tok (token of the macro), buf (input buffer),
 * ln (parse line), ppos (original position in the buffer),
 * pos (current position in the buffer) and
 * offs (reset offset of the buffer data).
 */
#define	ROFF_ARGS	struct roff *r, \
			enum roff_tok tok, \
			struct buf *buf, \
			int ln, \
			int ppos, \
			int pos, \
			int *offs

/* Signature of a request handler. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
134
135 struct roffmac {
136 roffproc proc; /* process new macro */
137 roffproc text; /* process as child text of macro */
138 roffproc sub; /* process as child of macro */
139 int flags;
140 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
141 };
142
/*
 * One predefined string: an input name and the symbol replacing it.
 */
struct predef {
	const char	*name;	/* predefined input name */
	const char	*str;	/* replacement symbol */
};

/* Expands to one initializer of struct predef, trailing comma included. */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
150
151 /* --- function prototypes ------------------------------------------------ */
152
153 static void roffnode_cleanscope(struct roff *);
154 static void roffnode_pop(struct roff *);
155 static void roffnode_push(struct roff *, enum roff_tok,
156 const char *, int, int);
157 static enum rofferr roff_als(ROFF_ARGS);
158 static enum rofferr roff_block(ROFF_ARGS);
159 static enum rofferr roff_block_text(ROFF_ARGS);
160 static enum rofferr roff_block_sub(ROFF_ARGS);
161 static enum rofferr roff_br(ROFF_ARGS);
162 static enum rofferr roff_cblock(ROFF_ARGS);
163 static enum rofferr roff_cc(ROFF_ARGS);
164 static void roff_ccond(struct roff *, int, int);
165 static enum rofferr roff_cond(ROFF_ARGS);
166 static enum rofferr roff_cond_text(ROFF_ARGS);
167 static enum rofferr roff_cond_sub(ROFF_ARGS);
168 static enum rofferr roff_ds(ROFF_ARGS);
169 static enum rofferr roff_ec(ROFF_ARGS);
170 static enum rofferr roff_eo(ROFF_ARGS);
171 static enum rofferr roff_eqndelim(struct roff *, struct buf *, int);
172 static int roff_evalcond(struct roff *r, int, char *, int *);
173 static int roff_evalnum(struct roff *, int,
174 const char *, int *, int *, int);
175 static int roff_evalpar(struct roff *, int,
176 const char *, int *, int *, int);
177 static int roff_evalstrcond(const char *, int *);
178 static void roff_free1(struct roff *);
179 static void roff_freereg(struct roffreg *);
180 static void roff_freestr(struct roffkv *);
181 static size_t roff_getname(struct roff *, char **, int, int);
182 static int roff_getnum(const char *, int *, int *, int);
183 static int roff_getop(const char *, int *, char *);
184 static int roff_getregn(const struct roff *,
185 const char *, size_t);
186 static int roff_getregro(const struct roff *,
187 const char *name);
188 static const char *roff_getstrn(const struct roff *,
189 const char *, size_t, int *);
190 static int roff_hasregn(const struct roff *,
191 const char *, size_t);
192 static enum rofferr roff_insec(ROFF_ARGS);
193 static enum rofferr roff_it(ROFF_ARGS);
194 static enum rofferr roff_line_ignore(ROFF_ARGS);
195 static void roff_man_alloc1(struct roff_man *);
196 static void roff_man_free1(struct roff_man *);
197 static enum rofferr roff_manyarg(ROFF_ARGS);
198 static enum rofferr roff_nr(ROFF_ARGS);
199 static enum rofferr roff_onearg(ROFF_ARGS);
200 static enum roff_tok roff_parse(struct roff *, char *, int *,
201 int, int);
202 static enum rofferr roff_parsetext(struct roff *, struct buf *,
203 int, int *);
204 static enum rofferr roff_renamed(ROFF_ARGS);
205 static enum rofferr roff_res(struct roff *, struct buf *, int, int);
206 static enum rofferr roff_rm(ROFF_ARGS);
207 static enum rofferr roff_rn(ROFF_ARGS);
208 static enum rofferr roff_rr(ROFF_ARGS);
209 static void roff_setstr(struct roff *,
210 const char *, const char *, int);
211 static void roff_setstrn(struct roffkv **, const char *,
212 size_t, const char *, size_t, int);
213 static enum rofferr roff_so(ROFF_ARGS);
214 static enum rofferr roff_tr(ROFF_ARGS);
215 static enum rofferr roff_Dd(ROFF_ARGS);
216 static enum rofferr roff_TE(ROFF_ARGS);
217 static enum rofferr roff_TS(ROFF_ARGS);
218 static enum rofferr roff_EQ(ROFF_ARGS);
219 static enum rofferr roff_EN(ROFF_ARGS);
220 static enum rofferr roff_T_(ROFF_ARGS);
221 static enum rofferr roff_unsupp(ROFF_ARGS);
222 static enum rofferr roff_userdef(ROFF_ARGS);
223
224 /* --- constant data ------------------------------------------------------ */
225
/* Flags for numeric parsing. */
#define	ROFFNUM_SCALE	(1 << 0)	/* roff_getnum(): honour scaling */
#define	ROFFNUM_WHITE	(1 << 1)	/* roff_evalnum(): skip whitespace */
228
229 const char *__roff_name[MAN_MAX + 1] = {
230 "br", "ce", "ft", "ll",
231 "mc", "po", "rj", "sp",
232 "ta", "ti", NULL,
233 "ab", "ad", "af", "aln",
234 "als", "am", "am1", "ami",
235 "ami1", "as", "as1", "asciify",
236 "backtrace", "bd", "bleedat", "blm",
237 "box", "boxa", "bp", "BP",
238 "break", "breakchar", "brnl", "brp",
239 "brpnl", "c2", "cc",
240 "cf", "cflags", "ch", "char",
241 "chop", "class", "close", "CL",
242 "color", "composite", "continue", "cp",
243 "cropat", "cs", "cu", "da",
244 "dch", "Dd", "de", "de1",
245 "defcolor", "dei", "dei1", "device",
246 "devicem", "di", "do", "ds",
247 "ds1", "dwh", "dt", "ec",
248 "ecr", "ecs", "el", "em",
249 "EN", "eo", "EP", "EQ",
250 "errprint", "ev", "evc", "ex",
251 "fallback", "fam", "fc", "fchar",
252 "fcolor", "fdeferlig", "feature", "fkern",
253 "fl", "flig", "fp", "fps",
254 "fschar", "fspacewidth", "fspecial", "ftr",
255 "fzoom", "gcolor", "hc", "hcode",
256 "hidechar", "hla", "hlm", "hpf",
257 "hpfa", "hpfcode", "hw", "hy",
258 "hylang", "hylen", "hym", "hypp",
259 "hys", "ie", "if", "ig",
260 "index", "it", "itc", "IX",
261 "kern", "kernafter", "kernbefore", "kernpair",
262 "lc", "lc_ctype", "lds", "length",
263 "letadj", "lf", "lg", "lhang",
264 "linetabs", "lnr", "lnrf", "lpfx",
265 "ls", "lsm", "lt",
266 "mediasize", "minss", "mk", "mso",
267 "na", "ne", "nh", "nhychar",
268 "nm", "nn", "nop", "nr",
269 "nrf", "nroff", "ns", "nx",
270 "open", "opena", "os", "output",
271 "padj", "papersize", "pc", "pev",
272 "pi", "PI", "pl", "pm",
273 "pn", "pnr", "ps",
274 "psbb", "pshape", "pso", "ptr",
275 "pvs", "rchar", "rd", "recursionlimit",
276 "return", "rfschar", "rhang",
277 "rm", "rn", "rnn", "rr",
278 "rs", "rt", "schar", "sentchar",
279 "shc", "shift", "sizes", "so",
280 "spacewidth", "special", "spreadwarn", "ss",
281 "sty", "substring", "sv", "sy",
282 "T&", "tc", "TE",
283 "TH", "tkf", "tl",
284 "tm", "tm1", "tmc", "tr",
285 "track", "transchar", "trf", "trimat",
286 "trin", "trnt", "troff", "TS",
287 "uf", "ul", "unformat", "unwatch",
288 "unwatchn", "vpt", "vs", "warn",
289 "warnscale", "watch", "watchlength", "watchn",
290 "wh", "while", "write", "writec",
291 "writem", "xflag", ".", NULL,
292 NULL, "text",
293 "Dd", "Dt", "Os", "Sh",
294 "Ss", "Pp", "D1", "Dl",
295 "Bd", "Ed", "Bl", "El",
296 "It", "Ad", "An", "Ap",
297 "Ar", "Cd", "Cm", "Dv",
298 "Er", "Ev", "Ex", "Fa",
299 "Fd", "Fl", "Fn", "Ft",
300 "Ic", "In", "Li", "Nd",
301 "Nm", "Op", "Ot", "Pa",
302 "Rv", "St", "Va", "Vt",
303 "Xr", "%A", "%B", "%D",
304 "%I", "%J", "%N", "%O",
305 "%P", "%R", "%T", "%V",
306 "Ac", "Ao", "Aq", "At",
307 "Bc", "Bf", "Bo", "Bq",
308 "Bsx", "Bx", "Db", "Dc",
309 "Do", "Dq", "Ec", "Ef",
310 "Em", "Eo", "Fx", "Ms",
311 "No", "Ns", "Nx", "Ox",
312 "Pc", "Pf", "Po", "Pq",
313 "Qc", "Ql", "Qo", "Qq",
314 "Re", "Rs", "Sc", "So",
315 "Sq", "Sm", "Sx", "Sy",
316 "Tn", "Ux", "Xc", "Xo",
317 "Fo", "Fc", "Oo", "Oc",
318 "Bk", "Ek", "Bt", "Hf",
319 "Fr", "Ud", "Lb", "Lp",
320 "Lk", "Mt", "Brq", "Bro",
321 "Brc", "%C", "Es", "En",
322 "Dx", "%Q", "%U", "Ta",
323 NULL,
324 "TH", "SH", "SS", "TP",
325 "LP", "PP", "P", "IP",
326 "HP", "SM", "SB", "BI",
327 "IB", "BR", "RB", "R",
328 "B", "I", "IR", "RI",
329 "nf", "fi",
330 "RE", "RS", "DT", "UC",
331 "PD", "AT", "in",
332 "OP", "EX", "EE", "UR",
333 "UE", "MT", "ME", NULL
334 };
335 const char *const *roff_name = __roff_name;
336
337 static struct roffmac roffs[TOKEN_NONE] = {
338 { roff_br, NULL, NULL, 0 }, /* br */
339 { roff_onearg, NULL, NULL, 0 }, /* ce */
340 { roff_onearg, NULL, NULL, 0 }, /* ft */
341 { roff_onearg, NULL, NULL, 0 }, /* ll */
342 { roff_onearg, NULL, NULL, 0 }, /* mc */
343 { roff_onearg, NULL, NULL, 0 }, /* po */
344 { roff_onearg, NULL, NULL, 0 }, /* rj */
345 { roff_onearg, NULL, NULL, 0 }, /* sp */
346 { roff_manyarg, NULL, NULL, 0 }, /* ta */
347 { roff_onearg, NULL, NULL, 0 }, /* ti */
348 { NULL, NULL, NULL, 0 }, /* ROFF_MAX */
349 { roff_unsupp, NULL, NULL, 0 }, /* ab */
350 { roff_line_ignore, NULL, NULL, 0 }, /* ad */
351 { roff_line_ignore, NULL, NULL, 0 }, /* af */
352 { roff_unsupp, NULL, NULL, 0 }, /* aln */
353 { roff_als, NULL, NULL, 0 }, /* als */
354 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am */
355 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am1 */
356 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami */
357 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami1 */
358 { roff_ds, NULL, NULL, 0 }, /* as */
359 { roff_ds, NULL, NULL, 0 }, /* as1 */
360 { roff_unsupp, NULL, NULL, 0 }, /* asciify */
361 { roff_line_ignore, NULL, NULL, 0 }, /* backtrace */
362 { roff_line_ignore, NULL, NULL, 0 }, /* bd */
363 { roff_line_ignore, NULL, NULL, 0 }, /* bleedat */
364 { roff_unsupp, NULL, NULL, 0 }, /* blm */
365 { roff_unsupp, NULL, NULL, 0 }, /* box */
366 { roff_unsupp, NULL, NULL, 0 }, /* boxa */
367 { roff_line_ignore, NULL, NULL, 0 }, /* bp */
368 { roff_unsupp, NULL, NULL, 0 }, /* BP */
369 { roff_unsupp, NULL, NULL, 0 }, /* break */
370 { roff_line_ignore, NULL, NULL, 0 }, /* breakchar */
371 { roff_line_ignore, NULL, NULL, 0 }, /* brnl */
372 { roff_br, NULL, NULL, 0 }, /* brp */
373 { roff_line_ignore, NULL, NULL, 0 }, /* brpnl */
374 { roff_unsupp, NULL, NULL, 0 }, /* c2 */
375 { roff_cc, NULL, NULL, 0 }, /* cc */
376 { roff_insec, NULL, NULL, 0 }, /* cf */
377 { roff_line_ignore, NULL, NULL, 0 }, /* cflags */
378 { roff_line_ignore, NULL, NULL, 0 }, /* ch */
379 { roff_unsupp, NULL, NULL, 0 }, /* char */
380 { roff_unsupp, NULL, NULL, 0 }, /* chop */
381 { roff_line_ignore, NULL, NULL, 0 }, /* class */
382 { roff_insec, NULL, NULL, 0 }, /* close */
383 { roff_unsupp, NULL, NULL, 0 }, /* CL */
384 { roff_line_ignore, NULL, NULL, 0 }, /* color */
385 { roff_unsupp, NULL, NULL, 0 }, /* composite */
386 { roff_unsupp, NULL, NULL, 0 }, /* continue */
387 { roff_line_ignore, NULL, NULL, 0 }, /* cp */
388 { roff_line_ignore, NULL, NULL, 0 }, /* cropat */
389 { roff_line_ignore, NULL, NULL, 0 }, /* cs */
390 { roff_line_ignore, NULL, NULL, 0 }, /* cu */
391 { roff_unsupp, NULL, NULL, 0 }, /* da */
392 { roff_unsupp, NULL, NULL, 0 }, /* dch */
393 { roff_Dd, NULL, NULL, 0 }, /* Dd */
394 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de */
395 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de1 */
396 { roff_line_ignore, NULL, NULL, 0 }, /* defcolor */
397 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei */
398 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei1 */
399 { roff_unsupp, NULL, NULL, 0 }, /* device */
400 { roff_unsupp, NULL, NULL, 0 }, /* devicem */
401 { roff_unsupp, NULL, NULL, 0 }, /* di */
402 { roff_unsupp, NULL, NULL, 0 }, /* do */
403 { roff_ds, NULL, NULL, 0 }, /* ds */
404 { roff_ds, NULL, NULL, 0 }, /* ds1 */
405 { roff_unsupp, NULL, NULL, 0 }, /* dwh */
406 { roff_unsupp, NULL, NULL, 0 }, /* dt */
407 { roff_ec, NULL, NULL, 0 }, /* ec */
408 { roff_unsupp, NULL, NULL, 0 }, /* ecr */
409 { roff_unsupp, NULL, NULL, 0 }, /* ecs */
410 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* el */
411 { roff_unsupp, NULL, NULL, 0 }, /* em */
412 { roff_EN, NULL, NULL, 0 }, /* EN */
413 { roff_eo, NULL, NULL, 0 }, /* eo */
414 { roff_unsupp, NULL, NULL, 0 }, /* EP */
415 { roff_EQ, NULL, NULL, 0 }, /* EQ */
416 { roff_line_ignore, NULL, NULL, 0 }, /* errprint */
417 { roff_unsupp, NULL, NULL, 0 }, /* ev */
418 { roff_unsupp, NULL, NULL, 0 }, /* evc */
419 { roff_unsupp, NULL, NULL, 0 }, /* ex */
420 { roff_line_ignore, NULL, NULL, 0 }, /* fallback */
421 { roff_line_ignore, NULL, NULL, 0 }, /* fam */
422 { roff_unsupp, NULL, NULL, 0 }, /* fc */
423 { roff_unsupp, NULL, NULL, 0 }, /* fchar */
424 { roff_line_ignore, NULL, NULL, 0 }, /* fcolor */
425 { roff_line_ignore, NULL, NULL, 0 }, /* fdeferlig */
426 { roff_line_ignore, NULL, NULL, 0 }, /* feature */
427 { roff_line_ignore, NULL, NULL, 0 }, /* fkern */
428 { roff_line_ignore, NULL, NULL, 0 }, /* fl */
429 { roff_line_ignore, NULL, NULL, 0 }, /* flig */
430 { roff_line_ignore, NULL, NULL, 0 }, /* fp */
431 { roff_line_ignore, NULL, NULL, 0 }, /* fps */
432 { roff_unsupp, NULL, NULL, 0 }, /* fschar */
433 { roff_line_ignore, NULL, NULL, 0 }, /* fspacewidth */
434 { roff_line_ignore, NULL, NULL, 0 }, /* fspecial */
435 { roff_line_ignore, NULL, NULL, 0 }, /* ftr */
436 { roff_line_ignore, NULL, NULL, 0 }, /* fzoom */
437 { roff_line_ignore, NULL, NULL, 0 }, /* gcolor */
438 { roff_line_ignore, NULL, NULL, 0 }, /* hc */
439 { roff_line_ignore, NULL, NULL, 0 }, /* hcode */
440 { roff_line_ignore, NULL, NULL, 0 }, /* hidechar */
441 { roff_line_ignore, NULL, NULL, 0 }, /* hla */
442 { roff_line_ignore, NULL, NULL, 0 }, /* hlm */
443 { roff_line_ignore, NULL, NULL, 0 }, /* hpf */
444 { roff_line_ignore, NULL, NULL, 0 }, /* hpfa */
445 { roff_line_ignore, NULL, NULL, 0 }, /* hpfcode */
446 { roff_line_ignore, NULL, NULL, 0 }, /* hw */
447 { roff_line_ignore, NULL, NULL, 0 }, /* hy */
448 { roff_line_ignore, NULL, NULL, 0 }, /* hylang */
449 { roff_line_ignore, NULL, NULL, 0 }, /* hylen */
450 { roff_line_ignore, NULL, NULL, 0 }, /* hym */
451 { roff_line_ignore, NULL, NULL, 0 }, /* hypp */
452 { roff_line_ignore, NULL, NULL, 0 }, /* hys */
453 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* ie */
454 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* if */
455 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ig */
456 { roff_unsupp, NULL, NULL, 0 }, /* index */
457 { roff_it, NULL, NULL, 0 }, /* it */
458 { roff_unsupp, NULL, NULL, 0 }, /* itc */
459 { roff_line_ignore, NULL, NULL, 0 }, /* IX */
460 { roff_line_ignore, NULL, NULL, 0 }, /* kern */
461 { roff_line_ignore, NULL, NULL, 0 }, /* kernafter */
462 { roff_line_ignore, NULL, NULL, 0 }, /* kernbefore */
463 { roff_line_ignore, NULL, NULL, 0 }, /* kernpair */
464 { roff_unsupp, NULL, NULL, 0 }, /* lc */
465 { roff_unsupp, NULL, NULL, 0 }, /* lc_ctype */
466 { roff_unsupp, NULL, NULL, 0 }, /* lds */
467 { roff_unsupp, NULL, NULL, 0 }, /* length */
468 { roff_line_ignore, NULL, NULL, 0 }, /* letadj */
469 { roff_insec, NULL, NULL, 0 }, /* lf */
470 { roff_line_ignore, NULL, NULL, 0 }, /* lg */
471 { roff_line_ignore, NULL, NULL, 0 }, /* lhang */
472 { roff_unsupp, NULL, NULL, 0 }, /* linetabs */
473 { roff_unsupp, NULL, NULL, 0 }, /* lnr */
474 { roff_unsupp, NULL, NULL, 0 }, /* lnrf */
475 { roff_unsupp, NULL, NULL, 0 }, /* lpfx */
476 { roff_line_ignore, NULL, NULL, 0 }, /* ls */
477 { roff_unsupp, NULL, NULL, 0 }, /* lsm */
478 { roff_line_ignore, NULL, NULL, 0 }, /* lt */
479 { roff_line_ignore, NULL, NULL, 0 }, /* mediasize */
480 { roff_line_ignore, NULL, NULL, 0 }, /* minss */
481 { roff_line_ignore, NULL, NULL, 0 }, /* mk */
482 { roff_insec, NULL, NULL, 0 }, /* mso */
483 { roff_line_ignore, NULL, NULL, 0 }, /* na */
484 { roff_line_ignore, NULL, NULL, 0 }, /* ne */
485 { roff_line_ignore, NULL, NULL, 0 }, /* nh */
486 { roff_line_ignore, NULL, NULL, 0 }, /* nhychar */
487 { roff_unsupp, NULL, NULL, 0 }, /* nm */
488 { roff_unsupp, NULL, NULL, 0 }, /* nn */
489 { roff_unsupp, NULL, NULL, 0 }, /* nop */
490 { roff_nr, NULL, NULL, 0 }, /* nr */
491 { roff_unsupp, NULL, NULL, 0 }, /* nrf */
492 { roff_line_ignore, NULL, NULL, 0 }, /* nroff */
493 { roff_line_ignore, NULL, NULL, 0 }, /* ns */
494 { roff_insec, NULL, NULL, 0 }, /* nx */
495 { roff_insec, NULL, NULL, 0 }, /* open */
496 { roff_insec, NULL, NULL, 0 }, /* opena */
497 { roff_line_ignore, NULL, NULL, 0 }, /* os */
498 { roff_unsupp, NULL, NULL, 0 }, /* output */
499 { roff_line_ignore, NULL, NULL, 0 }, /* padj */
500 { roff_line_ignore, NULL, NULL, 0 }, /* papersize */
501 { roff_line_ignore, NULL, NULL, 0 }, /* pc */
502 { roff_line_ignore, NULL, NULL, 0 }, /* pev */
503 { roff_insec, NULL, NULL, 0 }, /* pi */
504 { roff_unsupp, NULL, NULL, 0 }, /* PI */
505 { roff_line_ignore, NULL, NULL, 0 }, /* pl */
506 { roff_line_ignore, NULL, NULL, 0 }, /* pm */
507 { roff_line_ignore, NULL, NULL, 0 }, /* pn */
508 { roff_line_ignore, NULL, NULL, 0 }, /* pnr */
509 { roff_line_ignore, NULL, NULL, 0 }, /* ps */
510 { roff_unsupp, NULL, NULL, 0 }, /* psbb */
511 { roff_unsupp, NULL, NULL, 0 }, /* pshape */
512 { roff_insec, NULL, NULL, 0 }, /* pso */
513 { roff_line_ignore, NULL, NULL, 0 }, /* ptr */
514 { roff_line_ignore, NULL, NULL, 0 }, /* pvs */
515 { roff_unsupp, NULL, NULL, 0 }, /* rchar */
516 { roff_line_ignore, NULL, NULL, 0 }, /* rd */
517 { roff_line_ignore, NULL, NULL, 0 }, /* recursionlimit */
518 { roff_unsupp, NULL, NULL, 0 }, /* return */
519 { roff_unsupp, NULL, NULL, 0 }, /* rfschar */
520 { roff_line_ignore, NULL, NULL, 0 }, /* rhang */
521 { roff_rm, NULL, NULL, 0 }, /* rm */
522 { roff_rn, NULL, NULL, 0 }, /* rn */
523 { roff_unsupp, NULL, NULL, 0 }, /* rnn */
524 { roff_rr, NULL, NULL, 0 }, /* rr */
525 { roff_line_ignore, NULL, NULL, 0 }, /* rs */
526 { roff_line_ignore, NULL, NULL, 0 }, /* rt */
527 { roff_unsupp, NULL, NULL, 0 }, /* schar */
528 { roff_line_ignore, NULL, NULL, 0 }, /* sentchar */
529 { roff_line_ignore, NULL, NULL, 0 }, /* shc */
530 { roff_unsupp, NULL, NULL, 0 }, /* shift */
531 { roff_line_ignore, NULL, NULL, 0 }, /* sizes */
532 { roff_so, NULL, NULL, 0 }, /* so */
533 { roff_line_ignore, NULL, NULL, 0 }, /* spacewidth */
534 { roff_line_ignore, NULL, NULL, 0 }, /* special */
535 { roff_line_ignore, NULL, NULL, 0 }, /* spreadwarn */
536 { roff_line_ignore, NULL, NULL, 0 }, /* ss */
537 { roff_line_ignore, NULL, NULL, 0 }, /* sty */
538 { roff_unsupp, NULL, NULL, 0 }, /* substring */
539 { roff_line_ignore, NULL, NULL, 0 }, /* sv */
540 { roff_insec, NULL, NULL, 0 }, /* sy */
541 { roff_T_, NULL, NULL, 0 }, /* T& */
542 { roff_unsupp, NULL, NULL, 0 }, /* tc */
543 { roff_TE, NULL, NULL, 0 }, /* TE */
544 { roff_Dd, NULL, NULL, 0 }, /* TH */
545 { roff_line_ignore, NULL, NULL, 0 }, /* tkf */
546 { roff_unsupp, NULL, NULL, 0 }, /* tl */
547 { roff_line_ignore, NULL, NULL, 0 }, /* tm */
548 { roff_line_ignore, NULL, NULL, 0 }, /* tm1 */
549 { roff_line_ignore, NULL, NULL, 0 }, /* tmc */
550 { roff_tr, NULL, NULL, 0 }, /* tr */
551 { roff_line_ignore, NULL, NULL, 0 }, /* track */
552 { roff_line_ignore, NULL, NULL, 0 }, /* transchar */
553 { roff_insec, NULL, NULL, 0 }, /* trf */
554 { roff_line_ignore, NULL, NULL, 0 }, /* trimat */
555 { roff_unsupp, NULL, NULL, 0 }, /* trin */
556 { roff_unsupp, NULL, NULL, 0 }, /* trnt */
557 { roff_line_ignore, NULL, NULL, 0 }, /* troff */
558 { roff_TS, NULL, NULL, 0 }, /* TS */
559 { roff_line_ignore, NULL, NULL, 0 }, /* uf */
560 { roff_line_ignore, NULL, NULL, 0 }, /* ul */
561 { roff_unsupp, NULL, NULL, 0 }, /* unformat */
562 { roff_line_ignore, NULL, NULL, 0 }, /* unwatch */
563 { roff_line_ignore, NULL, NULL, 0 }, /* unwatchn */
564 { roff_line_ignore, NULL, NULL, 0 }, /* vpt */
565 { roff_line_ignore, NULL, NULL, 0 }, /* vs */
566 { roff_line_ignore, NULL, NULL, 0 }, /* warn */
567 { roff_line_ignore, NULL, NULL, 0 }, /* warnscale */
568 { roff_line_ignore, NULL, NULL, 0 }, /* watch */
569 { roff_line_ignore, NULL, NULL, 0 }, /* watchlength */
570 { roff_line_ignore, NULL, NULL, 0 }, /* watchn */
571 { roff_unsupp, NULL, NULL, 0 }, /* wh */
572 { roff_unsupp, NULL, NULL, 0 }, /* while */
573 { roff_insec, NULL, NULL, 0 }, /* write */
574 { roff_insec, NULL, NULL, 0 }, /* writec */
575 { roff_insec, NULL, NULL, 0 }, /* writem */
576 { roff_line_ignore, NULL, NULL, 0 }, /* xflag */
577 { roff_cblock, NULL, NULL, 0 }, /* . */
578 { roff_renamed, NULL, NULL, 0 },
579 { roff_userdef, NULL, NULL, 0 }
580 };
581
582 /* Array of injected predefined strings. */
583 #define PREDEFS_MAX 38
584 static const struct predef predefs[PREDEFS_MAX] = {
585 #include "predefs.in"
586 };
587
588 static int roffce_lines; /* number of input lines to center */
589 static struct roff_node *roffce_node; /* active request */
590 static int roffit_lines; /* number of lines to delay */
591 static char *roffit_macro; /* nil-terminated macro line */
592
593
594 /* --- request table ------------------------------------------------------ */
595
596 struct ohash *
597 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
598 {
599 struct ohash *htab;
600 struct roffreq *req;
601 enum roff_tok tok;
602 size_t sz;
603 unsigned int slot;
604
605 htab = mandoc_malloc(sizeof(*htab));
606 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
607
608 for (tok = mintok; tok < maxtok; tok++) {
609 if (roff_name[tok] == NULL)
610 continue;
611 sz = strlen(roff_name[tok]);
612 req = mandoc_malloc(sizeof(*req) + sz + 1);
613 req->tok = tok;
614 memcpy(req->name, roff_name[tok], sz + 1);
615 slot = ohash_qlookup(htab, req->name);
616 ohash_insert(htab, slot, req);
617 }
618 return htab;
619 }
620
/*
 * Release a table built by roffhash_alloc(): first every entry,
 * then the table itself.  A NULL table is silently accepted.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 idx;

	if (htab == NULL)
		return;

	entry = ohash_first(htab, &idx);
	while (entry != NULL) {
		free(entry);
		entry = ohash_next(htab, &idx);
	}
	ohash_delete(htab);
	free(htab);
}
635
636 enum roff_tok
637 roffhash_find(struct ohash *htab, const char *name, size_t sz)
638 {
639 struct roffreq *req;
640 const char *end;
641
642 if (sz) {
643 end = name + sz;
644 req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
645 } else
646 req = ohash_find(htab, ohash_qlookup(htab, name));
647 return req == NULL ? TOKEN_NONE : req->tok;
648 }
649
650 /* --- stack of request blocks -------------------------------------------- */
651
652 /*
653 * Pop the current node off of the stack of roff instructions currently
654 * pending.
655 */
656 static void
657 roffnode_pop(struct roff *r)
658 {
659 struct roffnode *p;
660
661 assert(r->last);
662 p = r->last;
663
664 r->last = r->last->parent;
665 free(p->name);
666 free(p->end);
667 free(p);
668 }
669
670 /*
671 * Push a roff node onto the instruction stack. This must later be
672 * removed with roffnode_pop().
673 */
674 static void
675 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
676 int line, int col)
677 {
678 struct roffnode *p;
679
680 p = mandoc_calloc(1, sizeof(struct roffnode));
681 p->tok = tok;
682 if (name)
683 p->name = mandoc_strdup(name);
684 p->parent = r->last;
685 p->line = line;
686 p->col = col;
687 p->rule = p->parent ? p->parent->rule : 0;
688
689 r->last = p;
690 }
691
692 /* --- roff parser state data management ---------------------------------- */
693
694 static void
695 roff_free1(struct roff *r)
696 {
697 struct tbl_node *tbl;
698 struct eqn_node *e;
699 int i;
700
701 while (NULL != (tbl = r->first_tbl)) {
702 r->first_tbl = tbl->next;
703 tbl_free(tbl);
704 }
705 r->first_tbl = r->last_tbl = r->tbl = NULL;
706
707 while (NULL != (e = r->first_eqn)) {
708 r->first_eqn = e->next;
709 eqn_free(e);
710 }
711 r->first_eqn = r->last_eqn = r->eqn = NULL;
712
713 while (r->last)
714 roffnode_pop(r);
715
716 free (r->rstack);
717 r->rstack = NULL;
718 r->rstacksz = 0;
719 r->rstackpos = -1;
720
721 roff_freereg(r->regtab);
722 r->regtab = NULL;
723
724 roff_freestr(r->strtab);
725 roff_freestr(r->rentab);
726 roff_freestr(r->xmbtab);
727 r->strtab = r->rentab = r->xmbtab = NULL;
728
729 if (r->xtab)
730 for (i = 0; i < 128; i++)
731 free(r->xtab[i].p);
732 free(r->xtab);
733 r->xtab = NULL;
734 }
735
736 void
737 roff_reset(struct roff *r)
738 {
739 roff_free1(r);
740 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
741 r->control = '\0';
742 r->escape = '\\';
743 roffce_lines = 0;
744 roffce_node = NULL;
745 roffit_lines = 0;
746 roffit_macro = NULL;
747 }
748
749 void
750 roff_free(struct roff *r)
751 {
752 roff_free1(r);
753 roffhash_free(r->reqtab);
754 free(r);
755 }
756
757 struct roff *
758 roff_alloc(struct mparse *parse, int options)
759 {
760 struct roff *r;
761
762 r = mandoc_calloc(1, sizeof(struct roff));
763 r->parse = parse;
764 r->reqtab = roffhash_alloc(0, ROFF_USERDEF);
765 r->options = options;
766 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
767 r->rstackpos = -1;
768 r->escape = '\\';
769 return r;
770 }
771
772 /* --- syntax tree state data management ---------------------------------- */
773
774 static void
775 roff_man_free1(struct roff_man *man)
776 {
777
778 if (man->first != NULL)
779 roff_node_delete(man, man->first);
780 free(man->meta.msec);
781 free(man->meta.vol);
782 free(man->meta.os);
783 free(man->meta.arch);
784 free(man->meta.title);
785 free(man->meta.name);
786 free(man->meta.date);
787 }
788
789 static void
790 roff_man_alloc1(struct roff_man *man)
791 {
792
793 memset(&man->meta, 0, sizeof(man->meta));
794 man->first = mandoc_calloc(1, sizeof(*man->first));
795 man->first->type = ROFFT_ROOT;
796 man->last = man->first;
797 man->last_es = NULL;
798 man->flags = 0;
799 man->macroset = MACROSET_NONE;
800 man->lastsec = man->lastnamed = SEC_NONE;
801 man->next = ROFF_NEXT_CHILD;
802 }
803
/*
 * Discard the current document and start over with an empty one,
 * keeping the struct roff_man itself.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);	/* old tree and metadata */
	roff_man_alloc1(man);	/* new, empty root node */
}
811
/*
 * Destroy a document parser: its tree, its metadata and the
 * structure itself.
 */
void
roff_man_free(struct roff_man *man)
{
	roff_man_free1(man);
	free(man);
}
819
820 struct roff_man *
821 roff_man_alloc(struct roff *roff, struct mparse *parse,
822 const char *os_s, int quick)
823 {
824 struct roff_man *man;
825
826 man = mandoc_calloc(1, sizeof(*man));
827 man->parse = parse;
828 man->roff = roff;
829 man->os_s = os_s;
830 man->quick = quick;
831 roff_man_alloc1(man);
832 roff->man = man;
833 return man;
834 }
835
836 /* --- syntax tree handling ----------------------------------------------- */
837
838 struct roff_node *
839 roff_node_alloc(struct roff_man *man, int line, int pos,
840 enum roff_type type, int tok)
841 {
842 struct roff_node *n;
843
844 n = mandoc_calloc(1, sizeof(*n));
845 n->line = line;
846 n->pos = pos;
847 n->tok = tok;
848 n->type = type;
849 n->sec = man->lastsec;
850
851 if (man->flags & MDOC_SYNOPSIS)
852 n->flags |= NODE_SYNPRETTY;
853 else
854 n->flags &= ~NODE_SYNPRETTY;
855 if (man->flags & MDOC_NEWLINE)
856 n->flags |= NODE_LINE;
857 man->flags &= ~MDOC_NEWLINE;
858
859 return n;
860 }
861
862 void
863 roff_node_append(struct roff_man *man, struct roff_node *n)
864 {
865
866 switch (man->next) {
867 case ROFF_NEXT_SIBLING:
868 if (man->last->next != NULL) {
869 n->next = man->last->next;
870 man->last->next->prev = n;
871 } else
872 man->last->parent->last = n;
873 man->last->next = n;
874 n->prev = man->last;
875 n->parent = man->last->parent;
876 break;
877 case ROFF_NEXT_CHILD:
878 if (man->last->child != NULL) {
879 n->next = man->last->child;
880 man->last->child->prev = n;
881 } else
882 man->last->last = n;
883 man->last->child = n;
884 n->parent = man->last;
885 break;
886 default:
887 abort();
888 }
889 man->last = n;
890
891 switch (n->type) {
892 case ROFFT_HEAD:
893 n->parent->head = n;
894 break;
895 case ROFFT_BODY:
896 if (n->end != ENDBODY_NOT)
897 return;
898 n->parent->body = n;
899 break;
900 case ROFFT_TAIL:
901 n->parent->tail = n;
902 break;
903 default:
904 return;
905 }
906
907 /*
908 * Copy over the normalised-data pointer of our parent. Not
909 * everybody has one, but copying a null pointer is fine.
910 */
911
912 n->norm = n->parent->norm;
913 assert(n->parent->type == ROFFT_BLOCK);
914 }
915
916 void
917 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
918 {
919 struct roff_node *n;
920
921 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
922 n->string = roff_strdup(man->roff, word);
923 roff_node_append(man, n);
924 n->flags |= NODE_VALID | NODE_ENDED;
925 man->next = ROFF_NEXT_SIBLING;
926 }
927
928 void
929 roff_word_append(struct roff_man *man, const char *word)
930 {
931 struct roff_node *n;
932 char *addstr, *newstr;
933
934 n = man->last;
935 addstr = roff_strdup(man->roff, word);
936 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
937 free(addstr);
938 free(n->string);
939 n->string = newstr;
940 man->next = ROFF_NEXT_SIBLING;
941 }
942
943 void
944 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
945 {
946 struct roff_node *n;
947
948 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
949 roff_node_append(man, n);
950 man->next = ROFF_NEXT_CHILD;
951 }
952
953 struct roff_node *
954 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
955 {
956 struct roff_node *n;
957
958 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
959 roff_node_append(man, n);
960 man->next = ROFF_NEXT_CHILD;
961 return n;
962 }
963
964 struct roff_node *
965 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
966 {
967 struct roff_node *n;
968
969 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
970 roff_node_append(man, n);
971 man->next = ROFF_NEXT_CHILD;
972 return n;
973 }
974
975 struct roff_node *
976 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
977 {
978 struct roff_node *n;
979
980 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
981 roff_node_append(man, n);
982 man->next = ROFF_NEXT_CHILD;
983 return n;
984 }
985
986 void
987 roff_addeqn(struct roff_man *man, const struct eqn *eqn)
988 {
989 struct roff_node *n;
990
991 n = roff_node_alloc(man, eqn->ln, eqn->pos, ROFFT_EQN, TOKEN_NONE);
992 n->eqn = eqn;
993 if (eqn->ln > man->last->line)
994 n->flags |= NODE_LINE;
995 roff_node_append(man, n);
996 man->next = ROFF_NEXT_SIBLING;
997 }
998
999 void
1000 roff_addtbl(struct roff_man *man, const struct tbl_span *tbl)
1001 {
1002 struct roff_node *n;
1003
1004 if (man->macroset == MACROSET_MAN)
1005 man_breakscope(man, ROFF_TS);
1006 n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE);
1007 n->span = tbl;
1008 roff_node_append(man, n);
1009 n->flags |= NODE_VALID | NODE_ENDED;
1010 man->next = ROFF_NEXT_SIBLING;
1011 }
1012
1013 void
1014 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1015 {
1016
1017 /* Adjust siblings. */
1018
1019 if (n->prev)
1020 n->prev->next = n->next;
1021 if (n->next)
1022 n->next->prev = n->prev;
1023
1024 /* Adjust parent. */
1025
1026 if (n->parent != NULL) {
1027 if (n->parent->child == n)
1028 n->parent->child = n->next;
1029 if (n->parent->last == n)
1030 n->parent->last = n->prev;
1031 }
1032
1033 /* Adjust parse point. */
1034
1035 if (man == NULL)
1036 return;
1037 if (man->last == n) {
1038 if (n->prev == NULL) {
1039 man->last = n->parent;
1040 man->next = ROFF_NEXT_CHILD;
1041 } else {
1042 man->last = n->prev;
1043 man->next = ROFF_NEXT_SIBLING;
1044 }
1045 }
1046 if (man->first == n)
1047 man->first = NULL;
1048 }
1049
1050 void
1051 roff_node_free(struct roff_node *n)
1052 {
1053
1054 if (n->args != NULL)
1055 mdoc_argv_free(n->args);
1056 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1057 free(n->norm);
1058 free(n->string);
1059 free(n);
1060 }
1061
1062 void
1063 roff_node_delete(struct roff_man *man, struct roff_node *n)
1064 {
1065
1066 while (n->child != NULL)
1067 roff_node_delete(man, n->child);
1068 roff_node_unlink(man, n);
1069 roff_node_free(n);
1070 }
1071
1072 void
1073 deroff(char **dest, const struct roff_node *n)
1074 {
1075 char *cp;
1076 size_t sz;
1077
1078 if (n->type != ROFFT_TEXT) {
1079 for (n = n->child; n != NULL; n = n->next)
1080 deroff(dest, n);
1081 return;
1082 }
1083
1084 /* Skip leading whitespace. */
1085
1086 for (cp = n->string; *cp != '\0'; cp++) {
1087 if (cp[0] == '\\' && cp[1] != '\0' &&
1088 strchr(" %&0^|~", cp[1]) != NULL)
1089 cp++;
1090 else if ( ! isspace((unsigned char)*cp))
1091 break;
1092 }
1093
1094 /* Skip trailing backslash. */
1095
1096 sz = strlen(cp);
1097 if (sz > 0 && cp[sz - 1] == '\\')
1098 sz--;
1099
1100 /* Skip trailing whitespace. */
1101
1102 for (; sz; sz--)
1103 if ( ! isspace((unsigned char)cp[sz-1]))
1104 break;
1105
1106 /* Skip empty strings. */
1107
1108 if (sz == 0)
1109 return;
1110
1111 if (*dest == NULL) {
1112 *dest = mandoc_strndup(cp, sz);
1113 return;
1114 }
1115
1116 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1117 free(*dest);
1118 *dest = cp;
1119 }
1120
1121 /* --- main functions of the roff parser ---------------------------------- */
1122
1123 /*
1124 * In the current line, expand escape sequences that tend to get
1125 * used in numerical expressions and conditional requests.
1126 * Also check the syntax of the remaining escape sequences.
1127 */
/*
 * Return values, as visible in this function:
 * ROFF_CONT when the line was processed (or is empty),
 * ROFF_APPEND when the line ends with a lone escape character
 * and no newline was seen, i.e. the caller has to supply more input,
 * ROFF_IGN when more than EXPAND_LIMIT expansions were needed
 * or when an expansion would grow the buffer beyond SHRT_MAX.
 * The buffer buf->buf may be reallocated; buf->sz is kept in sync.
 */
1128 static enum rofferr
1129 roff_res(struct roff *r, struct buf *buf, int ln, int pos)
1130 {
1131 char ubuf[24]; /* buffer to print the number */
1132 const char *start; /* start of the string to process */
1133 char *stesc; /* start of an escape sequence ('\\') */
1134 const char *stnam; /* start of the name, after "[(*" */
1135 const char *cp; /* end of the name, e.g. before ']' */
1136 const char *res; /* the string to be substituted */
1137 char *nbuf; /* new buffer to copy buf->buf to */
1138 size_t maxl; /* expected length of the escape name */
1139 size_t naml; /* actual length of the escape name */
1140 enum mandoc_esc esc; /* type of the escape sequence */
1141 enum mandoc_os os_e; /* kind of RCS id seen */
1142 int inaml; /* length returned from mandoc_escape() */
1143 int expand_count; /* to avoid infinite loops */
1144 int npos; /* position in numeric expression */
1145 int arg_complete; /* argument not interrupted by eol */
1146 int done; /* no more input available */
1147 int deftype; /* type of definition to paste */
1148 char term; /* character terminating the escape */
1149
1150 /* Search forward for comments. */
1151
1152 done = 0;
1153 start = buf->buf + pos;
1154 for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
1155 if (stesc[0] != r->escape || stesc[1] == '\0')
1156 continue;
1157 stesc++;
1158 if (*stesc != '"' && *stesc != '#')
1159 continue;
1160
1161 /* Comment found, look for RCS id. */
1162
/* The offsets 8 and 7 skip the keywords "$OpenBSD" and "$NetBSD". */
1163 if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
1164 os_e = MANDOC_OS_OPENBSD;
1165 cp += 8;
1166 } else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
1167 os_e = MANDOC_OS_NETBSD;
1168 cp += 7;
1169 }
/* os_e is only read when cp != NULL, i.e. after it was set above. */
1170 if (cp != NULL &&
1171 isalnum((unsigned char)*cp) == 0 &&
1172 strchr(cp, '$') != NULL) {
1173 if (r->man->meta.rcsids & (1 << os_e))
1174 mandoc_msg(MANDOCERR_RCS_REP, r->parse,
1175 ln, stesc + 1 - buf->buf, stesc + 1);
1176 r->man->meta.rcsids |= 1 << os_e;
1177 }
1178
1179 /* Handle trailing whitespace. */
1180
/* After this statement, stesc points to the escape character again. */
1181 cp = strchr(stesc--, '\0') - 1;
1182 if (*cp == '\n') {
1183 done = 1;
1184 cp--;
1185 }
1186 if (*cp == ' ' || *cp == '\t')
1187 mandoc_msg(MANDOCERR_SPACE_EOL, r->parse,
1188 ln, cp - buf->buf, NULL);
/* Cut off the comment together with the blanks preceding it. */
1189 while (stesc > start && stesc[-1] == ' ')
1190 stesc--;
1191 *stesc = '\0';
1192 break;
1193 }
1194 if (stesc == start)
1195 return ROFF_CONT;
1196 stesc--;
1197
1198 /* Notice the end of the input. */
1199
1200 if (*stesc == '\n') {
1201 *stesc-- = '\0';
1202 done = 1;
1203 }
1204
/* From here on, the line is scanned backwards, from the end to start. */
1205 expand_count = 0;
1206 while (stesc >= start) {
1207
1208 /* Search backwards for the next backslash. */
1209
1210 if (*stesc != r->escape) {
/*
 * A literal backslash that is not the current escape
 * character is rewritten as "\e" in a new buffer.
 */
1211 if (*stesc == '\\') {
1212 *stesc = '\0';
1213 buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
1214 buf->buf, stesc + 1) + 1;
1215 start = nbuf + pos;
1216 stesc = nbuf + (stesc - buf->buf);
1217 free(buf->buf);
1218 buf->buf = nbuf;
1219 }
1220 stesc--;
1221 continue;
1222 }
1223
1224 /* If it is escaped, skip it. */
1225
1226 for (cp = stesc - 1; cp >= start; cp--)
1227 if (*cp != r->escape)
1228 break;
1229
/*
 * stesc - cp is the length of the run of escape characters
 * ending at stesc: an even length means all of them are
 * escaped, and the whole run is rewritten to backslashes.
 */
1230 if ((stesc - cp) % 2 == 0) {
1231 while (stesc > cp)
1232 *stesc-- = '\\';
1233 continue;
1234 } else if (stesc[1] != '\0') {
1235 *stesc = '\\';
1236 } else {
/* Escape character at the very end of the line. */
1237 *stesc-- = '\0';
1238 if (done)
1239 continue;
1240 else
1241 return ROFF_APPEND;
1242 }
1243
1244 /* Decide whether to expand or to check only. */
1245
1246 term = '\0';
1247 cp = stesc + 1;
1248 switch (*cp) {
1249 case '*':
1250 res = NULL;
1251 break;
1252 case 'B':
1253 case 'w':
1254 term = cp[1];
1255 /* FALLTHROUGH */
1256 case 'n':
1257 res = ubuf;
1258 break;
1259 default:
1260 esc = mandoc_escape(&cp, &stnam, &inaml);
1261 if (esc == ESCAPE_ERROR ||
1262 (esc == ESCAPE_SPECIAL &&
1263 mchars_spec2cp(stnam, inaml) < 0))
1264 mandoc_vmsg(MANDOCERR_ESC_BAD,
1265 r->parse, ln, (int)(stesc - buf->buf),
1266 "%.*s", (int)(cp - stesc), stesc);
1267 stesc--;
1268 continue;
1269 }
1270
1271 if (EXPAND_LIMIT < ++expand_count) {
1272 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1273 ln, (int)(stesc - buf->buf), NULL);
1274 return ROFF_IGN;
1275 }
1276
1277 /*
1278 * The third character decides the length
1279 * of the name of the string or register.
1280 * Save a pointer to the name.
1281 */
1282
/* maxl == 0 means: no fixed length, read up to term or end of line. */
1283 if (term == '\0') {
1284 switch (*++cp) {
1285 case '\0':
1286 maxl = 0;
1287 break;
1288 case '(':
1289 cp++;
1290 maxl = 2;
1291 break;
1292 case '[':
1293 cp++;
1294 term = ']';
1295 maxl = 0;
1296 break;
1297 default:
1298 maxl = 1;
1299 break;
1300 }
1301 } else {
1302 cp += 2;
1303 maxl = 0;
1304 }
1305 stnam = cp;
1306
1307 /* Advance to the end of the name. */
1308
1309 naml = 0;
1310 arg_complete = 1;
1311 while (maxl == 0 || naml < maxl) {
1312 if (*cp == '\0') {
1313 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
1314 ln, (int)(stesc - buf->buf), stesc);
1315 arg_complete = 0;
1316 break;
1317 }
1318 if (maxl == 0 && *cp == term) {
1319 cp++;
1320 break;
1321 }
/* Only inside \w are nested escapes parsed, to count characters. */
1322 if (*cp++ != '\\' || stesc[1] != 'w') {
1323 naml++;
1324 continue;
1325 }
1326 switch (mandoc_escape(&cp, NULL, NULL)) {
1327 case ESCAPE_SPECIAL:
1328 case ESCAPE_UNICODE:
1329 case ESCAPE_NUMBERED:
1330 case ESCAPE_OVERSTRIKE:
1331 naml++;
1332 break;
1333 default:
1334 break;
1335 }
1336 }
1337
1338 /*
1339 * Retrieve the replacement string; if it is
1340 * undefined, resume searching for escapes.
1341 */
1342
1343 switch (stesc[1]) {
1344 case '*':
1345 if (arg_complete) {
1346 deftype = ROFFDEF_USER | ROFFDEF_PRE;
1347 res = roff_getstrn(r, stnam, naml, &deftype);
1348 }
1349 break;
1350 case 'B':
/* Yields "1" only if the whole argument parses as a number. */
1351 npos = 0;
1352 ubuf[0] = arg_complete &&
1353 roff_evalnum(r, ln, stnam, &npos,
1354 NULL, ROFFNUM_SCALE) &&
1355 stnam + npos + 1 == cp ? '1' : '0';
1356 ubuf[1] = '\0';
1357 break;
1358 case 'n':
1359 if (arg_complete)
1360 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1361 roff_getregn(r, stnam, naml));
1362 else
1363 ubuf[0] = '\0';
1364 break;
1365 case 'w':
1366 /* use even incomplete args */
/* The width is 24 units for each character counted in naml. */
1367 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1368 24 * (int)naml);
1369 break;
1370 }
1371
1372 if (res == NULL) {
1373 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1374 r->parse, ln, (int)(stesc - buf->buf),
1375 "%.*s", (int)naml, stnam);
1376 res = "";
1377 } else if (buf->sz + strlen(res) > SHRT_MAX) {
1378 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1379 ln, (int)(stesc - buf->buf), NULL);
1380 return ROFF_IGN;
1381 }
1382
1383 /* Replace the escape sequence by the string. */
1384
1385 *stesc = '\0';
1386 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
1387 buf->buf, res, cp) + 1;
1388
1389 /* Prepare for the next replacement. */
1390
/*
 * stesc is placed just behind the inserted text, so the
 * backward scan also covers the replacement string itself.
 */
1391 start = nbuf + pos;
1392 stesc = nbuf + (stesc - buf->buf) + strlen(res);
1393 free(buf->buf);
1394 buf->buf = nbuf;
1395 }
1396 return ROFF_CONT;
1397 }
1398
1399 /*
1400 * Process text streams.
1401 */
1402 static enum rofferr
1403 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1404 {
1405 size_t sz;
1406 const char *start;
1407 char *p;
1408 int isz;
1409 enum mandoc_esc esc;
1410
1411 /* Spring the input line trap. */
1412
1413 if (roffit_lines == 1) {
1414 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1415 free(buf->buf);
1416 buf->buf = p;
1417 buf->sz = isz + 1;
1418 *offs = 0;
1419 free(roffit_macro);
1420 roffit_lines = 0;
1421 return ROFF_REPARSE;
1422 } else if (roffit_lines > 1)
1423 --roffit_lines;
1424
1425 if (roffce_node != NULL && buf->buf[pos] != '\0') {
1426 if (roffce_lines < 1) {
1427 r->man->last = roffce_node;
1428 r->man->next = ROFF_NEXT_SIBLING;
1429 roffce_lines = 0;
1430 roffce_node = NULL;
1431 } else
1432 roffce_lines--;
1433 }
1434
1435 /* Convert all breakable hyphens into ASCII_HYPH. */
1436
1437 start = p = buf->buf + pos;
1438
1439 while (*p != '\0') {
1440 sz = strcspn(p, "-\\");
1441 p += sz;
1442
1443 if (*p == '\0')
1444 break;
1445
1446 if (*p == '\\') {
1447 /* Skip over escapes. */
1448 p++;
1449 esc = mandoc_escape((const char **)&p, NULL, NULL);
1450 if (esc == ESCAPE_ERROR)
1451 break;
1452 while (*p == '-')
1453 p++;
1454 continue;
1455 } else if (p == start) {
1456 p++;
1457 continue;
1458 }
1459
1460 if (isalpha((unsigned char)p[-1]) &&
1461 isalpha((unsigned char)p[1]))
1462 *p = ASCII_HYPH;
1463 p++;
1464 }
1465 return ROFF_CONT;
1466 }
1467
1468 enum rofferr
1469 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
1470 {
1471 enum roff_tok t;
1472 enum rofferr e;
1473 int pos; /* parse point */
1474 int spos; /* saved parse point for messages */
1475 int ppos; /* original offset in buf->buf */
1476 int ctl; /* macro line (boolean) */
1477
1478 ppos = pos = *offs;
1479
1480 /* Handle in-line equation delimiters. */
1481
1482 if (r->tbl == NULL &&
1483 r->last_eqn != NULL && r->last_eqn->delim &&
1484 (r->eqn == NULL || r->eqn_inline)) {
1485 e = roff_eqndelim(r, buf, pos);
1486 if (e == ROFF_REPARSE)
1487 return e;
1488 assert(e == ROFF_CONT);
1489 }
1490
1491 /* Expand some escape sequences. */
1492
1493 e = roff_res(r, buf, ln, pos);
1494 if (e == ROFF_IGN || e == ROFF_APPEND)
1495 return e;
1496 assert(e == ROFF_CONT);
1497
1498 ctl = roff_getcontrol(r, buf->buf, &pos);
1499
1500 /*
1501 * First, if a scope is open and we're not a macro, pass the
1502 * text through the macro's filter.
1503 * Equations process all content themselves.
1504 * Tables process almost all content themselves, but we want
1505 * to warn about macros before passing it there.
1506 */
1507
1508 if (r->last != NULL && ! ctl) {
1509 t = r->last->tok;
1510 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1511 if (e == ROFF_IGN)
1512 return e;
1513 assert(e == ROFF_CONT);
1514 }
1515 if (r->eqn != NULL)
1516 return eqn_read(&r->eqn, ln, buf->buf, ppos, offs);
1517 if (r->tbl != NULL && ( ! ctl || buf->buf[pos] == '\0'))
1518 return tbl_read(r->tbl, ln, buf->buf, ppos);
1519 if ( ! ctl)
1520 return roff_parsetext(r, buf, pos, offs);
1521
1522 /* Skip empty request lines. */
1523
1524 if (buf->buf[pos] == '"') {
1525 mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
1526 ln, pos, NULL);
1527 return ROFF_IGN;
1528 } else if (buf->buf[pos] == '\0')
1529 return ROFF_IGN;
1530
1531 /*
1532 * If a scope is open, go to the child handler for that macro,
1533 * as it may want to preprocess before doing anything with it.
1534 * Don't do so if an equation is open.
1535 */
1536
1537 if (r->last) {
1538 t = r->last->tok;
1539 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1540 }
1541
1542 /* No scope is open. This is a new request or macro. */
1543
1544 spos = pos;
1545 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1546
1547 /* Tables ignore most macros. */
1548
1549 if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
1550 t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
1551 mandoc_msg(MANDOCERR_TBLMACRO, r->parse,
1552 ln, pos, buf->buf + spos);
1553 if (t != TOKEN_NONE)
1554 return ROFF_IGN;
1555 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1556 pos++;
1557 while (buf->buf[pos] == ' ')
1558 pos++;
1559 return tbl_read(r->tbl, ln, buf->buf, pos);
1560 }
1561
1562 /* For now, let high level macros abort .ce mode. */
1563
1564 if (ctl && roffce_node != NULL &&
1565 (t == TOKEN_NONE || t == ROFF_EQ || t == ROFF_TS)) {
1566 r->man->last = roffce_node;
1567 r->man->next = ROFF_NEXT_SIBLING;
1568 roffce_lines = 0;
1569 roffce_node = NULL;
1570 }
1571
1572 /*
1573 * This is neither a roff request nor a user-defined macro.
1574 * Let the standard macro set parsers handle it.
1575 */
1576
1577 if (t == TOKEN_NONE)
1578 return ROFF_CONT;
1579
1580 /* Execute a roff request or a user defined macro. */
1581
1582 return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
1583 }
1584
1585 void
1586 roff_endparse(struct roff *r)
1587 {
1588
1589 if (r->last)
1590 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1591 r->last->line, r->last->col,
1592 roff_name[r->last->tok]);
1593
1594 if (r->eqn) {
1595 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1596 r->eqn->eqn.ln, r->eqn->eqn.pos, "EQ");
1597 eqn_end(&r->eqn);
1598 }
1599
1600 if (r->tbl) {
1601 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1602 r->tbl->line, r->tbl->pos, "TS");
1603 tbl_end(&r->tbl);
1604 }
1605 }
1606
1607 /*
1608 * Parse a roff node's type from the input buffer. This must be in the
1609 * form of ".foo xxx" in the usual way.
1610 */
1611 static enum roff_tok
1612 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1613 {
1614 char *cp;
1615 const char *mac;
1616 size_t maclen;
1617 int deftype;
1618 enum roff_tok t;
1619
1620 cp = buf + *pos;
1621
1622 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1623 return TOKEN_NONE;
1624
1625 mac = cp;
1626 maclen = roff_getname(r, &cp, ln, ppos);
1627
1628 deftype = ROFFDEF_USER | ROFFDEF_REN;
1629 r->current_string = roff_getstrn(r, mac, maclen, &deftype);
1630 switch (deftype) {
1631 case ROFFDEF_USER:
1632 t = ROFF_USERDEF;
1633 break;
1634 case ROFFDEF_REN:
1635 t = ROFF_RENAMED;
1636 break;
1637 default:
1638 t = roffhash_find(r->reqtab, mac, maclen);
1639 break;
1640 }
1641 if (t != TOKEN_NONE)
1642 *pos = cp - buf;
1643 return t;
1644 }
1645
1646 /* --- handling of request blocks ----------------------------------------- */
1647
1648 static enum rofferr
1649 roff_cblock(ROFF_ARGS)
1650 {
1651
1652 /*
1653 * A block-close `..' should only be invoked as a child of an
1654 * ignore macro, otherwise raise a warning and just ignore it.
1655 */
1656
1657 if (r->last == NULL) {
1658 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1659 ln, ppos, "..");
1660 return ROFF_IGN;
1661 }
1662
1663 switch (r->last->tok) {
1664 case ROFF_am:
1665 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1666 case ROFF_ami:
1667 case ROFF_de:
1668 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1669 case ROFF_dei:
1670 case ROFF_ig:
1671 break;
1672 default:
1673 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1674 ln, ppos, "..");
1675 return ROFF_IGN;
1676 }
1677
1678 if (buf->buf[pos] != '\0')
1679 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1680 ".. %s", buf->buf + pos);
1681
1682 roffnode_pop(r);
1683 roffnode_cleanscope(r);
1684 return ROFF_IGN;
1685
1686 }
1687
1688 static void
1689 roffnode_cleanscope(struct roff *r)
1690 {
1691
1692 while (r->last) {
1693 if (--r->last->endspan != 0)
1694 break;
1695 roffnode_pop(r);
1696 }
1697 }
1698
1699 static void
1700 roff_ccond(struct roff *r, int ln, int ppos)
1701 {
1702
1703 if (NULL == r->last) {
1704 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1705 ln, ppos, "\\}");
1706 return;
1707 }
1708
1709 switch (r->last->tok) {
1710 case ROFF_el:
1711 case ROFF_ie:
1712 case ROFF_if:
1713 break;
1714 default:
1715 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1716 ln, ppos, "\\}");
1717 return;
1718 }
1719
1720 if (r->last->endspan > -1) {
1721 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1722 ln, ppos, "\\}");
1723 return;
1724 }
1725
1726 roffnode_pop(r);
1727 roffnode_cleanscope(r);
1728 return;
1729 }
1730
1731 static enum rofferr
1732 roff_block(ROFF_ARGS)
1733 {
1734 const char *name, *value;
1735 char *call, *cp, *iname, *rname;
1736 size_t csz, namesz, rsz;
1737 int deftype;
1738
1739 /* Ignore groff compatibility mode for now. */
1740
1741 if (tok == ROFF_de1)
1742 tok = ROFF_de;
1743 else if (tok == ROFF_dei1)
1744 tok = ROFF_dei;
1745 else if (tok == ROFF_am1)
1746 tok = ROFF_am;
1747 else if (tok == ROFF_ami1)
1748 tok = ROFF_ami;
1749
1750 /* Parse the macro name argument. */
1751
1752 cp = buf->buf + pos;
1753 if (tok == ROFF_ig) {
1754 iname = NULL;
1755 namesz = 0;
1756 } else {
1757 iname = cp;
1758 namesz = roff_getname(r, &cp, ln, ppos);
1759 iname[namesz] = '\0';
1760 }
1761
1762 /* Resolve the macro name argument if it is indirect. */
1763
1764 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1765 deftype = ROFFDEF_USER;
1766 name = roff_getstrn(r, iname, namesz, &deftype);
1767 if (name == NULL) {
1768 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1769 r->parse, ln, (int)(iname - buf->buf),
1770 "%.*s", (int)namesz, iname);
1771 namesz = 0;
1772 } else
1773 namesz = strlen(name);
1774 } else
1775 name = iname;
1776
1777 if (namesz == 0 && tok != ROFF_ig) {
1778 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
1779 ln, ppos, roff_name[tok]);
1780 return ROFF_IGN;
1781 }
1782
1783 roffnode_push(r, tok, name, ln, ppos);
1784
1785 /*
1786 * At the beginning of a `de' macro, clear the existing string
1787 * with the same name, if there is one. New content will be
1788 * appended from roff_block_text() in multiline mode.
1789 */
1790
1791 if (tok == ROFF_de || tok == ROFF_dei) {
1792 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1793 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
1794 } else if (tok == ROFF_am || tok == ROFF_ami) {
1795 deftype = ROFFDEF_ANY;
1796 value = roff_getstrn(r, iname, namesz, &deftype);
1797 switch (deftype) { /* Before appending, ... */
1798 case ROFFDEF_PRE: /* copy predefined to user-defined. */
1799 roff_setstrn(&r->strtab, name, namesz,
1800 value, strlen(value), 0);
1801 break;
1802 case ROFFDEF_REN: /* call original standard macro. */
1803 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
1804 (int)strlen(value), value);
1805 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
1806 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
1807 free(call);
1808 break;
1809 case ROFFDEF_STD: /* rename and call standard macro. */
1810 rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
1811 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
1812 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
1813 (int)rsz, rname);
1814 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
1815 free(call);
1816 free(rname);
1817 break;
1818 default:
1819 break;
1820 }
1821 }
1822
1823 if (*cp == '\0')
1824 return ROFF_IGN;
1825
1826 /* Get the custom end marker. */
1827
1828 iname = cp;
1829 namesz = roff_getname(r, &cp, ln, ppos);
1830
1831 /* Resolve the end marker if it is indirect. */
1832
1833 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1834 deftype = ROFFDEF_USER;
1835 name = roff_getstrn(r, iname, namesz, &deftype);
1836 if (name == NULL) {
1837 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1838 r->parse, ln, (int)(iname - buf->buf),
1839 "%.*s", (int)namesz, iname);
1840 namesz = 0;
1841 } else
1842 namesz = strlen(name);
1843 } else
1844 name = iname;
1845
1846 if (namesz)
1847 r->last->end = mandoc_strndup(name, namesz);
1848
1849 if (*cp != '\0')
1850 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
1851 ln, pos, ".%s ... %s", roff_name[tok], cp);
1852
1853 return ROFF_IGN;
1854 }
1855
1856 static enum rofferr
1857 roff_block_sub(ROFF_ARGS)
1858 {
1859 enum roff_tok t;
1860 int i, j;
1861
1862 /*
1863 * First check whether a custom macro exists at this level. If
1864 * it does, then check against it. This is some of groff's
1865 * stranger behaviours. If we encountered a custom end-scope
1866 * tag and that tag also happens to be a "real" macro, then we
1867 * need to try interpreting it again as a real macro. If it's
1868 * not, then return ignore. Else continue.
1869 */
1870
1871 if (r->last->end) {
1872 for (i = pos, j = 0; r->last->end[j]; j++, i++)
1873 if (buf->buf[i] != r->last->end[j])
1874 break;
1875
1876 if (r->last->end[j] == '\0' &&
1877 (buf->buf[i] == '\0' ||
1878 buf->buf[i] == ' ' ||
1879 buf->buf[i] == '\t')) {
1880 roffnode_pop(r);
1881 roffnode_cleanscope(r);
1882
1883 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
1884 i++;
1885
1886 pos = i;
1887 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
1888 TOKEN_NONE)
1889 return ROFF_RERUN;
1890 return ROFF_IGN;
1891 }
1892 }
1893
1894 /*
1895 * If we have no custom end-query or lookup failed, then try
1896 * pulling it out of the hashtable.
1897 */
1898
1899 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1900
1901 if (t != ROFF_cblock) {
1902 if (tok != ROFF_ig)
1903 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
1904 return ROFF_IGN;
1905 }
1906
1907 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
1908 }
1909
1910 static enum rofferr
1911 roff_block_text(ROFF_ARGS)
1912 {
1913
1914 if (tok != ROFF_ig)
1915 roff_setstr(r, r->last->name, buf->buf + pos, 2);
1916
1917 return ROFF_IGN;
1918 }
1919
1920 static enum rofferr
1921 roff_cond_sub(ROFF_ARGS)
1922 {
1923 enum roff_tok t;
1924 char *ep;
1925 int rr;
1926
1927 rr = r->last->rule;
1928 roffnode_cleanscope(r);
1929 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1930
1931 /*
1932 * Fully handle known macros when they are structurally
1933 * required or when the conditional evaluated to true.
1934 */
1935
1936 if (t != TOKEN_NONE && (rr || roffs[t].flags & ROFFMAC_STRUCT))
1937 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
1938
1939 /*
1940 * If `\}' occurs on a macro line without a preceding macro,
1941 * drop the line completely.
1942 */
1943
1944 ep = buf->buf + pos;
1945 if (ep[0] == '\\' && ep[1] == '}')
1946 rr = 0;
1947
1948 /* Always check for the closing delimiter `\}'. */
1949
1950 while ((ep = strchr(ep, '\\')) != NULL) {
1951 if (*(++ep) == '}') {
1952 *ep = '&';
1953 roff_ccond(r, ln, ep - buf->buf - 1);
1954 }
1955 if (*ep != '\0')
1956 ++ep;
1957 }
1958 return rr ? ROFF_CONT : ROFF_IGN;
1959 }
1960
1961 static enum rofferr
1962 roff_cond_text(ROFF_ARGS)
1963 {
1964 char *ep;
1965 int rr;
1966
1967 rr = r->last->rule;
1968 roffnode_cleanscope(r);
1969
1970 ep = buf->buf + pos;
1971 while ((ep = strchr(ep, '\\')) != NULL) {
1972 if (*(++ep) == '}') {
1973 *ep = '&';
1974 roff_ccond(r, ln, ep - buf->buf - 1);
1975 }
1976 if (*ep != '\0')
1977 ++ep;
1978 }
1979 return rr ? ROFF_CONT : ROFF_IGN;
1980 }
1981
1982 /* --- handling of numeric and conditional expressions -------------------- */
1983
1984 /*
1985 * Parse a single signed integer number. Stop at the first non-digit.
1986 * If there is at least one digit, return success and advance the
1987 * parse point, else return failure and let the parse point unchanged.
1988 * Ignore overflows, treat them just like the C language.
1989 */
1990 static int
1991 roff_getnum(const char *v, int *pos, int *res, int flags)
1992 {
1993 int myres, scaled, n, p;
1994
1995 if (NULL == res)
1996 res = &myres;
1997
1998 p = *pos;
1999 n = v[p] == '-';
2000 if (n || v[p] == '+')
2001 p++;
2002
2003 if (flags & ROFFNUM_WHITE)
2004 while (isspace((unsigned char)v[p]))
2005 p++;
2006
2007 for (*res = 0; isdigit((unsigned char)v[p]); p++)
2008 *res = 10 * *res + v[p] - '0';
2009 if (p == *pos + n)
2010 return 0;
2011
2012 if (n)
2013 *res = -*res;
2014
2015 /* Each number may be followed by one optional scaling unit. */
2016
2017 switch (v[p]) {
2018 case 'f':
2019 scaled = *res * 65536;
2020 break;
2021 case 'i':
2022 scaled = *res * 240;
2023 break;
2024 case 'c':
2025 scaled = *res * 240 / 2.54;
2026 break;
2027 case 'v':
2028 case 'P':
2029 scaled = *res * 40;
2030 break;
2031 case 'm':
2032 case 'n':
2033 scaled = *res * 24;
2034 break;
2035 case 'p':
2036 scaled = *res * 10 / 3;
2037 break;
2038 case 'u':
2039 scaled = *res;
2040 break;
2041 case 'M':
2042 scaled = *res * 6 / 25;
2043 break;
2044 default:
2045 scaled = *res;
2046 p--;
2047 break;
2048 }
2049 if (flags & ROFFNUM_SCALE)
2050 *res = scaled;
2051
2052 *pos = p + 1;
2053 return 1;
2054 }
2055
/*
 * Evaluate a string comparison condition.
 * The character at the parse point is the delimiter.
 * Succeed if the string up to the second occurrence of the
 * delimiter equals the string between the second and the
 * third occurrence.
 * Advance the parse point behind the third occurrence
 * or, lacking that, to the end of the line.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim, *lhs, *rhs;
	int		 equal;

	equal = 0;
	delim = v + *pos;		/* initial delimiter */
	lhs = delim + 1;		/* cursor in the first string */
	rhs = strchr(lhs, *delim);	/* middle delimiter, if any */

	if (rhs != NULL) {
		/* Walk both strings in parallel. */
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch, find the final delimiter. */
				rhs = strchr(rhs, *delim);
				break;
			}
			if (*rhs == *delim) {
				/* Both strings ended simultaneously. */
				equal = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return equal;
}
2098
2099 /*
2100 * Evaluate an optionally negated single character, numerical,
2101 * or string condition.
2102 */
2103 static int
2104 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2105 {
2106 char *cp, *name;
2107 size_t sz;
2108 int deftype, number, savepos, istrue, wanttrue;
2109
2110 if ('!' == v[*pos]) {
2111 wanttrue = 0;
2112 (*pos)++;
2113 } else
2114 wanttrue = 1;
2115
2116 switch (v[*pos]) {
2117 case '\0':
2118 return 0;
2119 case 'n':
2120 case 'o':
2121 (*pos)++;
2122 return wanttrue;
2123 case 'c':
2124 case 'e':
2125 case 't':
2126 case 'v':
2127 (*pos)++;
2128 return !wanttrue;
2129 case 'd':
2130 case 'r':
2131 cp = v + *pos + 1;
2132 while (*cp == ' ')
2133 cp++;
2134 name = cp;
2135 sz = roff_getname(r, &cp, ln, cp - v);
2136 if (sz == 0)
2137 istrue = 0;
2138 else if (v[*pos] == 'r')
2139 istrue = roff_hasregn(r, name, sz);
2140 else {
2141 deftype = ROFFDEF_ANY;
2142 roff_getstrn(r, name, sz, &deftype);
2143 istrue = !!deftype;
2144 }
2145 *pos = cp - v;
2146 return istrue == wanttrue;
2147 default:
2148 break;
2149 }
2150
2151 savepos = *pos;
2152 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2153 return (number > 0) == wanttrue;
2154 else if (*pos == savepos)
2155 return roff_evalstrcond(v, pos) == wanttrue;
2156 else
2157 return 0;
2158 }
2159
2160 static enum rofferr
2161 roff_line_ignore(ROFF_ARGS)
2162 {
2163
2164 return ROFF_IGN;
2165 }
2166
2167 static enum rofferr
2168 roff_insec(ROFF_ARGS)
2169 {
2170
2171 mandoc_msg(MANDOCERR_REQ_INSEC, r->parse,
2172 ln, ppos, roff_name[tok]);
2173 return ROFF_IGN;
2174 }
2175
2176 static enum rofferr
2177 roff_unsupp(ROFF_ARGS)
2178 {
2179
2180 mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse,
2181 ln, ppos, roff_name[tok]);
2182 return ROFF_IGN;
2183 }
2184
2185 static enum rofferr
2186 roff_cond(ROFF_ARGS)
2187 {
2188
2189 roffnode_push(r, tok, NULL, ln, ppos);
2190
2191 /*
2192 * An `.el' has no conditional body: it will consume the value
2193 * of the current rstack entry set in prior `ie' calls or
2194 * defaults to DENY.
2195 *
2196 * If we're not an `el', however, then evaluate the conditional.
2197 */
2198
2199 r->last->rule = tok == ROFF_el ?
2200 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2201 roff_evalcond(r, ln, buf->buf, &pos);
2202
2203 /*
2204 * An if-else will put the NEGATION of the current evaluated
2205 * conditional into the stack of rules.
2206 */
2207
2208 if (tok == ROFF_ie) {
2209 if (r->rstackpos + 1 == r->rstacksz) {
2210 r->rstacksz += 16;
2211 r->rstack = mandoc_reallocarray(r->rstack,
2212 r->rstacksz, sizeof(int));
2213 }
2214 r->rstack[++r->rstackpos] = !r->last->rule;
2215 }
2216
2217 /* If the parent has false as its rule, then so do we. */
2218
2219 if (r->last->parent && !r->last->parent->rule)
2220 r->last->rule = 0;
2221
2222 /*
2223 * Determine scope.
2224 * If there is nothing on the line after the conditional,
2225 * not even whitespace, use next-line scope.
2226 */
2227
2228 if (buf->buf[pos] == '\0') {
2229 r->last->endspan = 2;
2230 goto out;
2231 }
2232
2233 while (buf->buf[pos] == ' ')
2234 pos++;
2235
2236 /* An opening brace requests multiline scope. */
2237
2238 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2239 r->last->endspan = -1;
2240 pos += 2;
2241 while (buf->buf[pos] == ' ')
2242 pos++;
2243 goto out;
2244 }
2245
2246 /*
2247 * Anything else following the conditional causes
2248 * single-line scope. Warn if the scope contains
2249 * nothing but trailing whitespace.
2250 */
2251
2252 if (buf->buf[pos] == '\0')
2253 mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
2254 ln, ppos, roff_name[tok]);
2255
2256 r->last->endspan = 1;
2257
2258 out:
2259 *offs = pos;
2260 return ROFF_RERUN;
2261 }
2262
2263 static enum rofferr
2264 roff_ds(ROFF_ARGS)
2265 {
2266 char *string;
2267 const char *name;
2268 size_t namesz;
2269
2270 /* Ignore groff compatibility mode for now. */
2271
2272 if (tok == ROFF_ds1)
2273 tok = ROFF_ds;
2274 else if (tok == ROFF_as1)
2275 tok = ROFF_as;
2276
2277 /*
2278 * The first word is the name of the string.
2279 * If it is empty or terminated by an escape sequence,
2280 * abort the `ds' request without defining anything.
2281 */
2282
2283 name = string = buf->buf + pos;
2284 if (*name == '\0')
2285 return ROFF_IGN;
2286
2287 namesz = roff_getname(r, &string, ln, pos);
2288 if (name[namesz] == '\\')
2289 return ROFF_IGN;
2290
2291 /* Read past the initial double-quote, if any. */
2292 if (*string == '"')
2293 string++;
2294
2295 /* The rest is the value. */
2296 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2297 ROFF_as == tok);
2298 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2299 return ROFF_IGN;
2300 }
2301
/*
 * Parse a single operator, one or two characters long, at v[*pos].
 * Two-character operators are reported as single-letter codes:
 * "<=" as 'l', ">=" as 'g', "<>" as '!', "<?" as 'i', ">?" as 'a',
 * and "==" as '='.  All other operators are reported as themselves.
 * If an operator is recognized, store its code in *res, advance
 * *pos past it, and return the (non-zero) code.  Otherwise, return 0
 * and leave *pos unchanged; *res is still overwritten with the
 * character found at the parse point.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*op;
	int		 len;

	op = v + *pos;
	*res = op[0];
	len = 1;

	if (op[0] == '<') {
		if (op[1] == '=') {
			*res = 'l';
			len = 2;
		} else if (op[1] == '>') {
			*res = '!';
			len = 2;
		} else if (op[1] == '?') {
			*res = 'i';
			len = 2;
		}
	} else if (op[0] == '>') {
		if (op[1] == '=') {
			*res = 'g';
			len = 2;
		} else if (op[1] == '?') {
			*res = 'a';
			len = 2;
		}
	} else if (op[0] == '=') {
		/* "==" is merely an alternative spelling of "=". */
		if (op[1] == '=')
			len = 2;
	} else {
		switch (op[0]) {
		case '+':
		case '-':
		case '*':
		case '/':
		case '%':
		case '&':
		case ':':
			break;
		default:
			return 0;
		}
	}

	*pos += len;
	return *res;
}
2365
2366 /*
2367 * Evaluate either a parenthesized numeric expression
2368 * or a single signed integer number.
2369 */
2370 static int
2371 roff_evalpar(struct roff *r, int ln,
2372 const char *v, int *pos, int *res, int flags)
2373 {
2374
2375 if ('(' != v[*pos])
2376 return roff_getnum(v, pos, res, flags);
2377
2378 (*pos)++;
2379 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2380 return 0;
2381
2382 /*
2383 * Omission of the closing parenthesis
2384 * is an error in validation mode,
2385 * but ignored in evaluation mode.
2386 */
2387
2388 if (')' == v[*pos])
2389 (*pos)++;
2390 else if (NULL == res)
2391 return 0;
2392
2393 return 1;
2394 }
2395
2396 /*
2397 * Evaluate a complete numeric expression.
2398 * Proceed left to right, there is no concept of precedence.
2399 */
2400 static int
2401 roff_evalnum(struct roff *r, int ln, const char *v,
2402 int *pos, int *res, int flags)
2403 {
2404 int mypos, operand2;
2405 char operator;
2406
2407 if (NULL == pos) {
2408 mypos = 0;
2409 pos = &mypos;
2410 }
2411
2412 if (flags & ROFFNUM_WHITE)
2413 while (isspace((unsigned char)v[*pos]))
2414 (*pos)++;
2415
2416 if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2417 return 0;
2418
2419 while (1) {
2420 if (flags & ROFFNUM_WHITE)
2421 while (isspace((unsigned char)v[*pos]))
2422 (*pos)++;
2423
2424 if ( ! roff_getop(v, pos, &operator))
2425 break;
2426
2427 if (flags & ROFFNUM_WHITE)
2428 while (isspace((unsigned char)v[*pos]))
2429 (*pos)++;
2430
2431 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2432 return 0;
2433
2434 if (flags & ROFFNUM_WHITE)
2435 while (isspace((unsigned char)v[*pos]))
2436 (*pos)++;
2437
2438 if (NULL == res)
2439 continue;
2440
2441 switch (operator) {
2442 case '+':
2443 *res += operand2;
2444 break;
2445 case '-':
2446 *res -= operand2;
2447 break;
2448 case '*':
2449 *res *= operand2;
2450 break;
2451 case '/':
2452 if (operand2 == 0) {
2453 mandoc_msg(MANDOCERR_DIVZERO,
2454 r->parse, ln, *pos, v);
2455 *res = 0;
2456 break;
2457 }
2458 *res /= operand2;
2459 break;
2460 case '%':
2461 if (operand2 == 0) {
2462 mandoc_msg(MANDOCERR_DIVZERO,
2463 r->parse, ln, *pos, v);
2464 *res = 0;
2465 break;
2466 }
2467 *res %= operand2;
2468 break;
2469 case '<':
2470 *res = *res < operand2;
2471 break;
2472 case '>':
2473 *res = *res > operand2;
2474 break;
2475 case 'l':
2476 *res = *res <= operand2;
2477 break;
2478 case 'g':
2479 *res = *res >= operand2;
2480 break;
2481 case '=':
2482 *res = *res == operand2;
2483 break;
2484 case '!':
2485 *res = *res != operand2;
2486 break;
2487 case '&':
2488 *res = *res && operand2;
2489 break;
2490 case ':':
2491 *res = *res || operand2;
2492 break;
2493 case 'i':
2494 if (operand2 < *res)
2495 *res = operand2;
2496 break;
2497 case 'a':
2498 if (operand2 > *res)
2499 *res = operand2;
2500 break;
2501 default:
2502 abort();
2503 }
2504 }
2505 return 1;
2506 }
2507
2508 /* --- register management ------------------------------------------------ */
2509
2510 void
2511 roff_setreg(struct roff *r, const char *name, int val, char sign)
2512 {
2513 struct roffreg *reg;
2514
2515 /* Search for an existing register with the same name. */
2516 reg = r->regtab;
2517
2518 while (reg && strcmp(name, reg->key.p))
2519 reg = reg->next;
2520
2521 if (NULL == reg) {
2522 /* Create a new register. */
2523 reg = mandoc_malloc(sizeof(struct roffreg));
2524 reg->key.p = mandoc_strdup(name);
2525 reg->key.sz = strlen(name);
2526 reg->val = 0;
2527 reg->next = r->regtab;
2528 r->regtab = reg;
2529 }
2530
2531 if ('+' == sign)
2532 reg->val += val;
2533 else if ('-' == sign)
2534 reg->val -= val;
2535 else
2536 reg->val = val;
2537 }
2538
2539 /*
2540 * Handle some predefined read-only number registers.
2541 * For now, return -1 if the requested register is not predefined;
2542 * in case a predefined read-only register having the value -1
2543 * were to turn up, another special value would have to be chosen.
2544 */
2545 static int
2546 roff_getregro(const struct roff *r, const char *name)
2547 {
2548
2549 switch (*name) {
2550 case '$': /* Number of arguments of the last macro evaluated. */
2551 return r->argc;
2552 case 'A': /* ASCII approximation mode is always off. */
2553 return 0;
2554 case 'g': /* Groff compatibility mode is always on. */
2555 return 1;
2556 case 'H': /* Fixed horizontal resolution. */
2557 return 24;
2558 case 'j': /* Always adjust left margin only. */
2559 return 0;
2560 case 'T': /* Some output device is always defined. */
2561 return 1;
2562 case 'V': /* Fixed vertical resolution. */
2563 return 40;
2564 default:
2565 return -1;
2566 }
2567 }
2568
2569 int
2570 roff_getreg(const struct roff *r, const char *name)
2571 {
2572 struct roffreg *reg;
2573 int val;
2574
2575 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
2576 val = roff_getregro(r, name + 1);
2577 if (-1 != val)
2578 return val;
2579 }
2580
2581 for (reg = r->regtab; reg; reg = reg->next)
2582 if (0 == strcmp(name, reg->key.p))
2583 return reg->val;
2584
2585 return 0;
2586 }
2587
2588 static int
2589 roff_getregn(const struct roff *r, const char *name, size_t len)
2590 {
2591 struct roffreg *reg;
2592 int val;
2593
2594 if ('.' == name[0] && 2 == len) {
2595 val = roff_getregro(r, name + 1);
2596 if (-1 != val)
2597 return val;
2598 }
2599
2600 for (reg = r->regtab; reg; reg = reg->next)
2601 if (len == reg->key.sz &&
2602 0 == strncmp(name, reg->key.p, len))
2603 return reg->val;
2604
2605 return 0;
2606 }
2607
2608 static int
2609 roff_hasregn(const struct roff *r, const char *name, size_t len)
2610 {
2611 struct roffreg *reg;
2612 int val;
2613
2614 if ('.' == name[0] && 2 == len) {
2615 val = roff_getregro(r, name + 1);
2616 if (-1 != val)
2617 return 1;
2618 }
2619
2620 for (reg = r->regtab; reg; reg = reg->next)
2621 if (len == reg->key.sz &&
2622 0 == strncmp(name, reg->key.p, len))
2623 return 1;
2624
2625 return 0;
2626 }
2627
2628 static void
2629 roff_freereg(struct roffreg *reg)
2630 {
2631 struct roffreg *old_reg;
2632
2633 while (NULL != reg) {
2634 free(reg->key.p);
2635 old_reg = reg;
2636 reg = reg->next;
2637 free(old_reg);
2638 }
2639 }
2640
2641 static enum rofferr
2642 roff_nr(ROFF_ARGS)
2643 {
2644 char *key, *val;
2645 size_t keysz;
2646 int iv;
2647 char sign;
2648
2649 key = val = buf->buf + pos;
2650 if (*key == '\0')
2651 return ROFF_IGN;
2652
2653 keysz = roff_getname(r, &val, ln, pos);
2654 if (key[keysz] == '\\')
2655 return ROFF_IGN;
2656 key[keysz] = '\0';
2657
2658 sign = *val;
2659 if (sign == '+' || sign == '-')
2660 val++;
2661
2662 if (roff_evalnum(r, ln, val, NULL, &iv, ROFFNUM_SCALE))
2663 roff_setreg(r, key, iv, sign);
2664
2665 return ROFF_IGN;
2666 }
2667
2668 static enum rofferr
2669 roff_rr(ROFF_ARGS)
2670 {
2671 struct roffreg *reg, **prev;
2672 char *name, *cp;
2673 size_t namesz;
2674
2675 name = cp = buf->buf + pos;
2676 if (*name == '\0')
2677 return ROFF_IGN;
2678 namesz = roff_getname(r, &cp, ln, pos);
2679 name[namesz] = '\0';
2680
2681 prev = &r->regtab;
2682 while (1) {
2683 reg = *prev;
2684 if (reg == NULL || !strcmp(name, reg->key.p))
2685 break;
2686 prev = &reg->next;
2687 }
2688 if (reg != NULL) {
2689 *prev = reg->next;
2690 free(reg->key.p);
2691 free(reg);
2692 }
2693 return ROFF_IGN;
2694 }
2695
2696 /* --- handler functions for roff requests -------------------------------- */
2697
2698 static enum rofferr
2699 roff_rm(ROFF_ARGS)
2700 {
2701 const char *name;
2702 char *cp;
2703 size_t namesz;
2704
2705 cp = buf->buf + pos;
2706 while (*cp != '\0') {
2707 name = cp;
2708 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
2709 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
2710 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2711 if (name[namesz] == '\\')
2712 break;
2713 }
2714 return ROFF_IGN;
2715 }
2716
2717 static enum rofferr
2718 roff_it(ROFF_ARGS)
2719 {
2720 int iv;
2721
2722 /* Parse the number of lines. */
2723
2724 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
2725 mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
2726 ln, ppos, buf->buf + 1);
2727 return ROFF_IGN;
2728 }
2729
2730 while (isspace((unsigned char)buf->buf[pos]))
2731 pos++;
2732
2733 /*
2734 * Arm the input line trap.
2735 * Special-casing "an-trap" is an ugly workaround to cope
2736 * with DocBook stupidly fiddling with man(7) internals.
2737 */
2738
2739 roffit_lines = iv;
2740 roffit_macro = mandoc_strdup(iv != 1 ||
2741 strcmp(buf->buf + pos, "an-trap") ?
2742 buf->buf + pos : "br");
2743 return ROFF_IGN;
2744 }
2745
2746 static enum rofferr
2747 roff_Dd(ROFF_ARGS)
2748 {
2749 int mask;
2750 enum roff_tok t, te;
2751
2752 switch (tok) {
2753 case ROFF_Dd:
2754 tok = MDOC_Dd;
2755 te = MDOC_MAX;
2756 if (r->format == 0)
2757 r->format = MPARSE_MDOC;
2758 mask = MPARSE_MDOC | MPARSE_QUICK;
2759 break;
2760 case ROFF_TH:
2761 tok = MAN_TH;
2762 te = MAN_MAX;
2763 if (r->format == 0)
2764 r->format = MPARSE_MAN;
2765 mask = MPARSE_QUICK;
2766 break;
2767 default:
2768 abort();
2769 }
2770 if ((r->options & mask) == 0)
2771 for (t = tok; t < te; t++)
2772 roff_setstr(r, roff_name[t], NULL, 0);
2773 return ROFF_CONT;
2774 }
2775
2776 static enum rofferr
2777 roff_TE(ROFF_ARGS)
2778 {
2779
2780 if (NULL == r->tbl)
2781 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2782 ln, ppos, "TE");
2783 else if ( ! tbl_end(&r->tbl)) {
2784 free(buf->buf);
2785 buf->buf = mandoc_strdup(".sp");
2786 buf->sz = 4;
2787 return ROFF_REPARSE;
2788 }
2789 return ROFF_IGN;
2790 }
2791
2792 static enum rofferr
2793 roff_T_(ROFF_ARGS)
2794 {
2795
2796 if (NULL == r->tbl)
2797 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2798 ln, ppos, "T&");
2799 else
2800 tbl_restart(ln, ppos, r->tbl);
2801
2802 return ROFF_IGN;
2803 }
2804
2805 /*
2806 * Handle in-line equation delimiters.
2807 */
2808 static enum rofferr
2809 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
2810 {
2811 char *cp1, *cp2;
2812 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
2813
2814 /*
2815 * Outside equations, look for an opening delimiter.
2816 * If we are inside an equation, we already know it is
2817 * in-line, or this function wouldn't have been called;
2818 * so look for a closing delimiter.
2819 */
2820
2821 cp1 = buf->buf + pos;
2822 cp2 = strchr(cp1, r->eqn == NULL ?
2823 r->last_eqn->odelim : r->last_eqn->cdelim);
2824 if (cp2 == NULL)
2825 return ROFF_CONT;
2826
2827 *cp2++ = '\0';
2828 bef_pr = bef_nl = aft_nl = aft_pr = "";
2829
2830 /* Handle preceding text, protecting whitespace. */
2831
2832 if (*buf->buf != '\0') {
2833 if (r->eqn == NULL)
2834 bef_pr = "\\&";
2835 bef_nl = "\n";
2836 }
2837
2838 /*
2839 * Prepare replacing the delimiter with an equation macro
2840 * and drop leading white space from the equation.
2841 */
2842
2843 if (r->eqn == NULL) {
2844 while (*cp2 == ' ')
2845 cp2++;
2846 mac = ".EQ";
2847 } else
2848 mac = ".EN";
2849
2850 /* Handle following text, protecting whitespace. */
2851
2852 if (*cp2 != '\0') {
2853 aft_nl = "\n";
2854 if (r->eqn != NULL)
2855 aft_pr = "\\&";
2856 }
2857
2858 /* Do the actual replacement. */
2859
2860 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
2861 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
2862 free(buf->buf);
2863 buf->buf = cp1;
2864
2865 /* Toggle the in-line state of the eqn subsystem. */
2866
2867 r->eqn_inline = r->eqn == NULL;
2868 return ROFF_REPARSE;
2869 }
2870
2871 static enum rofferr
2872 roff_EQ(ROFF_ARGS)
2873 {
2874 struct eqn_node *e;
2875
2876 assert(r->eqn == NULL);
2877 e = eqn_alloc(ppos, ln, r->parse);
2878
2879 if (r->last_eqn) {
2880 r->last_eqn->next = e;
2881 e->delim = r->last_eqn->delim;
2882 e->odelim = r->last_eqn->odelim;
2883 e->cdelim = r->last_eqn->cdelim;
2884 } else
2885 r->first_eqn = r->last_eqn = e;
2886
2887 r->eqn = r->last_eqn = e;
2888
2889 if (buf->buf[pos] != '\0')
2890 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2891 ".EQ %s", buf->buf + pos);
2892
2893 return ROFF_IGN;
2894 }
2895
2896 static enum rofferr
2897 roff_EN(ROFF_ARGS)
2898 {
2899
2900 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
2901 return ROFF_IGN;
2902 }
2903
2904 static enum rofferr
2905 roff_TS(ROFF_ARGS)
2906 {
2907 struct tbl_node *tbl;
2908
2909 if (r->tbl) {
2910 mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
2911 ln, ppos, "TS breaks TS");
2912 tbl_end(&r->tbl);
2913 }
2914
2915 tbl = tbl_alloc(ppos, ln, r->parse);
2916
2917 if (r->last_tbl)
2918 r->last_tbl->next = tbl;
2919 else
2920 r->first_tbl = r->last_tbl = tbl;
2921
2922 r->tbl = r->last_tbl = tbl;
2923 return ROFF_IGN;
2924 }
2925
2926 static enum rofferr
2927 roff_onearg(ROFF_ARGS)
2928 {
2929 struct roff_node *n;
2930 char *cp;
2931 int npos;
2932
2933 if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
2934 (tok == ROFF_sp || tok == ROFF_ti))
2935 man_breakscope(r->man, tok);
2936
2937 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
2938 r->man->last = roffce_node;
2939 r->man->next = ROFF_NEXT_SIBLING;
2940 }
2941
2942 roff_elem_alloc(r->man, ln, ppos, tok);
2943 n = r->man->last;
2944
2945 cp = buf->buf + pos;
2946 if (*cp != '\0') {
2947 while (*cp != '\0' && *cp != ' ')
2948 cp++;
2949 while (*cp == ' ')
2950 *cp++ = '\0';
2951 if (*cp != '\0')
2952 mandoc_vmsg(MANDOCERR_ARG_EXCESS,
2953 r->parse, ln, cp - buf->buf,
2954 "%s ... %s", roff_name[tok], cp);
2955 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
2956 }
2957
2958 if (tok == ROFF_ce || tok == ROFF_rj) {
2959 if (r->man->last->type == ROFFT_ELEM) {
2960 roff_word_alloc(r->man, ln, pos, "1");
2961 r->man->last->flags |= NODE_NOSRC;
2962 }
2963 npos = 0;
2964 if (roff_evalnum(r, ln, r->man->last->string, &npos,
2965 &roffce_lines, 0) == 0) {
2966 mandoc_vmsg(MANDOCERR_CE_NONUM,
2967 r->parse, ln, pos, "ce %s", buf->buf + pos);
2968 roffce_lines = 1;
2969 }
2970 if (roffce_lines < 1) {
2971 r->man->last = r->man->last->parent;
2972 roffce_node = NULL;
2973 roffce_lines = 0;
2974 } else
2975 roffce_node = r->man->last->parent;
2976 } else {
2977 n->flags |= NODE_VALID | NODE_ENDED;
2978 r->man->last = n;
2979 }
2980 n->flags |= NODE_LINE;
2981 r->man->next = ROFF_NEXT_SIBLING;
2982 return ROFF_IGN;
2983 }
2984
2985 static enum rofferr
2986 roff_manyarg(ROFF_ARGS)
2987 {
2988 struct roff_node *n;
2989 char *sp, *ep;
2990
2991 roff_elem_alloc(r->man, ln, ppos, tok);
2992 n = r->man->last;
2993
2994 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
2995 while (*ep != '\0' && *ep != ' ')
2996 ep++;
2997 while (*ep == ' ')
2998 *ep++ = '\0';
2999 roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3000 }
3001
3002 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3003 r->man->last = n;
3004 r->man->next = ROFF_NEXT_SIBLING;
3005 return ROFF_IGN;
3006 }
3007
3008 static enum rofferr
3009 roff_als(ROFF_ARGS)
3010 {
3011 char *oldn, *newn, *end, *value;
3012 size_t oldsz, newsz, valsz;
3013
3014 newn = oldn = buf->buf + pos;
3015 if (*newn == '\0')
3016 return ROFF_IGN;
3017
3018 newsz = roff_getname(r, &oldn, ln, pos);
3019 if (newn[newsz] == '\\' || *oldn == '\0')
3020 return ROFF_IGN;
3021
3022 end = oldn;
3023 oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3024 if (oldsz == 0)
3025 return ROFF_IGN;
3026
3027 valsz = mandoc_asprintf(&value, ".%.*s \\$*\\\"\n",
3028 (int)oldsz, oldn);
3029 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3030 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3031 free(value);
3032 return ROFF_IGN;
3033 }
3034
3035 static enum rofferr
3036 roff_br(ROFF_ARGS)
3037 {
3038 if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3039 man_breakscope(r->man, ROFF_br);
3040 roff_elem_alloc(r->man, ln, ppos, ROFF_br);
3041 if (buf->buf[pos] != '\0')
3042 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
3043 "%s %s", roff_name[tok], buf->buf + pos);
3044 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3045 r->man->next = ROFF_NEXT_SIBLING;
3046 return ROFF_IGN;
3047 }
3048
3049 static enum rofferr
3050 roff_cc(ROFF_ARGS)
3051 {
3052 const char *p;
3053
3054 p = buf->buf + pos;
3055
3056 if (*p == '\0' || (r->control = *p++) == '.')
3057 r->control = '\0';
3058
3059 if (*p != '\0')
3060 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
3061 ln, p - buf->buf, "cc ... %s", p);
3062
3063 return ROFF_IGN;
3064 }
3065
3066 static enum rofferr
3067 roff_ec(ROFF_ARGS)
3068 {
3069 const char *p;
3070
3071 p = buf->buf + pos;
3072 if (*p == '\0')
3073 r->escape = '\\';
3074 else {
3075 r->escape = *p;
3076 if (*++p != '\0')
3077 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
3078 ln, p - buf->buf, "ec ... %s", p);
3079 }
3080 return ROFF_IGN;
3081 }
3082
3083 static enum rofferr
3084 roff_eo(ROFF_ARGS)
3085 {
3086 r->escape = '\0';
3087 if (buf->buf[pos] != '\0')
3088 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse,
3089 ln, pos, "eo %s", buf->buf + pos);
3090 return ROFF_IGN;
3091 }
3092
3093 static enum rofferr
3094 roff_tr(ROFF_ARGS)
3095 {
3096 const char *p, *first, *second;
3097 size_t fsz, ssz;
3098 enum mandoc_esc esc;
3099
3100 p = buf->buf + pos;
3101
3102 if (*p == '\0') {
3103 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr");
3104 return ROFF_IGN;
3105 }
3106
3107 while (*p != '\0') {
3108 fsz = ssz = 1;
3109
3110 first = p++;
3111 if (*first == '\\') {
3112 esc = mandoc_escape(&p, NULL, NULL);
3113 if (esc == ESCAPE_ERROR) {
3114 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
3115 ln, (int)(p - buf->buf), first);
3116 return ROFF_IGN;
3117 }
3118 fsz = (size_t)(p - first);
3119 }
3120
3121 second = p++;
3122 if (*second == '\\') {
3123 esc = mandoc_escape(&p, NULL, NULL);
3124 if (esc == ESCAPE_ERROR) {
3125 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
3126 ln, (int)(p - buf->buf), second);
3127 return ROFF_IGN;
3128 }
3129 ssz = (size_t)(p - second);
3130 } else if (*second == '\0') {
3131 mandoc_vmsg(MANDOCERR_TR_ODD, r->parse,
3132 ln, first - buf->buf, "tr %s", first);
3133 second = " ";
3134 p--;
3135 }
3136
3137 if (fsz > 1) {
3138 roff_setstrn(&r->xmbtab, first, fsz,
3139 second, ssz, 0);
3140 continue;
3141 }
3142
3143 if (r->xtab == NULL)
3144 r->xtab = mandoc_calloc(128,
3145 sizeof(struct roffstr));
3146
3147 free(r->xtab[(int)*first].p);
3148 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3149 r->xtab[(int)*first].sz = ssz;
3150 }
3151
3152 return ROFF_IGN;
3153 }
3154
3155 static enum rofferr
3156 roff_rn(ROFF_ARGS)
3157 {
3158 const char *value;
3159 char *oldn, *newn, *end;
3160 size_t oldsz, newsz;
3161 int deftype;
3162
3163 oldn = newn = buf->buf + pos;
3164 if (*oldn == '\0')
3165 return ROFF_IGN;
3166
3167 oldsz = roff_getname(r, &newn, ln, pos);
3168 if (oldn[oldsz] == '\\' || *newn == '\0')
3169 return ROFF_IGN;
3170
3171 end = newn;
3172 newsz = roff_getname(r, &end, ln, newn - buf->buf);
3173 if (newsz == 0)
3174 return ROFF_IGN;
3175
3176 deftype = ROFFDEF_ANY;
3177 value = roff_getstrn(r, oldn, oldsz, &deftype);
3178 switch (deftype) {
3179 case ROFFDEF_USER:
3180 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3181 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3182 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3183 break;
3184 case ROFFDEF_PRE:
3185 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3186 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3187 break;
3188 case ROFFDEF_REN:
3189 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3190 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3191 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3192 break;
3193 case ROFFDEF_STD:
3194 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3195 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3196 break;
3197 default:
3198 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3199 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3200 break;
3201 }
3202 return ROFF_IGN;
3203 }
3204
3205 static enum rofferr
3206 roff_so(ROFF_ARGS)
3207 {
3208 char *name, *cp;
3209
3210 name = buf->buf + pos;
3211 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
3212
3213 /*
3214 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3215 * opening anything that's not in our cwd or anything beneath
3216 * it. Thus, explicitly disallow traversing up the file-system
3217 * or using absolute paths.
3218 */
3219
3220 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3221 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
3222 ".so %s", name);
3223 buf->sz = mandoc_asprintf(&cp,
3224 ".sp\nSee the file %s.\n.sp", name) + 1;
3225 free(buf->buf);
3226 buf->buf = cp;
3227 *offs = 0;
3228 return ROFF_REPARSE;
3229 }
3230
3231 *offs = pos;
3232 return ROFF_SO;
3233 }
3234
3235 /* --- user defined strings and macros ------------------------------------ */
3236
3237 static enum rofferr
3238 roff_userdef(ROFF_ARGS)
3239 {
3240 const char *arg[16], *ap;
3241 char *cp, *n1, *n2;
3242 int expand_count, i, ib, ie;
3243 size_t asz, rsz;
3244
3245 /*
3246 * Collect pointers to macro argument strings
3247 * and NUL-terminate them.
3248 */
3249
3250 r->argc = 0;
3251 cp = buf->buf + pos;
3252 for (i = 0; i < 16; i++) {
3253 if (*cp == '\0')
3254 arg[i] = "";
3255 else {
3256 arg[i] = mandoc_getarg(r->parse, &cp, ln, &pos);
3257 r->argc = i + 1;
3258 }
3259 }
3260
3261 /*
3262 * Expand macro arguments.
3263 */
3264
3265 buf->sz = strlen(r->current_string) + 1;
3266 n1 = n2 = cp = mandoc_malloc(buf->sz);
3267 memcpy(n1, r->current_string, buf->sz);
3268 expand_count = 0;
3269 while (*cp != '\0') {
3270
3271 /* Scan ahead for the next argument invocation. */
3272
3273 if (*cp++ != '\\')
3274 continue;
3275 if (*cp++ != '$')
3276 continue;
3277 if (*cp == '*') { /* \\$* inserts all arguments */
3278 ib = 0;
3279 ie = r->argc - 1;
3280 } else { /* \\$1 .. \\$9 insert one argument */
3281 ib = ie = *cp - '1';
3282 if (ib < 0 || ib > 8)
3283 continue;
3284 }
3285 cp -= 2;
3286
3287 /*
3288 * Prevent infinite recursion.
3289 */
3290
3291 if (cp >= n2)
3292 expand_count = 1;
3293 else if (++expand_count > EXPAND_LIMIT) {
3294 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
3295 ln, (int)(cp - n1), NULL);
3296 free(buf->buf);
3297 buf->buf = n1;
3298 return ROFF_IGN;
3299 }
3300
3301 /*
3302 * Determine the size of the expanded argument,
3303 * taking escaping of quotes into account.
3304 */
3305
3306 asz = ie > ib ? ie - ib : 0; /* for blanks */
3307 for (i = ib; i <= ie; i++) {
3308 for (ap = arg[i]; *ap != '\0'; ap++) {
3309 asz++;
3310 if (*ap == '"')
3311 asz += 3;
3312 }
3313 }
3314 if (asz != 3) {
3315
3316 /*
3317 * Determine the size of the rest of the
3318 * unexpanded macro, including the NUL.
3319 */
3320
3321 rsz = buf->sz - (cp - n1) - 3;
3322
3323 /*
3324 * When shrinking, move before
3325 * releasing the storage.
3326 */
3327
3328 if (asz < 3)
3329 memmove(cp + asz, cp + 3, rsz);
3330
3331 /*
3332 * Resize the storage for the macro
3333 * and readjust the parse pointer.
3334 */
3335
3336 buf->sz += asz - 3;
3337 n2 = mandoc_realloc(n1, buf->sz);
3338 cp = n2 + (cp - n1);
3339 n1 = n2;
3340
3341 /*
3342 * When growing, make room
3343 * for the expanded argument.
3344 */
3345
3346 if (asz > 3)
3347 memmove(cp + asz, cp + 3, rsz);
3348 }
3349
3350 /* Copy the expanded argument, escaping quotes. */
3351
3352 n2 = cp;
3353 for (i = ib; i <= ie; i++) {
3354 for (ap = arg[i]; *ap != '\0'; ap++) {
3355 if (*ap == '"') {
3356 memcpy(n2, "\\(dq", 4);
3357 n2 += 4;
3358 } else
3359 *n2++ = *ap;
3360 }
3361 if (i < ie)
3362 *n2++ = ' ';
3363 }
3364 }
3365
3366 /*
3367 * Replace the macro invocation
3368 * by the expanded macro.
3369 */
3370
3371 free(buf->buf);
3372 buf->buf = n1;
3373 *offs = 0;
3374
3375 return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
3376 ROFF_REPARSE : ROFF_APPEND;
3377 }
3378
3379 /*
3380 * Calling a high-level macro that was renamed with .rn.
3381 * r->current_string has already been set up by roff_parse().
3382 */
3383 static enum rofferr
3384 roff_renamed(ROFF_ARGS)
3385 {
3386 char *nbuf;
3387
3388 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
3389 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
3390 free(buf->buf);
3391 buf->buf = nbuf;
3392 return ROFF_CONT;
3393 }
3394
3395 static size_t
3396 roff_getname(struct roff *r, char **cpp, int ln, int pos)
3397 {
3398 char *name, *cp;
3399 size_t namesz;
3400
3401 name = *cpp;
3402 if ('\0' == *name)
3403 return 0;
3404
3405 /* Read until end of name and terminate it with NUL. */
3406 for (cp = name; 1; cp++) {
3407 if ('\0' == *cp || ' ' == *cp) {
3408 namesz = cp - name;
3409 break;
3410 }
3411 if ('\\' != *cp)
3412 continue;
3413 namesz = cp - name;
3414 if ('{' == cp[1] || '}' == cp[1])
3415 break;
3416 cp++;
3417 if ('\\' == *cp)
3418 continue;
3419 mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
3420 "%.*s", (int)(cp - name + 1), name);
3421 mandoc_escape((const char **)&cp, NULL, NULL);
3422 break;
3423 }
3424
3425 /* Read past spaces. */
3426 while (' ' == *cp)
3427 cp++;
3428
3429 *cpp = cp;
3430 return namesz;
3431 }
3432
3433 /*
3434 * Store *string into the user-defined string called *name.
3435 * To clear an existing entry, call with (*r, *name, NULL, 0).
3436 * append == 0: replace mode
3437 * append == 1: single-line append mode
3438 * append == 2: multiline append mode, append '\n' after each call
3439 */
3440 static void
3441 roff_setstr(struct roff *r, const char *name, const char *string,
3442 int append)
3443 {
3444 size_t namesz;
3445
3446 namesz = strlen(name);
3447 roff_setstrn(&r->strtab, name, namesz, string,
3448 string ? strlen(string) : 0, append);
3449 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3450 }
3451
3452 static void
3453 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
3454 const char *string, size_t stringsz, int append)
3455 {
3456 struct roffkv *n;
3457 char *c;
3458 int i;
3459 size_t oldch, newch;
3460
3461 /* Search for an existing string with the same name. */
3462 n = *r;
3463
3464 while (n && (namesz != n->key.sz ||
3465 strncmp(n->key.p, name, namesz)))
3466 n = n->next;
3467
3468 if (NULL == n) {
3469 /* Create a new string table entry. */
3470 n = mandoc_malloc(sizeof(struct roffkv));
3471 n->key.p = mandoc_strndup(name, namesz);
3472 n->key.sz = namesz;
3473 n->val.p = NULL;
3474 n->val.sz = 0;
3475 n->next = *r;
3476 *r = n;
3477 } else if (0 == append) {
3478 free(n->val.p);
3479 n->val.p = NULL;
3480 n->val.sz = 0;
3481 }
3482
3483 if (NULL == string)
3484 return;
3485
3486 /*
3487 * One additional byte for the '\n' in multiline mode,
3488 * and one for the terminating '\0'.
3489 */
3490 newch = stringsz + (1 < append ? 2u : 1u);
3491
3492 if (NULL == n->val.p) {
3493 n->val.p = mandoc_malloc(newch);
3494 *n->val.p = '\0';
3495 oldch = 0;
3496 } else {
3497 oldch = n->val.sz;
3498 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
3499 }
3500
3501 /* Skip existing content in the destination buffer. */
3502 c = n->val.p + (int)oldch;
3503
3504 /* Append new content to the destination buffer. */
3505 i = 0;
3506 while (i < (int)stringsz) {
3507 /*
3508 * Rudimentary roff copy mode:
3509 * Handle escaped backslashes.
3510 */
3511 if ('\\' == string[i] && '\\' == string[i + 1])
3512 i++;
3513 *c++ = string[i++];
3514 }
3515
3516 /* Append terminating bytes. */
3517 if (1 < append)
3518 *c++ = '\n';
3519
3520 *c = '\0';
3521 n->val.sz = (int)(c - n->val.p);
3522 }
3523
3524 static const char *
3525 roff_getstrn(const struct roff *r, const char *name, size_t len,
3526 int *deftype)
3527 {
3528 const struct roffkv *n;
3529 int i;
3530 enum roff_tok tok;
3531
3532 if (*deftype & ROFFDEF_USER) {
3533 for (n = r->strtab; n != NULL; n = n->next) {
3534 if (strncmp(name, n->key.p, len) == 0 &&
3535 n->key.p[len] == '\0' &&
3536 n->val.p != NULL) {
3537 *deftype = ROFFDEF_USER;
3538 return n->val.p;
3539 }
3540 }
3541 }
3542 if (*deftype & ROFFDEF_PRE) {
3543 for (i = 0; i < PREDEFS_MAX; i++) {
3544 if (strncmp(name, predefs[i].name, len) == 0 &&
3545 predefs[i].name[len] == '\0') {
3546 *deftype = ROFFDEF_PRE;
3547 return predefs[i].str;
3548 }
3549 }
3550 }
3551 if (*deftype & ROFFDEF_REN) {
3552 for (n = r->rentab; n != NULL; n = n->next) {
3553 if (strncmp(name, n->key.p, len) == 0 &&
3554 n->key.p[len] == '\0' &&
3555 n->val.p != NULL) {
3556 *deftype = ROFFDEF_REN;
3557 return n->val.p;
3558 }
3559 }
3560 }
3561 if (*deftype & ROFFDEF_STD) {
3562 if (r->man->macroset != MACROSET_MAN) {
3563 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
3564 if (strncmp(name, roff_name[tok], len) == 0 &&
3565 roff_name[tok][len] == '\0') {
3566 *deftype = ROFFDEF_STD;
3567 return NULL;
3568 }
3569 }
3570 }
3571 if (r->man->macroset != MACROSET_MDOC) {
3572 for (tok = MAN_TH; tok < MAN_MAX; tok++) {
3573 if (strncmp(name, roff_name[tok], len) == 0 &&
3574 roff_name[tok][len] == '\0') {
3575 *deftype = ROFFDEF_STD;
3576 return NULL;
3577 }
3578 }
3579 }
3580 }
3581 *deftype = 0;
3582 return NULL;
3583 }
3584
3585 static void
3586 roff_freestr(struct roffkv *r)
3587 {
3588 struct roffkv *n, *nn;
3589
3590 for (n = r; n; n = nn) {
3591 free(n->key.p);
3592 free(n->val.p);
3593 nn = n->next;
3594 free(n);
3595 }
3596 }
3597
3598 /* --- accessors and utility functions ------------------------------------ */
3599
3600 const struct tbl_span *
3601 roff_span(const struct roff *r)
3602 {
3603
3604 return r->tbl ? tbl_span(r->tbl) : NULL;
3605 }
3606
3607 const struct eqn *
3608 roff_eqn(const struct roff *r)
3609 {
3610
3611 return r->last_eqn ? &r->last_eqn->eqn : NULL;
3612 }
3613
3614 /*
3615 * Duplicate an input string, making the appropriate character
3616 * conversations (as stipulated by `tr') along the way.
3617 * Returns a heap-allocated string with all the replacements made.
3618 */
3619 char *
3620 roff_strdup(const struct roff *r, const char *p)
3621 {
3622 const struct roffkv *cp;
3623 char *res;
3624 const char *pp;
3625 size_t ssz, sz;
3626 enum mandoc_esc esc;
3627
3628 if (NULL == r->xmbtab && NULL == r->xtab)
3629 return mandoc_strdup(p);
3630 else if ('\0' == *p)
3631 return mandoc_strdup("");
3632
3633 /*
3634 * Step through each character looking for term matches
3635 * (remember that a `tr' can be invoked with an escape, which is
3636 * a glyph but the escape is multi-character).
3637 * We only do this if the character hash has been initialised
3638 * and the string is >0 length.
3639 */
3640
3641 res = NULL;
3642 ssz = 0;
3643
3644 while ('\0' != *p) {
3645 assert((unsigned int)*p < 128);
3646 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
3647 sz = r->xtab[(int)*p].sz;
3648 res = mandoc_realloc(res, ssz + sz + 1);
3649 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
3650 ssz += sz;
3651 p++;
3652 continue;
3653 } else if ('\\' != *p) {
3654 res = mandoc_realloc(res, ssz + 2);
3655 res[ssz++] = *p++;
3656 continue;
3657 }
3658
3659 /* Search for term matches. */
3660 for (cp = r->xmbtab; cp; cp = cp->next)
3661 if (0 == strncmp(p, cp->key.p, cp->key.sz))
3662 break;
3663
3664 if (NULL != cp) {
3665 /*
3666 * A match has been found.
3667 * Append the match to the array and move
3668 * forward by its keysize.
3669 */
3670 res = mandoc_realloc(res,
3671 ssz + cp->val.sz + 1);
3672 memcpy(res + ssz, cp->val.p, cp->val.sz);
3673 ssz += cp->val.sz;
3674 p += (int)cp->key.sz;
3675 continue;
3676 }
3677
3678 /*
3679 * Handle escapes carefully: we need to copy
3680 * over just the escape itself, or else we might
3681 * do replacements within the escape itself.
3682 * Make sure to pass along the bogus string.
3683 */
3684 pp = p++;
3685 esc = mandoc_escape(&p, NULL, NULL);
3686 if (ESCAPE_ERROR == esc) {
3687 sz = strlen(pp);
3688 res = mandoc_realloc(res, ssz + sz + 1);
3689 memcpy(res + ssz, pp, sz);
3690 break;
3691 }
3692 /*
3693 * We bail out on bad escapes.
3694 * No need to warn: we already did so when
3695 * roff_res() was called.
3696 */
3697 sz = (int)(p - pp);
3698 res = mandoc_realloc(res, ssz + sz + 1);
3699 memcpy(res + ssz, pp, sz);
3700 ssz += sz;
3701 }
3702
3703 res[(int)ssz] = '\0';
3704 return res;
3705 }
3706
3707 int
3708 roff_getformat(const struct roff *r)
3709 {
3710
3711 return r->format;
3712 }
3713
3714 /*
3715 * Find out whether a line is a macro line or not.
3716 * If it is, adjust the current position and return one; if it isn't,
3717 * return zero and don't change the current position.
3718 * If the control character has been set with `.cc', then let that grain
3719 * precedence.
3720 * This is slighly contrary to groff, where using the non-breaking
3721 * control character when `cc' has been invoked will cause the
3722 * non-breaking macro contents to be printed verbatim.
3723 */
3724 int
3725 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
3726 {
3727 int pos;
3728
3729 pos = *ppos;
3730
3731 if (r->control != '\0' && cp[pos] == r->control)
3732 pos++;
3733 else if (r->control != '\0')
3734 return 0;
3735 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
3736 pos += 2;
3737 else if ('.' == cp[pos] || '\'' == cp[pos])
3738 pos++;
3739 else
3740 return 0;
3741
3742 while (' ' == cp[pos] || '\t' == cp[pos])
3743 pos++;
3744
3745 *ppos = pos;
3746 return 1;
3747 }