]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
round default width of tbl(7) text blocks in the same way as groff
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.312 2017/06/14 22:51:25 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31 #include "mandoc.h"
32 #include "mandoc_aux.h"
33 #include "mandoc_ohash.h"
34 #include "roff.h"
35 #include "libmandoc.h"
36 #include "roff_int.h"
37 #include "libroff.h"
38
/*
 * Upper bound on the number of string expansions performed for a
 * single input line; it exists to break infinite expansion loops.
 */
#define	EXPAND_LIMIT	1000
41
42 /* --- data types --------------------------------------------------------- */
43
/*
 * Minimal string buffer: a pointer together with its cached length.
 */
struct	roffstr {
	char		*p;	/* buffer, terminated by a nil byte */
	size_t		 sz;	/* cached strlen(p) */
};
51
52 /*
53 * A key-value roffstr pair as part of a singly-linked list.
54 */
55 struct roffkv {
56 struct roffstr key;
57 struct roffstr val;
58 struct roffkv *next; /* next in list */
59 };
60
61 /*
62 * A single number register as part of a singly-linked list.
63 */
64 struct roffreg {
65 struct roffstr key;
66 int val;
67 struct roffreg *next;
68 };
69
70 /*
71 * Association of request and macro names with token IDs.
72 */
73 struct roffreq {
74 enum roff_tok tok;
75 char name[];
76 };
77
/*
 * State of the roff parser.
 */
struct	roff {
	/* Surrounding parser objects. */
	struct mparse	*parse;		/* parse point */
	struct roff_man	*man;		/* mdoc or man parser */

	/* Stack of pending roff blocks and conditionals. */
	struct roffnode	*last;		/* leaf of stack */
	int		*rstack;	/* stack of inverted `ie' values */

	/* Lookup tables. */
	struct ohash	*reqtab;	/* request lookup table */
	struct roffreg	*regtab;	/* number registers */
	struct roffkv	*strtab;	/* user-defined strings & macros */
	struct roffkv	*rentab;	/* renamed strings & macros */
	struct roffkv	*xmbtab;	/* multi-byte trans table (`tr') */
	struct roffstr	*xtab;		/* single-byte trans table (`tr') */

	const char	*current_string; /* value of last called user macro */

	/* tbl(7) tables. */
	struct tbl_node	*first_tbl;	/* first table parsed */
	struct tbl_node	*last_tbl;	/* last table parsed */
	struct tbl_node	*tbl;		/* current table being parsed */

	/* eqn(7) equations. */
	struct eqn_node	*last_eqn;	/* last equation parsed */
	struct eqn_node	*first_eqn;	/* first equation parsed */
	struct eqn_node	*eqn;		/* current equation being parsed */
	int		 eqn_inline;	/* current equation is inline */

	/* Scalar state. */
	int		 options;	/* parse options */
	int		 rstacksz;	/* current size limit of rstack */
	int		 rstackpos;	/* position in rstack */
	int		 format;	/* current file in mdoc or man format */
	int		 argc;		/* number of args of the last macro */
	char		 control;	/* control character */
	char		 escape;	/* escape character */
};
105
106 struct roffnode {
107 enum roff_tok tok; /* type of node */
108 struct roffnode *parent; /* up one in stack */
109 int line; /* parse line */
110 int col; /* parse col */
111 char *name; /* node name, e.g. macro name */
112 char *end; /* end-rules: custom token */
113 int endspan; /* end-rules: next-line or infty */
114 int rule; /* current evaluation rule */
115 };
116
/*
 * Parameter list shared by all request handlers:
 *
 *	r	parse context
 *	tok	token of the macro
 *	buf	input buffer
 *	ln	parse line
 *	ppos	original position in the buffer
 *	pos	current position in the buffer
 *	offs	reset offset of the buffer data
 */
#define	ROFF_ARGS	struct roff *r, \
			enum roff_tok tok, \
			struct buf *buf, \
			int ln, \
			int ppos, \
			int pos, \
			int *offs

/* Type of a request handler. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
126
127 struct roffmac {
128 roffproc proc; /* process new macro */
129 roffproc text; /* process as child text of macro */
130 roffproc sub; /* process as child of macro */
131 int flags;
132 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
133 };
134
/*
 * A predefined string: an input name and the text replacing it.
 */
struct	predef {
	const char	*name;	/* predefined input name */
	const char	*str;	/* replacement symbol */
};

/*
 * Build one struct predef initializer.  The expansion ends in a
 * comma, so invocations can simply be listed one after another.
 */
#define	PREDEF(pname, pstr) \
	{ (pname), (pstr) },
142
143 /* --- function prototypes ------------------------------------------------ */
144
/* Management of the stack of pending blocks. */
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum roff_tok,
				const char *, int, int);

/* Internal helper functions. */
static	void		 roff_ccond(struct roff *, int, int);
static	enum rofferr	 roff_eqndelim(struct roff *, struct buf *, int);
static	int		 roff_evalcond(struct roff *, int, char *, int *);
static	int		 roff_evalnum(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalpar(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalstrcond(const char *, int *);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	size_t		 roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *, int);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(const struct roff *,
				const char *, size_t);
static	int		 roff_getregro(const struct roff *,
				const char *);
static	const char	*roff_getrenn(const struct roff *,
				const char *, size_t);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	int		 roff_hasregn(const struct roff *,
				const char *, size_t);
static	void		 roff_man_alloc1(struct roff_man *);
static	void		 roff_man_free1(struct roff_man *);
static	enum roff_tok	 roff_parse(struct roff *, char *, int *,
				int, int);
static	enum rofferr	 roff_parsetext(struct roff *, struct buf *,
				int, int *);
static	enum rofferr	 roff_res(struct roff *, struct buf *, int, int);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);

/* Request handlers; all of them take the ROFF_ARGS parameter list. */
static	enum rofferr	 roff_als(ROFF_ARGS);
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_br(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_cc(ROFF_ARGS);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum rofferr	 roff_ec(ROFF_ARGS);
static	enum rofferr	 roff_eo(ROFF_ARGS);
static	enum rofferr	 roff_insec(ROFF_ARGS);
static	enum rofferr	 roff_it(ROFF_ARGS);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_manyarg(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	enum rofferr	 roff_onearg(ROFF_ARGS);
static	enum rofferr	 roff_renamed(ROFF_ARGS);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	enum rofferr	 roff_rn(ROFF_ARGS);
static	enum rofferr	 roff_rr(ROFF_ARGS);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_tr(ROFF_ARGS);
static	enum rofferr	 roff_Dd(ROFF_ARGS);
static	enum rofferr	 roff_TH(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_unsupp(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
218
219 /* --- constant data ------------------------------------------------------ */
220
/* Flag bits for the numerical parsing and evaluation functions. */
#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */
223
224 const char *__roff_name[MAN_MAX + 1] = {
225 "br", "ce", "ft", "ll",
226 "mc", "po", "rj", "sp",
227 "ta", "ti", NULL,
228 "ab", "ad", "af", "aln",
229 "als", "am", "am1", "ami",
230 "ami1", "as", "as1", "asciify",
231 "backtrace", "bd", "bleedat", "blm",
232 "box", "boxa", "bp", "BP",
233 "break", "breakchar", "brnl", "brp",
234 "brpnl", "c2", "cc",
235 "cf", "cflags", "ch", "char",
236 "chop", "class", "close", "CL",
237 "color", "composite", "continue", "cp",
238 "cropat", "cs", "cu", "da",
239 "dch", "Dd", "de", "de1",
240 "defcolor", "dei", "dei1", "device",
241 "devicem", "di", "do", "ds",
242 "ds1", "dwh", "dt", "ec",
243 "ecr", "ecs", "el", "em",
244 "EN", "eo", "EP", "EQ",
245 "errprint", "ev", "evc", "ex",
246 "fallback", "fam", "fc", "fchar",
247 "fcolor", "fdeferlig", "feature", "fkern",
248 "fl", "flig", "fp", "fps",
249 "fschar", "fspacewidth", "fspecial", "ftr",
250 "fzoom", "gcolor", "hc", "hcode",
251 "hidechar", "hla", "hlm", "hpf",
252 "hpfa", "hpfcode", "hw", "hy",
253 "hylang", "hylen", "hym", "hypp",
254 "hys", "ie", "if", "ig",
255 "index", "it", "itc", "IX",
256 "kern", "kernafter", "kernbefore", "kernpair",
257 "lc", "lc_ctype", "lds", "length",
258 "letadj", "lf", "lg", "lhang",
259 "linetabs", "lnr", "lnrf", "lpfx",
260 "ls", "lsm", "lt",
261 "mediasize", "minss", "mk", "mso",
262 "na", "ne", "nh", "nhychar",
263 "nm", "nn", "nop", "nr",
264 "nrf", "nroff", "ns", "nx",
265 "open", "opena", "os", "output",
266 "padj", "papersize", "pc", "pev",
267 "pi", "PI", "pl", "pm",
268 "pn", "pnr", "ps",
269 "psbb", "pshape", "pso", "ptr",
270 "pvs", "rchar", "rd", "recursionlimit",
271 "return", "rfschar", "rhang",
272 "rm", "rn", "rnn", "rr",
273 "rs", "rt", "schar", "sentchar",
274 "shc", "shift", "sizes", "so",
275 "spacewidth", "special", "spreadwarn", "ss",
276 "sty", "substring", "sv", "sy",
277 "T&", "tc", "TE",
278 "TH", "tkf", "tl",
279 "tm", "tm1", "tmc", "tr",
280 "track", "transchar", "trf", "trimat",
281 "trin", "trnt", "troff", "TS",
282 "uf", "ul", "unformat", "unwatch",
283 "unwatchn", "vpt", "vs", "warn",
284 "warnscale", "watch", "watchlength", "watchn",
285 "wh", "while", "write", "writec",
286 "writem", "xflag", ".", NULL,
287 NULL, "text",
288 "Dd", "Dt", "Os", "Sh",
289 "Ss", "Pp", "D1", "Dl",
290 "Bd", "Ed", "Bl", "El",
291 "It", "Ad", "An", "Ap",
292 "Ar", "Cd", "Cm", "Dv",
293 "Er", "Ev", "Ex", "Fa",
294 "Fd", "Fl", "Fn", "Ft",
295 "Ic", "In", "Li", "Nd",
296 "Nm", "Op", "Ot", "Pa",
297 "Rv", "St", "Va", "Vt",
298 "Xr", "%A", "%B", "%D",
299 "%I", "%J", "%N", "%O",
300 "%P", "%R", "%T", "%V",
301 "Ac", "Ao", "Aq", "At",
302 "Bc", "Bf", "Bo", "Bq",
303 "Bsx", "Bx", "Db", "Dc",
304 "Do", "Dq", "Ec", "Ef",
305 "Em", "Eo", "Fx", "Ms",
306 "No", "Ns", "Nx", "Ox",
307 "Pc", "Pf", "Po", "Pq",
308 "Qc", "Ql", "Qo", "Qq",
309 "Re", "Rs", "Sc", "So",
310 "Sq", "Sm", "Sx", "Sy",
311 "Tn", "Ux", "Xc", "Xo",
312 "Fo", "Fc", "Oo", "Oc",
313 "Bk", "Ek", "Bt", "Hf",
314 "Fr", "Ud", "Lb", "Lp",
315 "Lk", "Mt", "Brq", "Bro",
316 "Brc", "%C", "Es", "En",
317 "Dx", "%Q", "%U", "Ta",
318 NULL,
319 "TH", "SH", "SS", "TP",
320 "LP", "PP", "P", "IP",
321 "HP", "SM", "SB", "BI",
322 "IB", "BR", "RB", "R",
323 "B", "I", "IR", "RI",
324 "nf", "fi",
325 "RE", "RS", "DT", "UC",
326 "PD", "AT", "in",
327 "OP", "EX", "EE", "UR",
328 "UE", NULL
329 };
330 const char *const *roff_name = __roff_name;
331
332 static struct roffmac roffs[TOKEN_NONE] = {
333 { roff_br, NULL, NULL, 0 }, /* br */
334 { roff_onearg, NULL, NULL, 0 }, /* ce */
335 { roff_onearg, NULL, NULL, 0 }, /* ft */
336 { roff_onearg, NULL, NULL, 0 }, /* ll */
337 { roff_onearg, NULL, NULL, 0 }, /* mc */
338 { roff_onearg, NULL, NULL, 0 }, /* po */
339 { roff_onearg, NULL, NULL, 0 }, /* rj */
340 { roff_onearg, NULL, NULL, 0 }, /* sp */
341 { roff_manyarg, NULL, NULL, 0 }, /* ta */
342 { roff_onearg, NULL, NULL, 0 }, /* ti */
343 { NULL, NULL, NULL, 0 }, /* ROFF_MAX */
344 { roff_unsupp, NULL, NULL, 0 }, /* ab */
345 { roff_line_ignore, NULL, NULL, 0 }, /* ad */
346 { roff_line_ignore, NULL, NULL, 0 }, /* af */
347 { roff_unsupp, NULL, NULL, 0 }, /* aln */
348 { roff_als, NULL, NULL, 0 }, /* als */
349 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am */
350 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am1 */
351 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami */
352 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami1 */
353 { roff_ds, NULL, NULL, 0 }, /* as */
354 { roff_ds, NULL, NULL, 0 }, /* as1 */
355 { roff_unsupp, NULL, NULL, 0 }, /* asciify */
356 { roff_line_ignore, NULL, NULL, 0 }, /* backtrace */
357 { roff_line_ignore, NULL, NULL, 0 }, /* bd */
358 { roff_line_ignore, NULL, NULL, 0 }, /* bleedat */
359 { roff_unsupp, NULL, NULL, 0 }, /* blm */
360 { roff_unsupp, NULL, NULL, 0 }, /* box */
361 { roff_unsupp, NULL, NULL, 0 }, /* boxa */
362 { roff_line_ignore, NULL, NULL, 0 }, /* bp */
363 { roff_unsupp, NULL, NULL, 0 }, /* BP */
364 { roff_unsupp, NULL, NULL, 0 }, /* break */
365 { roff_line_ignore, NULL, NULL, 0 }, /* breakchar */
366 { roff_line_ignore, NULL, NULL, 0 }, /* brnl */
367 { roff_br, NULL, NULL, 0 }, /* brp */
368 { roff_line_ignore, NULL, NULL, 0 }, /* brpnl */
369 { roff_unsupp, NULL, NULL, 0 }, /* c2 */
370 { roff_cc, NULL, NULL, 0 }, /* cc */
371 { roff_insec, NULL, NULL, 0 }, /* cf */
372 { roff_line_ignore, NULL, NULL, 0 }, /* cflags */
373 { roff_line_ignore, NULL, NULL, 0 }, /* ch */
374 { roff_unsupp, NULL, NULL, 0 }, /* char */
375 { roff_unsupp, NULL, NULL, 0 }, /* chop */
376 { roff_line_ignore, NULL, NULL, 0 }, /* class */
377 { roff_insec, NULL, NULL, 0 }, /* close */
378 { roff_unsupp, NULL, NULL, 0 }, /* CL */
379 { roff_line_ignore, NULL, NULL, 0 }, /* color */
380 { roff_unsupp, NULL, NULL, 0 }, /* composite */
381 { roff_unsupp, NULL, NULL, 0 }, /* continue */
382 { roff_line_ignore, NULL, NULL, 0 }, /* cp */
383 { roff_line_ignore, NULL, NULL, 0 }, /* cropat */
384 { roff_line_ignore, NULL, NULL, 0 }, /* cs */
385 { roff_line_ignore, NULL, NULL, 0 }, /* cu */
386 { roff_unsupp, NULL, NULL, 0 }, /* da */
387 { roff_unsupp, NULL, NULL, 0 }, /* dch */
388 { roff_Dd, NULL, NULL, 0 }, /* Dd */
389 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de */
390 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de1 */
391 { roff_line_ignore, NULL, NULL, 0 }, /* defcolor */
392 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei */
393 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei1 */
394 { roff_unsupp, NULL, NULL, 0 }, /* device */
395 { roff_unsupp, NULL, NULL, 0 }, /* devicem */
396 { roff_unsupp, NULL, NULL, 0 }, /* di */
397 { roff_unsupp, NULL, NULL, 0 }, /* do */
398 { roff_ds, NULL, NULL, 0 }, /* ds */
399 { roff_ds, NULL, NULL, 0 }, /* ds1 */
400 { roff_unsupp, NULL, NULL, 0 }, /* dwh */
401 { roff_unsupp, NULL, NULL, 0 }, /* dt */
402 { roff_ec, NULL, NULL, 0 }, /* ec */
403 { roff_unsupp, NULL, NULL, 0 }, /* ecr */
404 { roff_unsupp, NULL, NULL, 0 }, /* ecs */
405 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* el */
406 { roff_unsupp, NULL, NULL, 0 }, /* em */
407 { roff_EN, NULL, NULL, 0 }, /* EN */
408 { roff_eo, NULL, NULL, 0 }, /* eo */
409 { roff_unsupp, NULL, NULL, 0 }, /* EP */
410 { roff_EQ, NULL, NULL, 0 }, /* EQ */
411 { roff_line_ignore, NULL, NULL, 0 }, /* errprint */
412 { roff_unsupp, NULL, NULL, 0 }, /* ev */
413 { roff_unsupp, NULL, NULL, 0 }, /* evc */
414 { roff_unsupp, NULL, NULL, 0 }, /* ex */
415 { roff_line_ignore, NULL, NULL, 0 }, /* fallback */
416 { roff_line_ignore, NULL, NULL, 0 }, /* fam */
417 { roff_unsupp, NULL, NULL, 0 }, /* fc */
418 { roff_unsupp, NULL, NULL, 0 }, /* fchar */
419 { roff_line_ignore, NULL, NULL, 0 }, /* fcolor */
420 { roff_line_ignore, NULL, NULL, 0 }, /* fdeferlig */
421 { roff_line_ignore, NULL, NULL, 0 }, /* feature */
422 { roff_line_ignore, NULL, NULL, 0 }, /* fkern */
423 { roff_line_ignore, NULL, NULL, 0 }, /* fl */
424 { roff_line_ignore, NULL, NULL, 0 }, /* flig */
425 { roff_line_ignore, NULL, NULL, 0 }, /* fp */
426 { roff_line_ignore, NULL, NULL, 0 }, /* fps */
427 { roff_unsupp, NULL, NULL, 0 }, /* fschar */
428 { roff_line_ignore, NULL, NULL, 0 }, /* fspacewidth */
429 { roff_line_ignore, NULL, NULL, 0 }, /* fspecial */
430 { roff_line_ignore, NULL, NULL, 0 }, /* ftr */
431 { roff_line_ignore, NULL, NULL, 0 }, /* fzoom */
432 { roff_line_ignore, NULL, NULL, 0 }, /* gcolor */
433 { roff_line_ignore, NULL, NULL, 0 }, /* hc */
434 { roff_line_ignore, NULL, NULL, 0 }, /* hcode */
435 { roff_line_ignore, NULL, NULL, 0 }, /* hidechar */
436 { roff_line_ignore, NULL, NULL, 0 }, /* hla */
437 { roff_line_ignore, NULL, NULL, 0 }, /* hlm */
438 { roff_line_ignore, NULL, NULL, 0 }, /* hpf */
439 { roff_line_ignore, NULL, NULL, 0 }, /* hpfa */
440 { roff_line_ignore, NULL, NULL, 0 }, /* hpfcode */
441 { roff_line_ignore, NULL, NULL, 0 }, /* hw */
442 { roff_line_ignore, NULL, NULL, 0 }, /* hy */
443 { roff_line_ignore, NULL, NULL, 0 }, /* hylang */
444 { roff_line_ignore, NULL, NULL, 0 }, /* hylen */
445 { roff_line_ignore, NULL, NULL, 0 }, /* hym */
446 { roff_line_ignore, NULL, NULL, 0 }, /* hypp */
447 { roff_line_ignore, NULL, NULL, 0 }, /* hys */
448 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* ie */
449 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* if */
450 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ig */
451 { roff_unsupp, NULL, NULL, 0 }, /* index */
452 { roff_it, NULL, NULL, 0 }, /* it */
453 { roff_unsupp, NULL, NULL, 0 }, /* itc */
454 { roff_line_ignore, NULL, NULL, 0 }, /* IX */
455 { roff_line_ignore, NULL, NULL, 0 }, /* kern */
456 { roff_line_ignore, NULL, NULL, 0 }, /* kernafter */
457 { roff_line_ignore, NULL, NULL, 0 }, /* kernbefore */
458 { roff_line_ignore, NULL, NULL, 0 }, /* kernpair */
459 { roff_unsupp, NULL, NULL, 0 }, /* lc */
460 { roff_unsupp, NULL, NULL, 0 }, /* lc_ctype */
461 { roff_unsupp, NULL, NULL, 0 }, /* lds */
462 { roff_unsupp, NULL, NULL, 0 }, /* length */
463 { roff_line_ignore, NULL, NULL, 0 }, /* letadj */
464 { roff_insec, NULL, NULL, 0 }, /* lf */
465 { roff_line_ignore, NULL, NULL, 0 }, /* lg */
466 { roff_line_ignore, NULL, NULL, 0 }, /* lhang */
467 { roff_unsupp, NULL, NULL, 0 }, /* linetabs */
468 { roff_unsupp, NULL, NULL, 0 }, /* lnr */
469 { roff_unsupp, NULL, NULL, 0 }, /* lnrf */
470 { roff_unsupp, NULL, NULL, 0 }, /* lpfx */
471 { roff_line_ignore, NULL, NULL, 0 }, /* ls */
472 { roff_unsupp, NULL, NULL, 0 }, /* lsm */
473 { roff_line_ignore, NULL, NULL, 0 }, /* lt */
474 { roff_line_ignore, NULL, NULL, 0 }, /* mediasize */
475 { roff_line_ignore, NULL, NULL, 0 }, /* minss */
476 { roff_line_ignore, NULL, NULL, 0 }, /* mk */
477 { roff_insec, NULL, NULL, 0 }, /* mso */
478 { roff_line_ignore, NULL, NULL, 0 }, /* na */
479 { roff_line_ignore, NULL, NULL, 0 }, /* ne */
480 { roff_line_ignore, NULL, NULL, 0 }, /* nh */
481 { roff_line_ignore, NULL, NULL, 0 }, /* nhychar */
482 { roff_unsupp, NULL, NULL, 0 }, /* nm */
483 { roff_unsupp, NULL, NULL, 0 }, /* nn */
484 { roff_unsupp, NULL, NULL, 0 }, /* nop */
485 { roff_nr, NULL, NULL, 0 }, /* nr */
486 { roff_unsupp, NULL, NULL, 0 }, /* nrf */
487 { roff_line_ignore, NULL, NULL, 0 }, /* nroff */
488 { roff_line_ignore, NULL, NULL, 0 }, /* ns */
489 { roff_insec, NULL, NULL, 0 }, /* nx */
490 { roff_insec, NULL, NULL, 0 }, /* open */
491 { roff_insec, NULL, NULL, 0 }, /* opena */
492 { roff_line_ignore, NULL, NULL, 0 }, /* os */
493 { roff_unsupp, NULL, NULL, 0 }, /* output */
494 { roff_line_ignore, NULL, NULL, 0 }, /* padj */
495 { roff_line_ignore, NULL, NULL, 0 }, /* papersize */
496 { roff_line_ignore, NULL, NULL, 0 }, /* pc */
497 { roff_line_ignore, NULL, NULL, 0 }, /* pev */
498 { roff_insec, NULL, NULL, 0 }, /* pi */
499 { roff_unsupp, NULL, NULL, 0 }, /* PI */
500 { roff_line_ignore, NULL, NULL, 0 }, /* pl */
501 { roff_line_ignore, NULL, NULL, 0 }, /* pm */
502 { roff_line_ignore, NULL, NULL, 0 }, /* pn */
503 { roff_line_ignore, NULL, NULL, 0 }, /* pnr */
504 { roff_line_ignore, NULL, NULL, 0 }, /* ps */
505 { roff_unsupp, NULL, NULL, 0 }, /* psbb */
506 { roff_unsupp, NULL, NULL, 0 }, /* pshape */
507 { roff_insec, NULL, NULL, 0 }, /* pso */
508 { roff_line_ignore, NULL, NULL, 0 }, /* ptr */
509 { roff_line_ignore, NULL, NULL, 0 }, /* pvs */
510 { roff_unsupp, NULL, NULL, 0 }, /* rchar */
511 { roff_line_ignore, NULL, NULL, 0 }, /* rd */
512 { roff_line_ignore, NULL, NULL, 0 }, /* recursionlimit */
513 { roff_unsupp, NULL, NULL, 0 }, /* return */
514 { roff_unsupp, NULL, NULL, 0 }, /* rfschar */
515 { roff_line_ignore, NULL, NULL, 0 }, /* rhang */
516 { roff_rm, NULL, NULL, 0 }, /* rm */
517 { roff_rn, NULL, NULL, 0 }, /* rn */
518 { roff_unsupp, NULL, NULL, 0 }, /* rnn */
519 { roff_rr, NULL, NULL, 0 }, /* rr */
520 { roff_line_ignore, NULL, NULL, 0 }, /* rs */
521 { roff_line_ignore, NULL, NULL, 0 }, /* rt */
522 { roff_unsupp, NULL, NULL, 0 }, /* schar */
523 { roff_line_ignore, NULL, NULL, 0 }, /* sentchar */
524 { roff_line_ignore, NULL, NULL, 0 }, /* shc */
525 { roff_unsupp, NULL, NULL, 0 }, /* shift */
526 { roff_line_ignore, NULL, NULL, 0 }, /* sizes */
527 { roff_so, NULL, NULL, 0 }, /* so */
528 { roff_line_ignore, NULL, NULL, 0 }, /* spacewidth */
529 { roff_line_ignore, NULL, NULL, 0 }, /* special */
530 { roff_line_ignore, NULL, NULL, 0 }, /* spreadwarn */
531 { roff_line_ignore, NULL, NULL, 0 }, /* ss */
532 { roff_line_ignore, NULL, NULL, 0 }, /* sty */
533 { roff_unsupp, NULL, NULL, 0 }, /* substring */
534 { roff_line_ignore, NULL, NULL, 0 }, /* sv */
535 { roff_insec, NULL, NULL, 0 }, /* sy */
536 { roff_T_, NULL, NULL, 0 }, /* T& */
537 { roff_unsupp, NULL, NULL, 0 }, /* tc */
538 { roff_TE, NULL, NULL, 0 }, /* TE */
539 { roff_TH, NULL, NULL, 0 }, /* TH */
540 { roff_line_ignore, NULL, NULL, 0 }, /* tkf */
541 { roff_unsupp, NULL, NULL, 0 }, /* tl */
542 { roff_line_ignore, NULL, NULL, 0 }, /* tm */
543 { roff_line_ignore, NULL, NULL, 0 }, /* tm1 */
544 { roff_line_ignore, NULL, NULL, 0 }, /* tmc */
545 { roff_tr, NULL, NULL, 0 }, /* tr */
546 { roff_line_ignore, NULL, NULL, 0 }, /* track */
547 { roff_line_ignore, NULL, NULL, 0 }, /* transchar */
548 { roff_insec, NULL, NULL, 0 }, /* trf */
549 { roff_line_ignore, NULL, NULL, 0 }, /* trimat */
550 { roff_unsupp, NULL, NULL, 0 }, /* trin */
551 { roff_unsupp, NULL, NULL, 0 }, /* trnt */
552 { roff_line_ignore, NULL, NULL, 0 }, /* troff */
553 { roff_TS, NULL, NULL, 0 }, /* TS */
554 { roff_line_ignore, NULL, NULL, 0 }, /* uf */
555 { roff_line_ignore, NULL, NULL, 0 }, /* ul */
556 { roff_unsupp, NULL, NULL, 0 }, /* unformat */
557 { roff_line_ignore, NULL, NULL, 0 }, /* unwatch */
558 { roff_line_ignore, NULL, NULL, 0 }, /* unwatchn */
559 { roff_line_ignore, NULL, NULL, 0 }, /* vpt */
560 { roff_line_ignore, NULL, NULL, 0 }, /* vs */
561 { roff_line_ignore, NULL, NULL, 0 }, /* warn */
562 { roff_line_ignore, NULL, NULL, 0 }, /* warnscale */
563 { roff_line_ignore, NULL, NULL, 0 }, /* watch */
564 { roff_line_ignore, NULL, NULL, 0 }, /* watchlength */
565 { roff_line_ignore, NULL, NULL, 0 }, /* watchn */
566 { roff_unsupp, NULL, NULL, 0 }, /* wh */
567 { roff_unsupp, NULL, NULL, 0 }, /* while */
568 { roff_insec, NULL, NULL, 0 }, /* write */
569 { roff_insec, NULL, NULL, 0 }, /* writec */
570 { roff_insec, NULL, NULL, 0 }, /* writem */
571 { roff_line_ignore, NULL, NULL, 0 }, /* xflag */
572 { roff_cblock, NULL, NULL, 0 }, /* . */
573 { roff_renamed, NULL, NULL, 0 },
574 { roff_userdef, NULL, NULL, 0 }
575 };
576
/*
 * NULL-terminated list of names.
 * Not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH
 */
const char *const __mdoc_reserved[] = {
	"Ac",	"Ad",	"An",	"Ao",	"Ap",	"Aq",	"Ar",	"At",
	"Bc",	"Bd",	"Bf",	"Bk",	"Bl",	"Bo",	"Bq",
	"Brc",	"Bro",	"Brq",	"Bsx",	"Bt",	"Bx",
	"Cd",	"Cm",	"Db",	"Dc",	"Dd",	"Dl",	"Do",	"Dq",
	"Dt",	"Dv",	"Dx",	"D1",
	"Ec",	"Ed",	"Ef",	"Ek",	"El",	"Em",
	"En",	"Eo",	"Er",	"Es",	"Ev",	"Ex",
	"Fa",	"Fc",	"Fd",	"Fl",	"Fn",	"Fo",	"Fr",	"Ft",	"Fx",
	"Hf",	"Ic",	"In",	"It",	"Lb",	"Li",	"Lk",	"Lp",
	"Ms",	"Mt",	"Nd",	"Nm",	"No",	"Ns",	"Nx",
	"Oc",	"Oo",	"Op",	"Os",	"Ot",	"Ox",
	"Pa",	"Pc",	"Pf",	"Po",	"Pp",	"Pq",
	"Qc",	"Ql",	"Qo",	"Qq",	"Re",	"Rs",	"Rv",
	"Sc",	"Sh",	"Sm",	"So",	"Sq",
	"Ss",	"St",	"Sx",	"Sy",
	"Ta",	"Tn",	"Ud",	"Ux",	"Va",	"Vt",	"Xc",	"Xo",	"Xr",
	"%A",	"%B",	"%C",	"%D",	"%I",	"%J",	"%N",	"%O",
	"%P",	"%Q",	"%R",	"%T",	"%U",	"%V",
	NULL
};
599
/*
 * NULL-terminated list of names.
 * Not currently implemented: BT DE DS ME MT PT SY TQ YS
 */
const char *const __man_reserved[] = {
	"AT",	"B",	"BI",	"BR",	"DT",
	"EE",	"EN",	"EQ",	"EX",	"HP",	"I",	"IB",	"IP",	"IR",
	"LP",	"OP",	"P",	"PD",	"PP",
	"R",	"RB",	"RE",	"RI",	"RS",	"SB",	"SH",	"SM",	"SS",
	"TE",	"TH",	"TP",	"TS",	"T&",	"UC",	"UE",	"UR",
	NULL
};
609
610 /* Array of injected predefined strings. */
611 #define PREDEFS_MAX 38
612 static const struct predef predefs[PREDEFS_MAX] = {
613 #include "predefs.in"
614 };
615
/* File-scope parser state; all four are cleared by roff_reset(). */
static	int		 roffce_lines;	/* number of input lines to center */
static	struct roff_node *roffce_node;	/* active request */
static	int		 roffit_lines;	/* number of lines to delay */
static	char		*roffit_macro;	/* nil-terminated macro line */
620
621
622 /* --- request table ------------------------------------------------------ */
623
624 struct ohash *
625 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
626 {
627 struct ohash *htab;
628 struct roffreq *req;
629 enum roff_tok tok;
630 size_t sz;
631 unsigned int slot;
632
633 htab = mandoc_malloc(sizeof(*htab));
634 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
635
636 for (tok = mintok; tok < maxtok; tok++) {
637 if (roff_name[tok] == NULL)
638 continue;
639 sz = strlen(roff_name[tok]);
640 req = mandoc_malloc(sizeof(*req) + sz + 1);
641 req->tok = tok;
642 memcpy(req->name, roff_name[tok], sz + 1);
643 slot = ohash_qlookup(htab, req->name);
644 ohash_insert(htab, slot, req);
645 }
646 return htab;
647 }
648
/*
 * Free all entries of a request hash table and the table itself.
 * A NULL table is silently accepted.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 idx;

	if (htab == NULL)
		return;

	entry = ohash_first(htab, &idx);
	while (entry != NULL) {
		free(entry);
		entry = ohash_next(htab, &idx);
	}
	ohash_delete(htab);
	free(htab);
}
663
664 enum roff_tok
665 roffhash_find(struct ohash *htab, const char *name, size_t sz)
666 {
667 struct roffreq *req;
668 const char *end;
669
670 if (sz) {
671 end = name + sz;
672 req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
673 } else
674 req = ohash_find(htab, ohash_qlookup(htab, name));
675 return req == NULL ? TOKEN_NONE : req->tok;
676 }
677
678 /* --- stack of request blocks -------------------------------------------- */
679
680 /*
681 * Pop the current node off of the stack of roff instructions currently
682 * pending.
683 */
684 static void
685 roffnode_pop(struct roff *r)
686 {
687 struct roffnode *p;
688
689 assert(r->last);
690 p = r->last;
691
692 r->last = r->last->parent;
693 free(p->name);
694 free(p->end);
695 free(p);
696 }
697
698 /*
699 * Push a roff node onto the instruction stack. This must later be
700 * removed with roffnode_pop().
701 */
702 static void
703 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
704 int line, int col)
705 {
706 struct roffnode *p;
707
708 p = mandoc_calloc(1, sizeof(struct roffnode));
709 p->tok = tok;
710 if (name)
711 p->name = mandoc_strdup(name);
712 p->parent = r->last;
713 p->line = line;
714 p->col = col;
715 p->rule = p->parent ? p->parent->rule : 0;
716
717 r->last = p;
718 }
719
720 /* --- roff parser state data management ---------------------------------- */
721
722 static void
723 roff_free1(struct roff *r)
724 {
725 struct tbl_node *tbl;
726 struct eqn_node *e;
727 int i;
728
729 while (NULL != (tbl = r->first_tbl)) {
730 r->first_tbl = tbl->next;
731 tbl_free(tbl);
732 }
733 r->first_tbl = r->last_tbl = r->tbl = NULL;
734
735 while (NULL != (e = r->first_eqn)) {
736 r->first_eqn = e->next;
737 eqn_free(e);
738 }
739 r->first_eqn = r->last_eqn = r->eqn = NULL;
740
741 while (r->last)
742 roffnode_pop(r);
743
744 free (r->rstack);
745 r->rstack = NULL;
746 r->rstacksz = 0;
747 r->rstackpos = -1;
748
749 roff_freereg(r->regtab);
750 r->regtab = NULL;
751
752 roff_freestr(r->strtab);
753 roff_freestr(r->rentab);
754 roff_freestr(r->xmbtab);
755 r->strtab = r->rentab = r->xmbtab = NULL;
756
757 if (r->xtab)
758 for (i = 0; i < 128; i++)
759 free(r->xtab[i].p);
760 free(r->xtab);
761 r->xtab = NULL;
762 }
763
764 void
765 roff_reset(struct roff *r)
766 {
767 roff_free1(r);
768 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
769 r->control = '\0';
770 r->escape = '\\';
771 roffce_lines = 0;
772 roffce_node = NULL;
773 roffit_lines = 0;
774 roffit_macro = NULL;
775 }
776
777 void
778 roff_free(struct roff *r)
779 {
780 roff_free1(r);
781 roffhash_free(r->reqtab);
782 free(r);
783 }
784
785 struct roff *
786 roff_alloc(struct mparse *parse, int options)
787 {
788 struct roff *r;
789
790 r = mandoc_calloc(1, sizeof(struct roff));
791 r->parse = parse;
792 r->reqtab = roffhash_alloc(0, ROFF_USERDEF);
793 r->options = options;
794 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
795 r->rstackpos = -1;
796 r->escape = '\\';
797 return r;
798 }
799
800 /* --- syntax tree state data management ---------------------------------- */
801
802 static void
803 roff_man_free1(struct roff_man *man)
804 {
805
806 if (man->first != NULL)
807 roff_node_delete(man, man->first);
808 free(man->meta.msec);
809 free(man->meta.vol);
810 free(man->meta.os);
811 free(man->meta.arch);
812 free(man->meta.title);
813 free(man->meta.name);
814 free(man->meta.date);
815 }
816
817 static void
818 roff_man_alloc1(struct roff_man *man)
819 {
820
821 memset(&man->meta, 0, sizeof(man->meta));
822 man->first = mandoc_calloc(1, sizeof(*man->first));
823 man->first->type = ROFFT_ROOT;
824 man->last = man->first;
825 man->last_es = NULL;
826 man->flags = 0;
827 man->macroset = MACROSET_NONE;
828 man->lastsec = man->lastnamed = SEC_NONE;
829 man->next = ROFF_NEXT_CHILD;
830 }
831
/*
 * Discard the content of a syntax tree object and set it up
 * again from scratch; the object itself is kept.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);	/* drop the old tree and meta data */
	roff_man_alloc1(man);	/* start over with an empty root */
}
839
/*
 * Destroy a syntax tree object: release its content with
 * roff_man_free1(), then the object itself.  Passing NULL is a
 * no-op, in line with free(3) and roffhash_free().
 */
void
roff_man_free(struct roff_man *man)
{
	if (man == NULL)
		return;
	roff_man_free1(man);
	free(man);
}
847
848 struct roff_man *
849 roff_man_alloc(struct roff *roff, struct mparse *parse,
850 const char *defos, int quick)
851 {
852 struct roff_man *man;
853
854 man = mandoc_calloc(1, sizeof(*man));
855 man->parse = parse;
856 man->roff = roff;
857 man->defos = defos;
858 man->quick = quick;
859 roff_man_alloc1(man);
860 roff->man = man;
861 return man;
862 }
863
864 /* --- syntax tree handling ----------------------------------------------- */
865
866 struct roff_node *
867 roff_node_alloc(struct roff_man *man, int line, int pos,
868 enum roff_type type, int tok)
869 {
870 struct roff_node *n;
871
872 n = mandoc_calloc(1, sizeof(*n));
873 n->line = line;
874 n->pos = pos;
875 n->tok = tok;
876 n->type = type;
877 n->sec = man->lastsec;
878
879 if (man->flags & MDOC_SYNOPSIS)
880 n->flags |= NODE_SYNPRETTY;
881 else
882 n->flags &= ~NODE_SYNPRETTY;
883 if (man->flags & MDOC_NEWLINE)
884 n->flags |= NODE_LINE;
885 man->flags &= ~MDOC_NEWLINE;
886
887 return n;
888 }
889
890 void
891 roff_node_append(struct roff_man *man, struct roff_node *n)
892 {
893
894 switch (man->next) {
895 case ROFF_NEXT_SIBLING:
896 if (man->last->next != NULL) {
897 n->next = man->last->next;
898 man->last->next->prev = n;
899 } else
900 man->last->parent->last = n;
901 man->last->next = n;
902 n->prev = man->last;
903 n->parent = man->last->parent;
904 break;
905 case ROFF_NEXT_CHILD:
906 if (man->last->child != NULL) {
907 n->next = man->last->child;
908 man->last->child->prev = n;
909 } else
910 man->last->last = n;
911 man->last->child = n;
912 n->parent = man->last;
913 break;
914 default:
915 abort();
916 }
917 man->last = n;
918
919 switch (n->type) {
920 case ROFFT_HEAD:
921 n->parent->head = n;
922 break;
923 case ROFFT_BODY:
924 if (n->end != ENDBODY_NOT)
925 return;
926 n->parent->body = n;
927 break;
928 case ROFFT_TAIL:
929 n->parent->tail = n;
930 break;
931 default:
932 return;
933 }
934
935 /*
936 * Copy over the normalised-data pointer of our parent. Not
937 * everybody has one, but copying a null pointer is fine.
938 */
939
940 n->norm = n->parent->norm;
941 assert(n->parent->type == ROFFT_BLOCK);
942 }
943
944 void
945 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
946 {
947 struct roff_node *n;
948
949 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
950 n->string = roff_strdup(man->roff, word);
951 roff_node_append(man, n);
952 n->flags |= NODE_VALID | NODE_ENDED;
953 man->next = ROFF_NEXT_SIBLING;
954 }
955
956 void
957 roff_word_append(struct roff_man *man, const char *word)
958 {
959 struct roff_node *n;
960 char *addstr, *newstr;
961
962 n = man->last;
963 addstr = roff_strdup(man->roff, word);
964 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
965 free(addstr);
966 free(n->string);
967 n->string = newstr;
968 man->next = ROFF_NEXT_SIBLING;
969 }
970
971 void
972 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
973 {
974 struct roff_node *n;
975
976 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
977 roff_node_append(man, n);
978 man->next = ROFF_NEXT_CHILD;
979 }
980
981 struct roff_node *
982 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
983 {
984 struct roff_node *n;
985
986 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
987 roff_node_append(man, n);
988 man->next = ROFF_NEXT_CHILD;
989 return n;
990 }
991
992 struct roff_node *
993 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
994 {
995 struct roff_node *n;
996
997 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
998 roff_node_append(man, n);
999 man->next = ROFF_NEXT_CHILD;
1000 return n;
1001 }
1002
1003 struct roff_node *
1004 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1005 {
1006 struct roff_node *n;
1007
1008 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1009 roff_node_append(man, n);
1010 man->next = ROFF_NEXT_CHILD;
1011 return n;
1012 }
1013
1014 void
1015 roff_addeqn(struct roff_man *man, const struct eqn *eqn)
1016 {
1017 struct roff_node *n;
1018
1019 n = roff_node_alloc(man, eqn->ln, eqn->pos, ROFFT_EQN, TOKEN_NONE);
1020 n->eqn = eqn;
1021 if (eqn->ln > man->last->line)
1022 n->flags |= NODE_LINE;
1023 roff_node_append(man, n);
1024 man->next = ROFF_NEXT_SIBLING;
1025 }
1026
1027 void
1028 roff_addtbl(struct roff_man *man, const struct tbl_span *tbl)
1029 {
1030 struct roff_node *n;
1031
1032 if (man->macroset == MACROSET_MAN)
1033 man_breakscope(man, ROFF_TS);
1034 n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE);
1035 n->span = tbl;
1036 roff_node_append(man, n);
1037 n->flags |= NODE_VALID | NODE_ENDED;
1038 man->next = ROFF_NEXT_SIBLING;
1039 }
1040
1041 void
1042 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1043 {
1044
1045 /* Adjust siblings. */
1046
1047 if (n->prev)
1048 n->prev->next = n->next;
1049 if (n->next)
1050 n->next->prev = n->prev;
1051
1052 /* Adjust parent. */
1053
1054 if (n->parent != NULL) {
1055 if (n->parent->child == n)
1056 n->parent->child = n->next;
1057 if (n->parent->last == n)
1058 n->parent->last = n->prev;
1059 }
1060
1061 /* Adjust parse point. */
1062
1063 if (man == NULL)
1064 return;
1065 if (man->last == n) {
1066 if (n->prev == NULL) {
1067 man->last = n->parent;
1068 man->next = ROFF_NEXT_CHILD;
1069 } else {
1070 man->last = n->prev;
1071 man->next = ROFF_NEXT_SIBLING;
1072 }
1073 }
1074 if (man->first == n)
1075 man->first = NULL;
1076 }
1077
1078 void
1079 roff_node_free(struct roff_node *n)
1080 {
1081
1082 if (n->args != NULL)
1083 mdoc_argv_free(n->args);
1084 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1085 free(n->norm);
1086 free(n->string);
1087 free(n);
1088 }
1089
1090 void
1091 roff_node_delete(struct roff_man *man, struct roff_node *n)
1092 {
1093
1094 while (n->child != NULL)
1095 roff_node_delete(man, n->child);
1096 roff_node_unlink(man, n);
1097 roff_node_free(n);
1098 }
1099
1100 void
1101 deroff(char **dest, const struct roff_node *n)
1102 {
1103 char *cp;
1104 size_t sz;
1105
1106 if (n->type != ROFFT_TEXT) {
1107 for (n = n->child; n != NULL; n = n->next)
1108 deroff(dest, n);
1109 return;
1110 }
1111
1112 /* Skip leading whitespace. */
1113
1114 for (cp = n->string; *cp != '\0'; cp++) {
1115 if (cp[0] == '\\' && cp[1] != '\0' &&
1116 strchr(" %&0^|~", cp[1]) != NULL)
1117 cp++;
1118 else if ( ! isspace((unsigned char)*cp))
1119 break;
1120 }
1121
1122 /* Skip trailing backslash. */
1123
1124 sz = strlen(cp);
1125 if (sz > 0 && cp[sz - 1] == '\\')
1126 sz--;
1127
1128 /* Skip trailing whitespace. */
1129
1130 for (; sz; sz--)
1131 if ( ! isspace((unsigned char)cp[sz-1]))
1132 break;
1133
1134 /* Skip empty strings. */
1135
1136 if (sz == 0)
1137 return;
1138
1139 if (*dest == NULL) {
1140 *dest = mandoc_strndup(cp, sz);
1141 return;
1142 }
1143
1144 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1145 free(*dest);
1146 *dest = cp;
1147 }
1148
1149 /* --- main functions of the roff parser ---------------------------------- */
1150
1151 /*
1152 * In the current line, expand escape sequences that tend to get
1153 * used in numerical expressions and conditional requests.
1154 * Also check the syntax of the remaining escape sequences.
1155 */
1156 static enum rofferr
1157 roff_res(struct roff *r, struct buf *buf, int ln, int pos)
1158 {
1159 char ubuf[24]; /* buffer to print the number */
1160 const char *start; /* start of the string to process */
1161 char *stesc; /* start of an escape sequence ('\\') */
1162 const char *stnam; /* start of the name, after "[(*" */
1163 const char *cp; /* end of the name, e.g. before ']' */
1164 const char *res; /* the string to be substituted */
1165 char *nbuf; /* new buffer to copy buf->buf to */
1166 size_t maxl; /* expected length of the escape name */
1167 size_t naml; /* actual length of the escape name */
1168 enum mandoc_esc esc; /* type of the escape sequence */
1169 int inaml; /* length returned from mandoc_escape() */
1170 int expand_count; /* to avoid infinite loops */
1171 int npos; /* position in numeric expression */
1172 int arg_complete; /* argument not interrupted by eol */
1173 int done; /* no more input available */
1174 char term; /* character terminating the escape */
1175
1176 /* Search forward for comments. */
1177
1178 done = 0;
1179 start = buf->buf + pos;
1180 for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
1181 if (stesc[0] != r->escape || stesc[1] == '\0')
1182 continue;
1183 stesc++;
1184 if (*stesc != '"' && *stesc != '#')
1185 continue;
1186 cp = strchr(stesc--, '\0') - 1;
1187 if (*cp == '\n') {
1188 done = 1;
1189 cp--;
1190 }
1191 if (*cp == ' ' || *cp == '\t')
1192 mandoc_msg(MANDOCERR_SPACE_EOL, r->parse,
1193 ln, cp - buf->buf, NULL);
1194 while (stesc > start && stesc[-1] == ' ')
1195 stesc--;
1196 *stesc = '\0';
1197 break;
1198 }
1199 if (stesc == start)
1200 return ROFF_CONT;
1201 stesc--;
1202
1203 /* Notice the end of the input. */
1204
1205 if (*stesc == '\n') {
1206 *stesc-- = '\0';
1207 done = 1;
1208 }
1209
1210 expand_count = 0;
1211 while (stesc >= start) {
1212
1213 /* Search backwards for the next backslash. */
1214
1215 if (*stesc != r->escape) {
1216 if (*stesc == '\\') {
1217 *stesc = '\0';
1218 buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
1219 buf->buf, stesc + 1) + 1;
1220 start = nbuf + pos;
1221 stesc = nbuf + (stesc - buf->buf);
1222 free(buf->buf);
1223 buf->buf = nbuf;
1224 }
1225 stesc--;
1226 continue;
1227 }
1228
1229 /* If it is escaped, skip it. */
1230
1231 for (cp = stesc - 1; cp >= start; cp--)
1232 if (*cp != r->escape)
1233 break;
1234
1235 if ((stesc - cp) % 2 == 0) {
1236 while (stesc > cp)
1237 *stesc-- = '\\';
1238 continue;
1239 } else if (stesc[1] != '\0') {
1240 *stesc = '\\';
1241 } else {
1242 *stesc-- = '\0';
1243 if (done)
1244 continue;
1245 else
1246 return ROFF_APPEND;
1247 }
1248
1249 /* Decide whether to expand or to check only. */
1250
1251 term = '\0';
1252 cp = stesc + 1;
1253 switch (*cp) {
1254 case '*':
1255 res = NULL;
1256 break;
1257 case 'B':
1258 case 'w':
1259 term = cp[1];
1260 /* FALLTHROUGH */
1261 case 'n':
1262 res = ubuf;
1263 break;
1264 default:
1265 esc = mandoc_escape(&cp, &stnam, &inaml);
1266 if (esc == ESCAPE_ERROR ||
1267 (esc == ESCAPE_SPECIAL &&
1268 mchars_spec2cp(stnam, inaml) < 0))
1269 mandoc_vmsg(MANDOCERR_ESC_BAD,
1270 r->parse, ln, (int)(stesc - buf->buf),
1271 "%.*s", (int)(cp - stesc), stesc);
1272 stesc--;
1273 continue;
1274 }
1275
1276 if (EXPAND_LIMIT < ++expand_count) {
1277 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1278 ln, (int)(stesc - buf->buf), NULL);
1279 return ROFF_IGN;
1280 }
1281
1282 /*
1283 * The third character decides the length
1284 * of the name of the string or register.
1285 * Save a pointer to the name.
1286 */
1287
1288 if (term == '\0') {
1289 switch (*++cp) {
1290 case '\0':
1291 maxl = 0;
1292 break;
1293 case '(':
1294 cp++;
1295 maxl = 2;
1296 break;
1297 case '[':
1298 cp++;
1299 term = ']';
1300 maxl = 0;
1301 break;
1302 default:
1303 maxl = 1;
1304 break;
1305 }
1306 } else {
1307 cp += 2;
1308 maxl = 0;
1309 }
1310 stnam = cp;
1311
1312 /* Advance to the end of the name. */
1313
1314 naml = 0;
1315 arg_complete = 1;
1316 while (maxl == 0 || naml < maxl) {
1317 if (*cp == '\0') {
1318 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
1319 ln, (int)(stesc - buf->buf), stesc);
1320 arg_complete = 0;
1321 break;
1322 }
1323 if (maxl == 0 && *cp == term) {
1324 cp++;
1325 break;
1326 }
1327 if (*cp++ != '\\' || stesc[1] != 'w') {
1328 naml++;
1329 continue;
1330 }
1331 switch (mandoc_escape(&cp, NULL, NULL)) {
1332 case ESCAPE_SPECIAL:
1333 case ESCAPE_UNICODE:
1334 case ESCAPE_NUMBERED:
1335 case ESCAPE_OVERSTRIKE:
1336 naml++;
1337 break;
1338 default:
1339 break;
1340 }
1341 }
1342
1343 /*
1344 * Retrieve the replacement string; if it is
1345 * undefined, resume searching for escapes.
1346 */
1347
1348 switch (stesc[1]) {
1349 case '*':
1350 if (arg_complete)
1351 res = roff_getstrn(r, stnam, naml);
1352 break;
1353 case 'B':
1354 npos = 0;
1355 ubuf[0] = arg_complete &&
1356 roff_evalnum(r, ln, stnam, &npos,
1357 NULL, ROFFNUM_SCALE) &&
1358 stnam + npos + 1 == cp ? '1' : '0';
1359 ubuf[1] = '\0';
1360 break;
1361 case 'n':
1362 if (arg_complete)
1363 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1364 roff_getregn(r, stnam, naml));
1365 else
1366 ubuf[0] = '\0';
1367 break;
1368 case 'w':
1369 /* use even incomplete args */
1370 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1371 24 * (int)naml);
1372 break;
1373 }
1374
1375 if (res == NULL) {
1376 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1377 r->parse, ln, (int)(stesc - buf->buf),
1378 "%.*s", (int)naml, stnam);
1379 res = "";
1380 } else if (buf->sz + strlen(res) > SHRT_MAX) {
1381 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1382 ln, (int)(stesc - buf->buf), NULL);
1383 return ROFF_IGN;
1384 }
1385
1386 /* Replace the escape sequence by the string. */
1387
1388 *stesc = '\0';
1389 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
1390 buf->buf, res, cp) + 1;
1391
1392 /* Prepare for the next replacement. */
1393
1394 start = nbuf + pos;
1395 stesc = nbuf + (stesc - buf->buf) + strlen(res);
1396 free(buf->buf);
1397 buf->buf = nbuf;
1398 }
1399 return ROFF_CONT;
1400 }
1401
1402 /*
1403 * Process text streams.
1404 */
1405 static enum rofferr
1406 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1407 {
1408 size_t sz;
1409 const char *start;
1410 char *p;
1411 int isz;
1412 enum mandoc_esc esc;
1413
1414 /* Spring the input line trap. */
1415
1416 if (roffit_lines == 1) {
1417 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1418 free(buf->buf);
1419 buf->buf = p;
1420 buf->sz = isz + 1;
1421 *offs = 0;
1422 free(roffit_macro);
1423 roffit_lines = 0;
1424 return ROFF_REPARSE;
1425 } else if (roffit_lines > 1)
1426 --roffit_lines;
1427
1428 if (roffce_node != NULL && buf->buf[pos] != '\0') {
1429 if (roffce_lines < 1) {
1430 r->man->last = roffce_node;
1431 r->man->next = ROFF_NEXT_SIBLING;
1432 roffce_lines = 0;
1433 roffce_node = NULL;
1434 } else
1435 roffce_lines--;
1436 }
1437
1438 /* Convert all breakable hyphens into ASCII_HYPH. */
1439
1440 start = p = buf->buf + pos;
1441
1442 while (*p != '\0') {
1443 sz = strcspn(p, "-\\");
1444 p += sz;
1445
1446 if (*p == '\0')
1447 break;
1448
1449 if (*p == '\\') {
1450 /* Skip over escapes. */
1451 p++;
1452 esc = mandoc_escape((const char **)&p, NULL, NULL);
1453 if (esc == ESCAPE_ERROR)
1454 break;
1455 while (*p == '-')
1456 p++;
1457 continue;
1458 } else if (p == start) {
1459 p++;
1460 continue;
1461 }
1462
1463 if (isalpha((unsigned char)p[-1]) &&
1464 isalpha((unsigned char)p[1]))
1465 *p = ASCII_HYPH;
1466 p++;
1467 }
1468 return ROFF_CONT;
1469 }
1470
1471 enum rofferr
1472 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
1473 {
1474 enum roff_tok t;
1475 enum rofferr e;
1476 int pos; /* parse point */
1477 int spos; /* saved parse point for messages */
1478 int ppos; /* original offset in buf->buf */
1479 int ctl; /* macro line (boolean) */
1480
1481 ppos = pos = *offs;
1482
1483 /* Handle in-line equation delimiters. */
1484
1485 if (r->tbl == NULL &&
1486 r->last_eqn != NULL && r->last_eqn->delim &&
1487 (r->eqn == NULL || r->eqn_inline)) {
1488 e = roff_eqndelim(r, buf, pos);
1489 if (e == ROFF_REPARSE)
1490 return e;
1491 assert(e == ROFF_CONT);
1492 }
1493
1494 /* Expand some escape sequences. */
1495
1496 e = roff_res(r, buf, ln, pos);
1497 if (e == ROFF_IGN || e == ROFF_APPEND)
1498 return e;
1499 assert(e == ROFF_CONT);
1500
1501 ctl = roff_getcontrol(r, buf->buf, &pos);
1502
1503 /*
1504 * First, if a scope is open and we're not a macro, pass the
1505 * text through the macro's filter.
1506 * Equations process all content themselves.
1507 * Tables process almost all content themselves, but we want
1508 * to warn about macros before passing it there.
1509 */
1510
1511 if (r->last != NULL && ! ctl) {
1512 t = r->last->tok;
1513 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1514 if (e == ROFF_IGN)
1515 return e;
1516 assert(e == ROFF_CONT);
1517 }
1518 if (r->eqn != NULL)
1519 return eqn_read(&r->eqn, ln, buf->buf, ppos, offs);
1520 if (r->tbl != NULL && ( ! ctl || buf->buf[pos] == '\0'))
1521 return tbl_read(r->tbl, ln, buf->buf, ppos);
1522 if ( ! ctl)
1523 return roff_parsetext(r, buf, pos, offs);
1524
1525 /* Skip empty request lines. */
1526
1527 if (buf->buf[pos] == '"') {
1528 mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
1529 ln, pos, NULL);
1530 return ROFF_IGN;
1531 } else if (buf->buf[pos] == '\0')
1532 return ROFF_IGN;
1533
1534 /*
1535 * If a scope is open, go to the child handler for that macro,
1536 * as it may want to preprocess before doing anything with it.
1537 * Don't do so if an equation is open.
1538 */
1539
1540 if (r->last) {
1541 t = r->last->tok;
1542 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1543 }
1544
1545 /* No scope is open. This is a new request or macro. */
1546
1547 spos = pos;
1548 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1549
1550 /* Tables ignore most macros. */
1551
1552 if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
1553 t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
1554 mandoc_msg(MANDOCERR_TBLMACRO, r->parse,
1555 ln, pos, buf->buf + spos);
1556 if (t != TOKEN_NONE)
1557 return ROFF_IGN;
1558 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1559 pos++;
1560 while (buf->buf[pos] == ' ')
1561 pos++;
1562 return tbl_read(r->tbl, ln, buf->buf, pos);
1563 }
1564
1565 /* For now, let high level macros abort .ce mode. */
1566
1567 if (ctl && roffce_node != NULL &&
1568 (t == TOKEN_NONE || t == ROFF_EQ || t == ROFF_TS)) {
1569 r->man->last = roffce_node;
1570 r->man->next = ROFF_NEXT_SIBLING;
1571 roffce_lines = 0;
1572 roffce_node = NULL;
1573 }
1574
1575 /*
1576 * This is neither a roff request nor a user-defined macro.
1577 * Let the standard macro set parsers handle it.
1578 */
1579
1580 if (t == TOKEN_NONE)
1581 return ROFF_CONT;
1582
1583 /* Execute a roff request or a user defined macro. */
1584
1585 return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
1586 }
1587
1588 void
1589 roff_endparse(struct roff *r)
1590 {
1591
1592 if (r->last)
1593 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1594 r->last->line, r->last->col,
1595 roff_name[r->last->tok]);
1596
1597 if (r->eqn) {
1598 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1599 r->eqn->eqn.ln, r->eqn->eqn.pos, "EQ");
1600 eqn_end(&r->eqn);
1601 }
1602
1603 if (r->tbl) {
1604 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1605 r->tbl->line, r->tbl->pos, "TS");
1606 tbl_end(&r->tbl);
1607 }
1608 }
1609
1610 /*
1611 * Parse a roff node's type from the input buffer. This must be in the
1612 * form of ".foo xxx" in the usual way.
1613 */
1614 static enum roff_tok
1615 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1616 {
1617 char *cp;
1618 const char *mac;
1619 size_t maclen;
1620 enum roff_tok t;
1621
1622 cp = buf + *pos;
1623
1624 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1625 return TOKEN_NONE;
1626
1627 mac = cp;
1628 maclen = roff_getname(r, &cp, ln, ppos);
1629
1630 t = (r->current_string = roff_getstrn(r, mac, maclen)) ?
1631 ROFF_USERDEF :
1632 (r->current_string = roff_getrenn(r, mac, maclen)) ?
1633 ROFF_RENAMED : roffhash_find(r->reqtab, mac, maclen);
1634
1635 if (t != TOKEN_NONE)
1636 *pos = cp - buf;
1637
1638 return t;
1639 }
1640
1641 /* --- handling of request blocks ----------------------------------------- */
1642
1643 static enum rofferr
1644 roff_cblock(ROFF_ARGS)
1645 {
1646
1647 /*
1648 * A block-close `..' should only be invoked as a child of an
1649 * ignore macro, otherwise raise a warning and just ignore it.
1650 */
1651
1652 if (r->last == NULL) {
1653 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1654 ln, ppos, "..");
1655 return ROFF_IGN;
1656 }
1657
1658 switch (r->last->tok) {
1659 case ROFF_am:
1660 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1661 case ROFF_ami:
1662 case ROFF_de:
1663 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1664 case ROFF_dei:
1665 case ROFF_ig:
1666 break;
1667 default:
1668 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1669 ln, ppos, "..");
1670 return ROFF_IGN;
1671 }
1672
1673 if (buf->buf[pos] != '\0')
1674 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1675 ".. %s", buf->buf + pos);
1676
1677 roffnode_pop(r);
1678 roffnode_cleanscope(r);
1679 return ROFF_IGN;
1680
1681 }
1682
1683 static void
1684 roffnode_cleanscope(struct roff *r)
1685 {
1686
1687 while (r->last) {
1688 if (--r->last->endspan != 0)
1689 break;
1690 roffnode_pop(r);
1691 }
1692 }
1693
1694 static void
1695 roff_ccond(struct roff *r, int ln, int ppos)
1696 {
1697
1698 if (NULL == r->last) {
1699 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1700 ln, ppos, "\\}");
1701 return;
1702 }
1703
1704 switch (r->last->tok) {
1705 case ROFF_el:
1706 case ROFF_ie:
1707 case ROFF_if:
1708 break;
1709 default:
1710 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1711 ln, ppos, "\\}");
1712 return;
1713 }
1714
1715 if (r->last->endspan > -1) {
1716 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1717 ln, ppos, "\\}");
1718 return;
1719 }
1720
1721 roffnode_pop(r);
1722 roffnode_cleanscope(r);
1723 return;
1724 }
1725
1726 static enum rofferr
1727 roff_block(ROFF_ARGS)
1728 {
1729 const char *name;
1730 char *iname, *cp;
1731 size_t namesz;
1732
1733 /* Ignore groff compatibility mode for now. */
1734
1735 if (tok == ROFF_de1)
1736 tok = ROFF_de;
1737 else if (tok == ROFF_dei1)
1738 tok = ROFF_dei;
1739 else if (tok == ROFF_am1)
1740 tok = ROFF_am;
1741 else if (tok == ROFF_ami1)
1742 tok = ROFF_ami;
1743
1744 /* Parse the macro name argument. */
1745
1746 cp = buf->buf + pos;
1747 if (tok == ROFF_ig) {
1748 iname = NULL;
1749 namesz = 0;
1750 } else {
1751 iname = cp;
1752 namesz = roff_getname(r, &cp, ln, ppos);
1753 iname[namesz] = '\0';
1754 }
1755
1756 /* Resolve the macro name argument if it is indirect. */
1757
1758 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1759 if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1760 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1761 r->parse, ln, (int)(iname - buf->buf),
1762 "%.*s", (int)namesz, iname);
1763 namesz = 0;
1764 } else
1765 namesz = strlen(name);
1766 } else
1767 name = iname;
1768
1769 if (namesz == 0 && tok != ROFF_ig) {
1770 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
1771 ln, ppos, roff_name[tok]);
1772 return ROFF_IGN;
1773 }
1774
1775 roffnode_push(r, tok, name, ln, ppos);
1776
1777 /*
1778 * At the beginning of a `de' macro, clear the existing string
1779 * with the same name, if there is one. New content will be
1780 * appended from roff_block_text() in multiline mode.
1781 */
1782
1783 if (tok == ROFF_de || tok == ROFF_dei) {
1784 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1785 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
1786 }
1787
1788 if (*cp == '\0')
1789 return ROFF_IGN;
1790
1791 /* Get the custom end marker. */
1792
1793 iname = cp;
1794 namesz = roff_getname(r, &cp, ln, ppos);
1795
1796 /* Resolve the end marker if it is indirect. */
1797
1798 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1799 if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1800 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1801 r->parse, ln, (int)(iname - buf->buf),
1802 "%.*s", (int)namesz, iname);
1803 namesz = 0;
1804 } else
1805 namesz = strlen(name);
1806 } else
1807 name = iname;
1808
1809 if (namesz)
1810 r->last->end = mandoc_strndup(name, namesz);
1811
1812 if (*cp != '\0')
1813 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
1814 ln, pos, ".%s ... %s", roff_name[tok], cp);
1815
1816 return ROFF_IGN;
1817 }
1818
1819 static enum rofferr
1820 roff_block_sub(ROFF_ARGS)
1821 {
1822 enum roff_tok t;
1823 int i, j;
1824
1825 /*
1826 * First check whether a custom macro exists at this level. If
1827 * it does, then check against it. This is some of groff's
1828 * stranger behaviours. If we encountered a custom end-scope
1829 * tag and that tag also happens to be a "real" macro, then we
1830 * need to try interpreting it again as a real macro. If it's
1831 * not, then return ignore. Else continue.
1832 */
1833
1834 if (r->last->end) {
1835 for (i = pos, j = 0; r->last->end[j]; j++, i++)
1836 if (buf->buf[i] != r->last->end[j])
1837 break;
1838
1839 if (r->last->end[j] == '\0' &&
1840 (buf->buf[i] == '\0' ||
1841 buf->buf[i] == ' ' ||
1842 buf->buf[i] == '\t')) {
1843 roffnode_pop(r);
1844 roffnode_cleanscope(r);
1845
1846 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
1847 i++;
1848
1849 pos = i;
1850 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
1851 TOKEN_NONE)
1852 return ROFF_RERUN;
1853 return ROFF_IGN;
1854 }
1855 }
1856
1857 /*
1858 * If we have no custom end-query or lookup failed, then try
1859 * pulling it out of the hashtable.
1860 */
1861
1862 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1863
1864 if (t != ROFF_cblock) {
1865 if (tok != ROFF_ig)
1866 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
1867 return ROFF_IGN;
1868 }
1869
1870 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
1871 }
1872
1873 static enum rofferr
1874 roff_block_text(ROFF_ARGS)
1875 {
1876
1877 if (tok != ROFF_ig)
1878 roff_setstr(r, r->last->name, buf->buf + pos, 2);
1879
1880 return ROFF_IGN;
1881 }
1882
1883 static enum rofferr
1884 roff_cond_sub(ROFF_ARGS)
1885 {
1886 enum roff_tok t;
1887 char *ep;
1888 int rr;
1889
1890 rr = r->last->rule;
1891 roffnode_cleanscope(r);
1892 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1893
1894 /*
1895 * Fully handle known macros when they are structurally
1896 * required or when the conditional evaluated to true.
1897 */
1898
1899 if (t != TOKEN_NONE && (rr || roffs[t].flags & ROFFMAC_STRUCT))
1900 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
1901
1902 /*
1903 * If `\}' occurs on a macro line without a preceding macro,
1904 * drop the line completely.
1905 */
1906
1907 ep = buf->buf + pos;
1908 if (ep[0] == '\\' && ep[1] == '}')
1909 rr = 0;
1910
1911 /* Always check for the closing delimiter `\}'. */
1912
1913 while ((ep = strchr(ep, '\\')) != NULL) {
1914 if (*(++ep) == '}') {
1915 *ep = '&';
1916 roff_ccond(r, ln, ep - buf->buf - 1);
1917 }
1918 if (*ep != '\0')
1919 ++ep;
1920 }
1921 return rr ? ROFF_CONT : ROFF_IGN;
1922 }
1923
1924 static enum rofferr
1925 roff_cond_text(ROFF_ARGS)
1926 {
1927 char *ep;
1928 int rr;
1929
1930 rr = r->last->rule;
1931 roffnode_cleanscope(r);
1932
1933 ep = buf->buf + pos;
1934 while ((ep = strchr(ep, '\\')) != NULL) {
1935 if (*(++ep) == '}') {
1936 *ep = '&';
1937 roff_ccond(r, ln, ep - buf->buf - 1);
1938 }
1939 if (*ep != '\0')
1940 ++ep;
1941 }
1942 return rr ? ROFF_CONT : ROFF_IGN;
1943 }
1944
1945 /* --- handling of numeric and conditional expressions -------------------- */
1946
1947 /*
1948 * Parse a single signed integer number. Stop at the first non-digit.
1949 * If there is at least one digit, return success and advance the
1950 * parse point, else return failure and let the parse point unchanged.
1951 * Ignore overflows, treat them just like the C language.
1952 */
1953 static int
1954 roff_getnum(const char *v, int *pos, int *res, int flags)
1955 {
1956 int myres, scaled, n, p;
1957
1958 if (NULL == res)
1959 res = &myres;
1960
1961 p = *pos;
1962 n = v[p] == '-';
1963 if (n || v[p] == '+')
1964 p++;
1965
1966 if (flags & ROFFNUM_WHITE)
1967 while (isspace((unsigned char)v[p]))
1968 p++;
1969
1970 for (*res = 0; isdigit((unsigned char)v[p]); p++)
1971 *res = 10 * *res + v[p] - '0';
1972 if (p == *pos + n)
1973 return 0;
1974
1975 if (n)
1976 *res = -*res;
1977
1978 /* Each number may be followed by one optional scaling unit. */
1979
1980 switch (v[p]) {
1981 case 'f':
1982 scaled = *res * 65536;
1983 break;
1984 case 'i':
1985 scaled = *res * 240;
1986 break;
1987 case 'c':
1988 scaled = *res * 240 / 2.54;
1989 break;
1990 case 'v':
1991 case 'P':
1992 scaled = *res * 40;
1993 break;
1994 case 'm':
1995 case 'n':
1996 scaled = *res * 24;
1997 break;
1998 case 'p':
1999 scaled = *res * 10 / 3;
2000 break;
2001 case 'u':
2002 scaled = *res;
2003 break;
2004 case 'M':
2005 scaled = *res * 6 / 25;
2006 break;
2007 default:
2008 scaled = *res;
2009 p--;
2010 break;
2011 }
2012 if (flags & ROFFNUM_SCALE)
2013 *res = scaled;
2014
2015 *pos = p + 1;
2016 return 1;
2017 }
2018
/*
 * Evaluate a string comparison condition.
 * The first character is the delimiter.
 * Succeed if the string up to its second occurrence
 * matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or lacking that, to the end of the line.
 * If the cursor already is at the end of the line,
 * fail without moving it.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*s1, *s2, *s3;
	int		 match;

	match = 0;
	s1 = v + *pos;		/* initial delimiter */

	/*
	 * Without a delimiter, there is nothing to compare,
	 * and s1 + 1 would point beyond the end of the string.
	 */

	if (*s1 == '\0')
		return 0;

	s2 = s1 + 1;		/* for scanning the first string */
	s3 = strchr(s2, *s1);	/* for scanning the second string */

	if (s3 == NULL)		/* found no middle delimiter */
		goto out;

	while (*++s3 != '\0') {
		if (*s2 != *s3) {  /* mismatch */
			s3 = strchr(s3, *s1);
			break;
		}
		if (*s3 == *s1) {  /* found the final delimiter */
			match = 1;
			break;
		}
		s2++;
	}

out:
	if (s3 == NULL)
		s3 = strchr(s2, '\0');
	else if (*s3 != '\0')
		s3++;
	*pos = s3 - v;
	return match;
}
2061
2062 /*
2063 * Evaluate an optionally negated single character, numerical,
2064 * or string condition.
2065 */
2066 static int
2067 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2068 {
2069 char *cp, *name;
2070 size_t sz;
2071 int number, savepos, istrue, wanttrue;
2072
2073 if ('!' == v[*pos]) {
2074 wanttrue = 0;
2075 (*pos)++;
2076 } else
2077 wanttrue = 1;
2078
2079 switch (v[*pos]) {
2080 case '\0':
2081 return 0;
2082 case 'n':
2083 case 'o':
2084 (*pos)++;
2085 return wanttrue;
2086 case 'c':
2087 case 'e':
2088 case 't':
2089 case 'v':
2090 (*pos)++;
2091 return !wanttrue;
2092 case 'd':
2093 case 'r':
2094 cp = v + *pos + 1;
2095 while (*cp == ' ')
2096 cp++;
2097 name = cp;
2098 sz = roff_getname(r, &cp, ln, cp - v);
2099 istrue = sz && (v[*pos] == 'r' ? roff_hasregn(r, name, sz) :
2100 (roff_getstrn(r, name, sz) != NULL ||
2101 roff_getrenn(r, name, sz) != NULL));
2102 *pos = cp - v;
2103 return istrue == wanttrue;
2104 default:
2105 break;
2106 }
2107
2108 savepos = *pos;
2109 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2110 return (number > 0) == wanttrue;
2111 else if (*pos == savepos)
2112 return roff_evalstrcond(v, pos) == wanttrue;
2113 else
2114 return 0;
2115 }
2116
2117 static enum rofferr
2118 roff_line_ignore(ROFF_ARGS)
2119 {
2120
2121 return ROFF_IGN;
2122 }
2123
2124 static enum rofferr
2125 roff_insec(ROFF_ARGS)
2126 {
2127
2128 mandoc_msg(MANDOCERR_REQ_INSEC, r->parse,
2129 ln, ppos, roff_name[tok]);
2130 return ROFF_IGN;
2131 }
2132
2133 static enum rofferr
2134 roff_unsupp(ROFF_ARGS)
2135 {
2136
2137 mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse,
2138 ln, ppos, roff_name[tok]);
2139 return ROFF_IGN;
2140 }
2141
2142 static enum rofferr
2143 roff_cond(ROFF_ARGS)
2144 {
2145
2146 roffnode_push(r, tok, NULL, ln, ppos);
2147
2148 /*
2149 * An `.el' has no conditional body: it will consume the value
2150 * of the current rstack entry set in prior `ie' calls or
2151 * defaults to DENY.
2152 *
2153 * If we're not an `el', however, then evaluate the conditional.
2154 */
2155
2156 r->last->rule = tok == ROFF_el ?
2157 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2158 roff_evalcond(r, ln, buf->buf, &pos);
2159
2160 /*
2161 * An if-else will put the NEGATION of the current evaluated
2162 * conditional into the stack of rules.
2163 */
2164
2165 if (tok == ROFF_ie) {
2166 if (r->rstackpos + 1 == r->rstacksz) {
2167 r->rstacksz += 16;
2168 r->rstack = mandoc_reallocarray(r->rstack,
2169 r->rstacksz, sizeof(int));
2170 }
2171 r->rstack[++r->rstackpos] = !r->last->rule;
2172 }
2173
2174 /* If the parent has false as its rule, then so do we. */
2175
2176 if (r->last->parent && !r->last->parent->rule)
2177 r->last->rule = 0;
2178
2179 /*
2180 * Determine scope.
2181 * If there is nothing on the line after the conditional,
2182 * not even whitespace, use next-line scope.
2183 */
2184
2185 if (buf->buf[pos] == '\0') {
2186 r->last->endspan = 2;
2187 goto out;
2188 }
2189
2190 while (buf->buf[pos] == ' ')
2191 pos++;
2192
2193 /* An opening brace requests multiline scope. */
2194
2195 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2196 r->last->endspan = -1;
2197 pos += 2;
2198 while (buf->buf[pos] == ' ')
2199 pos++;
2200 goto out;
2201 }
2202
2203 /*
2204 * Anything else following the conditional causes
2205 * single-line scope. Warn if the scope contains
2206 * nothing but trailing whitespace.
2207 */
2208
2209 if (buf->buf[pos] == '\0')
2210 mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
2211 ln, ppos, roff_name[tok]);
2212
2213 r->last->endspan = 1;
2214
2215 out:
2216 *offs = pos;
2217 return ROFF_RERUN;
2218 }
2219
2220 static enum rofferr
2221 roff_ds(ROFF_ARGS)
2222 {
2223 char *string;
2224 const char *name;
2225 size_t namesz;
2226
2227 /* Ignore groff compatibility mode for now. */
2228
2229 if (tok == ROFF_ds1)
2230 tok = ROFF_ds;
2231 else if (tok == ROFF_as1)
2232 tok = ROFF_as;
2233
2234 /*
2235 * The first word is the name of the string.
2236 * If it is empty or terminated by an escape sequence,
2237 * abort the `ds' request without defining anything.
2238 */
2239
2240 name = string = buf->buf + pos;
2241 if (*name == '\0')
2242 return ROFF_IGN;
2243
2244 namesz = roff_getname(r, &string, ln, pos);
2245 if (name[namesz] == '\\')
2246 return ROFF_IGN;
2247
2248 /* Read past the initial double-quote, if any. */
2249 if (*string == '"')
2250 string++;
2251
2252 /* The rest is the value. */
2253 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2254 ROFF_as == tok);
2255 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2256 return ROFF_IGN;
2257 }
2258
/*
 * Parse a single operator, one or two characters long, at v[*pos].
 * On success, store the operator code in *res, advance *pos past
 * the operator, and return the (non-zero) operator code.
 * On failure, return 0 and leave *pos unchanged; *res then holds
 * the unrecognized character.
 * Two-character operators are encoded as single letters:
 * "<=" as 'l', ">=" as 'g', "<>" as '!', "<?" as 'i', ">?" as 'a';
 * "==" is reported as '='.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*cur;
	int		 width;
	char		 op;

	cur = v + *pos;
	op = *cur;
	width = 1;

	/* The raw character is stored even if it is no operator. */
	*res = op;

	switch (op) {
	case '<':
		if (cur[1] == '=') {
			op = 'l';
			width = 2;
		} else if (cur[1] == '>') {
			op = '!';
			width = 2;
		} else if (cur[1] == '?') {
			op = 'i';
			width = 2;
		}
		break;
	case '>':
		if (cur[1] == '=') {
			op = 'g';
			width = 2;
		} else if (cur[1] == '?') {
			op = 'a';
			width = 2;
		}
		break;
	case '=':
		if (cur[1] == '=')
			width = 2;
		break;
	default:
		/* Only the plain one-character operators remain. */
		if (op == '\0' || strchr("+-*/%&:", op) == NULL)
			return 0;
		break;
	}

	*res = op;
	*pos += width;
	return op;
}
2322
2323 /*
2324 * Evaluate either a parenthesized numeric expression
2325 * or a single signed integer number.
2326 */
2327 static int
2328 roff_evalpar(struct roff *r, int ln,
2329 const char *v, int *pos, int *res, int flags)
2330 {
2331
2332 if ('(' != v[*pos])
2333 return roff_getnum(v, pos, res, flags);
2334
2335 (*pos)++;
2336 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2337 return 0;
2338
2339 /*
2340 * Omission of the closing parenthesis
2341 * is an error in validation mode,
2342 * but ignored in evaluation mode.
2343 */
2344
2345 if (')' == v[*pos])
2346 (*pos)++;
2347 else if (NULL == res)
2348 return 0;
2349
2350 return 1;
2351 }
2352
2353 /*
2354 * Evaluate a complete numeric expression.
2355 * Proceed left to right, there is no concept of precedence.
2356 */
2357 static int
2358 roff_evalnum(struct roff *r, int ln, const char *v,
2359 int *pos, int *res, int flags)
2360 {
2361 int mypos, operand2;
2362 char operator;
2363
2364 if (NULL == pos) {
2365 mypos = 0;
2366 pos = &mypos;
2367 }
2368
2369 if (flags & ROFFNUM_WHITE)
2370 while (isspace((unsigned char)v[*pos]))
2371 (*pos)++;
2372
2373 if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2374 return 0;
2375
2376 while (1) {
2377 if (flags & ROFFNUM_WHITE)
2378 while (isspace((unsigned char)v[*pos]))
2379 (*pos)++;
2380
2381 if ( ! roff_getop(v, pos, &operator))
2382 break;
2383
2384 if (flags & ROFFNUM_WHITE)
2385 while (isspace((unsigned char)v[*pos]))
2386 (*pos)++;
2387
2388 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2389 return 0;
2390
2391 if (flags & ROFFNUM_WHITE)
2392 while (isspace((unsigned char)v[*pos]))
2393 (*pos)++;
2394
2395 if (NULL == res)
2396 continue;
2397
2398 switch (operator) {
2399 case '+':
2400 *res += operand2;
2401 break;
2402 case '-':
2403 *res -= operand2;
2404 break;
2405 case '*':
2406 *res *= operand2;
2407 break;
2408 case '/':
2409 if (operand2 == 0) {
2410 mandoc_msg(MANDOCERR_DIVZERO,
2411 r->parse, ln, *pos, v);
2412 *res = 0;
2413 break;
2414 }
2415 *res /= operand2;
2416 break;
2417 case '%':
2418 if (operand2 == 0) {
2419 mandoc_msg(MANDOCERR_DIVZERO,
2420 r->parse, ln, *pos, v);
2421 *res = 0;
2422 break;
2423 }
2424 *res %= operand2;
2425 break;
2426 case '<':
2427 *res = *res < operand2;
2428 break;
2429 case '>':
2430 *res = *res > operand2;
2431 break;
2432 case 'l':
2433 *res = *res <= operand2;
2434 break;
2435 case 'g':
2436 *res = *res >= operand2;
2437 break;
2438 case '=':
2439 *res = *res == operand2;
2440 break;
2441 case '!':
2442 *res = *res != operand2;
2443 break;
2444 case '&':
2445 *res = *res && operand2;
2446 break;
2447 case ':':
2448 *res = *res || operand2;
2449 break;
2450 case 'i':
2451 if (operand2 < *res)
2452 *res = operand2;
2453 break;
2454 case 'a':
2455 if (operand2 > *res)
2456 *res = operand2;
2457 break;
2458 default:
2459 abort();
2460 }
2461 }
2462 return 1;
2463 }
2464
2465 /* --- register management ------------------------------------------------ */
2466
2467 void
2468 roff_setreg(struct roff *r, const char *name, int val, char sign)
2469 {
2470 struct roffreg *reg;
2471
2472 /* Search for an existing register with the same name. */
2473 reg = r->regtab;
2474
2475 while (reg && strcmp(name, reg->key.p))
2476 reg = reg->next;
2477
2478 if (NULL == reg) {
2479 /* Create a new register. */
2480 reg = mandoc_malloc(sizeof(struct roffreg));
2481 reg->key.p = mandoc_strdup(name);
2482 reg->key.sz = strlen(name);
2483 reg->val = 0;
2484 reg->next = r->regtab;
2485 r->regtab = reg;
2486 }
2487
2488 if ('+' == sign)
2489 reg->val += val;
2490 else if ('-' == sign)
2491 reg->val -= val;
2492 else
2493 reg->val = val;
2494 }
2495
2496 /*
2497 * Handle some predefined read-only number registers.
2498 * For now, return -1 if the requested register is not predefined;
2499 * in case a predefined read-only register having the value -1
2500 * were to turn up, another special value would have to be chosen.
2501 */
2502 static int
2503 roff_getregro(const struct roff *r, const char *name)
2504 {
2505
2506 switch (*name) {
2507 case '$': /* Number of arguments of the last macro evaluated. */
2508 return r->argc;
2509 case 'A': /* ASCII approximation mode is always off. */
2510 return 0;
2511 case 'g': /* Groff compatibility mode is always on. */
2512 return 1;
2513 case 'H': /* Fixed horizontal resolution. */
2514 return 24;
2515 case 'j': /* Always adjust left margin only. */
2516 return 0;
2517 case 'T': /* Some output device is always defined. */
2518 return 1;
2519 case 'V': /* Fixed vertical resolution. */
2520 return 40;
2521 default:
2522 return -1;
2523 }
2524 }
2525
2526 int
2527 roff_getreg(const struct roff *r, const char *name)
2528 {
2529 struct roffreg *reg;
2530 int val;
2531
2532 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
2533 val = roff_getregro(r, name + 1);
2534 if (-1 != val)
2535 return val;
2536 }
2537
2538 for (reg = r->regtab; reg; reg = reg->next)
2539 if (0 == strcmp(name, reg->key.p))
2540 return reg->val;
2541
2542 return 0;
2543 }
2544
2545 static int
2546 roff_getregn(const struct roff *r, const char *name, size_t len)
2547 {
2548 struct roffreg *reg;
2549 int val;
2550
2551 if ('.' == name[0] && 2 == len) {
2552 val = roff_getregro(r, name + 1);
2553 if (-1 != val)
2554 return val;
2555 }
2556
2557 for (reg = r->regtab; reg; reg = reg->next)
2558 if (len == reg->key.sz &&
2559 0 == strncmp(name, reg->key.p, len))
2560 return reg->val;
2561
2562 return 0;
2563 }
2564
2565 static int
2566 roff_hasregn(const struct roff *r, const char *name, size_t len)
2567 {
2568 struct roffreg *reg;
2569 int val;
2570
2571 if ('.' == name[0] && 2 == len) {
2572 val = roff_getregro(r, name + 1);
2573 if (-1 != val)
2574 return 1;
2575 }
2576
2577 for (reg = r->regtab; reg; reg = reg->next)
2578 if (len == reg->key.sz &&
2579 0 == strncmp(name, reg->key.p, len))
2580 return 1;
2581
2582 return 0;
2583 }
2584
2585 static void
2586 roff_freereg(struct roffreg *reg)
2587 {
2588 struct roffreg *old_reg;
2589
2590 while (NULL != reg) {
2591 free(reg->key.p);
2592 old_reg = reg;
2593 reg = reg->next;
2594 free(old_reg);
2595 }
2596 }
2597
2598 static enum rofferr
2599 roff_nr(ROFF_ARGS)
2600 {
2601 char *key, *val;
2602 size_t keysz;
2603 int iv;
2604 char sign;
2605
2606 key = val = buf->buf + pos;
2607 if (*key == '\0')
2608 return ROFF_IGN;
2609
2610 keysz = roff_getname(r, &val, ln, pos);
2611 if (key[keysz] == '\\')
2612 return ROFF_IGN;
2613 key[keysz] = '\0';
2614
2615 sign = *val;
2616 if (sign == '+' || sign == '-')
2617 val++;
2618
2619 if (roff_evalnum(r, ln, val, NULL, &iv, ROFFNUM_SCALE))
2620 roff_setreg(r, key, iv, sign);
2621
2622 return ROFF_IGN;
2623 }
2624
2625 static enum rofferr
2626 roff_rr(ROFF_ARGS)
2627 {
2628 struct roffreg *reg, **prev;
2629 char *name, *cp;
2630 size_t namesz;
2631
2632 name = cp = buf->buf + pos;
2633 if (*name == '\0')
2634 return ROFF_IGN;
2635 namesz = roff_getname(r, &cp, ln, pos);
2636 name[namesz] = '\0';
2637
2638 prev = &r->regtab;
2639 while (1) {
2640 reg = *prev;
2641 if (reg == NULL || !strcmp(name, reg->key.p))
2642 break;
2643 prev = &reg->next;
2644 }
2645 if (reg != NULL) {
2646 *prev = reg->next;
2647 free(reg->key.p);
2648 free(reg);
2649 }
2650 return ROFF_IGN;
2651 }
2652
2653 /* --- handler functions for roff requests -------------------------------- */
2654
2655 static enum rofferr
2656 roff_rm(ROFF_ARGS)
2657 {
2658 const char *name;
2659 char *cp;
2660 size_t namesz;
2661
2662 cp = buf->buf + pos;
2663 while (*cp != '\0') {
2664 name = cp;
2665 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
2666 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
2667 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2668 if (name[namesz] == '\\')
2669 break;
2670 }
2671 return ROFF_IGN;
2672 }
2673
2674 static enum rofferr
2675 roff_it(ROFF_ARGS)
2676 {
2677 int iv;
2678
2679 /* Parse the number of lines. */
2680
2681 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
2682 mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
2683 ln, ppos, buf->buf + 1);
2684 return ROFF_IGN;
2685 }
2686
2687 while (isspace((unsigned char)buf->buf[pos]))
2688 pos++;
2689
2690 /*
2691 * Arm the input line trap.
2692 * Special-casing "an-trap" is an ugly workaround to cope
2693 * with DocBook stupidly fiddling with man(7) internals.
2694 */
2695
2696 roffit_lines = iv;
2697 roffit_macro = mandoc_strdup(iv != 1 ||
2698 strcmp(buf->buf + pos, "an-trap") ?
2699 buf->buf + pos : "br");
2700 return ROFF_IGN;
2701 }
2702
2703 static enum rofferr
2704 roff_Dd(ROFF_ARGS)
2705 {
2706 const char *const *cp;
2707
2708 if ((r->options & (MPARSE_MDOC | MPARSE_QUICK)) == 0)
2709 for (cp = __mdoc_reserved; *cp; cp++)
2710 roff_setstr(r, *cp, NULL, 0);
2711
2712 if (r->format == 0)
2713 r->format = MPARSE_MDOC;
2714
2715 return ROFF_CONT;
2716 }
2717
2718 static enum rofferr
2719 roff_TH(ROFF_ARGS)
2720 {
2721 const char *const *cp;
2722
2723 if ((r->options & MPARSE_QUICK) == 0)
2724 for (cp = __man_reserved; *cp; cp++)
2725 roff_setstr(r, *cp, NULL, 0);
2726
2727 if (r->format == 0)
2728 r->format = MPARSE_MAN;
2729
2730 return ROFF_CONT;
2731 }
2732
2733 static enum rofferr
2734 roff_TE(ROFF_ARGS)
2735 {
2736
2737 if (NULL == r->tbl)
2738 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2739 ln, ppos, "TE");
2740 else if ( ! tbl_end(&r->tbl)) {
2741 free(buf->buf);
2742 buf->buf = mandoc_strdup(".sp");
2743 buf->sz = 4;
2744 return ROFF_REPARSE;
2745 }
2746 return ROFF_IGN;
2747 }
2748
2749 static enum rofferr
2750 roff_T_(ROFF_ARGS)
2751 {
2752
2753 if (NULL == r->tbl)
2754 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2755 ln, ppos, "T&");
2756 else
2757 tbl_restart(ln, ppos, r->tbl);
2758
2759 return ROFF_IGN;
2760 }
2761
2762 /*
2763 * Handle in-line equation delimiters.
2764 */
2765 static enum rofferr
2766 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
2767 {
2768 char *cp1, *cp2;
2769 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
2770
2771 /*
2772 * Outside equations, look for an opening delimiter.
2773 * If we are inside an equation, we already know it is
2774 * in-line, or this function wouldn't have been called;
2775 * so look for a closing delimiter.
2776 */
2777
2778 cp1 = buf->buf + pos;
2779 cp2 = strchr(cp1, r->eqn == NULL ?
2780 r->last_eqn->odelim : r->last_eqn->cdelim);
2781 if (cp2 == NULL)
2782 return ROFF_CONT;
2783
2784 *cp2++ = '\0';
2785 bef_pr = bef_nl = aft_nl = aft_pr = "";
2786
2787 /* Handle preceding text, protecting whitespace. */
2788
2789 if (*buf->buf != '\0') {
2790 if (r->eqn == NULL)
2791 bef_pr = "\\&";
2792 bef_nl = "\n";
2793 }
2794
2795 /*
2796 * Prepare replacing the delimiter with an equation macro
2797 * and drop leading white space from the equation.
2798 */
2799
2800 if (r->eqn == NULL) {
2801 while (*cp2 == ' ')
2802 cp2++;
2803 mac = ".EQ";
2804 } else
2805 mac = ".EN";
2806
2807 /* Handle following text, protecting whitespace. */
2808
2809 if (*cp2 != '\0') {
2810 aft_nl = "\n";
2811 if (r->eqn != NULL)
2812 aft_pr = "\\&";
2813 }
2814
2815 /* Do the actual replacement. */
2816
2817 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
2818 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
2819 free(buf->buf);
2820 buf->buf = cp1;
2821
2822 /* Toggle the in-line state of the eqn subsystem. */
2823
2824 r->eqn_inline = r->eqn == NULL;
2825 return ROFF_REPARSE;
2826 }
2827
2828 static enum rofferr
2829 roff_EQ(ROFF_ARGS)
2830 {
2831 struct eqn_node *e;
2832
2833 assert(r->eqn == NULL);
2834 e = eqn_alloc(ppos, ln, r->parse);
2835
2836 if (r->last_eqn) {
2837 r->last_eqn->next = e;
2838 e->delim = r->last_eqn->delim;
2839 e->odelim = r->last_eqn->odelim;
2840 e->cdelim = r->last_eqn->cdelim;
2841 } else
2842 r->first_eqn = r->last_eqn = e;
2843
2844 r->eqn = r->last_eqn = e;
2845
2846 if (buf->buf[pos] != '\0')
2847 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2848 ".EQ %s", buf->buf + pos);
2849
2850 return ROFF_IGN;
2851 }
2852
2853 static enum rofferr
2854 roff_EN(ROFF_ARGS)
2855 {
2856
2857 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
2858 return ROFF_IGN;
2859 }
2860
2861 static enum rofferr
2862 roff_TS(ROFF_ARGS)
2863 {
2864 struct tbl_node *tbl;
2865
2866 if (r->tbl) {
2867 mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
2868 ln, ppos, "TS breaks TS");
2869 tbl_end(&r->tbl);
2870 }
2871
2872 tbl = tbl_alloc(ppos, ln, r->parse);
2873
2874 if (r->last_tbl)
2875 r->last_tbl->next = tbl;
2876 else
2877 r->first_tbl = r->last_tbl = tbl;
2878
2879 r->tbl = r->last_tbl = tbl;
2880 return ROFF_IGN;
2881 }
2882
2883 static enum rofferr
2884 roff_onearg(ROFF_ARGS)
2885 {
2886 struct roff_node *n;
2887 char *cp;
2888 int npos;
2889
2890 if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
2891 (tok == ROFF_sp || tok == ROFF_ti))
2892 man_breakscope(r->man, tok);
2893
2894 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
2895 r->man->last = roffce_node;
2896 r->man->next = ROFF_NEXT_SIBLING;
2897 }
2898
2899 roff_elem_alloc(r->man, ln, ppos, tok);
2900 n = r->man->last;
2901
2902 cp = buf->buf + pos;
2903 if (*cp != '\0') {
2904 while (*cp != '\0' && *cp != ' ')
2905 cp++;
2906 while (*cp == ' ')
2907 *cp++ = '\0';
2908 if (*cp != '\0')
2909 mandoc_vmsg(MANDOCERR_ARG_EXCESS,
2910 r->parse, ln, cp - buf->buf,
2911 "%s ... %s", roff_name[tok], cp);
2912 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
2913 }
2914
2915 if (tok == ROFF_ce || tok == ROFF_rj) {
2916 if (r->man->last->type == ROFFT_ELEM) {
2917 roff_word_alloc(r->man, ln, pos, "1");
2918 r->man->last->flags |= NODE_NOSRC;
2919 }
2920 npos = 0;
2921 if (roff_evalnum(r, ln, r->man->last->string, &npos,
2922 &roffce_lines, 0) == 0) {
2923 mandoc_vmsg(MANDOCERR_CE_NONUM,
2924 r->parse, ln, pos, "ce %s", buf->buf + pos);
2925 roffce_lines = 1;
2926 }
2927 if (roffce_lines < 1) {
2928 r->man->last = r->man->last->parent;
2929 roffce_node = NULL;
2930 roffce_lines = 0;
2931 } else
2932 roffce_node = r->man->last->parent;
2933 } else {
2934 n->flags |= NODE_VALID | NODE_ENDED;
2935 r->man->last = n;
2936 }
2937 n->flags |= NODE_LINE;
2938 r->man->next = ROFF_NEXT_SIBLING;
2939 return ROFF_IGN;
2940 }
2941
2942 static enum rofferr
2943 roff_manyarg(ROFF_ARGS)
2944 {
2945 struct roff_node *n;
2946 char *sp, *ep;
2947
2948 roff_elem_alloc(r->man, ln, ppos, tok);
2949 n = r->man->last;
2950
2951 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
2952 while (*ep != '\0' && *ep != ' ')
2953 ep++;
2954 while (*ep == ' ')
2955 *ep++ = '\0';
2956 roff_word_alloc(r->man, ln, sp - buf->buf, sp);
2957 }
2958
2959 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
2960 r->man->last = n;
2961 r->man->next = ROFF_NEXT_SIBLING;
2962 return ROFF_IGN;
2963 }
2964
2965 static enum rofferr
2966 roff_als(ROFF_ARGS)
2967 {
2968 char *oldn, *newn, *end, *value;
2969 size_t oldsz, newsz, valsz;
2970
2971 newn = oldn = buf->buf + pos;
2972 if (*newn == '\0')
2973 return ROFF_IGN;
2974
2975 newsz = roff_getname(r, &oldn, ln, pos);
2976 if (newn[newsz] == '\\' || *oldn == '\0')
2977 return ROFF_IGN;
2978
2979 end = oldn;
2980 oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
2981 if (oldsz == 0)
2982 return ROFF_IGN;
2983
2984 valsz = mandoc_asprintf(&value, ".%.*s \\$*\n", (int)oldsz, oldn);
2985 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
2986 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
2987 free(value);
2988 return ROFF_IGN;
2989 }
2990
2991 static enum rofferr
2992 roff_br(ROFF_ARGS)
2993 {
2994 if (r->man->flags & (MAN_BLINE | MAN_ELINE))
2995 man_breakscope(r->man, ROFF_br);
2996 roff_elem_alloc(r->man, ln, ppos, ROFF_br);
2997 if (buf->buf[pos] != '\0')
2998 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2999 "%s %s", roff_name[tok], buf->buf + pos);
3000 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3001 r->man->next = ROFF_NEXT_SIBLING;
3002 return ROFF_IGN;
3003 }
3004
3005 static enum rofferr
3006 roff_cc(ROFF_ARGS)
3007 {
3008 const char *p;
3009
3010 p = buf->buf + pos;
3011
3012 if (*p == '\0' || (r->control = *p++) == '.')
3013 r->control = '\0';
3014
3015 if (*p != '\0')
3016 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
3017 ln, p - buf->buf, "cc ... %s", p);
3018
3019 return ROFF_IGN;
3020 }
3021
3022 static enum rofferr
3023 roff_ec(ROFF_ARGS)
3024 {
3025 const char *p;
3026
3027 p = buf->buf + pos;
3028 if (*p == '\0')
3029 r->escape = '\\';
3030 else {
3031 r->escape = *p;
3032 if (*++p != '\0')
3033 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
3034 ln, p - buf->buf, "ec ... %s", p);
3035 }
3036 return ROFF_IGN;
3037 }
3038
3039 static enum rofferr
3040 roff_eo(ROFF_ARGS)
3041 {
3042 r->escape = '\0';
3043 if (buf->buf[pos] != '\0')
3044 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse,
3045 ln, pos, "eo %s", buf->buf + pos);
3046 return ROFF_IGN;
3047 }
3048
3049 static enum rofferr
3050 roff_tr(ROFF_ARGS)
3051 {
3052 const char *p, *first, *second;
3053 size_t fsz, ssz;
3054 enum mandoc_esc esc;
3055
3056 p = buf->buf + pos;
3057
3058 if (*p == '\0') {
3059 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr");
3060 return ROFF_IGN;
3061 }
3062
3063 while (*p != '\0') {
3064 fsz = ssz = 1;
3065
3066 first = p++;
3067 if (*first == '\\') {
3068 esc = mandoc_escape(&p, NULL, NULL);
3069 if (esc == ESCAPE_ERROR) {
3070 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
3071 ln, (int)(p - buf->buf), first);
3072 return ROFF_IGN;
3073 }
3074 fsz = (size_t)(p - first);
3075 }
3076
3077 second = p++;
3078 if (*second == '\\') {
3079 esc = mandoc_escape(&p, NULL, NULL);
3080 if (esc == ESCAPE_ERROR) {
3081 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
3082 ln, (int)(p - buf->buf), second);
3083 return ROFF_IGN;
3084 }
3085 ssz = (size_t)(p - second);
3086 } else if (*second == '\0') {
3087 mandoc_vmsg(MANDOCERR_TR_ODD, r->parse,
3088 ln, first - buf->buf, "tr %s", first);
3089 second = " ";
3090 p--;
3091 }
3092
3093 if (fsz > 1) {
3094 roff_setstrn(&r->xmbtab, first, fsz,
3095 second, ssz, 0);
3096 continue;
3097 }
3098
3099 if (r->xtab == NULL)
3100 r->xtab = mandoc_calloc(128,
3101 sizeof(struct roffstr));
3102
3103 free(r->xtab[(int)*first].p);
3104 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3105 r->xtab[(int)*first].sz = ssz;
3106 }
3107
3108 return ROFF_IGN;
3109 }
3110
3111 static enum rofferr
3112 roff_rn(ROFF_ARGS)
3113 {
3114 const char *value;
3115 char *oldn, *newn, *end;
3116 size_t oldsz, newsz;
3117
3118 oldn = newn = buf->buf + pos;
3119 if (*oldn == '\0')
3120 return ROFF_IGN;
3121
3122 oldsz = roff_getname(r, &newn, ln, pos);
3123 if (oldn[oldsz] == '\\' || *newn == '\0')
3124 return ROFF_IGN;
3125
3126 end = newn;
3127 newsz = roff_getname(r, &end, ln, newn - buf->buf);
3128 if (newsz == 0)
3129 return ROFF_IGN;
3130
3131 /*
3132 * Rename a user-defined macro bearing the old name,
3133 * overriding an existing renamed high-level macro
3134 * bearing the new name, if that exists.
3135 */
3136
3137 if ((value = roff_getstrn(r, oldn, oldsz)) != NULL) {
3138 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3139 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3140 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3141 return ROFF_IGN;
3142 }
3143
3144 /*
3145 * Rename a high-level macro bearing the old name,
3146 * either renaming it a second time if it was already
3147 * renamed before, or renaming it for the first time.
3148 * In both cases, override an existing user-defined
3149 * macro bearing the new name, if that exists.
3150 */
3151
3152 if ((value = roff_getrenn(r, oldn, oldsz)) != NULL) {
3153 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3154 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3155 } else
3156 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3157 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3158 return ROFF_IGN;
3159 }
3160
3161 static enum rofferr
3162 roff_so(ROFF_ARGS)
3163 {
3164 char *name, *cp;
3165
3166 name = buf->buf + pos;
3167 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
3168
3169 /*
3170 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3171 * opening anything that's not in our cwd or anything beneath
3172 * it. Thus, explicitly disallow traversing up the file-system
3173 * or using absolute paths.
3174 */
3175
3176 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3177 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
3178 ".so %s", name);
3179 buf->sz = mandoc_asprintf(&cp,
3180 ".sp\nSee the file %s.\n.sp", name) + 1;
3181 free(buf->buf);
3182 buf->buf = cp;
3183 *offs = 0;
3184 return ROFF_REPARSE;
3185 }
3186
3187 *offs = pos;
3188 return ROFF_SO;
3189 }
3190
3191 /* --- user defined strings and macros ------------------------------------ */
3192
/*
 * Execute a user-defined macro.
 * The arguments of the invocation in buf are collected, a copy of
 * r->current_string is made, and each argument reference in the copy
 * (a backslash followed by "$1" to "$9", or by "$*" for all arguments)
 * is replaced by the respective argument text, with double quotes
 * rewritten as \(dq.  The expanded copy then replaces buf->buf,
 * and *offs is reset to 0.
 * Returns ROFF_REPARSE if the expanded text ends with a newline and
 * ROFF_APPEND otherwise; returns ROFF_IGN after reporting
 * MANDOCERR_ROFFLOOP if more than EXPAND_LIMIT expansions occur
 * without the parse point moving past previously expanded text.
 */
3193 static enum rofferr
3194 roff_userdef(ROFF_ARGS)
3195 {
3196 const char *arg[16], *ap;
3197 char *cp, *n1, *n2;
3198 int expand_count, i, ib, ie;
3199 size_t asz, rsz;
3200
3201 /*
3202 * Collect pointers to macro argument strings
3203 * and NUL-terminate them.
3204 */
3205
/* Slots beyond the last argument present point to an empty string. */
3206 r->argc = 0;
3207 cp = buf->buf + pos;
3208 for (i = 0; i < 16; i++) {
3209 if (*cp == '\0')
3210 arg[i] = "";
3211 else {
3212 arg[i] = mandoc_getarg(r->parse, &cp, ln, &pos);
3213 r->argc = i + 1;
3214 }
3215 }
3216
3217 /*
3218 * Expand macro arguments.
3219 */
3220
/*
 * n1 is the start of the working copy, cp the parse point in it,
 * and n2 the end of the most recently inserted argument text.
 * buf->sz tracks the size of the working copy including the NUL.
 */
3221 buf->sz = strlen(r->current_string) + 1;
3222 n1 = n2 = cp = mandoc_malloc(buf->sz);
3223 memcpy(n1, r->current_string, buf->sz);
3224 expand_count = 0;
3225 while (*cp != '\0') {
3226
3227 /* Scan ahead for the next argument invocation. */
3228
3229 if (*cp++ != '\\')
3230 continue;
3231 if (*cp++ != '$')
3232 continue;
3233 if (*cp == '*') { /* \\$* inserts all arguments */
3234 ib = 0;
3235 ie = r->argc - 1;
3236 } else { /* \\$1 .. \\$9 insert one argument */
3237 ib = ie = *cp - '1';
3238 if (ib < 0 || ib > 8)
3239 continue;
3240 }
/* Step back to the backslash; the reference is three bytes long. */
3241 cp -= 2;
3242
3243 /*
3244 * Prevent infinite recursion.
3245 */
3246
/*
 * The counter restarts whenever the reference is located at or
 * after the end of the previous insertion, and otherwise counts
 * references found inside previously inserted text.
 */
3247 if (cp >= n2)
3248 expand_count = 1;
3249 else if (++expand_count > EXPAND_LIMIT) {
3250 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
3251 ln, (int)(cp - n1), NULL);
3252 free(buf->buf);
3253 buf->buf = n1;
3254 return ROFF_IGN;
3255 }
3256
3257 /*
3258 * Determine the size of the expanded argument,
3259 * taking escaping of quotes into account.
3260 */
3261
/* Each double quote grows from one byte to the four bytes of \(dq. */
3262 asz = ie > ib ? ie - ib : 0; /* for blanks */
3263 for (i = ib; i <= ie; i++) {
3264 for (ap = arg[i]; *ap != '\0'; ap++) {
3265 asz++;
3266 if (*ap == '"')
3267 asz += 3;
3268 }
3269 }
/* If the replacement is exactly three bytes, it fits in place. */
3270 if (asz != 3) {
3271
3272 /*
3273 * Determine the size of the rest of the
3274 * unexpanded macro, including the NUL.
3275 */
3276
3277 rsz = buf->sz - (cp - n1) - 3;
3278
3279 /*
3280 * When shrinking, move before
3281 * releasing the storage.
3282 */
3283
3284 if (asz < 3)
3285 memmove(cp + asz, cp + 3, rsz);
3286
3287 /*
3288 * Resize the storage for the macro
3289 * and readjust the parse pointer.
3290 */
3291
3292 buf->sz += asz - 3;
3293 n2 = mandoc_realloc(n1, buf->sz);
3294 cp = n2 + (cp - n1);
3295 n1 = n2;
3296
3297 /*
3298 * When growing, make room
3299 * for the expanded argument.
3300 */
3301
3302 if (asz > 3)
3303 memmove(cp + asz, cp + 3, rsz);
3304 }
3305
3306 /* Copy the expanded argument, escaping quotes. */
3307
/*
 * cp stays at the start of the inserted text, so that text is
 * scanned for argument references again; n2 ends up just past it.
 */
3308 n2 = cp;
3309 for (i = ib; i <= ie; i++) {
3310 for (ap = arg[i]; *ap != '\0'; ap++) {
3311 if (*ap == '"') {
3312 memcpy(n2, "\\(dq", 4);
3313 n2 += 4;
3314 } else
3315 *n2++ = *ap;
3316 }
3317 if (i < ie)
3318 *n2++ = ' ';
3319 }
3320 }
3321
3322 /*
3323 * Replace the macro invocation
3324 * by the expanded macro.
3325 */
3326
/* The arg[] pointers refer into the old buffer, which is only freed here. */
3327 free(buf->buf);
3328 buf->buf = n1;
3329 *offs = 0;
3330
3331 return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
3332 ROFF_REPARSE : ROFF_APPEND;
3333 }
3334
3335 /*
3336 * Calling a high-level macro that was renamed with .rn.
3337 * r->current_string has already been set up by roff_parse().
3338 */
3339 static enum rofferr
3340 roff_renamed(ROFF_ARGS)
3341 {
3342 char *nbuf;
3343
3344 buf->sz = mandoc_asprintf(&nbuf, ".%s %s", r->current_string,
3345 buf->buf + pos) + 1;
3346 free(buf->buf);
3347 buf->buf = nbuf;
3348 return ROFF_CONT;
3349 }
3350
3351 static size_t
3352 roff_getname(struct roff *r, char **cpp, int ln, int pos)
3353 {
3354 char *name, *cp;
3355 size_t namesz;
3356
3357 name = *cpp;
3358 if ('\0' == *name)
3359 return 0;
3360
3361 /* Read until end of name and terminate it with NUL. */
3362 for (cp = name; 1; cp++) {
3363 if ('\0' == *cp || ' ' == *cp) {
3364 namesz = cp - name;
3365 break;
3366 }
3367 if ('\\' != *cp)
3368 continue;
3369 namesz = cp - name;
3370 if ('{' == cp[1] || '}' == cp[1])
3371 break;
3372 cp++;
3373 if ('\\' == *cp)
3374 continue;
3375 mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
3376 "%.*s", (int)(cp - name + 1), name);
3377 mandoc_escape((const char **)&cp, NULL, NULL);
3378 break;
3379 }
3380
3381 /* Read past spaces. */
3382 while (' ' == *cp)
3383 cp++;
3384
3385 *cpp = cp;
3386 return namesz;
3387 }
3388
3389 /*
3390 * Store *string into the user-defined string called *name.
3391 * To clear an existing entry, call with (*r, *name, NULL, 0).
3392 * append == 0: replace mode
3393 * append == 1: single-line append mode
3394 * append == 2: multiline append mode, append '\n' after each call
3395 */
3396 static void
3397 roff_setstr(struct roff *r, const char *name, const char *string,
3398 int append)
3399 {
3400
3401 roff_setstrn(&r->strtab, name, strlen(name), string,
3402 string ? strlen(string) : 0, append);
3403 }
3404
3405 static void
3406 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
3407 const char *string, size_t stringsz, int append)
3408 {
3409 struct roffkv *n;
3410 char *c;
3411 int i;
3412 size_t oldch, newch;
3413
3414 /* Search for an existing string with the same name. */
3415 n = *r;
3416
3417 while (n && (namesz != n->key.sz ||
3418 strncmp(n->key.p, name, namesz)))
3419 n = n->next;
3420
3421 if (NULL == n) {
3422 /* Create a new string table entry. */
3423 n = mandoc_malloc(sizeof(struct roffkv));
3424 n->key.p = mandoc_strndup(name, namesz);
3425 n->key.sz = namesz;
3426 n->val.p = NULL;
3427 n->val.sz = 0;
3428 n->next = *r;
3429 *r = n;
3430 } else if (0 == append) {
3431 free(n->val.p);
3432 n->val.p = NULL;
3433 n->val.sz = 0;
3434 }
3435
3436 if (NULL == string)
3437 return;
3438
3439 /*
3440 * One additional byte for the '\n' in multiline mode,
3441 * and one for the terminating '\0'.
3442 */
3443 newch = stringsz + (1 < append ? 2u : 1u);
3444
3445 if (NULL == n->val.p) {
3446 n->val.p = mandoc_malloc(newch);
3447 *n->val.p = '\0';
3448 oldch = 0;
3449 } else {
3450 oldch = n->val.sz;
3451 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
3452 }
3453
3454 /* Skip existing content in the destination buffer. */
3455 c = n->val.p + (int)oldch;
3456
3457 /* Append new content to the destination buffer. */
3458 i = 0;
3459 while (i < (int)stringsz) {
3460 /*
3461 * Rudimentary roff copy mode:
3462 * Handle escaped backslashes.
3463 */
3464 if ('\\' == string[i] && '\\' == string[i + 1])
3465 i++;
3466 *c++ = string[i++];
3467 }
3468
3469 /* Append terminating bytes. */
3470 if (1 < append)
3471 *c++ = '\n';
3472
3473 *c = '\0';
3474 n->val.sz = (int)(c - n->val.p);
3475 }
3476
3477 static const char *
3478 roff_getstrn(const struct roff *r, const char *name, size_t len)
3479 {
3480 const struct roffkv *n;
3481 int i;
3482
3483 for (n = r->strtab; n; n = n->next)
3484 if (0 == strncmp(name, n->key.p, len) &&
3485 '\0' == n->key.p[(int)len])
3486 return n->val.p;
3487
3488 for (i = 0; i < PREDEFS_MAX; i++)
3489 if (0 == strncmp(name, predefs[i].name, len) &&
3490 '\0' == predefs[i].name[(int)len])
3491 return predefs[i].str;
3492
3493 return NULL;
3494 }
3495
3496 /*
3497 * Check whether *name is the renamed name of a high-level macro.
3498 * Return the standard name, or NULL if it is not.
3499 */
3500 static const char *
3501 roff_getrenn(const struct roff *r, const char *name, size_t len)
3502 {
3503 const struct roffkv *n;
3504
3505 for (n = r->rentab; n; n = n->next)
3506 if (0 == strncmp(name, n->key.p, len) &&
3507 '\0' == n->key.p[(int)len])
3508 return n->val.p;
3509
3510 return NULL;
3511 }
3512
3513 static void
3514 roff_freestr(struct roffkv *r)
3515 {
3516 struct roffkv *n, *nn;
3517
3518 for (n = r; n; n = nn) {
3519 free(n->key.p);
3520 free(n->val.p);
3521 nn = n->next;
3522 free(n);
3523 }
3524 }
3525
3526 /* --- accessors and utility functions ------------------------------------ */
3527
3528 const struct tbl_span *
3529 roff_span(const struct roff *r)
3530 {
3531
3532 return r->tbl ? tbl_span(r->tbl) : NULL;
3533 }
3534
3535 const struct eqn *
3536 roff_eqn(const struct roff *r)
3537 {
3538
3539 return r->last_eqn ? &r->last_eqn->eqn : NULL;
3540 }
3541
3542 /*
3543 * Duplicate an input string, making the appropriate character
3544 * conversations (as stipulated by `tr') along the way.
3545 * Returns a heap-allocated string with all the replacements made.
3546 */
3547 char *
3548 roff_strdup(const struct roff *r, const char *p)
3549 {
3550 const struct roffkv *cp;
3551 char *res;
3552 const char *pp;
3553 size_t ssz, sz;
3554 enum mandoc_esc esc;
3555
3556 if (NULL == r->xmbtab && NULL == r->xtab)
3557 return mandoc_strdup(p);
3558 else if ('\0' == *p)
3559 return mandoc_strdup("");
3560
3561 /*
3562 * Step through each character looking for term matches
3563 * (remember that a `tr' can be invoked with an escape, which is
3564 * a glyph but the escape is multi-character).
3565 * We only do this if the character hash has been initialised
3566 * and the string is >0 length.
3567 */
3568
3569 res = NULL;
3570 ssz = 0;
3571
3572 while ('\0' != *p) {
3573 assert((unsigned int)*p < 128);
3574 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
3575 sz = r->xtab[(int)*p].sz;
3576 res = mandoc_realloc(res, ssz + sz + 1);
3577 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
3578 ssz += sz;
3579 p++;
3580 continue;
3581 } else if ('\\' != *p) {
3582 res = mandoc_realloc(res, ssz + 2);
3583 res[ssz++] = *p++;
3584 continue;
3585 }
3586
3587 /* Search for term matches. */
3588 for (cp = r->xmbtab; cp; cp = cp->next)
3589 if (0 == strncmp(p, cp->key.p, cp->key.sz))
3590 break;
3591
3592 if (NULL != cp) {
3593 /*
3594 * A match has been found.
3595 * Append the match to the array and move
3596 * forward by its keysize.
3597 */
3598 res = mandoc_realloc(res,
3599 ssz + cp->val.sz + 1);
3600 memcpy(res + ssz, cp->val.p, cp->val.sz);
3601 ssz += cp->val.sz;
3602 p += (int)cp->key.sz;
3603 continue;
3604 }
3605
3606 /*
3607 * Handle escapes carefully: we need to copy
3608 * over just the escape itself, or else we might
3609 * do replacements within the escape itself.
3610 * Make sure to pass along the bogus string.
3611 */
3612 pp = p++;
3613 esc = mandoc_escape(&p, NULL, NULL);
3614 if (ESCAPE_ERROR == esc) {
3615 sz = strlen(pp);
3616 res = mandoc_realloc(res, ssz + sz + 1);
3617 memcpy(res + ssz, pp, sz);
3618 break;
3619 }
3620 /*
3621 * We bail out on bad escapes.
3622 * No need to warn: we already did so when
3623 * roff_res() was called.
3624 */
3625 sz = (int)(p - pp);
3626 res = mandoc_realloc(res, ssz + sz + 1);
3627 memcpy(res + ssz, pp, sz);
3628 ssz += sz;
3629 }
3630
3631 res[(int)ssz] = '\0';
3632 return res;
3633 }
3634
3635 int
3636 roff_getformat(const struct roff *r)
3637 {
3638
3639 return r->format;
3640 }
3641
3642 /*
3643 * Find out whether a line is a macro line or not.
3644 * If it is, adjust the current position and return one; if it isn't,
3645 * return zero and don't change the current position.
3646 * If the control character has been set with `.cc', then let that grain
3647 * precedence.
3648 * This is slighly contrary to groff, where using the non-breaking
3649 * control character when `cc' has been invoked will cause the
3650 * non-breaking macro contents to be printed verbatim.
3651 */
3652 int
3653 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
3654 {
3655 int pos;
3656
3657 pos = *ppos;
3658
3659 if (r->control != '\0' && cp[pos] == r->control)
3660 pos++;
3661 else if (r->control != '\0')
3662 return 0;
3663 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
3664 pos += 2;
3665 else if ('.' == cp[pos] || '\'' == cp[pos])
3666 pos++;
3667 else
3668 return 0;
3669
3670 while (' ' == cp[pos] || '\t' == cp[pos])
3671 pos++;
3672
3673 *ppos = pos;
3674 return 1;
3675 }