git.cameronkatri.com Git - mandoc.git/blob - roff.c
Implement w layout specifier (minimum column width).
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.306 2017/06/07 00:50:34 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31 #include "mandoc.h"
32 #include "mandoc_aux.h"
33 #include "mandoc_ohash.h"
34 #include "roff.h"
35 #include "libmandoc.h"
36 #include "roff_int.h"
37 #include "libroff.h"
38
/*
 * Upper bound on the number of string expansions performed on a
 * single input line; exceeding it breaks out of infinite loops.
 */
#define	EXPAND_LIMIT	1000
41
42 /* --- data types --------------------------------------------------------- */
43
/*
 * A very small string buffer: the text plus its cached length.
 */
struct	roffstr {
	char		*p;	/* NUL-terminated text */
	size_t		 sz;	/* cached result of strlen(p) */
};
51
52 /*
53 * A key-value roffstr pair as part of a singly-linked list.
54 */
55 struct roffkv {
56 struct roffstr key;
57 struct roffstr val;
58 struct roffkv *next; /* next in list */
59 };
60
61 /*
62 * A single number register as part of a singly-linked list.
63 */
64 struct roffreg {
65 struct roffstr key;
66 int val;
67 struct roffreg *next;
68 };
69
70 /*
71 * Association of request and macro names with token IDs.
72 */
73 struct roffreq {
74 enum roff_tok tok;
75 char name[];
76 };
77
/*
 * Complete state of one roff parser.
 */
struct	roff {
	struct mparse	*parse;		/* parse point */
	struct roff_man	*man;		/* mdoc or man parser */
	struct roffnode	*last;		/* leaf of stack */
	int		*rstack;	/* stack of inverted `ie' values */
	struct ohash	*reqtab;	/* request lookup table */
	struct roffreg	*regtab;	/* number registers */
	struct roffkv	*strtab;	/* user-defined strings & macros */
	struct roffkv	*rentab;	/* renamed strings & macros */
	struct roffkv	*xmbtab;	/* multi-byte trans table (`tr') */
	struct roffstr	*xtab;		/* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl;	/* first table parsed */
	struct tbl_node	*last_tbl;	/* last table parsed */
	struct tbl_node	*tbl;		/* current table being parsed */
	struct eqn_node	*last_eqn;	/* last equation parsed */
	struct eqn_node	*first_eqn;	/* first equation parsed */
	struct eqn_node	*eqn;		/* current equation being parsed */
	int		 eqn_inline;	/* current equation is inline */
	int		 options;	/* parse options */
	int		 rstacksz;	/* current size limit of rstack */
	int		 rstackpos;	/* position in rstack */
	int		 format;	/* current file in mdoc or man format */
	int		 argc;		/* number of args of the last macro */
	char		 control;	/* control character */
	char		 escape;	/* escape character */
};
105
106 struct roffnode {
107 enum roff_tok tok; /* type of node */
108 struct roffnode *parent; /* up one in stack */
109 int line; /* parse line */
110 int col; /* parse col */
111 char *name; /* node name, e.g. macro name */
112 char *end; /* end-rules: custom token */
113 int endspan; /* end-rules: next-line or infty */
114 int rule; /* current evaluation rule */
115 };
116
/*
 * Parameter list shared by every request handler (see roffproc).
 */
#define	ROFF_ARGS	 struct roff *r,	/* parse ctx */ \
			 enum roff_tok tok,	/* tok of macro */ \
			 struct buf *buf,	/* input buffer */ \
			 int ln,		/* parse line */ \
			 int ppos,		/* original pos in buffer */ \
			 int pos,		/* current pos in buffer */ \
			 int *offs		/* reset offset of buffer data */
124
/* Signature of a request handler taking the ROFF_ARGS parameters. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
126
127 struct roffmac {
128 roffproc proc; /* process new macro */
129 roffproc text; /* process as child text of macro */
130 roffproc sub; /* process as child of macro */
131 int flags;
132 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
133 };
134
/*
 * One predefined string: an input name and the symbol replacing it.
 */
struct	predef {
	const char	*name;	/* predefined input name */
	const char	*str;	/* replacement symbol */
};
139
/*
 * Expands to one brace-enclosed initializer followed by a comma,
 * so that consecutive PREDEF() lines can fill an array initializer.
 */
#define	PREDEF(name_, str_) \
	{ (name_), (str_) },
142
143 /* --- function prototypes ------------------------------------------------ */
144
/* Forward declarations of all file-local functions. */
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum roff_tok,
				const char *, int, int);
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_br(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_cc(ROFF_ARGS);
static	void		 roff_ccond(struct roff *, int, int);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum rofferr	 roff_ec(ROFF_ARGS);
static	enum rofferr	 roff_eo(ROFF_ARGS);
static	enum rofferr	 roff_eqndelim(struct roff *, struct buf *, int);
static	int		 roff_evalcond(struct roff *r, int, char *, int *);
static	int		 roff_evalnum(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalpar(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalstrcond(const char *, int *);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	size_t		 roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *, int);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(const struct roff *,
				const char *, size_t);
static	int		 roff_getregro(const struct roff *,
				const char *name);
static	const char	*roff_getrenn(const struct roff *,
				const char *, size_t);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	int		 roff_hasregn(const struct roff *,
				const char *, size_t);
static	enum rofferr	 roff_insec(ROFF_ARGS);
static	enum rofferr	 roff_it(ROFF_ARGS);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	void		 roff_man_alloc1(struct roff_man *);
static	void		 roff_man_free1(struct roff_man *);
static	enum rofferr	 roff_manyarg(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	enum rofferr	 roff_onearg(ROFF_ARGS);
static	enum roff_tok	 roff_parse(struct roff *, char *, int *,
				int, int);
static	enum rofferr	 roff_parsetext(struct roff *, struct buf *,
				int, int *);
static	enum rofferr	 roff_renamed(ROFF_ARGS);
static	enum rofferr	 roff_res(struct roff *, struct buf *, int, int);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	enum rofferr	 roff_rn(ROFF_ARGS);
static	enum rofferr	 roff_rr(ROFF_ARGS);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_tr(ROFF_ARGS);
static	enum rofferr	 roff_Dd(ROFF_ARGS);
static	enum rofferr	 roff_TH(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_unsupp(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
217
218 /* --- constant data ------------------------------------------------------ */
219
/* Flag bits for the numeric parsing helpers. */
#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */
222
223 const char *__roff_name[MAN_MAX + 1] = {
224 "br", "ce", "ft", "ll",
225 "mc", "sp", "ta", "ti",
226 NULL,
227 "ab", "ad", "af", "aln",
228 "als", "am", "am1", "ami",
229 "ami1", "as", "as1", "asciify",
230 "backtrace", "bd", "bleedat", "blm",
231 "box", "boxa", "bp", "BP",
232 "break", "breakchar", "brnl", "brp",
233 "brpnl", "c2", "cc",
234 "cf", "cflags", "ch", "char",
235 "chop", "class", "close", "CL",
236 "color", "composite", "continue", "cp",
237 "cropat", "cs", "cu", "da",
238 "dch", "Dd", "de", "de1",
239 "defcolor", "dei", "dei1", "device",
240 "devicem", "di", "do", "ds",
241 "ds1", "dwh", "dt", "ec",
242 "ecr", "ecs", "el", "em",
243 "EN", "eo", "EP", "EQ",
244 "errprint", "ev", "evc", "ex",
245 "fallback", "fam", "fc", "fchar",
246 "fcolor", "fdeferlig", "feature", "fkern",
247 "fl", "flig", "fp", "fps",
248 "fschar", "fspacewidth", "fspecial", "ftr",
249 "fzoom", "gcolor", "hc", "hcode",
250 "hidechar", "hla", "hlm", "hpf",
251 "hpfa", "hpfcode", "hw", "hy",
252 "hylang", "hylen", "hym", "hypp",
253 "hys", "ie", "if", "ig",
254 "index", "it", "itc", "IX",
255 "kern", "kernafter", "kernbefore", "kernpair",
256 "lc", "lc_ctype", "lds", "length",
257 "letadj", "lf", "lg", "lhang",
258 "linetabs", "lnr", "lnrf", "lpfx",
259 "ls", "lsm", "lt",
260 "mediasize", "minss", "mk", "mso",
261 "na", "ne", "nh", "nhychar",
262 "nm", "nn", "nop", "nr",
263 "nrf", "nroff", "ns", "nx",
264 "open", "opena", "os", "output",
265 "padj", "papersize", "pc", "pev",
266 "pi", "PI", "pl", "pm",
267 "pn", "pnr", "po", "ps",
268 "psbb", "pshape", "pso", "ptr",
269 "pvs", "rchar", "rd", "recursionlimit",
270 "return", "rfschar", "rhang", "rj",
271 "rm", "rn", "rnn", "rr",
272 "rs", "rt", "schar", "sentchar",
273 "shc", "shift", "sizes", "so",
274 "spacewidth", "special", "spreadwarn", "ss",
275 "sty", "substring", "sv", "sy",
276 "T&", "tc", "TE",
277 "TH", "tkf", "tl",
278 "tm", "tm1", "tmc", "tr",
279 "track", "transchar", "trf", "trimat",
280 "trin", "trnt", "troff", "TS",
281 "uf", "ul", "unformat", "unwatch",
282 "unwatchn", "vpt", "vs", "warn",
283 "warnscale", "watch", "watchlength", "watchn",
284 "wh", "while", "write", "writec",
285 "writem", "xflag", ".", NULL,
286 NULL, "text",
287 "Dd", "Dt", "Os", "Sh",
288 "Ss", "Pp", "D1", "Dl",
289 "Bd", "Ed", "Bl", "El",
290 "It", "Ad", "An", "Ap",
291 "Ar", "Cd", "Cm", "Dv",
292 "Er", "Ev", "Ex", "Fa",
293 "Fd", "Fl", "Fn", "Ft",
294 "Ic", "In", "Li", "Nd",
295 "Nm", "Op", "Ot", "Pa",
296 "Rv", "St", "Va", "Vt",
297 "Xr", "%A", "%B", "%D",
298 "%I", "%J", "%N", "%O",
299 "%P", "%R", "%T", "%V",
300 "Ac", "Ao", "Aq", "At",
301 "Bc", "Bf", "Bo", "Bq",
302 "Bsx", "Bx", "Db", "Dc",
303 "Do", "Dq", "Ec", "Ef",
304 "Em", "Eo", "Fx", "Ms",
305 "No", "Ns", "Nx", "Ox",
306 "Pc", "Pf", "Po", "Pq",
307 "Qc", "Ql", "Qo", "Qq",
308 "Re", "Rs", "Sc", "So",
309 "Sq", "Sm", "Sx", "Sy",
310 "Tn", "Ux", "Xc", "Xo",
311 "Fo", "Fc", "Oo", "Oc",
312 "Bk", "Ek", "Bt", "Hf",
313 "Fr", "Ud", "Lb", "Lp",
314 "Lk", "Mt", "Brq", "Bro",
315 "Brc", "%C", "Es", "En",
316 "Dx", "%Q", "%U", "Ta",
317 NULL,
318 "TH", "SH", "SS", "TP",
319 "LP", "PP", "P", "IP",
320 "HP", "SM", "SB", "BI",
321 "IB", "BR", "RB", "R",
322 "B", "I", "IR", "RI",
323 "nf", "fi",
324 "RE", "RS", "DT", "UC",
325 "PD", "AT", "in",
326 "OP", "EX", "EE", "UR",
327 "UE", NULL
328 };
329 const char *const *roff_name = __roff_name;
330
331 static struct roffmac roffs[TOKEN_NONE] = {
332 { roff_br, NULL, NULL, 0 }, /* br */
333 { roff_onearg, NULL, NULL, 0 }, /* ce */
334 { roff_onearg, NULL, NULL, 0 }, /* ft */
335 { roff_onearg, NULL, NULL, 0 }, /* ll */
336 { roff_onearg, NULL, NULL, 0 }, /* mc */
337 { roff_onearg, NULL, NULL, 0 }, /* sp */
338 { roff_manyarg, NULL, NULL, 0 }, /* ta */
339 { roff_onearg, NULL, NULL, 0 }, /* ti */
340 { NULL, NULL, NULL, 0 }, /* ROFF_MAX */
341 { roff_unsupp, NULL, NULL, 0 }, /* ab */
342 { roff_line_ignore, NULL, NULL, 0 }, /* ad */
343 { roff_line_ignore, NULL, NULL, 0 }, /* af */
344 { roff_unsupp, NULL, NULL, 0 }, /* aln */
345 { roff_unsupp, NULL, NULL, 0 }, /* als */
346 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am */
347 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am1 */
348 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami */
349 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami1 */
350 { roff_ds, NULL, NULL, 0 }, /* as */
351 { roff_ds, NULL, NULL, 0 }, /* as1 */
352 { roff_unsupp, NULL, NULL, 0 }, /* asciify */
353 { roff_line_ignore, NULL, NULL, 0 }, /* backtrace */
354 { roff_line_ignore, NULL, NULL, 0 }, /* bd */
355 { roff_line_ignore, NULL, NULL, 0 }, /* bleedat */
356 { roff_unsupp, NULL, NULL, 0 }, /* blm */
357 { roff_unsupp, NULL, NULL, 0 }, /* box */
358 { roff_unsupp, NULL, NULL, 0 }, /* boxa */
359 { roff_line_ignore, NULL, NULL, 0 }, /* bp */
360 { roff_unsupp, NULL, NULL, 0 }, /* BP */
361 { roff_unsupp, NULL, NULL, 0 }, /* break */
362 { roff_line_ignore, NULL, NULL, 0 }, /* breakchar */
363 { roff_line_ignore, NULL, NULL, 0 }, /* brnl */
364 { roff_br, NULL, NULL, 0 }, /* brp */
365 { roff_line_ignore, NULL, NULL, 0 }, /* brpnl */
366 { roff_unsupp, NULL, NULL, 0 }, /* c2 */
367 { roff_cc, NULL, NULL, 0 }, /* cc */
368 { roff_insec, NULL, NULL, 0 }, /* cf */
369 { roff_line_ignore, NULL, NULL, 0 }, /* cflags */
370 { roff_line_ignore, NULL, NULL, 0 }, /* ch */
371 { roff_unsupp, NULL, NULL, 0 }, /* char */
372 { roff_unsupp, NULL, NULL, 0 }, /* chop */
373 { roff_line_ignore, NULL, NULL, 0 }, /* class */
374 { roff_insec, NULL, NULL, 0 }, /* close */
375 { roff_unsupp, NULL, NULL, 0 }, /* CL */
376 { roff_line_ignore, NULL, NULL, 0 }, /* color */
377 { roff_unsupp, NULL, NULL, 0 }, /* composite */
378 { roff_unsupp, NULL, NULL, 0 }, /* continue */
379 { roff_line_ignore, NULL, NULL, 0 }, /* cp */
380 { roff_line_ignore, NULL, NULL, 0 }, /* cropat */
381 { roff_line_ignore, NULL, NULL, 0 }, /* cs */
382 { roff_line_ignore, NULL, NULL, 0 }, /* cu */
383 { roff_unsupp, NULL, NULL, 0 }, /* da */
384 { roff_unsupp, NULL, NULL, 0 }, /* dch */
385 { roff_Dd, NULL, NULL, 0 }, /* Dd */
386 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de */
387 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de1 */
388 { roff_line_ignore, NULL, NULL, 0 }, /* defcolor */
389 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei */
390 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei1 */
391 { roff_unsupp, NULL, NULL, 0 }, /* device */
392 { roff_unsupp, NULL, NULL, 0 }, /* devicem */
393 { roff_unsupp, NULL, NULL, 0 }, /* di */
394 { roff_unsupp, NULL, NULL, 0 }, /* do */
395 { roff_ds, NULL, NULL, 0 }, /* ds */
396 { roff_ds, NULL, NULL, 0 }, /* ds1 */
397 { roff_unsupp, NULL, NULL, 0 }, /* dwh */
398 { roff_unsupp, NULL, NULL, 0 }, /* dt */
399 { roff_ec, NULL, NULL, 0 }, /* ec */
400 { roff_unsupp, NULL, NULL, 0 }, /* ecr */
401 { roff_unsupp, NULL, NULL, 0 }, /* ecs */
402 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* el */
403 { roff_unsupp, NULL, NULL, 0 }, /* em */
404 { roff_EN, NULL, NULL, 0 }, /* EN */
405 { roff_eo, NULL, NULL, 0 }, /* eo */
406 { roff_unsupp, NULL, NULL, 0 }, /* EP */
407 { roff_EQ, NULL, NULL, 0 }, /* EQ */
408 { roff_line_ignore, NULL, NULL, 0 }, /* errprint */
409 { roff_unsupp, NULL, NULL, 0 }, /* ev */
410 { roff_unsupp, NULL, NULL, 0 }, /* evc */
411 { roff_unsupp, NULL, NULL, 0 }, /* ex */
412 { roff_line_ignore, NULL, NULL, 0 }, /* fallback */
413 { roff_line_ignore, NULL, NULL, 0 }, /* fam */
414 { roff_unsupp, NULL, NULL, 0 }, /* fc */
415 { roff_unsupp, NULL, NULL, 0 }, /* fchar */
416 { roff_line_ignore, NULL, NULL, 0 }, /* fcolor */
417 { roff_line_ignore, NULL, NULL, 0 }, /* fdeferlig */
418 { roff_line_ignore, NULL, NULL, 0 }, /* feature */
419 { roff_line_ignore, NULL, NULL, 0 }, /* fkern */
420 { roff_line_ignore, NULL, NULL, 0 }, /* fl */
421 { roff_line_ignore, NULL, NULL, 0 }, /* flig */
422 { roff_line_ignore, NULL, NULL, 0 }, /* fp */
423 { roff_line_ignore, NULL, NULL, 0 }, /* fps */
424 { roff_unsupp, NULL, NULL, 0 }, /* fschar */
425 { roff_line_ignore, NULL, NULL, 0 }, /* fspacewidth */
426 { roff_line_ignore, NULL, NULL, 0 }, /* fspecial */
427 { roff_line_ignore, NULL, NULL, 0 }, /* ftr */
428 { roff_line_ignore, NULL, NULL, 0 }, /* fzoom */
429 { roff_line_ignore, NULL, NULL, 0 }, /* gcolor */
430 { roff_line_ignore, NULL, NULL, 0 }, /* hc */
431 { roff_line_ignore, NULL, NULL, 0 }, /* hcode */
432 { roff_line_ignore, NULL, NULL, 0 }, /* hidechar */
433 { roff_line_ignore, NULL, NULL, 0 }, /* hla */
434 { roff_line_ignore, NULL, NULL, 0 }, /* hlm */
435 { roff_line_ignore, NULL, NULL, 0 }, /* hpf */
436 { roff_line_ignore, NULL, NULL, 0 }, /* hpfa */
437 { roff_line_ignore, NULL, NULL, 0 }, /* hpfcode */
438 { roff_line_ignore, NULL, NULL, 0 }, /* hw */
439 { roff_line_ignore, NULL, NULL, 0 }, /* hy */
440 { roff_line_ignore, NULL, NULL, 0 }, /* hylang */
441 { roff_line_ignore, NULL, NULL, 0 }, /* hylen */
442 { roff_line_ignore, NULL, NULL, 0 }, /* hym */
443 { roff_line_ignore, NULL, NULL, 0 }, /* hypp */
444 { roff_line_ignore, NULL, NULL, 0 }, /* hys */
445 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* ie */
446 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* if */
447 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ig */
448 { roff_unsupp, NULL, NULL, 0 }, /* index */
449 { roff_it, NULL, NULL, 0 }, /* it */
450 { roff_unsupp, NULL, NULL, 0 }, /* itc */
451 { roff_line_ignore, NULL, NULL, 0 }, /* IX */
452 { roff_line_ignore, NULL, NULL, 0 }, /* kern */
453 { roff_line_ignore, NULL, NULL, 0 }, /* kernafter */
454 { roff_line_ignore, NULL, NULL, 0 }, /* kernbefore */
455 { roff_line_ignore, NULL, NULL, 0 }, /* kernpair */
456 { roff_unsupp, NULL, NULL, 0 }, /* lc */
457 { roff_unsupp, NULL, NULL, 0 }, /* lc_ctype */
458 { roff_unsupp, NULL, NULL, 0 }, /* lds */
459 { roff_unsupp, NULL, NULL, 0 }, /* length */
460 { roff_line_ignore, NULL, NULL, 0 }, /* letadj */
461 { roff_insec, NULL, NULL, 0 }, /* lf */
462 { roff_line_ignore, NULL, NULL, 0 }, /* lg */
463 { roff_line_ignore, NULL, NULL, 0 }, /* lhang */
464 { roff_unsupp, NULL, NULL, 0 }, /* linetabs */
465 { roff_unsupp, NULL, NULL, 0 }, /* lnr */
466 { roff_unsupp, NULL, NULL, 0 }, /* lnrf */
467 { roff_unsupp, NULL, NULL, 0 }, /* lpfx */
468 { roff_line_ignore, NULL, NULL, 0 }, /* ls */
469 { roff_unsupp, NULL, NULL, 0 }, /* lsm */
470 { roff_line_ignore, NULL, NULL, 0 }, /* lt */
471 { roff_line_ignore, NULL, NULL, 0 }, /* mediasize */
472 { roff_line_ignore, NULL, NULL, 0 }, /* minss */
473 { roff_line_ignore, NULL, NULL, 0 }, /* mk */
474 { roff_insec, NULL, NULL, 0 }, /* mso */
475 { roff_line_ignore, NULL, NULL, 0 }, /* na */
476 { roff_line_ignore, NULL, NULL, 0 }, /* ne */
477 { roff_line_ignore, NULL, NULL, 0 }, /* nh */
478 { roff_line_ignore, NULL, NULL, 0 }, /* nhychar */
479 { roff_unsupp, NULL, NULL, 0 }, /* nm */
480 { roff_unsupp, NULL, NULL, 0 }, /* nn */
481 { roff_unsupp, NULL, NULL, 0 }, /* nop */
482 { roff_nr, NULL, NULL, 0 }, /* nr */
483 { roff_unsupp, NULL, NULL, 0 }, /* nrf */
484 { roff_line_ignore, NULL, NULL, 0 }, /* nroff */
485 { roff_line_ignore, NULL, NULL, 0 }, /* ns */
486 { roff_insec, NULL, NULL, 0 }, /* nx */
487 { roff_insec, NULL, NULL, 0 }, /* open */
488 { roff_insec, NULL, NULL, 0 }, /* opena */
489 { roff_line_ignore, NULL, NULL, 0 }, /* os */
490 { roff_unsupp, NULL, NULL, 0 }, /* output */
491 { roff_line_ignore, NULL, NULL, 0 }, /* padj */
492 { roff_line_ignore, NULL, NULL, 0 }, /* papersize */
493 { roff_line_ignore, NULL, NULL, 0 }, /* pc */
494 { roff_line_ignore, NULL, NULL, 0 }, /* pev */
495 { roff_insec, NULL, NULL, 0 }, /* pi */
496 { roff_unsupp, NULL, NULL, 0 }, /* PI */
497 { roff_line_ignore, NULL, NULL, 0 }, /* pl */
498 { roff_line_ignore, NULL, NULL, 0 }, /* pm */
499 { roff_line_ignore, NULL, NULL, 0 }, /* pn */
500 { roff_line_ignore, NULL, NULL, 0 }, /* pnr */
501 { roff_line_ignore, NULL, NULL, 0 }, /* po */
502 { roff_line_ignore, NULL, NULL, 0 }, /* ps */
503 { roff_unsupp, NULL, NULL, 0 }, /* psbb */
504 { roff_unsupp, NULL, NULL, 0 }, /* pshape */
505 { roff_insec, NULL, NULL, 0 }, /* pso */
506 { roff_line_ignore, NULL, NULL, 0 }, /* ptr */
507 { roff_line_ignore, NULL, NULL, 0 }, /* pvs */
508 { roff_unsupp, NULL, NULL, 0 }, /* rchar */
509 { roff_line_ignore, NULL, NULL, 0 }, /* rd */
510 { roff_line_ignore, NULL, NULL, 0 }, /* recursionlimit */
511 { roff_unsupp, NULL, NULL, 0 }, /* return */
512 { roff_unsupp, NULL, NULL, 0 }, /* rfschar */
513 { roff_line_ignore, NULL, NULL, 0 }, /* rhang */
514 { roff_line_ignore, NULL, NULL, 0 }, /* rj */
515 { roff_rm, NULL, NULL, 0 }, /* rm */
516 { roff_rn, NULL, NULL, 0 }, /* rn */
517 { roff_unsupp, NULL, NULL, 0 }, /* rnn */
518 { roff_rr, NULL, NULL, 0 }, /* rr */
519 { roff_line_ignore, NULL, NULL, 0 }, /* rs */
520 { roff_line_ignore, NULL, NULL, 0 }, /* rt */
521 { roff_unsupp, NULL, NULL, 0 }, /* schar */
522 { roff_line_ignore, NULL, NULL, 0 }, /* sentchar */
523 { roff_line_ignore, NULL, NULL, 0 }, /* shc */
524 { roff_unsupp, NULL, NULL, 0 }, /* shift */
525 { roff_line_ignore, NULL, NULL, 0 }, /* sizes */
526 { roff_so, NULL, NULL, 0 }, /* so */
527 { roff_line_ignore, NULL, NULL, 0 }, /* spacewidth */
528 { roff_line_ignore, NULL, NULL, 0 }, /* special */
529 { roff_line_ignore, NULL, NULL, 0 }, /* spreadwarn */
530 { roff_line_ignore, NULL, NULL, 0 }, /* ss */
531 { roff_line_ignore, NULL, NULL, 0 }, /* sty */
532 { roff_unsupp, NULL, NULL, 0 }, /* substring */
533 { roff_line_ignore, NULL, NULL, 0 }, /* sv */
534 { roff_insec, NULL, NULL, 0 }, /* sy */
535 { roff_T_, NULL, NULL, 0 }, /* T& */
536 { roff_unsupp, NULL, NULL, 0 }, /* tc */
537 { roff_TE, NULL, NULL, 0 }, /* TE */
538 { roff_TH, NULL, NULL, 0 }, /* TH */
539 { roff_line_ignore, NULL, NULL, 0 }, /* tkf */
540 { roff_unsupp, NULL, NULL, 0 }, /* tl */
541 { roff_line_ignore, NULL, NULL, 0 }, /* tm */
542 { roff_line_ignore, NULL, NULL, 0 }, /* tm1 */
543 { roff_line_ignore, NULL, NULL, 0 }, /* tmc */
544 { roff_tr, NULL, NULL, 0 }, /* tr */
545 { roff_line_ignore, NULL, NULL, 0 }, /* track */
546 { roff_line_ignore, NULL, NULL, 0 }, /* transchar */
547 { roff_insec, NULL, NULL, 0 }, /* trf */
548 { roff_line_ignore, NULL, NULL, 0 }, /* trimat */
549 { roff_unsupp, NULL, NULL, 0 }, /* trin */
550 { roff_unsupp, NULL, NULL, 0 }, /* trnt */
551 { roff_line_ignore, NULL, NULL, 0 }, /* troff */
552 { roff_TS, NULL, NULL, 0 }, /* TS */
553 { roff_line_ignore, NULL, NULL, 0 }, /* uf */
554 { roff_line_ignore, NULL, NULL, 0 }, /* ul */
555 { roff_unsupp, NULL, NULL, 0 }, /* unformat */
556 { roff_line_ignore, NULL, NULL, 0 }, /* unwatch */
557 { roff_line_ignore, NULL, NULL, 0 }, /* unwatchn */
558 { roff_line_ignore, NULL, NULL, 0 }, /* vpt */
559 { roff_line_ignore, NULL, NULL, 0 }, /* vs */
560 { roff_line_ignore, NULL, NULL, 0 }, /* warn */
561 { roff_line_ignore, NULL, NULL, 0 }, /* warnscale */
562 { roff_line_ignore, NULL, NULL, 0 }, /* watch */
563 { roff_line_ignore, NULL, NULL, 0 }, /* watchlength */
564 { roff_line_ignore, NULL, NULL, 0 }, /* watchn */
565 { roff_unsupp, NULL, NULL, 0 }, /* wh */
566 { roff_unsupp, NULL, NULL, 0 }, /* while */
567 { roff_insec, NULL, NULL, 0 }, /* write */
568 { roff_insec, NULL, NULL, 0 }, /* writec */
569 { roff_insec, NULL, NULL, 0 }, /* writem */
570 { roff_line_ignore, NULL, NULL, 0 }, /* xflag */
571 { roff_cblock, NULL, NULL, 0 }, /* . */
572 { roff_renamed, NULL, NULL, 0 },
573 { roff_userdef, NULL, NULL, 0 }
574 };
575
/*
 * NULL-terminated list of reserved mdoc macro names.
 * not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH
 */
const	char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
	"Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
	"Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em",
	"En", "Eo", "Er", "Es", "Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp",
	"Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "Pq",
	"Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv",
	"Sc", "Sh", "Sm", "So", "Sq",
	"Ss", "St", "Sx", "Sy",
	"Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
	"%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
598
/*
 * NULL-terminated list of reserved man macro names.
 * not currently implemented: BT DE DS ME MT PT SY TQ YS
 */
const	char *const __man_reserved[] = {
	"AT", "B", "BI", "BR", "DT",
	"EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR",
	"LP", "OP", "P", "PD", "PP",
	"R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS",
	"TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR",
	NULL
};
608
609 /* Array of injected predefined strings. */
610 #define PREDEFS_MAX 38
611 static const struct predef predefs[PREDEFS_MAX] = {
612 #include "predefs.in"
613 };
614
/* File-scope state of the .ce and .it requests. */
static	int		 roffce_lines;	/* number of input lines to center */
static	struct roff_node *roffce_node;	/* active request */
static	int		 roffit_lines;	/* number of lines to delay */
static	char		*roffit_macro;	/* nil-terminated macro line */
619
620
621 /* --- request table ------------------------------------------------------ */
622
623 struct ohash *
624 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
625 {
626 struct ohash *htab;
627 struct roffreq *req;
628 enum roff_tok tok;
629 size_t sz;
630 unsigned int slot;
631
632 htab = mandoc_malloc(sizeof(*htab));
633 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
634
635 for (tok = mintok; tok < maxtok; tok++) {
636 if (roff_name[tok] == NULL)
637 continue;
638 sz = strlen(roff_name[tok]);
639 req = mandoc_malloc(sizeof(*req) + sz + 1);
640 req->tok = tok;
641 memcpy(req->name, roff_name[tok], sz + 1);
642 slot = ohash_qlookup(htab, req->name);
643 ohash_insert(htab, slot, req);
644 }
645 return htab;
646 }
647
/*
 * Free every entry of a request hash table, then the table itself.
 * A NULL table is accepted and ignored.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 idx;

	if (htab != NULL) {
		entry = ohash_first(htab, &idx);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &idx);
		}
		ohash_delete(htab);
		free(htab);
	}
}
662
663 enum roff_tok
664 roffhash_find(struct ohash *htab, const char *name, size_t sz)
665 {
666 struct roffreq *req;
667 const char *end;
668
669 if (sz) {
670 end = name + sz;
671 req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
672 } else
673 req = ohash_find(htab, ohash_qlookup(htab, name));
674 return req == NULL ? TOKEN_NONE : req->tok;
675 }
676
677 /* --- stack of request blocks -------------------------------------------- */
678
679 /*
680 * Pop the current node off of the stack of roff instructions currently
681 * pending.
682 */
683 static void
684 roffnode_pop(struct roff *r)
685 {
686 struct roffnode *p;
687
688 assert(r->last);
689 p = r->last;
690
691 r->last = r->last->parent;
692 free(p->name);
693 free(p->end);
694 free(p);
695 }
696
697 /*
698 * Push a roff node onto the instruction stack. This must later be
699 * removed with roffnode_pop().
700 */
701 static void
702 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
703 int line, int col)
704 {
705 struct roffnode *p;
706
707 p = mandoc_calloc(1, sizeof(struct roffnode));
708 p->tok = tok;
709 if (name)
710 p->name = mandoc_strdup(name);
711 p->parent = r->last;
712 p->line = line;
713 p->col = col;
714 p->rule = p->parent ? p->parent->rule : 0;
715
716 r->last = p;
717 }
718
719 /* --- roff parser state data management ---------------------------------- */
720
721 static void
722 roff_free1(struct roff *r)
723 {
724 struct tbl_node *tbl;
725 struct eqn_node *e;
726 int i;
727
728 while (NULL != (tbl = r->first_tbl)) {
729 r->first_tbl = tbl->next;
730 tbl_free(tbl);
731 }
732 r->first_tbl = r->last_tbl = r->tbl = NULL;
733
734 while (NULL != (e = r->first_eqn)) {
735 r->first_eqn = e->next;
736 eqn_free(e);
737 }
738 r->first_eqn = r->last_eqn = r->eqn = NULL;
739
740 while (r->last)
741 roffnode_pop(r);
742
743 free (r->rstack);
744 r->rstack = NULL;
745 r->rstacksz = 0;
746 r->rstackpos = -1;
747
748 roff_freereg(r->regtab);
749 r->regtab = NULL;
750
751 roff_freestr(r->strtab);
752 roff_freestr(r->rentab);
753 roff_freestr(r->xmbtab);
754 r->strtab = r->rentab = r->xmbtab = NULL;
755
756 if (r->xtab)
757 for (i = 0; i < 128; i++)
758 free(r->xtab[i].p);
759 free(r->xtab);
760 r->xtab = NULL;
761 }
762
763 void
764 roff_reset(struct roff *r)
765 {
766 roff_free1(r);
767 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
768 r->control = '\0';
769 r->escape = '\\';
770 }
771
772 void
773 roff_free(struct roff *r)
774 {
775 roff_free1(r);
776 roffhash_free(r->reqtab);
777 free(r);
778 }
779
780 struct roff *
781 roff_alloc(struct mparse *parse, int options)
782 {
783 struct roff *r;
784
785 r = mandoc_calloc(1, sizeof(struct roff));
786 r->parse = parse;
787 r->reqtab = roffhash_alloc(0, ROFF_USERDEF);
788 r->options = options;
789 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
790 r->rstackpos = -1;
791 r->escape = '\\';
792 return r;
793 }
794
795 /* --- syntax tree state data management ---------------------------------- */
796
797 static void
798 roff_man_free1(struct roff_man *man)
799 {
800
801 if (man->first != NULL)
802 roff_node_delete(man, man->first);
803 free(man->meta.msec);
804 free(man->meta.vol);
805 free(man->meta.os);
806 free(man->meta.arch);
807 free(man->meta.title);
808 free(man->meta.name);
809 free(man->meta.date);
810 }
811
812 static void
813 roff_man_alloc1(struct roff_man *man)
814 {
815
816 memset(&man->meta, 0, sizeof(man->meta));
817 man->first = mandoc_calloc(1, sizeof(*man->first));
818 man->first->type = ROFFT_ROOT;
819 man->last = man->first;
820 man->last_es = NULL;
821 man->flags = 0;
822 man->macroset = MACROSET_NONE;
823 man->lastsec = man->lastnamed = SEC_NONE;
824 man->next = ROFF_NEXT_CHILD;
825 }
826
/*
 * Discard the current tree and meta data,
 * then set up the initial state again.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);
	roff_man_alloc1(man);
}
834
/*
 * Destroy a roff_man object: free its contents,
 * then the object itself.
 */
void
roff_man_free(struct roff_man *man)
{
	roff_man_free1(man);
	free(man);
}
842
843 struct roff_man *
844 roff_man_alloc(struct roff *roff, struct mparse *parse,
845 const char *defos, int quick)
846 {
847 struct roff_man *man;
848
849 man = mandoc_calloc(1, sizeof(*man));
850 man->parse = parse;
851 man->roff = roff;
852 man->defos = defos;
853 man->quick = quick;
854 roff_man_alloc1(man);
855 roff->man = man;
856 return man;
857 }
858
859 /* --- syntax tree handling ----------------------------------------------- */
860
861 struct roff_node *
862 roff_node_alloc(struct roff_man *man, int line, int pos,
863 enum roff_type type, int tok)
864 {
865 struct roff_node *n;
866
867 n = mandoc_calloc(1, sizeof(*n));
868 n->line = line;
869 n->pos = pos;
870 n->tok = tok;
871 n->type = type;
872 n->sec = man->lastsec;
873
874 if (man->flags & MDOC_SYNOPSIS)
875 n->flags |= NODE_SYNPRETTY;
876 else
877 n->flags &= ~NODE_SYNPRETTY;
878 if (man->flags & MDOC_NEWLINE)
879 n->flags |= NODE_LINE;
880 man->flags &= ~MDOC_NEWLINE;
881
882 return n;
883 }
884
885 void
886 roff_node_append(struct roff_man *man, struct roff_node *n)
887 {
888
889 switch (man->next) {
890 case ROFF_NEXT_SIBLING:
891 if (man->last->next != NULL) {
892 n->next = man->last->next;
893 man->last->next->prev = n;
894 } else
895 man->last->parent->last = n;
896 man->last->next = n;
897 n->prev = man->last;
898 n->parent = man->last->parent;
899 break;
900 case ROFF_NEXT_CHILD:
901 if (man->last->child != NULL) {
902 n->next = man->last->child;
903 man->last->child->prev = n;
904 } else
905 man->last->last = n;
906 man->last->child = n;
907 n->parent = man->last;
908 break;
909 default:
910 abort();
911 }
912 man->last = n;
913
914 switch (n->type) {
915 case ROFFT_HEAD:
916 n->parent->head = n;
917 break;
918 case ROFFT_BODY:
919 if (n->end != ENDBODY_NOT)
920 return;
921 n->parent->body = n;
922 break;
923 case ROFFT_TAIL:
924 n->parent->tail = n;
925 break;
926 default:
927 return;
928 }
929
930 /*
931 * Copy over the normalised-data pointer of our parent. Not
932 * everybody has one, but copying a null pointer is fine.
933 */
934
935 n->norm = n->parent->norm;
936 assert(n->parent->type == ROFFT_BLOCK);
937 }
938
939 void
940 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
941 {
942 struct roff_node *n;
943
944 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
945 n->string = roff_strdup(man->roff, word);
946 roff_node_append(man, n);
947 n->flags |= NODE_VALID | NODE_ENDED;
948 man->next = ROFF_NEXT_SIBLING;
949 }
950
951 void
952 roff_word_append(struct roff_man *man, const char *word)
953 {
954 struct roff_node *n;
955 char *addstr, *newstr;
956
957 n = man->last;
958 addstr = roff_strdup(man->roff, word);
959 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
960 free(addstr);
961 free(n->string);
962 n->string = newstr;
963 man->next = ROFF_NEXT_SIBLING;
964 }
965
966 void
967 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
968 {
969 struct roff_node *n;
970
971 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
972 roff_node_append(man, n);
973 man->next = ROFF_NEXT_CHILD;
974 }
975
976 struct roff_node *
977 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
978 {
979 struct roff_node *n;
980
981 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
982 roff_node_append(man, n);
983 man->next = ROFF_NEXT_CHILD;
984 return n;
985 }
986
987 struct roff_node *
988 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
989 {
990 struct roff_node *n;
991
992 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
993 roff_node_append(man, n);
994 man->next = ROFF_NEXT_CHILD;
995 return n;
996 }
997
998 struct roff_node *
999 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1000 {
1001 struct roff_node *n;
1002
1003 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1004 roff_node_append(man, n);
1005 man->next = ROFF_NEXT_CHILD;
1006 return n;
1007 }
1008
1009 void
1010 roff_addeqn(struct roff_man *man, const struct eqn *eqn)
1011 {
1012 struct roff_node *n;
1013
1014 n = roff_node_alloc(man, eqn->ln, eqn->pos, ROFFT_EQN, TOKEN_NONE);
1015 n->eqn = eqn;
1016 if (eqn->ln > man->last->line)
1017 n->flags |= NODE_LINE;
1018 roff_node_append(man, n);
1019 man->next = ROFF_NEXT_SIBLING;
1020 }
1021
1022 void
1023 roff_addtbl(struct roff_man *man, const struct tbl_span *tbl)
1024 {
1025 struct roff_node *n;
1026
1027 if (man->macroset == MACROSET_MAN)
1028 man_breakscope(man, ROFF_TS);
1029 n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE);
1030 n->span = tbl;
1031 roff_node_append(man, n);
1032 n->flags |= NODE_VALID | NODE_ENDED;
1033 man->next = ROFF_NEXT_SIBLING;
1034 }
1035
1036 void
1037 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1038 {
1039
1040 /* Adjust siblings. */
1041
1042 if (n->prev)
1043 n->prev->next = n->next;
1044 if (n->next)
1045 n->next->prev = n->prev;
1046
1047 /* Adjust parent. */
1048
1049 if (n->parent != NULL) {
1050 if (n->parent->child == n)
1051 n->parent->child = n->next;
1052 if (n->parent->last == n)
1053 n->parent->last = n->prev;
1054 }
1055
1056 /* Adjust parse point. */
1057
1058 if (man == NULL)
1059 return;
1060 if (man->last == n) {
1061 if (n->prev == NULL) {
1062 man->last = n->parent;
1063 man->next = ROFF_NEXT_CHILD;
1064 } else {
1065 man->last = n->prev;
1066 man->next = ROFF_NEXT_SIBLING;
1067 }
1068 }
1069 if (man->first == n)
1070 man->first = NULL;
1071 }
1072
1073 void
1074 roff_node_free(struct roff_node *n)
1075 {
1076
1077 if (n->args != NULL)
1078 mdoc_argv_free(n->args);
1079 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1080 free(n->norm);
1081 free(n->string);
1082 free(n);
1083 }
1084
1085 void
1086 roff_node_delete(struct roff_man *man, struct roff_node *n)
1087 {
1088
1089 while (n->child != NULL)
1090 roff_node_delete(man, n->child);
1091 roff_node_unlink(man, n);
1092 roff_node_free(n);
1093 }
1094
1095 void
1096 deroff(char **dest, const struct roff_node *n)
1097 {
1098 char *cp;
1099 size_t sz;
1100
1101 if (n->type != ROFFT_TEXT) {
1102 for (n = n->child; n != NULL; n = n->next)
1103 deroff(dest, n);
1104 return;
1105 }
1106
1107 /* Skip leading whitespace. */
1108
1109 for (cp = n->string; *cp != '\0'; cp++) {
1110 if (cp[0] == '\\' && cp[1] != '\0' &&
1111 strchr(" %&0^|~", cp[1]) != NULL)
1112 cp++;
1113 else if ( ! isspace((unsigned char)*cp))
1114 break;
1115 }
1116
1117 /* Skip trailing backslash. */
1118
1119 sz = strlen(cp);
1120 if (sz > 0 && cp[sz - 1] == '\\')
1121 sz--;
1122
1123 /* Skip trailing whitespace. */
1124
1125 for (; sz; sz--)
1126 if ( ! isspace((unsigned char)cp[sz-1]))
1127 break;
1128
1129 /* Skip empty strings. */
1130
1131 if (sz == 0)
1132 return;
1133
1134 if (*dest == NULL) {
1135 *dest = mandoc_strndup(cp, sz);
1136 return;
1137 }
1138
1139 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1140 free(*dest);
1141 *dest = cp;
1142 }
1143
1144 /* --- main functions of the roff parser ---------------------------------- */
1145
1146 /*
1147 * In the current line, expand escape sequences that tend to get
1148 * used in numerical expressions and conditional requests.
1149 * Also check the syntax of the remaining escape sequences.
1150 */
1151 static enum rofferr
1152 roff_res(struct roff *r, struct buf *buf, int ln, int pos)
1153 {
1154 char ubuf[24]; /* buffer to print the number */
1155 const char *start; /* start of the string to process */
1156 char *stesc; /* start of an escape sequence ('\\') */
1157 const char *stnam; /* start of the name, after "[(*" */
1158 const char *cp; /* end of the name, e.g. before ']' */
1159 const char *res; /* the string to be substituted */
1160 char *nbuf; /* new buffer to copy buf->buf to */
1161 size_t maxl; /* expected length of the escape name */
1162 size_t naml; /* actual length of the escape name */
1163 enum mandoc_esc esc; /* type of the escape sequence */
1164 int inaml; /* length returned from mandoc_escape() */
1165 int expand_count; /* to avoid infinite loops */
1166 int npos; /* position in numeric expression */
1167 int arg_complete; /* argument not interrupted by eol */
1168 int done; /* no more input available */
1169 char term; /* character terminating the escape */
1170
1171 /* Search forward for comments. */
1172
1173 done = 0;
1174 start = buf->buf + pos;
1175 for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
1176 if (stesc[0] != r->escape || stesc[1] == '\0')
1177 continue;
1178 stesc++;
1179 if (*stesc != '"' && *stesc != '#')
1180 continue;
1181 cp = strchr(stesc--, '\0') - 1;
1182 if (*cp == '\n') {
1183 done = 1;
1184 cp--;
1185 }
1186 if (*cp == ' ' || *cp == '\t')
1187 mandoc_msg(MANDOCERR_SPACE_EOL, r->parse,
1188 ln, cp - buf->buf, NULL);
1189 while (stesc > start && stesc[-1] == ' ')
1190 stesc--;
1191 *stesc = '\0';
1192 break;
1193 }
1194 if (stesc == start)
1195 return ROFF_CONT;
1196 stesc--;
1197
1198 /* Notice the end of the input. */
1199
1200 if (*stesc == '\n') {
1201 *stesc-- = '\0';
1202 done = 1;
1203 }
1204
1205 expand_count = 0;
1206 while (stesc >= start) {
1207
1208 /* Search backwards for the next backslash. */
1209
1210 if (*stesc != r->escape) {
1211 if (*stesc == '\\') {
1212 *stesc = '\0';
1213 buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
1214 buf->buf, stesc + 1) + 1;
1215 start = nbuf + pos;
1216 stesc = nbuf + (stesc - buf->buf);
1217 free(buf->buf);
1218 buf->buf = nbuf;
1219 }
1220 stesc--;
1221 continue;
1222 }
1223
1224 /* If it is escaped, skip it. */
1225
1226 for (cp = stesc - 1; cp >= start; cp--)
1227 if (*cp != r->escape)
1228 break;
1229
1230 if ((stesc - cp) % 2 == 0) {
1231 while (stesc > cp)
1232 *stesc-- = '\\';
1233 continue;
1234 } else if (stesc[1] != '\0') {
1235 *stesc = '\\';
1236 } else {
1237 *stesc-- = '\0';
1238 if (done)
1239 continue;
1240 else
1241 return ROFF_APPEND;
1242 }
1243
1244 /* Decide whether to expand or to check only. */
1245
1246 term = '\0';
1247 cp = stesc + 1;
1248 switch (*cp) {
1249 case '*':
1250 res = NULL;
1251 break;
1252 case 'B':
1253 case 'w':
1254 term = cp[1];
1255 /* FALLTHROUGH */
1256 case 'n':
1257 res = ubuf;
1258 break;
1259 default:
1260 esc = mandoc_escape(&cp, &stnam, &inaml);
1261 if (esc == ESCAPE_ERROR ||
1262 (esc == ESCAPE_SPECIAL &&
1263 mchars_spec2cp(stnam, inaml) < 0))
1264 mandoc_vmsg(MANDOCERR_ESC_BAD,
1265 r->parse, ln, (int)(stesc - buf->buf),
1266 "%.*s", (int)(cp - stesc), stesc);
1267 stesc--;
1268 continue;
1269 }
1270
1271 if (EXPAND_LIMIT < ++expand_count) {
1272 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1273 ln, (int)(stesc - buf->buf), NULL);
1274 return ROFF_IGN;
1275 }
1276
1277 /*
1278 * The third character decides the length
1279 * of the name of the string or register.
1280 * Save a pointer to the name.
1281 */
1282
1283 if (term == '\0') {
1284 switch (*++cp) {
1285 case '\0':
1286 maxl = 0;
1287 break;
1288 case '(':
1289 cp++;
1290 maxl = 2;
1291 break;
1292 case '[':
1293 cp++;
1294 term = ']';
1295 maxl = 0;
1296 break;
1297 default:
1298 maxl = 1;
1299 break;
1300 }
1301 } else {
1302 cp += 2;
1303 maxl = 0;
1304 }
1305 stnam = cp;
1306
1307 /* Advance to the end of the name. */
1308
1309 naml = 0;
1310 arg_complete = 1;
1311 while (maxl == 0 || naml < maxl) {
1312 if (*cp == '\0') {
1313 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
1314 ln, (int)(stesc - buf->buf), stesc);
1315 arg_complete = 0;
1316 break;
1317 }
1318 if (maxl == 0 && *cp == term) {
1319 cp++;
1320 break;
1321 }
1322 if (*cp++ != '\\' || stesc[1] != 'w') {
1323 naml++;
1324 continue;
1325 }
1326 switch (mandoc_escape(&cp, NULL, NULL)) {
1327 case ESCAPE_SPECIAL:
1328 case ESCAPE_UNICODE:
1329 case ESCAPE_NUMBERED:
1330 case ESCAPE_OVERSTRIKE:
1331 naml++;
1332 break;
1333 default:
1334 break;
1335 }
1336 }
1337
1338 /*
1339 * Retrieve the replacement string; if it is
1340 * undefined, resume searching for escapes.
1341 */
1342
1343 switch (stesc[1]) {
1344 case '*':
1345 if (arg_complete)
1346 res = roff_getstrn(r, stnam, naml);
1347 break;
1348 case 'B':
1349 npos = 0;
1350 ubuf[0] = arg_complete &&
1351 roff_evalnum(r, ln, stnam, &npos,
1352 NULL, ROFFNUM_SCALE) &&
1353 stnam + npos + 1 == cp ? '1' : '0';
1354 ubuf[1] = '\0';
1355 break;
1356 case 'n':
1357 if (arg_complete)
1358 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1359 roff_getregn(r, stnam, naml));
1360 else
1361 ubuf[0] = '\0';
1362 break;
1363 case 'w':
1364 /* use even incomplete args */
1365 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1366 24 * (int)naml);
1367 break;
1368 }
1369
1370 if (res == NULL) {
1371 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1372 r->parse, ln, (int)(stesc - buf->buf),
1373 "%.*s", (int)naml, stnam);
1374 res = "";
1375 } else if (buf->sz + strlen(res) > SHRT_MAX) {
1376 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1377 ln, (int)(stesc - buf->buf), NULL);
1378 return ROFF_IGN;
1379 }
1380
1381 /* Replace the escape sequence by the string. */
1382
1383 *stesc = '\0';
1384 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
1385 buf->buf, res, cp) + 1;
1386
1387 /* Prepare for the next replacement. */
1388
1389 start = nbuf + pos;
1390 stesc = nbuf + (stesc - buf->buf) + strlen(res);
1391 free(buf->buf);
1392 buf->buf = nbuf;
1393 }
1394 return ROFF_CONT;
1395 }
1396
1397 /*
1398 * Process text streams.
1399 */
1400 static enum rofferr
1401 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1402 {
1403 size_t sz;
1404 const char *start;
1405 char *p;
1406 int isz;
1407 enum mandoc_esc esc;
1408
1409 /* Spring the input line trap. */
1410
1411 if (roffit_lines == 1) {
1412 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1413 free(buf->buf);
1414 buf->buf = p;
1415 buf->sz = isz + 1;
1416 *offs = 0;
1417 free(roffit_macro);
1418 roffit_lines = 0;
1419 return ROFF_REPARSE;
1420 } else if (roffit_lines > 1)
1421 --roffit_lines;
1422
1423 if (roffce_node != NULL && buf->buf[pos] != '\0') {
1424 if (roffce_lines < 1) {
1425 r->man->last = roffce_node;
1426 r->man->next = ROFF_NEXT_SIBLING;
1427 roffce_lines = 0;
1428 roffce_node = NULL;
1429 } else
1430 roffce_lines--;
1431 }
1432
1433 /* Convert all breakable hyphens into ASCII_HYPH. */
1434
1435 start = p = buf->buf + pos;
1436
1437 while (*p != '\0') {
1438 sz = strcspn(p, "-\\");
1439 p += sz;
1440
1441 if (*p == '\0')
1442 break;
1443
1444 if (*p == '\\') {
1445 /* Skip over escapes. */
1446 p++;
1447 esc = mandoc_escape((const char **)&p, NULL, NULL);
1448 if (esc == ESCAPE_ERROR)
1449 break;
1450 while (*p == '-')
1451 p++;
1452 continue;
1453 } else if (p == start) {
1454 p++;
1455 continue;
1456 }
1457
1458 if (isalpha((unsigned char)p[-1]) &&
1459 isalpha((unsigned char)p[1]))
1460 *p = ASCII_HYPH;
1461 p++;
1462 }
1463 return ROFF_CONT;
1464 }
1465
1466 enum rofferr
1467 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
1468 {
1469 enum roff_tok t;
1470 enum rofferr e;
1471 int pos; /* parse point */
1472 int spos; /* saved parse point for messages */
1473 int ppos; /* original offset in buf->buf */
1474 int ctl; /* macro line (boolean) */
1475
1476 ppos = pos = *offs;
1477
1478 /* Handle in-line equation delimiters. */
1479
1480 if (r->tbl == NULL &&
1481 r->last_eqn != NULL && r->last_eqn->delim &&
1482 (r->eqn == NULL || r->eqn_inline)) {
1483 e = roff_eqndelim(r, buf, pos);
1484 if (e == ROFF_REPARSE)
1485 return e;
1486 assert(e == ROFF_CONT);
1487 }
1488
1489 /* Expand some escape sequences. */
1490
1491 e = roff_res(r, buf, ln, pos);
1492 if (e == ROFF_IGN || e == ROFF_APPEND)
1493 return e;
1494 assert(e == ROFF_CONT);
1495
1496 ctl = roff_getcontrol(r, buf->buf, &pos);
1497
1498 /*
1499 * First, if a scope is open and we're not a macro, pass the
1500 * text through the macro's filter.
1501 * Equations process all content themselves.
1502 * Tables process almost all content themselves, but we want
1503 * to warn about macros before passing it there.
1504 */
1505
1506 if (r->last != NULL && ! ctl) {
1507 t = r->last->tok;
1508 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1509 if (e == ROFF_IGN)
1510 return e;
1511 assert(e == ROFF_CONT);
1512 }
1513 if (r->eqn != NULL)
1514 return eqn_read(&r->eqn, ln, buf->buf, ppos, offs);
1515 if (r->tbl != NULL && ( ! ctl || buf->buf[pos] == '\0'))
1516 return tbl_read(r->tbl, ln, buf->buf, ppos);
1517 if ( ! ctl)
1518 return roff_parsetext(r, buf, pos, offs);
1519
1520 /* Skip empty request lines. */
1521
1522 if (buf->buf[pos] == '"') {
1523 mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
1524 ln, pos, NULL);
1525 return ROFF_IGN;
1526 } else if (buf->buf[pos] == '\0')
1527 return ROFF_IGN;
1528
1529 /*
1530 * If a scope is open, go to the child handler for that macro,
1531 * as it may want to preprocess before doing anything with it.
1532 * Don't do so if an equation is open.
1533 */
1534
1535 if (r->last) {
1536 t = r->last->tok;
1537 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1538 }
1539
1540 /* No scope is open. This is a new request or macro. */
1541
1542 spos = pos;
1543 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1544
1545 /* Tables ignore most macros. */
1546
1547 if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS)) {
1548 mandoc_msg(MANDOCERR_TBLMACRO, r->parse,
1549 ln, pos, buf->buf + spos);
1550 if (t == ROFF_TS)
1551 return ROFF_IGN;
1552 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1553 pos++;
1554 while (buf->buf[pos] == ' ')
1555 pos++;
1556 return tbl_read(r->tbl, ln, buf->buf, pos);
1557 }
1558
1559 /* For now, let high level macros abort .ce mode. */
1560
1561 if (ctl && roffce_node != NULL &&
1562 (t == TOKEN_NONE || t == ROFF_EQ || t == ROFF_TS)) {
1563 r->man->last = roffce_node;
1564 r->man->next = ROFF_NEXT_SIBLING;
1565 roffce_lines = 0;
1566 roffce_node = NULL;
1567 }
1568
1569 /*
1570 * This is neither a roff request nor a user-defined macro.
1571 * Let the standard macro set parsers handle it.
1572 */
1573
1574 if (t == TOKEN_NONE)
1575 return ROFF_CONT;
1576
1577 /* Execute a roff request or a user defined macro. */
1578
1579 return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
1580 }
1581
1582 void
1583 roff_endparse(struct roff *r)
1584 {
1585
1586 if (r->last)
1587 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1588 r->last->line, r->last->col,
1589 roff_name[r->last->tok]);
1590
1591 if (r->eqn) {
1592 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1593 r->eqn->eqn.ln, r->eqn->eqn.pos, "EQ");
1594 eqn_end(&r->eqn);
1595 }
1596
1597 if (r->tbl) {
1598 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1599 r->tbl->line, r->tbl->pos, "TS");
1600 tbl_end(&r->tbl);
1601 }
1602 }
1603
1604 /*
1605 * Parse a roff node's type from the input buffer. This must be in the
1606 * form of ".foo xxx" in the usual way.
1607 */
1608 static enum roff_tok
1609 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1610 {
1611 char *cp;
1612 const char *mac;
1613 size_t maclen;
1614 enum roff_tok t;
1615
1616 cp = buf + *pos;
1617
1618 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1619 return TOKEN_NONE;
1620
1621 mac = cp;
1622 maclen = roff_getname(r, &cp, ln, ppos);
1623
1624 t = (r->current_string = roff_getstrn(r, mac, maclen)) ?
1625 ROFF_USERDEF :
1626 (r->current_string = roff_getrenn(r, mac, maclen)) ?
1627 ROFF_RENAMED : roffhash_find(r->reqtab, mac, maclen);
1628
1629 if (t != TOKEN_NONE)
1630 *pos = cp - buf;
1631
1632 return t;
1633 }
1634
1635 /* --- handling of request blocks ----------------------------------------- */
1636
1637 static enum rofferr
1638 roff_cblock(ROFF_ARGS)
1639 {
1640
1641 /*
1642 * A block-close `..' should only be invoked as a child of an
1643 * ignore macro, otherwise raise a warning and just ignore it.
1644 */
1645
1646 if (r->last == NULL) {
1647 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1648 ln, ppos, "..");
1649 return ROFF_IGN;
1650 }
1651
1652 switch (r->last->tok) {
1653 case ROFF_am:
1654 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1655 case ROFF_ami:
1656 case ROFF_de:
1657 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1658 case ROFF_dei:
1659 case ROFF_ig:
1660 break;
1661 default:
1662 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1663 ln, ppos, "..");
1664 return ROFF_IGN;
1665 }
1666
1667 if (buf->buf[pos] != '\0')
1668 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1669 ".. %s", buf->buf + pos);
1670
1671 roffnode_pop(r);
1672 roffnode_cleanscope(r);
1673 return ROFF_IGN;
1674
1675 }
1676
1677 static void
1678 roffnode_cleanscope(struct roff *r)
1679 {
1680
1681 while (r->last) {
1682 if (--r->last->endspan != 0)
1683 break;
1684 roffnode_pop(r);
1685 }
1686 }
1687
1688 static void
1689 roff_ccond(struct roff *r, int ln, int ppos)
1690 {
1691
1692 if (NULL == r->last) {
1693 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1694 ln, ppos, "\\}");
1695 return;
1696 }
1697
1698 switch (r->last->tok) {
1699 case ROFF_el:
1700 case ROFF_ie:
1701 case ROFF_if:
1702 break;
1703 default:
1704 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1705 ln, ppos, "\\}");
1706 return;
1707 }
1708
1709 if (r->last->endspan > -1) {
1710 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1711 ln, ppos, "\\}");
1712 return;
1713 }
1714
1715 roffnode_pop(r);
1716 roffnode_cleanscope(r);
1717 return;
1718 }
1719
1720 static enum rofferr
1721 roff_block(ROFF_ARGS)
1722 {
1723 const char *name;
1724 char *iname, *cp;
1725 size_t namesz;
1726
1727 /* Ignore groff compatibility mode for now. */
1728
1729 if (tok == ROFF_de1)
1730 tok = ROFF_de;
1731 else if (tok == ROFF_dei1)
1732 tok = ROFF_dei;
1733 else if (tok == ROFF_am1)
1734 tok = ROFF_am;
1735 else if (tok == ROFF_ami1)
1736 tok = ROFF_ami;
1737
1738 /* Parse the macro name argument. */
1739
1740 cp = buf->buf + pos;
1741 if (tok == ROFF_ig) {
1742 iname = NULL;
1743 namesz = 0;
1744 } else {
1745 iname = cp;
1746 namesz = roff_getname(r, &cp, ln, ppos);
1747 iname[namesz] = '\0';
1748 }
1749
1750 /* Resolve the macro name argument if it is indirect. */
1751
1752 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1753 if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1754 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1755 r->parse, ln, (int)(iname - buf->buf),
1756 "%.*s", (int)namesz, iname);
1757 namesz = 0;
1758 } else
1759 namesz = strlen(name);
1760 } else
1761 name = iname;
1762
1763 if (namesz == 0 && tok != ROFF_ig) {
1764 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
1765 ln, ppos, roff_name[tok]);
1766 return ROFF_IGN;
1767 }
1768
1769 roffnode_push(r, tok, name, ln, ppos);
1770
1771 /*
1772 * At the beginning of a `de' macro, clear the existing string
1773 * with the same name, if there is one. New content will be
1774 * appended from roff_block_text() in multiline mode.
1775 */
1776
1777 if (tok == ROFF_de || tok == ROFF_dei)
1778 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1779
1780 if (*cp == '\0')
1781 return ROFF_IGN;
1782
1783 /* Get the custom end marker. */
1784
1785 iname = cp;
1786 namesz = roff_getname(r, &cp, ln, ppos);
1787
1788 /* Resolve the end marker if it is indirect. */
1789
1790 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1791 if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1792 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1793 r->parse, ln, (int)(iname - buf->buf),
1794 "%.*s", (int)namesz, iname);
1795 namesz = 0;
1796 } else
1797 namesz = strlen(name);
1798 } else
1799 name = iname;
1800
1801 if (namesz)
1802 r->last->end = mandoc_strndup(name, namesz);
1803
1804 if (*cp != '\0')
1805 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
1806 ln, pos, ".%s ... %s", roff_name[tok], cp);
1807
1808 return ROFF_IGN;
1809 }
1810
1811 static enum rofferr
1812 roff_block_sub(ROFF_ARGS)
1813 {
1814 enum roff_tok t;
1815 int i, j;
1816
1817 /*
1818 * First check whether a custom macro exists at this level. If
1819 * it does, then check against it. This is some of groff's
1820 * stranger behaviours. If we encountered a custom end-scope
1821 * tag and that tag also happens to be a "real" macro, then we
1822 * need to try interpreting it again as a real macro. If it's
1823 * not, then return ignore. Else continue.
1824 */
1825
1826 if (r->last->end) {
1827 for (i = pos, j = 0; r->last->end[j]; j++, i++)
1828 if (buf->buf[i] != r->last->end[j])
1829 break;
1830
1831 if (r->last->end[j] == '\0' &&
1832 (buf->buf[i] == '\0' ||
1833 buf->buf[i] == ' ' ||
1834 buf->buf[i] == '\t')) {
1835 roffnode_pop(r);
1836 roffnode_cleanscope(r);
1837
1838 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
1839 i++;
1840
1841 pos = i;
1842 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
1843 TOKEN_NONE)
1844 return ROFF_RERUN;
1845 return ROFF_IGN;
1846 }
1847 }
1848
1849 /*
1850 * If we have no custom end-query or lookup failed, then try
1851 * pulling it out of the hashtable.
1852 */
1853
1854 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1855
1856 if (t != ROFF_cblock) {
1857 if (tok != ROFF_ig)
1858 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
1859 return ROFF_IGN;
1860 }
1861
1862 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
1863 }
1864
1865 static enum rofferr
1866 roff_block_text(ROFF_ARGS)
1867 {
1868
1869 if (tok != ROFF_ig)
1870 roff_setstr(r, r->last->name, buf->buf + pos, 2);
1871
1872 return ROFF_IGN;
1873 }
1874
1875 static enum rofferr
1876 roff_cond_sub(ROFF_ARGS)
1877 {
1878 enum roff_tok t;
1879 char *ep;
1880 int rr;
1881
1882 rr = r->last->rule;
1883 roffnode_cleanscope(r);
1884 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1885
1886 /*
1887 * Fully handle known macros when they are structurally
1888 * required or when the conditional evaluated to true.
1889 */
1890
1891 if (t != TOKEN_NONE && (rr || roffs[t].flags & ROFFMAC_STRUCT))
1892 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
1893
1894 /*
1895 * If `\}' occurs on a macro line without a preceding macro,
1896 * drop the line completely.
1897 */
1898
1899 ep = buf->buf + pos;
1900 if (ep[0] == '\\' && ep[1] == '}')
1901 rr = 0;
1902
1903 /* Always check for the closing delimiter `\}'. */
1904
1905 while ((ep = strchr(ep, '\\')) != NULL) {
1906 if (*(++ep) == '}') {
1907 *ep = '&';
1908 roff_ccond(r, ln, ep - buf->buf - 1);
1909 }
1910 if (*ep != '\0')
1911 ++ep;
1912 }
1913 return rr ? ROFF_CONT : ROFF_IGN;
1914 }
1915
1916 static enum rofferr
1917 roff_cond_text(ROFF_ARGS)
1918 {
1919 char *ep;
1920 int rr;
1921
1922 rr = r->last->rule;
1923 roffnode_cleanscope(r);
1924
1925 ep = buf->buf + pos;
1926 while ((ep = strchr(ep, '\\')) != NULL) {
1927 if (*(++ep) == '}') {
1928 *ep = '&';
1929 roff_ccond(r, ln, ep - buf->buf - 1);
1930 }
1931 if (*ep != '\0')
1932 ++ep;
1933 }
1934 return rr ? ROFF_CONT : ROFF_IGN;
1935 }
1936
1937 /* --- handling of numeric and conditional expressions -------------------- */
1938
1939 /*
1940 * Parse a single signed integer number. Stop at the first non-digit.
1941 * If there is at least one digit, return success and advance the
1942 * parse point, else return failure and let the parse point unchanged.
1943 * Ignore overflows, treat them just like the C language.
1944 */
1945 static int
1946 roff_getnum(const char *v, int *pos, int *res, int flags)
1947 {
1948 int myres, scaled, n, p;
1949
1950 if (NULL == res)
1951 res = &myres;
1952
1953 p = *pos;
1954 n = v[p] == '-';
1955 if (n || v[p] == '+')
1956 p++;
1957
1958 if (flags & ROFFNUM_WHITE)
1959 while (isspace((unsigned char)v[p]))
1960 p++;
1961
1962 for (*res = 0; isdigit((unsigned char)v[p]); p++)
1963 *res = 10 * *res + v[p] - '0';
1964 if (p == *pos + n)
1965 return 0;
1966
1967 if (n)
1968 *res = -*res;
1969
1970 /* Each number may be followed by one optional scaling unit. */
1971
1972 switch (v[p]) {
1973 case 'f':
1974 scaled = *res * 65536;
1975 break;
1976 case 'i':
1977 scaled = *res * 240;
1978 break;
1979 case 'c':
1980 scaled = *res * 240 / 2.54;
1981 break;
1982 case 'v':
1983 case 'P':
1984 scaled = *res * 40;
1985 break;
1986 case 'm':
1987 case 'n':
1988 scaled = *res * 24;
1989 break;
1990 case 'p':
1991 scaled = *res * 10 / 3;
1992 break;
1993 case 'u':
1994 scaled = *res;
1995 break;
1996 case 'M':
1997 scaled = *res * 6 / 25;
1998 break;
1999 default:
2000 scaled = *res;
2001 p--;
2002 break;
2003 }
2004 if (flags & ROFFNUM_SCALE)
2005 *res = scaled;
2006
2007 *pos = p + 1;
2008 return 1;
2009 }
2010
/*
 * Evaluate a string comparison condition.
 * The character at v[*pos] is the delimiter.
 * Succeed if the string up to the second occurrence of the
 * delimiter matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or lacking that, to the end of the line.
 * Returns 1 if the strings match, or 0 otherwise.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim;	/* the initial delimiter */
	const char	*lhs;	/* cursor in the first string */
	const char	*rhs;	/* cursor in the second string */
	int		 equal;

	equal = 0;
	delim = v + *pos;
	lhs = delim + 1;
	rhs = strchr(lhs, *delim);

	/* Without a middle delimiter, rhs remains NULL. */

	if (rhs != NULL) {
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: look for the end. */
				rhs = strchr(rhs, *delim);
				break;
			}
			if (*rhs == *delim) {
				/* Both strings end at the same time. */
				equal = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return equal;
}
2053
2054 /*
2055 * Evaluate an optionally negated single character, numerical,
2056 * or string condition.
2057 */
2058 static int
2059 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2060 {
2061 char *cp, *name;
2062 size_t sz;
2063 int number, savepos, wanttrue;
2064
2065 if ('!' == v[*pos]) {
2066 wanttrue = 0;
2067 (*pos)++;
2068 } else
2069 wanttrue = 1;
2070
2071 switch (v[*pos]) {
2072 case '\0':
2073 return 0;
2074 case 'n':
2075 case 'o':
2076 (*pos)++;
2077 return wanttrue;
2078 case 'c':
2079 case 'd':
2080 case 'e':
2081 case 't':
2082 case 'v':
2083 (*pos)++;
2084 return !wanttrue;
2085 case 'r':
2086 cp = name = v + ++*pos;
2087 sz = roff_getname(r, &cp, ln, *pos);
2088 *pos = cp - v;
2089 return (sz && roff_hasregn(r, name, sz)) == wanttrue;
2090 default:
2091 break;
2092 }
2093
2094 savepos = *pos;
2095 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2096 return (number > 0) == wanttrue;
2097 else if (*pos == savepos)
2098 return roff_evalstrcond(v, pos) == wanttrue;
2099 else
2100 return 0;
2101 }
2102
2103 static enum rofferr
2104 roff_line_ignore(ROFF_ARGS)
2105 {
2106
2107 return ROFF_IGN;
2108 }
2109
2110 static enum rofferr
2111 roff_insec(ROFF_ARGS)
2112 {
2113
2114 mandoc_msg(MANDOCERR_REQ_INSEC, r->parse,
2115 ln, ppos, roff_name[tok]);
2116 return ROFF_IGN;
2117 }
2118
2119 static enum rofferr
2120 roff_unsupp(ROFF_ARGS)
2121 {
2122
2123 mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse,
2124 ln, ppos, roff_name[tok]);
2125 return ROFF_IGN;
2126 }
2127
2128 static enum rofferr
2129 roff_cond(ROFF_ARGS)
2130 {
2131
2132 roffnode_push(r, tok, NULL, ln, ppos);
2133
2134 /*
2135 * An `.el' has no conditional body: it will consume the value
2136 * of the current rstack entry set in prior `ie' calls or
2137 * defaults to DENY.
2138 *
2139 * If we're not an `el', however, then evaluate the conditional.
2140 */
2141
2142 r->last->rule = tok == ROFF_el ?
2143 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2144 roff_evalcond(r, ln, buf->buf, &pos);
2145
2146 /*
2147 * An if-else will put the NEGATION of the current evaluated
2148 * conditional into the stack of rules.
2149 */
2150
2151 if (tok == ROFF_ie) {
2152 if (r->rstackpos + 1 == r->rstacksz) {
2153 r->rstacksz += 16;
2154 r->rstack = mandoc_reallocarray(r->rstack,
2155 r->rstacksz, sizeof(int));
2156 }
2157 r->rstack[++r->rstackpos] = !r->last->rule;
2158 }
2159
2160 /* If the parent has false as its rule, then so do we. */
2161
2162 if (r->last->parent && !r->last->parent->rule)
2163 r->last->rule = 0;
2164
2165 /*
2166 * Determine scope.
2167 * If there is nothing on the line after the conditional,
2168 * not even whitespace, use next-line scope.
2169 */
2170
2171 if (buf->buf[pos] == '\0') {
2172 r->last->endspan = 2;
2173 goto out;
2174 }
2175
2176 while (buf->buf[pos] == ' ')
2177 pos++;
2178
2179 /* An opening brace requests multiline scope. */
2180
2181 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2182 r->last->endspan = -1;
2183 pos += 2;
2184 while (buf->buf[pos] == ' ')
2185 pos++;
2186 goto out;
2187 }
2188
2189 /*
2190 * Anything else following the conditional causes
2191 * single-line scope. Warn if the scope contains
2192 * nothing but trailing whitespace.
2193 */
2194
2195 if (buf->buf[pos] == '\0')
2196 mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
2197 ln, ppos, roff_name[tok]);
2198
2199 r->last->endspan = 1;
2200
2201 out:
2202 *offs = pos;
2203 return ROFF_RERUN;
2204 }
2205
2206 static enum rofferr
2207 roff_ds(ROFF_ARGS)
2208 {
2209 char *string;
2210 const char *name;
2211 size_t namesz;
2212
2213 /* Ignore groff compatibility mode for now. */
2214
2215 if (tok == ROFF_ds1)
2216 tok = ROFF_ds;
2217 else if (tok == ROFF_as1)
2218 tok = ROFF_as;
2219
2220 /*
2221 * The first word is the name of the string.
2222 * If it is empty or terminated by an escape sequence,
2223 * abort the `ds' request without defining anything.
2224 */
2225
2226 name = string = buf->buf + pos;
2227 if (*name == '\0')
2228 return ROFF_IGN;
2229
2230 namesz = roff_getname(r, &string, ln, pos);
2231 if (name[namesz] == '\\')
2232 return ROFF_IGN;
2233
2234 /* Read past the initial double-quote, if any. */
2235 if (*string == '"')
2236 string++;
2237
2238 /* The rest is the value. */
2239 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2240 ROFF_as == tok);
2241 return ROFF_IGN;
2242 }
2243
/*
 * Parse a single operator, one or two characters long, at v[*pos].
 * On success, store the internal operator code in *res, advance
 * *pos past the operator, and return the (non-zero) code.
 * On failure, return 0 and leave *pos unchanged; *res then holds
 * the unrecognized character.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	char	 op, next;
	int	 len;

	op = v[*pos];
	*res = op;
	len = 1;

	if (op == '<') {
		next = v[*pos + 1];
		if (next == '=') {		/* less than or equal */
			op = 'l';
			len = 2;
		} else if (next == '>') {	/* not equal */
			op = '!';
			len = 2;
		} else if (next == '?') {	/* minimum */
			op = 'i';
			len = 2;
		}
	} else if (op == '>') {
		next = v[*pos + 1];
		if (next == '=') {		/* greater than or equal */
			op = 'g';
			len = 2;
		} else if (next == '?') {	/* maximum */
			op = 'a';
			len = 2;
		}
	} else if (op == '=') {
		/* "==" is a synonym for "=". */
		if (v[*pos + 1] == '=')
			len = 2;
	} else {
		switch (op) {
		case '+':
		case '-':
		case '*':
		case '/':
		case '%':
		case '&':
		case ':':
			break;
		default:
			return 0;
		}
	}

	*res = op;
	*pos += len;
	return *res;
}
2307
2308 /*
2309 * Evaluate either a parenthesized numeric expression
2310 * or a single signed integer number.
2311 */
2312 static int
2313 roff_evalpar(struct roff *r, int ln,
2314 const char *v, int *pos, int *res, int flags)
2315 {
2316
2317 if ('(' != v[*pos])
2318 return roff_getnum(v, pos, res, flags);
2319
2320 (*pos)++;
2321 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2322 return 0;
2323
2324 /*
2325 * Omission of the closing parenthesis
2326 * is an error in validation mode,
2327 * but ignored in evaluation mode.
2328 */
2329
2330 if (')' == v[*pos])
2331 (*pos)++;
2332 else if (NULL == res)
2333 return 0;
2334
2335 return 1;
2336 }
2337
2338 /*
2339 * Evaluate a complete numeric expression.
2340 * Proceed left to right, there is no concept of precedence.
2341 */
2342 static int
2343 roff_evalnum(struct roff *r, int ln, const char *v,
2344 int *pos, int *res, int flags)
2345 {
2346 int mypos, operand2;
2347 char operator;
2348
2349 if (NULL == pos) {
2350 mypos = 0;
2351 pos = &mypos;
2352 }
2353
2354 if (flags & ROFFNUM_WHITE)
2355 while (isspace((unsigned char)v[*pos]))
2356 (*pos)++;
2357
2358 if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2359 return 0;
2360
2361 while (1) {
2362 if (flags & ROFFNUM_WHITE)
2363 while (isspace((unsigned char)v[*pos]))
2364 (*pos)++;
2365
2366 if ( ! roff_getop(v, pos, &operator))
2367 break;
2368
2369 if (flags & ROFFNUM_WHITE)
2370 while (isspace((unsigned char)v[*pos]))
2371 (*pos)++;
2372
2373 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2374 return 0;
2375
2376 if (flags & ROFFNUM_WHITE)
2377 while (isspace((unsigned char)v[*pos]))
2378 (*pos)++;
2379
2380 if (NULL == res)
2381 continue;
2382
2383 switch (operator) {
2384 case '+':
2385 *res += operand2;
2386 break;
2387 case '-':
2388 *res -= operand2;
2389 break;
2390 case '*':
2391 *res *= operand2;
2392 break;
2393 case '/':
2394 if (operand2 == 0) {
2395 mandoc_msg(MANDOCERR_DIVZERO,
2396 r->parse, ln, *pos, v);
2397 *res = 0;
2398 break;
2399 }
2400 *res /= operand2;
2401 break;
2402 case '%':
2403 if (operand2 == 0) {
2404 mandoc_msg(MANDOCERR_DIVZERO,
2405 r->parse, ln, *pos, v);
2406 *res = 0;
2407 break;
2408 }
2409 *res %= operand2;
2410 break;
2411 case '<':
2412 *res = *res < operand2;
2413 break;
2414 case '>':
2415 *res = *res > operand2;
2416 break;
2417 case 'l':
2418 *res = *res <= operand2;
2419 break;
2420 case 'g':
2421 *res = *res >= operand2;
2422 break;
2423 case '=':
2424 *res = *res == operand2;
2425 break;
2426 case '!':
2427 *res = *res != operand2;
2428 break;
2429 case '&':
2430 *res = *res && operand2;
2431 break;
2432 case ':':
2433 *res = *res || operand2;
2434 break;
2435 case 'i':
2436 if (operand2 < *res)
2437 *res = operand2;
2438 break;
2439 case 'a':
2440 if (operand2 > *res)
2441 *res = operand2;
2442 break;
2443 default:
2444 abort();
2445 }
2446 }
2447 return 1;
2448 }
2449
2450 /* --- register management ------------------------------------------------ */
2451
2452 void
2453 roff_setreg(struct roff *r, const char *name, int val, char sign)
2454 {
2455 struct roffreg *reg;
2456
2457 /* Search for an existing register with the same name. */
2458 reg = r->regtab;
2459
2460 while (reg && strcmp(name, reg->key.p))
2461 reg = reg->next;
2462
2463 if (NULL == reg) {
2464 /* Create a new register. */
2465 reg = mandoc_malloc(sizeof(struct roffreg));
2466 reg->key.p = mandoc_strdup(name);
2467 reg->key.sz = strlen(name);
2468 reg->val = 0;
2469 reg->next = r->regtab;
2470 r->regtab = reg;
2471 }
2472
2473 if ('+' == sign)
2474 reg->val += val;
2475 else if ('-' == sign)
2476 reg->val -= val;
2477 else
2478 reg->val = val;
2479 }
2480
2481 /*
2482 * Handle some predefined read-only number registers.
2483 * For now, return -1 if the requested register is not predefined;
2484 * in case a predefined read-only register having the value -1
2485 * were to turn up, another special value would have to be chosen.
2486 */
2487 static int
2488 roff_getregro(const struct roff *r, const char *name)
2489 {
2490
2491 switch (*name) {
2492 case '$': /* Number of arguments of the last macro evaluated. */
2493 return r->argc;
2494 case 'A': /* ASCII approximation mode is always off. */
2495 return 0;
2496 case 'g': /* Groff compatibility mode is always on. */
2497 return 1;
2498 case 'H': /* Fixed horizontal resolution. */
2499 return 24;
2500 case 'j': /* Always adjust left margin only. */
2501 return 0;
2502 case 'T': /* Some output device is always defined. */
2503 return 1;
2504 case 'V': /* Fixed vertical resolution. */
2505 return 40;
2506 default:
2507 return -1;
2508 }
2509 }
2510
2511 int
2512 roff_getreg(const struct roff *r, const char *name)
2513 {
2514 struct roffreg *reg;
2515 int val;
2516
2517 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
2518 val = roff_getregro(r, name + 1);
2519 if (-1 != val)
2520 return val;
2521 }
2522
2523 for (reg = r->regtab; reg; reg = reg->next)
2524 if (0 == strcmp(name, reg->key.p))
2525 return reg->val;
2526
2527 return 0;
2528 }
2529
2530 static int
2531 roff_getregn(const struct roff *r, const char *name, size_t len)
2532 {
2533 struct roffreg *reg;
2534 int val;
2535
2536 if ('.' == name[0] && 2 == len) {
2537 val = roff_getregro(r, name + 1);
2538 if (-1 != val)
2539 return val;
2540 }
2541
2542 for (reg = r->regtab; reg; reg = reg->next)
2543 if (len == reg->key.sz &&
2544 0 == strncmp(name, reg->key.p, len))
2545 return reg->val;
2546
2547 return 0;
2548 }
2549
2550 static int
2551 roff_hasregn(const struct roff *r, const char *name, size_t len)
2552 {
2553 struct roffreg *reg;
2554 int val;
2555
2556 if ('.' == name[0] && 2 == len) {
2557 val = roff_getregro(r, name + 1);
2558 if (-1 != val)
2559 return 1;
2560 }
2561
2562 for (reg = r->regtab; reg; reg = reg->next)
2563 if (len == reg->key.sz &&
2564 0 == strncmp(name, reg->key.p, len))
2565 return 1;
2566
2567 return 0;
2568 }
2569
2570 static void
2571 roff_freereg(struct roffreg *reg)
2572 {
2573 struct roffreg *old_reg;
2574
2575 while (NULL != reg) {
2576 free(reg->key.p);
2577 old_reg = reg;
2578 reg = reg->next;
2579 free(old_reg);
2580 }
2581 }
2582
2583 static enum rofferr
2584 roff_nr(ROFF_ARGS)
2585 {
2586 char *key, *val;
2587 size_t keysz;
2588 int iv;
2589 char sign;
2590
2591 key = val = buf->buf + pos;
2592 if (*key == '\0')
2593 return ROFF_IGN;
2594
2595 keysz = roff_getname(r, &val, ln, pos);
2596 if (key[keysz] == '\\')
2597 return ROFF_IGN;
2598 key[keysz] = '\0';
2599
2600 sign = *val;
2601 if (sign == '+' || sign == '-')
2602 val++;
2603
2604 if (roff_evalnum(r, ln, val, NULL, &iv, ROFFNUM_SCALE))
2605 roff_setreg(r, key, iv, sign);
2606
2607 return ROFF_IGN;
2608 }
2609
2610 static enum rofferr
2611 roff_rr(ROFF_ARGS)
2612 {
2613 struct roffreg *reg, **prev;
2614 char *name, *cp;
2615 size_t namesz;
2616
2617 name = cp = buf->buf + pos;
2618 if (*name == '\0')
2619 return ROFF_IGN;
2620 namesz = roff_getname(r, &cp, ln, pos);
2621 name[namesz] = '\0';
2622
2623 prev = &r->regtab;
2624 while (1) {
2625 reg = *prev;
2626 if (reg == NULL || !strcmp(name, reg->key.p))
2627 break;
2628 prev = &reg->next;
2629 }
2630 if (reg != NULL) {
2631 *prev = reg->next;
2632 free(reg->key.p);
2633 free(reg);
2634 }
2635 return ROFF_IGN;
2636 }
2637
2638 /* --- handler functions for roff requests -------------------------------- */
2639
2640 static enum rofferr
2641 roff_rm(ROFF_ARGS)
2642 {
2643 const char *name;
2644 char *cp;
2645 size_t namesz;
2646
2647 cp = buf->buf + pos;
2648 while (*cp != '\0') {
2649 name = cp;
2650 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
2651 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
2652 if (name[namesz] == '\\')
2653 break;
2654 }
2655 return ROFF_IGN;
2656 }
2657
2658 static enum rofferr
2659 roff_it(ROFF_ARGS)
2660 {
2661 int iv;
2662
2663 /* Parse the number of lines. */
2664
2665 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
2666 mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
2667 ln, ppos, buf->buf + 1);
2668 return ROFF_IGN;
2669 }
2670
2671 while (isspace((unsigned char)buf->buf[pos]))
2672 pos++;
2673
2674 /*
2675 * Arm the input line trap.
2676 * Special-casing "an-trap" is an ugly workaround to cope
2677 * with DocBook stupidly fiddling with man(7) internals.
2678 */
2679
2680 roffit_lines = iv;
2681 roffit_macro = mandoc_strdup(iv != 1 ||
2682 strcmp(buf->buf + pos, "an-trap") ?
2683 buf->buf + pos : "br");
2684 return ROFF_IGN;
2685 }
2686
2687 static enum rofferr
2688 roff_Dd(ROFF_ARGS)
2689 {
2690 const char *const *cp;
2691
2692 if ((r->options & (MPARSE_MDOC | MPARSE_QUICK)) == 0)
2693 for (cp = __mdoc_reserved; *cp; cp++)
2694 roff_setstr(r, *cp, NULL, 0);
2695
2696 if (r->format == 0)
2697 r->format = MPARSE_MDOC;
2698
2699 return ROFF_CONT;
2700 }
2701
2702 static enum rofferr
2703 roff_TH(ROFF_ARGS)
2704 {
2705 const char *const *cp;
2706
2707 if ((r->options & MPARSE_QUICK) == 0)
2708 for (cp = __man_reserved; *cp; cp++)
2709 roff_setstr(r, *cp, NULL, 0);
2710
2711 if (r->format == 0)
2712 r->format = MPARSE_MAN;
2713
2714 return ROFF_CONT;
2715 }
2716
2717 static enum rofferr
2718 roff_TE(ROFF_ARGS)
2719 {
2720
2721 if (NULL == r->tbl)
2722 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2723 ln, ppos, "TE");
2724 else if ( ! tbl_end(&r->tbl)) {
2725 free(buf->buf);
2726 buf->buf = mandoc_strdup(".sp");
2727 buf->sz = 4;
2728 return ROFF_REPARSE;
2729 }
2730 return ROFF_IGN;
2731 }
2732
2733 static enum rofferr
2734 roff_T_(ROFF_ARGS)
2735 {
2736
2737 if (NULL == r->tbl)
2738 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2739 ln, ppos, "T&");
2740 else
2741 tbl_restart(ln, ppos, r->tbl);
2742
2743 return ROFF_IGN;
2744 }
2745
2746 /*
2747 * Handle in-line equation delimiters.
2748 */
2749 static enum rofferr
2750 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
2751 {
2752 char *cp1, *cp2;
2753 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
2754
2755 /*
2756 * Outside equations, look for an opening delimiter.
2757 * If we are inside an equation, we already know it is
2758 * in-line, or this function wouldn't have been called;
2759 * so look for a closing delimiter.
2760 */
2761
2762 cp1 = buf->buf + pos;
2763 cp2 = strchr(cp1, r->eqn == NULL ?
2764 r->last_eqn->odelim : r->last_eqn->cdelim);
2765 if (cp2 == NULL)
2766 return ROFF_CONT;
2767
2768 *cp2++ = '\0';
2769 bef_pr = bef_nl = aft_nl = aft_pr = "";
2770
2771 /* Handle preceding text, protecting whitespace. */
2772
2773 if (*buf->buf != '\0') {
2774 if (r->eqn == NULL)
2775 bef_pr = "\\&";
2776 bef_nl = "\n";
2777 }
2778
2779 /*
2780 * Prepare replacing the delimiter with an equation macro
2781 * and drop leading white space from the equation.
2782 */
2783
2784 if (r->eqn == NULL) {
2785 while (*cp2 == ' ')
2786 cp2++;
2787 mac = ".EQ";
2788 } else
2789 mac = ".EN";
2790
2791 /* Handle following text, protecting whitespace. */
2792
2793 if (*cp2 != '\0') {
2794 aft_nl = "\n";
2795 if (r->eqn != NULL)
2796 aft_pr = "\\&";
2797 }
2798
2799 /* Do the actual replacement. */
2800
2801 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
2802 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
2803 free(buf->buf);
2804 buf->buf = cp1;
2805
2806 /* Toggle the in-line state of the eqn subsystem. */
2807
2808 r->eqn_inline = r->eqn == NULL;
2809 return ROFF_REPARSE;
2810 }
2811
2812 static enum rofferr
2813 roff_EQ(ROFF_ARGS)
2814 {
2815 struct eqn_node *e;
2816
2817 assert(r->eqn == NULL);
2818 e = eqn_alloc(ppos, ln, r->parse);
2819
2820 if (r->last_eqn) {
2821 r->last_eqn->next = e;
2822 e->delim = r->last_eqn->delim;
2823 e->odelim = r->last_eqn->odelim;
2824 e->cdelim = r->last_eqn->cdelim;
2825 } else
2826 r->first_eqn = r->last_eqn = e;
2827
2828 r->eqn = r->last_eqn = e;
2829
2830 if (buf->buf[pos] != '\0')
2831 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2832 ".EQ %s", buf->buf + pos);
2833
2834 return ROFF_IGN;
2835 }
2836
2837 static enum rofferr
2838 roff_EN(ROFF_ARGS)
2839 {
2840
2841 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
2842 return ROFF_IGN;
2843 }
2844
2845 static enum rofferr
2846 roff_TS(ROFF_ARGS)
2847 {
2848 struct tbl_node *tbl;
2849
2850 if (r->tbl) {
2851 mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
2852 ln, ppos, "TS breaks TS");
2853 tbl_end(&r->tbl);
2854 }
2855
2856 tbl = tbl_alloc(ppos, ln, r->parse);
2857
2858 if (r->last_tbl)
2859 r->last_tbl->next = tbl;
2860 else
2861 r->first_tbl = r->last_tbl = tbl;
2862
2863 r->tbl = r->last_tbl = tbl;
2864 return ROFF_IGN;
2865 }
2866
2867 static enum rofferr
2868 roff_onearg(ROFF_ARGS)
2869 {
2870 struct roff_node *n;
2871 char *cp;
2872 int npos;
2873
2874 if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
2875 (tok == ROFF_sp || tok == ROFF_ti))
2876 man_breakscope(r->man, tok);
2877
2878 if (tok == ROFF_ce && roffce_node != NULL) {
2879 r->man->last = roffce_node;
2880 r->man->next = ROFF_NEXT_SIBLING;
2881 }
2882
2883 roff_elem_alloc(r->man, ln, ppos, tok);
2884 n = r->man->last;
2885
2886 cp = buf->buf + pos;
2887 if (*cp != '\0') {
2888 while (*cp != '\0' && *cp != ' ')
2889 cp++;
2890 while (*cp == ' ')
2891 *cp++ = '\0';
2892 if (*cp != '\0')
2893 mandoc_vmsg(MANDOCERR_ARG_EXCESS,
2894 r->parse, ln, cp - buf->buf,
2895 "%s ... %s", roff_name[tok], cp);
2896 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
2897 }
2898
2899 if (tok == ROFF_ce) {
2900 if (r->man->last->tok == ROFF_ce) {
2901 roff_word_alloc(r->man, ln, pos, "1");
2902 r->man->last->flags |= NODE_NOSRC;
2903 }
2904 npos = 0;
2905 if (roff_evalnum(r, ln, r->man->last->string, &npos,
2906 &roffce_lines, 0) == 0) {
2907 mandoc_vmsg(MANDOCERR_CE_NONUM,
2908 r->parse, ln, pos, "ce %s", buf->buf + pos);
2909 roffce_lines = 1;
2910 }
2911 if (roffce_lines < 1) {
2912 r->man->last = r->man->last->parent;
2913 roffce_node = NULL;
2914 roffce_lines = 0;
2915 } else
2916 roffce_node = r->man->last->parent;
2917 } else {
2918 n->flags |= NODE_VALID | NODE_ENDED;
2919 r->man->last = n;
2920 }
2921 n->flags |= NODE_LINE;
2922 r->man->next = ROFF_NEXT_SIBLING;
2923 return ROFF_IGN;
2924 }
2925
2926 static enum rofferr
2927 roff_manyarg(ROFF_ARGS)
2928 {
2929 struct roff_node *n;
2930 char *sp, *ep;
2931
2932 roff_elem_alloc(r->man, ln, ppos, tok);
2933 n = r->man->last;
2934
2935 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
2936 while (*ep != '\0' && *ep != ' ')
2937 ep++;
2938 while (*ep == ' ')
2939 *ep++ = '\0';
2940 roff_word_alloc(r->man, ln, sp - buf->buf, sp);
2941 }
2942
2943 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
2944 r->man->last = n;
2945 r->man->next = ROFF_NEXT_SIBLING;
2946 return ROFF_IGN;
2947 }
2948
2949 static enum rofferr
2950 roff_br(ROFF_ARGS)
2951 {
2952 if (r->man->flags & (MAN_BLINE | MAN_ELINE))
2953 man_breakscope(r->man, ROFF_br);
2954 roff_elem_alloc(r->man, ln, ppos, ROFF_br);
2955 if (buf->buf[pos] != '\0')
2956 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2957 "%s %s", roff_name[tok], buf->buf + pos);
2958 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
2959 r->man->next = ROFF_NEXT_SIBLING;
2960 return ROFF_IGN;
2961 }
2962
2963 static enum rofferr
2964 roff_cc(ROFF_ARGS)
2965 {
2966 const char *p;
2967
2968 p = buf->buf + pos;
2969
2970 if (*p == '\0' || (r->control = *p++) == '.')
2971 r->control = '\0';
2972
2973 if (*p != '\0')
2974 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
2975 ln, p - buf->buf, "cc ... %s", p);
2976
2977 return ROFF_IGN;
2978 }
2979
2980 static enum rofferr
2981 roff_ec(ROFF_ARGS)
2982 {
2983 const char *p;
2984
2985 p = buf->buf + pos;
2986 if (*p == '\0')
2987 r->escape = '\\';
2988 else {
2989 r->escape = *p;
2990 if (*++p != '\0')
2991 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
2992 ln, p - buf->buf, "ec ... %s", p);
2993 }
2994 return ROFF_IGN;
2995 }
2996
2997 static enum rofferr
2998 roff_eo(ROFF_ARGS)
2999 {
3000 r->escape = '\0';
3001 if (buf->buf[pos] != '\0')
3002 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse,
3003 ln, pos, "eo %s", buf->buf + pos);
3004 return ROFF_IGN;
3005 }
3006
3007 static enum rofferr
3008 roff_tr(ROFF_ARGS)
3009 {
3010 const char *p, *first, *second;
3011 size_t fsz, ssz;
3012 enum mandoc_esc esc;
3013
3014 p = buf->buf + pos;
3015
3016 if (*p == '\0') {
3017 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr");
3018 return ROFF_IGN;
3019 }
3020
3021 while (*p != '\0') {
3022 fsz = ssz = 1;
3023
3024 first = p++;
3025 if (*first == '\\') {
3026 esc = mandoc_escape(&p, NULL, NULL);
3027 if (esc == ESCAPE_ERROR) {
3028 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
3029 ln, (int)(p - buf->buf), first);
3030 return ROFF_IGN;
3031 }
3032 fsz = (size_t)(p - first);
3033 }
3034
3035 second = p++;
3036 if (*second == '\\') {
3037 esc = mandoc_escape(&p, NULL, NULL);
3038 if (esc == ESCAPE_ERROR) {
3039 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
3040 ln, (int)(p - buf->buf), second);
3041 return ROFF_IGN;
3042 }
3043 ssz = (size_t)(p - second);
3044 } else if (*second == '\0') {
3045 mandoc_vmsg(MANDOCERR_TR_ODD, r->parse,
3046 ln, first - buf->buf, "tr %s", first);
3047 second = " ";
3048 p--;
3049 }
3050
3051 if (fsz > 1) {
3052 roff_setstrn(&r->xmbtab, first, fsz,
3053 second, ssz, 0);
3054 continue;
3055 }
3056
3057 if (r->xtab == NULL)
3058 r->xtab = mandoc_calloc(128,
3059 sizeof(struct roffstr));
3060
3061 free(r->xtab[(int)*first].p);
3062 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3063 r->xtab[(int)*first].sz = ssz;
3064 }
3065
3066 return ROFF_IGN;
3067 }
3068
3069 static enum rofferr
3070 roff_rn(ROFF_ARGS)
3071 {
3072 const char *value;
3073 char *oldn, *newn, *end;
3074 size_t oldsz, newsz;
3075
3076 oldn = newn = buf->buf + pos;
3077 if (*oldn == '\0')
3078 return ROFF_IGN;
3079
3080 oldsz = roff_getname(r, &newn, ln, pos);
3081 if (oldn[oldsz] == '\\' || *newn == '\0')
3082 return ROFF_IGN;
3083
3084 end = newn;
3085 newsz = roff_getname(r, &end, ln, newn - buf->buf);
3086 if (newsz == 0)
3087 return ROFF_IGN;
3088
3089 /*
3090 * Rename a user-defined macro bearing the old name,
3091 * overriding an existing renamed high-level macro
3092 * bearing the new name, if that exists.
3093 */
3094
3095 if ((value = roff_getstrn(r, oldn, oldsz)) != NULL) {
3096 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3097 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3098 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3099 return ROFF_IGN;
3100 }
3101
3102 /*
3103 * Rename a high-level macro bearing the old name,
3104 * either renaming it a second time if it was already
3105 * renamed before, or renaming it for the first time.
3106 * In both cases, override an existing user-defined
3107 * macro bearing the new name, if that exists.
3108 */
3109
3110 if ((value = roff_getrenn(r, oldn, oldsz)) != NULL) {
3111 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3112 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3113 } else
3114 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3115 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3116 return ROFF_IGN;
3117 }
3118
3119 static enum rofferr
3120 roff_so(ROFF_ARGS)
3121 {
3122 char *name, *cp;
3123
3124 name = buf->buf + pos;
3125 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
3126
3127 /*
3128 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3129 * opening anything that's not in our cwd or anything beneath
3130 * it. Thus, explicitly disallow traversing up the file-system
3131 * or using absolute paths.
3132 */
3133
3134 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3135 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
3136 ".so %s", name);
3137 buf->sz = mandoc_asprintf(&cp,
3138 ".sp\nSee the file %s.\n.sp", name) + 1;
3139 free(buf->buf);
3140 buf->buf = cp;
3141 *offs = 0;
3142 return ROFF_REPARSE;
3143 }
3144
3145 *offs = pos;
3146 return ROFF_SO;
3147 }
3148
3149 /* --- user defined strings and macros ------------------------------------ */
3150
3151 static enum rofferr
3152 roff_userdef(ROFF_ARGS)
3153 {
3154 const char *arg[9], *ap;
3155 char *cp, *n1, *n2;
3156 int expand_count, i, ib, ie;
3157 size_t asz, rsz;
3158
3159 /*
3160 * Collect pointers to macro argument strings
3161 * and NUL-terminate them.
3162 */
3163
3164 r->argc = 0;
3165 cp = buf->buf + pos;
3166 for (i = 0; i < 9; i++) {
3167 if (*cp == '\0')
3168 arg[i] = "";
3169 else {
3170 arg[i] = mandoc_getarg(r->parse, &cp, ln, &pos);
3171 r->argc = i + 1;
3172 }
3173 }
3174
3175 /*
3176 * Expand macro arguments.
3177 */
3178
3179 buf->sz = strlen(r->current_string) + 1;
3180 n1 = n2 = cp = mandoc_malloc(buf->sz);
3181 memcpy(n1, r->current_string, buf->sz);
3182 expand_count = 0;
3183 while (*cp != '\0') {
3184
3185 /* Scan ahead for the next argument invocation. */
3186
3187 if (*cp++ != '\\')
3188 continue;
3189 if (*cp++ != '$')
3190 continue;
3191 if (*cp == '*') { /* \\$* inserts all arguments */
3192 ib = 0;
3193 ie = r->argc - 1;
3194 } else { /* \\$1 .. \\$9 insert one argument */
3195 ib = ie = *cp - '1';
3196 if (ib < 0 || ib > 8)
3197 continue;
3198 }
3199 cp -= 2;
3200
3201 /*
3202 * Prevent infinite recursion.
3203 */
3204
3205 if (cp >= n2)
3206 expand_count = 1;
3207 else if (++expand_count > EXPAND_LIMIT) {
3208 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
3209 ln, (int)(cp - n1), NULL);
3210 free(buf->buf);
3211 buf->buf = n1;
3212 return ROFF_IGN;
3213 }
3214
3215 /*
3216 * Determine the size of the expanded argument,
3217 * taking escaping of quotes into account.
3218 */
3219
3220 asz = ie > ib ? ie - ib : 0; /* for blanks */
3221 for (i = ib; i <= ie; i++) {
3222 for (ap = arg[i]; *ap != '\0'; ap++) {
3223 asz++;
3224 if (*ap == '"')
3225 asz += 3;
3226 }
3227 }
3228 if (asz != 3) {
3229
3230 /*
3231 * Determine the size of the rest of the
3232 * unexpanded macro, including the NUL.
3233 */
3234
3235 rsz = buf->sz - (cp - n1) - 3;
3236
3237 /*
3238 * When shrinking, move before
3239 * releasing the storage.
3240 */
3241
3242 if (asz < 3)
3243 memmove(cp + asz, cp + 3, rsz);
3244
3245 /*
3246 * Resize the storage for the macro
3247 * and readjust the parse pointer.
3248 */
3249
3250 buf->sz += asz - 3;
3251 n2 = mandoc_realloc(n1, buf->sz);
3252 cp = n2 + (cp - n1);
3253 n1 = n2;
3254
3255 /*
3256 * When growing, make room
3257 * for the expanded argument.
3258 */
3259
3260 if (asz > 3)
3261 memmove(cp + asz, cp + 3, rsz);
3262 }
3263
3264 /* Copy the expanded argument, escaping quotes. */
3265
3266 n2 = cp;
3267 for (i = ib; i <= ie; i++) {
3268 for (ap = arg[i]; *ap != '\0'; ap++) {
3269 if (*ap == '"') {
3270 memcpy(n2, "\\(dq", 4);
3271 n2 += 4;
3272 } else
3273 *n2++ = *ap;
3274 }
3275 if (i < ie)
3276 *n2++ = ' ';
3277 }
3278 }
3279
3280 /*
3281 * Replace the macro invocation
3282 * by the expanded macro.
3283 */
3284
3285 free(buf->buf);
3286 buf->buf = n1;
3287 *offs = 0;
3288
3289 return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
3290 ROFF_REPARSE : ROFF_APPEND;
3291 }
3292
3293 /*
3294 * Calling a high-level macro that was renamed with .rn.
3295 * r->current_string has already been set up by roff_parse().
3296 */
3297 static enum rofferr
3298 roff_renamed(ROFF_ARGS)
3299 {
3300 char *nbuf;
3301
3302 buf->sz = mandoc_asprintf(&nbuf, ".%s %s", r->current_string,
3303 buf->buf + pos) + 1;
3304 free(buf->buf);
3305 buf->buf = nbuf;
3306 return ROFF_CONT;
3307 }
3308
3309 static size_t
3310 roff_getname(struct roff *r, char **cpp, int ln, int pos)
3311 {
3312 char *name, *cp;
3313 size_t namesz;
3314
3315 name = *cpp;
3316 if ('\0' == *name)
3317 return 0;
3318
3319 /* Read until end of name and terminate it with NUL. */
3320 for (cp = name; 1; cp++) {
3321 if ('\0' == *cp || ' ' == *cp) {
3322 namesz = cp - name;
3323 break;
3324 }
3325 if ('\\' != *cp)
3326 continue;
3327 namesz = cp - name;
3328 if ('{' == cp[1] || '}' == cp[1])
3329 break;
3330 cp++;
3331 if ('\\' == *cp)
3332 continue;
3333 mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
3334 "%.*s", (int)(cp - name + 1), name);
3335 mandoc_escape((const char **)&cp, NULL, NULL);
3336 break;
3337 }
3338
3339 /* Read past spaces. */
3340 while (' ' == *cp)
3341 cp++;
3342
3343 *cpp = cp;
3344 return namesz;
3345 }
3346
3347 /*
3348 * Store *string into the user-defined string called *name.
3349 * To clear an existing entry, call with (*r, *name, NULL, 0).
3350 * append == 0: replace mode
3351 * append == 1: single-line append mode
3352 * append == 2: multiline append mode, append '\n' after each call
3353 */
3354 static void
3355 roff_setstr(struct roff *r, const char *name, const char *string,
3356 int append)
3357 {
3358
3359 roff_setstrn(&r->strtab, name, strlen(name), string,
3360 string ? strlen(string) : 0, append);
3361 }
3362
3363 static void
3364 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
3365 const char *string, size_t stringsz, int append)
3366 {
3367 struct roffkv *n;
3368 char *c;
3369 int i;
3370 size_t oldch, newch;
3371
3372 /* Search for an existing string with the same name. */
3373 n = *r;
3374
3375 while (n && (namesz != n->key.sz ||
3376 strncmp(n->key.p, name, namesz)))
3377 n = n->next;
3378
3379 if (NULL == n) {
3380 /* Create a new string table entry. */
3381 n = mandoc_malloc(sizeof(struct roffkv));
3382 n->key.p = mandoc_strndup(name, namesz);
3383 n->key.sz = namesz;
3384 n->val.p = NULL;
3385 n->val.sz = 0;
3386 n->next = *r;
3387 *r = n;
3388 } else if (0 == append) {
3389 free(n->val.p);
3390 n->val.p = NULL;
3391 n->val.sz = 0;
3392 }
3393
3394 if (NULL == string)
3395 return;
3396
3397 /*
3398 * One additional byte for the '\n' in multiline mode,
3399 * and one for the terminating '\0'.
3400 */
3401 newch = stringsz + (1 < append ? 2u : 1u);
3402
3403 if (NULL == n->val.p) {
3404 n->val.p = mandoc_malloc(newch);
3405 *n->val.p = '\0';
3406 oldch = 0;
3407 } else {
3408 oldch = n->val.sz;
3409 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
3410 }
3411
3412 /* Skip existing content in the destination buffer. */
3413 c = n->val.p + (int)oldch;
3414
3415 /* Append new content to the destination buffer. */
3416 i = 0;
3417 while (i < (int)stringsz) {
3418 /*
3419 * Rudimentary roff copy mode:
3420 * Handle escaped backslashes.
3421 */
3422 if ('\\' == string[i] && '\\' == string[i + 1])
3423 i++;
3424 *c++ = string[i++];
3425 }
3426
3427 /* Append terminating bytes. */
3428 if (1 < append)
3429 *c++ = '\n';
3430
3431 *c = '\0';
3432 n->val.sz = (int)(c - n->val.p);
3433 }
3434
3435 static const char *
3436 roff_getstrn(const struct roff *r, const char *name, size_t len)
3437 {
3438 const struct roffkv *n;
3439 int i;
3440
3441 for (n = r->strtab; n; n = n->next)
3442 if (0 == strncmp(name, n->key.p, len) &&
3443 '\0' == n->key.p[(int)len])
3444 return n->val.p;
3445
3446 for (i = 0; i < PREDEFS_MAX; i++)
3447 if (0 == strncmp(name, predefs[i].name, len) &&
3448 '\0' == predefs[i].name[(int)len])
3449 return predefs[i].str;
3450
3451 return NULL;
3452 }
3453
3454 /*
3455 * Check whether *name is the renamed name of a high-level macro.
3456 * Return the standard name, or NULL if it is not.
3457 */
3458 static const char *
3459 roff_getrenn(const struct roff *r, const char *name, size_t len)
3460 {
3461 const struct roffkv *n;
3462
3463 for (n = r->rentab; n; n = n->next)
3464 if (0 == strncmp(name, n->key.p, len) &&
3465 '\0' == n->key.p[(int)len])
3466 return n->val.p;
3467
3468 return NULL;
3469 }
3470
3471 static void
3472 roff_freestr(struct roffkv *r)
3473 {
3474 struct roffkv *n, *nn;
3475
3476 for (n = r; n; n = nn) {
3477 free(n->key.p);
3478 free(n->val.p);
3479 nn = n->next;
3480 free(n);
3481 }
3482 }
3483
3484 /* --- accessors and utility functions ------------------------------------ */
3485
3486 const struct tbl_span *
3487 roff_span(const struct roff *r)
3488 {
3489
3490 return r->tbl ? tbl_span(r->tbl) : NULL;
3491 }
3492
3493 const struct eqn *
3494 roff_eqn(const struct roff *r)
3495 {
3496
3497 return r->last_eqn ? &r->last_eqn->eqn : NULL;
3498 }
3499
3500 /*
3501 * Duplicate an input string, making the appropriate character
3502 * conversations (as stipulated by `tr') along the way.
3503 * Returns a heap-allocated string with all the replacements made.
3504 */
3505 char *
3506 roff_strdup(const struct roff *r, const char *p)
3507 {
3508 const struct roffkv *cp;
3509 char *res;
3510 const char *pp;
3511 size_t ssz, sz;
3512 enum mandoc_esc esc;
3513
3514 if (NULL == r->xmbtab && NULL == r->xtab)
3515 return mandoc_strdup(p);
3516 else if ('\0' == *p)
3517 return mandoc_strdup("");
3518
3519 /*
3520 * Step through each character looking for term matches
3521 * (remember that a `tr' can be invoked with an escape, which is
3522 * a glyph but the escape is multi-character).
3523 * We only do this if the character hash has been initialised
3524 * and the string is >0 length.
3525 */
3526
3527 res = NULL;
3528 ssz = 0;
3529
3530 while ('\0' != *p) {
3531 assert((unsigned int)*p < 128);
3532 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
3533 sz = r->xtab[(int)*p].sz;
3534 res = mandoc_realloc(res, ssz + sz + 1);
3535 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
3536 ssz += sz;
3537 p++;
3538 continue;
3539 } else if ('\\' != *p) {
3540 res = mandoc_realloc(res, ssz + 2);
3541 res[ssz++] = *p++;
3542 continue;
3543 }
3544
3545 /* Search for term matches. */
3546 for (cp = r->xmbtab; cp; cp = cp->next)
3547 if (0 == strncmp(p, cp->key.p, cp->key.sz))
3548 break;
3549
3550 if (NULL != cp) {
3551 /*
3552 * A match has been found.
3553 * Append the match to the array and move
3554 * forward by its keysize.
3555 */
3556 res = mandoc_realloc(res,
3557 ssz + cp->val.sz + 1);
3558 memcpy(res + ssz, cp->val.p, cp->val.sz);
3559 ssz += cp->val.sz;
3560 p += (int)cp->key.sz;
3561 continue;
3562 }
3563
3564 /*
3565 * Handle escapes carefully: we need to copy
3566 * over just the escape itself, or else we might
3567 * do replacements within the escape itself.
3568 * Make sure to pass along the bogus string.
3569 */
3570 pp = p++;
3571 esc = mandoc_escape(&p, NULL, NULL);
3572 if (ESCAPE_ERROR == esc) {
3573 sz = strlen(pp);
3574 res = mandoc_realloc(res, ssz + sz + 1);
3575 memcpy(res + ssz, pp, sz);
3576 break;
3577 }
3578 /*
3579 * We bail out on bad escapes.
3580 * No need to warn: we already did so when
3581 * roff_res() was called.
3582 */
3583 sz = (int)(p - pp);
3584 res = mandoc_realloc(res, ssz + sz + 1);
3585 memcpy(res + ssz, pp, sz);
3586 ssz += sz;
3587 }
3588
3589 res[(int)ssz] = '\0';
3590 return res;
3591 }
3592
3593 int
3594 roff_getformat(const struct roff *r)
3595 {
3596
3597 return r->format;
3598 }
3599
3600 /*
3601 * Find out whether a line is a macro line or not.
3602 * If it is, adjust the current position and return one; if it isn't,
3603 * return zero and don't change the current position.
3604 * If the control character has been set with `.cc', then let that grain
3605 * precedence.
3606 * This is slighly contrary to groff, where using the non-breaking
3607 * control character when `cc' has been invoked will cause the
3608 * non-breaking macro contents to be printed verbatim.
3609 */
3610 int
3611 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
3612 {
3613 int pos;
3614
3615 pos = *ppos;
3616
3617 if (r->control != '\0' && cp[pos] == r->control)
3618 pos++;
3619 else if (r->control != '\0')
3620 return 0;
3621 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
3622 pos += 2;
3623 else if ('.' == cp[pos] || '\'' == cp[pos])
3624 pos++;
3625 else
3626 return 0;
3627
3628 while (' ' == cp[pos] || '\t' == cp[pos])
3629 pos++;
3630
3631 *ppos = pos;
3632 return 1;
3633 }