]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
move .ll to the roff modules
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.298 2017/05/05 13:17:55 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31 #include "mandoc.h"
32 #include "mandoc_aux.h"
33 #include "mandoc_ohash.h"
34 #include "roff.h"
35 #include "libmandoc.h"
36 #include "roff_int.h"
37 #include "libroff.h"
38
/*
 * Upper bound on the number of string expansions carried out for a
 * single input line; the cap exists to break infinite expansion loops.
 */
#define	EXPAND_LIMIT	1000
41
42 /* --- data types --------------------------------------------------------- */
43
/*
 * Minimal string buffer: a character pointer together with its
 * cached length.
 */
struct roffstr {
	char	*p;	/* NUL-terminated buffer */
	size_t	 sz;	/* cached result of strlen(p) */
};
51
52 /*
53 * A key-value roffstr pair as part of a singly-linked list.
54 */
55 struct roffkv {
56 struct roffstr key;
57 struct roffstr val;
58 struct roffkv *next; /* next in list */
59 };
60
61 /*
62 * A single number register as part of a singly-linked list.
63 */
64 struct roffreg {
65 struct roffstr key;
66 int val;
67 struct roffreg *next;
68 };
69
70 /*
71 * Association of request and macro names with token IDs.
72 */
73 struct roffreq {
74 enum roff_tok tok;
75 char name[];
76 };
77
/*
 * State of the roff parser.
 */
struct roff {
	/* Surrounding parser objects. */
	struct mparse	*parse;		/* parse point */
	struct roff_man	*man;		/* mdoc or man parser */

	/* Pending block instructions and conditional state. */
	struct roffnode	*last;		/* leaf of stack */
	int		*rstack;	/* stack of inverted `ie' values */

	/* Lookup tables. */
	struct ohash	*reqtab;	/* request lookup table */
	struct roffreg	*regtab;	/* number registers */
	struct roffkv	*strtab;	/* user-defined strings & macros */
	struct roffkv	*xmbtab;	/* multi-byte trans table (`tr') */
	struct roffstr	*xtab;		/* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */

	/* Tables. */
	struct tbl_node	*first_tbl;	/* first table parsed */
	struct tbl_node	*last_tbl;	/* last table parsed */
	struct tbl_node	*tbl;		/* current table being parsed */

	/* Equations. */
	struct eqn_node	*last_eqn;	/* last equation parsed */
	struct eqn_node	*first_eqn;	/* first equation parsed */
	struct eqn_node	*eqn;		/* current equation being parsed */
	int		 eqn_inline;	/* current equation is inline */

	/* Scalar settings. */
	int		 options;	/* parse options */
	int		 rstacksz;	/* current size limit of rstack */
	int		 rstackpos;	/* position in rstack */
	int		 format;	/* current file in mdoc or man format */
	int		 argc;		/* number of args of the last macro */
	char		 control;	/* control character */
};
103
104 struct roffnode {
105 enum roff_tok tok; /* type of node */
106 struct roffnode *parent; /* up one in stack */
107 int line; /* parse line */
108 int col; /* parse col */
109 char *name; /* node name, e.g. macro name */
110 char *end; /* end-rules: custom token */
111 int endspan; /* end-rules: next-line or infty */
112 int rule; /* current evaluation rule */
113 };
114
/*
 * Parameter list shared by all request handlers:
 *   r     parse ctx
 *   tok   tok of macro
 *   buf   input buffer
 *   ln    parse line
 *   ppos  original pos in buffer
 *   pos   current pos in buffer
 *   offs  reset offset of buffer data
 */
#define	ROFF_ARGS \
	struct roff *r, enum roff_tok tok, struct buf *buf, \
	int ln, int ppos, int pos, int *offs
122
/* Pointer to a request handler taking the common ROFF_ARGS list. */
typedef	enum rofferr	(*roffproc)(ROFF_ARGS);
124
125 struct roffmac {
126 roffproc proc; /* process new macro */
127 roffproc text; /* process as child text of macro */
128 roffproc sub; /* process as child of macro */
129 int flags;
130 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
131 };
132
/*
 * A predefined string: an input name and the text replacing it.
 */
struct predef {
	const char	*name;	/* predefined input name */
	const char	*str;	/* replacement symbol */
};
137
/*
 * Expands to one brace-enclosed initializer for a name/string pair,
 * including the trailing comma.
 */
#define	PREDEF(n_, s_)	{ (n_), (s_) },
140
141 /* --- function prototypes ------------------------------------------------ */
142
/* Forward declarations of the file-local functions. */
static void roffnode_cleanscope(struct roff *);
static void roffnode_pop(struct roff *);
static void roffnode_push(struct roff *, enum roff_tok, const char *,
		int, int);
static enum rofferr roff_block(ROFF_ARGS);
static enum rofferr roff_block_text(ROFF_ARGS);
static enum rofferr roff_block_sub(ROFF_ARGS);
static enum rofferr roff_br(ROFF_ARGS);
static enum rofferr roff_cblock(ROFF_ARGS);
static enum rofferr roff_cc(ROFF_ARGS);
static void roff_ccond(struct roff *, int, int);
static enum rofferr roff_cond(ROFF_ARGS);
static enum rofferr roff_cond_text(ROFF_ARGS);
static enum rofferr roff_cond_sub(ROFF_ARGS);
static enum rofferr roff_ds(ROFF_ARGS);
static enum rofferr roff_eqndelim(struct roff *, struct buf *, int);
static int roff_evalcond(struct roff *, int, char *, int *);
static int roff_evalnum(struct roff *, int, const char *, int *,
		int *, int);
static int roff_evalpar(struct roff *, int, const char *, int *,
		int *, int);
static int roff_evalstrcond(const char *, int *);
static void roff_free1(struct roff *);
static void roff_freereg(struct roffreg *);
static void roff_freestr(struct roffkv *);
static size_t roff_getname(struct roff *, char **, int, int);
static int roff_getnum(const char *, int *, int *, int);
static int roff_getop(const char *, int *, char *);
static int roff_getregn(const struct roff *, const char *, size_t);
static int roff_getregro(const struct roff *, const char *);
static const char *roff_getstrn(const struct roff *, const char *, size_t);
static int roff_hasregn(const struct roff *, const char *, size_t);
static enum rofferr roff_insec(ROFF_ARGS);
static enum rofferr roff_it(ROFF_ARGS);
static enum rofferr roff_line_ignore(ROFF_ARGS);
static void roff_man_alloc1(struct roff_man *);
static void roff_man_free1(struct roff_man *);
static enum rofferr roff_nr(ROFF_ARGS);
static enum rofferr roff_onearg(ROFF_ARGS);
static enum roff_tok roff_parse(struct roff *, char *, int *, int, int);
static enum rofferr roff_parsetext(struct buf *, int, int *);
static enum rofferr roff_res(struct roff *, struct buf *, int, int);
static enum rofferr roff_rm(ROFF_ARGS);
static enum rofferr roff_rr(ROFF_ARGS);
static void roff_setstr(struct roff *, const char *, const char *, int);
static void roff_setstrn(struct roffkv **, const char *, size_t,
		const char *, size_t, int);
static enum rofferr roff_so(ROFF_ARGS);
static enum rofferr roff_tr(ROFF_ARGS);
static enum rofferr roff_Dd(ROFF_ARGS);
static enum rofferr roff_TH(ROFF_ARGS);
static enum rofferr roff_TE(ROFF_ARGS);
static enum rofferr roff_TS(ROFF_ARGS);
static enum rofferr roff_EQ(ROFF_ARGS);
static enum rofferr roff_EN(ROFF_ARGS);
static enum rofferr roff_T_(ROFF_ARGS);
static enum rofferr roff_unsupp(ROFF_ARGS);
static enum rofferr roff_userdef(ROFF_ARGS);
207
208 /* --- constant data ------------------------------------------------------ */
209
/* Flag bits for the numeric parsing helpers. */
/* Honour scaling in roff_getnum(). */
#define	ROFFNUM_SCALE	(1 << 0)
/* Skip whitespace in roff_evalnum(). */
#define	ROFFNUM_WHITE	(1 << 1)
212
213 const char *__roff_name[MAN_MAX + 1] = {
214 "br", "ft", "ll", NULL,
215 "ab", "ad", "af", "aln",
216 "als", "am", "am1", "ami",
217 "ami1", "as", "as1", "asciify",
218 "backtrace", "bd", "bleedat", "blm",
219 "box", "boxa", "bp", "BP",
220 "break", "breakchar", "brnl", "brp",
221 "brpnl", "c2", "cc", "ce",
222 "cf", "cflags", "ch", "char",
223 "chop", "class", "close", "CL",
224 "color", "composite", "continue", "cp",
225 "cropat", "cs", "cu", "da",
226 "dch", "Dd", "de", "de1",
227 "defcolor", "dei", "dei1", "device",
228 "devicem", "di", "do", "ds",
229 "ds1", "dwh", "dt", "ec",
230 "ecr", "ecs", "el", "em",
231 "EN", "eo", "EP", "EQ",
232 "errprint", "ev", "evc", "ex",
233 "fallback", "fam", "fc", "fchar",
234 "fcolor", "fdeferlig", "feature", "fkern",
235 "fl", "flig", "fp", "fps",
236 "fschar", "fspacewidth", "fspecial", "ftr",
237 "fzoom", "gcolor", "hc", "hcode",
238 "hidechar", "hla", "hlm", "hpf",
239 "hpfa", "hpfcode", "hw", "hy",
240 "hylang", "hylen", "hym", "hypp",
241 "hys", "ie", "if", "ig",
242 "index", "it", "itc", "IX",
243 "kern", "kernafter", "kernbefore", "kernpair",
244 "lc", "lc_ctype", "lds", "length",
245 "letadj", "lf", "lg", "lhang",
246 "linetabs", "lnr", "lnrf", "lpfx",
247 "ls", "lsm", "lt", "mc",
248 "mediasize", "minss", "mk", "mso",
249 "na", "ne", "nh", "nhychar",
250 "nm", "nn", "nop", "nr",
251 "nrf", "nroff", "ns", "nx",
252 "open", "opena", "os", "output",
253 "padj", "papersize", "pc", "pev",
254 "pi", "PI", "pl", "pm",
255 "pn", "pnr", "po", "ps",
256 "psbb", "pshape", "pso", "ptr",
257 "pvs", "rchar", "rd", "recursionlimit",
258 "return", "rfschar", "rhang", "rj",
259 "rm", "rn", "rnn", "rr",
260 "rs", "rt", "schar", "sentchar",
261 "shc", "shift", "sizes", "so",
262 "spacewidth", "special", "spreadwarn", "ss",
263 "sty", "substring", "sv", "sy",
264 "T&", "ta", "tc", "TE",
265 "TH", "ti", "tkf", "tl",
266 "tm", "tm1", "tmc", "tr",
267 "track", "transchar", "trf", "trimat",
268 "trin", "trnt", "troff", "TS",
269 "uf", "ul", "unformat", "unwatch",
270 "unwatchn", "vpt", "vs", "warn",
271 "warnscale", "watch", "watchlength", "watchn",
272 "wh", "while", "write", "writec",
273 "writem", "xflag", ".", NULL,
274 "text",
275 "Dd", "Dt", "Os", "Sh",
276 "Ss", "Pp", "D1", "Dl",
277 "Bd", "Ed", "Bl", "El",
278 "It", "Ad", "An", "Ap",
279 "Ar", "Cd", "Cm", "Dv",
280 "Er", "Ev", "Ex", "Fa",
281 "Fd", "Fl", "Fn", "Ft",
282 "Ic", "In", "Li", "Nd",
283 "Nm", "Op", "Ot", "Pa",
284 "Rv", "St", "Va", "Vt",
285 "Xr", "%A", "%B", "%D",
286 "%I", "%J", "%N", "%O",
287 "%P", "%R", "%T", "%V",
288 "Ac", "Ao", "Aq", "At",
289 "Bc", "Bf", "Bo", "Bq",
290 "Bsx", "Bx", "Db", "Dc",
291 "Do", "Dq", "Ec", "Ef",
292 "Em", "Eo", "Fx", "Ms",
293 "No", "Ns", "Nx", "Ox",
294 "Pc", "Pf", "Po", "Pq",
295 "Qc", "Ql", "Qo", "Qq",
296 "Re", "Rs", "Sc", "So",
297 "Sq", "Sm", "Sx", "Sy",
298 "Tn", "Ux", "Xc", "Xo",
299 "Fo", "Fc", "Oo", "Oc",
300 "Bk", "Ek", "Bt", "Hf",
301 "Fr", "Ud", "Lb", "Lp",
302 "Lk", "Mt", "Brq", "Bro",
303 "Brc", "%C", "Es", "En",
304 "Dx", "%Q", "sp",
305 "%U", "Ta", NULL,
306 "TH", "SH", "SS", "TP",
307 "LP", "PP", "P", "IP",
308 "HP", "SM", "SB", "BI",
309 "IB", "BR", "RB", "R",
310 "B", "I", "IR", "RI",
311 "sp", "nf", "fi",
312 "RE", "RS", "DT", "UC",
313 "PD", "AT", "in",
314 "OP", "EX", "EE", "UR",
315 "UE", NULL
316 };
317 const char *const *roff_name = __roff_name;
318
319 static struct roffmac roffs[TOKEN_NONE] = {
320 { roff_br, NULL, NULL, 0 }, /* br */
321 { roff_onearg, NULL, NULL, 0 }, /* ft */
322 { roff_onearg, NULL, NULL, 0 }, /* ll */
323 { NULL, NULL, NULL, 0 }, /* ROFF_MAX */
324 { roff_unsupp, NULL, NULL, 0 }, /* ab */
325 { roff_line_ignore, NULL, NULL, 0 }, /* ad */
326 { roff_line_ignore, NULL, NULL, 0 }, /* af */
327 { roff_unsupp, NULL, NULL, 0 }, /* aln */
328 { roff_unsupp, NULL, NULL, 0 }, /* als */
329 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am */
330 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am1 */
331 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami */
332 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami1 */
333 { roff_ds, NULL, NULL, 0 }, /* as */
334 { roff_ds, NULL, NULL, 0 }, /* as1 */
335 { roff_unsupp, NULL, NULL, 0 }, /* asciify */
336 { roff_line_ignore, NULL, NULL, 0 }, /* backtrace */
337 { roff_line_ignore, NULL, NULL, 0 }, /* bd */
338 { roff_line_ignore, NULL, NULL, 0 }, /* bleedat */
339 { roff_unsupp, NULL, NULL, 0 }, /* blm */
340 { roff_unsupp, NULL, NULL, 0 }, /* box */
341 { roff_unsupp, NULL, NULL, 0 }, /* boxa */
342 { roff_line_ignore, NULL, NULL, 0 }, /* bp */
343 { roff_unsupp, NULL, NULL, 0 }, /* BP */
344 { roff_unsupp, NULL, NULL, 0 }, /* break */
345 { roff_line_ignore, NULL, NULL, 0 }, /* breakchar */
346 { roff_line_ignore, NULL, NULL, 0 }, /* brnl */
347 { roff_br, NULL, NULL, 0 }, /* brp */
348 { roff_line_ignore, NULL, NULL, 0 }, /* brpnl */
349 { roff_unsupp, NULL, NULL, 0 }, /* c2 */
350 { roff_cc, NULL, NULL, 0 }, /* cc */
351 { roff_line_ignore, NULL, NULL, 0 }, /* ce */
352 { roff_insec, NULL, NULL, 0 }, /* cf */
353 { roff_line_ignore, NULL, NULL, 0 }, /* cflags */
354 { roff_line_ignore, NULL, NULL, 0 }, /* ch */
355 { roff_unsupp, NULL, NULL, 0 }, /* char */
356 { roff_unsupp, NULL, NULL, 0 }, /* chop */
357 { roff_line_ignore, NULL, NULL, 0 }, /* class */
358 { roff_insec, NULL, NULL, 0 }, /* close */
359 { roff_unsupp, NULL, NULL, 0 }, /* CL */
360 { roff_line_ignore, NULL, NULL, 0 }, /* color */
361 { roff_unsupp, NULL, NULL, 0 }, /* composite */
362 { roff_unsupp, NULL, NULL, 0 }, /* continue */
363 { roff_line_ignore, NULL, NULL, 0 }, /* cp */
364 { roff_line_ignore, NULL, NULL, 0 }, /* cropat */
365 { roff_line_ignore, NULL, NULL, 0 }, /* cs */
366 { roff_line_ignore, NULL, NULL, 0 }, /* cu */
367 { roff_unsupp, NULL, NULL, 0 }, /* da */
368 { roff_unsupp, NULL, NULL, 0 }, /* dch */
369 { roff_Dd, NULL, NULL, 0 }, /* Dd */
370 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de */
371 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de1 */
372 { roff_line_ignore, NULL, NULL, 0 }, /* defcolor */
373 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei */
374 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei1 */
375 { roff_unsupp, NULL, NULL, 0 }, /* device */
376 { roff_unsupp, NULL, NULL, 0 }, /* devicem */
377 { roff_unsupp, NULL, NULL, 0 }, /* di */
378 { roff_unsupp, NULL, NULL, 0 }, /* do */
379 { roff_ds, NULL, NULL, 0 }, /* ds */
380 { roff_ds, NULL, NULL, 0 }, /* ds1 */
381 { roff_unsupp, NULL, NULL, 0 }, /* dwh */
382 { roff_unsupp, NULL, NULL, 0 }, /* dt */
383 { roff_unsupp, NULL, NULL, 0 }, /* ec */
384 { roff_unsupp, NULL, NULL, 0 }, /* ecr */
385 { roff_unsupp, NULL, NULL, 0 }, /* ecs */
386 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* el */
387 { roff_unsupp, NULL, NULL, 0 }, /* em */
388 { roff_EN, NULL, NULL, 0 }, /* EN */
389 { roff_unsupp, NULL, NULL, 0 }, /* eo */
390 { roff_unsupp, NULL, NULL, 0 }, /* EP */
391 { roff_EQ, NULL, NULL, 0 }, /* EQ */
392 { roff_line_ignore, NULL, NULL, 0 }, /* errprint */
393 { roff_unsupp, NULL, NULL, 0 }, /* ev */
394 { roff_unsupp, NULL, NULL, 0 }, /* evc */
395 { roff_unsupp, NULL, NULL, 0 }, /* ex */
396 { roff_line_ignore, NULL, NULL, 0 }, /* fallback */
397 { roff_line_ignore, NULL, NULL, 0 }, /* fam */
398 { roff_unsupp, NULL, NULL, 0 }, /* fc */
399 { roff_unsupp, NULL, NULL, 0 }, /* fchar */
400 { roff_line_ignore, NULL, NULL, 0 }, /* fcolor */
401 { roff_line_ignore, NULL, NULL, 0 }, /* fdeferlig */
402 { roff_line_ignore, NULL, NULL, 0 }, /* feature */
403 { roff_line_ignore, NULL, NULL, 0 }, /* fkern */
404 { roff_line_ignore, NULL, NULL, 0 }, /* fl */
405 { roff_line_ignore, NULL, NULL, 0 }, /* flig */
406 { roff_line_ignore, NULL, NULL, 0 }, /* fp */
407 { roff_line_ignore, NULL, NULL, 0 }, /* fps */
408 { roff_unsupp, NULL, NULL, 0 }, /* fschar */
409 { roff_line_ignore, NULL, NULL, 0 }, /* fspacewidth */
410 { roff_line_ignore, NULL, NULL, 0 }, /* fspecial */
411 { roff_line_ignore, NULL, NULL, 0 }, /* ftr */
412 { roff_line_ignore, NULL, NULL, 0 }, /* fzoom */
413 { roff_line_ignore, NULL, NULL, 0 }, /* gcolor */
414 { roff_line_ignore, NULL, NULL, 0 }, /* hc */
415 { roff_line_ignore, NULL, NULL, 0 }, /* hcode */
416 { roff_line_ignore, NULL, NULL, 0 }, /* hidechar */
417 { roff_line_ignore, NULL, NULL, 0 }, /* hla */
418 { roff_line_ignore, NULL, NULL, 0 }, /* hlm */
419 { roff_line_ignore, NULL, NULL, 0 }, /* hpf */
420 { roff_line_ignore, NULL, NULL, 0 }, /* hpfa */
421 { roff_line_ignore, NULL, NULL, 0 }, /* hpfcode */
422 { roff_line_ignore, NULL, NULL, 0 }, /* hw */
423 { roff_line_ignore, NULL, NULL, 0 }, /* hy */
424 { roff_line_ignore, NULL, NULL, 0 }, /* hylang */
425 { roff_line_ignore, NULL, NULL, 0 }, /* hylen */
426 { roff_line_ignore, NULL, NULL, 0 }, /* hym */
427 { roff_line_ignore, NULL, NULL, 0 }, /* hypp */
428 { roff_line_ignore, NULL, NULL, 0 }, /* hys */
429 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* ie */
430 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* if */
431 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ig */
432 { roff_unsupp, NULL, NULL, 0 }, /* index */
433 { roff_it, NULL, NULL, 0 }, /* it */
434 { roff_unsupp, NULL, NULL, 0 }, /* itc */
435 { roff_line_ignore, NULL, NULL, 0 }, /* IX */
436 { roff_line_ignore, NULL, NULL, 0 }, /* kern */
437 { roff_line_ignore, NULL, NULL, 0 }, /* kernafter */
438 { roff_line_ignore, NULL, NULL, 0 }, /* kernbefore */
439 { roff_line_ignore, NULL, NULL, 0 }, /* kernpair */
440 { roff_unsupp, NULL, NULL, 0 }, /* lc */
441 { roff_unsupp, NULL, NULL, 0 }, /* lc_ctype */
442 { roff_unsupp, NULL, NULL, 0 }, /* lds */
443 { roff_unsupp, NULL, NULL, 0 }, /* length */
444 { roff_line_ignore, NULL, NULL, 0 }, /* letadj */
445 { roff_insec, NULL, NULL, 0 }, /* lf */
446 { roff_line_ignore, NULL, NULL, 0 }, /* lg */
447 { roff_line_ignore, NULL, NULL, 0 }, /* lhang */
448 { roff_unsupp, NULL, NULL, 0 }, /* linetabs */
449 { roff_unsupp, NULL, NULL, 0 }, /* lnr */
450 { roff_unsupp, NULL, NULL, 0 }, /* lnrf */
451 { roff_unsupp, NULL, NULL, 0 }, /* lpfx */
452 { roff_line_ignore, NULL, NULL, 0 }, /* ls */
453 { roff_unsupp, NULL, NULL, 0 }, /* lsm */
454 { roff_line_ignore, NULL, NULL, 0 }, /* lt */
455 { roff_line_ignore, NULL, NULL, 0 }, /* mc */
456 { roff_line_ignore, NULL, NULL, 0 }, /* mediasize */
457 { roff_line_ignore, NULL, NULL, 0 }, /* minss */
458 { roff_line_ignore, NULL, NULL, 0 }, /* mk */
459 { roff_insec, NULL, NULL, 0 }, /* mso */
460 { roff_line_ignore, NULL, NULL, 0 }, /* na */
461 { roff_line_ignore, NULL, NULL, 0 }, /* ne */
462 { roff_line_ignore, NULL, NULL, 0 }, /* nh */
463 { roff_line_ignore, NULL, NULL, 0 }, /* nhychar */
464 { roff_unsupp, NULL, NULL, 0 }, /* nm */
465 { roff_unsupp, NULL, NULL, 0 }, /* nn */
466 { roff_unsupp, NULL, NULL, 0 }, /* nop */
467 { roff_nr, NULL, NULL, 0 }, /* nr */
468 { roff_unsupp, NULL, NULL, 0 }, /* nrf */
469 { roff_line_ignore, NULL, NULL, 0 }, /* nroff */
470 { roff_line_ignore, NULL, NULL, 0 }, /* ns */
471 { roff_insec, NULL, NULL, 0 }, /* nx */
472 { roff_insec, NULL, NULL, 0 }, /* open */
473 { roff_insec, NULL, NULL, 0 }, /* opena */
474 { roff_line_ignore, NULL, NULL, 0 }, /* os */
475 { roff_unsupp, NULL, NULL, 0 }, /* output */
476 { roff_line_ignore, NULL, NULL, 0 }, /* padj */
477 { roff_line_ignore, NULL, NULL, 0 }, /* papersize */
478 { roff_line_ignore, NULL, NULL, 0 }, /* pc */
479 { roff_line_ignore, NULL, NULL, 0 }, /* pev */
480 { roff_insec, NULL, NULL, 0 }, /* pi */
481 { roff_unsupp, NULL, NULL, 0 }, /* PI */
482 { roff_line_ignore, NULL, NULL, 0 }, /* pl */
483 { roff_line_ignore, NULL, NULL, 0 }, /* pm */
484 { roff_line_ignore, NULL, NULL, 0 }, /* pn */
485 { roff_line_ignore, NULL, NULL, 0 }, /* pnr */
486 { roff_line_ignore, NULL, NULL, 0 }, /* po */
487 { roff_line_ignore, NULL, NULL, 0 }, /* ps */
488 { roff_unsupp, NULL, NULL, 0 }, /* psbb */
489 { roff_unsupp, NULL, NULL, 0 }, /* pshape */
490 { roff_insec, NULL, NULL, 0 }, /* pso */
491 { roff_line_ignore, NULL, NULL, 0 }, /* ptr */
492 { roff_line_ignore, NULL, NULL, 0 }, /* pvs */
493 { roff_unsupp, NULL, NULL, 0 }, /* rchar */
494 { roff_line_ignore, NULL, NULL, 0 }, /* rd */
495 { roff_line_ignore, NULL, NULL, 0 }, /* recursionlimit */
496 { roff_unsupp, NULL, NULL, 0 }, /* return */
497 { roff_unsupp, NULL, NULL, 0 }, /* rfschar */
498 { roff_line_ignore, NULL, NULL, 0 }, /* rhang */
499 { roff_line_ignore, NULL, NULL, 0 }, /* rj */
500 { roff_rm, NULL, NULL, 0 }, /* rm */
501 { roff_unsupp, NULL, NULL, 0 }, /* rn */
502 { roff_unsupp, NULL, NULL, 0 }, /* rnn */
503 { roff_rr, NULL, NULL, 0 }, /* rr */
504 { roff_line_ignore, NULL, NULL, 0 }, /* rs */
505 { roff_line_ignore, NULL, NULL, 0 }, /* rt */
506 { roff_unsupp, NULL, NULL, 0 }, /* schar */
507 { roff_line_ignore, NULL, NULL, 0 }, /* sentchar */
508 { roff_line_ignore, NULL, NULL, 0 }, /* shc */
509 { roff_unsupp, NULL, NULL, 0 }, /* shift */
510 { roff_line_ignore, NULL, NULL, 0 }, /* sizes */
511 { roff_so, NULL, NULL, 0 }, /* so */
512 { roff_line_ignore, NULL, NULL, 0 }, /* spacewidth */
513 { roff_line_ignore, NULL, NULL, 0 }, /* special */
514 { roff_line_ignore, NULL, NULL, 0 }, /* spreadwarn */
515 { roff_line_ignore, NULL, NULL, 0 }, /* ss */
516 { roff_line_ignore, NULL, NULL, 0 }, /* sty */
517 { roff_unsupp, NULL, NULL, 0 }, /* substring */
518 { roff_line_ignore, NULL, NULL, 0 }, /* sv */
519 { roff_insec, NULL, NULL, 0 }, /* sy */
520 { roff_T_, NULL, NULL, 0 }, /* T& */
521 { roff_unsupp, NULL, NULL, 0 }, /* ta */
522 { roff_unsupp, NULL, NULL, 0 }, /* tc */
523 { roff_TE, NULL, NULL, 0 }, /* TE */
524 { roff_TH, NULL, NULL, 0 }, /* TH */
525 { roff_unsupp, NULL, NULL, 0 }, /* ti */
526 { roff_line_ignore, NULL, NULL, 0 }, /* tkf */
527 { roff_unsupp, NULL, NULL, 0 }, /* tl */
528 { roff_line_ignore, NULL, NULL, 0 }, /* tm */
529 { roff_line_ignore, NULL, NULL, 0 }, /* tm1 */
530 { roff_line_ignore, NULL, NULL, 0 }, /* tmc */
531 { roff_tr, NULL, NULL, 0 }, /* tr */
532 { roff_line_ignore, NULL, NULL, 0 }, /* track */
533 { roff_line_ignore, NULL, NULL, 0 }, /* transchar */
534 { roff_insec, NULL, NULL, 0 }, /* trf */
535 { roff_line_ignore, NULL, NULL, 0 }, /* trimat */
536 { roff_unsupp, NULL, NULL, 0 }, /* trin */
537 { roff_unsupp, NULL, NULL, 0 }, /* trnt */
538 { roff_line_ignore, NULL, NULL, 0 }, /* troff */
539 { roff_TS, NULL, NULL, 0 }, /* TS */
540 { roff_line_ignore, NULL, NULL, 0 }, /* uf */
541 { roff_line_ignore, NULL, NULL, 0 }, /* ul */
542 { roff_unsupp, NULL, NULL, 0 }, /* unformat */
543 { roff_line_ignore, NULL, NULL, 0 }, /* unwatch */
544 { roff_line_ignore, NULL, NULL, 0 }, /* unwatchn */
545 { roff_line_ignore, NULL, NULL, 0 }, /* vpt */
546 { roff_line_ignore, NULL, NULL, 0 }, /* vs */
547 { roff_line_ignore, NULL, NULL, 0 }, /* warn */
548 { roff_line_ignore, NULL, NULL, 0 }, /* warnscale */
549 { roff_line_ignore, NULL, NULL, 0 }, /* watch */
550 { roff_line_ignore, NULL, NULL, 0 }, /* watchlength */
551 { roff_line_ignore, NULL, NULL, 0 }, /* watchn */
552 { roff_unsupp, NULL, NULL, 0 }, /* wh */
553 { roff_unsupp, NULL, NULL, 0 }, /* while */
554 { roff_insec, NULL, NULL, 0 }, /* write */
555 { roff_insec, NULL, NULL, 0 }, /* writec */
556 { roff_insec, NULL, NULL, 0 }, /* writem */
557 { roff_line_ignore, NULL, NULL, 0 }, /* xflag */
558 { roff_cblock, NULL, NULL, 0 }, /* . */
559 { roff_userdef, NULL, NULL, 0 }
560 };
561
/*
 * NULL-terminated list of mdoc names.
 * not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH
 */
const char *const __mdoc_reserved[] = {
	"Ac",	"Ad",	"An",	"Ao",	"Ap",	"Aq",	"Ar",	"At",
	"Bc",	"Bd",	"Bf",	"Bk",	"Bl",	"Bo",	"Bq",
	"Brc",	"Bro",	"Brq",	"Bsx",	"Bt",	"Bx",
	"Cd",	"Cm",	"Db",	"Dc",	"Dd",	"Dl",	"Do",	"Dq",
	"Dt",	"Dv",	"Dx",	"D1",
	"Ec",	"Ed",	"Ef",	"Ek",	"El",	"Em",
	"En",	"Eo",	"Er",	"Es",	"Ev",	"Ex",
	"Fa",	"Fc",	"Fd",	"Fl",	"Fn",	"Fo",	"Fr",	"Ft",	"Fx",
	"Hf",	"Ic",	"In",	"It",	"Lb",	"Li",	"Lk",	"Lp",
	"Ms",	"Mt",	"Nd",	"Nm",	"No",	"Ns",	"Nx",
	"Oc",	"Oo",	"Op",	"Os",	"Ot",	"Ox",
	"Pa",	"Pc",	"Pf",	"Po",	"Pp",	"Pq",
	"Qc",	"Ql",	"Qo",	"Qq",	"Re",	"Rs",	"Rv",
	"Sc",	"Sh",	"Sm",	"So",	"Sq",
	"Ss",	"St",	"Sx",	"Sy",
	"Ta",	"Tn",	"Ud",	"Ux",	"Va",	"Vt",	"Xc",	"Xo",	"Xr",
	"%A",	"%B",	"%C",	"%D",	"%I",	"%J",	"%N",	"%O",
	"%P",	"%Q",	"%R",	"%T",	"%U",	"%V",
	NULL
};
584
/*
 * NULL-terminated list of man names.
 * not currently implemented: BT DE DS ME MT PT SY TQ YS
 */
const char *const __man_reserved[] = {
	"AT",	"B",	"BI",	"BR",	"DT",
	"EE",	"EN",	"EQ",	"EX",	"HP",	"I",	"IB",	"IP",	"IR",
	"LP",	"OP",	"P",	"PD",	"PP",
	"R",	"RB",	"RE",	"RI",	"RS",	"SB",	"SH",	"SM",	"SS",
	"TE",	"TH",	"TP",	"TS",	"T&",	"UC",	"UE",	"UR",
	NULL
};
594
595 /* Array of injected predefined strings. */
596 #define PREDEFS_MAX 38
597 static const struct predef predefs[PREDEFS_MAX] = {
598 #include "predefs.in"
599 };
600
/* File-scope state kept for the `it' request. */
static int	 roffit_lines;	/* number of lines to delay */
static char	*roffit_macro;	/* nil-terminated macro line */
603
604
605 /* --- request table ------------------------------------------------------ */
606
607 struct ohash *
608 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
609 {
610 struct ohash *htab;
611 struct roffreq *req;
612 enum roff_tok tok;
613 size_t sz;
614 unsigned int slot;
615
616 htab = mandoc_malloc(sizeof(*htab));
617 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
618
619 for (tok = mintok; tok < maxtok; tok++) {
620 if (roff_name[tok] == NULL)
621 continue;
622 sz = strlen(roff_name[tok]);
623 req = mandoc_malloc(sizeof(*req) + sz + 1);
624 req->tok = tok;
625 memcpy(req->name, roff_name[tok], sz + 1);
626 slot = ohash_qlookup(htab, req->name);
627 ohash_insert(htab, slot, req);
628 }
629 return htab;
630 }
631
/*
 * Free every entry of a request hash table, then the table itself.
 * A NULL table is accepted and ignored.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 idx;

	if (htab != NULL) {
		entry = ohash_first(htab, &idx);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &idx);
		}
		ohash_delete(htab);
		free(htab);
	}
}
646
647 enum roff_tok
648 roffhash_find(struct ohash *htab, const char *name, size_t sz)
649 {
650 struct roffreq *req;
651 const char *end;
652
653 if (sz) {
654 end = name + sz;
655 req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
656 } else
657 req = ohash_find(htab, ohash_qlookup(htab, name));
658 return req == NULL ? TOKEN_NONE : req->tok;
659 }
660
661 /* --- stack of request blocks -------------------------------------------- */
662
663 /*
664 * Pop the current node off of the stack of roff instructions currently
665 * pending.
666 */
667 static void
668 roffnode_pop(struct roff *r)
669 {
670 struct roffnode *p;
671
672 assert(r->last);
673 p = r->last;
674
675 r->last = r->last->parent;
676 free(p->name);
677 free(p->end);
678 free(p);
679 }
680
681 /*
682 * Push a roff node onto the instruction stack. This must later be
683 * removed with roffnode_pop().
684 */
685 static void
686 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
687 int line, int col)
688 {
689 struct roffnode *p;
690
691 p = mandoc_calloc(1, sizeof(struct roffnode));
692 p->tok = tok;
693 if (name)
694 p->name = mandoc_strdup(name);
695 p->parent = r->last;
696 p->line = line;
697 p->col = col;
698 p->rule = p->parent ? p->parent->rule : 0;
699
700 r->last = p;
701 }
702
703 /* --- roff parser state data management ---------------------------------- */
704
705 static void
706 roff_free1(struct roff *r)
707 {
708 struct tbl_node *tbl;
709 struct eqn_node *e;
710 int i;
711
712 while (NULL != (tbl = r->first_tbl)) {
713 r->first_tbl = tbl->next;
714 tbl_free(tbl);
715 }
716 r->first_tbl = r->last_tbl = r->tbl = NULL;
717
718 while (NULL != (e = r->first_eqn)) {
719 r->first_eqn = e->next;
720 eqn_free(e);
721 }
722 r->first_eqn = r->last_eqn = r->eqn = NULL;
723
724 while (r->last)
725 roffnode_pop(r);
726
727 free (r->rstack);
728 r->rstack = NULL;
729 r->rstacksz = 0;
730 r->rstackpos = -1;
731
732 roff_freereg(r->regtab);
733 r->regtab = NULL;
734
735 roff_freestr(r->strtab);
736 roff_freestr(r->xmbtab);
737 r->strtab = r->xmbtab = NULL;
738
739 if (r->xtab)
740 for (i = 0; i < 128; i++)
741 free(r->xtab[i].p);
742 free(r->xtab);
743 r->xtab = NULL;
744 }
745
746 void
747 roff_reset(struct roff *r)
748 {
749 roff_free1(r);
750 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
751 r->control = 0;
752 }
753
754 void
755 roff_free(struct roff *r)
756 {
757 roff_free1(r);
758 roffhash_free(r->reqtab);
759 free(r);
760 }
761
762 struct roff *
763 roff_alloc(struct mparse *parse, int options)
764 {
765 struct roff *r;
766
767 r = mandoc_calloc(1, sizeof(struct roff));
768 r->parse = parse;
769 r->reqtab = roffhash_alloc(0, ROFF_USERDEF);
770 r->options = options;
771 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
772 r->rstackpos = -1;
773 return r;
774 }
775
776 /* --- syntax tree state data management ---------------------------------- */
777
778 static void
779 roff_man_free1(struct roff_man *man)
780 {
781
782 if (man->first != NULL)
783 roff_node_delete(man, man->first);
784 free(man->meta.msec);
785 free(man->meta.vol);
786 free(man->meta.os);
787 free(man->meta.arch);
788 free(man->meta.title);
789 free(man->meta.name);
790 free(man->meta.date);
791 }
792
793 static void
794 roff_man_alloc1(struct roff_man *man)
795 {
796
797 memset(&man->meta, 0, sizeof(man->meta));
798 man->first = mandoc_calloc(1, sizeof(*man->first));
799 man->first->type = ROFFT_ROOT;
800 man->last = man->first;
801 man->last_es = NULL;
802 man->flags = 0;
803 man->macroset = MACROSET_NONE;
804 man->lastsec = man->lastnamed = SEC_NONE;
805 man->next = ROFF_NEXT_CHILD;
806 }
807
/*
 * Discard the current syntax tree and meta data, then
 * reinitialize the structure for a new document.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);
	roff_man_alloc1(man);
}
815
/*
 * Release the contents of a struct roff_man and then the
 * structure itself.
 */
void
roff_man_free(struct roff_man *man)
{
	roff_man_free1(man);
	free(man);
}
823
824 struct roff_man *
825 roff_man_alloc(struct roff *roff, struct mparse *parse,
826 const char *defos, int quick)
827 {
828 struct roff_man *man;
829
830 man = mandoc_calloc(1, sizeof(*man));
831 man->parse = parse;
832 man->roff = roff;
833 man->defos = defos;
834 man->quick = quick;
835 roff_man_alloc1(man);
836 roff->man = man;
837 return man;
838 }
839
840 /* --- syntax tree handling ----------------------------------------------- */
841
842 struct roff_node *
843 roff_node_alloc(struct roff_man *man, int line, int pos,
844 enum roff_type type, int tok)
845 {
846 struct roff_node *n;
847
848 n = mandoc_calloc(1, sizeof(*n));
849 n->line = line;
850 n->pos = pos;
851 n->tok = tok;
852 n->type = type;
853 n->sec = man->lastsec;
854
855 if (man->flags & MDOC_SYNOPSIS)
856 n->flags |= NODE_SYNPRETTY;
857 else
858 n->flags &= ~NODE_SYNPRETTY;
859 if (man->flags & MDOC_NEWLINE)
860 n->flags |= NODE_LINE;
861 man->flags &= ~MDOC_NEWLINE;
862
863 return n;
864 }
865
866 void
867 roff_node_append(struct roff_man *man, struct roff_node *n)
868 {
869
870 switch (man->next) {
871 case ROFF_NEXT_SIBLING:
872 if (man->last->next != NULL) {
873 n->next = man->last->next;
874 man->last->next->prev = n;
875 } else
876 man->last->parent->last = n;
877 man->last->next = n;
878 n->prev = man->last;
879 n->parent = man->last->parent;
880 break;
881 case ROFF_NEXT_CHILD:
882 if (man->last->child != NULL) {
883 n->next = man->last->child;
884 man->last->child->prev = n;
885 } else
886 man->last->last = n;
887 man->last->child = n;
888 n->parent = man->last;
889 break;
890 default:
891 abort();
892 }
893 man->last = n;
894
895 switch (n->type) {
896 case ROFFT_HEAD:
897 n->parent->head = n;
898 break;
899 case ROFFT_BODY:
900 if (n->end != ENDBODY_NOT)
901 return;
902 n->parent->body = n;
903 break;
904 case ROFFT_TAIL:
905 n->parent->tail = n;
906 break;
907 default:
908 return;
909 }
910
911 /*
912 * Copy over the normalised-data pointer of our parent. Not
913 * everybody has one, but copying a null pointer is fine.
914 */
915
916 n->norm = n->parent->norm;
917 assert(n->parent->type == ROFFT_BLOCK);
918 }
919
920 void
921 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
922 {
923 struct roff_node *n;
924
925 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
926 n->string = roff_strdup(man->roff, word);
927 roff_node_append(man, n);
928 n->flags |= NODE_VALID | NODE_ENDED;
929 man->next = ROFF_NEXT_SIBLING;
930 }
931
932 void
933 roff_word_append(struct roff_man *man, const char *word)
934 {
935 struct roff_node *n;
936 char *addstr, *newstr;
937
938 n = man->last;
939 addstr = roff_strdup(man->roff, word);
940 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
941 free(addstr);
942 free(n->string);
943 n->string = newstr;
944 man->next = ROFF_NEXT_SIBLING;
945 }
946
947 void
948 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
949 {
950 struct roff_node *n;
951
952 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
953 roff_node_append(man, n);
954 man->next = ROFF_NEXT_CHILD;
955 }
956
957 struct roff_node *
958 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
959 {
960 struct roff_node *n;
961
962 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
963 roff_node_append(man, n);
964 man->next = ROFF_NEXT_CHILD;
965 return n;
966 }
967
968 struct roff_node *
969 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
970 {
971 struct roff_node *n;
972
973 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
974 roff_node_append(man, n);
975 man->next = ROFF_NEXT_CHILD;
976 return n;
977 }
978
979 struct roff_node *
980 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
981 {
982 struct roff_node *n;
983
984 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
985 roff_node_append(man, n);
986 man->next = ROFF_NEXT_CHILD;
987 return n;
988 }
989
990 void
991 roff_addeqn(struct roff_man *man, const struct eqn *eqn)
992 {
993 struct roff_node *n;
994
995 n = roff_node_alloc(man, eqn->ln, eqn->pos, ROFFT_EQN, TOKEN_NONE);
996 n->eqn = eqn;
997 if (eqn->ln > man->last->line)
998 n->flags |= NODE_LINE;
999 roff_node_append(man, n);
1000 man->next = ROFF_NEXT_SIBLING;
1001 }
1002
1003 void
1004 roff_addtbl(struct roff_man *man, const struct tbl_span *tbl)
1005 {
1006 struct roff_node *n;
1007
1008 if (man->macroset == MACROSET_MAN)
1009 man_breakscope(man, TOKEN_NONE);
1010 n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE);
1011 n->span = tbl;
1012 roff_node_append(man, n);
1013 n->flags |= NODE_VALID | NODE_ENDED;
1014 man->next = ROFF_NEXT_SIBLING;
1015 }
1016
1017 void
1018 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1019 {
1020
1021 /* Adjust siblings. */
1022
1023 if (n->prev)
1024 n->prev->next = n->next;
1025 if (n->next)
1026 n->next->prev = n->prev;
1027
1028 /* Adjust parent. */
1029
1030 if (n->parent != NULL) {
1031 if (n->parent->child == n)
1032 n->parent->child = n->next;
1033 if (n->parent->last == n)
1034 n->parent->last = n->prev;
1035 }
1036
1037 /* Adjust parse point. */
1038
1039 if (man == NULL)
1040 return;
1041 if (man->last == n) {
1042 if (n->prev == NULL) {
1043 man->last = n->parent;
1044 man->next = ROFF_NEXT_CHILD;
1045 } else {
1046 man->last = n->prev;
1047 man->next = ROFF_NEXT_SIBLING;
1048 }
1049 }
1050 if (man->first == n)
1051 man->first = NULL;
1052 }
1053
1054 void
1055 roff_node_free(struct roff_node *n)
1056 {
1057
1058 if (n->args != NULL)
1059 mdoc_argv_free(n->args);
1060 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1061 free(n->norm);
1062 free(n->string);
1063 free(n);
1064 }
1065
1066 void
1067 roff_node_delete(struct roff_man *man, struct roff_node *n)
1068 {
1069
1070 while (n->child != NULL)
1071 roff_node_delete(man, n->child);
1072 roff_node_unlink(man, n);
1073 roff_node_free(n);
1074 }
1075
1076 void
1077 deroff(char **dest, const struct roff_node *n)
1078 {
1079 char *cp;
1080 size_t sz;
1081
1082 if (n->type != ROFFT_TEXT) {
1083 for (n = n->child; n != NULL; n = n->next)
1084 deroff(dest, n);
1085 return;
1086 }
1087
1088 /* Skip leading whitespace. */
1089
1090 for (cp = n->string; *cp != '\0'; cp++) {
1091 if (cp[0] == '\\' && cp[1] != '\0' &&
1092 strchr(" %&0^|~", cp[1]) != NULL)
1093 cp++;
1094 else if ( ! isspace((unsigned char)*cp))
1095 break;
1096 }
1097
1098 /* Skip trailing backslash. */
1099
1100 sz = strlen(cp);
1101 if (sz > 0 && cp[sz - 1] == '\\')
1102 sz--;
1103
1104 /* Skip trailing whitespace. */
1105
1106 for (; sz; sz--)
1107 if ( ! isspace((unsigned char)cp[sz-1]))
1108 break;
1109
1110 /* Skip empty strings. */
1111
1112 if (sz == 0)
1113 return;
1114
1115 if (*dest == NULL) {
1116 *dest = mandoc_strndup(cp, sz);
1117 return;
1118 }
1119
1120 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1121 free(*dest);
1122 *dest = cp;
1123 }
1124
1125 /* --- main functions of the roff parser ---------------------------------- */
1126
1127 /*
1128 * In the current line, expand escape sequences that tend to get
1129 * used in numerical expressions and conditional requests.
1130 * Also check the syntax of the remaining escape sequences.
1131 */
1132 static enum rofferr
1133 roff_res(struct roff *r, struct buf *buf, int ln, int pos)
1134 {
1135 char ubuf[24]; /* buffer to print the number */
1136 const char *start; /* start of the string to process */
1137 char *stesc; /* start of an escape sequence ('\\') */
1138 const char *stnam; /* start of the name, after "[(*" */
1139 const char *cp; /* end of the name, e.g. before ']' */
1140 const char *res; /* the string to be substituted */
1141 char *nbuf; /* new buffer to copy buf->buf to */
1142 size_t maxl; /* expected length of the escape name */
1143 size_t naml; /* actual length of the escape name */
1144 enum mandoc_esc esc; /* type of the escape sequence */
1145 int inaml; /* length returned from mandoc_escape() */
1146 int expand_count; /* to avoid infinite loops */
1147 int npos; /* position in numeric expression */
1148 int arg_complete; /* argument not interrupted by eol */
1149 char term; /* character terminating the escape */
1150
1151 expand_count = 0;
1152 start = buf->buf + pos;
1153 stesc = strchr(start, '\0') - 1;
1154 while (stesc-- > start) {
1155
1156 /* Search backwards for the next backslash. */
1157
1158 if (*stesc != '\\')
1159 continue;
1160
1161 /* If it is escaped, skip it. */
1162
1163 for (cp = stesc - 1; cp >= start; cp--)
1164 if (*cp != '\\')
1165 break;
1166
1167 if ((stesc - cp) % 2 == 0) {
1168 stesc = (char *)cp;
1169 continue;
1170 }
1171
1172 /* Decide whether to expand or to check only. */
1173
1174 term = '\0';
1175 cp = stesc + 1;
1176 switch (*cp) {
1177 case '*':
1178 res = NULL;
1179 break;
1180 case 'B':
1181 case 'w':
1182 term = cp[1];
1183 /* FALLTHROUGH */
1184 case 'n':
1185 res = ubuf;
1186 break;
1187 default:
1188 esc = mandoc_escape(&cp, &stnam, &inaml);
1189 if (esc == ESCAPE_ERROR ||
1190 (esc == ESCAPE_SPECIAL &&
1191 mchars_spec2cp(stnam, inaml) < 0))
1192 mandoc_vmsg(MANDOCERR_ESC_BAD,
1193 r->parse, ln, (int)(stesc - buf->buf),
1194 "%.*s", (int)(cp - stesc), stesc);
1195 continue;
1196 }
1197
1198 if (EXPAND_LIMIT < ++expand_count) {
1199 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1200 ln, (int)(stesc - buf->buf), NULL);
1201 return ROFF_IGN;
1202 }
1203
1204 /*
1205 * The third character decides the length
1206 * of the name of the string or register.
1207 * Save a pointer to the name.
1208 */
1209
1210 if (term == '\0') {
1211 switch (*++cp) {
1212 case '\0':
1213 maxl = 0;
1214 break;
1215 case '(':
1216 cp++;
1217 maxl = 2;
1218 break;
1219 case '[':
1220 cp++;
1221 term = ']';
1222 maxl = 0;
1223 break;
1224 default:
1225 maxl = 1;
1226 break;
1227 }
1228 } else {
1229 cp += 2;
1230 maxl = 0;
1231 }
1232 stnam = cp;
1233
1234 /* Advance to the end of the name. */
1235
1236 naml = 0;
1237 arg_complete = 1;
1238 while (maxl == 0 || naml < maxl) {
1239 if (*cp == '\0') {
1240 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
1241 ln, (int)(stesc - buf->buf), stesc);
1242 arg_complete = 0;
1243 break;
1244 }
1245 if (maxl == 0 && *cp == term) {
1246 cp++;
1247 break;
1248 }
1249 if (*cp++ != '\\' || stesc[1] != 'w') {
1250 naml++;
1251 continue;
1252 }
1253 switch (mandoc_escape(&cp, NULL, NULL)) {
1254 case ESCAPE_SPECIAL:
1255 case ESCAPE_UNICODE:
1256 case ESCAPE_NUMBERED:
1257 case ESCAPE_OVERSTRIKE:
1258 naml++;
1259 break;
1260 default:
1261 break;
1262 }
1263 }
1264
1265 /*
1266 * Retrieve the replacement string; if it is
1267 * undefined, resume searching for escapes.
1268 */
1269
1270 switch (stesc[1]) {
1271 case '*':
1272 if (arg_complete)
1273 res = roff_getstrn(r, stnam, naml);
1274 break;
1275 case 'B':
1276 npos = 0;
1277 ubuf[0] = arg_complete &&
1278 roff_evalnum(r, ln, stnam, &npos,
1279 NULL, ROFFNUM_SCALE) &&
1280 stnam + npos + 1 == cp ? '1' : '0';
1281 ubuf[1] = '\0';
1282 break;
1283 case 'n':
1284 if (arg_complete)
1285 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1286 roff_getregn(r, stnam, naml));
1287 else
1288 ubuf[0] = '\0';
1289 break;
1290 case 'w':
1291 /* use even incomplete args */
1292 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1293 24 * (int)naml);
1294 break;
1295 }
1296
1297 if (res == NULL) {
1298 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1299 r->parse, ln, (int)(stesc - buf->buf),
1300 "%.*s", (int)naml, stnam);
1301 res = "";
1302 } else if (buf->sz + strlen(res) > SHRT_MAX) {
1303 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1304 ln, (int)(stesc - buf->buf), NULL);
1305 return ROFF_IGN;
1306 }
1307
1308 /* Replace the escape sequence by the string. */
1309
1310 *stesc = '\0';
1311 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
1312 buf->buf, res, cp) + 1;
1313
1314 /* Prepare for the next replacement. */
1315
1316 start = nbuf + pos;
1317 stesc = nbuf + (stesc - buf->buf) + strlen(res);
1318 free(buf->buf);
1319 buf->buf = nbuf;
1320 }
1321 return ROFF_CONT;
1322 }
1323
1324 /*
1325 * Process text streams.
1326 */
1327 static enum rofferr
1328 roff_parsetext(struct buf *buf, int pos, int *offs)
1329 {
1330 size_t sz;
1331 const char *start;
1332 char *p;
1333 int isz;
1334 enum mandoc_esc esc;
1335
1336 /* Spring the input line trap. */
1337
1338 if (roffit_lines == 1) {
1339 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1340 free(buf->buf);
1341 buf->buf = p;
1342 buf->sz = isz + 1;
1343 *offs = 0;
1344 free(roffit_macro);
1345 roffit_lines = 0;
1346 return ROFF_REPARSE;
1347 } else if (roffit_lines > 1)
1348 --roffit_lines;
1349
1350 /* Convert all breakable hyphens into ASCII_HYPH. */
1351
1352 start = p = buf->buf + pos;
1353
1354 while (*p != '\0') {
1355 sz = strcspn(p, "-\\");
1356 p += sz;
1357
1358 if (*p == '\0')
1359 break;
1360
1361 if (*p == '\\') {
1362 /* Skip over escapes. */
1363 p++;
1364 esc = mandoc_escape((const char **)&p, NULL, NULL);
1365 if (esc == ESCAPE_ERROR)
1366 break;
1367 while (*p == '-')
1368 p++;
1369 continue;
1370 } else if (p == start) {
1371 p++;
1372 continue;
1373 }
1374
1375 if (isalpha((unsigned char)p[-1]) &&
1376 isalpha((unsigned char)p[1]))
1377 *p = ASCII_HYPH;
1378 p++;
1379 }
1380 return ROFF_CONT;
1381 }
1382
1383 enum rofferr
1384 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
1385 {
1386 enum roff_tok t;
1387 enum rofferr e;
1388 int pos; /* parse point */
1389 int spos; /* saved parse point for messages */
1390 int ppos; /* original offset in buf->buf */
1391 int ctl; /* macro line (boolean) */
1392
1393 ppos = pos = *offs;
1394
1395 /* Handle in-line equation delimiters. */
1396
1397 if (r->tbl == NULL &&
1398 r->last_eqn != NULL && r->last_eqn->delim &&
1399 (r->eqn == NULL || r->eqn_inline)) {
1400 e = roff_eqndelim(r, buf, pos);
1401 if (e == ROFF_REPARSE)
1402 return e;
1403 assert(e == ROFF_CONT);
1404 }
1405
1406 /* Expand some escape sequences. */
1407
1408 e = roff_res(r, buf, ln, pos);
1409 if (e == ROFF_IGN)
1410 return e;
1411 assert(e == ROFF_CONT);
1412
1413 ctl = roff_getcontrol(r, buf->buf, &pos);
1414
1415 /*
1416 * First, if a scope is open and we're not a macro, pass the
1417 * text through the macro's filter.
1418 * Equations process all content themselves.
1419 * Tables process almost all content themselves, but we want
1420 * to warn about macros before passing it there.
1421 */
1422
1423 if (r->last != NULL && ! ctl) {
1424 t = r->last->tok;
1425 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1426 if (e == ROFF_IGN)
1427 return e;
1428 assert(e == ROFF_CONT);
1429 }
1430 if (r->eqn != NULL)
1431 return eqn_read(&r->eqn, ln, buf->buf, ppos, offs);
1432 if (r->tbl != NULL && ( ! ctl || buf->buf[pos] == '\0'))
1433 return tbl_read(r->tbl, ln, buf->buf, ppos);
1434 if ( ! ctl)
1435 return roff_parsetext(buf, pos, offs);
1436
1437 /* Skip empty request lines. */
1438
1439 if (buf->buf[pos] == '"') {
1440 mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
1441 ln, pos, NULL);
1442 return ROFF_IGN;
1443 } else if (buf->buf[pos] == '\0')
1444 return ROFF_IGN;
1445
1446 /*
1447 * If a scope is open, go to the child handler for that macro,
1448 * as it may want to preprocess before doing anything with it.
1449 * Don't do so if an equation is open.
1450 */
1451
1452 if (r->last) {
1453 t = r->last->tok;
1454 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1455 }
1456
1457 /* No scope is open. This is a new request or macro. */
1458
1459 spos = pos;
1460 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1461
1462 /* Tables ignore most macros. */
1463
1464 if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS)) {
1465 mandoc_msg(MANDOCERR_TBLMACRO, r->parse,
1466 ln, pos, buf->buf + spos);
1467 if (t == ROFF_TS)
1468 return ROFF_IGN;
1469 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1470 pos++;
1471 while (buf->buf[pos] == ' ')
1472 pos++;
1473 return tbl_read(r->tbl, ln, buf->buf, pos);
1474 }
1475
1476 /*
1477 * This is neither a roff request nor a user-defined macro.
1478 * Let the standard macro set parsers handle it.
1479 */
1480
1481 if (t == TOKEN_NONE)
1482 return ROFF_CONT;
1483
1484 /* Execute a roff request or a user defined macro. */
1485
1486 return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
1487 }
1488
1489 void
1490 roff_endparse(struct roff *r)
1491 {
1492
1493 if (r->last)
1494 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1495 r->last->line, r->last->col,
1496 roff_name[r->last->tok]);
1497
1498 if (r->eqn) {
1499 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1500 r->eqn->eqn.ln, r->eqn->eqn.pos, "EQ");
1501 eqn_end(&r->eqn);
1502 }
1503
1504 if (r->tbl) {
1505 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1506 r->tbl->line, r->tbl->pos, "TS");
1507 tbl_end(&r->tbl);
1508 }
1509 }
1510
1511 /*
1512 * Parse a roff node's type from the input buffer. This must be in the
1513 * form of ".foo xxx" in the usual way.
1514 */
1515 static enum roff_tok
1516 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1517 {
1518 char *cp;
1519 const char *mac;
1520 size_t maclen;
1521 enum roff_tok t;
1522
1523 cp = buf + *pos;
1524
1525 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1526 return TOKEN_NONE;
1527
1528 mac = cp;
1529 maclen = roff_getname(r, &cp, ln, ppos);
1530
1531 t = (r->current_string = roff_getstrn(r, mac, maclen))
1532 ? ROFF_USERDEF : roffhash_find(r->reqtab, mac, maclen);
1533
1534 if (t != TOKEN_NONE)
1535 *pos = cp - buf;
1536
1537 return t;
1538 }
1539
1540 /* --- handling of request blocks ----------------------------------------- */
1541
1542 static enum rofferr
1543 roff_cblock(ROFF_ARGS)
1544 {
1545
1546 /*
1547 * A block-close `..' should only be invoked as a child of an
1548 * ignore macro, otherwise raise a warning and just ignore it.
1549 */
1550
1551 if (r->last == NULL) {
1552 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1553 ln, ppos, "..");
1554 return ROFF_IGN;
1555 }
1556
1557 switch (r->last->tok) {
1558 case ROFF_am:
1559 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1560 case ROFF_ami:
1561 case ROFF_de:
1562 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1563 case ROFF_dei:
1564 case ROFF_ig:
1565 break;
1566 default:
1567 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1568 ln, ppos, "..");
1569 return ROFF_IGN;
1570 }
1571
1572 if (buf->buf[pos] != '\0')
1573 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1574 ".. %s", buf->buf + pos);
1575
1576 roffnode_pop(r);
1577 roffnode_cleanscope(r);
1578 return ROFF_IGN;
1579
1580 }
1581
1582 static void
1583 roffnode_cleanscope(struct roff *r)
1584 {
1585
1586 while (r->last) {
1587 if (--r->last->endspan != 0)
1588 break;
1589 roffnode_pop(r);
1590 }
1591 }
1592
1593 static void
1594 roff_ccond(struct roff *r, int ln, int ppos)
1595 {
1596
1597 if (NULL == r->last) {
1598 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1599 ln, ppos, "\\}");
1600 return;
1601 }
1602
1603 switch (r->last->tok) {
1604 case ROFF_el:
1605 case ROFF_ie:
1606 case ROFF_if:
1607 break;
1608 default:
1609 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1610 ln, ppos, "\\}");
1611 return;
1612 }
1613
1614 if (r->last->endspan > -1) {
1615 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1616 ln, ppos, "\\}");
1617 return;
1618 }
1619
1620 roffnode_pop(r);
1621 roffnode_cleanscope(r);
1622 return;
1623 }
1624
1625 static enum rofferr
1626 roff_block(ROFF_ARGS)
1627 {
1628 const char *name;
1629 char *iname, *cp;
1630 size_t namesz;
1631
1632 /* Ignore groff compatibility mode for now. */
1633
1634 if (tok == ROFF_de1)
1635 tok = ROFF_de;
1636 else if (tok == ROFF_dei1)
1637 tok = ROFF_dei;
1638 else if (tok == ROFF_am1)
1639 tok = ROFF_am;
1640 else if (tok == ROFF_ami1)
1641 tok = ROFF_ami;
1642
1643 /* Parse the macro name argument. */
1644
1645 cp = buf->buf + pos;
1646 if (tok == ROFF_ig) {
1647 iname = NULL;
1648 namesz = 0;
1649 } else {
1650 iname = cp;
1651 namesz = roff_getname(r, &cp, ln, ppos);
1652 iname[namesz] = '\0';
1653 }
1654
1655 /* Resolve the macro name argument if it is indirect. */
1656
1657 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1658 if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1659 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1660 r->parse, ln, (int)(iname - buf->buf),
1661 "%.*s", (int)namesz, iname);
1662 namesz = 0;
1663 } else
1664 namesz = strlen(name);
1665 } else
1666 name = iname;
1667
1668 if (namesz == 0 && tok != ROFF_ig) {
1669 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
1670 ln, ppos, roff_name[tok]);
1671 return ROFF_IGN;
1672 }
1673
1674 roffnode_push(r, tok, name, ln, ppos);
1675
1676 /*
1677 * At the beginning of a `de' macro, clear the existing string
1678 * with the same name, if there is one. New content will be
1679 * appended from roff_block_text() in multiline mode.
1680 */
1681
1682 if (tok == ROFF_de || tok == ROFF_dei)
1683 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1684
1685 if (*cp == '\0')
1686 return ROFF_IGN;
1687
1688 /* Get the custom end marker. */
1689
1690 iname = cp;
1691 namesz = roff_getname(r, &cp, ln, ppos);
1692
1693 /* Resolve the end marker if it is indirect. */
1694
1695 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1696 if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1697 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1698 r->parse, ln, (int)(iname - buf->buf),
1699 "%.*s", (int)namesz, iname);
1700 namesz = 0;
1701 } else
1702 namesz = strlen(name);
1703 } else
1704 name = iname;
1705
1706 if (namesz)
1707 r->last->end = mandoc_strndup(name, namesz);
1708
1709 if (*cp != '\0')
1710 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
1711 ln, pos, ".%s ... %s", roff_name[tok], cp);
1712
1713 return ROFF_IGN;
1714 }
1715
1716 static enum rofferr
1717 roff_block_sub(ROFF_ARGS)
1718 {
1719 enum roff_tok t;
1720 int i, j;
1721
1722 /*
1723 * First check whether a custom macro exists at this level. If
1724 * it does, then check against it. This is some of groff's
1725 * stranger behaviours. If we encountered a custom end-scope
1726 * tag and that tag also happens to be a "real" macro, then we
1727 * need to try interpreting it again as a real macro. If it's
1728 * not, then return ignore. Else continue.
1729 */
1730
1731 if (r->last->end) {
1732 for (i = pos, j = 0; r->last->end[j]; j++, i++)
1733 if (buf->buf[i] != r->last->end[j])
1734 break;
1735
1736 if (r->last->end[j] == '\0' &&
1737 (buf->buf[i] == '\0' ||
1738 buf->buf[i] == ' ' ||
1739 buf->buf[i] == '\t')) {
1740 roffnode_pop(r);
1741 roffnode_cleanscope(r);
1742
1743 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
1744 i++;
1745
1746 pos = i;
1747 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
1748 TOKEN_NONE)
1749 return ROFF_RERUN;
1750 return ROFF_IGN;
1751 }
1752 }
1753
1754 /*
1755 * If we have no custom end-query or lookup failed, then try
1756 * pulling it out of the hashtable.
1757 */
1758
1759 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1760
1761 if (t != ROFF_cblock) {
1762 if (tok != ROFF_ig)
1763 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
1764 return ROFF_IGN;
1765 }
1766
1767 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
1768 }
1769
1770 static enum rofferr
1771 roff_block_text(ROFF_ARGS)
1772 {
1773
1774 if (tok != ROFF_ig)
1775 roff_setstr(r, r->last->name, buf->buf + pos, 2);
1776
1777 return ROFF_IGN;
1778 }
1779
1780 static enum rofferr
1781 roff_cond_sub(ROFF_ARGS)
1782 {
1783 enum roff_tok t;
1784 char *ep;
1785 int rr;
1786
1787 rr = r->last->rule;
1788 roffnode_cleanscope(r);
1789 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1790
1791 /*
1792 * Fully handle known macros when they are structurally
1793 * required or when the conditional evaluated to true.
1794 */
1795
1796 if (t != TOKEN_NONE && (rr || roffs[t].flags & ROFFMAC_STRUCT))
1797 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
1798
1799 /*
1800 * If `\}' occurs on a macro line without a preceding macro,
1801 * drop the line completely.
1802 */
1803
1804 ep = buf->buf + pos;
1805 if (ep[0] == '\\' && ep[1] == '}')
1806 rr = 0;
1807
1808 /* Always check for the closing delimiter `\}'. */
1809
1810 while ((ep = strchr(ep, '\\')) != NULL) {
1811 if (*(++ep) == '}') {
1812 *ep = '&';
1813 roff_ccond(r, ln, ep - buf->buf - 1);
1814 }
1815 if (*ep != '\0')
1816 ++ep;
1817 }
1818 return rr ? ROFF_CONT : ROFF_IGN;
1819 }
1820
1821 static enum rofferr
1822 roff_cond_text(ROFF_ARGS)
1823 {
1824 char *ep;
1825 int rr;
1826
1827 rr = r->last->rule;
1828 roffnode_cleanscope(r);
1829
1830 ep = buf->buf + pos;
1831 while ((ep = strchr(ep, '\\')) != NULL) {
1832 if (*(++ep) == '}') {
1833 *ep = '&';
1834 roff_ccond(r, ln, ep - buf->buf - 1);
1835 }
1836 if (*ep != '\0')
1837 ++ep;
1838 }
1839 return rr ? ROFF_CONT : ROFF_IGN;
1840 }
1841
1842 /* --- handling of numeric and conditional expressions -------------------- */
1843
1844 /*
1845 * Parse a single signed integer number. Stop at the first non-digit.
1846 * If there is at least one digit, return success and advance the
1847 * parse point, else return failure and let the parse point unchanged.
1848 * Ignore overflows, treat them just like the C language.
1849 */
1850 static int
1851 roff_getnum(const char *v, int *pos, int *res, int flags)
1852 {
1853 int myres, scaled, n, p;
1854
1855 if (NULL == res)
1856 res = &myres;
1857
1858 p = *pos;
1859 n = v[p] == '-';
1860 if (n || v[p] == '+')
1861 p++;
1862
1863 if (flags & ROFFNUM_WHITE)
1864 while (isspace((unsigned char)v[p]))
1865 p++;
1866
1867 for (*res = 0; isdigit((unsigned char)v[p]); p++)
1868 *res = 10 * *res + v[p] - '0';
1869 if (p == *pos + n)
1870 return 0;
1871
1872 if (n)
1873 *res = -*res;
1874
1875 /* Each number may be followed by one optional scaling unit. */
1876
1877 switch (v[p]) {
1878 case 'f':
1879 scaled = *res * 65536;
1880 break;
1881 case 'i':
1882 scaled = *res * 240;
1883 break;
1884 case 'c':
1885 scaled = *res * 240 / 2.54;
1886 break;
1887 case 'v':
1888 case 'P':
1889 scaled = *res * 40;
1890 break;
1891 case 'm':
1892 case 'n':
1893 scaled = *res * 24;
1894 break;
1895 case 'p':
1896 scaled = *res * 10 / 3;
1897 break;
1898 case 'u':
1899 scaled = *res;
1900 break;
1901 case 'M':
1902 scaled = *res * 6 / 25;
1903 break;
1904 default:
1905 scaled = *res;
1906 p--;
1907 break;
1908 }
1909 if (flags & ROFFNUM_SCALE)
1910 *res = scaled;
1911
1912 *pos = p + 1;
1913 return 1;
1914 }
1915
/*
 * Evaluate a string comparison condition.
 * The character at v + *pos is the delimiter.
 * Return 1 if the string between its first and second occurrence
 * equals the string between its second and third occurrence,
 * or 0 otherwise.
 * Advance *pos behind the third occurrence of the delimiter,
 * or lacking that, to the end of the line.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*lhs, *rhs;
	char		 delim;
	int		 equal;

	equal = 0;
	delim = v[*pos];
	lhs = v + *pos + 1;		/* scans the first string */
	rhs = strchr(lhs, delim);	/* scans the second string */

	/* Compare only if there is a middle delimiter. */

	if (rhs != NULL) {
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: find the final delimiter. */
				rhs = strchr(rhs, delim);
				break;
			}
			if (*rhs == delim) {
				/* Both strings ended at the same time. */
				equal = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return equal;
}
1958
1959 /*
1960 * Evaluate an optionally negated single character, numerical,
1961 * or string condition.
1962 */
1963 static int
1964 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
1965 {
1966 char *cp, *name;
1967 size_t sz;
1968 int number, savepos, wanttrue;
1969
1970 if ('!' == v[*pos]) {
1971 wanttrue = 0;
1972 (*pos)++;
1973 } else
1974 wanttrue = 1;
1975
1976 switch (v[*pos]) {
1977 case '\0':
1978 return 0;
1979 case 'n':
1980 case 'o':
1981 (*pos)++;
1982 return wanttrue;
1983 case 'c':
1984 case 'd':
1985 case 'e':
1986 case 't':
1987 case 'v':
1988 (*pos)++;
1989 return !wanttrue;
1990 case 'r':
1991 cp = name = v + ++*pos;
1992 sz = roff_getname(r, &cp, ln, *pos);
1993 *pos = cp - v;
1994 return (sz && roff_hasregn(r, name, sz)) == wanttrue;
1995 default:
1996 break;
1997 }
1998
1999 savepos = *pos;
2000 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2001 return (number > 0) == wanttrue;
2002 else if (*pos == savepos)
2003 return roff_evalstrcond(v, pos) == wanttrue;
2004 else
2005 return 0;
2006 }
2007
2008 static enum rofferr
2009 roff_line_ignore(ROFF_ARGS)
2010 {
2011
2012 return ROFF_IGN;
2013 }
2014
2015 static enum rofferr
2016 roff_insec(ROFF_ARGS)
2017 {
2018
2019 mandoc_msg(MANDOCERR_REQ_INSEC, r->parse,
2020 ln, ppos, roff_name[tok]);
2021 return ROFF_IGN;
2022 }
2023
2024 static enum rofferr
2025 roff_unsupp(ROFF_ARGS)
2026 {
2027
2028 mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse,
2029 ln, ppos, roff_name[tok]);
2030 return ROFF_IGN;
2031 }
2032
2033 static enum rofferr
2034 roff_cond(ROFF_ARGS)
2035 {
2036
2037 roffnode_push(r, tok, NULL, ln, ppos);
2038
2039 /*
2040 * An `.el' has no conditional body: it will consume the value
2041 * of the current rstack entry set in prior `ie' calls or
2042 * defaults to DENY.
2043 *
2044 * If we're not an `el', however, then evaluate the conditional.
2045 */
2046
2047 r->last->rule = tok == ROFF_el ?
2048 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2049 roff_evalcond(r, ln, buf->buf, &pos);
2050
2051 /*
2052 * An if-else will put the NEGATION of the current evaluated
2053 * conditional into the stack of rules.
2054 */
2055
2056 if (tok == ROFF_ie) {
2057 if (r->rstackpos + 1 == r->rstacksz) {
2058 r->rstacksz += 16;
2059 r->rstack = mandoc_reallocarray(r->rstack,
2060 r->rstacksz, sizeof(int));
2061 }
2062 r->rstack[++r->rstackpos] = !r->last->rule;
2063 }
2064
2065 /* If the parent has false as its rule, then so do we. */
2066
2067 if (r->last->parent && !r->last->parent->rule)
2068 r->last->rule = 0;
2069
2070 /*
2071 * Determine scope.
2072 * If there is nothing on the line after the conditional,
2073 * not even whitespace, use next-line scope.
2074 */
2075
2076 if (buf->buf[pos] == '\0') {
2077 r->last->endspan = 2;
2078 goto out;
2079 }
2080
2081 while (buf->buf[pos] == ' ')
2082 pos++;
2083
2084 /* An opening brace requests multiline scope. */
2085
2086 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2087 r->last->endspan = -1;
2088 pos += 2;
2089 while (buf->buf[pos] == ' ')
2090 pos++;
2091 goto out;
2092 }
2093
2094 /*
2095 * Anything else following the conditional causes
2096 * single-line scope. Warn if the scope contains
2097 * nothing but trailing whitespace.
2098 */
2099
2100 if (buf->buf[pos] == '\0')
2101 mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
2102 ln, ppos, roff_name[tok]);
2103
2104 r->last->endspan = 1;
2105
2106 out:
2107 *offs = pos;
2108 return ROFF_RERUN;
2109 }
2110
2111 static enum rofferr
2112 roff_ds(ROFF_ARGS)
2113 {
2114 char *string;
2115 const char *name;
2116 size_t namesz;
2117
2118 /* Ignore groff compatibility mode for now. */
2119
2120 if (tok == ROFF_ds1)
2121 tok = ROFF_ds;
2122 else if (tok == ROFF_as1)
2123 tok = ROFF_as;
2124
2125 /*
2126 * The first word is the name of the string.
2127 * If it is empty or terminated by an escape sequence,
2128 * abort the `ds' request without defining anything.
2129 */
2130
2131 name = string = buf->buf + pos;
2132 if (*name == '\0')
2133 return ROFF_IGN;
2134
2135 namesz = roff_getname(r, &string, ln, pos);
2136 if (name[namesz] == '\\')
2137 return ROFF_IGN;
2138
2139 /* Read past the initial double-quote, if any. */
2140 if (*string == '"')
2141 string++;
2142
2143 /* The rest is the value. */
2144 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2145 ROFF_as == tok);
2146 return ROFF_IGN;
2147 }
2148
/*
 * Parse a single operator, one or two characters long.
 * If the operator is recognized, store a one-character code
 * for it in *res, advance *pos behind it, and return the code.
 * The two-character operators are encoded as follows:
 * <= as l, >= as g, <> as !, <? as i, >? as a, and == as =.
 * Otherwise, return 0 and leave *pos unchanged; in that case,
 * *res is set to the character found.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*op;
	int		 len;

	op = v + *pos;
	len = 1;
	*res = op[0];

	if (op[0] == '<') {
		if (op[1] == '=') {
			*res = 'l';
			len = 2;
		} else if (op[1] == '>') {
			*res = '!';
			len = 2;
		} else if (op[1] == '?') {
			*res = 'i';
			len = 2;
		}
	} else if (op[0] == '>') {
		if (op[1] == '=') {
			*res = 'g';
			len = 2;
		} else if (op[1] == '?') {
			*res = 'a';
			len = 2;
		}
	} else if (op[0] == '=') {
		if (op[1] == '=')
			len = 2;
	} else if (op[0] == '\0' || strchr("+-*/%&:", op[0]) == NULL)
		return 0;

	*pos += len;
	return *res;
}
2212
2213 /*
2214 * Evaluate either a parenthesized numeric expression
2215 * or a single signed integer number.
2216 */
2217 static int
2218 roff_evalpar(struct roff *r, int ln,
2219 const char *v, int *pos, int *res, int flags)
2220 {
2221
2222 if ('(' != v[*pos])
2223 return roff_getnum(v, pos, res, flags);
2224
2225 (*pos)++;
2226 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2227 return 0;
2228
2229 /*
2230 * Omission of the closing parenthesis
2231 * is an error in validation mode,
2232 * but ignored in evaluation mode.
2233 */
2234
2235 if (')' == v[*pos])
2236 (*pos)++;
2237 else if (NULL == res)
2238 return 0;
2239
2240 return 1;
2241 }
2242
2243 /*
2244 * Evaluate a complete numeric expression.
2245 * Proceed left to right, there is no concept of precedence.
2246 */
2247 static int
2248 roff_evalnum(struct roff *r, int ln, const char *v,
2249 int *pos, int *res, int flags)
2250 {
2251 int mypos, operand2;
2252 char operator;
2253
2254 if (NULL == pos) {
2255 mypos = 0;
2256 pos = &mypos;
2257 }
2258
2259 if (flags & ROFFNUM_WHITE)
2260 while (isspace((unsigned char)v[*pos]))
2261 (*pos)++;
2262
2263 if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2264 return 0;
2265
2266 while (1) {
2267 if (flags & ROFFNUM_WHITE)
2268 while (isspace((unsigned char)v[*pos]))
2269 (*pos)++;
2270
2271 if ( ! roff_getop(v, pos, &operator))
2272 break;
2273
2274 if (flags & ROFFNUM_WHITE)
2275 while (isspace((unsigned char)v[*pos]))
2276 (*pos)++;
2277
2278 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2279 return 0;
2280
2281 if (flags & ROFFNUM_WHITE)
2282 while (isspace((unsigned char)v[*pos]))
2283 (*pos)++;
2284
2285 if (NULL == res)
2286 continue;
2287
2288 switch (operator) {
2289 case '+':
2290 *res += operand2;
2291 break;
2292 case '-':
2293 *res -= operand2;
2294 break;
2295 case '*':
2296 *res *= operand2;
2297 break;
2298 case '/':
2299 if (operand2 == 0) {
2300 mandoc_msg(MANDOCERR_DIVZERO,
2301 r->parse, ln, *pos, v);
2302 *res = 0;
2303 break;
2304 }
2305 *res /= operand2;
2306 break;
2307 case '%':
2308 if (operand2 == 0) {
2309 mandoc_msg(MANDOCERR_DIVZERO,
2310 r->parse, ln, *pos, v);
2311 *res = 0;
2312 break;
2313 }
2314 *res %= operand2;
2315 break;
2316 case '<':
2317 *res = *res < operand2;
2318 break;
2319 case '>':
2320 *res = *res > operand2;
2321 break;
2322 case 'l':
2323 *res = *res <= operand2;
2324 break;
2325 case 'g':
2326 *res = *res >= operand2;
2327 break;
2328 case '=':
2329 *res = *res == operand2;
2330 break;
2331 case '!':
2332 *res = *res != operand2;
2333 break;
2334 case '&':
2335 *res = *res && operand2;
2336 break;
2337 case ':':
2338 *res = *res || operand2;
2339 break;
2340 case 'i':
2341 if (operand2 < *res)
2342 *res = operand2;
2343 break;
2344 case 'a':
2345 if (operand2 > *res)
2346 *res = operand2;
2347 break;
2348 default:
2349 abort();
2350 }
2351 }
2352 return 1;
2353 }
2354
2355 /* --- register management ------------------------------------------------ */
2356
2357 void
2358 roff_setreg(struct roff *r, const char *name, int val, char sign)
2359 {
2360 struct roffreg *reg;
2361
2362 /* Search for an existing register with the same name. */
2363 reg = r->regtab;
2364
2365 while (reg && strcmp(name, reg->key.p))
2366 reg = reg->next;
2367
2368 if (NULL == reg) {
2369 /* Create a new register. */
2370 reg = mandoc_malloc(sizeof(struct roffreg));
2371 reg->key.p = mandoc_strdup(name);
2372 reg->key.sz = strlen(name);
2373 reg->val = 0;
2374 reg->next = r->regtab;
2375 r->regtab = reg;
2376 }
2377
2378 if ('+' == sign)
2379 reg->val += val;
2380 else if ('-' == sign)
2381 reg->val -= val;
2382 else
2383 reg->val = val;
2384 }
2385
2386 /*
2387 * Handle some predefined read-only number registers.
2388 * For now, return -1 if the requested register is not predefined;
2389 * in case a predefined read-only register having the value -1
2390 * were to turn up, another special value would have to be chosen.
2391 */
2392 static int
2393 roff_getregro(const struct roff *r, const char *name)
2394 {
2395
2396 switch (*name) {
2397 case '$': /* Number of arguments of the last macro evaluated. */
2398 return r->argc;
2399 case 'A': /* ASCII approximation mode is always off. */
2400 return 0;
2401 case 'g': /* Groff compatibility mode is always on. */
2402 return 1;
2403 case 'H': /* Fixed horizontal resolution. */
2404 return 24;
2405 case 'j': /* Always adjust left margin only. */
2406 return 0;
2407 case 'T': /* Some output device is always defined. */
2408 return 1;
2409 case 'V': /* Fixed vertical resolution. */
2410 return 40;
2411 default:
2412 return -1;
2413 }
2414 }
2415
2416 int
2417 roff_getreg(const struct roff *r, const char *name)
2418 {
2419 struct roffreg *reg;
2420 int val;
2421
2422 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
2423 val = roff_getregro(r, name + 1);
2424 if (-1 != val)
2425 return val;
2426 }
2427
2428 for (reg = r->regtab; reg; reg = reg->next)
2429 if (0 == strcmp(name, reg->key.p))
2430 return reg->val;
2431
2432 return 0;
2433 }
2434
2435 static int
2436 roff_getregn(const struct roff *r, const char *name, size_t len)
2437 {
2438 struct roffreg *reg;
2439 int val;
2440
2441 if ('.' == name[0] && 2 == len) {
2442 val = roff_getregro(r, name + 1);
2443 if (-1 != val)
2444 return val;
2445 }
2446
2447 for (reg = r->regtab; reg; reg = reg->next)
2448 if (len == reg->key.sz &&
2449 0 == strncmp(name, reg->key.p, len))
2450 return reg->val;
2451
2452 return 0;
2453 }
2454
2455 static int
2456 roff_hasregn(const struct roff *r, const char *name, size_t len)
2457 {
2458 struct roffreg *reg;
2459 int val;
2460
2461 if ('.' == name[0] && 2 == len) {
2462 val = roff_getregro(r, name + 1);
2463 if (-1 != val)
2464 return 1;
2465 }
2466
2467 for (reg = r->regtab; reg; reg = reg->next)
2468 if (len == reg->key.sz &&
2469 0 == strncmp(name, reg->key.p, len))
2470 return 1;
2471
2472 return 0;
2473 }
2474
2475 static void
2476 roff_freereg(struct roffreg *reg)
2477 {
2478 struct roffreg *old_reg;
2479
2480 while (NULL != reg) {
2481 free(reg->key.p);
2482 old_reg = reg;
2483 reg = reg->next;
2484 free(old_reg);
2485 }
2486 }
2487
2488 static enum rofferr
2489 roff_nr(ROFF_ARGS)
2490 {
2491 char *key, *val;
2492 size_t keysz;
2493 int iv;
2494 char sign;
2495
2496 key = val = buf->buf + pos;
2497 if (*key == '\0')
2498 return ROFF_IGN;
2499
2500 keysz = roff_getname(r, &val, ln, pos);
2501 if (key[keysz] == '\\')
2502 return ROFF_IGN;
2503 key[keysz] = '\0';
2504
2505 sign = *val;
2506 if (sign == '+' || sign == '-')
2507 val++;
2508
2509 if (roff_evalnum(r, ln, val, NULL, &iv, ROFFNUM_SCALE))
2510 roff_setreg(r, key, iv, sign);
2511
2512 return ROFF_IGN;
2513 }
2514
2515 static enum rofferr
2516 roff_rr(ROFF_ARGS)
2517 {
2518 struct roffreg *reg, **prev;
2519 char *name, *cp;
2520 size_t namesz;
2521
2522 name = cp = buf->buf + pos;
2523 if (*name == '\0')
2524 return ROFF_IGN;
2525 namesz = roff_getname(r, &cp, ln, pos);
2526 name[namesz] = '\0';
2527
2528 prev = &r->regtab;
2529 while (1) {
2530 reg = *prev;
2531 if (reg == NULL || !strcmp(name, reg->key.p))
2532 break;
2533 prev = &reg->next;
2534 }
2535 if (reg != NULL) {
2536 *prev = reg->next;
2537 free(reg->key.p);
2538 free(reg);
2539 }
2540 return ROFF_IGN;
2541 }
2542
2543 /* --- handler functions for roff requests -------------------------------- */
2544
2545 static enum rofferr
2546 roff_rm(ROFF_ARGS)
2547 {
2548 const char *name;
2549 char *cp;
2550 size_t namesz;
2551
2552 cp = buf->buf + pos;
2553 while (*cp != '\0') {
2554 name = cp;
2555 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
2556 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
2557 if (name[namesz] == '\\')
2558 break;
2559 }
2560 return ROFF_IGN;
2561 }
2562
2563 static enum rofferr
2564 roff_it(ROFF_ARGS)
2565 {
2566 int iv;
2567
2568 /* Parse the number of lines. */
2569
2570 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
2571 mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
2572 ln, ppos, buf->buf + 1);
2573 return ROFF_IGN;
2574 }
2575
2576 while (isspace((unsigned char)buf->buf[pos]))
2577 pos++;
2578
2579 /*
2580 * Arm the input line trap.
2581 * Special-casing "an-trap" is an ugly workaround to cope
2582 * with DocBook stupidly fiddling with man(7) internals.
2583 */
2584
2585 roffit_lines = iv;
2586 roffit_macro = mandoc_strdup(iv != 1 ||
2587 strcmp(buf->buf + pos, "an-trap") ?
2588 buf->buf + pos : "br");
2589 return ROFF_IGN;
2590 }
2591
2592 static enum rofferr
2593 roff_Dd(ROFF_ARGS)
2594 {
2595 const char *const *cp;
2596
2597 if ((r->options & (MPARSE_MDOC | MPARSE_QUICK)) == 0)
2598 for (cp = __mdoc_reserved; *cp; cp++)
2599 roff_setstr(r, *cp, NULL, 0);
2600
2601 if (r->format == 0)
2602 r->format = MPARSE_MDOC;
2603
2604 return ROFF_CONT;
2605 }
2606
2607 static enum rofferr
2608 roff_TH(ROFF_ARGS)
2609 {
2610 const char *const *cp;
2611
2612 if ((r->options & MPARSE_QUICK) == 0)
2613 for (cp = __man_reserved; *cp; cp++)
2614 roff_setstr(r, *cp, NULL, 0);
2615
2616 if (r->format == 0)
2617 r->format = MPARSE_MAN;
2618
2619 return ROFF_CONT;
2620 }
2621
2622 static enum rofferr
2623 roff_TE(ROFF_ARGS)
2624 {
2625
2626 if (NULL == r->tbl)
2627 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2628 ln, ppos, "TE");
2629 else if ( ! tbl_end(&r->tbl)) {
2630 free(buf->buf);
2631 buf->buf = mandoc_strdup(".sp");
2632 buf->sz = 4;
2633 return ROFF_REPARSE;
2634 }
2635 return ROFF_IGN;
2636 }
2637
2638 static enum rofferr
2639 roff_T_(ROFF_ARGS)
2640 {
2641
2642 if (NULL == r->tbl)
2643 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2644 ln, ppos, "T&");
2645 else
2646 tbl_restart(ln, ppos, r->tbl);
2647
2648 return ROFF_IGN;
2649 }
2650
2651 /*
2652 * Handle in-line equation delimiters.
2653 */
2654 static enum rofferr
2655 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
2656 {
2657 char *cp1, *cp2;
2658 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
2659
2660 /*
2661 * Outside equations, look for an opening delimiter.
2662 * If we are inside an equation, we already know it is
2663 * in-line, or this function wouldn't have been called;
2664 * so look for a closing delimiter.
2665 */
2666
2667 cp1 = buf->buf + pos;
2668 cp2 = strchr(cp1, r->eqn == NULL ?
2669 r->last_eqn->odelim : r->last_eqn->cdelim);
2670 if (cp2 == NULL)
2671 return ROFF_CONT;
2672
2673 *cp2++ = '\0';
2674 bef_pr = bef_nl = aft_nl = aft_pr = "";
2675
2676 /* Handle preceding text, protecting whitespace. */
2677
2678 if (*buf->buf != '\0') {
2679 if (r->eqn == NULL)
2680 bef_pr = "\\&";
2681 bef_nl = "\n";
2682 }
2683
2684 /*
2685 * Prepare replacing the delimiter with an equation macro
2686 * and drop leading white space from the equation.
2687 */
2688
2689 if (r->eqn == NULL) {
2690 while (*cp2 == ' ')
2691 cp2++;
2692 mac = ".EQ";
2693 } else
2694 mac = ".EN";
2695
2696 /* Handle following text, protecting whitespace. */
2697
2698 if (*cp2 != '\0') {
2699 aft_nl = "\n";
2700 if (r->eqn != NULL)
2701 aft_pr = "\\&";
2702 }
2703
2704 /* Do the actual replacement. */
2705
2706 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
2707 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
2708 free(buf->buf);
2709 buf->buf = cp1;
2710
2711 /* Toggle the in-line state of the eqn subsystem. */
2712
2713 r->eqn_inline = r->eqn == NULL;
2714 return ROFF_REPARSE;
2715 }
2716
2717 static enum rofferr
2718 roff_EQ(ROFF_ARGS)
2719 {
2720 struct eqn_node *e;
2721
2722 assert(r->eqn == NULL);
2723 e = eqn_alloc(ppos, ln, r->parse);
2724
2725 if (r->last_eqn) {
2726 r->last_eqn->next = e;
2727 e->delim = r->last_eqn->delim;
2728 e->odelim = r->last_eqn->odelim;
2729 e->cdelim = r->last_eqn->cdelim;
2730 } else
2731 r->first_eqn = r->last_eqn = e;
2732
2733 r->eqn = r->last_eqn = e;
2734
2735 if (buf->buf[pos] != '\0')
2736 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2737 ".EQ %s", buf->buf + pos);
2738
2739 return ROFF_IGN;
2740 }
2741
2742 static enum rofferr
2743 roff_EN(ROFF_ARGS)
2744 {
2745
2746 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
2747 return ROFF_IGN;
2748 }
2749
2750 static enum rofferr
2751 roff_TS(ROFF_ARGS)
2752 {
2753 struct tbl_node *tbl;
2754
2755 if (r->tbl) {
2756 mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
2757 ln, ppos, "TS breaks TS");
2758 tbl_end(&r->tbl);
2759 }
2760
2761 tbl = tbl_alloc(ppos, ln, r->parse);
2762
2763 if (r->last_tbl)
2764 r->last_tbl->next = tbl;
2765 else
2766 r->first_tbl = r->last_tbl = tbl;
2767
2768 r->tbl = r->last_tbl = tbl;
2769 return ROFF_IGN;
2770 }
2771
2772 static enum rofferr
2773 roff_onearg(ROFF_ARGS)
2774 {
2775 struct roff_node *n;
2776 char *cp;
2777
2778 roff_elem_alloc(r->man, ln, ppos, tok);
2779 n = r->man->last;
2780
2781 cp = buf->buf + pos;
2782 if (*cp != '\0') {
2783 while (*cp != '\0' && *cp != ' ')
2784 cp++;
2785 while (*cp == ' ')
2786 *cp++ = '\0';
2787 if (*cp != '\0')
2788 mandoc_vmsg(MANDOCERR_ARG_EXCESS,
2789 r->parse, ln, cp - buf->buf,
2790 "%s ... %s", roff_name[tok], cp);
2791 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
2792 }
2793
2794 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
2795 r->man->last = n;
2796 r->man->next = ROFF_NEXT_SIBLING;
2797 return ROFF_IGN;
2798 }
2799
2800 static enum rofferr
2801 roff_br(ROFF_ARGS)
2802 {
2803 roff_elem_alloc(r->man, ln, ppos, ROFF_br);
2804 if (buf->buf[pos] != '\0')
2805 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2806 "%s %s", roff_name[tok], buf->buf + pos);
2807 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
2808 r->man->next = ROFF_NEXT_SIBLING;
2809 return ROFF_IGN;
2810 }
2811
2812 static enum rofferr
2813 roff_cc(ROFF_ARGS)
2814 {
2815 const char *p;
2816
2817 p = buf->buf + pos;
2818
2819 if (*p == '\0' || (r->control = *p++) == '.')
2820 r->control = 0;
2821
2822 if (*p != '\0')
2823 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
2824 ln, p - buf->buf, "cc ... %s", p);
2825
2826 return ROFF_IGN;
2827 }
2828
2829 static enum rofferr
2830 roff_tr(ROFF_ARGS)
2831 {
2832 const char *p, *first, *second;
2833 size_t fsz, ssz;
2834 enum mandoc_esc esc;
2835
2836 p = buf->buf + pos;
2837
2838 if (*p == '\0') {
2839 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr");
2840 return ROFF_IGN;
2841 }
2842
2843 while (*p != '\0') {
2844 fsz = ssz = 1;
2845
2846 first = p++;
2847 if (*first == '\\') {
2848 esc = mandoc_escape(&p, NULL, NULL);
2849 if (esc == ESCAPE_ERROR) {
2850 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2851 ln, (int)(p - buf->buf), first);
2852 return ROFF_IGN;
2853 }
2854 fsz = (size_t)(p - first);
2855 }
2856
2857 second = p++;
2858 if (*second == '\\') {
2859 esc = mandoc_escape(&p, NULL, NULL);
2860 if (esc == ESCAPE_ERROR) {
2861 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2862 ln, (int)(p - buf->buf), second);
2863 return ROFF_IGN;
2864 }
2865 ssz = (size_t)(p - second);
2866 } else if (*second == '\0') {
2867 mandoc_vmsg(MANDOCERR_TR_ODD, r->parse,
2868 ln, first - buf->buf, "tr %s", first);
2869 second = " ";
2870 p--;
2871 }
2872
2873 if (fsz > 1) {
2874 roff_setstrn(&r->xmbtab, first, fsz,
2875 second, ssz, 0);
2876 continue;
2877 }
2878
2879 if (r->xtab == NULL)
2880 r->xtab = mandoc_calloc(128,
2881 sizeof(struct roffstr));
2882
2883 free(r->xtab[(int)*first].p);
2884 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
2885 r->xtab[(int)*first].sz = ssz;
2886 }
2887
2888 return ROFF_IGN;
2889 }
2890
2891 static enum rofferr
2892 roff_so(ROFF_ARGS)
2893 {
2894 char *name, *cp;
2895
2896 name = buf->buf + pos;
2897 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
2898
2899 /*
2900 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
2901 * opening anything that's not in our cwd or anything beneath
2902 * it. Thus, explicitly disallow traversing up the file-system
2903 * or using absolute paths.
2904 */
2905
2906 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
2907 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
2908 ".so %s", name);
2909 buf->sz = mandoc_asprintf(&cp,
2910 ".sp\nSee the file %s.\n.sp", name) + 1;
2911 free(buf->buf);
2912 buf->buf = cp;
2913 *offs = 0;
2914 return ROFF_REPARSE;
2915 }
2916
2917 *offs = pos;
2918 return ROFF_SO;
2919 }
2920
2921 /* --- user defined strings and macros ------------------------------------ */
2922
2923 static enum rofferr
2924 roff_userdef(ROFF_ARGS)
2925 {
2926 const char *arg[9], *ap;
2927 char *cp, *n1, *n2;
2928 int expand_count, i, ib, ie;
2929 size_t asz, rsz;
2930
2931 /*
2932 * Collect pointers to macro argument strings
2933 * and NUL-terminate them.
2934 */
2935
2936 r->argc = 0;
2937 cp = buf->buf + pos;
2938 for (i = 0; i < 9; i++) {
2939 if (*cp == '\0')
2940 arg[i] = "";
2941 else {
2942 arg[i] = mandoc_getarg(r->parse, &cp, ln, &pos);
2943 r->argc = i + 1;
2944 }
2945 }
2946
2947 /*
2948 * Expand macro arguments.
2949 */
2950
2951 buf->sz = strlen(r->current_string) + 1;
2952 n1 = n2 = cp = mandoc_malloc(buf->sz);
2953 memcpy(n1, r->current_string, buf->sz);
2954 expand_count = 0;
2955 while (*cp != '\0') {
2956
2957 /* Scan ahead for the next argument invocation. */
2958
2959 if (*cp++ != '\\')
2960 continue;
2961 if (*cp++ != '$')
2962 continue;
2963 if (*cp == '*') { /* \\$* inserts all arguments */
2964 ib = 0;
2965 ie = r->argc - 1;
2966 } else { /* \\$1 .. \\$9 insert one argument */
2967 ib = ie = *cp - '1';
2968 if (ib < 0 || ib > 8)
2969 continue;
2970 }
2971 cp -= 2;
2972
2973 /*
2974 * Prevent infinite recursion.
2975 */
2976
2977 if (cp >= n2)
2978 expand_count = 1;
2979 else if (++expand_count > EXPAND_LIMIT) {
2980 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
2981 ln, (int)(cp - n1), NULL);
2982 free(buf->buf);
2983 buf->buf = n1;
2984 return ROFF_IGN;
2985 }
2986
2987 /*
2988 * Determine the size of the expanded argument,
2989 * taking escaping of quotes into account.
2990 */
2991
2992 asz = ie > ib ? ie - ib : 0; /* for blanks */
2993 for (i = ib; i <= ie; i++) {
2994 for (ap = arg[i]; *ap != '\0'; ap++) {
2995 asz++;
2996 if (*ap == '"')
2997 asz += 3;
2998 }
2999 }
3000 if (asz != 3) {
3001
3002 /*
3003 * Determine the size of the rest of the
3004 * unexpanded macro, including the NUL.
3005 */
3006
3007 rsz = buf->sz - (cp - n1) - 3;
3008
3009 /*
3010 * When shrinking, move before
3011 * releasing the storage.
3012 */
3013
3014 if (asz < 3)
3015 memmove(cp + asz, cp + 3, rsz);
3016
3017 /*
3018 * Resize the storage for the macro
3019 * and readjust the parse pointer.
3020 */
3021
3022 buf->sz += asz - 3;
3023 n2 = mandoc_realloc(n1, buf->sz);
3024 cp = n2 + (cp - n1);
3025 n1 = n2;
3026
3027 /*
3028 * When growing, make room
3029 * for the expanded argument.
3030 */
3031
3032 if (asz > 3)
3033 memmove(cp + asz, cp + 3, rsz);
3034 }
3035
3036 /* Copy the expanded argument, escaping quotes. */
3037
3038 n2 = cp;
3039 for (i = ib; i <= ie; i++) {
3040 for (ap = arg[i]; *ap != '\0'; ap++) {
3041 if (*ap == '"') {
3042 memcpy(n2, "\\(dq", 4);
3043 n2 += 4;
3044 } else
3045 *n2++ = *ap;
3046 }
3047 if (i < ie)
3048 *n2++ = ' ';
3049 }
3050 }
3051
3052 /*
3053 * Replace the macro invocation
3054 * by the expanded macro.
3055 */
3056
3057 free(buf->buf);
3058 buf->buf = n1;
3059 *offs = 0;
3060
3061 return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
3062 ROFF_REPARSE : ROFF_APPEND;
3063 }
3064
3065 static size_t
3066 roff_getname(struct roff *r, char **cpp, int ln, int pos)
3067 {
3068 char *name, *cp;
3069 size_t namesz;
3070
3071 name = *cpp;
3072 if ('\0' == *name)
3073 return 0;
3074
3075 /* Read until end of name and terminate it with NUL. */
3076 for (cp = name; 1; cp++) {
3077 if ('\0' == *cp || ' ' == *cp) {
3078 namesz = cp - name;
3079 break;
3080 }
3081 if ('\\' != *cp)
3082 continue;
3083 namesz = cp - name;
3084 if ('{' == cp[1] || '}' == cp[1])
3085 break;
3086 cp++;
3087 if ('\\' == *cp)
3088 continue;
3089 mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
3090 "%.*s", (int)(cp - name + 1), name);
3091 mandoc_escape((const char **)&cp, NULL, NULL);
3092 break;
3093 }
3094
3095 /* Read past spaces. */
3096 while (' ' == *cp)
3097 cp++;
3098
3099 *cpp = cp;
3100 return namesz;
3101 }
3102
3103 /*
3104 * Store *string into the user-defined string called *name.
3105 * To clear an existing entry, call with (*r, *name, NULL, 0).
3106 * append == 0: replace mode
3107 * append == 1: single-line append mode
3108 * append == 2: multiline append mode, append '\n' after each call
3109 */
3110 static void
3111 roff_setstr(struct roff *r, const char *name, const char *string,
3112 int append)
3113 {
3114
3115 roff_setstrn(&r->strtab, name, strlen(name), string,
3116 string ? strlen(string) : 0, append);
3117 }
3118
3119 static void
3120 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
3121 const char *string, size_t stringsz, int append)
3122 {
3123 struct roffkv *n;
3124 char *c;
3125 int i;
3126 size_t oldch, newch;
3127
3128 /* Search for an existing string with the same name. */
3129 n = *r;
3130
3131 while (n && (namesz != n->key.sz ||
3132 strncmp(n->key.p, name, namesz)))
3133 n = n->next;
3134
3135 if (NULL == n) {
3136 /* Create a new string table entry. */
3137 n = mandoc_malloc(sizeof(struct roffkv));
3138 n->key.p = mandoc_strndup(name, namesz);
3139 n->key.sz = namesz;
3140 n->val.p = NULL;
3141 n->val.sz = 0;
3142 n->next = *r;
3143 *r = n;
3144 } else if (0 == append) {
3145 free(n->val.p);
3146 n->val.p = NULL;
3147 n->val.sz = 0;
3148 }
3149
3150 if (NULL == string)
3151 return;
3152
3153 /*
3154 * One additional byte for the '\n' in multiline mode,
3155 * and one for the terminating '\0'.
3156 */
3157 newch = stringsz + (1 < append ? 2u : 1u);
3158
3159 if (NULL == n->val.p) {
3160 n->val.p = mandoc_malloc(newch);
3161 *n->val.p = '\0';
3162 oldch = 0;
3163 } else {
3164 oldch = n->val.sz;
3165 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
3166 }
3167
3168 /* Skip existing content in the destination buffer. */
3169 c = n->val.p + (int)oldch;
3170
3171 /* Append new content to the destination buffer. */
3172 i = 0;
3173 while (i < (int)stringsz) {
3174 /*
3175 * Rudimentary roff copy mode:
3176 * Handle escaped backslashes.
3177 */
3178 if ('\\' == string[i] && '\\' == string[i + 1])
3179 i++;
3180 *c++ = string[i++];
3181 }
3182
3183 /* Append terminating bytes. */
3184 if (1 < append)
3185 *c++ = '\n';
3186
3187 *c = '\0';
3188 n->val.sz = (int)(c - n->val.p);
3189 }
3190
3191 static const char *
3192 roff_getstrn(const struct roff *r, const char *name, size_t len)
3193 {
3194 const struct roffkv *n;
3195 int i;
3196
3197 for (n = r->strtab; n; n = n->next)
3198 if (0 == strncmp(name, n->key.p, len) &&
3199 '\0' == n->key.p[(int)len])
3200 return n->val.p;
3201
3202 for (i = 0; i < PREDEFS_MAX; i++)
3203 if (0 == strncmp(name, predefs[i].name, len) &&
3204 '\0' == predefs[i].name[(int)len])
3205 return predefs[i].str;
3206
3207 return NULL;
3208 }
3209
3210 static void
3211 roff_freestr(struct roffkv *r)
3212 {
3213 struct roffkv *n, *nn;
3214
3215 for (n = r; n; n = nn) {
3216 free(n->key.p);
3217 free(n->val.p);
3218 nn = n->next;
3219 free(n);
3220 }
3221 }
3222
3223 /* --- accessors and utility functions ------------------------------------ */
3224
3225 const struct tbl_span *
3226 roff_span(const struct roff *r)
3227 {
3228
3229 return r->tbl ? tbl_span(r->tbl) : NULL;
3230 }
3231
3232 const struct eqn *
3233 roff_eqn(const struct roff *r)
3234 {
3235
3236 return r->last_eqn ? &r->last_eqn->eqn : NULL;
3237 }
3238
3239 /*
3240 * Duplicate an input string, making the appropriate character
3241 * conversations (as stipulated by `tr') along the way.
3242 * Returns a heap-allocated string with all the replacements made.
3243 */
3244 char *
3245 roff_strdup(const struct roff *r, const char *p)
3246 {
3247 const struct roffkv *cp;
3248 char *res;
3249 const char *pp;
3250 size_t ssz, sz;
3251 enum mandoc_esc esc;
3252
3253 if (NULL == r->xmbtab && NULL == r->xtab)
3254 return mandoc_strdup(p);
3255 else if ('\0' == *p)
3256 return mandoc_strdup("");
3257
3258 /*
3259 * Step through each character looking for term matches
3260 * (remember that a `tr' can be invoked with an escape, which is
3261 * a glyph but the escape is multi-character).
3262 * We only do this if the character hash has been initialised
3263 * and the string is >0 length.
3264 */
3265
3266 res = NULL;
3267 ssz = 0;
3268
3269 while ('\0' != *p) {
3270 assert((unsigned int)*p < 128);
3271 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
3272 sz = r->xtab[(int)*p].sz;
3273 res = mandoc_realloc(res, ssz + sz + 1);
3274 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
3275 ssz += sz;
3276 p++;
3277 continue;
3278 } else if ('\\' != *p) {
3279 res = mandoc_realloc(res, ssz + 2);
3280 res[ssz++] = *p++;
3281 continue;
3282 }
3283
3284 /* Search for term matches. */
3285 for (cp = r->xmbtab; cp; cp = cp->next)
3286 if (0 == strncmp(p, cp->key.p, cp->key.sz))
3287 break;
3288
3289 if (NULL != cp) {
3290 /*
3291 * A match has been found.
3292 * Append the match to the array and move
3293 * forward by its keysize.
3294 */
3295 res = mandoc_realloc(res,
3296 ssz + cp->val.sz + 1);
3297 memcpy(res + ssz, cp->val.p, cp->val.sz);
3298 ssz += cp->val.sz;
3299 p += (int)cp->key.sz;
3300 continue;
3301 }
3302
3303 /*
3304 * Handle escapes carefully: we need to copy
3305 * over just the escape itself, or else we might
3306 * do replacements within the escape itself.
3307 * Make sure to pass along the bogus string.
3308 */
3309 pp = p++;
3310 esc = mandoc_escape(&p, NULL, NULL);
3311 if (ESCAPE_ERROR == esc) {
3312 sz = strlen(pp);
3313 res = mandoc_realloc(res, ssz + sz + 1);
3314 memcpy(res + ssz, pp, sz);
3315 break;
3316 }
3317 /*
3318 * We bail out on bad escapes.
3319 * No need to warn: we already did so when
3320 * roff_res() was called.
3321 */
3322 sz = (int)(p - pp);
3323 res = mandoc_realloc(res, ssz + sz + 1);
3324 memcpy(res + ssz, pp, sz);
3325 ssz += sz;
3326 }
3327
3328 res[(int)ssz] = '\0';
3329 return res;
3330 }
3331
3332 int
3333 roff_getformat(const struct roff *r)
3334 {
3335
3336 return r->format;
3337 }
3338
3339 /*
3340 * Find out whether a line is a macro line or not.
3341 * If it is, adjust the current position and return one; if it isn't,
3342 * return zero and don't change the current position.
3343 * If the control character has been set with `.cc', then let that grain
3344 * precedence.
3345 * This is slighly contrary to groff, where using the non-breaking
3346 * control character when `cc' has been invoked will cause the
3347 * non-breaking macro contents to be printed verbatim.
3348 */
3349 int
3350 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
3351 {
3352 int pos;
3353
3354 pos = *ppos;
3355
3356 if (0 != r->control && cp[pos] == r->control)
3357 pos++;
3358 else if (0 != r->control)
3359 return 0;
3360 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
3361 pos += 2;
3362 else if ('.' == cp[pos] || '\'' == cp[pos])
3363 pos++;
3364 else
3365 return 0;
3366
3367 while (' ' == cp[pos] || '\t' == cp[pos])
3368 pos++;
3369
3370 *ppos = pos;
3371 return 1;
3372 }