]>
git.cameronkatri.com Git - mandoc.git/blob - roff.c
1 /* $Id: roff.c,v 1.386 2022/04/30 18:51:36 schwarze Exp $ */
3 * Copyright (c) 2010-2015, 2017-2022 Ingo Schwarze <schwarze@openbsd.org>
4 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 * Implementation of the roff(7) parser for mandoc(1).
22 #include <sys/types.h>
33 #include "mandoc_aux.h"
34 #include "mandoc_ohash.h"
37 #include "mandoc_parse.h"
38 #include "libmandoc.h"
40 #include "tbl_parse.h"
41 #include "eqn_parse.h"
44 * ASCII_ESC is used to signal from roff_getarg() to roff_expand()
45 * that an escape sequence resulted from copy-in processing and
46 * needs to be checked or interpolated. As it is used nowhere
47 * else, it is defined here rather than in a header file.
51 /* Maximum number of string expansions per line, to break infinite loops. */
52 #define EXPAND_LIMIT 1000
54 /* Types of definitions of macros and strings. */
55 #define ROFFDEF_USER (1 << 1) /* User-defined. */
56 #define ROFFDEF_PRE (1 << 2) /* Predefined. */
57 #define ROFFDEF_REN (1 << 3) /* Renamed standard macro. */
58 #define ROFFDEF_STD (1 << 4) /* mdoc(7) or man(7) macro. */
59 #define ROFFDEF_ANY (ROFFDEF_USER | ROFFDEF_PRE | \
60 ROFFDEF_REN | ROFFDEF_STD)
61 #define ROFFDEF_UNDEF (1 << 5) /* Completely undefined. */
63 /* --- data types --------------------------------------------------------- */
66 * An incredibly-simple string buffer.
69 char *p
; /* nil-terminated buffer */
70 size_t sz
; /* saved strlen(p) */
74 * A key-value roffstr pair as part of a singly-linked list.
79 struct roffkv
*next
; /* next in list */
83 * A single number register as part of a singly-linked list.
93 * Association of request and macro names with token IDs.
101 * A macro processing context.
102 * More than one is needed when macro calls are nested.
111 struct roff_man
*man
; /* mdoc or man parser */
112 struct roffnode
*last
; /* leaf of stack */
113 struct mctx
*mstack
; /* stack of macro contexts */
114 int *rstack
; /* stack of inverted `ie' values */
115 struct ohash
*reqtab
; /* request lookup table */
116 struct roffreg
*regtab
; /* number registers */
117 struct roffkv
*strtab
; /* user-defined strings & macros */
118 struct roffkv
*rentab
; /* renamed strings & macros */
119 struct roffkv
*xmbtab
; /* multi-byte trans table (`tr') */
120 struct roffstr
*xtab
; /* single-byte trans table (`tr') */
121 const char *current_string
; /* value of last called user macro */
122 struct tbl_node
*first_tbl
; /* first table parsed */
123 struct tbl_node
*last_tbl
; /* last table parsed */
124 struct tbl_node
*tbl
; /* current table being parsed */
125 struct eqn_node
*last_eqn
; /* equation parser */
126 struct eqn_node
*eqn
; /* active equation parser */
127 int eqn_inline
; /* current equation is inline */
128 int options
; /* parse options */
129 int mstacksz
; /* current size of mstack */
130 int mstackpos
; /* position in mstack */
131 int rstacksz
; /* current size limit of rstack */
132 int rstackpos
; /* position in rstack */
133 int format
; /* current file in mdoc or man format */
134 char control
; /* control character */
135 char escape
; /* escape character */
139 * A macro definition, condition, or ignored block.
142 enum roff_tok tok
; /* type of node */
143 struct roffnode
*parent
; /* up one in stack */
144 int line
; /* parse line */
145 int col
; /* parse col */
146 char *name
; /* node name, e.g. macro name */
147 char *end
; /* custom end macro of the block */
148 int endspan
; /* scope to: 1=eol 2=next line -1=\} */
149 int rule
; /* content is: 1=evaluated 0=skipped */
152 #define ROFF_ARGS struct roff *r, /* parse ctx */ \
153 enum roff_tok tok, /* tok of macro */ \
154 struct buf *buf, /* input buffer */ \
155 int ln, /* parse line */ \
156 int ppos, /* original pos in buffer */ \
157 int pos, /* current pos in buffer */ \
158 int *offs /* reset offset of buffer data */
160 typedef int (*roffproc
)(ROFF_ARGS
);
163 roffproc proc
; /* process new macro */
164 roffproc text
; /* process as child text of macro */
165 roffproc sub
; /* process as child of macro */
167 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
171 const char *name
; /* predefined input name */
172 const char *str
; /* replacement symbol */
175 #define PREDEF(__name, __str) \
176 { (__name), (__str) },
178 /* --- function prototypes ------------------------------------------------ */
180 static int roffnode_cleanscope(struct roff
*);
181 static int roffnode_pop(struct roff
*);
182 static void roffnode_push(struct roff
*, enum roff_tok
,
183 const char *, int, int);
184 static void roff_addtbl(struct roff_man
*, int, struct tbl_node
*);
185 static int roff_als(ROFF_ARGS
);
186 static int roff_block(ROFF_ARGS
);
187 static int roff_block_text(ROFF_ARGS
);
188 static int roff_block_sub(ROFF_ARGS
);
189 static int roff_break(ROFF_ARGS
);
190 static int roff_cblock(ROFF_ARGS
);
191 static int roff_cc(ROFF_ARGS
);
192 static int roff_ccond(struct roff
*, int, int);
193 static int roff_char(ROFF_ARGS
);
194 static int roff_cond(ROFF_ARGS
);
195 static int roff_cond_checkend(ROFF_ARGS
);
196 static int roff_cond_text(ROFF_ARGS
);
197 static int roff_cond_sub(ROFF_ARGS
);
198 static int roff_ds(ROFF_ARGS
);
199 static int roff_ec(ROFF_ARGS
);
200 static int roff_eo(ROFF_ARGS
);
201 static int roff_eqndelim(struct roff
*, struct buf
*, int);
202 static int roff_evalcond(struct roff
*, int, char *, int *);
203 static int roff_evalnum(struct roff
*, int,
204 const char *, int *, int *, int);
205 static int roff_evalpar(struct roff
*, int,
206 const char *, int *, int *, int);
207 static int roff_evalstrcond(const char *, int *);
208 static int roff_expand(struct roff
*, struct buf
*,
210 static void roff_free1(struct roff
*);
211 static void roff_freereg(struct roffreg
*);
212 static void roff_freestr(struct roffkv
*);
213 static size_t roff_getname(struct roff
*, char **, int, int);
214 static int roff_getnum(const char *, int *, int *, int);
215 static int roff_getop(const char *, int *, char *);
216 static int roff_getregn(struct roff
*,
217 const char *, size_t, char);
218 static int roff_getregro(const struct roff
*,
220 static const char *roff_getstrn(struct roff
*,
221 const char *, size_t, int *);
222 static int roff_hasregn(const struct roff
*,
223 const char *, size_t);
224 static int roff_insec(ROFF_ARGS
);
225 static int roff_it(ROFF_ARGS
);
226 static int roff_line_ignore(ROFF_ARGS
);
227 static void roff_man_alloc1(struct roff_man
*);
228 static void roff_man_free1(struct roff_man
*);
229 static int roff_manyarg(ROFF_ARGS
);
230 static int roff_mc(ROFF_ARGS
);
231 static int roff_noarg(ROFF_ARGS
);
232 static int roff_nop(ROFF_ARGS
);
233 static int roff_nr(ROFF_ARGS
);
234 static int roff_onearg(ROFF_ARGS
);
235 static enum roff_tok
roff_parse(struct roff
*, char *, int *,
237 static int roff_parsetext(struct roff
*, struct buf
*,
239 static int roff_renamed(ROFF_ARGS
);
240 static int roff_req_or_macro(ROFF_ARGS
);
241 static int roff_return(ROFF_ARGS
);
242 static int roff_rm(ROFF_ARGS
);
243 static int roff_rn(ROFF_ARGS
);
244 static int roff_rr(ROFF_ARGS
);
245 static void roff_setregn(struct roff
*, const char *,
246 size_t, int, char, int);
247 static void roff_setstr(struct roff
*,
248 const char *, const char *, int);
249 static void roff_setstrn(struct roffkv
**, const char *,
250 size_t, const char *, size_t, int);
251 static int roff_shift(ROFF_ARGS
);
252 static int roff_so(ROFF_ARGS
);
253 static int roff_tr(ROFF_ARGS
);
254 static int roff_Dd(ROFF_ARGS
);
255 static int roff_TE(ROFF_ARGS
);
256 static int roff_TS(ROFF_ARGS
);
257 static int roff_EQ(ROFF_ARGS
);
258 static int roff_EN(ROFF_ARGS
);
259 static int roff_T_(ROFF_ARGS
);
260 static int roff_unsupp(ROFF_ARGS
);
261 static int roff_userdef(ROFF_ARGS
);
263 /* --- constant data ------------------------------------------------------ */
265 #define ROFFNUM_SCALE (1 << 0) /* Honour scaling in roff_getnum(). */
266 #define ROFFNUM_WHITE (1 << 1) /* Skip whitespace in roff_evalnum(). */
268 const char *__roff_name
[MAN_MAX
+ 1] = {
269 "br", "ce", "fi", "ft",
273 "ab", "ad", "af", "aln",
274 "als", "am", "am1", "ami",
275 "ami1", "as", "as1", "asciify",
276 "backtrace", "bd", "bleedat", "blm",
277 "box", "boxa", "bp", "BP",
278 "break", "breakchar", "brnl", "brp",
280 "cf", "cflags", "ch", "char",
281 "chop", "class", "close", "CL",
282 "color", "composite", "continue", "cp",
283 "cropat", "cs", "cu", "da",
284 "dch", "Dd", "de", "de1",
285 "defcolor", "dei", "dei1", "device",
286 "devicem", "di", "do", "ds",
287 "ds1", "dwh", "dt", "ec",
288 "ecr", "ecs", "el", "em",
289 "EN", "eo", "EP", "EQ",
290 "errprint", "ev", "evc", "ex",
291 "fallback", "fam", "fc", "fchar",
292 "fcolor", "fdeferlig", "feature", "fkern",
293 "fl", "flig", "fp", "fps",
294 "fschar", "fspacewidth", "fspecial", "ftr",
295 "fzoom", "gcolor", "hc", "hcode",
296 "hidechar", "hla", "hlm", "hpf",
297 "hpfa", "hpfcode", "hw", "hy",
298 "hylang", "hylen", "hym", "hypp",
299 "hys", "ie", "if", "ig",
300 "index", "it", "itc", "IX",
301 "kern", "kernafter", "kernbefore", "kernpair",
302 "lc", "lc_ctype", "lds", "length",
303 "letadj", "lf", "lg", "lhang",
304 "linetabs", "lnr", "lnrf", "lpfx",
306 "mediasize", "minss", "mk", "mso",
307 "na", "ne", "nh", "nhychar",
308 "nm", "nn", "nop", "nr",
309 "nrf", "nroff", "ns", "nx",
310 "open", "opena", "os", "output",
311 "padj", "papersize", "pc", "pev",
312 "pi", "PI", "pl", "pm",
314 "psbb", "pshape", "pso", "ptr",
315 "pvs", "rchar", "rd", "recursionlimit",
316 "return", "rfschar", "rhang",
317 "rm", "rn", "rnn", "rr",
318 "rs", "rt", "schar", "sentchar",
319 "shc", "shift", "sizes", "so",
320 "spacewidth", "special", "spreadwarn", "ss",
321 "sty", "substring", "sv", "sy",
324 "tm", "tm1", "tmc", "tr",
325 "track", "transchar", "trf", "trimat",
326 "trin", "trnt", "troff", "TS",
327 "uf", "ul", "unformat", "unwatch",
328 "unwatchn", "vpt", "vs", "warn",
329 "warnscale", "watch", "watchlength", "watchn",
330 "wh", "while", "write", "writec",
331 "writem", "xflag", ".", NULL
,
333 "Dd", "Dt", "Os", "Sh",
334 "Ss", "Pp", "D1", "Dl",
335 "Bd", "Ed", "Bl", "El",
336 "It", "Ad", "An", "Ap",
337 "Ar", "Cd", "Cm", "Dv",
338 "Er", "Ev", "Ex", "Fa",
339 "Fd", "Fl", "Fn", "Ft",
340 "Ic", "In", "Li", "Nd",
341 "Nm", "Op", "Ot", "Pa",
342 "Rv", "St", "Va", "Vt",
343 "Xr", "%A", "%B", "%D",
344 "%I", "%J", "%N", "%O",
345 "%P", "%R", "%T", "%V",
346 "Ac", "Ao", "Aq", "At",
347 "Bc", "Bf", "Bo", "Bq",
348 "Bsx", "Bx", "Db", "Dc",
349 "Do", "Dq", "Ec", "Ef",
350 "Em", "Eo", "Fx", "Ms",
351 "No", "Ns", "Nx", "Ox",
352 "Pc", "Pf", "Po", "Pq",
353 "Qc", "Ql", "Qo", "Qq",
354 "Re", "Rs", "Sc", "So",
355 "Sq", "Sm", "Sx", "Sy",
356 "Tn", "Ux", "Xc", "Xo",
357 "Fo", "Fc", "Oo", "Oc",
358 "Bk", "Ek", "Bt", "Hf",
359 "Fr", "Ud", "Lb", "Lp",
360 "Lk", "Mt", "Brq", "Bro",
361 "Brc", "%C", "Es", "En",
362 "Dx", "%Q", "%U", "Ta",
364 "TH", "SH", "SS", "TP",
366 "LP", "PP", "P", "IP",
367 "HP", "SM", "SB", "BI",
368 "IB", "BR", "RB", "R",
369 "B", "I", "IR", "RI",
370 "RE", "RS", "DT", "UC",
374 "UE", "MT", "ME", NULL
376 const char *const *roff_name
= __roff_name
;
378 static struct roffmac roffs
[TOKEN_NONE
] = {
379 { roff_noarg
, NULL
, NULL
, 0 }, /* br */
380 { roff_onearg
, NULL
, NULL
, 0 }, /* ce */
381 { roff_noarg
, NULL
, NULL
, 0 }, /* fi */
382 { roff_onearg
, NULL
, NULL
, 0 }, /* ft */
383 { roff_onearg
, NULL
, NULL
, 0 }, /* ll */
384 { roff_mc
, NULL
, NULL
, 0 }, /* mc */
385 { roff_noarg
, NULL
, NULL
, 0 }, /* nf */
386 { roff_onearg
, NULL
, NULL
, 0 }, /* po */
387 { roff_onearg
, NULL
, NULL
, 0 }, /* rj */
388 { roff_onearg
, NULL
, NULL
, 0 }, /* sp */
389 { roff_manyarg
, NULL
, NULL
, 0 }, /* ta */
390 { roff_onearg
, NULL
, NULL
, 0 }, /* ti */
391 { NULL
, NULL
, NULL
, 0 }, /* ROFF_MAX */
392 { roff_unsupp
, NULL
, NULL
, 0 }, /* ab */
393 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ad */
394 { roff_line_ignore
, NULL
, NULL
, 0 }, /* af */
395 { roff_unsupp
, NULL
, NULL
, 0 }, /* aln */
396 { roff_als
, NULL
, NULL
, 0 }, /* als */
397 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* am */
398 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* am1 */
399 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* ami */
400 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* ami1 */
401 { roff_ds
, NULL
, NULL
, 0 }, /* as */
402 { roff_ds
, NULL
, NULL
, 0 }, /* as1 */
403 { roff_unsupp
, NULL
, NULL
, 0 }, /* asciify */
404 { roff_line_ignore
, NULL
, NULL
, 0 }, /* backtrace */
405 { roff_line_ignore
, NULL
, NULL
, 0 }, /* bd */
406 { roff_line_ignore
, NULL
, NULL
, 0 }, /* bleedat */
407 { roff_unsupp
, NULL
, NULL
, 0 }, /* blm */
408 { roff_unsupp
, NULL
, NULL
, 0 }, /* box */
409 { roff_unsupp
, NULL
, NULL
, 0 }, /* boxa */
410 { roff_line_ignore
, NULL
, NULL
, 0 }, /* bp */
411 { roff_unsupp
, NULL
, NULL
, 0 }, /* BP */
412 { roff_break
, NULL
, NULL
, 0 }, /* break */
413 { roff_line_ignore
, NULL
, NULL
, 0 }, /* breakchar */
414 { roff_line_ignore
, NULL
, NULL
, 0 }, /* brnl */
415 { roff_noarg
, NULL
, NULL
, 0 }, /* brp */
416 { roff_line_ignore
, NULL
, NULL
, 0 }, /* brpnl */
417 { roff_unsupp
, NULL
, NULL
, 0 }, /* c2 */
418 { roff_cc
, NULL
, NULL
, 0 }, /* cc */
419 { roff_insec
, NULL
, NULL
, 0 }, /* cf */
420 { roff_line_ignore
, NULL
, NULL
, 0 }, /* cflags */
421 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ch */
422 { roff_char
, NULL
, NULL
, 0 }, /* char */
423 { roff_unsupp
, NULL
, NULL
, 0 }, /* chop */
424 { roff_line_ignore
, NULL
, NULL
, 0 }, /* class */
425 { roff_insec
, NULL
, NULL
, 0 }, /* close */
426 { roff_unsupp
, NULL
, NULL
, 0 }, /* CL */
427 { roff_line_ignore
, NULL
, NULL
, 0 }, /* color */
428 { roff_unsupp
, NULL
, NULL
, 0 }, /* composite */
429 { roff_unsupp
, NULL
, NULL
, 0 }, /* continue */
430 { roff_line_ignore
, NULL
, NULL
, 0 }, /* cp */
431 { roff_line_ignore
, NULL
, NULL
, 0 }, /* cropat */
432 { roff_line_ignore
, NULL
, NULL
, 0 }, /* cs */
433 { roff_line_ignore
, NULL
, NULL
, 0 }, /* cu */
434 { roff_unsupp
, NULL
, NULL
, 0 }, /* da */
435 { roff_unsupp
, NULL
, NULL
, 0 }, /* dch */
436 { roff_Dd
, NULL
, NULL
, 0 }, /* Dd */
437 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* de */
438 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* de1 */
439 { roff_line_ignore
, NULL
, NULL
, 0 }, /* defcolor */
440 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* dei */
441 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* dei1 */
442 { roff_unsupp
, NULL
, NULL
, 0 }, /* device */
443 { roff_unsupp
, NULL
, NULL
, 0 }, /* devicem */
444 { roff_unsupp
, NULL
, NULL
, 0 }, /* di */
445 { roff_unsupp
, NULL
, NULL
, 0 }, /* do */
446 { roff_ds
, NULL
, NULL
, 0 }, /* ds */
447 { roff_ds
, NULL
, NULL
, 0 }, /* ds1 */
448 { roff_unsupp
, NULL
, NULL
, 0 }, /* dwh */
449 { roff_unsupp
, NULL
, NULL
, 0 }, /* dt */
450 { roff_ec
, NULL
, NULL
, 0 }, /* ec */
451 { roff_unsupp
, NULL
, NULL
, 0 }, /* ecr */
452 { roff_unsupp
, NULL
, NULL
, 0 }, /* ecs */
453 { roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
}, /* el */
454 { roff_unsupp
, NULL
, NULL
, 0 }, /* em */
455 { roff_EN
, NULL
, NULL
, 0 }, /* EN */
456 { roff_eo
, NULL
, NULL
, 0 }, /* eo */
457 { roff_unsupp
, NULL
, NULL
, 0 }, /* EP */
458 { roff_EQ
, NULL
, NULL
, 0 }, /* EQ */
459 { roff_line_ignore
, NULL
, NULL
, 0 }, /* errprint */
460 { roff_unsupp
, NULL
, NULL
, 0 }, /* ev */
461 { roff_unsupp
, NULL
, NULL
, 0 }, /* evc */
462 { roff_unsupp
, NULL
, NULL
, 0 }, /* ex */
463 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fallback */
464 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fam */
465 { roff_unsupp
, NULL
, NULL
, 0 }, /* fc */
466 { roff_unsupp
, NULL
, NULL
, 0 }, /* fchar */
467 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fcolor */
468 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fdeferlig */
469 { roff_line_ignore
, NULL
, NULL
, 0 }, /* feature */
470 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fkern */
471 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fl */
472 { roff_line_ignore
, NULL
, NULL
, 0 }, /* flig */
473 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fp */
474 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fps */
475 { roff_unsupp
, NULL
, NULL
, 0 }, /* fschar */
476 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fspacewidth */
477 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fspecial */
478 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ftr */
479 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fzoom */
480 { roff_line_ignore
, NULL
, NULL
, 0 }, /* gcolor */
481 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hc */
482 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hcode */
483 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hidechar */
484 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hla */
485 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hlm */
486 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hpf */
487 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hpfa */
488 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hpfcode */
489 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hw */
490 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hy */
491 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hylang */
492 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hylen */
493 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hym */
494 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hypp */
495 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hys */
496 { roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
}, /* ie */
497 { roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
}, /* if */
498 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* ig */
499 { roff_unsupp
, NULL
, NULL
, 0 }, /* index */
500 { roff_it
, NULL
, NULL
, 0 }, /* it */
501 { roff_unsupp
, NULL
, NULL
, 0 }, /* itc */
502 { roff_line_ignore
, NULL
, NULL
, 0 }, /* IX */
503 { roff_line_ignore
, NULL
, NULL
, 0 }, /* kern */
504 { roff_line_ignore
, NULL
, NULL
, 0 }, /* kernafter */
505 { roff_line_ignore
, NULL
, NULL
, 0 }, /* kernbefore */
506 { roff_line_ignore
, NULL
, NULL
, 0 }, /* kernpair */
507 { roff_unsupp
, NULL
, NULL
, 0 }, /* lc */
508 { roff_unsupp
, NULL
, NULL
, 0 }, /* lc_ctype */
509 { roff_unsupp
, NULL
, NULL
, 0 }, /* lds */
510 { roff_unsupp
, NULL
, NULL
, 0 }, /* length */
511 { roff_line_ignore
, NULL
, NULL
, 0 }, /* letadj */
512 { roff_insec
, NULL
, NULL
, 0 }, /* lf */
513 { roff_line_ignore
, NULL
, NULL
, 0 }, /* lg */
514 { roff_line_ignore
, NULL
, NULL
, 0 }, /* lhang */
515 { roff_unsupp
, NULL
, NULL
, 0 }, /* linetabs */
516 { roff_unsupp
, NULL
, NULL
, 0 }, /* lnr */
517 { roff_unsupp
, NULL
, NULL
, 0 }, /* lnrf */
518 { roff_unsupp
, NULL
, NULL
, 0 }, /* lpfx */
519 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ls */
520 { roff_unsupp
, NULL
, NULL
, 0 }, /* lsm */
521 { roff_line_ignore
, NULL
, NULL
, 0 }, /* lt */
522 { roff_line_ignore
, NULL
, NULL
, 0 }, /* mediasize */
523 { roff_line_ignore
, NULL
, NULL
, 0 }, /* minss */
524 { roff_line_ignore
, NULL
, NULL
, 0 }, /* mk */
525 { roff_insec
, NULL
, NULL
, 0 }, /* mso */
526 { roff_line_ignore
, NULL
, NULL
, 0 }, /* na */
527 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ne */
528 { roff_line_ignore
, NULL
, NULL
, 0 }, /* nh */
529 { roff_line_ignore
, NULL
, NULL
, 0 }, /* nhychar */
530 { roff_unsupp
, NULL
, NULL
, 0 }, /* nm */
531 { roff_unsupp
, NULL
, NULL
, 0 }, /* nn */
532 { roff_nop
, NULL
, NULL
, 0 }, /* nop */
533 { roff_nr
, NULL
, NULL
, 0 }, /* nr */
534 { roff_unsupp
, NULL
, NULL
, 0 }, /* nrf */
535 { roff_line_ignore
, NULL
, NULL
, 0 }, /* nroff */
536 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ns */
537 { roff_insec
, NULL
, NULL
, 0 }, /* nx */
538 { roff_insec
, NULL
, NULL
, 0 }, /* open */
539 { roff_insec
, NULL
, NULL
, 0 }, /* opena */
540 { roff_line_ignore
, NULL
, NULL
, 0 }, /* os */
541 { roff_unsupp
, NULL
, NULL
, 0 }, /* output */
542 { roff_line_ignore
, NULL
, NULL
, 0 }, /* padj */
543 { roff_line_ignore
, NULL
, NULL
, 0 }, /* papersize */
544 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pc */
545 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pev */
546 { roff_insec
, NULL
, NULL
, 0 }, /* pi */
547 { roff_unsupp
, NULL
, NULL
, 0 }, /* PI */
548 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pl */
549 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pm */
550 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pn */
551 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pnr */
552 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ps */
553 { roff_unsupp
, NULL
, NULL
, 0 }, /* psbb */
554 { roff_unsupp
, NULL
, NULL
, 0 }, /* pshape */
555 { roff_insec
, NULL
, NULL
, 0 }, /* pso */
556 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ptr */
557 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pvs */
558 { roff_unsupp
, NULL
, NULL
, 0 }, /* rchar */
559 { roff_line_ignore
, NULL
, NULL
, 0 }, /* rd */
560 { roff_line_ignore
, NULL
, NULL
, 0 }, /* recursionlimit */
561 { roff_return
, NULL
, NULL
, 0 }, /* return */
562 { roff_unsupp
, NULL
, NULL
, 0 }, /* rfschar */
563 { roff_line_ignore
, NULL
, NULL
, 0 }, /* rhang */
564 { roff_rm
, NULL
, NULL
, 0 }, /* rm */
565 { roff_rn
, NULL
, NULL
, 0 }, /* rn */
566 { roff_unsupp
, NULL
, NULL
, 0 }, /* rnn */
567 { roff_rr
, NULL
, NULL
, 0 }, /* rr */
568 { roff_line_ignore
, NULL
, NULL
, 0 }, /* rs */
569 { roff_line_ignore
, NULL
, NULL
, 0 }, /* rt */
570 { roff_unsupp
, NULL
, NULL
, 0 }, /* schar */
571 { roff_line_ignore
, NULL
, NULL
, 0 }, /* sentchar */
572 { roff_line_ignore
, NULL
, NULL
, 0 }, /* shc */
573 { roff_shift
, NULL
, NULL
, 0 }, /* shift */
574 { roff_line_ignore
, NULL
, NULL
, 0 }, /* sizes */
575 { roff_so
, NULL
, NULL
, 0 }, /* so */
576 { roff_line_ignore
, NULL
, NULL
, 0 }, /* spacewidth */
577 { roff_line_ignore
, NULL
, NULL
, 0 }, /* special */
578 { roff_line_ignore
, NULL
, NULL
, 0 }, /* spreadwarn */
579 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ss */
580 { roff_line_ignore
, NULL
, NULL
, 0 }, /* sty */
581 { roff_unsupp
, NULL
, NULL
, 0 }, /* substring */
582 { roff_line_ignore
, NULL
, NULL
, 0 }, /* sv */
583 { roff_insec
, NULL
, NULL
, 0 }, /* sy */
584 { roff_T_
, NULL
, NULL
, 0 }, /* T& */
585 { roff_unsupp
, NULL
, NULL
, 0 }, /* tc */
586 { roff_TE
, NULL
, NULL
, 0 }, /* TE */
587 { roff_Dd
, NULL
, NULL
, 0 }, /* TH */
588 { roff_line_ignore
, NULL
, NULL
, 0 }, /* tkf */
589 { roff_unsupp
, NULL
, NULL
, 0 }, /* tl */
590 { roff_line_ignore
, NULL
, NULL
, 0 }, /* tm */
591 { roff_line_ignore
, NULL
, NULL
, 0 }, /* tm1 */
592 { roff_line_ignore
, NULL
, NULL
, 0 }, /* tmc */
593 { roff_tr
, NULL
, NULL
, 0 }, /* tr */
594 { roff_line_ignore
, NULL
, NULL
, 0 }, /* track */
595 { roff_line_ignore
, NULL
, NULL
, 0 }, /* transchar */
596 { roff_insec
, NULL
, NULL
, 0 }, /* trf */
597 { roff_line_ignore
, NULL
, NULL
, 0 }, /* trimat */
598 { roff_unsupp
, NULL
, NULL
, 0 }, /* trin */
599 { roff_unsupp
, NULL
, NULL
, 0 }, /* trnt */
600 { roff_line_ignore
, NULL
, NULL
, 0 }, /* troff */
601 { roff_TS
, NULL
, NULL
, 0 }, /* TS */
602 { roff_line_ignore
, NULL
, NULL
, 0 }, /* uf */
603 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ul */
604 { roff_unsupp
, NULL
, NULL
, 0 }, /* unformat */
605 { roff_line_ignore
, NULL
, NULL
, 0 }, /* unwatch */
606 { roff_line_ignore
, NULL
, NULL
, 0 }, /* unwatchn */
607 { roff_line_ignore
, NULL
, NULL
, 0 }, /* vpt */
608 { roff_line_ignore
, NULL
, NULL
, 0 }, /* vs */
609 { roff_line_ignore
, NULL
, NULL
, 0 }, /* warn */
610 { roff_line_ignore
, NULL
, NULL
, 0 }, /* warnscale */
611 { roff_line_ignore
, NULL
, NULL
, 0 }, /* watch */
612 { roff_line_ignore
, NULL
, NULL
, 0 }, /* watchlength */
613 { roff_line_ignore
, NULL
, NULL
, 0 }, /* watchn */
614 { roff_unsupp
, NULL
, NULL
, 0 }, /* wh */
615 { roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
}, /*while*/
616 { roff_insec
, NULL
, NULL
, 0 }, /* write */
617 { roff_insec
, NULL
, NULL
, 0 }, /* writec */
618 { roff_insec
, NULL
, NULL
, 0 }, /* writem */
619 { roff_line_ignore
, NULL
, NULL
, 0 }, /* xflag */
620 { roff_cblock
, NULL
, NULL
, 0 }, /* . */
621 { roff_renamed
, NULL
, NULL
, 0 },
622 { roff_userdef
, NULL
, NULL
, 0 }
625 /* Array of injected predefined strings. */
626 #define PREDEFS_MAX 38
627 static const struct predef predefs
[PREDEFS_MAX
] = {
628 #include "predefs.in"
631 static int roffce_lines
; /* number of input lines to center */
632 static struct roff_node
*roffce_node
; /* active request */
633 static int roffit_lines
; /* number of lines to delay */
634 static char *roffit_macro
; /* nil-terminated macro line */
637 /* --- request table ------------------------------------------------------ */
640 roffhash_alloc(enum roff_tok mintok
, enum roff_tok maxtok
)
648 htab
= mandoc_malloc(sizeof(*htab
));
649 mandoc_ohash_init(htab
, 8, offsetof(struct roffreq
, name
));
651 for (tok
= mintok
; tok
< maxtok
; tok
++) {
652 if (roff_name
[tok
] == NULL
)
654 sz
= strlen(roff_name
[tok
]);
655 req
= mandoc_malloc(sizeof(*req
) + sz
+ 1);
657 memcpy(req
->name
, roff_name
[tok
], sz
+ 1);
658 slot
= ohash_qlookup(htab
, req
->name
);
659 ohash_insert(htab
, slot
, req
);
665 roffhash_free(struct ohash
*htab
)
672 for (req
= ohash_first(htab
, &slot
); req
!= NULL
;
673 req
= ohash_next(htab
, &slot
))
680 roffhash_find(struct ohash
*htab
, const char *name
, size_t sz
)
687 req
= ohash_find(htab
, ohash_qlookupi(htab
, name
, &end
));
689 req
= ohash_find(htab
, ohash_qlookup(htab
, name
));
690 return req
== NULL
? TOKEN_NONE
: req
->tok
;
693 /* --- stack of request blocks -------------------------------------------- */
696 * Pop the current node off of the stack of roff instructions currently
697 * pending. Return 1 if it is a loop or 0 otherwise.
700 roffnode_pop(struct roff
*r
)
706 inloop
= p
->tok
== ROFF_while
;
715 * Push a roff node onto the instruction stack. This must later be
716 * removed with roffnode_pop().
719 roffnode_push(struct roff
*r
, enum roff_tok tok
, const char *name
,
724 p
= mandoc_calloc(1, sizeof(struct roffnode
));
727 p
->name
= mandoc_strdup(name
);
731 p
->rule
= p
->parent
? p
->parent
->rule
: 0;
736 /* --- roff parser state data management ---------------------------------- */
739 roff_free1(struct roff
*r
)
743 tbl_free(r
->first_tbl
);
744 r
->first_tbl
= r
->last_tbl
= r
->tbl
= NULL
;
746 eqn_free(r
->last_eqn
);
747 r
->last_eqn
= r
->eqn
= NULL
;
749 while (r
->mstackpos
>= 0)
760 roff_freereg(r
->regtab
);
763 roff_freestr(r
->strtab
);
764 roff_freestr(r
->rentab
);
765 roff_freestr(r
->xmbtab
);
766 r
->strtab
= r
->rentab
= r
->xmbtab
= NULL
;
769 for (i
= 0; i
< 128; i
++)
776 roff_reset(struct roff
*r
)
779 r
->options
|= MPARSE_COMMENT
;
780 r
->format
= r
->options
& (MPARSE_MDOC
| MPARSE_MAN
);
790 roff_free(struct roff
*r
)
795 for (i
= 0; i
< r
->mstacksz
; i
++)
796 free(r
->mstack
[i
].argv
);
798 roffhash_free(r
->reqtab
);
803 roff_alloc(int options
)
807 r
= mandoc_calloc(1, sizeof(struct roff
));
808 r
->reqtab
= roffhash_alloc(0, ROFF_RENAMED
);
809 r
->options
= options
| MPARSE_COMMENT
;
810 r
->format
= options
& (MPARSE_MDOC
| MPARSE_MAN
);
817 /* --- syntax tree state data management ---------------------------------- */
820 roff_man_free1(struct roff_man
*man
)
822 if (man
->meta
.first
!= NULL
)
823 roff_node_delete(man
, man
->meta
.first
);
824 free(man
->meta
.msec
);
827 free(man
->meta
.arch
);
828 free(man
->meta
.title
);
829 free(man
->meta
.name
);
830 free(man
->meta
.date
);
831 free(man
->meta
.sodest
);
835 roff_state_reset(struct roff_man
*man
)
837 man
->last
= man
->meta
.first
;
840 man
->lastsec
= man
->lastnamed
= SEC_NONE
;
841 man
->next
= ROFF_NEXT_CHILD
;
842 roff_setreg(man
->roff
, "nS", 0, '=');
846 roff_man_alloc1(struct roff_man
*man
)
848 memset(&man
->meta
, 0, sizeof(man
->meta
));
849 man
->meta
.first
= mandoc_calloc(1, sizeof(*man
->meta
.first
));
850 man
->meta
.first
->type
= ROFFT_ROOT
;
851 man
->meta
.macroset
= MACROSET_NONE
;
852 roff_state_reset(man
);
856 roff_man_reset(struct roff_man
*man
)
859 roff_man_alloc1(man
);
863 roff_man_free(struct roff_man
*man
)
871 roff_man_alloc(struct roff
*roff
, const char *os_s
, int quick
)
873 struct roff_man
*man
;
875 man
= mandoc_calloc(1, sizeof(*man
));
879 roff_man_alloc1(man
);
884 /* --- syntax tree handling ----------------------------------------------- */
887 roff_node_alloc(struct roff_man
*man
, int line
, int pos
,
888 enum roff_type type
, int tok
)
892 n
= mandoc_calloc(1, sizeof(*n
));
897 n
->sec
= man
->lastsec
;
899 if (man
->flags
& MDOC_SYNOPSIS
)
900 n
->flags
|= NODE_SYNPRETTY
;
902 n
->flags
&= ~NODE_SYNPRETTY
;
903 if ((man
->flags
& (ROFF_NOFILL
| ROFF_NONOFILL
)) == ROFF_NOFILL
)
904 n
->flags
|= NODE_NOFILL
;
906 n
->flags
&= ~NODE_NOFILL
;
907 if (man
->flags
& MDOC_NEWLINE
)
908 n
->flags
|= NODE_LINE
;
909 man
->flags
&= ~MDOC_NEWLINE
;
915 roff_node_append(struct roff_man
*man
, struct roff_node
*n
)
919 case ROFF_NEXT_SIBLING
:
920 if (man
->last
->next
!= NULL
) {
921 n
->next
= man
->last
->next
;
922 man
->last
->next
->prev
= n
;
924 man
->last
->parent
->last
= n
;
927 n
->parent
= man
->last
->parent
;
929 case ROFF_NEXT_CHILD
:
930 if (man
->last
->child
!= NULL
) {
931 n
->next
= man
->last
->child
;
932 man
->last
->child
->prev
= n
;
935 man
->last
->child
= n
;
936 n
->parent
= man
->last
;
948 if (n
->end
!= ENDBODY_NOT
)
960 * Copy over the normalised-data pointer of our parent. Not
961 * everybody has one, but copying a null pointer is fine.
964 n
->norm
= n
->parent
->norm
;
965 assert(n
->parent
->type
== ROFFT_BLOCK
);
969 roff_word_alloc(struct roff_man
*man
, int line
, int pos
, const char *word
)
973 n
= roff_node_alloc(man
, line
, pos
, ROFFT_TEXT
, TOKEN_NONE
);
974 n
->string
= roff_strdup(man
->roff
, word
);
975 roff_node_append(man
, n
);
976 n
->flags
|= NODE_VALID
| NODE_ENDED
;
977 man
->next
= ROFF_NEXT_SIBLING
;
981 roff_word_append(struct roff_man
*man
, const char *word
)
984 char *addstr
, *newstr
;
987 addstr
= roff_strdup(man
->roff
, word
);
988 mandoc_asprintf(&newstr
, "%s %s", n
->string
, addstr
);
992 man
->next
= ROFF_NEXT_SIBLING
;
996 roff_elem_alloc(struct roff_man
*man
, int line
, int pos
, int tok
)
1000 n
= roff_node_alloc(man
, line
, pos
, ROFFT_ELEM
, tok
);
1001 roff_node_append(man
, n
);
1002 man
->next
= ROFF_NEXT_CHILD
;
1006 roff_block_alloc(struct roff_man
*man
, int line
, int pos
, int tok
)
1008 struct roff_node
*n
;
1010 n
= roff_node_alloc(man
, line
, pos
, ROFFT_BLOCK
, tok
);
1011 roff_node_append(man
, n
);
1012 man
->next
= ROFF_NEXT_CHILD
;
1017 roff_head_alloc(struct roff_man
*man
, int line
, int pos
, int tok
)
1019 struct roff_node
*n
;
1021 n
= roff_node_alloc(man
, line
, pos
, ROFFT_HEAD
, tok
);
1022 roff_node_append(man
, n
);
1023 man
->next
= ROFF_NEXT_CHILD
;
1028 roff_body_alloc(struct roff_man
*man
, int line
, int pos
, int tok
)
1030 struct roff_node
*n
;
1032 n
= roff_node_alloc(man
, line
, pos
, ROFFT_BODY
, tok
);
1033 roff_node_append(man
, n
);
1034 man
->next
= ROFF_NEXT_CHILD
;
1039 roff_addtbl(struct roff_man
*man
, int line
, struct tbl_node
*tbl
)
1041 struct roff_node
*n
;
1042 struct tbl_span
*span
;
1044 if (man
->meta
.macroset
== MACROSET_MAN
)
1045 man_breakscope(man
, ROFF_TS
);
1046 while ((span
= tbl_span(tbl
)) != NULL
) {
1047 n
= roff_node_alloc(man
, line
, 0, ROFFT_TBL
, TOKEN_NONE
);
1049 roff_node_append(man
, n
);
1050 n
->flags
|= NODE_VALID
| NODE_ENDED
;
1051 man
->next
= ROFF_NEXT_SIBLING
;
1056 roff_node_unlink(struct roff_man
*man
, struct roff_node
*n
)
1059 /* Adjust siblings. */
1062 n
->prev
->next
= n
->next
;
1064 n
->next
->prev
= n
->prev
;
1066 /* Adjust parent. */
1068 if (n
->parent
!= NULL
) {
1069 if (n
->parent
->child
== n
)
1070 n
->parent
->child
= n
->next
;
1071 if (n
->parent
->last
== n
)
1072 n
->parent
->last
= n
->prev
;
1075 /* Adjust parse point. */
1079 if (man
->last
== n
) {
1080 if (n
->prev
== NULL
) {
1081 man
->last
= n
->parent
;
1082 man
->next
= ROFF_NEXT_CHILD
;
1084 man
->last
= n
->prev
;
1085 man
->next
= ROFF_NEXT_SIBLING
;
1088 if (man
->meta
.first
== n
)
1089 man
->meta
.first
= NULL
;
1093 roff_node_relink(struct roff_man
*man
, struct roff_node
*n
)
1095 roff_node_unlink(man
, n
);
1096 n
->prev
= n
->next
= NULL
;
1097 roff_node_append(man
, n
);
1101 roff_node_free(struct roff_node
*n
)
1104 if (n
->args
!= NULL
)
1105 mdoc_argv_free(n
->args
);
1106 if (n
->type
== ROFFT_BLOCK
|| n
->type
== ROFFT_ELEM
)
1108 eqn_box_free(n
->eqn
);
1115 roff_node_delete(struct roff_man
*man
, struct roff_node
*n
)
1118 while (n
->child
!= NULL
)
1119 roff_node_delete(man
, n
->child
);
1120 roff_node_unlink(man
, n
);
1125 roff_node_transparent(struct roff_node
*n
)
1129 if (n
->type
== ROFFT_COMMENT
|| n
->flags
& NODE_NOPRT
)
1131 return roff_tok_transparent(n
->tok
);
1135 roff_tok_transparent(enum roff_tok tok
)
1158 roff_node_child(struct roff_node
*n
)
1160 for (n
= n
->child
; roff_node_transparent(n
); n
= n
->next
)
1166 roff_node_prev(struct roff_node
*n
)
1170 } while (roff_node_transparent(n
));
1175 roff_node_next(struct roff_node
*n
)
1179 } while (roff_node_transparent(n
));
1184 deroff(char **dest
, const struct roff_node
*n
)
1189 if (n
->string
== NULL
) {
1190 for (n
= n
->child
; n
!= NULL
; n
= n
->next
)
1195 /* Skip leading whitespace. */
1197 for (cp
= n
->string
; *cp
!= '\0'; cp
++) {
1198 if (cp
[0] == '\\' && cp
[1] != '\0' &&
1199 strchr(" %&0^|~", cp
[1]) != NULL
)
1201 else if ( ! isspace((unsigned char)*cp
))
1205 /* Skip trailing backslash. */
1208 if (sz
> 0 && cp
[sz
- 1] == '\\')
1211 /* Skip trailing whitespace. */
1214 if ( ! isspace((unsigned char)cp
[sz
-1]))
1217 /* Skip empty strings. */
1222 if (*dest
== NULL
) {
1223 *dest
= mandoc_strndup(cp
, sz
);
1227 mandoc_asprintf(&cp
, "%s %*s", *dest
, (int)sz
, cp
);
1232 /* --- main functions of the roff parser ---------------------------------- */
1235 * In the current line, expand escape sequences that produce parsable
1236 * input text. Also check the syntax of the remaining escape sequences,
1237 * which typically produce output glyphs or change formatter state.
1240 roff_expand(struct roff
*r
, struct buf
*buf
, int ln
, int pos
, char newesc
)
1242 struct mctx
*ctx
; /* current macro call context */
1243 char ubuf
[24]; /* buffer to print the number */
1244 struct roff_node
*n
; /* used for header comments */
1245 const char *start
; /* start of the string to process */
1246 char *stesc
; /* start of an escape sequence ('\\') */
1247 const char *esct
; /* type of escape sequence */
1248 char *ep
; /* end of comment string */
1249 const char *stnam
; /* start of the name, after "[(*" */
1250 const char *cp
; /* end of the name, e.g. before ']' */
1251 const char *res
; /* the string to be substituted */
1252 char *nbuf
; /* new buffer to copy buf->buf to */
1253 size_t maxl
; /* expected length of the escape name */
1254 size_t naml
; /* actual length of the escape name */
1255 size_t asz
; /* length of the replacement */
1256 size_t rsz
; /* length of the rest of the string */
1257 int inaml
; /* length returned from mandoc_escape() */
1258 int expand_count
; /* to avoid infinite loops */
1259 int npos
; /* position in numeric expression */
1260 int arg_complete
; /* argument not interrupted by eol */
1261 int quote_args
; /* true for \\$@, false for \\$* */
1262 int done
; /* no more input available */
1263 int deftype
; /* type of definition to paste */
1264 int rcsid
; /* kind of RCS id seen */
1265 enum mandocerr err
; /* for escape sequence problems */
1266 char sign
; /* increment number register */
1267 char term
; /* character terminating the escape */
1269 /* Search forward for comments. */
1272 start
= buf
->buf
+ pos
;
1273 for (stesc
= buf
->buf
+ pos
; *stesc
!= '\0'; stesc
++) {
1274 if (stesc
[0] != newesc
|| stesc
[1] == '\0')
1277 if (*stesc
!= '"' && *stesc
!= '#')
1280 /* Comment found, look for RCS id. */
1283 if ((cp
= strstr(stesc
, "$" "OpenBSD")) != NULL
) {
1284 rcsid
= 1 << MANDOC_OS_OPENBSD
;
1286 } else if ((cp
= strstr(stesc
, "$" "NetBSD")) != NULL
) {
1287 rcsid
= 1 << MANDOC_OS_NETBSD
;
1291 isalnum((unsigned char)*cp
) == 0 &&
1292 strchr(cp
, '$') != NULL
) {
1293 if (r
->man
->meta
.rcsids
& rcsid
)
1294 mandoc_msg(MANDOCERR_RCS_REP
, ln
,
1295 (int)(stesc
- buf
->buf
) + 1,
1297 r
->man
->meta
.rcsids
|= rcsid
;
1300 /* Handle trailing whitespace. */
1302 ep
= strchr(stesc
--, '\0') - 1;
1307 if (*ep
== ' ' || *ep
== '\t')
1308 mandoc_msg(MANDOCERR_SPACE_EOL
,
1309 ln
, (int)(ep
- buf
->buf
), NULL
);
1312 * Save comments preceding the title macro
1313 * in the syntax tree.
1316 if (newesc
!= ASCII_ESC
&& r
->options
& MPARSE_COMMENT
) {
1317 while (*ep
== ' ' || *ep
== '\t')
1320 n
= roff_node_alloc(r
->man
,
1321 ln
, stesc
+ 1 - buf
->buf
,
1322 ROFFT_COMMENT
, TOKEN_NONE
);
1323 n
->string
= mandoc_strdup(stesc
+ 2);
1324 roff_node_append(r
->man
, n
);
1325 n
->flags
|= NODE_VALID
| NODE_ENDED
;
1326 r
->man
->next
= ROFF_NEXT_SIBLING
;
1329 /* Line continuation with comment. */
1331 if (stesc
[1] == '#') {
1333 return ROFF_IGN
| ROFF_APPEND
;
1336 /* Discard normal comments. */
1338 while (stesc
> start
&& stesc
[-1] == ' ' &&
1339 (stesc
== start
+ 1 || stesc
[-2] != '\\'))
1348 /* Notice the end of the input. */
1350 if (*stesc
== '\n') {
1356 while (stesc
>= start
) {
1357 if (*stesc
!= newesc
) {
1360 * If we have a non-standard escape character,
1361 * escape literal backslashes because all
1362 * processing in subsequent functions uses
1363 * the standard escaping rules.
1366 if (newesc
!= ASCII_ESC
&& *stesc
== '\\') {
1368 buf
->sz
= mandoc_asprintf(&nbuf
, "%s\\e%s",
1369 buf
->buf
, stesc
+ 1) + 1;
1371 stesc
= nbuf
+ (stesc
- buf
->buf
);
1376 /* Search backwards for the next escape. */
1382 /* If it is escaped, skip it. */
1384 for (cp
= stesc
- 1; cp
>= start
; cp
--)
1385 if (*cp
!= r
->escape
)
1388 if ((stesc
- cp
) % 2 == 0) {
1392 } else if (stesc
[1] != '\0') {
1399 return ROFF_IGN
| ROFF_APPEND
;
1402 /* Decide whether to expand or to check only. */
1420 if (sign
== '+' || sign
== '-')
1426 switch(mandoc_escape(&cp
, &stnam
, &inaml
)) {
1427 case ESCAPE_SPECIAL
:
1428 if (mchars_spec2cp(stnam
, inaml
) >= 0)
1432 err
= MANDOCERR_ESC_BAD
;
1435 err
= MANDOCERR_ESC_UNDEF
;
1438 err
= MANDOCERR_ESC_UNSUPP
;
1443 if (err
!= MANDOCERR_OK
)
1444 mandoc_msg(err
, ln
, (int)(stesc
- buf
->buf
),
1445 "%.*s", (int)(cp
- stesc
), stesc
);
1450 if (EXPAND_LIMIT
< ++expand_count
) {
1451 mandoc_msg(MANDOCERR_ROFFLOOP
,
1452 ln
, (int)(stesc
- buf
->buf
), NULL
);
1457 * The third character decides the length
1458 * of the name of the string or register.
1459 * Save a pointer to the name.
1486 /* Advance to the end of the name. */
1490 while (maxl
== 0 || naml
< maxl
) {
1492 mandoc_msg(MANDOCERR_ESC_BAD
, ln
,
1493 (int)(stesc
- buf
->buf
), "%s", stesc
);
1497 if (maxl
== 0 && *cp
== term
) {
1501 if (*cp
++ != '\\' || *esct
!= 'w') {
1505 switch (mandoc_escape(&cp
, NULL
, NULL
)) {
1506 case ESCAPE_SPECIAL
:
1507 case ESCAPE_UNICODE
:
1508 case ESCAPE_NUMBERED
:
1510 case ESCAPE_OVERSTRIKE
:
1519 * Retrieve the replacement string; if it is
1520 * undefined, resume searching for escapes.
1526 deftype
= ROFFDEF_USER
| ROFFDEF_PRE
;
1527 res
= roff_getstrn(r
, stnam
, naml
, &deftype
);
1530 * If not overridden, let \*(.T
1531 * through to the formatters.
1534 if (res
== NULL
&& naml
== 2 &&
1535 stnam
[0] == '.' && stnam
[1] == 'T') {
1536 roff_setstrn(&r
->strtab
,
1537 ".T", 2, NULL
, 0, 0);
1544 if (r
->mstackpos
< 0) {
1545 mandoc_msg(MANDOCERR_ARG_UNDEF
, ln
,
1546 (int)(stesc
- buf
->buf
), "%.3s", stesc
);
1549 ctx
= r
->mstack
+ r
->mstackpos
;
1550 npos
= esct
[1] - '1';
1551 if (npos
>= 0 && npos
<= 8) {
1552 res
= npos
< ctx
->argc
?
1553 ctx
->argv
[npos
] : "";
1558 else if (esct
[1] == '@')
1561 mandoc_msg(MANDOCERR_ARG_NONUM
, ln
,
1562 (int)(stesc
- buf
->buf
), "%.3s", stesc
);
1566 for (npos
= 0; npos
< ctx
->argc
; npos
++) {
1570 asz
+= 2; /* quotes */
1571 asz
+= strlen(ctx
->argv
[npos
]);
1574 rsz
= buf
->sz
- (stesc
- buf
->buf
) - 3;
1576 memmove(stesc
+ asz
, stesc
+ 3, rsz
);
1578 nbuf
= mandoc_realloc(buf
->buf
, buf
->sz
);
1580 stesc
= nbuf
+ (stesc
- buf
->buf
);
1583 memmove(stesc
+ asz
, stesc
+ 3, rsz
);
1585 for (npos
= 0; npos
< ctx
->argc
; npos
++) {
1590 cp
= ctx
->argv
[npos
];
1599 ubuf
[0] = arg_complete
&&
1600 roff_evalnum(r
, ln
, stnam
, &npos
,
1601 NULL
, ROFFNUM_SCALE
) &&
1602 stnam
+ npos
+ 1 == cp
? '1' : '0';
1607 (void)snprintf(ubuf
, sizeof(ubuf
), "%d",
1608 roff_getregn(r
, stnam
, naml
, sign
));
1613 /* use even incomplete args */
1614 (void)snprintf(ubuf
, sizeof(ubuf
), "%d",
1621 mandoc_msg(MANDOCERR_STR_UNDEF
,
1622 ln
, (int)(stesc
- buf
->buf
),
1623 "%.*s", (int)naml
, stnam
);
1625 } else if (buf
->sz
+ strlen(res
) > SHRT_MAX
) {
1626 mandoc_msg(MANDOCERR_ROFFLOOP
,
1627 ln
, (int)(stesc
- buf
->buf
), NULL
);
1631 /* Replace the escape sequence by the string. */
1634 buf
->sz
= mandoc_asprintf(&nbuf
, "%s%s%s",
1635 buf
->buf
, res
, cp
) + 1;
1637 /* Prepare for the next replacement. */
1640 stesc
= nbuf
+ (stesc
- buf
->buf
) + strlen(res
);
1648 * Parse a quoted or unquoted roff-style request or macro argument.
1649 * Return a pointer to the parsed argument, which is either the original
1650 * pointer or advanced by one byte in case the argument is quoted.
1651 * NUL-terminate the argument in place.
1652 * Collapse pairs of quotes inside quoted arguments.
1653 * Advance the argument pointer to the next argument,
1654 * or to the NUL byte terminating the argument line.
1657 roff_getarg(struct roff
*r
, char **cpp
, int ln
, int *pos
)
1661 int newesc
, pairs
, quoted
, white
;
1663 /* Quoting can only start with a new word. */
1666 if ('"' == *start
) {
1671 newesc
= pairs
= white
= 0;
1672 for (cp
= start
; '\0' != *cp
; cp
++) {
1675 * Move the following text left
1676 * after quoted quotes and after "\\" and "\t".
1681 if ('\\' == cp
[0]) {
1683 * In copy mode, translate double to single
1684 * backslashes and backslash-t to literal tabs.
1695 cp
[-pairs
] = ASCII_ESC
;
1700 /* Skip escaped blanks. */
1707 } else if (0 == quoted
) {
1709 /* Unescaped blanks end unquoted args. */
1713 } else if ('"' == cp
[0]) {
1715 /* Quoted quotes collapse. */
1719 /* Unquoted quotes end quoted args. */
1726 /* Quoted argument without a closing quote. */
1728 mandoc_msg(MANDOCERR_ARG_QUOTE
, ln
, *pos
, NULL
);
1730 /* NUL-terminate this argument and move to the next one. */
1738 *pos
+= (int)(cp
- start
) + (quoted
? 1 : 0);
1741 if ('\0' == *cp
&& (white
|| ' ' == cp
[-1]))
1742 mandoc_msg(MANDOCERR_SPACE_EOL
, ln
, *pos
, NULL
);
1744 start
= mandoc_strdup(start
);
1749 buf
.sz
= strlen(start
) + 1;
1751 if (roff_expand(r
, &buf
, ln
, 0, ASCII_ESC
) & ROFF_IGN
) {
1753 buf
.buf
= mandoc_strdup("");
1760 * Process text streams.
1763 roff_parsetext(struct roff
*r
, struct buf
*buf
, int pos
, int *offs
)
1769 enum mandoc_esc esc
;
1771 /* Spring the input line trap. */
1773 if (roffit_lines
== 1) {
1774 isz
= mandoc_asprintf(&p
, "%s\n.%s", buf
->buf
, roffit_macro
);
1781 return ROFF_REPARSE
;
1782 } else if (roffit_lines
> 1)
1785 if (roffce_node
!= NULL
&& buf
->buf
[pos
] != '\0') {
1786 if (roffce_lines
< 1) {
1787 r
->man
->last
= roffce_node
;
1788 r
->man
->next
= ROFF_NEXT_SIBLING
;
1795 /* Convert all breakable hyphens into ASCII_HYPH. */
1797 start
= p
= buf
->buf
+ pos
;
1799 while (*p
!= '\0') {
1800 sz
= strcspn(p
, "-\\");
1807 /* Skip over escapes. */
1809 esc
= mandoc_escape((const char **)&p
, NULL
, NULL
);
1810 if (esc
== ESCAPE_ERROR
)
1815 } else if (p
== start
) {
1820 if (isalpha((unsigned char)p
[-1]) &&
1821 isalpha((unsigned char)p
[1]))
1829 roff_parseln(struct roff
*r
, int ln
, struct buf
*buf
, int *offs
, size_t len
)
1833 int pos
; /* parse point */
1834 int spos
; /* saved parse point for messages */
1835 int ppos
; /* original offset in buf->buf */
1836 int ctl
; /* macro line (boolean) */
1840 if (len
> 80 && r
->tbl
== NULL
&& r
->eqn
== NULL
&&
1841 (r
->man
->flags
& ROFF_NOFILL
) == 0 &&
1842 strchr(" .\\", buf
->buf
[pos
]) == NULL
&&
1843 buf
->buf
[pos
] != r
->control
&&
1844 strcspn(buf
->buf
, " ") < 80)
1845 mandoc_msg(MANDOCERR_TEXT_LONG
, ln
, (int)len
- 1,
1846 "%.20s...", buf
->buf
+ pos
);
1848 /* Handle in-line equation delimiters. */
1850 if (r
->tbl
== NULL
&&
1851 r
->last_eqn
!= NULL
&& r
->last_eqn
->delim
&&
1852 (r
->eqn
== NULL
|| r
->eqn_inline
)) {
1853 e
= roff_eqndelim(r
, buf
, pos
);
1854 if (e
== ROFF_REPARSE
)
1856 assert(e
== ROFF_CONT
);
1859 /* Expand some escape sequences. */
1861 e
= roff_expand(r
, buf
, ln
, pos
, r
->escape
);
1862 if ((e
& ROFF_MASK
) == ROFF_IGN
)
1864 assert(e
== ROFF_CONT
);
1866 ctl
= roff_getcontrol(r
, buf
->buf
, &pos
);
1869 * First, if a scope is open and we're not a macro, pass the
1870 * text through the macro's filter.
1871 * Equations process all content themselves.
1872 * Tables process almost all content themselves, but we want
1873 * to warn about macros before passing it there.
1876 if (r
->last
!= NULL
&& ! ctl
) {
1878 e
= (*roffs
[t
].text
)(r
, t
, buf
, ln
, pos
, pos
, offs
);
1879 if ((e
& ROFF_MASK
) == ROFF_IGN
)
1884 if (r
->eqn
!= NULL
&& strncmp(buf
->buf
+ ppos
, ".EN", 3)) {
1885 eqn_read(r
->eqn
, buf
->buf
+ ppos
);
1888 if (r
->tbl
!= NULL
&& (ctl
== 0 || buf
->buf
[pos
] == '\0')) {
1889 tbl_read(r
->tbl
, ln
, buf
->buf
, ppos
);
1890 roff_addtbl(r
->man
, ln
, r
->tbl
);
1894 r
->options
&= ~MPARSE_COMMENT
;
1895 return roff_parsetext(r
, buf
, pos
, offs
) | e
;
1898 /* Skip empty request lines. */
1900 if (buf
->buf
[pos
] == '"') {
1901 mandoc_msg(MANDOCERR_COMMENT_BAD
, ln
, pos
, NULL
);
1903 } else if (buf
->buf
[pos
] == '\0')
1907 * If a scope is open, go to the child handler for that macro,
1908 * as it may want to preprocess before doing anything with it.
1913 return (*roffs
[t
].sub
)(r
, t
, buf
, ln
, ppos
, pos
, offs
);
1916 r
->options
&= ~MPARSE_COMMENT
;
1918 t
= roff_parse(r
, buf
->buf
, &pos
, ln
, ppos
);
1919 return roff_req_or_macro(r
, t
, buf
, ln
, spos
, pos
, offs
);
1923 * Handle a new request or macro.
1924 * May be called outside any scope or from inside a conditional scope.
1927 roff_req_or_macro(ROFF_ARGS
) {
1929 /* For now, tables ignore most macros and some requests. */
1931 if (r
->tbl
!= NULL
&& (tok
== TOKEN_NONE
|| tok
== ROFF_TS
||
1932 tok
== ROFF_br
|| tok
== ROFF_ce
|| tok
== ROFF_rj
||
1934 mandoc_msg(MANDOCERR_TBLMACRO
,
1935 ln
, ppos
, "%s", buf
->buf
+ ppos
);
1936 if (tok
!= TOKEN_NONE
)
1938 while (buf
->buf
[pos
] != '\0' && buf
->buf
[pos
] != ' ')
1940 while (buf
->buf
[pos
] == ' ')
1942 tbl_read(r
->tbl
, ln
, buf
->buf
, pos
);
1943 roff_addtbl(r
->man
, ln
, r
->tbl
);
1947 /* For now, let high level macros abort .ce mode. */
1949 if (roffce_node
!= NULL
&&
1950 (tok
== TOKEN_NONE
|| tok
== ROFF_Dd
|| tok
== ROFF_EQ
||
1951 tok
== ROFF_TH
|| tok
== ROFF_TS
)) {
1952 r
->man
->last
= roffce_node
;
1953 r
->man
->next
= ROFF_NEXT_SIBLING
;
1959 * This is neither a roff request nor a user-defined macro.
1960 * Let the standard macro set parsers handle it.
1963 if (tok
== TOKEN_NONE
)
1966 /* Execute a roff request or a user-defined macro. */
1968 return (*roffs
[tok
].proc
)(r
, tok
, buf
, ln
, ppos
, pos
, offs
);
1972 * Internal interface function to tell the roff parser that execution
1973 * of the current macro ended. This is required because macro
1974 * definitions usually do not end with a .return request.
1977 roff_userret(struct roff
*r
)
1982 assert(r
->mstackpos
>= 0);
1983 ctx
= r
->mstack
+ r
->mstackpos
;
1984 for (i
= 0; i
< ctx
->argc
; i
++)
1991 roff_endparse(struct roff
*r
)
1993 if (r
->last
!= NULL
)
1994 mandoc_msg(MANDOCERR_BLK_NOEND
, r
->last
->line
,
1995 r
->last
->col
, "%s", roff_name
[r
->last
->tok
]);
1997 if (r
->eqn
!= NULL
) {
1998 mandoc_msg(MANDOCERR_BLK_NOEND
,
1999 r
->eqn
->node
->line
, r
->eqn
->node
->pos
, "EQ");
2004 if (r
->tbl
!= NULL
) {
2011 * Parse the request or macro name at buf[*pos].
2012 * Return ROFF_RENAMED, ROFF_USERDEF, or a ROFF_* token value.
2013 * For empty, undefined, mdoc(7), and man(7) macros, return TOKEN_NONE.
2014 * As a side effect, set r->current_string to the definition or to NULL.
2016 static enum roff_tok
2017 roff_parse(struct roff
*r
, char *buf
, int *pos
, int ln
, int ppos
)
2027 if ('\0' == *cp
|| '"' == *cp
|| '\t' == *cp
|| ' ' == *cp
)
2031 maclen
= roff_getname(r
, &cp
, ln
, ppos
);
2033 deftype
= ROFFDEF_USER
| ROFFDEF_REN
;
2034 r
->current_string
= roff_getstrn(r
, mac
, maclen
, &deftype
);
2043 t
= roffhash_find(r
->reqtab
, mac
, maclen
);
2046 if (t
!= TOKEN_NONE
)
2048 else if (deftype
== ROFFDEF_UNDEF
) {
2049 /* Using an undefined macro defines it to be empty. */
2050 roff_setstrn(&r
->strtab
, mac
, maclen
, "", 0, 0);
2051 roff_setstrn(&r
->rentab
, mac
, maclen
, NULL
, 0, 0);
2056 /* --- handling of request blocks ----------------------------------------- */
2059 * Close a macro definition block or an "ignore" block.
2062 roff_cblock(ROFF_ARGS
)
2066 if (r
->last
== NULL
) {
2067 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "..");
2071 switch (r
->last
->tok
) {
2080 /* Remapped in roff_block(). */
2083 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "..");
2088 roffnode_cleanscope(r
);
2091 * If a conditional block with braces is still open,
2092 * check for "\}" block end markers.
2095 if (r
->last
!= NULL
&& r
->last
->endspan
< 0) {
2096 rr
= 1; /* If arguments follow "\}", warn about them. */
2097 roff_cond_checkend(r
, tok
, buf
, ln
, ppos
, pos
, &rr
);
2100 if (buf
->buf
[pos
] != '\0')
2101 mandoc_msg(MANDOCERR_ARG_SKIP
, ln
, pos
,
2102 ".. %s", buf
->buf
+ pos
);
2108 * Pop all nodes ending at the end of the current input line.
2109 * Return the number of loops ended.
2112 roffnode_cleanscope(struct roff
*r
)
2117 while (r
->last
!= NULL
&& r
->last
->endspan
> 0) {
2118 if (--r
->last
->endspan
!= 0)
2120 inloop
+= roffnode_pop(r
);
2126 * Handle the closing "\}" of a conditional block.
2127 * Apart from generating warnings, this only pops nodes.
2128 * Return the number of loops ended.
2131 roff_ccond(struct roff
*r
, int ln
, int ppos
)
2133 if (NULL
== r
->last
) {
2134 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "\\}");
2138 switch (r
->last
->tok
) {
2145 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "\\}");
2149 if (r
->last
->endspan
> -1) {
2150 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "\\}");
2154 return roffnode_pop(r
) + roffnode_cleanscope(r
);
2158 roff_block(ROFF_ARGS
)
2160 const char *name
, *value
;
2161 char *call
, *cp
, *iname
, *rname
;
2162 size_t csz
, namesz
, rsz
;
2165 /* Ignore groff compatibility mode for now. */
2167 if (tok
== ROFF_de1
)
2169 else if (tok
== ROFF_dei1
)
2171 else if (tok
== ROFF_am1
)
2173 else if (tok
== ROFF_ami1
)
2176 /* Parse the macro name argument. */
2178 cp
= buf
->buf
+ pos
;
2179 if (tok
== ROFF_ig
) {
2184 namesz
= roff_getname(r
, &cp
, ln
, ppos
);
2185 iname
[namesz
] = '\0';
2188 /* Resolve the macro name argument if it is indirect. */
2190 if (namesz
&& (tok
== ROFF_dei
|| tok
== ROFF_ami
)) {
2191 deftype
= ROFFDEF_USER
;
2192 name
= roff_getstrn(r
, iname
, namesz
, &deftype
);
2194 mandoc_msg(MANDOCERR_STR_UNDEF
,
2195 ln
, (int)(iname
- buf
->buf
),
2196 "%.*s", (int)namesz
, iname
);
2199 namesz
= strlen(name
);
2203 if (namesz
== 0 && tok
!= ROFF_ig
) {
2204 mandoc_msg(MANDOCERR_REQ_EMPTY
,
2205 ln
, ppos
, "%s", roff_name
[tok
]);
2209 roffnode_push(r
, tok
, name
, ln
, ppos
);
2212 * At the beginning of a `de' macro, clear the existing string
2213 * with the same name, if there is one. New content will be
2214 * appended from roff_block_text() in multiline mode.
2217 if (tok
== ROFF_de
|| tok
== ROFF_dei
) {
2218 roff_setstrn(&r
->strtab
, name
, namesz
, "", 0, 0);
2219 roff_setstrn(&r
->rentab
, name
, namesz
, NULL
, 0, 0);
2220 } else if (tok
== ROFF_am
|| tok
== ROFF_ami
) {
2221 deftype
= ROFFDEF_ANY
;
2222 value
= roff_getstrn(r
, iname
, namesz
, &deftype
);
2223 switch (deftype
) { /* Before appending, ... */
2224 case ROFFDEF_PRE
: /* copy predefined to user-defined. */
2225 roff_setstrn(&r
->strtab
, name
, namesz
,
2226 value
, strlen(value
), 0);
2228 case ROFFDEF_REN
: /* call original standard macro. */
2229 csz
= mandoc_asprintf(&call
, ".%.*s \\$* \\\"\n",
2230 (int)strlen(value
), value
);
2231 roff_setstrn(&r
->strtab
, name
, namesz
, call
, csz
, 0);
2232 roff_setstrn(&r
->rentab
, name
, namesz
, NULL
, 0, 0);
2235 case ROFFDEF_STD
: /* rename and call standard macro. */
2236 rsz
= mandoc_asprintf(&rname
, "__%s_renamed", name
);
2237 roff_setstrn(&r
->rentab
, rname
, rsz
, name
, namesz
, 0);
2238 csz
= mandoc_asprintf(&call
, ".%.*s \\$* \\\"\n",
2240 roff_setstrn(&r
->strtab
, name
, namesz
, call
, csz
, 0);
2252 /* Get the custom end marker. */
2255 namesz
= roff_getname(r
, &cp
, ln
, ppos
);
2257 /* Resolve the end marker if it is indirect. */
2259 if (namesz
&& (tok
== ROFF_dei
|| tok
== ROFF_ami
)) {
2260 deftype
= ROFFDEF_USER
;
2261 name
= roff_getstrn(r
, iname
, namesz
, &deftype
);
2263 mandoc_msg(MANDOCERR_STR_UNDEF
,
2264 ln
, (int)(iname
- buf
->buf
),
2265 "%.*s", (int)namesz
, iname
);
2268 namesz
= strlen(name
);
2273 r
->last
->end
= mandoc_strndup(name
, namesz
);
2276 mandoc_msg(MANDOCERR_ARG_EXCESS
,
2277 ln
, pos
, ".%s ... %s", roff_name
[tok
], cp
);
2283 roff_block_sub(ROFF_ARGS
)
2289 * If a custom end marker is a user-defined or predefined macro
2290 * or a request, interpret it.
2294 for (i
= pos
, j
= 0; r
->last
->end
[j
]; j
++, i
++)
2295 if (buf
->buf
[i
] != r
->last
->end
[j
])
2298 if (r
->last
->end
[j
] == '\0' &&
2299 (buf
->buf
[i
] == '\0' ||
2300 buf
->buf
[i
] == ' ' ||
2301 buf
->buf
[i
] == '\t')) {
2303 roffnode_cleanscope(r
);
2305 while (buf
->buf
[i
] == ' ' || buf
->buf
[i
] == '\t')
2309 if (roff_parse(r
, buf
->buf
, &pos
, ln
, ppos
) !=
2316 /* Handle the standard end marker. */
2318 t
= roff_parse(r
, buf
->buf
, &pos
, ln
, ppos
);
2319 if (t
== ROFF_cblock
)
2320 return roff_cblock(r
, t
, buf
, ln
, ppos
, pos
, offs
);
2322 /* Not an end marker, so append the line to the block. */
2325 roff_setstr(r
, r
->last
->name
, buf
->buf
+ ppos
, 2);
2330 roff_block_text(ROFF_ARGS
)
2334 roff_setstr(r
, r
->last
->name
, buf
->buf
+ pos
, 2);
2340 * Check for a closing "\}" and handle it.
2341 * In this function, the final "int *offs" argument is used for
2342 * different purposes than elsewhere:
2343 * Input: *offs == 0: caller wants to discard arguments following \}
2344 * *offs == 1: caller wants to preserve text following \}
2345 * Output: *offs = 0: tell caller to discard input line
2346 * *offs = 1: tell caller to use input line
2349 roff_cond_checkend(ROFF_ARGS
)
2352 int endloop
, irc
, rr
;
2356 endloop
= tok
!= ROFF_while
? ROFF_IGN
:
2357 rr
? ROFF_LOOPCONT
: ROFF_LOOPEXIT
;
2358 if (roffnode_cleanscope(r
))
2362 * If "\}" occurs on a macro line without a preceding macro or
2363 * a text line contains nothing else, drop the line completely.
2366 ep
= buf
->buf
+ pos
;
2367 if (ep
[0] == '\\' && ep
[1] == '}' && (ep
[2] == '\0' || *offs
== 0))
2371 * The closing delimiter "\}" rewinds the conditional scope
2372 * but is otherwise ignored when interpreting the line.
2375 while ((ep
= strchr(ep
, '\\')) != NULL
) {
2383 memmove(ep
, ep
+ 2, strlen(ep
+ 2) + 1);
2384 if (roff_ccond(r
, ln
, ep
- buf
->buf
))
2400 * Parse and process a request or macro line in conditional scope.
2403 roff_cond_sub(ROFF_ARGS
)
2405 struct roffnode
*bl
;
2409 rr
= 0; /* If arguments follow "\}", skip them. */
2410 irc
= roff_cond_checkend(r
, tok
, buf
, ln
, ppos
, pos
, &rr
);
2412 t
= roff_parse(r
, buf
->buf
, &pos
, ln
, ppos
);
2415 * Handle requests and macros if the conditional evaluated
2416 * to true or if they are structurally required.
2417 * The .break request is always handled specially.
2420 if (t
== ROFF_break
) {
2421 if (irc
& ROFF_LOOPMASK
)
2422 irc
= ROFF_IGN
| ROFF_LOOPEXIT
;
2424 for (bl
= r
->last
; bl
!= NULL
; bl
= bl
->parent
) {
2426 if (bl
->tok
== ROFF_while
)
2430 } else if (rr
|| (t
< TOKEN_NONE
&& roffs
[t
].flags
& ROFFMAC_STRUCT
)) {
2431 irc
|= roff_req_or_macro(r
, t
, buf
, ln
, spos
, pos
, offs
);
2432 if (irc
& ROFF_WHILE
)
2433 irc
&= ~(ROFF_LOOPCONT
| ROFF_LOOPEXIT
);
2439 * Parse and process a text line in conditional scope.
2442 roff_cond_text(ROFF_ARGS
)
2446 rr
= 1; /* If arguments follow "\}", preserve them. */
2447 irc
= roff_cond_checkend(r
, tok
, buf
, ln
, ppos
, pos
, &rr
);
2453 /* --- handling of numeric and conditional expressions -------------------- */
2456 * Parse a single signed integer number. Stop at the first non-digit.
2457 * If there is at least one digit, return success and advance the
2458 * parse point, else return failure and leave the parse point unchanged.
2459 * Ignore overflows, treat them just like the C language.
2462 roff_getnum(const char *v
, int *pos
, int *res
, int flags
)
2464 int myres
, scaled
, n
, p
;
2471 if (n
|| v
[p
] == '+')
2474 if (flags
& ROFFNUM_WHITE
)
2475 while (isspace((unsigned char)v
[p
]))
2478 for (*res
= 0; isdigit((unsigned char)v
[p
]); p
++)
2479 *res
= 10 * *res
+ v
[p
] - '0';
2486 /* Each number may be followed by one optional scaling unit. */
2490 scaled
= *res
* 65536;
2493 scaled
= *res
* 240;
2496 scaled
= *res
* 240 / 2.54;
2507 scaled
= *res
* 10 / 3;
2513 scaled
= *res
* 6 / 25;
2520 if (flags
& ROFFNUM_SCALE
)
2528 * Evaluate a string comparison condition.
2529 * The first character is the delimiter.
2530 * Succeed if the string up to its second occurrence
2531 * matches the string up to its third occurrence.
2532 * Advance the cursor after the third occurrence
2533 * or lacking that, to the end of the line.
2536 roff_evalstrcond(const char *v
, int *pos
)
2538 const char *s1
, *s2
, *s3
;
2542 s1
= v
+ *pos
; /* initial delimiter */
2543 s2
= s1
+ 1; /* for scanning the first string */
2544 s3
= strchr(s2
, *s1
); /* for scanning the second string */
2546 if (NULL
== s3
) /* found no middle delimiter */
2549 while ('\0' != *++s3
) {
2550 if (*s2
!= *s3
) { /* mismatch */
2551 s3
= strchr(s3
, *s1
);
2554 if (*s3
== *s1
) { /* found the final delimiter */
2563 s3
= strchr(s2
, '\0');
2564 else if (*s3
!= '\0')
2571 * Evaluate an optionally negated single character, numerical,
2572 * or string condition.
2575 roff_evalcond(struct roff
*r
, int ln
, char *v
, int *pos
)
2577 const char *start
, *end
;
2580 int deftype
, len
, number
, savepos
, istrue
, wanttrue
;
2582 if ('!' == v
[*pos
]) {
2603 } while (v
[*pos
] == ' ');
2606 * Quirk for groff compatibility:
2607 * The horizontal tab is neither available nor unavailable.
2610 if (v
[*pos
] == '\t') {
2615 /* Printable ASCII characters are available. */
2617 if (v
[*pos
] != '\\') {
2623 switch (mandoc_escape(&end
, &start
, &len
)) {
2624 case ESCAPE_SPECIAL
:
2625 istrue
= mchars_spec2cp(start
, len
) != -1;
2627 case ESCAPE_UNICODE
:
2630 case ESCAPE_NUMBERED
:
2631 istrue
= mchars_num2char(start
, len
) != -1;
2638 return istrue
== wanttrue
;
2645 sz
= roff_getname(r
, &cp
, ln
, cp
- v
);
2648 else if (v
[*pos
] == 'r')
2649 istrue
= roff_hasregn(r
, name
, sz
);
2651 deftype
= ROFFDEF_ANY
;
2652 roff_getstrn(r
, name
, sz
, &deftype
);
2655 *pos
= (name
+ sz
) - v
;
2656 return istrue
== wanttrue
;
2662 if (roff_evalnum(r
, ln
, v
, pos
, &number
, ROFFNUM_SCALE
))
2663 return (number
> 0) == wanttrue
;
2664 else if (*pos
== savepos
)
2665 return roff_evalstrcond(v
, pos
) == wanttrue
;
2671 roff_line_ignore(ROFF_ARGS
)
2678 roff_insec(ROFF_ARGS
)
2681 mandoc_msg(MANDOCERR_REQ_INSEC
, ln
, ppos
, "%s", roff_name
[tok
]);
2686 roff_unsupp(ROFF_ARGS
)
2689 mandoc_msg(MANDOCERR_REQ_UNSUPP
, ln
, ppos
, "%s", roff_name
[tok
]);
2694 roff_cond(ROFF_ARGS
)
2698 roffnode_push(r
, tok
, NULL
, ln
, ppos
);
2701 * An `.el' has no conditional body: it will consume the value
2702 * of the current rstack entry set in prior `ie' calls or
2705 * If we're not an `el', however, then evaluate the conditional.
2708 r
->last
->rule
= tok
== ROFF_el
?
2709 (r
->rstackpos
< 0 ? 0 : r
->rstack
[r
->rstackpos
--]) :
2710 roff_evalcond(r
, ln
, buf
->buf
, &pos
);
2713 * An if-else will put the NEGATION of the current evaluated
2714 * conditional into the stack of rules.
2717 if (tok
== ROFF_ie
) {
2718 if (r
->rstackpos
+ 1 == r
->rstacksz
) {
2720 r
->rstack
= mandoc_reallocarray(r
->rstack
,
2721 r
->rstacksz
, sizeof(int));
2723 r
->rstack
[++r
->rstackpos
] = !r
->last
->rule
;
2726 /* If the parent has false as its rule, then so do we. */
2728 if (r
->last
->parent
&& !r
->last
->parent
->rule
)
2733 * If there is nothing on the line after the conditional,
2734 * not even whitespace, use next-line scope.
2735 * Except that .while does not support next-line scope.
2738 if (buf
->buf
[pos
] == '\0' && tok
!= ROFF_while
) {
2739 r
->last
->endspan
= 2;
2743 while (buf
->buf
[pos
] == ' ')
2746 /* An opening brace requests multiline scope. */
2748 if (buf
->buf
[pos
] == '\\' && buf
->buf
[pos
+ 1] == '{') {
2749 r
->last
->endspan
= -1;
2751 while (buf
->buf
[pos
] == ' ')
2757 * Anything else following the conditional causes
2758 * single-line scope. Warn if the scope contains
2759 * nothing but trailing whitespace.
2762 if (buf
->buf
[pos
] == '\0')
2763 mandoc_msg(MANDOCERR_COND_EMPTY
,
2764 ln
, ppos
, "%s", roff_name
[tok
]);
2766 r
->last
->endspan
= 1;
2771 if (tok
== ROFF_while
)
2783 /* Ignore groff compatibility mode for now. */
2785 if (tok
== ROFF_ds1
)
2787 else if (tok
== ROFF_as1
)
2791 * The first word is the name of the string.
2792 * If it is empty or terminated by an escape sequence,
2793 * abort the `ds' request without defining anything.
2796 name
= string
= buf
->buf
+ pos
;
2800 namesz
= roff_getname(r
, &string
, ln
, pos
);
2801 switch (name
[namesz
]) {
2805 string
= buf
->buf
+ pos
+ namesz
;
2811 /* Read past the initial double-quote, if any. */
2815 /* The rest is the value. */
2816 roff_setstrn(&r
->strtab
, name
, namesz
, string
, strlen(string
),
2818 roff_setstrn(&r
->rentab
, name
, namesz
, NULL
, 0, 0);
2823 * Parse a single operator, one or two characters long.
2824 * If the operator is recognized, return success and advance the
2825 * parse point, else return failure and leave the parse point unchanged.
2828 roff_getop(const char *v
, int *pos
, char *res
)
2843 switch (v
[*pos
+ 1]) {
2861 switch (v
[*pos
+ 1]) {
2875 if ('=' == v
[*pos
+ 1])
2887 * Evaluate either a parenthesized numeric expression
2888 * or a single signed integer number.
2891 roff_evalpar(struct roff
*r
, int ln
,
2892 const char *v
, int *pos
, int *res
, int flags
)
2896 return roff_getnum(v
, pos
, res
, flags
);
2899 if ( ! roff_evalnum(r
, ln
, v
, pos
, res
, flags
| ROFFNUM_WHITE
))
2903 * Omission of the closing parenthesis
2904 * is an error in validation mode,
2905 * but ignored in evaluation mode.
2910 else if (NULL
== res
)
2917 * Evaluate a complete numeric expression.
2918 * Proceed left to right, there is no concept of precedence.
2921 roff_evalnum(struct roff
*r
, int ln
, const char *v
,
2922 int *pos
, int *res
, int flags
)
2924 int mypos
, operand2
;
2932 if (flags
& ROFFNUM_WHITE
)
2933 while (isspace((unsigned char)v
[*pos
]))
2936 if ( ! roff_evalpar(r
, ln
, v
, pos
, res
, flags
))
2940 if (flags
& ROFFNUM_WHITE
)
2941 while (isspace((unsigned char)v
[*pos
]))
2944 if ( ! roff_getop(v
, pos
, &operator))
2947 if (flags
& ROFFNUM_WHITE
)
2948 while (isspace((unsigned char)v
[*pos
]))
2951 if ( ! roff_evalpar(r
, ln
, v
, pos
, &operand2
, flags
))
2954 if (flags
& ROFFNUM_WHITE
)
2955 while (isspace((unsigned char)v
[*pos
]))
2972 if (operand2
== 0) {
2973 mandoc_msg(MANDOCERR_DIVZERO
,
2981 if (operand2
== 0) {
2982 mandoc_msg(MANDOCERR_DIVZERO
,
2990 *res
= *res
< operand2
;
2993 *res
= *res
> operand2
;
2996 *res
= *res
<= operand2
;
2999 *res
= *res
>= operand2
;
3002 *res
= *res
== operand2
;
3005 *res
= *res
!= operand2
;
3008 *res
= *res
&& operand2
;
3011 *res
= *res
|| operand2
;
3014 if (operand2
< *res
)
3018 if (operand2
> *res
)
3028 /* --- register management ------------------------------------------------ */
3031 roff_setreg(struct roff
*r
, const char *name
, int val
, char sign
)
3033 roff_setregn(r
, name
, strlen(name
), val
, sign
, INT_MIN
);
3037 roff_setregn(struct roff
*r
, const char *name
, size_t len
,
3038 int val
, char sign
, int step
)
3040 struct roffreg
*reg
;
3042 /* Search for an existing register with the same name. */
3045 while (reg
!= NULL
&& (reg
->key
.sz
!= len
||
3046 strncmp(reg
->key
.p
, name
, len
) != 0))
3050 /* Create a new register. */
3051 reg
= mandoc_malloc(sizeof(struct roffreg
));
3052 reg
->key
.p
= mandoc_strndup(name
, len
);
3056 reg
->next
= r
->regtab
;
3062 else if ('-' == sign
)
3066 if (step
!= INT_MIN
)
3071 * Handle some predefined read-only number registers.
3072 * For now, return -1 if the requested register is not predefined;
3073 * in case a predefined read-only register having the value -1
3074 * were to turn up, another special value would have to be chosen.
3077 roff_getregro(const struct roff
*r
, const char *name
)
3081 case '$': /* Number of arguments of the last macro evaluated. */
3082 return r
->mstackpos
< 0 ? 0 : r
->mstack
[r
->mstackpos
].argc
;
3083 case 'A': /* ASCII approximation mode is always off. */
3085 case 'g': /* Groff compatibility mode is always on. */
3087 case 'H': /* Fixed horizontal resolution. */
3089 case 'j': /* Always adjust left margin only. */
3091 case 'T': /* Some output device is always defined. */
3093 case 'V': /* Fixed vertical resolution. */
3101 roff_getreg(struct roff
*r
, const char *name
)
3103 return roff_getregn(r
, name
, strlen(name
), '\0');
3107 roff_getregn(struct roff
*r
, const char *name
, size_t len
, char sign
)
3109 struct roffreg
*reg
;
3112 if ('.' == name
[0] && 2 == len
) {
3113 val
= roff_getregro(r
, name
+ 1);
3118 for (reg
= r
->regtab
; reg
; reg
= reg
->next
) {
3119 if (len
== reg
->key
.sz
&&
3120 0 == strncmp(name
, reg
->key
.p
, len
)) {
3123 reg
->val
+= reg
->step
;
3126 reg
->val
-= reg
->step
;
3135 roff_setregn(r
, name
, len
, 0, '\0', INT_MIN
);
3140 roff_hasregn(const struct roff
*r
, const char *name
, size_t len
)
3142 struct roffreg
*reg
;
3145 if ('.' == name
[0] && 2 == len
) {
3146 val
= roff_getregro(r
, name
+ 1);
3151 for (reg
= r
->regtab
; reg
; reg
= reg
->next
)
3152 if (len
== reg
->key
.sz
&&
3153 0 == strncmp(name
, reg
->key
.p
, len
))
3160 roff_freereg(struct roffreg
*reg
)
3162 struct roffreg
*old_reg
;
3164 while (NULL
!= reg
) {
3175 char *key
, *val
, *step
;
3180 key
= val
= buf
->buf
+ pos
;
3184 keysz
= roff_getname(r
, &val
, ln
, pos
);
3185 if (key
[keysz
] == '\\' || key
[keysz
] == '\t')
3189 if (sign
== '+' || sign
== '-')
3193 if (roff_evalnum(r
, ln
, val
, &len
, &iv
, ROFFNUM_SCALE
) == 0)
3197 while (isspace((unsigned char)*step
))
3199 if (roff_evalnum(r
, ln
, step
, NULL
, &is
, 0) == 0)
3202 roff_setregn(r
, key
, keysz
, iv
, sign
, is
);
3209 struct roffreg
*reg
, **prev
;
3213 name
= cp
= buf
->buf
+ pos
;
3216 namesz
= roff_getname(r
, &cp
, ln
, pos
);
3217 name
[namesz
] = '\0';
3222 if (reg
== NULL
|| !strcmp(name
, reg
->key
.p
))
3234 /* --- handler functions for roff requests -------------------------------- */
3243 cp
= buf
->buf
+ pos
;
3244 while (*cp
!= '\0') {
3246 namesz
= roff_getname(r
, &cp
, ln
, (int)(cp
- buf
->buf
));
3247 roff_setstrn(&r
->strtab
, name
, namesz
, NULL
, 0, 0);
3248 roff_setstrn(&r
->rentab
, name
, namesz
, NULL
, 0, 0);
3249 if (name
[namesz
] == '\\' || name
[namesz
] == '\t')
3260 /* Parse the number of lines. */
3262 if ( ! roff_evalnum(r
, ln
, buf
->buf
, &pos
, &iv
, 0)) {
3263 mandoc_msg(MANDOCERR_IT_NONUM
,
3264 ln
, ppos
, "%s", buf
->buf
+ 1);
3268 while (isspace((unsigned char)buf
->buf
[pos
]))
3272 * Arm the input line trap.
3273 * Special-casing "an-trap" is an ugly workaround to cope
3274 * with DocBook stupidly fiddling with man(7) internals.
3278 roffit_macro
= mandoc_strdup(iv
!= 1 ||
3279 strcmp(buf
->buf
+ pos
, "an-trap") ?
3280 buf
->buf
+ pos
: "br");
3288 enum roff_tok t
, te
;
3295 r
->format
= MPARSE_MDOC
;
3296 mask
= MPARSE_MDOC
| MPARSE_QUICK
;
3302 r
->format
= MPARSE_MAN
;
3303 mask
= MPARSE_QUICK
;
3308 if ((r
->options
& mask
) == 0)
3309 for (t
= tok
; t
< te
; t
++)
3310 roff_setstr(r
, roff_name
[t
], NULL
, 0);
3317 r
->man
->flags
&= ~ROFF_NONOFILL
;
3318 if (r
->tbl
== NULL
) {
3319 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "TE");
3322 if (tbl_end(r
->tbl
, 0) == 0) {
3325 buf
->buf
= mandoc_strdup(".sp");
3328 return ROFF_REPARSE
;
3339 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "T&");
3341 tbl_restart(ln
, ppos
, r
->tbl
);
3347 * Handle in-line equation delimiters.
3350 roff_eqndelim(struct roff
*r
, struct buf
*buf
, int pos
)
3353 const char *bef_pr
, *bef_nl
, *mac
, *aft_nl
, *aft_pr
;
3356 * Outside equations, look for an opening delimiter.
3357 * If we are inside an equation, we already know it is
3358 * in-line, or this function wouldn't have been called;
3359 * so look for a closing delimiter.
3362 cp1
= buf
->buf
+ pos
;
3363 cp2
= strchr(cp1
, r
->eqn
== NULL
?
3364 r
->last_eqn
->odelim
: r
->last_eqn
->cdelim
);
3369 bef_pr
= bef_nl
= aft_nl
= aft_pr
= "";
3371 /* Handle preceding text, protecting whitespace. */
3373 if (*buf
->buf
!= '\0') {
3380 * Prepare replacing the delimiter with an equation macro
3381 * and drop leading white space from the equation.
3384 if (r
->eqn
== NULL
) {
3391 /* Handle following text, protecting whitespace. */
3399 /* Do the actual replacement. */
3401 buf
->sz
= mandoc_asprintf(&cp1
, "%s%s%s%s%s%s%s", buf
->buf
,
3402 bef_pr
, bef_nl
, mac
, aft_nl
, aft_pr
, cp2
) + 1;
3406 /* Toggle the in-line state of the eqn subsystem. */
3408 r
->eqn_inline
= r
->eqn
== NULL
;
3409 return ROFF_REPARSE
;
3415 struct roff_node
*n
;
3417 if (r
->man
->meta
.macroset
== MACROSET_MAN
)
3418 man_breakscope(r
->man
, ROFF_EQ
);
3419 n
= roff_node_alloc(r
->man
, ln
, ppos
, ROFFT_EQN
, TOKEN_NONE
);
3420 if (ln
> r
->man
->last
->line
)
3421 n
->flags
|= NODE_LINE
;
3422 n
->eqn
= eqn_box_new();
3423 roff_node_append(r
->man
, n
);
3424 r
->man
->next
= ROFF_NEXT_SIBLING
;
3426 assert(r
->eqn
== NULL
);
3427 if (r
->last_eqn
== NULL
)
3428 r
->last_eqn
= eqn_alloc();
3430 eqn_reset(r
->last_eqn
);
3431 r
->eqn
= r
->last_eqn
;
3434 if (buf
->buf
[pos
] != '\0')
3435 mandoc_msg(MANDOCERR_ARG_SKIP
, ln
, pos
,
3436 ".EQ %s", buf
->buf
+ pos
);
3444 if (r
->eqn
!= NULL
) {
3448 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "EN");
3449 if (buf
->buf
[pos
] != '\0')
3450 mandoc_msg(MANDOCERR_ARG_SKIP
, ln
, pos
,
3451 "EN %s", buf
->buf
+ pos
);
3458 if (r
->tbl
!= NULL
) {
3459 mandoc_msg(MANDOCERR_BLK_BROKEN
, ln
, ppos
, "TS breaks TS");
3462 r
->man
->flags
|= ROFF_NONOFILL
;
3463 r
->tbl
= tbl_alloc(ppos
, ln
, r
->last_tbl
);
3464 if (r
->last_tbl
== NULL
)
3465 r
->first_tbl
= r
->tbl
;
3466 r
->last_tbl
= r
->tbl
;
3471 roff_noarg(ROFF_ARGS
)
3473 if (r
->man
->flags
& (MAN_BLINE
| MAN_ELINE
))
3474 man_breakscope(r
->man
, tok
);
3475 if (tok
== ROFF_brp
)
3477 roff_elem_alloc(r
->man
, ln
, ppos
, tok
);
3478 if (buf
->buf
[pos
] != '\0')
3479 mandoc_msg(MANDOCERR_ARG_SKIP
, ln
, pos
,
3480 "%s %s", roff_name
[tok
], buf
->buf
+ pos
);
3482 r
->man
->flags
|= ROFF_NOFILL
;
3483 else if (tok
== ROFF_fi
)
3484 r
->man
->flags
&= ~ROFF_NOFILL
;
3485 r
->man
->last
->flags
|= NODE_LINE
| NODE_VALID
| NODE_ENDED
;
3486 r
->man
->next
= ROFF_NEXT_SIBLING
;
3491 roff_onearg(ROFF_ARGS
)
3493 struct roff_node
*n
;
3497 if (r
->man
->flags
& (MAN_BLINE
| MAN_ELINE
) &&
3498 (tok
== ROFF_ce
|| tok
== ROFF_rj
|| tok
== ROFF_sp
||
3500 man_breakscope(r
->man
, tok
);
3502 if (roffce_node
!= NULL
&& (tok
== ROFF_ce
|| tok
== ROFF_rj
)) {
3503 r
->man
->last
= roffce_node
;
3504 r
->man
->next
= ROFF_NEXT_SIBLING
;
3507 roff_elem_alloc(r
->man
, ln
, ppos
, tok
);
3510 cp
= buf
->buf
+ pos
;
3512 while (*cp
!= '\0' && *cp
!= ' ')
3517 mandoc_msg(MANDOCERR_ARG_EXCESS
,
3518 ln
, (int)(cp
- buf
->buf
),
3519 "%s ... %s", roff_name
[tok
], cp
);
3520 roff_word_alloc(r
->man
, ln
, pos
, buf
->buf
+ pos
);
3523 if (tok
== ROFF_ce
|| tok
== ROFF_rj
) {
3524 if (r
->man
->last
->type
== ROFFT_ELEM
) {
3525 roff_word_alloc(r
->man
, ln
, pos
, "1");
3526 r
->man
->last
->flags
|= NODE_NOSRC
;
3529 if (roff_evalnum(r
, ln
, r
->man
->last
->string
, &npos
,
3530 &roffce_lines
, 0) == 0) {
3531 mandoc_msg(MANDOCERR_CE_NONUM
,
3532 ln
, pos
, "ce %s", buf
->buf
+ pos
);
3535 if (roffce_lines
< 1) {
3536 r
->man
->last
= r
->man
->last
->parent
;
3540 roffce_node
= r
->man
->last
->parent
;
3542 n
->flags
|= NODE_VALID
| NODE_ENDED
;
3545 n
->flags
|= NODE_LINE
;
3546 r
->man
->next
= ROFF_NEXT_SIBLING
;
3551 roff_manyarg(ROFF_ARGS
)
3553 struct roff_node
*n
;
3556 roff_elem_alloc(r
->man
, ln
, ppos
, tok
);
3559 for (sp
= ep
= buf
->buf
+ pos
; *sp
!= '\0'; sp
= ep
) {
3560 while (*ep
!= '\0' && *ep
!= ' ')
3564 roff_word_alloc(r
->man
, ln
, sp
- buf
->buf
, sp
);
3567 n
->flags
|= NODE_LINE
| NODE_VALID
| NODE_ENDED
;
3569 r
->man
->next
= ROFF_NEXT_SIBLING
;
3576 char *oldn
, *newn
, *end
, *value
;
3577 size_t oldsz
, newsz
, valsz
;
3579 newn
= oldn
= buf
->buf
+ pos
;
3583 newsz
= roff_getname(r
, &oldn
, ln
, pos
);
3584 if (newn
[newsz
] == '\\' || newn
[newsz
] == '\t' || *oldn
== '\0')
3588 oldsz
= roff_getname(r
, &end
, ln
, oldn
- buf
->buf
);
3592 valsz
= mandoc_asprintf(&value
, ".%.*s \\$@\\\"\n",
3594 roff_setstrn(&r
->strtab
, newn
, newsz
, value
, valsz
, 0);
3595 roff_setstrn(&r
->rentab
, newn
, newsz
, NULL
, 0, 0);
3601 * The .break request only makes sense inside conditionals,
3602 * and that case is already handled in roff_cond_sub().
3605 roff_break(ROFF_ARGS
)
3607 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, pos
, "break");
3618 if (*p
== '\0' || (r
->control
= *p
++) == '.')
3622 mandoc_msg(MANDOCERR_ARG_EXCESS
,
3623 ln
, p
- buf
->buf
, "cc ... %s", p
);
3629 roff_char(ROFF_ARGS
)
3631 const char *p
, *kp
, *vp
;
3635 /* Parse the character to be replaced. */
3637 kp
= buf
->buf
+ pos
;
3639 if (*kp
== '\0' || (*kp
== '\\' &&
3640 mandoc_escape(&p
, NULL
, NULL
) != ESCAPE_SPECIAL
) ||
3641 (*p
!= ' ' && *p
!= '\0')) {
3642 mandoc_msg(MANDOCERR_CHAR_ARG
, ln
, pos
, "char %s", kp
);
3650 * If the replacement string contains a font escape sequence,
3651 * we have to restore the font at the end.
3657 while (*p
!= '\0') {
3660 switch (mandoc_escape(&p
, NULL
, NULL
)) {
3662 case ESCAPE_FONTROMAN
:
3663 case ESCAPE_FONTITALIC
:
3664 case ESCAPE_FONTBOLD
:
3669 case ESCAPE_FONTPREV
:
3677 mandoc_msg(MANDOCERR_CHAR_FONT
,
3678 ln
, (int)(vp
- buf
->buf
), "%s", vp
);
3681 * Approximate the effect of .char using the .tr tables.
3682 * XXX In groff, .char and .tr interact differently.
3686 if (r
->xtab
== NULL
)
3687 r
->xtab
= mandoc_calloc(128, sizeof(*r
->xtab
));
3688 assert((unsigned int)*kp
< 128);
3689 free(r
->xtab
[(int)*kp
].p
);
3690 r
->xtab
[(int)*kp
].sz
= mandoc_asprintf(&r
->xtab
[(int)*kp
].p
,
3691 "%s%s", vp
, font
? "\fP" : "");
3693 roff_setstrn(&r
->xmbtab
, kp
, ksz
, vp
, vsz
, 0);
3695 roff_setstrn(&r
->xmbtab
, kp
, ksz
, "\\fP", 3, 1);
3711 mandoc_msg(MANDOCERR_ARG_EXCESS
, ln
,
3712 (int)(p
- buf
->buf
), "ec ... %s", p
);
3721 if (buf
->buf
[pos
] != '\0')
3722 mandoc_msg(MANDOCERR_ARG_SKIP
,
3723 ln
, pos
, "eo %s", buf
->buf
+ pos
);
3730 struct roff_node
*n
;
3733 /* Parse the first argument. */
3735 cp
= buf
->buf
+ pos
;
3738 if (buf
->buf
[pos
] == '\\') {
3739 switch (mandoc_escape((const char **)&cp
, NULL
, NULL
)) {
3740 case ESCAPE_SPECIAL
:
3741 case ESCAPE_UNICODE
:
3742 case ESCAPE_NUMBERED
:
3746 mandoc_msg(MANDOCERR_MC_ESC
, ln
, pos
,
3747 "mc %s", buf
->buf
+ pos
);
3748 buf
->buf
[pos
] = '\0';
3753 /* Ignore additional arguments. */
3758 mandoc_msg(MANDOCERR_MC_DIST
, ln
, (int)(cp
- buf
->buf
),
3763 /* Create the .mc node. */
3765 roff_elem_alloc(r
->man
, ln
, ppos
, tok
);
3767 if (buf
->buf
[pos
] != '\0')
3768 roff_word_alloc(r
->man
, ln
, pos
, buf
->buf
+ pos
);
3769 n
->flags
|= NODE_LINE
| NODE_VALID
| NODE_ENDED
;
3771 r
->man
->next
= ROFF_NEXT_SIBLING
;
3778 while (buf
->buf
[pos
] == ' ')
3787 const char *p
, *first
, *second
;
3789 enum mandoc_esc esc
;
3794 mandoc_msg(MANDOCERR_REQ_EMPTY
, ln
, ppos
, "tr");
3798 while (*p
!= '\0') {
3802 if (*first
== '\\') {
3803 esc
= mandoc_escape(&p
, NULL
, NULL
);
3804 if (esc
== ESCAPE_ERROR
) {
3805 mandoc_msg(MANDOCERR_ESC_BAD
, ln
,
3806 (int)(p
- buf
->buf
), "%s", first
);
3809 fsz
= (size_t)(p
- first
);
3813 if (*second
== '\\') {
3814 esc
= mandoc_escape(&p
, NULL
, NULL
);
3815 if (esc
== ESCAPE_ERROR
) {
3816 mandoc_msg(MANDOCERR_ESC_BAD
, ln
,
3817 (int)(p
- buf
->buf
), "%s", second
);
3820 ssz
= (size_t)(p
- second
);
3821 } else if (*second
== '\0') {
3822 mandoc_msg(MANDOCERR_TR_ODD
, ln
,
3823 (int)(first
- buf
->buf
), "tr %s", first
);
3829 roff_setstrn(&r
->xmbtab
, first
, fsz
,
3834 if (r
->xtab
== NULL
)
3835 r
->xtab
= mandoc_calloc(128,
3836 sizeof(struct roffstr
));
3838 free(r
->xtab
[(int)*first
].p
);
3839 r
->xtab
[(int)*first
].p
= mandoc_strndup(second
, ssz
);
3840 r
->xtab
[(int)*first
].sz
= ssz
;
3847 * Implementation of the .return request.
3848 * There is no need to call roff_userret() from here.
3849 * The read module will call that after rewinding the reader stack
3850 * to the place from where the current macro was called.
3853 roff_return(ROFF_ARGS
)
3855 if (r
->mstackpos
>= 0)
3856 return ROFF_IGN
| ROFF_USERRET
;
3858 mandoc_msg(MANDOCERR_REQ_NOMAC
, ln
, ppos
, "return");
3866 char *oldn
, *newn
, *end
;
3867 size_t oldsz
, newsz
;
3870 oldn
= newn
= buf
->buf
+ pos
;
3874 oldsz
= roff_getname(r
, &newn
, ln
, pos
);
3875 if (oldn
[oldsz
] == '\\' || oldn
[oldsz
] == '\t' || *newn
== '\0')
3879 newsz
= roff_getname(r
, &end
, ln
, newn
- buf
->buf
);
3883 deftype
= ROFFDEF_ANY
;
3884 value
= roff_getstrn(r
, oldn
, oldsz
, &deftype
);
3887 roff_setstrn(&r
->strtab
, newn
, newsz
, value
, strlen(value
), 0);
3888 roff_setstrn(&r
->strtab
, oldn
, oldsz
, NULL
, 0, 0);
3889 roff_setstrn(&r
->rentab
, newn
, newsz
, NULL
, 0, 0);
3892 roff_setstrn(&r
->strtab
, newn
, newsz
, value
, strlen(value
), 0);
3893 roff_setstrn(&r
->rentab
, newn
, newsz
, NULL
, 0, 0);
3896 roff_setstrn(&r
->rentab
, newn
, newsz
, value
, strlen(value
), 0);
3897 roff_setstrn(&r
->rentab
, oldn
, oldsz
, NULL
, 0, 0);
3898 roff_setstrn(&r
->strtab
, newn
, newsz
, NULL
, 0, 0);
3901 roff_setstrn(&r
->rentab
, newn
, newsz
, oldn
, oldsz
, 0);
3902 roff_setstrn(&r
->strtab
, newn
, newsz
, NULL
, 0, 0);
3905 roff_setstrn(&r
->strtab
, newn
, newsz
, NULL
, 0, 0);
3906 roff_setstrn(&r
->rentab
, newn
, newsz
, NULL
, 0, 0);
3913 roff_shift(ROFF_ARGS
)
3916 int argpos
, levels
, i
;
3920 if (buf
->buf
[pos
] != '\0' &&
3921 roff_evalnum(r
, ln
, buf
->buf
, &pos
, &levels
, 0) == 0) {
3922 mandoc_msg(MANDOCERR_CE_NONUM
,
3923 ln
, pos
, "shift %s", buf
->buf
+ pos
);
3926 if (r
->mstackpos
< 0) {
3927 mandoc_msg(MANDOCERR_REQ_NOMAC
, ln
, ppos
, "shift");
3930 ctx
= r
->mstack
+ r
->mstackpos
;
3931 if (levels
> ctx
->argc
) {
3932 mandoc_msg(MANDOCERR_SHIFT
,
3933 ln
, argpos
, "%d, but max is %d", levels
, ctx
->argc
);
3937 mandoc_msg(MANDOCERR_ARG_NEG
, ln
, argpos
, "shift %d", levels
);
3942 for (i
= 0; i
< levels
; i
++)
3944 ctx
->argc
-= levels
;
3945 for (i
= 0; i
< ctx
->argc
; i
++)
3946 ctx
->argv
[i
] = ctx
->argv
[i
+ levels
];
3955 name
= buf
->buf
+ pos
;
3956 mandoc_msg(MANDOCERR_SO
, ln
, ppos
, "so %s", name
);
3959 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3960 * opening anything that's not in our cwd or anything beneath
3961 * it. Thus, explicitly disallow traversing up the file-system
3962 * or using absolute paths.
3965 if (*name
== '/' || strstr(name
, "../") || strstr(name
, "/..")) {
3966 mandoc_msg(MANDOCERR_SO_PATH
, ln
, ppos
, ".so %s", name
);
3967 buf
->sz
= mandoc_asprintf(&cp
,
3968 ".sp\nSee the file %s.\n.sp", name
) + 1;
3972 return ROFF_REPARSE
;
3979 /* --- user defined strings and macros ------------------------------------ */
3982 roff_userdef(ROFF_ARGS
)
3985 char *arg
, *ap
, *dst
, *src
;
3988 /* If the macro is empty, ignore it altogether. */
3990 if (*r
->current_string
== '\0')
3993 /* Initialize a new macro stack context. */
3995 if (++r
->mstackpos
== r
->mstacksz
) {
3996 r
->mstack
= mandoc_recallocarray(r
->mstack
,
3997 r
->mstacksz
, r
->mstacksz
+ 8, sizeof(*r
->mstack
));
4000 ctx
= r
->mstack
+ r
->mstackpos
;
4004 * Collect pointers to macro argument strings,
4005 * NUL-terminating them and escaping quotes.
4008 src
= buf
->buf
+ pos
;
4009 while (*src
!= '\0') {
4010 if (ctx
->argc
== ctx
->argsz
) {
4012 ctx
->argv
= mandoc_reallocarray(ctx
->argv
,
4013 ctx
->argsz
, sizeof(*ctx
->argv
));
4015 arg
= roff_getarg(r
, &src
, ln
, &pos
);
4016 sz
= 1; /* For the terminating NUL. */
4017 for (ap
= arg
; *ap
!= '\0'; ap
++)
4018 sz
+= *ap
== '"' ? 4 : 1;
4019 ctx
->argv
[ctx
->argc
++] = dst
= mandoc_malloc(sz
);
4020 for (ap
= arg
; *ap
!= '\0'; ap
++) {
4022 memcpy(dst
, "\\(dq", 4);
4031 /* Replace the macro invocation by the macro definition. */
4034 buf
->buf
= mandoc_strdup(r
->current_string
);
4035 buf
->sz
= strlen(buf
->buf
) + 1;
4038 return buf
->buf
[buf
->sz
- 2] == '\n' ?
4039 ROFF_REPARSE
| ROFF_USERCALL
: ROFF_IGN
| ROFF_APPEND
;
4043 * Calling a high-level macro that was renamed with .rn.
4044 * r->current_string has already been set up by roff_parse().
4047 roff_renamed(ROFF_ARGS
)
4051 buf
->sz
= mandoc_asprintf(&nbuf
, ".%s%s%s", r
->current_string
,
4052 buf
->buf
[pos
] == '\0' ? "" : " ", buf
->buf
+ pos
) + 1;
4060 * Measure the length in bytes of the roff identifier at *cpp
4061 * and advance the pointer to the next word.
4064 roff_getname(struct roff
*r
, char **cpp
, int ln
, int pos
)
4073 /* Advance cp to the byte after the end of the name. */
4075 for (cp
= name
; 1; cp
++) {
4079 if (*cp
== ' ' || *cp
== '\t') {
4085 if (cp
[1] == '{' || cp
[1] == '}')
4089 mandoc_msg(MANDOCERR_NAMESC
, ln
, pos
,
4090 "%.*s", (int)(cp
- name
+ 1), name
);
4091 mandoc_escape((const char **)&cp
, NULL
, NULL
);
4095 /* Read past spaces. */
4105 * Store *string into the user-defined string called *name.
4106 * To clear an existing entry, call with (*r, *name, NULL, 0).
4107 * append == 0: replace mode
4108 * append == 1: single-line append mode
4109 * append == 2: multiline append mode, append '\n' after each call
4112 roff_setstr(struct roff
*r
, const char *name
, const char *string
,
4117 namesz
= strlen(name
);
4118 roff_setstrn(&r
->strtab
, name
, namesz
, string
,
4119 string
? strlen(string
) : 0, append
);
4120 roff_setstrn(&r
->rentab
, name
, namesz
, NULL
, 0, 0);
4124 roff_setstrn(struct roffkv
**r
, const char *name
, size_t namesz
,
4125 const char *string
, size_t stringsz
, int append
)
4130 size_t oldch
, newch
;
4132 /* Search for an existing string with the same name. */
4135 while (n
&& (namesz
!= n
->key
.sz
||
4136 strncmp(n
->key
.p
, name
, namesz
)))
4140 /* Create a new string table entry. */
4141 n
= mandoc_malloc(sizeof(struct roffkv
));
4142 n
->key
.p
= mandoc_strndup(name
, namesz
);
4148 } else if (0 == append
) {
4158 * One additional byte for the '\n' in multiline mode,
4159 * and one for the terminating '\0'.
4161 newch
= stringsz
+ (1 < append
? 2u : 1u);
4163 if (NULL
== n
->val
.p
) {
4164 n
->val
.p
= mandoc_malloc(newch
);
4169 n
->val
.p
= mandoc_realloc(n
->val
.p
, oldch
+ newch
);
4172 /* Skip existing content in the destination buffer. */
4173 c
= n
->val
.p
+ (int)oldch
;
4175 /* Append new content to the destination buffer. */
4177 while (i
< (int)stringsz
) {
4179 * Rudimentary roff copy mode:
4180 * Handle escaped backslashes.
4182 if ('\\' == string
[i
] && '\\' == string
[i
+ 1])
4187 /* Append terminating bytes. */
4192 n
->val
.sz
= (int)(c
- n
->val
.p
);
4196 roff_getstrn(struct roff
*r
, const char *name
, size_t len
,
4199 const struct roffkv
*n
;
4204 for (n
= r
->strtab
; n
!= NULL
; n
= n
->next
) {
4205 if (strncmp(name
, n
->key
.p
, len
) != 0 ||
4206 n
->key
.p
[len
] != '\0' || n
->val
.p
== NULL
)
4208 if (*deftype
& ROFFDEF_USER
) {
4209 *deftype
= ROFFDEF_USER
;
4216 for (n
= r
->rentab
; n
!= NULL
; n
= n
->next
) {
4217 if (strncmp(name
, n
->key
.p
, len
) != 0 ||
4218 n
->key
.p
[len
] != '\0' || n
->val
.p
== NULL
)
4220 if (*deftype
& ROFFDEF_REN
) {
4221 *deftype
= ROFFDEF_REN
;
4228 for (i
= 0; i
< PREDEFS_MAX
; i
++) {
4229 if (strncmp(name
, predefs
[i
].name
, len
) != 0 ||
4230 predefs
[i
].name
[len
] != '\0')
4232 if (*deftype
& ROFFDEF_PRE
) {
4233 *deftype
= ROFFDEF_PRE
;
4234 return predefs
[i
].str
;
4240 if (r
->man
->meta
.macroset
!= MACROSET_MAN
) {
4241 for (tok
= MDOC_Dd
; tok
< MDOC_MAX
; tok
++) {
4242 if (strncmp(name
, roff_name
[tok
], len
) != 0 ||
4243 roff_name
[tok
][len
] != '\0')
4245 if (*deftype
& ROFFDEF_STD
) {
4246 *deftype
= ROFFDEF_STD
;
4254 if (r
->man
->meta
.macroset
!= MACROSET_MDOC
) {
4255 for (tok
= MAN_TH
; tok
< MAN_MAX
; tok
++) {
4256 if (strncmp(name
, roff_name
[tok
], len
) != 0 ||
4257 roff_name
[tok
][len
] != '\0')
4259 if (*deftype
& ROFFDEF_STD
) {
4260 *deftype
= ROFFDEF_STD
;
4269 if (found
== 0 && *deftype
!= ROFFDEF_ANY
) {
4270 if (*deftype
& ROFFDEF_REN
) {
4272 * This might still be a request,
4273 * so do not treat it as undefined yet.
4275 *deftype
= ROFFDEF_UNDEF
;
4279 /* Using an undefined string defines it to be empty. */
4281 roff_setstrn(&r
->strtab
, name
, len
, "", 0, 0);
4282 roff_setstrn(&r
->rentab
, name
, len
, NULL
, 0, 0);
4290 roff_freestr(struct roffkv
*r
)
4292 struct roffkv
*n
, *nn
;
4294 for (n
= r
; n
; n
= nn
) {
4302 /* --- accessors and utility functions ------------------------------------ */
4305 * Duplicate an input string, making the appropriate character
4306 * conversions (as stipulated by `tr') along the way.
4307 * Returns a heap-allocated string with all the replacements made.
4310 roff_strdup(const struct roff
*r
, const char *p
)
4312 const struct roffkv
*cp
;
4316 enum mandoc_esc esc
;
4318 if (NULL
== r
->xmbtab
&& NULL
== r
->xtab
)
4319 return mandoc_strdup(p
);
4320 else if ('\0' == *p
)
4321 return mandoc_strdup("");
4324 * Step through each character looking for term matches
4325 * (remember that a `tr' can be invoked with an escape, which is
4326 * a glyph but the escape is multi-character).
4327 * We only do this if the character hash has been initialised
4328 * and the string is >0 length.
4334 while ('\0' != *p
) {
4335 assert((unsigned int)*p
< 128);
4336 if ('\\' != *p
&& r
->xtab
&& r
->xtab
[(unsigned int)*p
].p
) {
4337 sz
= r
->xtab
[(int)*p
].sz
;
4338 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
4339 memcpy(res
+ ssz
, r
->xtab
[(int)*p
].p
, sz
);
4343 } else if ('\\' != *p
) {
4344 res
= mandoc_realloc(res
, ssz
+ 2);
4349 /* Search for term matches. */
4350 for (cp
= r
->xmbtab
; cp
; cp
= cp
->next
)
4351 if (0 == strncmp(p
, cp
->key
.p
, cp
->key
.sz
))
4356 * A match has been found.
4357 * Append the match to the array and move
4358 * forward by its keysize.
4360 res
= mandoc_realloc(res
,
4361 ssz
+ cp
->val
.sz
+ 1);
4362 memcpy(res
+ ssz
, cp
->val
.p
, cp
->val
.sz
);
4364 p
+= (int)cp
->key
.sz
;
4369 * Handle escapes carefully: we need to copy
4370 * over just the escape itself, or else we might
4371 * do replacements within the escape itself.
4372 * Make sure to pass along the bogus string.
4375 esc
= mandoc_escape(&p
, NULL
, NULL
);
4376 if (ESCAPE_ERROR
== esc
) {
4378 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
4379 memcpy(res
+ ssz
, pp
, sz
);
4383 * We bail out on bad escapes.
4384 * No need to warn: we already did so when
4385 * roff_expand() was called.
4388 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
4389 memcpy(res
+ ssz
, pp
, sz
);
4393 res
[(int)ssz
] = '\0';
4398 roff_getformat(const struct roff
*r
)
4405 * Find out whether a line is a macro line or not.
4406 * If it is, adjust the current position and return one; if it isn't,
4407 * return zero and don't change the current position.
4408 * If the control character has been set with `.cc', then let that grain
4410 * This is slightly contrary to groff, where using the non-breaking
4411 * control character when `cc' has been invoked will cause the
4412 * non-breaking macro contents to be printed verbatim.
4415 roff_getcontrol(const struct roff
*r
, const char *cp
, int *ppos
)
4421 if (r
->control
!= '\0' && cp
[pos
] == r
->control
)
4423 else if (r
->control
!= '\0')
4425 else if ('\\' == cp
[pos
] && '.' == cp
[pos
+ 1])
4427 else if ('.' == cp
[pos
] || '\'' == cp
[pos
])
4432 while (' ' == cp
[pos
] || '\t' == cp
[pos
])