]>
git.cameronkatri.com Git - mandoc.git/blob - roff.c
1 /* $Id: roff.c,v 1.381 2022/04/13 13:19:34 schwarze Exp $ */
3 * Copyright (c) 2010-2015, 2017-2022 Ingo Schwarze <schwarze@openbsd.org>
4 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 * Implementation of the roff(7) parser for mandoc(1).
22 #include <sys/types.h>
33 #include "mandoc_aux.h"
34 #include "mandoc_ohash.h"
37 #include "mandoc_parse.h"
38 #include "libmandoc.h"
40 #include "tbl_parse.h"
41 #include "eqn_parse.h"
44 * ASCII_ESC is used to signal from roff_getarg() to roff_expand()
45 * that an escape sequence resulted from copy-in processing and
46 * needs to be checked or interpolated. As it is used nowhere
47 * else, it is defined here rather than in a header file.
51 /* Maximum number of string expansions per line, to break infinite loops. */
52 #define EXPAND_LIMIT 1000
54 /* Types of definitions of macros and strings. */
55 #define ROFFDEF_USER (1 << 1) /* User-defined. */
56 #define ROFFDEF_PRE (1 << 2) /* Predefined. */
57 #define ROFFDEF_REN (1 << 3) /* Renamed standard macro. */
58 #define ROFFDEF_STD (1 << 4) /* mdoc(7) or man(7) macro. */
59 #define ROFFDEF_ANY (ROFFDEF_USER | ROFFDEF_PRE | \
60 ROFFDEF_REN | ROFFDEF_STD)
61 #define ROFFDEF_UNDEF (1 << 5) /* Completely undefined. */
63 /* --- data types --------------------------------------------------------- */
66 * An incredibly-simple string buffer.
69 char *p
; /* nil-terminated buffer */
70 size_t sz
; /* saved strlen(p) */
74 * A key-value roffstr pair as part of a singly-linked list.
79 struct roffkv
*next
; /* next in list */
83 * A single number register as part of a singly-linked list.
93 * Association of request and macro names with token IDs.
101 * A macro processing context.
102 * More than one is needed when macro calls are nested.
111 struct roff_man
*man
; /* mdoc or man parser */
112 struct roffnode
*last
; /* leaf of stack */
113 struct mctx
*mstack
; /* stack of macro contexts */
114 int *rstack
; /* stack of inverted `ie' values */
115 struct ohash
*reqtab
; /* request lookup table */
116 struct roffreg
*regtab
; /* number registers */
117 struct roffkv
*strtab
; /* user-defined strings & macros */
118 struct roffkv
*rentab
; /* renamed strings & macros */
119 struct roffkv
*xmbtab
; /* multi-byte trans table (`tr') */
120 struct roffstr
*xtab
; /* single-byte trans table (`tr') */
121 const char *current_string
; /* value of last called user macro */
122 struct tbl_node
*first_tbl
; /* first table parsed */
123 struct tbl_node
*last_tbl
; /* last table parsed */
124 struct tbl_node
*tbl
; /* current table being parsed */
125 struct eqn_node
*last_eqn
; /* equation parser */
126 struct eqn_node
*eqn
; /* active equation parser */
127 int eqn_inline
; /* current equation is inline */
128 int options
; /* parse options */
129 int mstacksz
; /* current size of mstack */
130 int mstackpos
; /* position in mstack */
131 int rstacksz
; /* current size limit of rstack */
132 int rstackpos
; /* position in rstack */
133 int format
; /* current file in mdoc or man format */
134 char control
; /* control character */
135 char escape
; /* escape character */
139 * A macro definition, condition, or ignored block.
142 enum roff_tok tok
; /* type of node */
143 struct roffnode
*parent
; /* up one in stack */
144 int line
; /* parse line */
145 int col
; /* parse col */
146 char *name
; /* node name, e.g. macro name */
147 char *end
; /* custom end macro of the block */
148 int endspan
; /* scope to: 1=eol 2=next line -1=\} */
149 int rule
; /* content is: 1=evaluated 0=skipped */
152 #define ROFF_ARGS struct roff *r, /* parse ctx */ \
153 enum roff_tok tok, /* tok of macro */ \
154 struct buf *buf, /* input buffer */ \
155 int ln, /* parse line */ \
156 int ppos, /* original pos in buffer */ \
157 int pos, /* current pos in buffer */ \
158 int *offs /* reset offset of buffer data */
160 typedef int (*roffproc
)(ROFF_ARGS
);
163 roffproc proc
; /* process new macro */
164 roffproc text
; /* process as child text of macro */
165 roffproc sub
; /* process as child of macro */
167 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
171 const char *name
; /* predefined input name */
172 const char *str
; /* replacement symbol */
175 #define PREDEF(__name, __str) \
176 { (__name), (__str) },
178 /* --- function prototypes ------------------------------------------------ */
180 static int roffnode_cleanscope(struct roff
*);
181 static int roffnode_pop(struct roff
*);
182 static void roffnode_push(struct roff
*, enum roff_tok
,
183 const char *, int, int);
184 static void roff_addtbl(struct roff_man
*, int, struct tbl_node
*);
185 static int roff_als(ROFF_ARGS
);
186 static int roff_block(ROFF_ARGS
);
187 static int roff_block_text(ROFF_ARGS
);
188 static int roff_block_sub(ROFF_ARGS
);
189 static int roff_break(ROFF_ARGS
);
190 static int roff_cblock(ROFF_ARGS
);
191 static int roff_cc(ROFF_ARGS
);
192 static int roff_ccond(struct roff
*, int, int);
193 static int roff_char(ROFF_ARGS
);
194 static int roff_cond(ROFF_ARGS
);
195 static int roff_cond_checkend(ROFF_ARGS
);
196 static int roff_cond_text(ROFF_ARGS
);
197 static int roff_cond_sub(ROFF_ARGS
);
198 static int roff_ds(ROFF_ARGS
);
199 static int roff_ec(ROFF_ARGS
);
200 static int roff_eo(ROFF_ARGS
);
201 static int roff_eqndelim(struct roff
*, struct buf
*, int);
202 static int roff_evalcond(struct roff
*, int, char *, int *);
203 static int roff_evalnum(struct roff
*, int,
204 const char *, int *, int *, int);
205 static int roff_evalpar(struct roff
*, int,
206 const char *, int *, int *, int);
207 static int roff_evalstrcond(const char *, int *);
208 static int roff_expand(struct roff
*, struct buf
*,
210 static void roff_free1(struct roff
*);
211 static void roff_freereg(struct roffreg
*);
212 static void roff_freestr(struct roffkv
*);
213 static size_t roff_getname(struct roff
*, char **, int, int);
214 static int roff_getnum(const char *, int *, int *, int);
215 static int roff_getop(const char *, int *, char *);
216 static int roff_getregn(struct roff
*,
217 const char *, size_t, char);
218 static int roff_getregro(const struct roff
*,
220 static const char *roff_getstrn(struct roff
*,
221 const char *, size_t, int *);
222 static int roff_hasregn(const struct roff
*,
223 const char *, size_t);
224 static int roff_insec(ROFF_ARGS
);
225 static int roff_it(ROFF_ARGS
);
226 static int roff_line_ignore(ROFF_ARGS
);
227 static void roff_man_alloc1(struct roff_man
*);
228 static void roff_man_free1(struct roff_man
*);
229 static int roff_manyarg(ROFF_ARGS
);
230 static int roff_noarg(ROFF_ARGS
);
231 static int roff_nop(ROFF_ARGS
);
232 static int roff_nr(ROFF_ARGS
);
233 static int roff_onearg(ROFF_ARGS
);
234 static enum roff_tok
roff_parse(struct roff
*, char *, int *,
236 static int roff_parsetext(struct roff
*, struct buf
*,
238 static int roff_renamed(ROFF_ARGS
);
239 static int roff_return(ROFF_ARGS
);
240 static int roff_rm(ROFF_ARGS
);
241 static int roff_rn(ROFF_ARGS
);
242 static int roff_rr(ROFF_ARGS
);
243 static void roff_setregn(struct roff
*, const char *,
244 size_t, int, char, int);
245 static void roff_setstr(struct roff
*,
246 const char *, const char *, int);
247 static void roff_setstrn(struct roffkv
**, const char *,
248 size_t, const char *, size_t, int);
249 static int roff_shift(ROFF_ARGS
);
250 static int roff_so(ROFF_ARGS
);
251 static int roff_tr(ROFF_ARGS
);
252 static int roff_Dd(ROFF_ARGS
);
253 static int roff_TE(ROFF_ARGS
);
254 static int roff_TS(ROFF_ARGS
);
255 static int roff_EQ(ROFF_ARGS
);
256 static int roff_EN(ROFF_ARGS
);
257 static int roff_T_(ROFF_ARGS
);
258 static int roff_unsupp(ROFF_ARGS
);
259 static int roff_userdef(ROFF_ARGS
);
261 /* --- constant data ------------------------------------------------------ */
263 #define ROFFNUM_SCALE (1 << 0) /* Honour scaling in roff_getnum(). */
264 #define ROFFNUM_WHITE (1 << 1) /* Skip whitespace in roff_evalnum(). */
266 const char *__roff_name
[MAN_MAX
+ 1] = {
267 "br", "ce", "fi", "ft",
271 "ab", "ad", "af", "aln",
272 "als", "am", "am1", "ami",
273 "ami1", "as", "as1", "asciify",
274 "backtrace", "bd", "bleedat", "blm",
275 "box", "boxa", "bp", "BP",
276 "break", "breakchar", "brnl", "brp",
278 "cf", "cflags", "ch", "char",
279 "chop", "class", "close", "CL",
280 "color", "composite", "continue", "cp",
281 "cropat", "cs", "cu", "da",
282 "dch", "Dd", "de", "de1",
283 "defcolor", "dei", "dei1", "device",
284 "devicem", "di", "do", "ds",
285 "ds1", "dwh", "dt", "ec",
286 "ecr", "ecs", "el", "em",
287 "EN", "eo", "EP", "EQ",
288 "errprint", "ev", "evc", "ex",
289 "fallback", "fam", "fc", "fchar",
290 "fcolor", "fdeferlig", "feature", "fkern",
291 "fl", "flig", "fp", "fps",
292 "fschar", "fspacewidth", "fspecial", "ftr",
293 "fzoom", "gcolor", "hc", "hcode",
294 "hidechar", "hla", "hlm", "hpf",
295 "hpfa", "hpfcode", "hw", "hy",
296 "hylang", "hylen", "hym", "hypp",
297 "hys", "ie", "if", "ig",
298 "index", "it", "itc", "IX",
299 "kern", "kernafter", "kernbefore", "kernpair",
300 "lc", "lc_ctype", "lds", "length",
301 "letadj", "lf", "lg", "lhang",
302 "linetabs", "lnr", "lnrf", "lpfx",
304 "mediasize", "minss", "mk", "mso",
305 "na", "ne", "nh", "nhychar",
306 "nm", "nn", "nop", "nr",
307 "nrf", "nroff", "ns", "nx",
308 "open", "opena", "os", "output",
309 "padj", "papersize", "pc", "pev",
310 "pi", "PI", "pl", "pm",
312 "psbb", "pshape", "pso", "ptr",
313 "pvs", "rchar", "rd", "recursionlimit",
314 "return", "rfschar", "rhang",
315 "rm", "rn", "rnn", "rr",
316 "rs", "rt", "schar", "sentchar",
317 "shc", "shift", "sizes", "so",
318 "spacewidth", "special", "spreadwarn", "ss",
319 "sty", "substring", "sv", "sy",
322 "tm", "tm1", "tmc", "tr",
323 "track", "transchar", "trf", "trimat",
324 "trin", "trnt", "troff", "TS",
325 "uf", "ul", "unformat", "unwatch",
326 "unwatchn", "vpt", "vs", "warn",
327 "warnscale", "watch", "watchlength", "watchn",
328 "wh", "while", "write", "writec",
329 "writem", "xflag", ".", NULL
,
331 "Dd", "Dt", "Os", "Sh",
332 "Ss", "Pp", "D1", "Dl",
333 "Bd", "Ed", "Bl", "El",
334 "It", "Ad", "An", "Ap",
335 "Ar", "Cd", "Cm", "Dv",
336 "Er", "Ev", "Ex", "Fa",
337 "Fd", "Fl", "Fn", "Ft",
338 "Ic", "In", "Li", "Nd",
339 "Nm", "Op", "Ot", "Pa",
340 "Rv", "St", "Va", "Vt",
341 "Xr", "%A", "%B", "%D",
342 "%I", "%J", "%N", "%O",
343 "%P", "%R", "%T", "%V",
344 "Ac", "Ao", "Aq", "At",
345 "Bc", "Bf", "Bo", "Bq",
346 "Bsx", "Bx", "Db", "Dc",
347 "Do", "Dq", "Ec", "Ef",
348 "Em", "Eo", "Fx", "Ms",
349 "No", "Ns", "Nx", "Ox",
350 "Pc", "Pf", "Po", "Pq",
351 "Qc", "Ql", "Qo", "Qq",
352 "Re", "Rs", "Sc", "So",
353 "Sq", "Sm", "Sx", "Sy",
354 "Tn", "Ux", "Xc", "Xo",
355 "Fo", "Fc", "Oo", "Oc",
356 "Bk", "Ek", "Bt", "Hf",
357 "Fr", "Ud", "Lb", "Lp",
358 "Lk", "Mt", "Brq", "Bro",
359 "Brc", "%C", "Es", "En",
360 "Dx", "%Q", "%U", "Ta",
362 "TH", "SH", "SS", "TP",
364 "LP", "PP", "P", "IP",
365 "HP", "SM", "SB", "BI",
366 "IB", "BR", "RB", "R",
367 "B", "I", "IR", "RI",
368 "RE", "RS", "DT", "UC",
372 "UE", "MT", "ME", NULL
374 const char *const *roff_name
= __roff_name
;
376 static struct roffmac roffs
[TOKEN_NONE
] = {
377 { roff_noarg
, NULL
, NULL
, 0 }, /* br */
378 { roff_onearg
, NULL
, NULL
, 0 }, /* ce */
379 { roff_noarg
, NULL
, NULL
, 0 }, /* fi */
380 { roff_onearg
, NULL
, NULL
, 0 }, /* ft */
381 { roff_onearg
, NULL
, NULL
, 0 }, /* ll */
382 { roff_onearg
, NULL
, NULL
, 0 }, /* mc */
383 { roff_noarg
, NULL
, NULL
, 0 }, /* nf */
384 { roff_onearg
, NULL
, NULL
, 0 }, /* po */
385 { roff_onearg
, NULL
, NULL
, 0 }, /* rj */
386 { roff_onearg
, NULL
, NULL
, 0 }, /* sp */
387 { roff_manyarg
, NULL
, NULL
, 0 }, /* ta */
388 { roff_onearg
, NULL
, NULL
, 0 }, /* ti */
389 { NULL
, NULL
, NULL
, 0 }, /* ROFF_MAX */
390 { roff_unsupp
, NULL
, NULL
, 0 }, /* ab */
391 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ad */
392 { roff_line_ignore
, NULL
, NULL
, 0 }, /* af */
393 { roff_unsupp
, NULL
, NULL
, 0 }, /* aln */
394 { roff_als
, NULL
, NULL
, 0 }, /* als */
395 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* am */
396 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* am1 */
397 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* ami */
398 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* ami1 */
399 { roff_ds
, NULL
, NULL
, 0 }, /* as */
400 { roff_ds
, NULL
, NULL
, 0 }, /* as1 */
401 { roff_unsupp
, NULL
, NULL
, 0 }, /* asciify */
402 { roff_line_ignore
, NULL
, NULL
, 0 }, /* backtrace */
403 { roff_line_ignore
, NULL
, NULL
, 0 }, /* bd */
404 { roff_line_ignore
, NULL
, NULL
, 0 }, /* bleedat */
405 { roff_unsupp
, NULL
, NULL
, 0 }, /* blm */
406 { roff_unsupp
, NULL
, NULL
, 0 }, /* box */
407 { roff_unsupp
, NULL
, NULL
, 0 }, /* boxa */
408 { roff_line_ignore
, NULL
, NULL
, 0 }, /* bp */
409 { roff_unsupp
, NULL
, NULL
, 0 }, /* BP */
410 { roff_break
, NULL
, NULL
, 0 }, /* break */
411 { roff_line_ignore
, NULL
, NULL
, 0 }, /* breakchar */
412 { roff_line_ignore
, NULL
, NULL
, 0 }, /* brnl */
413 { roff_noarg
, NULL
, NULL
, 0 }, /* brp */
414 { roff_line_ignore
, NULL
, NULL
, 0 }, /* brpnl */
415 { roff_unsupp
, NULL
, NULL
, 0 }, /* c2 */
416 { roff_cc
, NULL
, NULL
, 0 }, /* cc */
417 { roff_insec
, NULL
, NULL
, 0 }, /* cf */
418 { roff_line_ignore
, NULL
, NULL
, 0 }, /* cflags */
419 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ch */
420 { roff_char
, NULL
, NULL
, 0 }, /* char */
421 { roff_unsupp
, NULL
, NULL
, 0 }, /* chop */
422 { roff_line_ignore
, NULL
, NULL
, 0 }, /* class */
423 { roff_insec
, NULL
, NULL
, 0 }, /* close */
424 { roff_unsupp
, NULL
, NULL
, 0 }, /* CL */
425 { roff_line_ignore
, NULL
, NULL
, 0 }, /* color */
426 { roff_unsupp
, NULL
, NULL
, 0 }, /* composite */
427 { roff_unsupp
, NULL
, NULL
, 0 }, /* continue */
428 { roff_line_ignore
, NULL
, NULL
, 0 }, /* cp */
429 { roff_line_ignore
, NULL
, NULL
, 0 }, /* cropat */
430 { roff_line_ignore
, NULL
, NULL
, 0 }, /* cs */
431 { roff_line_ignore
, NULL
, NULL
, 0 }, /* cu */
432 { roff_unsupp
, NULL
, NULL
, 0 }, /* da */
433 { roff_unsupp
, NULL
, NULL
, 0 }, /* dch */
434 { roff_Dd
, NULL
, NULL
, 0 }, /* Dd */
435 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* de */
436 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* de1 */
437 { roff_line_ignore
, NULL
, NULL
, 0 }, /* defcolor */
438 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* dei */
439 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* dei1 */
440 { roff_unsupp
, NULL
, NULL
, 0 }, /* device */
441 { roff_unsupp
, NULL
, NULL
, 0 }, /* devicem */
442 { roff_unsupp
, NULL
, NULL
, 0 }, /* di */
443 { roff_unsupp
, NULL
, NULL
, 0 }, /* do */
444 { roff_ds
, NULL
, NULL
, 0 }, /* ds */
445 { roff_ds
, NULL
, NULL
, 0 }, /* ds1 */
446 { roff_unsupp
, NULL
, NULL
, 0 }, /* dwh */
447 { roff_unsupp
, NULL
, NULL
, 0 }, /* dt */
448 { roff_ec
, NULL
, NULL
, 0 }, /* ec */
449 { roff_unsupp
, NULL
, NULL
, 0 }, /* ecr */
450 { roff_unsupp
, NULL
, NULL
, 0 }, /* ecs */
451 { roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
}, /* el */
452 { roff_unsupp
, NULL
, NULL
, 0 }, /* em */
453 { roff_EN
, NULL
, NULL
, 0 }, /* EN */
454 { roff_eo
, NULL
, NULL
, 0 }, /* eo */
455 { roff_unsupp
, NULL
, NULL
, 0 }, /* EP */
456 { roff_EQ
, NULL
, NULL
, 0 }, /* EQ */
457 { roff_line_ignore
, NULL
, NULL
, 0 }, /* errprint */
458 { roff_unsupp
, NULL
, NULL
, 0 }, /* ev */
459 { roff_unsupp
, NULL
, NULL
, 0 }, /* evc */
460 { roff_unsupp
, NULL
, NULL
, 0 }, /* ex */
461 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fallback */
462 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fam */
463 { roff_unsupp
, NULL
, NULL
, 0 }, /* fc */
464 { roff_unsupp
, NULL
, NULL
, 0 }, /* fchar */
465 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fcolor */
466 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fdeferlig */
467 { roff_line_ignore
, NULL
, NULL
, 0 }, /* feature */
468 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fkern */
469 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fl */
470 { roff_line_ignore
, NULL
, NULL
, 0 }, /* flig */
471 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fp */
472 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fps */
473 { roff_unsupp
, NULL
, NULL
, 0 }, /* fschar */
474 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fspacewidth */
475 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fspecial */
476 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ftr */
477 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fzoom */
478 { roff_line_ignore
, NULL
, NULL
, 0 }, /* gcolor */
479 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hc */
480 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hcode */
481 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hidechar */
482 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hla */
483 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hlm */
484 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hpf */
485 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hpfa */
486 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hpfcode */
487 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hw */
488 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hy */
489 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hylang */
490 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hylen */
491 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hym */
492 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hypp */
493 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hys */
494 { roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
}, /* ie */
495 { roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
}, /* if */
496 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* ig */
497 { roff_unsupp
, NULL
, NULL
, 0 }, /* index */
498 { roff_it
, NULL
, NULL
, 0 }, /* it */
499 { roff_unsupp
, NULL
, NULL
, 0 }, /* itc */
500 { roff_line_ignore
, NULL
, NULL
, 0 }, /* IX */
501 { roff_line_ignore
, NULL
, NULL
, 0 }, /* kern */
502 { roff_line_ignore
, NULL
, NULL
, 0 }, /* kernafter */
503 { roff_line_ignore
, NULL
, NULL
, 0 }, /* kernbefore */
504 { roff_line_ignore
, NULL
, NULL
, 0 }, /* kernpair */
505 { roff_unsupp
, NULL
, NULL
, 0 }, /* lc */
506 { roff_unsupp
, NULL
, NULL
, 0 }, /* lc_ctype */
507 { roff_unsupp
, NULL
, NULL
, 0 }, /* lds */
508 { roff_unsupp
, NULL
, NULL
, 0 }, /* length */
509 { roff_line_ignore
, NULL
, NULL
, 0 }, /* letadj */
510 { roff_insec
, NULL
, NULL
, 0 }, /* lf */
511 { roff_line_ignore
, NULL
, NULL
, 0 }, /* lg */
512 { roff_line_ignore
, NULL
, NULL
, 0 }, /* lhang */
513 { roff_unsupp
, NULL
, NULL
, 0 }, /* linetabs */
514 { roff_unsupp
, NULL
, NULL
, 0 }, /* lnr */
515 { roff_unsupp
, NULL
, NULL
, 0 }, /* lnrf */
516 { roff_unsupp
, NULL
, NULL
, 0 }, /* lpfx */
517 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ls */
518 { roff_unsupp
, NULL
, NULL
, 0 }, /* lsm */
519 { roff_line_ignore
, NULL
, NULL
, 0 }, /* lt */
520 { roff_line_ignore
, NULL
, NULL
, 0 }, /* mediasize */
521 { roff_line_ignore
, NULL
, NULL
, 0 }, /* minss */
522 { roff_line_ignore
, NULL
, NULL
, 0 }, /* mk */
523 { roff_insec
, NULL
, NULL
, 0 }, /* mso */
524 { roff_line_ignore
, NULL
, NULL
, 0 }, /* na */
525 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ne */
526 { roff_line_ignore
, NULL
, NULL
, 0 }, /* nh */
527 { roff_line_ignore
, NULL
, NULL
, 0 }, /* nhychar */
528 { roff_unsupp
, NULL
, NULL
, 0 }, /* nm */
529 { roff_unsupp
, NULL
, NULL
, 0 }, /* nn */
530 { roff_nop
, NULL
, NULL
, 0 }, /* nop */
531 { roff_nr
, NULL
, NULL
, 0 }, /* nr */
532 { roff_unsupp
, NULL
, NULL
, 0 }, /* nrf */
533 { roff_line_ignore
, NULL
, NULL
, 0 }, /* nroff */
534 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ns */
535 { roff_insec
, NULL
, NULL
, 0 }, /* nx */
536 { roff_insec
, NULL
, NULL
, 0 }, /* open */
537 { roff_insec
, NULL
, NULL
, 0 }, /* opena */
538 { roff_line_ignore
, NULL
, NULL
, 0 }, /* os */
539 { roff_unsupp
, NULL
, NULL
, 0 }, /* output */
540 { roff_line_ignore
, NULL
, NULL
, 0 }, /* padj */
541 { roff_line_ignore
, NULL
, NULL
, 0 }, /* papersize */
542 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pc */
543 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pev */
544 { roff_insec
, NULL
, NULL
, 0 }, /* pi */
545 { roff_unsupp
, NULL
, NULL
, 0 }, /* PI */
546 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pl */
547 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pm */
548 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pn */
549 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pnr */
550 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ps */
551 { roff_unsupp
, NULL
, NULL
, 0 }, /* psbb */
552 { roff_unsupp
, NULL
, NULL
, 0 }, /* pshape */
553 { roff_insec
, NULL
, NULL
, 0 }, /* pso */
554 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ptr */
555 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pvs */
556 { roff_unsupp
, NULL
, NULL
, 0 }, /* rchar */
557 { roff_line_ignore
, NULL
, NULL
, 0 }, /* rd */
558 { roff_line_ignore
, NULL
, NULL
, 0 }, /* recursionlimit */
559 { roff_return
, NULL
, NULL
, 0 }, /* return */
560 { roff_unsupp
, NULL
, NULL
, 0 }, /* rfschar */
561 { roff_line_ignore
, NULL
, NULL
, 0 }, /* rhang */
562 { roff_rm
, NULL
, NULL
, 0 }, /* rm */
563 { roff_rn
, NULL
, NULL
, 0 }, /* rn */
564 { roff_unsupp
, NULL
, NULL
, 0 }, /* rnn */
565 { roff_rr
, NULL
, NULL
, 0 }, /* rr */
566 { roff_line_ignore
, NULL
, NULL
, 0 }, /* rs */
567 { roff_line_ignore
, NULL
, NULL
, 0 }, /* rt */
568 { roff_unsupp
, NULL
, NULL
, 0 }, /* schar */
569 { roff_line_ignore
, NULL
, NULL
, 0 }, /* sentchar */
570 { roff_line_ignore
, NULL
, NULL
, 0 }, /* shc */
571 { roff_shift
, NULL
, NULL
, 0 }, /* shift */
572 { roff_line_ignore
, NULL
, NULL
, 0 }, /* sizes */
573 { roff_so
, NULL
, NULL
, 0 }, /* so */
574 { roff_line_ignore
, NULL
, NULL
, 0 }, /* spacewidth */
575 { roff_line_ignore
, NULL
, NULL
, 0 }, /* special */
576 { roff_line_ignore
, NULL
, NULL
, 0 }, /* spreadwarn */
577 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ss */
578 { roff_line_ignore
, NULL
, NULL
, 0 }, /* sty */
579 { roff_unsupp
, NULL
, NULL
, 0 }, /* substring */
580 { roff_line_ignore
, NULL
, NULL
, 0 }, /* sv */
581 { roff_insec
, NULL
, NULL
, 0 }, /* sy */
582 { roff_T_
, NULL
, NULL
, 0 }, /* T& */
583 { roff_unsupp
, NULL
, NULL
, 0 }, /* tc */
584 { roff_TE
, NULL
, NULL
, 0 }, /* TE */
585 { roff_Dd
, NULL
, NULL
, 0 }, /* TH */
586 { roff_line_ignore
, NULL
, NULL
, 0 }, /* tkf */
587 { roff_unsupp
, NULL
, NULL
, 0 }, /* tl */
588 { roff_line_ignore
, NULL
, NULL
, 0 }, /* tm */
589 { roff_line_ignore
, NULL
, NULL
, 0 }, /* tm1 */
590 { roff_line_ignore
, NULL
, NULL
, 0 }, /* tmc */
591 { roff_tr
, NULL
, NULL
, 0 }, /* tr */
592 { roff_line_ignore
, NULL
, NULL
, 0 }, /* track */
593 { roff_line_ignore
, NULL
, NULL
, 0 }, /* transchar */
594 { roff_insec
, NULL
, NULL
, 0 }, /* trf */
595 { roff_line_ignore
, NULL
, NULL
, 0 }, /* trimat */
596 { roff_unsupp
, NULL
, NULL
, 0 }, /* trin */
597 { roff_unsupp
, NULL
, NULL
, 0 }, /* trnt */
598 { roff_line_ignore
, NULL
, NULL
, 0 }, /* troff */
599 { roff_TS
, NULL
, NULL
, 0 }, /* TS */
600 { roff_line_ignore
, NULL
, NULL
, 0 }, /* uf */
601 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ul */
602 { roff_unsupp
, NULL
, NULL
, 0 }, /* unformat */
603 { roff_line_ignore
, NULL
, NULL
, 0 }, /* unwatch */
604 { roff_line_ignore
, NULL
, NULL
, 0 }, /* unwatchn */
605 { roff_line_ignore
, NULL
, NULL
, 0 }, /* vpt */
606 { roff_line_ignore
, NULL
, NULL
, 0 }, /* vs */
607 { roff_line_ignore
, NULL
, NULL
, 0 }, /* warn */
608 { roff_line_ignore
, NULL
, NULL
, 0 }, /* warnscale */
609 { roff_line_ignore
, NULL
, NULL
, 0 }, /* watch */
610 { roff_line_ignore
, NULL
, NULL
, 0 }, /* watchlength */
611 { roff_line_ignore
, NULL
, NULL
, 0 }, /* watchn */
612 { roff_unsupp
, NULL
, NULL
, 0 }, /* wh */
613 { roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
}, /*while*/
614 { roff_insec
, NULL
, NULL
, 0 }, /* write */
615 { roff_insec
, NULL
, NULL
, 0 }, /* writec */
616 { roff_insec
, NULL
, NULL
, 0 }, /* writem */
617 { roff_line_ignore
, NULL
, NULL
, 0 }, /* xflag */
618 { roff_cblock
, NULL
, NULL
, 0 }, /* . */
619 { roff_renamed
, NULL
, NULL
, 0 },
620 { roff_userdef
, NULL
, NULL
, 0 }
623 /* Array of injected predefined strings. */
624 #define PREDEFS_MAX 38
625 static const struct predef predefs
[PREDEFS_MAX
] = {
626 #include "predefs.in"
629 static int roffce_lines
; /* number of input lines to center */
630 static struct roff_node
*roffce_node
; /* active request */
631 static int roffit_lines
; /* number of lines to delay */
632 static char *roffit_macro
; /* nil-terminated macro line */
635 /* --- request table ------------------------------------------------------ */
638 roffhash_alloc(enum roff_tok mintok
, enum roff_tok maxtok
)
646 htab
= mandoc_malloc(sizeof(*htab
));
647 mandoc_ohash_init(htab
, 8, offsetof(struct roffreq
, name
));
649 for (tok
= mintok
; tok
< maxtok
; tok
++) {
650 if (roff_name
[tok
] == NULL
)
652 sz
= strlen(roff_name
[tok
]);
653 req
= mandoc_malloc(sizeof(*req
) + sz
+ 1);
655 memcpy(req
->name
, roff_name
[tok
], sz
+ 1);
656 slot
= ohash_qlookup(htab
, req
->name
);
657 ohash_insert(htab
, slot
, req
);
663 roffhash_free(struct ohash
*htab
)
670 for (req
= ohash_first(htab
, &slot
); req
!= NULL
;
671 req
= ohash_next(htab
, &slot
))
678 roffhash_find(struct ohash
*htab
, const char *name
, size_t sz
)
685 req
= ohash_find(htab
, ohash_qlookupi(htab
, name
, &end
));
687 req
= ohash_find(htab
, ohash_qlookup(htab
, name
));
688 return req
== NULL
? TOKEN_NONE
: req
->tok
;
691 /* --- stack of request blocks -------------------------------------------- */
694 * Pop the current node off of the stack of roff instructions currently
695 * pending. Return 1 if it is a loop or 0 otherwise.
698 roffnode_pop(struct roff
*r
)
704 inloop
= p
->tok
== ROFF_while
;
713 * Push a roff node onto the instruction stack. This must later be
714 * removed with roffnode_pop().
717 roffnode_push(struct roff
*r
, enum roff_tok tok
, const char *name
,
722 p
= mandoc_calloc(1, sizeof(struct roffnode
));
725 p
->name
= mandoc_strdup(name
);
729 p
->rule
= p
->parent
? p
->parent
->rule
: 0;
734 /* --- roff parser state data management ---------------------------------- */
737 roff_free1(struct roff
*r
)
741 tbl_free(r
->first_tbl
);
742 r
->first_tbl
= r
->last_tbl
= r
->tbl
= NULL
;
744 eqn_free(r
->last_eqn
);
745 r
->last_eqn
= r
->eqn
= NULL
;
747 while (r
->mstackpos
>= 0)
758 roff_freereg(r
->regtab
);
761 roff_freestr(r
->strtab
);
762 roff_freestr(r
->rentab
);
763 roff_freestr(r
->xmbtab
);
764 r
->strtab
= r
->rentab
= r
->xmbtab
= NULL
;
767 for (i
= 0; i
< 128; i
++)
774 roff_reset(struct roff
*r
)
777 r
->options
|= MPARSE_COMMENT
;
778 r
->format
= r
->options
& (MPARSE_MDOC
| MPARSE_MAN
);
788 roff_free(struct roff
*r
)
793 for (i
= 0; i
< r
->mstacksz
; i
++)
794 free(r
->mstack
[i
].argv
);
796 roffhash_free(r
->reqtab
);
801 roff_alloc(int options
)
805 r
= mandoc_calloc(1, sizeof(struct roff
));
806 r
->reqtab
= roffhash_alloc(0, ROFF_RENAMED
);
807 r
->options
= options
| MPARSE_COMMENT
;
808 r
->format
= options
& (MPARSE_MDOC
| MPARSE_MAN
);
815 /* --- syntax tree state data management ---------------------------------- */
818 roff_man_free1(struct roff_man
*man
)
820 if (man
->meta
.first
!= NULL
)
821 roff_node_delete(man
, man
->meta
.first
);
822 free(man
->meta
.msec
);
825 free(man
->meta
.arch
);
826 free(man
->meta
.title
);
827 free(man
->meta
.name
);
828 free(man
->meta
.date
);
829 free(man
->meta
.sodest
);
833 roff_state_reset(struct roff_man
*man
)
835 man
->last
= man
->meta
.first
;
838 man
->lastsec
= man
->lastnamed
= SEC_NONE
;
839 man
->next
= ROFF_NEXT_CHILD
;
840 roff_setreg(man
->roff
, "nS", 0, '=');
844 roff_man_alloc1(struct roff_man
*man
)
846 memset(&man
->meta
, 0, sizeof(man
->meta
));
847 man
->meta
.first
= mandoc_calloc(1, sizeof(*man
->meta
.first
));
848 man
->meta
.first
->type
= ROFFT_ROOT
;
849 man
->meta
.macroset
= MACROSET_NONE
;
850 roff_state_reset(man
);
854 roff_man_reset(struct roff_man
*man
)
857 roff_man_alloc1(man
);
861 roff_man_free(struct roff_man
*man
)
869 roff_man_alloc(struct roff
*roff
, const char *os_s
, int quick
)
871 struct roff_man
*man
;
873 man
= mandoc_calloc(1, sizeof(*man
));
877 roff_man_alloc1(man
);
882 /* --- syntax tree handling ----------------------------------------------- */
885 roff_node_alloc(struct roff_man
*man
, int line
, int pos
,
886 enum roff_type type
, int tok
)
890 n
= mandoc_calloc(1, sizeof(*n
));
895 n
->sec
= man
->lastsec
;
897 if (man
->flags
& MDOC_SYNOPSIS
)
898 n
->flags
|= NODE_SYNPRETTY
;
900 n
->flags
&= ~NODE_SYNPRETTY
;
901 if ((man
->flags
& (ROFF_NOFILL
| ROFF_NONOFILL
)) == ROFF_NOFILL
)
902 n
->flags
|= NODE_NOFILL
;
904 n
->flags
&= ~NODE_NOFILL
;
905 if (man
->flags
& MDOC_NEWLINE
)
906 n
->flags
|= NODE_LINE
;
907 man
->flags
&= ~MDOC_NEWLINE
;
913 roff_node_append(struct roff_man
*man
, struct roff_node
*n
)
917 case ROFF_NEXT_SIBLING
:
918 if (man
->last
->next
!= NULL
) {
919 n
->next
= man
->last
->next
;
920 man
->last
->next
->prev
= n
;
922 man
->last
->parent
->last
= n
;
925 n
->parent
= man
->last
->parent
;
927 case ROFF_NEXT_CHILD
:
928 if (man
->last
->child
!= NULL
) {
929 n
->next
= man
->last
->child
;
930 man
->last
->child
->prev
= n
;
933 man
->last
->child
= n
;
934 n
->parent
= man
->last
;
946 if (n
->end
!= ENDBODY_NOT
)
958 * Copy over the normalised-data pointer of our parent. Not
959 * everybody has one, but copying a null pointer is fine.
962 n
->norm
= n
->parent
->norm
;
963 assert(n
->parent
->type
== ROFFT_BLOCK
);
967 roff_word_alloc(struct roff_man
*man
, int line
, int pos
, const char *word
)
971 n
= roff_node_alloc(man
, line
, pos
, ROFFT_TEXT
, TOKEN_NONE
);
972 n
->string
= roff_strdup(man
->roff
, word
);
973 roff_node_append(man
, n
);
974 n
->flags
|= NODE_VALID
| NODE_ENDED
;
975 man
->next
= ROFF_NEXT_SIBLING
;
979 roff_word_append(struct roff_man
*man
, const char *word
)
982 char *addstr
, *newstr
;
985 addstr
= roff_strdup(man
->roff
, word
);
986 mandoc_asprintf(&newstr
, "%s %s", n
->string
, addstr
);
990 man
->next
= ROFF_NEXT_SIBLING
;
994 roff_elem_alloc(struct roff_man
*man
, int line
, int pos
, int tok
)
998 n
= roff_node_alloc(man
, line
, pos
, ROFFT_ELEM
, tok
);
999 roff_node_append(man
, n
);
1000 man
->next
= ROFF_NEXT_CHILD
;
1004 roff_block_alloc(struct roff_man
*man
, int line
, int pos
, int tok
)
1006 struct roff_node
*n
;
1008 n
= roff_node_alloc(man
, line
, pos
, ROFFT_BLOCK
, tok
);
1009 roff_node_append(man
, n
);
1010 man
->next
= ROFF_NEXT_CHILD
;
1015 roff_head_alloc(struct roff_man
*man
, int line
, int pos
, int tok
)
1017 struct roff_node
*n
;
1019 n
= roff_node_alloc(man
, line
, pos
, ROFFT_HEAD
, tok
);
1020 roff_node_append(man
, n
);
1021 man
->next
= ROFF_NEXT_CHILD
;
1026 roff_body_alloc(struct roff_man
*man
, int line
, int pos
, int tok
)
1028 struct roff_node
*n
;
1030 n
= roff_node_alloc(man
, line
, pos
, ROFFT_BODY
, tok
);
1031 roff_node_append(man
, n
);
1032 man
->next
= ROFF_NEXT_CHILD
;
1037 roff_addtbl(struct roff_man
*man
, int line
, struct tbl_node
*tbl
)
1039 struct roff_node
*n
;
1040 struct tbl_span
*span
;
1042 if (man
->meta
.macroset
== MACROSET_MAN
)
1043 man_breakscope(man
, ROFF_TS
);
1044 while ((span
= tbl_span(tbl
)) != NULL
) {
1045 n
= roff_node_alloc(man
, line
, 0, ROFFT_TBL
, TOKEN_NONE
);
1047 roff_node_append(man
, n
);
1048 n
->flags
|= NODE_VALID
| NODE_ENDED
;
1049 man
->next
= ROFF_NEXT_SIBLING
;
1054 roff_node_unlink(struct roff_man
*man
, struct roff_node
*n
)
1057 /* Adjust siblings. */
1060 n
->prev
->next
= n
->next
;
1062 n
->next
->prev
= n
->prev
;
1064 /* Adjust parent. */
1066 if (n
->parent
!= NULL
) {
1067 if (n
->parent
->child
== n
)
1068 n
->parent
->child
= n
->next
;
1069 if (n
->parent
->last
== n
)
1070 n
->parent
->last
= n
->prev
;
1073 /* Adjust parse point. */
1077 if (man
->last
== n
) {
1078 if (n
->prev
== NULL
) {
1079 man
->last
= n
->parent
;
1080 man
->next
= ROFF_NEXT_CHILD
;
1082 man
->last
= n
->prev
;
1083 man
->next
= ROFF_NEXT_SIBLING
;
1086 if (man
->meta
.first
== n
)
1087 man
->meta
.first
= NULL
;
1091 roff_node_relink(struct roff_man
*man
, struct roff_node
*n
)
1093 roff_node_unlink(man
, n
);
1094 n
->prev
= n
->next
= NULL
;
1095 roff_node_append(man
, n
);
1099 roff_node_free(struct roff_node
*n
)
1102 if (n
->args
!= NULL
)
1103 mdoc_argv_free(n
->args
);
1104 if (n
->type
== ROFFT_BLOCK
|| n
->type
== ROFFT_ELEM
)
1106 eqn_box_free(n
->eqn
);
1113 roff_node_delete(struct roff_man
*man
, struct roff_node
*n
)
1116 while (n
->child
!= NULL
)
1117 roff_node_delete(man
, n
->child
);
1118 roff_node_unlink(man
, n
);
1123 roff_node_transparent(struct roff_node
*n
)
1127 if (n
->type
== ROFFT_COMMENT
|| n
->flags
& NODE_NOPRT
)
1129 return roff_tok_transparent(n
->tok
);
1133 roff_tok_transparent(enum roff_tok tok
)
1156 roff_node_child(struct roff_node
*n
)
1158 for (n
= n
->child
; roff_node_transparent(n
); n
= n
->next
)
1164 roff_node_prev(struct roff_node
*n
)
1168 } while (roff_node_transparent(n
));
1173 roff_node_next(struct roff_node
*n
)
1177 } while (roff_node_transparent(n
));
1182 deroff(char **dest
, const struct roff_node
*n
)
1187 if (n
->string
== NULL
) {
1188 for (n
= n
->child
; n
!= NULL
; n
= n
->next
)
1193 /* Skip leading whitespace. */
1195 for (cp
= n
->string
; *cp
!= '\0'; cp
++) {
1196 if (cp
[0] == '\\' && cp
[1] != '\0' &&
1197 strchr(" %&0^|~", cp
[1]) != NULL
)
1199 else if ( ! isspace((unsigned char)*cp
))
1203 /* Skip trailing backslash. */
1206 if (sz
> 0 && cp
[sz
- 1] == '\\')
1209 /* Skip trailing whitespace. */
1212 if ( ! isspace((unsigned char)cp
[sz
-1]))
1215 /* Skip empty strings. */
1220 if (*dest
== NULL
) {
1221 *dest
= mandoc_strndup(cp
, sz
);
1225 mandoc_asprintf(&cp
, "%s %*s", *dest
, (int)sz
, cp
);
1230 /* --- main functions of the roff parser ---------------------------------- */
1233 * In the current line, expand escape sequences that produce parsable
1234 * input text. Also check the syntax of the remaining escape sequences,
1235 * which typically produce output glyphs or change formatter state.
1238 roff_expand(struct roff
*r
, struct buf
*buf
, int ln
, int pos
, char newesc
)
1240 struct mctx
*ctx
; /* current macro call context */
1241 char ubuf
[24]; /* buffer to print the number */
1242 struct roff_node
*n
; /* used for header comments */
1243 const char *start
; /* start of the string to process */
1244 char *stesc
; /* start of an escape sequence ('\\') */
1245 const char *esct
; /* type of escape sequence */
1246 char *ep
; /* end of comment string */
1247 const char *stnam
; /* start of the name, after "[(*" */
1248 const char *cp
; /* end of the name, e.g. before ']' */
1249 const char *res
; /* the string to be substituted */
1250 char *nbuf
; /* new buffer to copy buf->buf to */
1251 size_t maxl
; /* expected length of the escape name */
1252 size_t naml
; /* actual length of the escape name */
1253 size_t asz
; /* length of the replacement */
1254 size_t rsz
; /* length of the rest of the string */
1255 int inaml
; /* length returned from mandoc_escape() */
1256 int expand_count
; /* to avoid infinite loops */
1257 int npos
; /* position in numeric expression */
1258 int arg_complete
; /* argument not interrupted by eol */
1259 int quote_args
; /* true for \\$@, false for \\$* */
1260 int done
; /* no more input available */
1261 int deftype
; /* type of definition to paste */
1262 int rcsid
; /* kind of RCS id seen */
1263 enum mandocerr err
; /* for escape sequence problems */
1264 char sign
; /* increment number register */
1265 char term
; /* character terminating the escape */
1267 /* Search forward for comments. */
1270 start
= buf
->buf
+ pos
;
1271 for (stesc
= buf
->buf
+ pos
; *stesc
!= '\0'; stesc
++) {
1272 if (stesc
[0] != newesc
|| stesc
[1] == '\0')
1275 if (*stesc
!= '"' && *stesc
!= '#')
1278 /* Comment found, look for RCS id. */
1281 if ((cp
= strstr(stesc
, "$" "OpenBSD")) != NULL
) {
1282 rcsid
= 1 << MANDOC_OS_OPENBSD
;
1284 } else if ((cp
= strstr(stesc
, "$" "NetBSD")) != NULL
) {
1285 rcsid
= 1 << MANDOC_OS_NETBSD
;
1289 isalnum((unsigned char)*cp
) == 0 &&
1290 strchr(cp
, '$') != NULL
) {
1291 if (r
->man
->meta
.rcsids
& rcsid
)
1292 mandoc_msg(MANDOCERR_RCS_REP
, ln
,
1293 (int)(stesc
- buf
->buf
) + 1,
1295 r
->man
->meta
.rcsids
|= rcsid
;
1298 /* Handle trailing whitespace. */
1300 ep
= strchr(stesc
--, '\0') - 1;
1305 if (*ep
== ' ' || *ep
== '\t')
1306 mandoc_msg(MANDOCERR_SPACE_EOL
,
1307 ln
, (int)(ep
- buf
->buf
), NULL
);
1310 * Save comments preceding the title macro
1311 * in the syntax tree.
1314 if (newesc
!= ASCII_ESC
&& r
->options
& MPARSE_COMMENT
) {
1315 while (*ep
== ' ' || *ep
== '\t')
1318 n
= roff_node_alloc(r
->man
,
1319 ln
, stesc
+ 1 - buf
->buf
,
1320 ROFFT_COMMENT
, TOKEN_NONE
);
1321 n
->string
= mandoc_strdup(stesc
+ 2);
1322 roff_node_append(r
->man
, n
);
1323 n
->flags
|= NODE_VALID
| NODE_ENDED
;
1324 r
->man
->next
= ROFF_NEXT_SIBLING
;
1327 /* Line continuation with comment. */
1329 if (stesc
[1] == '#') {
1331 return ROFF_IGN
| ROFF_APPEND
;
1334 /* Discard normal comments. */
1336 while (stesc
> start
&& stesc
[-1] == ' ' &&
1337 (stesc
== start
+ 1 || stesc
[-2] != '\\'))
1346 /* Notice the end of the input. */
1348 if (*stesc
== '\n') {
1354 while (stesc
>= start
) {
1355 if (*stesc
!= newesc
) {
1358 * If we have a non-standard escape character,
1359 * escape literal backslashes because all
1360 * processing in subsequent functions uses
1361 * the standard escaping rules.
1364 if (newesc
!= ASCII_ESC
&& *stesc
== '\\') {
1366 buf
->sz
= mandoc_asprintf(&nbuf
, "%s\\e%s",
1367 buf
->buf
, stesc
+ 1) + 1;
1369 stesc
= nbuf
+ (stesc
- buf
->buf
);
1374 /* Search backwards for the next escape. */
1380 /* If it is escaped, skip it. */
1382 for (cp
= stesc
- 1; cp
>= start
; cp
--)
1383 if (*cp
!= r
->escape
)
1386 if ((stesc
- cp
) % 2 == 0) {
1390 } else if (stesc
[1] != '\0') {
1397 return ROFF_IGN
| ROFF_APPEND
;
1400 /* Decide whether to expand or to check only. */
1418 if (sign
== '+' || sign
== '-')
1424 switch(mandoc_escape(&cp
, &stnam
, &inaml
)) {
1425 case ESCAPE_SPECIAL
:
1426 if (mchars_spec2cp(stnam
, inaml
) >= 0)
1430 err
= MANDOCERR_ESC_BAD
;
1433 err
= MANDOCERR_ESC_UNDEF
;
1436 err
= MANDOCERR_ESC_UNSUPP
;
1441 if (err
!= MANDOCERR_OK
)
1442 mandoc_msg(err
, ln
, (int)(stesc
- buf
->buf
),
1443 "%.*s", (int)(cp
- stesc
), stesc
);
1448 if (EXPAND_LIMIT
< ++expand_count
) {
1449 mandoc_msg(MANDOCERR_ROFFLOOP
,
1450 ln
, (int)(stesc
- buf
->buf
), NULL
);
1455 * The third character decides the length
1456 * of the name of the string or register.
1457 * Save a pointer to the name.
1484 /* Advance to the end of the name. */
1488 while (maxl
== 0 || naml
< maxl
) {
1490 mandoc_msg(MANDOCERR_ESC_BAD
, ln
,
1491 (int)(stesc
- buf
->buf
), "%s", stesc
);
1495 if (maxl
== 0 && *cp
== term
) {
1499 if (*cp
++ != '\\' || *esct
!= 'w') {
1503 switch (mandoc_escape(&cp
, NULL
, NULL
)) {
1504 case ESCAPE_SPECIAL
:
1505 case ESCAPE_UNICODE
:
1506 case ESCAPE_NUMBERED
:
1508 case ESCAPE_OVERSTRIKE
:
1517 * Retrieve the replacement string; if it is
1518 * undefined, resume searching for escapes.
1524 deftype
= ROFFDEF_USER
| ROFFDEF_PRE
;
1525 res
= roff_getstrn(r
, stnam
, naml
, &deftype
);
1528 * If not overridden, let \*(.T
1529 * through to the formatters.
1532 if (res
== NULL
&& naml
== 2 &&
1533 stnam
[0] == '.' && stnam
[1] == 'T') {
1534 roff_setstrn(&r
->strtab
,
1535 ".T", 2, NULL
, 0, 0);
1542 if (r
->mstackpos
< 0) {
1543 mandoc_msg(MANDOCERR_ARG_UNDEF
, ln
,
1544 (int)(stesc
- buf
->buf
), "%.3s", stesc
);
1547 ctx
= r
->mstack
+ r
->mstackpos
;
1548 npos
= esct
[1] - '1';
1549 if (npos
>= 0 && npos
<= 8) {
1550 res
= npos
< ctx
->argc
?
1551 ctx
->argv
[npos
] : "";
1556 else if (esct
[1] == '@')
1559 mandoc_msg(MANDOCERR_ARG_NONUM
, ln
,
1560 (int)(stesc
- buf
->buf
), "%.3s", stesc
);
1564 for (npos
= 0; npos
< ctx
->argc
; npos
++) {
1568 asz
+= 2; /* quotes */
1569 asz
+= strlen(ctx
->argv
[npos
]);
1572 rsz
= buf
->sz
- (stesc
- buf
->buf
) - 3;
1574 memmove(stesc
+ asz
, stesc
+ 3, rsz
);
1576 nbuf
= mandoc_realloc(buf
->buf
, buf
->sz
);
1578 stesc
= nbuf
+ (stesc
- buf
->buf
);
1581 memmove(stesc
+ asz
, stesc
+ 3, rsz
);
1583 for (npos
= 0; npos
< ctx
->argc
; npos
++) {
1588 cp
= ctx
->argv
[npos
];
1597 ubuf
[0] = arg_complete
&&
1598 roff_evalnum(r
, ln
, stnam
, &npos
,
1599 NULL
, ROFFNUM_SCALE
) &&
1600 stnam
+ npos
+ 1 == cp
? '1' : '0';
1605 (void)snprintf(ubuf
, sizeof(ubuf
), "%d",
1606 roff_getregn(r
, stnam
, naml
, sign
));
1611 /* use even incomplete args */
1612 (void)snprintf(ubuf
, sizeof(ubuf
), "%d",
1619 mandoc_msg(MANDOCERR_STR_UNDEF
,
1620 ln
, (int)(stesc
- buf
->buf
),
1621 "%.*s", (int)naml
, stnam
);
1623 } else if (buf
->sz
+ strlen(res
) > SHRT_MAX
) {
1624 mandoc_msg(MANDOCERR_ROFFLOOP
,
1625 ln
, (int)(stesc
- buf
->buf
), NULL
);
1629 /* Replace the escape sequence by the string. */
1632 buf
->sz
= mandoc_asprintf(&nbuf
, "%s%s%s",
1633 buf
->buf
, res
, cp
) + 1;
1635 /* Prepare for the next replacement. */
1638 stesc
= nbuf
+ (stesc
- buf
->buf
) + strlen(res
);
1646 * Parse a quoted or unquoted roff-style request or macro argument.
1647 * Return a pointer to the parsed argument, which is either the original
1648 * pointer or advanced by one byte in case the argument is quoted.
1649 * NUL-terminate the argument in place.
1650 * Collapse pairs of quotes inside quoted arguments.
1651 * Advance the argument pointer to the next argument,
1652 * or to the NUL byte terminating the argument line.
1655 roff_getarg(struct roff
*r
, char **cpp
, int ln
, int *pos
)
1659 int newesc
, pairs
, quoted
, white
;
1661 /* Quoting can only start with a new word. */
1664 if ('"' == *start
) {
1669 newesc
= pairs
= white
= 0;
1670 for (cp
= start
; '\0' != *cp
; cp
++) {
1673 * Move the following text left
1674 * after quoted quotes and after "\\" and "\t".
1679 if ('\\' == cp
[0]) {
1681 * In copy mode, translate double to single
1682 * backslashes and backslash-t to literal tabs.
1693 cp
[-pairs
] = ASCII_ESC
;
1698 /* Skip escaped blanks. */
1705 } else if (0 == quoted
) {
1707 /* Unescaped blanks end unquoted args. */
1711 } else if ('"' == cp
[0]) {
1713 /* Quoted quotes collapse. */
1717 /* Unquoted quotes end quoted args. */
1724 /* Quoted argument without a closing quote. */
1726 mandoc_msg(MANDOCERR_ARG_QUOTE
, ln
, *pos
, NULL
);
1728 /* NUL-terminate this argument and move to the next one. */
1736 *pos
+= (int)(cp
- start
) + (quoted
? 1 : 0);
1739 if ('\0' == *cp
&& (white
|| ' ' == cp
[-1]))
1740 mandoc_msg(MANDOCERR_SPACE_EOL
, ln
, *pos
, NULL
);
1742 start
= mandoc_strdup(start
);
1747 buf
.sz
= strlen(start
) + 1;
1749 if (roff_expand(r
, &buf
, ln
, 0, ASCII_ESC
) & ROFF_IGN
) {
1751 buf
.buf
= mandoc_strdup("");
1758 * Process text streams.
1761 roff_parsetext(struct roff
*r
, struct buf
*buf
, int pos
, int *offs
)
1767 enum mandoc_esc esc
;
1769 /* Spring the input line trap. */
1771 if (roffit_lines
== 1) {
1772 isz
= mandoc_asprintf(&p
, "%s\n.%s", buf
->buf
, roffit_macro
);
1779 return ROFF_REPARSE
;
1780 } else if (roffit_lines
> 1)
1783 if (roffce_node
!= NULL
&& buf
->buf
[pos
] != '\0') {
1784 if (roffce_lines
< 1) {
1785 r
->man
->last
= roffce_node
;
1786 r
->man
->next
= ROFF_NEXT_SIBLING
;
1793 /* Convert all breakable hyphens into ASCII_HYPH. */
1795 start
= p
= buf
->buf
+ pos
;
1797 while (*p
!= '\0') {
1798 sz
= strcspn(p
, "-\\");
1805 /* Skip over escapes. */
1807 esc
= mandoc_escape((const char **)&p
, NULL
, NULL
);
1808 if (esc
== ESCAPE_ERROR
)
1813 } else if (p
== start
) {
1818 if (isalpha((unsigned char)p
[-1]) &&
1819 isalpha((unsigned char)p
[1]))
1827 roff_parseln(struct roff
*r
, int ln
, struct buf
*buf
, int *offs
, size_t len
)
1831 int pos
; /* parse point */
1832 int spos
; /* saved parse point for messages */
1833 int ppos
; /* original offset in buf->buf */
1834 int ctl
; /* macro line (boolean) */
1838 if (len
> 80 && r
->tbl
== NULL
&& r
->eqn
== NULL
&&
1839 (r
->man
->flags
& ROFF_NOFILL
) == 0 &&
1840 strchr(" .\\", buf
->buf
[pos
]) == NULL
&&
1841 buf
->buf
[pos
] != r
->control
&&
1842 strcspn(buf
->buf
, " ") < 80)
1843 mandoc_msg(MANDOCERR_TEXT_LONG
, ln
, (int)len
- 1,
1844 "%.20s...", buf
->buf
+ pos
);
1846 /* Handle in-line equation delimiters. */
1848 if (r
->tbl
== NULL
&&
1849 r
->last_eqn
!= NULL
&& r
->last_eqn
->delim
&&
1850 (r
->eqn
== NULL
|| r
->eqn_inline
)) {
1851 e
= roff_eqndelim(r
, buf
, pos
);
1852 if (e
== ROFF_REPARSE
)
1854 assert(e
== ROFF_CONT
);
1857 /* Expand some escape sequences. */
1859 e
= roff_expand(r
, buf
, ln
, pos
, r
->escape
);
1860 if ((e
& ROFF_MASK
) == ROFF_IGN
)
1862 assert(e
== ROFF_CONT
);
1864 ctl
= roff_getcontrol(r
, buf
->buf
, &pos
);
1867 * First, if a scope is open and we're not a macro, pass the
1868 * text through the macro's filter.
1869 * Equations process all content themselves.
1870 * Tables process almost all content themselves, but we want
1871 * to warn about macros before passing it there.
1874 if (r
->last
!= NULL
&& ! ctl
) {
1876 e
= (*roffs
[t
].text
)(r
, t
, buf
, ln
, pos
, pos
, offs
);
1877 if ((e
& ROFF_MASK
) == ROFF_IGN
)
1882 if (r
->eqn
!= NULL
&& strncmp(buf
->buf
+ ppos
, ".EN", 3)) {
1883 eqn_read(r
->eqn
, buf
->buf
+ ppos
);
1886 if (r
->tbl
!= NULL
&& (ctl
== 0 || buf
->buf
[pos
] == '\0')) {
1887 tbl_read(r
->tbl
, ln
, buf
->buf
, ppos
);
1888 roff_addtbl(r
->man
, ln
, r
->tbl
);
1892 r
->options
&= ~MPARSE_COMMENT
;
1893 return roff_parsetext(r
, buf
, pos
, offs
) | e
;
1896 /* Skip empty request lines. */
1898 if (buf
->buf
[pos
] == '"') {
1899 mandoc_msg(MANDOCERR_COMMENT_BAD
, ln
, pos
, NULL
);
1901 } else if (buf
->buf
[pos
] == '\0')
1905 * If a scope is open, go to the child handler for that macro,
1906 * as it may want to preprocess before doing anything with it.
1907 * Don't do so if an equation is open.
1912 return (*roffs
[t
].sub
)(r
, t
, buf
, ln
, ppos
, pos
, offs
);
1915 /* No scope is open. This is a new request or macro. */
1917 r
->options
&= ~MPARSE_COMMENT
;
1919 t
= roff_parse(r
, buf
->buf
, &pos
, ln
, ppos
);
1921 /* Tables ignore most macros. */
1923 if (r
->tbl
!= NULL
&& (t
== TOKEN_NONE
|| t
== ROFF_TS
||
1924 t
== ROFF_br
|| t
== ROFF_ce
|| t
== ROFF_rj
|| t
== ROFF_sp
)) {
1925 mandoc_msg(MANDOCERR_TBLMACRO
,
1926 ln
, pos
, "%s", buf
->buf
+ spos
);
1927 if (t
!= TOKEN_NONE
)
1929 while (buf
->buf
[pos
] != '\0' && buf
->buf
[pos
] != ' ')
1931 while (buf
->buf
[pos
] == ' ')
1933 tbl_read(r
->tbl
, ln
, buf
->buf
, pos
);
1934 roff_addtbl(r
->man
, ln
, r
->tbl
);
1938 /* For now, let high level macros abort .ce mode. */
1940 if (ctl
&& roffce_node
!= NULL
&&
1941 (t
== TOKEN_NONE
|| t
== ROFF_Dd
|| t
== ROFF_EQ
||
1942 t
== ROFF_TH
|| t
== ROFF_TS
)) {
1943 r
->man
->last
= roffce_node
;
1944 r
->man
->next
= ROFF_NEXT_SIBLING
;
1950 * This is neither a roff request nor a user-defined macro.
1951 * Let the standard macro set parsers handle it.
1954 if (t
== TOKEN_NONE
)
1957 /* Execute a roff request or a user defined macro. */
1959 return (*roffs
[t
].proc
)(r
, t
, buf
, ln
, spos
, pos
, offs
);
1963 * Internal interface function to tell the roff parser that execution
1964 * of the current macro ended. This is required because macro
1965 * definitions usually do not end with a .return request.
1968 roff_userret(struct roff
*r
)
1973 assert(r
->mstackpos
>= 0);
1974 ctx
= r
->mstack
+ r
->mstackpos
;
1975 for (i
= 0; i
< ctx
->argc
; i
++)
1982 roff_endparse(struct roff
*r
)
1984 if (r
->last
!= NULL
)
1985 mandoc_msg(MANDOCERR_BLK_NOEND
, r
->last
->line
,
1986 r
->last
->col
, "%s", roff_name
[r
->last
->tok
]);
1988 if (r
->eqn
!= NULL
) {
1989 mandoc_msg(MANDOCERR_BLK_NOEND
,
1990 r
->eqn
->node
->line
, r
->eqn
->node
->pos
, "EQ");
1995 if (r
->tbl
!= NULL
) {
2002 * Parse a roff node's type from the input buffer. This must be in the
2003 * form of ".foo xxx" in the usual way.
2005 static enum roff_tok
2006 roff_parse(struct roff
*r
, char *buf
, int *pos
, int ln
, int ppos
)
2016 if ('\0' == *cp
|| '"' == *cp
|| '\t' == *cp
|| ' ' == *cp
)
2020 maclen
= roff_getname(r
, &cp
, ln
, ppos
);
2022 deftype
= ROFFDEF_USER
| ROFFDEF_REN
;
2023 r
->current_string
= roff_getstrn(r
, mac
, maclen
, &deftype
);
2032 t
= roffhash_find(r
->reqtab
, mac
, maclen
);
2035 if (t
!= TOKEN_NONE
)
2037 else if (deftype
== ROFFDEF_UNDEF
) {
2038 /* Using an undefined macro defines it to be empty. */
2039 roff_setstrn(&r
->strtab
, mac
, maclen
, "", 0, 0);
2040 roff_setstrn(&r
->rentab
, mac
, maclen
, NULL
, 0, 0);
2045 /* --- handling of request blocks ----------------------------------------- */
2048 * Close a macro definition block or an "ignore" block.
2051 roff_cblock(ROFF_ARGS
)
2055 if (r
->last
== NULL
) {
2056 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "..");
2060 switch (r
->last
->tok
) {
2069 /* Remapped in roff_block(). */
2072 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "..");
2077 roffnode_cleanscope(r
);
2080 * If a conditional block with braces is still open,
2081 * check for "\}" block end markers.
2084 if (r
->last
!= NULL
&& r
->last
->endspan
< 0) {
2085 rr
= 1; /* If arguments follow "\}", warn about them. */
2086 roff_cond_checkend(r
, tok
, buf
, ln
, ppos
, pos
, &rr
);
2089 if (buf
->buf
[pos
] != '\0')
2090 mandoc_msg(MANDOCERR_ARG_SKIP
, ln
, pos
,
2091 ".. %s", buf
->buf
+ pos
);
2097 * Pop all nodes ending at the end of the current input line.
2098 * Return the number of loops ended.
2101 roffnode_cleanscope(struct roff
*r
)
2106 while (r
->last
!= NULL
&& r
->last
->endspan
> 0) {
2107 if (--r
->last
->endspan
!= 0)
2109 inloop
+= roffnode_pop(r
);
2115 * Handle the closing "\}" of a conditional block.
2116 * Apart from generating warnings, this only pops nodes.
2117 * Return the number of loops ended.
2120 roff_ccond(struct roff
*r
, int ln
, int ppos
)
2122 if (NULL
== r
->last
) {
2123 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "\\}");
2127 switch (r
->last
->tok
) {
2134 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "\\}");
2138 if (r
->last
->endspan
> -1) {
2139 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "\\}");
2143 return roffnode_pop(r
) + roffnode_cleanscope(r
);
2147 roff_block(ROFF_ARGS
)
2149 const char *name
, *value
;
2150 char *call
, *cp
, *iname
, *rname
;
2151 size_t csz
, namesz
, rsz
;
2154 /* Ignore groff compatibility mode for now. */
2156 if (tok
== ROFF_de1
)
2158 else if (tok
== ROFF_dei1
)
2160 else if (tok
== ROFF_am1
)
2162 else if (tok
== ROFF_ami1
)
2165 /* Parse the macro name argument. */
2167 cp
= buf
->buf
+ pos
;
2168 if (tok
== ROFF_ig
) {
2173 namesz
= roff_getname(r
, &cp
, ln
, ppos
);
2174 iname
[namesz
] = '\0';
2177 /* Resolve the macro name argument if it is indirect. */
2179 if (namesz
&& (tok
== ROFF_dei
|| tok
== ROFF_ami
)) {
2180 deftype
= ROFFDEF_USER
;
2181 name
= roff_getstrn(r
, iname
, namesz
, &deftype
);
2183 mandoc_msg(MANDOCERR_STR_UNDEF
,
2184 ln
, (int)(iname
- buf
->buf
),
2185 "%.*s", (int)namesz
, iname
);
2188 namesz
= strlen(name
);
2192 if (namesz
== 0 && tok
!= ROFF_ig
) {
2193 mandoc_msg(MANDOCERR_REQ_EMPTY
,
2194 ln
, ppos
, "%s", roff_name
[tok
]);
2198 roffnode_push(r
, tok
, name
, ln
, ppos
);
2201 * At the beginning of a `de' macro, clear the existing string
2202 * with the same name, if there is one. New content will be
2203 * appended from roff_block_text() in multiline mode.
2206 if (tok
== ROFF_de
|| tok
== ROFF_dei
) {
2207 roff_setstrn(&r
->strtab
, name
, namesz
, "", 0, 0);
2208 roff_setstrn(&r
->rentab
, name
, namesz
, NULL
, 0, 0);
2209 } else if (tok
== ROFF_am
|| tok
== ROFF_ami
) {
2210 deftype
= ROFFDEF_ANY
;
2211 value
= roff_getstrn(r
, iname
, namesz
, &deftype
);
2212 switch (deftype
) { /* Before appending, ... */
2213 case ROFFDEF_PRE
: /* copy predefined to user-defined. */
2214 roff_setstrn(&r
->strtab
, name
, namesz
,
2215 value
, strlen(value
), 0);
2217 case ROFFDEF_REN
: /* call original standard macro. */
2218 csz
= mandoc_asprintf(&call
, ".%.*s \\$* \\\"\n",
2219 (int)strlen(value
), value
);
2220 roff_setstrn(&r
->strtab
, name
, namesz
, call
, csz
, 0);
2221 roff_setstrn(&r
->rentab
, name
, namesz
, NULL
, 0, 0);
2224 case ROFFDEF_STD
: /* rename and call standard macro. */
2225 rsz
= mandoc_asprintf(&rname
, "__%s_renamed", name
);
2226 roff_setstrn(&r
->rentab
, rname
, rsz
, name
, namesz
, 0);
2227 csz
= mandoc_asprintf(&call
, ".%.*s \\$* \\\"\n",
2229 roff_setstrn(&r
->strtab
, name
, namesz
, call
, csz
, 0);
2241 /* Get the custom end marker. */
2244 namesz
= roff_getname(r
, &cp
, ln
, ppos
);
2246 /* Resolve the end marker if it is indirect. */
2248 if (namesz
&& (tok
== ROFF_dei
|| tok
== ROFF_ami
)) {
2249 deftype
= ROFFDEF_USER
;
2250 name
= roff_getstrn(r
, iname
, namesz
, &deftype
);
2252 mandoc_msg(MANDOCERR_STR_UNDEF
,
2253 ln
, (int)(iname
- buf
->buf
),
2254 "%.*s", (int)namesz
, iname
);
2257 namesz
= strlen(name
);
2262 r
->last
->end
= mandoc_strndup(name
, namesz
);
2265 mandoc_msg(MANDOCERR_ARG_EXCESS
,
2266 ln
, pos
, ".%s ... %s", roff_name
[tok
], cp
);
2272 roff_block_sub(ROFF_ARGS
)
2278 * First check whether a custom macro exists at this level. If
2279 * it does, then check against it. This is some of groff's
2280 * stranger behaviours. If we encountered a custom end-scope
2281 * tag and that tag also happens to be a "real" macro, then we
2282 * need to try interpreting it again as a real macro. If it's
2283 * not, then return ignore. Else continue.
2287 for (i
= pos
, j
= 0; r
->last
->end
[j
]; j
++, i
++)
2288 if (buf
->buf
[i
] != r
->last
->end
[j
])
2291 if (r
->last
->end
[j
] == '\0' &&
2292 (buf
->buf
[i
] == '\0' ||
2293 buf
->buf
[i
] == ' ' ||
2294 buf
->buf
[i
] == '\t')) {
2296 roffnode_cleanscope(r
);
2298 while (buf
->buf
[i
] == ' ' || buf
->buf
[i
] == '\t')
2302 if (roff_parse(r
, buf
->buf
, &pos
, ln
, ppos
) !=
2310 * If we have no custom end-query or lookup failed, then try
2311 * pulling it out of the hashtable.
2314 t
= roff_parse(r
, buf
->buf
, &pos
, ln
, ppos
);
2316 if (t
!= ROFF_cblock
) {
2318 roff_setstr(r
, r
->last
->name
, buf
->buf
+ ppos
, 2);
2322 return (*roffs
[t
].proc
)(r
, t
, buf
, ln
, ppos
, pos
, offs
);
2326 roff_block_text(ROFF_ARGS
)
2330 roff_setstr(r
, r
->last
->name
, buf
->buf
+ pos
, 2);
2336 * Check for a closing "\}" and handle it.
2337 * In this function, the final "int *offs" argument is used for
2338 * different purposes than elsewhere:
2339 * Input: *offs == 0: caller wants to discard arguments following \}
2340 * *offs == 1: caller wants to preserve text following \}
2341 * Output: *offs = 0: tell caller to discard input line
2342 * *offs = 1: tell caller to use input line
2345 roff_cond_checkend(ROFF_ARGS
)
2348 int endloop
, irc
, rr
;
2352 endloop
= tok
!= ROFF_while
? ROFF_IGN
:
2353 rr
? ROFF_LOOPCONT
: ROFF_LOOPEXIT
;
2354 if (roffnode_cleanscope(r
))
2358 * If "\}" occurs on a macro line without a preceding macro or
2359 * a text line contains nothing else, drop the line completely.
2362 ep
= buf
->buf
+ pos
;
2363 if (ep
[0] == '\\' && ep
[1] == '}' && (ep
[2] == '\0' || *offs
== 0))
2367 * The closing delimiter "\}" rewinds the conditional scope
2368 * but is otherwise ignored when interpreting the line.
2371 while ((ep
= strchr(ep
, '\\')) != NULL
) {
2379 memmove(ep
, ep
+ 2, strlen(ep
+ 2) + 1);
2380 if (roff_ccond(r
, ln
, ep
- buf
->buf
))
2396 * Parse and process a request or macro line in conditional scope.
2399 roff_cond_sub(ROFF_ARGS
)
2401 struct roffnode
*bl
;
2405 rr
= 0; /* If arguments follow "\}", skip them. */
2406 irc
= roff_cond_checkend(r
, tok
, buf
, ln
, ppos
, pos
, &rr
);
2407 t
= roff_parse(r
, buf
->buf
, &pos
, ln
, ppos
);
2409 /* For now, let high level macros abort .ce mode. */
2411 if (roffce_node
!= NULL
&&
2412 (t
== TOKEN_NONE
|| t
== ROFF_Dd
|| t
== ROFF_EQ
||
2413 t
== ROFF_TH
|| t
== ROFF_TS
)) {
2414 r
->man
->last
= roffce_node
;
2415 r
->man
->next
= ROFF_NEXT_SIBLING
;
2421 * Fully handle known macros when they are structurally
2422 * required or when the conditional evaluated to true.
2425 if (t
== ROFF_break
) {
2426 if (irc
& ROFF_LOOPMASK
)
2427 irc
= ROFF_IGN
| ROFF_LOOPEXIT
;
2429 for (bl
= r
->last
; bl
!= NULL
; bl
= bl
->parent
) {
2431 if (bl
->tok
== ROFF_while
)
2435 } else if (t
!= TOKEN_NONE
&&
2436 (rr
|| roffs
[t
].flags
& ROFFMAC_STRUCT
))
2437 irc
|= (*roffs
[t
].proc
)(r
, t
, buf
, ln
, ppos
, pos
, offs
);
2439 irc
|= rr
? ROFF_CONT
: ROFF_IGN
;
2444 * Parse and process a text line in conditional scope.
2447 roff_cond_text(ROFF_ARGS
)
2451 rr
= 1; /* If arguments follow "\}", preserve them. */
2452 irc
= roff_cond_checkend(r
, tok
, buf
, ln
, ppos
, pos
, &rr
);
2458 /* --- handling of numeric and conditional expressions -------------------- */
2461 * Parse a single signed integer number. Stop at the first non-digit.
2462 * If there is at least one digit, return success and advance the
2463 * parse point, else return failure and leave the parse point unchanged.
2464 * Ignore overflows, treat them just like the C language.
2467 roff_getnum(const char *v
, int *pos
, int *res
, int flags
)
2469 int myres
, scaled
, n
, p
;
2476 if (n
|| v
[p
] == '+')
2479 if (flags
& ROFFNUM_WHITE
)
2480 while (isspace((unsigned char)v
[p
]))
2483 for (*res
= 0; isdigit((unsigned char)v
[p
]); p
++)
2484 *res
= 10 * *res
+ v
[p
] - '0';
2491 /* Each number may be followed by one optional scaling unit. */
2495 scaled
= *res
* 65536;
2498 scaled
= *res
* 240;
2501 scaled
= *res
* 240 / 2.54;
2512 scaled
= *res
* 10 / 3;
2518 scaled
= *res
* 6 / 25;
2525 if (flags
& ROFFNUM_SCALE
)
2533 * Evaluate a string comparison condition.
2534 * The first character is the delimiter.
2535 * Succeed if the string up to its second occurrence
2536 * matches the string up to its third occurrence.
2537 * Advance the cursor after the third occurrence
2538 * or lacking that, to the end of the line.
2541 roff_evalstrcond(const char *v
, int *pos
)
2543 const char *s1
, *s2
, *s3
;
2547 s1
= v
+ *pos
; /* initial delimiter */
2548 s2
= s1
+ 1; /* for scanning the first string */
2549 s3
= strchr(s2
, *s1
); /* for scanning the second string */
2551 if (NULL
== s3
) /* found no middle delimiter */
2554 while ('\0' != *++s3
) {
2555 if (*s2
!= *s3
) { /* mismatch */
2556 s3
= strchr(s3
, *s1
);
2559 if (*s3
== *s1
) { /* found the final delimiter */
2568 s3
= strchr(s2
, '\0');
2569 else if (*s3
!= '\0')
2576 * Evaluate an optionally negated single character, numerical,
2577 * or string condition.
2580 roff_evalcond(struct roff
*r
, int ln
, char *v
, int *pos
)
2582 const char *start
, *end
;
2585 int deftype
, len
, number
, savepos
, istrue
, wanttrue
;
2587 if ('!' == v
[*pos
]) {
2608 } while (v
[*pos
] == ' ');
2611 * Quirk for groff compatibility:
2612 * The horizontal tab is neither available nor unavailable.
2615 if (v
[*pos
] == '\t') {
2620 /* Printable ASCII characters are available. */
2622 if (v
[*pos
] != '\\') {
2628 switch (mandoc_escape(&end
, &start
, &len
)) {
2629 case ESCAPE_SPECIAL
:
2630 istrue
= mchars_spec2cp(start
, len
) != -1;
2632 case ESCAPE_UNICODE
:
2635 case ESCAPE_NUMBERED
:
2636 istrue
= mchars_num2char(start
, len
) != -1;
2643 return istrue
== wanttrue
;
2650 sz
= roff_getname(r
, &cp
, ln
, cp
- v
);
2653 else if (v
[*pos
] == 'r')
2654 istrue
= roff_hasregn(r
, name
, sz
);
2656 deftype
= ROFFDEF_ANY
;
2657 roff_getstrn(r
, name
, sz
, &deftype
);
2660 *pos
= (name
+ sz
) - v
;
2661 return istrue
== wanttrue
;
2667 if (roff_evalnum(r
, ln
, v
, pos
, &number
, ROFFNUM_SCALE
))
2668 return (number
> 0) == wanttrue
;
2669 else if (*pos
== savepos
)
2670 return roff_evalstrcond(v
, pos
) == wanttrue
;
2676 roff_line_ignore(ROFF_ARGS
)
2683 roff_insec(ROFF_ARGS
)
2686 mandoc_msg(MANDOCERR_REQ_INSEC
, ln
, ppos
, "%s", roff_name
[tok
]);
2691 roff_unsupp(ROFF_ARGS
)
2694 mandoc_msg(MANDOCERR_REQ_UNSUPP
, ln
, ppos
, "%s", roff_name
[tok
]);
2699 roff_cond(ROFF_ARGS
)
2703 roffnode_push(r
, tok
, NULL
, ln
, ppos
);
2706 * An `.el' has no conditional body: it will consume the value
2707 * of the current rstack entry set in prior `ie' calls or
2710 * If we're not an `el', however, then evaluate the conditional.
2713 r
->last
->rule
= tok
== ROFF_el
?
2714 (r
->rstackpos
< 0 ? 0 : r
->rstack
[r
->rstackpos
--]) :
2715 roff_evalcond(r
, ln
, buf
->buf
, &pos
);
2718 * An if-else will put the NEGATION of the current evaluated
2719 * conditional into the stack of rules.
2722 if (tok
== ROFF_ie
) {
2723 if (r
->rstackpos
+ 1 == r
->rstacksz
) {
2725 r
->rstack
= mandoc_reallocarray(r
->rstack
,
2726 r
->rstacksz
, sizeof(int));
2728 r
->rstack
[++r
->rstackpos
] = !r
->last
->rule
;
2731 /* If the parent has false as its rule, then so do we. */
2733 if (r
->last
->parent
&& !r
->last
->parent
->rule
)
2738 * If there is nothing on the line after the conditional,
2739 * not even whitespace, use next-line scope.
2740 * Except that .while does not support next-line scope.
2743 if (buf
->buf
[pos
] == '\0' && tok
!= ROFF_while
) {
2744 r
->last
->endspan
= 2;
2748 while (buf
->buf
[pos
] == ' ')
2751 /* An opening brace requests multiline scope. */
2753 if (buf
->buf
[pos
] == '\\' && buf
->buf
[pos
+ 1] == '{') {
2754 r
->last
->endspan
= -1;
2756 while (buf
->buf
[pos
] == ' ')
2762 * Anything else following the conditional causes
2763 * single-line scope. Warn if the scope contains
2764 * nothing but trailing whitespace.
2767 if (buf
->buf
[pos
] == '\0')
2768 mandoc_msg(MANDOCERR_COND_EMPTY
,
2769 ln
, ppos
, "%s", roff_name
[tok
]);
2771 r
->last
->endspan
= 1;
2776 if (tok
== ROFF_while
)
2788 /* Ignore groff compatibility mode for now. */
2790 if (tok
== ROFF_ds1
)
2792 else if (tok
== ROFF_as1
)
2796 * The first word is the name of the string.
2797 * If it is empty or terminated by an escape sequence,
2798 * abort the `ds' request without defining anything.
2801 name
= string
= buf
->buf
+ pos
;
2805 namesz
= roff_getname(r
, &string
, ln
, pos
);
2806 switch (name
[namesz
]) {
2810 string
= buf
->buf
+ pos
+ namesz
;
2816 /* Read past the initial double-quote, if any. */
2820 /* The rest is the value. */
2821 roff_setstrn(&r
->strtab
, name
, namesz
, string
, strlen(string
),
2823 roff_setstrn(&r
->rentab
, name
, namesz
, NULL
, 0, 0);
2828 * Parse a single operator, one or two characters long.
2829 * If the operator is recognized, return success and advance the
2830 * parse point, else return failure and leave the parse point unchanged.
2833 roff_getop(const char *v
, int *pos
, char *res
)
2848 switch (v
[*pos
+ 1]) {
2866 switch (v
[*pos
+ 1]) {
2880 if ('=' == v
[*pos
+ 1])
2892 * Evaluate either a parenthesized numeric expression
2893 * or a single signed integer number.
2896 roff_evalpar(struct roff
*r
, int ln
,
2897 const char *v
, int *pos
, int *res
, int flags
)
2901 return roff_getnum(v
, pos
, res
, flags
);
2904 if ( ! roff_evalnum(r
, ln
, v
, pos
, res
, flags
| ROFFNUM_WHITE
))
2908 * Omission of the closing parenthesis
2909 * is an error in validation mode,
2910 * but ignored in evaluation mode.
2915 else if (NULL
== res
)
2922 * Evaluate a complete numeric expression.
2923 * Proceed left to right, there is no concept of precedence.
2926 roff_evalnum(struct roff
*r
, int ln
, const char *v
,
2927 int *pos
, int *res
, int flags
)
2929 int mypos
, operand2
;
2937 if (flags
& ROFFNUM_WHITE
)
2938 while (isspace((unsigned char)v
[*pos
]))
2941 if ( ! roff_evalpar(r
, ln
, v
, pos
, res
, flags
))
2945 if (flags
& ROFFNUM_WHITE
)
2946 while (isspace((unsigned char)v
[*pos
]))
2949 if ( ! roff_getop(v
, pos
, &operator))
2952 if (flags
& ROFFNUM_WHITE
)
2953 while (isspace((unsigned char)v
[*pos
]))
2956 if ( ! roff_evalpar(r
, ln
, v
, pos
, &operand2
, flags
))
2959 if (flags
& ROFFNUM_WHITE
)
2960 while (isspace((unsigned char)v
[*pos
]))
2977 if (operand2
== 0) {
2978 mandoc_msg(MANDOCERR_DIVZERO
,
2986 if (operand2
== 0) {
2987 mandoc_msg(MANDOCERR_DIVZERO
,
2995 *res
= *res
< operand2
;
2998 *res
= *res
> operand2
;
3001 *res
= *res
<= operand2
;
3004 *res
= *res
>= operand2
;
3007 *res
= *res
== operand2
;
3010 *res
= *res
!= operand2
;
3013 *res
= *res
&& operand2
;
3016 *res
= *res
|| operand2
;
3019 if (operand2
< *res
)
3023 if (operand2
> *res
)
3033 /* --- register management ------------------------------------------------ */
3036 roff_setreg(struct roff
*r
, const char *name
, int val
, char sign
)
3038 roff_setregn(r
, name
, strlen(name
), val
, sign
, INT_MIN
);
3042 roff_setregn(struct roff
*r
, const char *name
, size_t len
,
3043 int val
, char sign
, int step
)
3045 struct roffreg
*reg
;
3047 /* Search for an existing register with the same name. */
3050 while (reg
!= NULL
&& (reg
->key
.sz
!= len
||
3051 strncmp(reg
->key
.p
, name
, len
) != 0))
3055 /* Create a new register. */
3056 reg
= mandoc_malloc(sizeof(struct roffreg
));
3057 reg
->key
.p
= mandoc_strndup(name
, len
);
3061 reg
->next
= r
->regtab
;
3067 else if ('-' == sign
)
3071 if (step
!= INT_MIN
)
3076 * Handle some predefined read-only number registers.
3077 * For now, return -1 if the requested register is not predefined;
3078 * in case a predefined read-only register having the value -1
3079 * were to turn up, another special value would have to be chosen.
3082 roff_getregro(const struct roff
*r
, const char *name
)
3086 case '$': /* Number of arguments of the last macro evaluated. */
3087 return r
->mstackpos
< 0 ? 0 : r
->mstack
[r
->mstackpos
].argc
;
3088 case 'A': /* ASCII approximation mode is always off. */
3090 case 'g': /* Groff compatibility mode is always on. */
3092 case 'H': /* Fixed horizontal resolution. */
3094 case 'j': /* Always adjust left margin only. */
3096 case 'T': /* Some output device is always defined. */
3098 case 'V': /* Fixed vertical resolution. */
3106 roff_getreg(struct roff
*r
, const char *name
)
3108 return roff_getregn(r
, name
, strlen(name
), '\0');
3112 roff_getregn(struct roff
*r
, const char *name
, size_t len
, char sign
)
3114 struct roffreg
*reg
;
3117 if ('.' == name
[0] && 2 == len
) {
3118 val
= roff_getregro(r
, name
+ 1);
3123 for (reg
= r
->regtab
; reg
; reg
= reg
->next
) {
3124 if (len
== reg
->key
.sz
&&
3125 0 == strncmp(name
, reg
->key
.p
, len
)) {
3128 reg
->val
+= reg
->step
;
3131 reg
->val
-= reg
->step
;
3140 roff_setregn(r
, name
, len
, 0, '\0', INT_MIN
);
3145 roff_hasregn(const struct roff
*r
, const char *name
, size_t len
)
3147 struct roffreg
*reg
;
3150 if ('.' == name
[0] && 2 == len
) {
3151 val
= roff_getregro(r
, name
+ 1);
3156 for (reg
= r
->regtab
; reg
; reg
= reg
->next
)
3157 if (len
== reg
->key
.sz
&&
3158 0 == strncmp(name
, reg
->key
.p
, len
))
3165 roff_freereg(struct roffreg
*reg
)
3167 struct roffreg
*old_reg
;
3169 while (NULL
!= reg
) {
3180 char *key
, *val
, *step
;
3185 key
= val
= buf
->buf
+ pos
;
3189 keysz
= roff_getname(r
, &val
, ln
, pos
);
3190 if (key
[keysz
] == '\\' || key
[keysz
] == '\t')
3194 if (sign
== '+' || sign
== '-')
3198 if (roff_evalnum(r
, ln
, val
, &len
, &iv
, ROFFNUM_SCALE
) == 0)
3202 while (isspace((unsigned char)*step
))
3204 if (roff_evalnum(r
, ln
, step
, NULL
, &is
, 0) == 0)
3207 roff_setregn(r
, key
, keysz
, iv
, sign
, is
);
3214 struct roffreg
*reg
, **prev
;
3218 name
= cp
= buf
->buf
+ pos
;
3221 namesz
= roff_getname(r
, &cp
, ln
, pos
);
3222 name
[namesz
] = '\0';
3227 if (reg
== NULL
|| !strcmp(name
, reg
->key
.p
))
3239 /* --- handler functions for roff requests -------------------------------- */
3248 cp
= buf
->buf
+ pos
;
3249 while (*cp
!= '\0') {
3251 namesz
= roff_getname(r
, &cp
, ln
, (int)(cp
- buf
->buf
));
3252 roff_setstrn(&r
->strtab
, name
, namesz
, NULL
, 0, 0);
3253 roff_setstrn(&r
->rentab
, name
, namesz
, NULL
, 0, 0);
3254 if (name
[namesz
] == '\\' || name
[namesz
] == '\t')
3265 /* Parse the number of lines. */
3267 if ( ! roff_evalnum(r
, ln
, buf
->buf
, &pos
, &iv
, 0)) {
3268 mandoc_msg(MANDOCERR_IT_NONUM
,
3269 ln
, ppos
, "%s", buf
->buf
+ 1);
3273 while (isspace((unsigned char)buf
->buf
[pos
]))
3277 * Arm the input line trap.
3278 * Special-casing "an-trap" is an ugly workaround to cope
3279 * with DocBook stupidly fiddling with man(7) internals.
3283 roffit_macro
= mandoc_strdup(iv
!= 1 ||
3284 strcmp(buf
->buf
+ pos
, "an-trap") ?
3285 buf
->buf
+ pos
: "br");
3293 enum roff_tok t
, te
;
3300 r
->format
= MPARSE_MDOC
;
3301 mask
= MPARSE_MDOC
| MPARSE_QUICK
;
3307 r
->format
= MPARSE_MAN
;
3308 mask
= MPARSE_QUICK
;
3313 if ((r
->options
& mask
) == 0)
3314 for (t
= tok
; t
< te
; t
++)
3315 roff_setstr(r
, roff_name
[t
], NULL
, 0);
3322 r
->man
->flags
&= ~ROFF_NONOFILL
;
3323 if (r
->tbl
== NULL
) {
3324 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "TE");
3327 if (tbl_end(r
->tbl
, 0) == 0) {
3330 buf
->buf
= mandoc_strdup(".sp");
3333 return ROFF_REPARSE
;
3344 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "T&");
3346 tbl_restart(ln
, ppos
, r
->tbl
);
3352 * Handle in-line equation delimiters.
3355 roff_eqndelim(struct roff
*r
, struct buf
*buf
, int pos
)
3358 const char *bef_pr
, *bef_nl
, *mac
, *aft_nl
, *aft_pr
;
3361 * Outside equations, look for an opening delimiter.
3362 * If we are inside an equation, we already know it is
3363 * in-line, or this function wouldn't have been called;
3364 * so look for a closing delimiter.
3367 cp1
= buf
->buf
+ pos
;
3368 cp2
= strchr(cp1
, r
->eqn
== NULL
?
3369 r
->last_eqn
->odelim
: r
->last_eqn
->cdelim
);
3374 bef_pr
= bef_nl
= aft_nl
= aft_pr
= "";
3376 /* Handle preceding text, protecting whitespace. */
3378 if (*buf
->buf
!= '\0') {
3385 * Prepare replacing the delimiter with an equation macro
3386 * and drop leading white space from the equation.
3389 if (r
->eqn
== NULL
) {
3396 /* Handle following text, protecting whitespace. */
3404 /* Do the actual replacement. */
3406 buf
->sz
= mandoc_asprintf(&cp1
, "%s%s%s%s%s%s%s", buf
->buf
,
3407 bef_pr
, bef_nl
, mac
, aft_nl
, aft_pr
, cp2
) + 1;
3411 /* Toggle the in-line state of the eqn subsystem. */
3413 r
->eqn_inline
= r
->eqn
== NULL
;
3414 return ROFF_REPARSE
;
3420 struct roff_node
*n
;
3422 if (r
->man
->meta
.macroset
== MACROSET_MAN
)
3423 man_breakscope(r
->man
, ROFF_EQ
);
3424 n
= roff_node_alloc(r
->man
, ln
, ppos
, ROFFT_EQN
, TOKEN_NONE
);
3425 if (ln
> r
->man
->last
->line
)
3426 n
->flags
|= NODE_LINE
;
3427 n
->eqn
= eqn_box_new();
3428 roff_node_append(r
->man
, n
);
3429 r
->man
->next
= ROFF_NEXT_SIBLING
;
3431 assert(r
->eqn
== NULL
);
3432 if (r
->last_eqn
== NULL
)
3433 r
->last_eqn
= eqn_alloc();
3435 eqn_reset(r
->last_eqn
);
3436 r
->eqn
= r
->last_eqn
;
3439 if (buf
->buf
[pos
] != '\0')
3440 mandoc_msg(MANDOCERR_ARG_SKIP
, ln
, pos
,
3441 ".EQ %s", buf
->buf
+ pos
);
3449 if (r
->eqn
!= NULL
) {
3453 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "EN");
3454 if (buf
->buf
[pos
] != '\0')
3455 mandoc_msg(MANDOCERR_ARG_SKIP
, ln
, pos
,
3456 "EN %s", buf
->buf
+ pos
);
3463 if (r
->tbl
!= NULL
) {
3464 mandoc_msg(MANDOCERR_BLK_BROKEN
, ln
, ppos
, "TS breaks TS");
3467 r
->man
->flags
|= ROFF_NONOFILL
;
3468 r
->tbl
= tbl_alloc(ppos
, ln
, r
->last_tbl
);
3469 if (r
->last_tbl
== NULL
)
3470 r
->first_tbl
= r
->tbl
;
3471 r
->last_tbl
= r
->tbl
;
3476 roff_noarg(ROFF_ARGS
)
3478 if (r
->man
->flags
& (MAN_BLINE
| MAN_ELINE
))
3479 man_breakscope(r
->man
, tok
);
3480 if (tok
== ROFF_brp
)
3482 roff_elem_alloc(r
->man
, ln
, ppos
, tok
);
3483 if (buf
->buf
[pos
] != '\0')
3484 mandoc_msg(MANDOCERR_ARG_SKIP
, ln
, pos
,
3485 "%s %s", roff_name
[tok
], buf
->buf
+ pos
);
3487 r
->man
->flags
|= ROFF_NOFILL
;
3488 else if (tok
== ROFF_fi
)
3489 r
->man
->flags
&= ~ROFF_NOFILL
;
3490 r
->man
->last
->flags
|= NODE_LINE
| NODE_VALID
| NODE_ENDED
;
3491 r
->man
->next
= ROFF_NEXT_SIBLING
;
3496 roff_onearg(ROFF_ARGS
)
3498 struct roff_node
*n
;
3502 if (r
->man
->flags
& (MAN_BLINE
| MAN_ELINE
) &&
3503 (tok
== ROFF_ce
|| tok
== ROFF_rj
|| tok
== ROFF_sp
||
3505 man_breakscope(r
->man
, tok
);
3507 if (roffce_node
!= NULL
&& (tok
== ROFF_ce
|| tok
== ROFF_rj
)) {
3508 r
->man
->last
= roffce_node
;
3509 r
->man
->next
= ROFF_NEXT_SIBLING
;
3512 roff_elem_alloc(r
->man
, ln
, ppos
, tok
);
3515 cp
= buf
->buf
+ pos
;
3517 while (*cp
!= '\0' && *cp
!= ' ')
3522 mandoc_msg(MANDOCERR_ARG_EXCESS
,
3523 ln
, (int)(cp
- buf
->buf
),
3524 "%s ... %s", roff_name
[tok
], cp
);
3525 roff_word_alloc(r
->man
, ln
, pos
, buf
->buf
+ pos
);
3528 if (tok
== ROFF_ce
|| tok
== ROFF_rj
) {
3529 if (r
->man
->last
->type
== ROFFT_ELEM
) {
3530 roff_word_alloc(r
->man
, ln
, pos
, "1");
3531 r
->man
->last
->flags
|= NODE_NOSRC
;
3534 if (roff_evalnum(r
, ln
, r
->man
->last
->string
, &npos
,
3535 &roffce_lines
, 0) == 0) {
3536 mandoc_msg(MANDOCERR_CE_NONUM
,
3537 ln
, pos
, "ce %s", buf
->buf
+ pos
);
3540 if (roffce_lines
< 1) {
3541 r
->man
->last
= r
->man
->last
->parent
;
3545 roffce_node
= r
->man
->last
->parent
;
3547 n
->flags
|= NODE_VALID
| NODE_ENDED
;
3550 n
->flags
|= NODE_LINE
;
3551 r
->man
->next
= ROFF_NEXT_SIBLING
;
3556 roff_manyarg(ROFF_ARGS
)
3558 struct roff_node
*n
;
3561 roff_elem_alloc(r
->man
, ln
, ppos
, tok
);
3564 for (sp
= ep
= buf
->buf
+ pos
; *sp
!= '\0'; sp
= ep
) {
3565 while (*ep
!= '\0' && *ep
!= ' ')
3569 roff_word_alloc(r
->man
, ln
, sp
- buf
->buf
, sp
);
3572 n
->flags
|= NODE_LINE
| NODE_VALID
| NODE_ENDED
;
3574 r
->man
->next
= ROFF_NEXT_SIBLING
;
3581 char *oldn
, *newn
, *end
, *value
;
3582 size_t oldsz
, newsz
, valsz
;
3584 newn
= oldn
= buf
->buf
+ pos
;
3588 newsz
= roff_getname(r
, &oldn
, ln
, pos
);
3589 if (newn
[newsz
] == '\\' || newn
[newsz
] == '\t' || *oldn
== '\0')
3593 oldsz
= roff_getname(r
, &end
, ln
, oldn
- buf
->buf
);
3597 valsz
= mandoc_asprintf(&value
, ".%.*s \\$@\\\"\n",
3599 roff_setstrn(&r
->strtab
, newn
, newsz
, value
, valsz
, 0);
3600 roff_setstrn(&r
->rentab
, newn
, newsz
, NULL
, 0, 0);
3606 * The .break request only makes sense inside conditionals,
3607 * and that case is already handled in roff_cond_sub().
3610 roff_break(ROFF_ARGS
)
3612 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, pos
, "break");
3623 if (*p
== '\0' || (r
->control
= *p
++) == '.')
3627 mandoc_msg(MANDOCERR_ARG_EXCESS
,
3628 ln
, p
- buf
->buf
, "cc ... %s", p
);
3634 roff_char(ROFF_ARGS
)
3636 const char *p
, *kp
, *vp
;
3640 /* Parse the character to be replaced. */
3642 kp
= buf
->buf
+ pos
;
3644 if (*kp
== '\0' || (*kp
== '\\' &&
3645 mandoc_escape(&p
, NULL
, NULL
) != ESCAPE_SPECIAL
) ||
3646 (*p
!= ' ' && *p
!= '\0')) {
3647 mandoc_msg(MANDOCERR_CHAR_ARG
, ln
, pos
, "char %s", kp
);
3655 * If the replacement string contains a font escape sequence,
3656 * we have to restore the font at the end.
3662 while (*p
!= '\0') {
3665 switch (mandoc_escape(&p
, NULL
, NULL
)) {
3667 case ESCAPE_FONTROMAN
:
3668 case ESCAPE_FONTITALIC
:
3669 case ESCAPE_FONTBOLD
:
3674 case ESCAPE_FONTPREV
:
3682 mandoc_msg(MANDOCERR_CHAR_FONT
,
3683 ln
, (int)(vp
- buf
->buf
), "%s", vp
);
3686 * Approximate the effect of .char using the .tr tables.
3687 * XXX In groff, .char and .tr interact differently.
3691 if (r
->xtab
== NULL
)
3692 r
->xtab
= mandoc_calloc(128, sizeof(*r
->xtab
));
3693 assert((unsigned int)*kp
< 128);
3694 free(r
->xtab
[(int)*kp
].p
);
3695 r
->xtab
[(int)*kp
].sz
= mandoc_asprintf(&r
->xtab
[(int)*kp
].p
,
3696 "%s%s", vp
, font
? "\fP" : "");
3698 roff_setstrn(&r
->xmbtab
, kp
, ksz
, vp
, vsz
, 0);
3700 roff_setstrn(&r
->xmbtab
, kp
, ksz
, "\\fP", 3, 1);
3716 mandoc_msg(MANDOCERR_ARG_EXCESS
, ln
,
3717 (int)(p
- buf
->buf
), "ec ... %s", p
);
3726 if (buf
->buf
[pos
] != '\0')
3727 mandoc_msg(MANDOCERR_ARG_SKIP
,
3728 ln
, pos
, "eo %s", buf
->buf
+ pos
);
3735 while (buf
->buf
[pos
] == ' ')
3744 const char *p
, *first
, *second
;
3746 enum mandoc_esc esc
;
3751 mandoc_msg(MANDOCERR_REQ_EMPTY
, ln
, ppos
, "tr");
3755 while (*p
!= '\0') {
3759 if (*first
== '\\') {
3760 esc
= mandoc_escape(&p
, NULL
, NULL
);
3761 if (esc
== ESCAPE_ERROR
) {
3762 mandoc_msg(MANDOCERR_ESC_BAD
, ln
,
3763 (int)(p
- buf
->buf
), "%s", first
);
3766 fsz
= (size_t)(p
- first
);
3770 if (*second
== '\\') {
3771 esc
= mandoc_escape(&p
, NULL
, NULL
);
3772 if (esc
== ESCAPE_ERROR
) {
3773 mandoc_msg(MANDOCERR_ESC_BAD
, ln
,
3774 (int)(p
- buf
->buf
), "%s", second
);
3777 ssz
= (size_t)(p
- second
);
3778 } else if (*second
== '\0') {
3779 mandoc_msg(MANDOCERR_TR_ODD
, ln
,
3780 (int)(first
- buf
->buf
), "tr %s", first
);
3786 roff_setstrn(&r
->xmbtab
, first
, fsz
,
3791 if (r
->xtab
== NULL
)
3792 r
->xtab
= mandoc_calloc(128,
3793 sizeof(struct roffstr
));
3795 free(r
->xtab
[(int)*first
].p
);
3796 r
->xtab
[(int)*first
].p
= mandoc_strndup(second
, ssz
);
3797 r
->xtab
[(int)*first
].sz
= ssz
;
3804 * Implementation of the .return request.
3805 * There is no need to call roff_userret() from here.
3806 * The read module will call that after rewinding the reader stack
3807 * to the place from where the current macro was called.
3810 roff_return(ROFF_ARGS
)
3812 if (r
->mstackpos
>= 0)
3813 return ROFF_IGN
| ROFF_USERRET
;
3815 mandoc_msg(MANDOCERR_REQ_NOMAC
, ln
, ppos
, "return");
3823 char *oldn
, *newn
, *end
;
3824 size_t oldsz
, newsz
;
3827 oldn
= newn
= buf
->buf
+ pos
;
3831 oldsz
= roff_getname(r
, &newn
, ln
, pos
);
3832 if (oldn
[oldsz
] == '\\' || oldn
[oldsz
] == '\t' || *newn
== '\0')
3836 newsz
= roff_getname(r
, &end
, ln
, newn
- buf
->buf
);
3840 deftype
= ROFFDEF_ANY
;
3841 value
= roff_getstrn(r
, oldn
, oldsz
, &deftype
);
3844 roff_setstrn(&r
->strtab
, newn
, newsz
, value
, strlen(value
), 0);
3845 roff_setstrn(&r
->strtab
, oldn
, oldsz
, NULL
, 0, 0);
3846 roff_setstrn(&r
->rentab
, newn
, newsz
, NULL
, 0, 0);
3849 roff_setstrn(&r
->strtab
, newn
, newsz
, value
, strlen(value
), 0);
3850 roff_setstrn(&r
->rentab
, newn
, newsz
, NULL
, 0, 0);
3853 roff_setstrn(&r
->rentab
, newn
, newsz
, value
, strlen(value
), 0);
3854 roff_setstrn(&r
->rentab
, oldn
, oldsz
, NULL
, 0, 0);
3855 roff_setstrn(&r
->strtab
, newn
, newsz
, NULL
, 0, 0);
3858 roff_setstrn(&r
->rentab
, newn
, newsz
, oldn
, oldsz
, 0);
3859 roff_setstrn(&r
->strtab
, newn
, newsz
, NULL
, 0, 0);
3862 roff_setstrn(&r
->strtab
, newn
, newsz
, NULL
, 0, 0);
3863 roff_setstrn(&r
->rentab
, newn
, newsz
, NULL
, 0, 0);
3870 roff_shift(ROFF_ARGS
)
3876 if (buf
->buf
[pos
] != '\0' &&
3877 roff_evalnum(r
, ln
, buf
->buf
, &pos
, &levels
, 0) == 0) {
3878 mandoc_msg(MANDOCERR_CE_NONUM
,
3879 ln
, pos
, "shift %s", buf
->buf
+ pos
);
3882 if (r
->mstackpos
< 0) {
3883 mandoc_msg(MANDOCERR_REQ_NOMAC
, ln
, ppos
, "shift");
3886 ctx
= r
->mstack
+ r
->mstackpos
;
3887 if (levels
> ctx
->argc
) {
3888 mandoc_msg(MANDOCERR_SHIFT
,
3889 ln
, pos
, "%d, but max is %d", levels
, ctx
->argc
);
3894 for (i
= 0; i
< levels
; i
++)
3896 ctx
->argc
-= levels
;
3897 for (i
= 0; i
< ctx
->argc
; i
++)
3898 ctx
->argv
[i
] = ctx
->argv
[i
+ levels
];
3907 name
= buf
->buf
+ pos
;
3908 mandoc_msg(MANDOCERR_SO
, ln
, ppos
, "so %s", name
);
3911 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3912 * opening anything that's not in our cwd or anything beneath
3913 * it. Thus, explicitly disallow traversing up the file-system
3914 * or using absolute paths.
3917 if (*name
== '/' || strstr(name
, "../") || strstr(name
, "/..")) {
3918 mandoc_msg(MANDOCERR_SO_PATH
, ln
, ppos
, ".so %s", name
);
3919 buf
->sz
= mandoc_asprintf(&cp
,
3920 ".sp\nSee the file %s.\n.sp", name
) + 1;
3924 return ROFF_REPARSE
;
3931 /* --- user defined strings and macros ------------------------------------ */
3934 roff_userdef(ROFF_ARGS
)
3937 char *arg
, *ap
, *dst
, *src
;
3940 /* If the macro is empty, ignore it altogether. */
3942 if (*r
->current_string
== '\0')
3945 /* Initialize a new macro stack context. */
3947 if (++r
->mstackpos
== r
->mstacksz
) {
3948 r
->mstack
= mandoc_recallocarray(r
->mstack
,
3949 r
->mstacksz
, r
->mstacksz
+ 8, sizeof(*r
->mstack
));
3952 ctx
= r
->mstack
+ r
->mstackpos
;
3956 * Collect pointers to macro argument strings,
3957 * NUL-terminating them and escaping quotes.
3960 src
= buf
->buf
+ pos
;
3961 while (*src
!= '\0') {
3962 if (ctx
->argc
== ctx
->argsz
) {
3964 ctx
->argv
= mandoc_reallocarray(ctx
->argv
,
3965 ctx
->argsz
, sizeof(*ctx
->argv
));
3967 arg
= roff_getarg(r
, &src
, ln
, &pos
);
3968 sz
= 1; /* For the terminating NUL. */
3969 for (ap
= arg
; *ap
!= '\0'; ap
++)
3970 sz
+= *ap
== '"' ? 4 : 1;
3971 ctx
->argv
[ctx
->argc
++] = dst
= mandoc_malloc(sz
);
3972 for (ap
= arg
; *ap
!= '\0'; ap
++) {
3974 memcpy(dst
, "\\(dq", 4);
3983 /* Replace the macro invocation by the macro definition. */
3986 buf
->buf
= mandoc_strdup(r
->current_string
);
3987 buf
->sz
= strlen(buf
->buf
) + 1;
3990 return buf
->buf
[buf
->sz
- 2] == '\n' ?
3991 ROFF_REPARSE
| ROFF_USERCALL
: ROFF_IGN
| ROFF_APPEND
;
3995 * Calling a high-level macro that was renamed with .rn.
3996 * r->current_string has already been set up by roff_parse().
3999 roff_renamed(ROFF_ARGS
)
4003 buf
->sz
= mandoc_asprintf(&nbuf
, ".%s%s%s", r
->current_string
,
4004 buf
->buf
[pos
] == '\0' ? "" : " ", buf
->buf
+ pos
) + 1;
4012 * Measure the length in bytes of the roff identifier at *cpp
4013 * and advance the pointer to the next word.
4016 roff_getname(struct roff
*r
, char **cpp
, int ln
, int pos
)
4025 /* Advance cp to the byte after the end of the name. */
4027 for (cp
= name
; 1; cp
++) {
4031 if (*cp
== ' ' || *cp
== '\t') {
4037 if (cp
[1] == '{' || cp
[1] == '}')
4041 mandoc_msg(MANDOCERR_NAMESC
, ln
, pos
,
4042 "%.*s", (int)(cp
- name
+ 1), name
);
4043 mandoc_escape((const char **)&cp
, NULL
, NULL
);
4047 /* Read past spaces. */
4057 * Store *string into the user-defined string called *name.
4058 * To clear an existing entry, call with (*r, *name, NULL, 0).
4059 * append == 0: replace mode
4060 * append == 1: single-line append mode
4061 * append == 2: multiline append mode, append '\n' after each call
4064 roff_setstr(struct roff
*r
, const char *name
, const char *string
,
4069 namesz
= strlen(name
);
4070 roff_setstrn(&r
->strtab
, name
, namesz
, string
,
4071 string
? strlen(string
) : 0, append
);
4072 roff_setstrn(&r
->rentab
, name
, namesz
, NULL
, 0, 0);
4076 roff_setstrn(struct roffkv
**r
, const char *name
, size_t namesz
,
4077 const char *string
, size_t stringsz
, int append
)
4082 size_t oldch
, newch
;
4084 /* Search for an existing string with the same name. */
4087 while (n
&& (namesz
!= n
->key
.sz
||
4088 strncmp(n
->key
.p
, name
, namesz
)))
4092 /* Create a new string table entry. */
4093 n
= mandoc_malloc(sizeof(struct roffkv
));
4094 n
->key
.p
= mandoc_strndup(name
, namesz
);
4100 } else if (0 == append
) {
4110 * One additional byte for the '\n' in multiline mode,
4111 * and one for the terminating '\0'.
4113 newch
= stringsz
+ (1 < append
? 2u : 1u);
4115 if (NULL
== n
->val
.p
) {
4116 n
->val
.p
= mandoc_malloc(newch
);
4121 n
->val
.p
= mandoc_realloc(n
->val
.p
, oldch
+ newch
);
4124 /* Skip existing content in the destination buffer. */
4125 c
= n
->val
.p
+ (int)oldch
;
4127 /* Append new content to the destination buffer. */
4129 while (i
< (int)stringsz
) {
4131 * Rudimentary roff copy mode:
4132 * Handle escaped backslashes.
4134 if ('\\' == string
[i
] && '\\' == string
[i
+ 1])
4139 /* Append terminating bytes. */
4144 n
->val
.sz
= (int)(c
- n
->val
.p
);
4148 roff_getstrn(struct roff
*r
, const char *name
, size_t len
,
4151 const struct roffkv
*n
;
4156 for (n
= r
->strtab
; n
!= NULL
; n
= n
->next
) {
4157 if (strncmp(name
, n
->key
.p
, len
) != 0 ||
4158 n
->key
.p
[len
] != '\0' || n
->val
.p
== NULL
)
4160 if (*deftype
& ROFFDEF_USER
) {
4161 *deftype
= ROFFDEF_USER
;
4168 for (n
= r
->rentab
; n
!= NULL
; n
= n
->next
) {
4169 if (strncmp(name
, n
->key
.p
, len
) != 0 ||
4170 n
->key
.p
[len
] != '\0' || n
->val
.p
== NULL
)
4172 if (*deftype
& ROFFDEF_REN
) {
4173 *deftype
= ROFFDEF_REN
;
4180 for (i
= 0; i
< PREDEFS_MAX
; i
++) {
4181 if (strncmp(name
, predefs
[i
].name
, len
) != 0 ||
4182 predefs
[i
].name
[len
] != '\0')
4184 if (*deftype
& ROFFDEF_PRE
) {
4185 *deftype
= ROFFDEF_PRE
;
4186 return predefs
[i
].str
;
4192 if (r
->man
->meta
.macroset
!= MACROSET_MAN
) {
4193 for (tok
= MDOC_Dd
; tok
< MDOC_MAX
; tok
++) {
4194 if (strncmp(name
, roff_name
[tok
], len
) != 0 ||
4195 roff_name
[tok
][len
] != '\0')
4197 if (*deftype
& ROFFDEF_STD
) {
4198 *deftype
= ROFFDEF_STD
;
4206 if (r
->man
->meta
.macroset
!= MACROSET_MDOC
) {
4207 for (tok
= MAN_TH
; tok
< MAN_MAX
; tok
++) {
4208 if (strncmp(name
, roff_name
[tok
], len
) != 0 ||
4209 roff_name
[tok
][len
] != '\0')
4211 if (*deftype
& ROFFDEF_STD
) {
4212 *deftype
= ROFFDEF_STD
;
4221 if (found
== 0 && *deftype
!= ROFFDEF_ANY
) {
4222 if (*deftype
& ROFFDEF_REN
) {
4224 * This might still be a request,
4225 * so do not treat it as undefined yet.
4227 *deftype
= ROFFDEF_UNDEF
;
4231 /* Using an undefined string defines it to be empty. */
4233 roff_setstrn(&r
->strtab
, name
, len
, "", 0, 0);
4234 roff_setstrn(&r
->rentab
, name
, len
, NULL
, 0, 0);
4242 roff_freestr(struct roffkv
*r
)
4244 struct roffkv
*n
, *nn
;
4246 for (n
= r
; n
; n
= nn
) {
4254 /* --- accessors and utility functions ------------------------------------ */
4257 * Duplicate an input string, making the appropriate character
4258 * conversions (as stipulated by `tr') along the way.
4259 * Returns a heap-allocated string with all the replacements made.
4262 roff_strdup(const struct roff
*r
, const char *p
)
4264 const struct roffkv
*cp
;
4268 enum mandoc_esc esc
;
4270 if (NULL
== r
->xmbtab
&& NULL
== r
->xtab
)
4271 return mandoc_strdup(p
);
4272 else if ('\0' == *p
)
4273 return mandoc_strdup("");
4276 * Step through each character looking for term matches
4277 * (remember that a `tr' can be invoked with an escape, which is
4278 * a glyph but the escape is multi-character).
4279 * We only do this if the character hash has been initialised
4280 * and the string is >0 length.
4286 while ('\0' != *p
) {
4287 assert((unsigned int)*p
< 128);
4288 if ('\\' != *p
&& r
->xtab
&& r
->xtab
[(unsigned int)*p
].p
) {
4289 sz
= r
->xtab
[(int)*p
].sz
;
4290 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
4291 memcpy(res
+ ssz
, r
->xtab
[(int)*p
].p
, sz
);
4295 } else if ('\\' != *p
) {
4296 res
= mandoc_realloc(res
, ssz
+ 2);
4301 /* Search for term matches. */
4302 for (cp
= r
->xmbtab
; cp
; cp
= cp
->next
)
4303 if (0 == strncmp(p
, cp
->key
.p
, cp
->key
.sz
))
4308 * A match has been found.
4309 * Append the match to the array and move
4310 * forward by its keysize.
4312 res
= mandoc_realloc(res
,
4313 ssz
+ cp
->val
.sz
+ 1);
4314 memcpy(res
+ ssz
, cp
->val
.p
, cp
->val
.sz
);
4316 p
+= (int)cp
->key
.sz
;
4321 * Handle escapes carefully: we need to copy
4322 * over just the escape itself, or else we might
4323 * do replacements within the escape itself.
4324 * Make sure to pass along the bogus string.
4327 esc
= mandoc_escape(&p
, NULL
, NULL
);
4328 if (ESCAPE_ERROR
== esc
) {
4330 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
4331 memcpy(res
+ ssz
, pp
, sz
);
4335 * We bail out on bad escapes.
4336 * No need to warn: we already did so when
4337 * roff_expand() was called.
4340 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
4341 memcpy(res
+ ssz
, pp
, sz
);
4345 res
[(int)ssz
] = '\0';
4350 roff_getformat(const struct roff
*r
)
4357 * Find out whether a line is a macro line or not.
4358 * If it is, adjust the current position and return one; if it isn't,
4359 * return zero and don't change the current position.
4360 * If the control character has been set with `.cc', then let that gain precedence.
4362 * This is slightly contrary to groff, where using the non-breaking
4363 * control character when `cc' has been invoked will cause the
4364 * non-breaking macro contents to be printed verbatim.
4367 roff_getcontrol(const struct roff
*r
, const char *cp
, int *ppos
)
4373 if (r
->control
!= '\0' && cp
[pos
] == r
->control
)
4375 else if (r
->control
!= '\0')
4377 else if ('\\' == cp
[pos
] && '.' == cp
[pos
+ 1])
4379 else if ('.' == cp
[pos
] || '\'' == cp
[pos
])
4384 while (' ' == cp
[pos
] || '\t' == cp
[pos
])