]>
git.cameronkatri.com Git - mandoc.git/blob - roff.c
1 /* $Id: roff.c,v 1.383 2022/04/24 17:40:22 schwarze Exp $ */
3 * Copyright (c) 2010-2015, 2017-2022 Ingo Schwarze <schwarze@openbsd.org>
4 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 * Implementation of the roff(7) parser for mandoc(1).
22 #include <sys/types.h>
33 #include "mandoc_aux.h"
34 #include "mandoc_ohash.h"
37 #include "mandoc_parse.h"
38 #include "libmandoc.h"
40 #include "tbl_parse.h"
41 #include "eqn_parse.h"
44 * ASCII_ESC is used to signal from roff_getarg() to roff_expand()
45 * that an escape sequence resulted from copy-in processing and
46 * needs to be checked or interpolated. As it is used nowhere
47 * else, it is defined here rather than in a header file.
51 /* Maximum number of string expansions per line, to break infinite loops. */
52 #define EXPAND_LIMIT 1000
54 /* Types of definitions of macros and strings. */
55 #define ROFFDEF_USER (1 << 1) /* User-defined. */
56 #define ROFFDEF_PRE (1 << 2) /* Predefined. */
57 #define ROFFDEF_REN (1 << 3) /* Renamed standard macro. */
58 #define ROFFDEF_STD (1 << 4) /* mdoc(7) or man(7) macro. */
59 #define ROFFDEF_ANY (ROFFDEF_USER | ROFFDEF_PRE | \
60 ROFFDEF_REN | ROFFDEF_STD)
61 #define ROFFDEF_UNDEF (1 << 5) /* Completely undefined. */
63 /* --- data types --------------------------------------------------------- */
66 * An incredibly-simple string buffer.
69 char *p
; /* nil-terminated buffer */
70 size_t sz
; /* saved strlen(p) */
74 * A key-value roffstr pair as part of a singly-linked list.
79 struct roffkv
*next
; /* next in list */
83 * A single number register as part of a singly-linked list.
93 * Association of request and macro names with token IDs.
101 * A macro processing context.
102 * More than one is needed when macro calls are nested.
111 struct roff_man
*man
; /* mdoc or man parser */
112 struct roffnode
*last
; /* leaf of stack */
113 struct mctx
*mstack
; /* stack of macro contexts */
114 int *rstack
; /* stack of inverted `ie' values */
115 struct ohash
*reqtab
; /* request lookup table */
116 struct roffreg
*regtab
; /* number registers */
117 struct roffkv
*strtab
; /* user-defined strings & macros */
118 struct roffkv
*rentab
; /* renamed strings & macros */
119 struct roffkv
*xmbtab
; /* multi-byte trans table (`tr') */
120 struct roffstr
*xtab
; /* single-byte trans table (`tr') */
121 const char *current_string
; /* value of last called user macro */
122 struct tbl_node
*first_tbl
; /* first table parsed */
123 struct tbl_node
*last_tbl
; /* last table parsed */
124 struct tbl_node
*tbl
; /* current table being parsed */
125 struct eqn_node
*last_eqn
; /* equation parser */
126 struct eqn_node
*eqn
; /* active equation parser */
127 int eqn_inline
; /* current equation is inline */
128 int options
; /* parse options */
129 int mstacksz
; /* current size of mstack */
130 int mstackpos
; /* position in mstack */
131 int rstacksz
; /* current size limit of rstack */
132 int rstackpos
; /* position in rstack */
133 int format
; /* current file in mdoc or man format */
134 char control
; /* control character */
135 char escape
; /* escape character */
139 * A macro definition, condition, or ignored block.
142 enum roff_tok tok
; /* type of node */
143 struct roffnode
*parent
; /* up one in stack */
144 int line
; /* parse line */
145 int col
; /* parse col */
146 char *name
; /* node name, e.g. macro name */
147 char *end
; /* custom end macro of the block */
148 int endspan
; /* scope to: 1=eol 2=next line -1=\} */
149 int rule
; /* content is: 1=evaluated 0=skipped */
152 #define ROFF_ARGS struct roff *r, /* parse ctx */ \
153 enum roff_tok tok, /* tok of macro */ \
154 struct buf *buf, /* input buffer */ \
155 int ln, /* parse line */ \
156 int ppos, /* original pos in buffer */ \
157 int pos, /* current pos in buffer */ \
158 int *offs /* reset offset of buffer data */
160 typedef int (*roffproc
)(ROFF_ARGS
);
163 roffproc proc
; /* process new macro */
164 roffproc text
; /* process as child text of macro */
165 roffproc sub
; /* process as child of macro */
167 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
171 const char *name
; /* predefined input name */
172 const char *str
; /* replacement symbol */
175 #define PREDEF(__name, __str) \
176 { (__name), (__str) },
178 /* --- function prototypes ------------------------------------------------ */
180 static int roffnode_cleanscope(struct roff
*);
181 static int roffnode_pop(struct roff
*);
182 static void roffnode_push(struct roff
*, enum roff_tok
,
183 const char *, int, int);
184 static void roff_addtbl(struct roff_man
*, int, struct tbl_node
*);
185 static int roff_als(ROFF_ARGS
);
186 static int roff_block(ROFF_ARGS
);
187 static int roff_block_text(ROFF_ARGS
);
188 static int roff_block_sub(ROFF_ARGS
);
189 static int roff_break(ROFF_ARGS
);
190 static int roff_cblock(ROFF_ARGS
);
191 static int roff_cc(ROFF_ARGS
);
192 static int roff_ccond(struct roff
*, int, int);
193 static int roff_char(ROFF_ARGS
);
194 static int roff_cond(ROFF_ARGS
);
195 static int roff_cond_checkend(ROFF_ARGS
);
196 static int roff_cond_text(ROFF_ARGS
);
197 static int roff_cond_sub(ROFF_ARGS
);
198 static int roff_ds(ROFF_ARGS
);
199 static int roff_ec(ROFF_ARGS
);
200 static int roff_eo(ROFF_ARGS
);
201 static int roff_eqndelim(struct roff
*, struct buf
*, int);
202 static int roff_evalcond(struct roff
*, int, char *, int *);
203 static int roff_evalnum(struct roff
*, int,
204 const char *, int *, int *, int);
205 static int roff_evalpar(struct roff
*, int,
206 const char *, int *, int *, int);
207 static int roff_evalstrcond(const char *, int *);
208 static int roff_expand(struct roff
*, struct buf
*,
210 static void roff_free1(struct roff
*);
211 static void roff_freereg(struct roffreg
*);
212 static void roff_freestr(struct roffkv
*);
213 static size_t roff_getname(struct roff
*, char **, int, int);
214 static int roff_getnum(const char *, int *, int *, int);
215 static int roff_getop(const char *, int *, char *);
216 static int roff_getregn(struct roff
*,
217 const char *, size_t, char);
218 static int roff_getregro(const struct roff
*,
220 static const char *roff_getstrn(struct roff
*,
221 const char *, size_t, int *);
222 static int roff_hasregn(const struct roff
*,
223 const char *, size_t);
224 static int roff_insec(ROFF_ARGS
);
225 static int roff_it(ROFF_ARGS
);
226 static int roff_line_ignore(ROFF_ARGS
);
227 static void roff_man_alloc1(struct roff_man
*);
228 static void roff_man_free1(struct roff_man
*);
229 static int roff_manyarg(ROFF_ARGS
);
230 static int roff_noarg(ROFF_ARGS
);
231 static int roff_nop(ROFF_ARGS
);
232 static int roff_nr(ROFF_ARGS
);
233 static int roff_onearg(ROFF_ARGS
);
234 static enum roff_tok
roff_parse(struct roff
*, char *, int *,
236 static int roff_parsetext(struct roff
*, struct buf
*,
238 static int roff_renamed(ROFF_ARGS
);
239 static int roff_return(ROFF_ARGS
);
240 static int roff_rm(ROFF_ARGS
);
241 static int roff_rn(ROFF_ARGS
);
242 static int roff_rr(ROFF_ARGS
);
243 static void roff_setregn(struct roff
*, const char *,
244 size_t, int, char, int);
245 static void roff_setstr(struct roff
*,
246 const char *, const char *, int);
247 static void roff_setstrn(struct roffkv
**, const char *,
248 size_t, const char *, size_t, int);
249 static int roff_shift(ROFF_ARGS
);
250 static int roff_so(ROFF_ARGS
);
251 static int roff_tr(ROFF_ARGS
);
252 static int roff_Dd(ROFF_ARGS
);
253 static int roff_TE(ROFF_ARGS
);
254 static int roff_TS(ROFF_ARGS
);
255 static int roff_EQ(ROFF_ARGS
);
256 static int roff_EN(ROFF_ARGS
);
257 static int roff_T_(ROFF_ARGS
);
258 static int roff_unsupp(ROFF_ARGS
);
259 static int roff_userdef(ROFF_ARGS
);
261 /* --- constant data ------------------------------------------------------ */
263 #define ROFFNUM_SCALE (1 << 0) /* Honour scaling in roff_getnum(). */
264 #define ROFFNUM_WHITE (1 << 1) /* Skip whitespace in roff_evalnum(). */
266 const char *__roff_name
[MAN_MAX
+ 1] = {
267 "br", "ce", "fi", "ft",
271 "ab", "ad", "af", "aln",
272 "als", "am", "am1", "ami",
273 "ami1", "as", "as1", "asciify",
274 "backtrace", "bd", "bleedat", "blm",
275 "box", "boxa", "bp", "BP",
276 "break", "breakchar", "brnl", "brp",
278 "cf", "cflags", "ch", "char",
279 "chop", "class", "close", "CL",
280 "color", "composite", "continue", "cp",
281 "cropat", "cs", "cu", "da",
282 "dch", "Dd", "de", "de1",
283 "defcolor", "dei", "dei1", "device",
284 "devicem", "di", "do", "ds",
285 "ds1", "dwh", "dt", "ec",
286 "ecr", "ecs", "el", "em",
287 "EN", "eo", "EP", "EQ",
288 "errprint", "ev", "evc", "ex",
289 "fallback", "fam", "fc", "fchar",
290 "fcolor", "fdeferlig", "feature", "fkern",
291 "fl", "flig", "fp", "fps",
292 "fschar", "fspacewidth", "fspecial", "ftr",
293 "fzoom", "gcolor", "hc", "hcode",
294 "hidechar", "hla", "hlm", "hpf",
295 "hpfa", "hpfcode", "hw", "hy",
296 "hylang", "hylen", "hym", "hypp",
297 "hys", "ie", "if", "ig",
298 "index", "it", "itc", "IX",
299 "kern", "kernafter", "kernbefore", "kernpair",
300 "lc", "lc_ctype", "lds", "length",
301 "letadj", "lf", "lg", "lhang",
302 "linetabs", "lnr", "lnrf", "lpfx",
304 "mediasize", "minss", "mk", "mso",
305 "na", "ne", "nh", "nhychar",
306 "nm", "nn", "nop", "nr",
307 "nrf", "nroff", "ns", "nx",
308 "open", "opena", "os", "output",
309 "padj", "papersize", "pc", "pev",
310 "pi", "PI", "pl", "pm",
312 "psbb", "pshape", "pso", "ptr",
313 "pvs", "rchar", "rd", "recursionlimit",
314 "return", "rfschar", "rhang",
315 "rm", "rn", "rnn", "rr",
316 "rs", "rt", "schar", "sentchar",
317 "shc", "shift", "sizes", "so",
318 "spacewidth", "special", "spreadwarn", "ss",
319 "sty", "substring", "sv", "sy",
322 "tm", "tm1", "tmc", "tr",
323 "track", "transchar", "trf", "trimat",
324 "trin", "trnt", "troff", "TS",
325 "uf", "ul", "unformat", "unwatch",
326 "unwatchn", "vpt", "vs", "warn",
327 "warnscale", "watch", "watchlength", "watchn",
328 "wh", "while", "write", "writec",
329 "writem", "xflag", ".", NULL
,
331 "Dd", "Dt", "Os", "Sh",
332 "Ss", "Pp", "D1", "Dl",
333 "Bd", "Ed", "Bl", "El",
334 "It", "Ad", "An", "Ap",
335 "Ar", "Cd", "Cm", "Dv",
336 "Er", "Ev", "Ex", "Fa",
337 "Fd", "Fl", "Fn", "Ft",
338 "Ic", "In", "Li", "Nd",
339 "Nm", "Op", "Ot", "Pa",
340 "Rv", "St", "Va", "Vt",
341 "Xr", "%A", "%B", "%D",
342 "%I", "%J", "%N", "%O",
343 "%P", "%R", "%T", "%V",
344 "Ac", "Ao", "Aq", "At",
345 "Bc", "Bf", "Bo", "Bq",
346 "Bsx", "Bx", "Db", "Dc",
347 "Do", "Dq", "Ec", "Ef",
348 "Em", "Eo", "Fx", "Ms",
349 "No", "Ns", "Nx", "Ox",
350 "Pc", "Pf", "Po", "Pq",
351 "Qc", "Ql", "Qo", "Qq",
352 "Re", "Rs", "Sc", "So",
353 "Sq", "Sm", "Sx", "Sy",
354 "Tn", "Ux", "Xc", "Xo",
355 "Fo", "Fc", "Oo", "Oc",
356 "Bk", "Ek", "Bt", "Hf",
357 "Fr", "Ud", "Lb", "Lp",
358 "Lk", "Mt", "Brq", "Bro",
359 "Brc", "%C", "Es", "En",
360 "Dx", "%Q", "%U", "Ta",
362 "TH", "SH", "SS", "TP",
364 "LP", "PP", "P", "IP",
365 "HP", "SM", "SB", "BI",
366 "IB", "BR", "RB", "R",
367 "B", "I", "IR", "RI",
368 "RE", "RS", "DT", "UC",
372 "UE", "MT", "ME", NULL
374 const char *const *roff_name
= __roff_name
;
376 static struct roffmac roffs
[TOKEN_NONE
] = {
377 { roff_noarg
, NULL
, NULL
, 0 }, /* br */
378 { roff_onearg
, NULL
, NULL
, 0 }, /* ce */
379 { roff_noarg
, NULL
, NULL
, 0 }, /* fi */
380 { roff_onearg
, NULL
, NULL
, 0 }, /* ft */
381 { roff_onearg
, NULL
, NULL
, 0 }, /* ll */
382 { roff_onearg
, NULL
, NULL
, 0 }, /* mc */
383 { roff_noarg
, NULL
, NULL
, 0 }, /* nf */
384 { roff_onearg
, NULL
, NULL
, 0 }, /* po */
385 { roff_onearg
, NULL
, NULL
, 0 }, /* rj */
386 { roff_onearg
, NULL
, NULL
, 0 }, /* sp */
387 { roff_manyarg
, NULL
, NULL
, 0 }, /* ta */
388 { roff_onearg
, NULL
, NULL
, 0 }, /* ti */
389 { NULL
, NULL
, NULL
, 0 }, /* ROFF_MAX */
390 { roff_unsupp
, NULL
, NULL
, 0 }, /* ab */
391 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ad */
392 { roff_line_ignore
, NULL
, NULL
, 0 }, /* af */
393 { roff_unsupp
, NULL
, NULL
, 0 }, /* aln */
394 { roff_als
, NULL
, NULL
, 0 }, /* als */
395 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* am */
396 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* am1 */
397 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* ami */
398 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* ami1 */
399 { roff_ds
, NULL
, NULL
, 0 }, /* as */
400 { roff_ds
, NULL
, NULL
, 0 }, /* as1 */
401 { roff_unsupp
, NULL
, NULL
, 0 }, /* asciify */
402 { roff_line_ignore
, NULL
, NULL
, 0 }, /* backtrace */
403 { roff_line_ignore
, NULL
, NULL
, 0 }, /* bd */
404 { roff_line_ignore
, NULL
, NULL
, 0 }, /* bleedat */
405 { roff_unsupp
, NULL
, NULL
, 0 }, /* blm */
406 { roff_unsupp
, NULL
, NULL
, 0 }, /* box */
407 { roff_unsupp
, NULL
, NULL
, 0 }, /* boxa */
408 { roff_line_ignore
, NULL
, NULL
, 0 }, /* bp */
409 { roff_unsupp
, NULL
, NULL
, 0 }, /* BP */
410 { roff_break
, NULL
, NULL
, 0 }, /* break */
411 { roff_line_ignore
, NULL
, NULL
, 0 }, /* breakchar */
412 { roff_line_ignore
, NULL
, NULL
, 0 }, /* brnl */
413 { roff_noarg
, NULL
, NULL
, 0 }, /* brp */
414 { roff_line_ignore
, NULL
, NULL
, 0 }, /* brpnl */
415 { roff_unsupp
, NULL
, NULL
, 0 }, /* c2 */
416 { roff_cc
, NULL
, NULL
, 0 }, /* cc */
417 { roff_insec
, NULL
, NULL
, 0 }, /* cf */
418 { roff_line_ignore
, NULL
, NULL
, 0 }, /* cflags */
419 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ch */
420 { roff_char
, NULL
, NULL
, 0 }, /* char */
421 { roff_unsupp
, NULL
, NULL
, 0 }, /* chop */
422 { roff_line_ignore
, NULL
, NULL
, 0 }, /* class */
423 { roff_insec
, NULL
, NULL
, 0 }, /* close */
424 { roff_unsupp
, NULL
, NULL
, 0 }, /* CL */
425 { roff_line_ignore
, NULL
, NULL
, 0 }, /* color */
426 { roff_unsupp
, NULL
, NULL
, 0 }, /* composite */
427 { roff_unsupp
, NULL
, NULL
, 0 }, /* continue */
428 { roff_line_ignore
, NULL
, NULL
, 0 }, /* cp */
429 { roff_line_ignore
, NULL
, NULL
, 0 }, /* cropat */
430 { roff_line_ignore
, NULL
, NULL
, 0 }, /* cs */
431 { roff_line_ignore
, NULL
, NULL
, 0 }, /* cu */
432 { roff_unsupp
, NULL
, NULL
, 0 }, /* da */
433 { roff_unsupp
, NULL
, NULL
, 0 }, /* dch */
434 { roff_Dd
, NULL
, NULL
, 0 }, /* Dd */
435 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* de */
436 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* de1 */
437 { roff_line_ignore
, NULL
, NULL
, 0 }, /* defcolor */
438 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* dei */
439 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* dei1 */
440 { roff_unsupp
, NULL
, NULL
, 0 }, /* device */
441 { roff_unsupp
, NULL
, NULL
, 0 }, /* devicem */
442 { roff_unsupp
, NULL
, NULL
, 0 }, /* di */
443 { roff_unsupp
, NULL
, NULL
, 0 }, /* do */
444 { roff_ds
, NULL
, NULL
, 0 }, /* ds */
445 { roff_ds
, NULL
, NULL
, 0 }, /* ds1 */
446 { roff_unsupp
, NULL
, NULL
, 0 }, /* dwh */
447 { roff_unsupp
, NULL
, NULL
, 0 }, /* dt */
448 { roff_ec
, NULL
, NULL
, 0 }, /* ec */
449 { roff_unsupp
, NULL
, NULL
, 0 }, /* ecr */
450 { roff_unsupp
, NULL
, NULL
, 0 }, /* ecs */
451 { roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
}, /* el */
452 { roff_unsupp
, NULL
, NULL
, 0 }, /* em */
453 { roff_EN
, NULL
, NULL
, 0 }, /* EN */
454 { roff_eo
, NULL
, NULL
, 0 }, /* eo */
455 { roff_unsupp
, NULL
, NULL
, 0 }, /* EP */
456 { roff_EQ
, NULL
, NULL
, 0 }, /* EQ */
457 { roff_line_ignore
, NULL
, NULL
, 0 }, /* errprint */
458 { roff_unsupp
, NULL
, NULL
, 0 }, /* ev */
459 { roff_unsupp
, NULL
, NULL
, 0 }, /* evc */
460 { roff_unsupp
, NULL
, NULL
, 0 }, /* ex */
461 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fallback */
462 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fam */
463 { roff_unsupp
, NULL
, NULL
, 0 }, /* fc */
464 { roff_unsupp
, NULL
, NULL
, 0 }, /* fchar */
465 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fcolor */
466 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fdeferlig */
467 { roff_line_ignore
, NULL
, NULL
, 0 }, /* feature */
468 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fkern */
469 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fl */
470 { roff_line_ignore
, NULL
, NULL
, 0 }, /* flig */
471 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fp */
472 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fps */
473 { roff_unsupp
, NULL
, NULL
, 0 }, /* fschar */
474 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fspacewidth */
475 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fspecial */
476 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ftr */
477 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fzoom */
478 { roff_line_ignore
, NULL
, NULL
, 0 }, /* gcolor */
479 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hc */
480 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hcode */
481 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hidechar */
482 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hla */
483 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hlm */
484 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hpf */
485 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hpfa */
486 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hpfcode */
487 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hw */
488 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hy */
489 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hylang */
490 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hylen */
491 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hym */
492 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hypp */
493 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hys */
494 { roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
}, /* ie */
495 { roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
}, /* if */
496 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* ig */
497 { roff_unsupp
, NULL
, NULL
, 0 }, /* index */
498 { roff_it
, NULL
, NULL
, 0 }, /* it */
499 { roff_unsupp
, NULL
, NULL
, 0 }, /* itc */
500 { roff_line_ignore
, NULL
, NULL
, 0 }, /* IX */
501 { roff_line_ignore
, NULL
, NULL
, 0 }, /* kern */
502 { roff_line_ignore
, NULL
, NULL
, 0 }, /* kernafter */
503 { roff_line_ignore
, NULL
, NULL
, 0 }, /* kernbefore */
504 { roff_line_ignore
, NULL
, NULL
, 0 }, /* kernpair */
505 { roff_unsupp
, NULL
, NULL
, 0 }, /* lc */
506 { roff_unsupp
, NULL
, NULL
, 0 }, /* lc_ctype */
507 { roff_unsupp
, NULL
, NULL
, 0 }, /* lds */
508 { roff_unsupp
, NULL
, NULL
, 0 }, /* length */
509 { roff_line_ignore
, NULL
, NULL
, 0 }, /* letadj */
510 { roff_insec
, NULL
, NULL
, 0 }, /* lf */
511 { roff_line_ignore
, NULL
, NULL
, 0 }, /* lg */
512 { roff_line_ignore
, NULL
, NULL
, 0 }, /* lhang */
513 { roff_unsupp
, NULL
, NULL
, 0 }, /* linetabs */
514 { roff_unsupp
, NULL
, NULL
, 0 }, /* lnr */
515 { roff_unsupp
, NULL
, NULL
, 0 }, /* lnrf */
516 { roff_unsupp
, NULL
, NULL
, 0 }, /* lpfx */
517 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ls */
518 { roff_unsupp
, NULL
, NULL
, 0 }, /* lsm */
519 { roff_line_ignore
, NULL
, NULL
, 0 }, /* lt */
520 { roff_line_ignore
, NULL
, NULL
, 0 }, /* mediasize */
521 { roff_line_ignore
, NULL
, NULL
, 0 }, /* minss */
522 { roff_line_ignore
, NULL
, NULL
, 0 }, /* mk */
523 { roff_insec
, NULL
, NULL
, 0 }, /* mso */
524 { roff_line_ignore
, NULL
, NULL
, 0 }, /* na */
525 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ne */
526 { roff_line_ignore
, NULL
, NULL
, 0 }, /* nh */
527 { roff_line_ignore
, NULL
, NULL
, 0 }, /* nhychar */
528 { roff_unsupp
, NULL
, NULL
, 0 }, /* nm */
529 { roff_unsupp
, NULL
, NULL
, 0 }, /* nn */
530 { roff_nop
, NULL
, NULL
, 0 }, /* nop */
531 { roff_nr
, NULL
, NULL
, 0 }, /* nr */
532 { roff_unsupp
, NULL
, NULL
, 0 }, /* nrf */
533 { roff_line_ignore
, NULL
, NULL
, 0 }, /* nroff */
534 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ns */
535 { roff_insec
, NULL
, NULL
, 0 }, /* nx */
536 { roff_insec
, NULL
, NULL
, 0 }, /* open */
537 { roff_insec
, NULL
, NULL
, 0 }, /* opena */
538 { roff_line_ignore
, NULL
, NULL
, 0 }, /* os */
539 { roff_unsupp
, NULL
, NULL
, 0 }, /* output */
540 { roff_line_ignore
, NULL
, NULL
, 0 }, /* padj */
541 { roff_line_ignore
, NULL
, NULL
, 0 }, /* papersize */
542 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pc */
543 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pev */
544 { roff_insec
, NULL
, NULL
, 0 }, /* pi */
545 { roff_unsupp
, NULL
, NULL
, 0 }, /* PI */
546 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pl */
547 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pm */
548 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pn */
549 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pnr */
550 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ps */
551 { roff_unsupp
, NULL
, NULL
, 0 }, /* psbb */
552 { roff_unsupp
, NULL
, NULL
, 0 }, /* pshape */
553 { roff_insec
, NULL
, NULL
, 0 }, /* pso */
554 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ptr */
555 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pvs */
556 { roff_unsupp
, NULL
, NULL
, 0 }, /* rchar */
557 { roff_line_ignore
, NULL
, NULL
, 0 }, /* rd */
558 { roff_line_ignore
, NULL
, NULL
, 0 }, /* recursionlimit */
559 { roff_return
, NULL
, NULL
, 0 }, /* return */
560 { roff_unsupp
, NULL
, NULL
, 0 }, /* rfschar */
561 { roff_line_ignore
, NULL
, NULL
, 0 }, /* rhang */
562 { roff_rm
, NULL
, NULL
, 0 }, /* rm */
563 { roff_rn
, NULL
, NULL
, 0 }, /* rn */
564 { roff_unsupp
, NULL
, NULL
, 0 }, /* rnn */
565 { roff_rr
, NULL
, NULL
, 0 }, /* rr */
566 { roff_line_ignore
, NULL
, NULL
, 0 }, /* rs */
567 { roff_line_ignore
, NULL
, NULL
, 0 }, /* rt */
568 { roff_unsupp
, NULL
, NULL
, 0 }, /* schar */
569 { roff_line_ignore
, NULL
, NULL
, 0 }, /* sentchar */
570 { roff_line_ignore
, NULL
, NULL
, 0 }, /* shc */
571 { roff_shift
, NULL
, NULL
, 0 }, /* shift */
572 { roff_line_ignore
, NULL
, NULL
, 0 }, /* sizes */
573 { roff_so
, NULL
, NULL
, 0 }, /* so */
574 { roff_line_ignore
, NULL
, NULL
, 0 }, /* spacewidth */
575 { roff_line_ignore
, NULL
, NULL
, 0 }, /* special */
576 { roff_line_ignore
, NULL
, NULL
, 0 }, /* spreadwarn */
577 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ss */
578 { roff_line_ignore
, NULL
, NULL
, 0 }, /* sty */
579 { roff_unsupp
, NULL
, NULL
, 0 }, /* substring */
580 { roff_line_ignore
, NULL
, NULL
, 0 }, /* sv */
581 { roff_insec
, NULL
, NULL
, 0 }, /* sy */
582 { roff_T_
, NULL
, NULL
, 0 }, /* T& */
583 { roff_unsupp
, NULL
, NULL
, 0 }, /* tc */
584 { roff_TE
, NULL
, NULL
, 0 }, /* TE */
585 { roff_Dd
, NULL
, NULL
, 0 }, /* TH */
586 { roff_line_ignore
, NULL
, NULL
, 0 }, /* tkf */
587 { roff_unsupp
, NULL
, NULL
, 0 }, /* tl */
588 { roff_line_ignore
, NULL
, NULL
, 0 }, /* tm */
589 { roff_line_ignore
, NULL
, NULL
, 0 }, /* tm1 */
590 { roff_line_ignore
, NULL
, NULL
, 0 }, /* tmc */
591 { roff_tr
, NULL
, NULL
, 0 }, /* tr */
592 { roff_line_ignore
, NULL
, NULL
, 0 }, /* track */
593 { roff_line_ignore
, NULL
, NULL
, 0 }, /* transchar */
594 { roff_insec
, NULL
, NULL
, 0 }, /* trf */
595 { roff_line_ignore
, NULL
, NULL
, 0 }, /* trimat */
596 { roff_unsupp
, NULL
, NULL
, 0 }, /* trin */
597 { roff_unsupp
, NULL
, NULL
, 0 }, /* trnt */
598 { roff_line_ignore
, NULL
, NULL
, 0 }, /* troff */
599 { roff_TS
, NULL
, NULL
, 0 }, /* TS */
600 { roff_line_ignore
, NULL
, NULL
, 0 }, /* uf */
601 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ul */
602 { roff_unsupp
, NULL
, NULL
, 0 }, /* unformat */
603 { roff_line_ignore
, NULL
, NULL
, 0 }, /* unwatch */
604 { roff_line_ignore
, NULL
, NULL
, 0 }, /* unwatchn */
605 { roff_line_ignore
, NULL
, NULL
, 0 }, /* vpt */
606 { roff_line_ignore
, NULL
, NULL
, 0 }, /* vs */
607 { roff_line_ignore
, NULL
, NULL
, 0 }, /* warn */
608 { roff_line_ignore
, NULL
, NULL
, 0 }, /* warnscale */
609 { roff_line_ignore
, NULL
, NULL
, 0 }, /* watch */
610 { roff_line_ignore
, NULL
, NULL
, 0 }, /* watchlength */
611 { roff_line_ignore
, NULL
, NULL
, 0 }, /* watchn */
612 { roff_unsupp
, NULL
, NULL
, 0 }, /* wh */
613 { roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
}, /*while*/
614 { roff_insec
, NULL
, NULL
, 0 }, /* write */
615 { roff_insec
, NULL
, NULL
, 0 }, /* writec */
616 { roff_insec
, NULL
, NULL
, 0 }, /* writem */
617 { roff_line_ignore
, NULL
, NULL
, 0 }, /* xflag */
618 { roff_cblock
, NULL
, NULL
, 0 }, /* . */
619 { roff_renamed
, NULL
, NULL
, 0 },
620 { roff_userdef
, NULL
, NULL
, 0 }
623 /* Array of injected predefined strings. */
624 #define PREDEFS_MAX 38
625 static const struct predef predefs
[PREDEFS_MAX
] = {
626 #include "predefs.in"
629 static int roffce_lines
; /* number of input lines to center */
630 static struct roff_node
*roffce_node
; /* active request */
631 static int roffit_lines
; /* number of lines to delay */
632 static char *roffit_macro
; /* nil-terminated macro line */
635 /* --- request table ------------------------------------------------------ */
638 roffhash_alloc(enum roff_tok mintok
, enum roff_tok maxtok
)
646 htab
= mandoc_malloc(sizeof(*htab
));
647 mandoc_ohash_init(htab
, 8, offsetof(struct roffreq
, name
));
649 for (tok
= mintok
; tok
< maxtok
; tok
++) {
650 if (roff_name
[tok
] == NULL
)
652 sz
= strlen(roff_name
[tok
]);
653 req
= mandoc_malloc(sizeof(*req
) + sz
+ 1);
655 memcpy(req
->name
, roff_name
[tok
], sz
+ 1);
656 slot
= ohash_qlookup(htab
, req
->name
);
657 ohash_insert(htab
, slot
, req
);
663 roffhash_free(struct ohash
*htab
)
670 for (req
= ohash_first(htab
, &slot
); req
!= NULL
;
671 req
= ohash_next(htab
, &slot
))
678 roffhash_find(struct ohash
*htab
, const char *name
, size_t sz
)
685 req
= ohash_find(htab
, ohash_qlookupi(htab
, name
, &end
));
687 req
= ohash_find(htab
, ohash_qlookup(htab
, name
));
688 return req
== NULL
? TOKEN_NONE
: req
->tok
;
691 /* --- stack of request blocks -------------------------------------------- */
694 * Pop the current node off of the stack of roff instructions currently
695 * pending. Return 1 if it is a loop or 0 otherwise.
698 roffnode_pop(struct roff
*r
)
704 inloop
= p
->tok
== ROFF_while
;
713 * Push a roff node onto the instruction stack. This must later be
714 * removed with roffnode_pop().
717 roffnode_push(struct roff
*r
, enum roff_tok tok
, const char *name
,
722 p
= mandoc_calloc(1, sizeof(struct roffnode
));
725 p
->name
= mandoc_strdup(name
);
729 p
->rule
= p
->parent
? p
->parent
->rule
: 0;
734 /* --- roff parser state data management ---------------------------------- */
737 roff_free1(struct roff
*r
)
741 tbl_free(r
->first_tbl
);
742 r
->first_tbl
= r
->last_tbl
= r
->tbl
= NULL
;
744 eqn_free(r
->last_eqn
);
745 r
->last_eqn
= r
->eqn
= NULL
;
747 while (r
->mstackpos
>= 0)
758 roff_freereg(r
->regtab
);
761 roff_freestr(r
->strtab
);
762 roff_freestr(r
->rentab
);
763 roff_freestr(r
->xmbtab
);
764 r
->strtab
= r
->rentab
= r
->xmbtab
= NULL
;
767 for (i
= 0; i
< 128; i
++)
774 roff_reset(struct roff
*r
)
777 r
->options
|= MPARSE_COMMENT
;
778 r
->format
= r
->options
& (MPARSE_MDOC
| MPARSE_MAN
);
788 roff_free(struct roff
*r
)
793 for (i
= 0; i
< r
->mstacksz
; i
++)
794 free(r
->mstack
[i
].argv
);
796 roffhash_free(r
->reqtab
);
801 roff_alloc(int options
)
805 r
= mandoc_calloc(1, sizeof(struct roff
));
806 r
->reqtab
= roffhash_alloc(0, ROFF_RENAMED
);
807 r
->options
= options
| MPARSE_COMMENT
;
808 r
->format
= options
& (MPARSE_MDOC
| MPARSE_MAN
);
815 /* --- syntax tree state data management ---------------------------------- */
818 roff_man_free1(struct roff_man
*man
)
820 if (man
->meta
.first
!= NULL
)
821 roff_node_delete(man
, man
->meta
.first
);
822 free(man
->meta
.msec
);
825 free(man
->meta
.arch
);
826 free(man
->meta
.title
);
827 free(man
->meta
.name
);
828 free(man
->meta
.date
);
829 free(man
->meta
.sodest
);
833 roff_state_reset(struct roff_man
*man
)
835 man
->last
= man
->meta
.first
;
838 man
->lastsec
= man
->lastnamed
= SEC_NONE
;
839 man
->next
= ROFF_NEXT_CHILD
;
840 roff_setreg(man
->roff
, "nS", 0, '=');
844 roff_man_alloc1(struct roff_man
*man
)
846 memset(&man
->meta
, 0, sizeof(man
->meta
));
847 man
->meta
.first
= mandoc_calloc(1, sizeof(*man
->meta
.first
));
848 man
->meta
.first
->type
= ROFFT_ROOT
;
849 man
->meta
.macroset
= MACROSET_NONE
;
850 roff_state_reset(man
);
854 roff_man_reset(struct roff_man
*man
)
857 roff_man_alloc1(man
);
861 roff_man_free(struct roff_man
*man
)
869 roff_man_alloc(struct roff
*roff
, const char *os_s
, int quick
)
871 struct roff_man
*man
;
873 man
= mandoc_calloc(1, sizeof(*man
));
877 roff_man_alloc1(man
);
882 /* --- syntax tree handling ----------------------------------------------- */
885 roff_node_alloc(struct roff_man
*man
, int line
, int pos
,
886 enum roff_type type
, int tok
)
890 n
= mandoc_calloc(1, sizeof(*n
));
895 n
->sec
= man
->lastsec
;
897 if (man
->flags
& MDOC_SYNOPSIS
)
898 n
->flags
|= NODE_SYNPRETTY
;
900 n
->flags
&= ~NODE_SYNPRETTY
;
901 if ((man
->flags
& (ROFF_NOFILL
| ROFF_NONOFILL
)) == ROFF_NOFILL
)
902 n
->flags
|= NODE_NOFILL
;
904 n
->flags
&= ~NODE_NOFILL
;
905 if (man
->flags
& MDOC_NEWLINE
)
906 n
->flags
|= NODE_LINE
;
907 man
->flags
&= ~MDOC_NEWLINE
;
913 roff_node_append(struct roff_man
*man
, struct roff_node
*n
)
917 case ROFF_NEXT_SIBLING
:
918 if (man
->last
->next
!= NULL
) {
919 n
->next
= man
->last
->next
;
920 man
->last
->next
->prev
= n
;
922 man
->last
->parent
->last
= n
;
925 n
->parent
= man
->last
->parent
;
927 case ROFF_NEXT_CHILD
:
928 if (man
->last
->child
!= NULL
) {
929 n
->next
= man
->last
->child
;
930 man
->last
->child
->prev
= n
;
933 man
->last
->child
= n
;
934 n
->parent
= man
->last
;
946 if (n
->end
!= ENDBODY_NOT
)
958 * Copy over the normalised-data pointer of our parent. Not
959 * everybody has one, but copying a null pointer is fine.
962 n
->norm
= n
->parent
->norm
;
963 assert(n
->parent
->type
== ROFFT_BLOCK
);
967 roff_word_alloc(struct roff_man
*man
, int line
, int pos
, const char *word
)
971 n
= roff_node_alloc(man
, line
, pos
, ROFFT_TEXT
, TOKEN_NONE
);
972 n
->string
= roff_strdup(man
->roff
, word
);
973 roff_node_append(man
, n
);
974 n
->flags
|= NODE_VALID
| NODE_ENDED
;
975 man
->next
= ROFF_NEXT_SIBLING
;
979 roff_word_append(struct roff_man
*man
, const char *word
)
982 char *addstr
, *newstr
;
985 addstr
= roff_strdup(man
->roff
, word
);
986 mandoc_asprintf(&newstr
, "%s %s", n
->string
, addstr
);
990 man
->next
= ROFF_NEXT_SIBLING
;
994 roff_elem_alloc(struct roff_man
*man
, int line
, int pos
, int tok
)
998 n
= roff_node_alloc(man
, line
, pos
, ROFFT_ELEM
, tok
);
999 roff_node_append(man
, n
);
1000 man
->next
= ROFF_NEXT_CHILD
;
1004 roff_block_alloc(struct roff_man
*man
, int line
, int pos
, int tok
)
1006 struct roff_node
*n
;
1008 n
= roff_node_alloc(man
, line
, pos
, ROFFT_BLOCK
, tok
);
1009 roff_node_append(man
, n
);
1010 man
->next
= ROFF_NEXT_CHILD
;
1015 roff_head_alloc(struct roff_man
*man
, int line
, int pos
, int tok
)
1017 struct roff_node
*n
;
1019 n
= roff_node_alloc(man
, line
, pos
, ROFFT_HEAD
, tok
);
1020 roff_node_append(man
, n
);
1021 man
->next
= ROFF_NEXT_CHILD
;
1026 roff_body_alloc(struct roff_man
*man
, int line
, int pos
, int tok
)
1028 struct roff_node
*n
;
1030 n
= roff_node_alloc(man
, line
, pos
, ROFFT_BODY
, tok
);
1031 roff_node_append(man
, n
);
1032 man
->next
= ROFF_NEXT_CHILD
;
1037 roff_addtbl(struct roff_man
*man
, int line
, struct tbl_node
*tbl
)
1039 struct roff_node
*n
;
1040 struct tbl_span
*span
;
1042 if (man
->meta
.macroset
== MACROSET_MAN
)
1043 man_breakscope(man
, ROFF_TS
);
1044 while ((span
= tbl_span(tbl
)) != NULL
) {
1045 n
= roff_node_alloc(man
, line
, 0, ROFFT_TBL
, TOKEN_NONE
);
1047 roff_node_append(man
, n
);
1048 n
->flags
|= NODE_VALID
| NODE_ENDED
;
1049 man
->next
= ROFF_NEXT_SIBLING
;
1054 roff_node_unlink(struct roff_man
*man
, struct roff_node
*n
)
1057 /* Adjust siblings. */
1060 n
->prev
->next
= n
->next
;
1062 n
->next
->prev
= n
->prev
;
1064 /* Adjust parent. */
1066 if (n
->parent
!= NULL
) {
1067 if (n
->parent
->child
== n
)
1068 n
->parent
->child
= n
->next
;
1069 if (n
->parent
->last
== n
)
1070 n
->parent
->last
= n
->prev
;
1073 /* Adjust parse point. */
1077 if (man
->last
== n
) {
1078 if (n
->prev
== NULL
) {
1079 man
->last
= n
->parent
;
1080 man
->next
= ROFF_NEXT_CHILD
;
1082 man
->last
= n
->prev
;
1083 man
->next
= ROFF_NEXT_SIBLING
;
1086 if (man
->meta
.first
== n
)
1087 man
->meta
.first
= NULL
;
1091 roff_node_relink(struct roff_man
*man
, struct roff_node
*n
)
1093 roff_node_unlink(man
, n
);
1094 n
->prev
= n
->next
= NULL
;
1095 roff_node_append(man
, n
);
1099 roff_node_free(struct roff_node
*n
)
1102 if (n
->args
!= NULL
)
1103 mdoc_argv_free(n
->args
);
1104 if (n
->type
== ROFFT_BLOCK
|| n
->type
== ROFFT_ELEM
)
1106 eqn_box_free(n
->eqn
);
1113 roff_node_delete(struct roff_man
*man
, struct roff_node
*n
)
1116 while (n
->child
!= NULL
)
1117 roff_node_delete(man
, n
->child
);
1118 roff_node_unlink(man
, n
);
1123 roff_node_transparent(struct roff_node
*n
)
1127 if (n
->type
== ROFFT_COMMENT
|| n
->flags
& NODE_NOPRT
)
1129 return roff_tok_transparent(n
->tok
);
1133 roff_tok_transparent(enum roff_tok tok
)
1156 roff_node_child(struct roff_node
*n
)
1158 for (n
= n
->child
; roff_node_transparent(n
); n
= n
->next
)
1164 roff_node_prev(struct roff_node
*n
)
1168 } while (roff_node_transparent(n
));
1173 roff_node_next(struct roff_node
*n
)
1177 } while (roff_node_transparent(n
));
1182 deroff(char **dest
, const struct roff_node
*n
)
1187 if (n
->string
== NULL
) {
1188 for (n
= n
->child
; n
!= NULL
; n
= n
->next
)
1193 /* Skip leading whitespace. */
1195 for (cp
= n
->string
; *cp
!= '\0'; cp
++) {
1196 if (cp
[0] == '\\' && cp
[1] != '\0' &&
1197 strchr(" %&0^|~", cp
[1]) != NULL
)
1199 else if ( ! isspace((unsigned char)*cp
))
1203 /* Skip trailing backslash. */
1206 if (sz
> 0 && cp
[sz
- 1] == '\\')
1209 /* Skip trailing whitespace. */
1212 if ( ! isspace((unsigned char)cp
[sz
-1]))
1215 /* Skip empty strings. */
1220 if (*dest
== NULL
) {
1221 *dest
= mandoc_strndup(cp
, sz
);
1225 mandoc_asprintf(&cp
, "%s %*s", *dest
, (int)sz
, cp
);
1230 /* --- main functions of the roff parser ---------------------------------- */
1233 * In the current line, expand escape sequences that produce parsable
1234 * input text. Also check the syntax of the remaining escape sequences,
1235 * which typically produce output glyphs or change formatter state.
1238 roff_expand(struct roff
*r
, struct buf
*buf
, int ln
, int pos
, char newesc
)
1240 struct mctx
*ctx
; /* current macro call context */
1241 char ubuf
[24]; /* buffer to print the number */
1242 struct roff_node
*n
; /* used for header comments */
1243 const char *start
; /* start of the string to process */
1244 char *stesc
; /* start of an escape sequence ('\\') */
1245 const char *esct
; /* type of escape sequence */
1246 char *ep
; /* end of comment string */
1247 const char *stnam
; /* start of the name, after "[(*" */
1248 const char *cp
; /* end of the name, e.g. before ']' */
1249 const char *res
; /* the string to be substituted */
1250 char *nbuf
; /* new buffer to copy buf->buf to */
1251 size_t maxl
; /* expected length of the escape name */
1252 size_t naml
; /* actual length of the escape name */
1253 size_t asz
; /* length of the replacement */
1254 size_t rsz
; /* length of the rest of the string */
1255 int inaml
; /* length returned from mandoc_escape() */
1256 int expand_count
; /* to avoid infinite loops */
1257 int npos
; /* position in numeric expression */
1258 int arg_complete
; /* argument not interrupted by eol */
1259 int quote_args
; /* true for \\$@, false for \\$* */
1260 int done
; /* no more input available */
1261 int deftype
; /* type of definition to paste */
1262 int rcsid
; /* kind of RCS id seen */
1263 enum mandocerr err
; /* for escape sequence problems */
1264 char sign
; /* increment number register */
1265 char term
; /* character terminating the escape */
1267 /* Search forward for comments. */
1270 start
= buf
->buf
+ pos
;
1271 for (stesc
= buf
->buf
+ pos
; *stesc
!= '\0'; stesc
++) {
1272 if (stesc
[0] != newesc
|| stesc
[1] == '\0')
1275 if (*stesc
!= '"' && *stesc
!= '#')
1278 /* Comment found, look for RCS id. */
1281 if ((cp
= strstr(stesc
, "$" "OpenBSD")) != NULL
) {
1282 rcsid
= 1 << MANDOC_OS_OPENBSD
;
1284 } else if ((cp
= strstr(stesc
, "$" "NetBSD")) != NULL
) {
1285 rcsid
= 1 << MANDOC_OS_NETBSD
;
1289 isalnum((unsigned char)*cp
) == 0 &&
1290 strchr(cp
, '$') != NULL
) {
1291 if (r
->man
->meta
.rcsids
& rcsid
)
1292 mandoc_msg(MANDOCERR_RCS_REP
, ln
,
1293 (int)(stesc
- buf
->buf
) + 1,
1295 r
->man
->meta
.rcsids
|= rcsid
;
1298 /* Handle trailing whitespace. */
1300 ep
= strchr(stesc
--, '\0') - 1;
1305 if (*ep
== ' ' || *ep
== '\t')
1306 mandoc_msg(MANDOCERR_SPACE_EOL
,
1307 ln
, (int)(ep
- buf
->buf
), NULL
);
1310 * Save comments preceding the title macro
1311 * in the syntax tree.
1314 if (newesc
!= ASCII_ESC
&& r
->options
& MPARSE_COMMENT
) {
1315 while (*ep
== ' ' || *ep
== '\t')
1318 n
= roff_node_alloc(r
->man
,
1319 ln
, stesc
+ 1 - buf
->buf
,
1320 ROFFT_COMMENT
, TOKEN_NONE
);
1321 n
->string
= mandoc_strdup(stesc
+ 2);
1322 roff_node_append(r
->man
, n
);
1323 n
->flags
|= NODE_VALID
| NODE_ENDED
;
1324 r
->man
->next
= ROFF_NEXT_SIBLING
;
1327 /* Line continuation with comment. */
1329 if (stesc
[1] == '#') {
1331 return ROFF_IGN
| ROFF_APPEND
;
1334 /* Discard normal comments. */
1336 while (stesc
> start
&& stesc
[-1] == ' ' &&
1337 (stesc
== start
+ 1 || stesc
[-2] != '\\'))
1346 /* Notice the end of the input. */
1348 if (*stesc
== '\n') {
1354 while (stesc
>= start
) {
1355 if (*stesc
!= newesc
) {
1358 * If we have a non-standard escape character,
1359 * escape literal backslashes because all
1360 * processing in subsequent functions uses
1361 * the standard escaping rules.
1364 if (newesc
!= ASCII_ESC
&& *stesc
== '\\') {
1366 buf
->sz
= mandoc_asprintf(&nbuf
, "%s\\e%s",
1367 buf
->buf
, stesc
+ 1) + 1;
1369 stesc
= nbuf
+ (stesc
- buf
->buf
);
1374 /* Search backwards for the next escape. */
1380 /* If it is escaped, skip it. */
1382 for (cp
= stesc
- 1; cp
>= start
; cp
--)
1383 if (*cp
!= r
->escape
)
1386 if ((stesc
- cp
) % 2 == 0) {
1390 } else if (stesc
[1] != '\0') {
1397 return ROFF_IGN
| ROFF_APPEND
;
1400 /* Decide whether to expand or to check only. */
1418 if (sign
== '+' || sign
== '-')
1424 switch(mandoc_escape(&cp
, &stnam
, &inaml
)) {
1425 case ESCAPE_SPECIAL
:
1426 if (mchars_spec2cp(stnam
, inaml
) >= 0)
1430 err
= MANDOCERR_ESC_BAD
;
1433 err
= MANDOCERR_ESC_UNDEF
;
1436 err
= MANDOCERR_ESC_UNSUPP
;
1441 if (err
!= MANDOCERR_OK
)
1442 mandoc_msg(err
, ln
, (int)(stesc
- buf
->buf
),
1443 "%.*s", (int)(cp
- stesc
), stesc
);
1448 if (EXPAND_LIMIT
< ++expand_count
) {
1449 mandoc_msg(MANDOCERR_ROFFLOOP
,
1450 ln
, (int)(stesc
- buf
->buf
), NULL
);
1455 * The third character decides the length
1456 * of the name of the string or register.
1457 * Save a pointer to the name.
1484 /* Advance to the end of the name. */
1488 while (maxl
== 0 || naml
< maxl
) {
1490 mandoc_msg(MANDOCERR_ESC_BAD
, ln
,
1491 (int)(stesc
- buf
->buf
), "%s", stesc
);
1495 if (maxl
== 0 && *cp
== term
) {
1499 if (*cp
++ != '\\' || *esct
!= 'w') {
1503 switch (mandoc_escape(&cp
, NULL
, NULL
)) {
1504 case ESCAPE_SPECIAL
:
1505 case ESCAPE_UNICODE
:
1506 case ESCAPE_NUMBERED
:
1508 case ESCAPE_OVERSTRIKE
:
1517 * Retrieve the replacement string; if it is
1518 * undefined, resume searching for escapes.
1524 deftype
= ROFFDEF_USER
| ROFFDEF_PRE
;
1525 res
= roff_getstrn(r
, stnam
, naml
, &deftype
);
1528 * If not overridden, let \*(.T
1529 * through to the formatters.
1532 if (res
== NULL
&& naml
== 2 &&
1533 stnam
[0] == '.' && stnam
[1] == 'T') {
1534 roff_setstrn(&r
->strtab
,
1535 ".T", 2, NULL
, 0, 0);
1542 if (r
->mstackpos
< 0) {
1543 mandoc_msg(MANDOCERR_ARG_UNDEF
, ln
,
1544 (int)(stesc
- buf
->buf
), "%.3s", stesc
);
1547 ctx
= r
->mstack
+ r
->mstackpos
;
1548 npos
= esct
[1] - '1';
1549 if (npos
>= 0 && npos
<= 8) {
1550 res
= npos
< ctx
->argc
?
1551 ctx
->argv
[npos
] : "";
1556 else if (esct
[1] == '@')
1559 mandoc_msg(MANDOCERR_ARG_NONUM
, ln
,
1560 (int)(stesc
- buf
->buf
), "%.3s", stesc
);
1564 for (npos
= 0; npos
< ctx
->argc
; npos
++) {
1568 asz
+= 2; /* quotes */
1569 asz
+= strlen(ctx
->argv
[npos
]);
1572 rsz
= buf
->sz
- (stesc
- buf
->buf
) - 3;
1574 memmove(stesc
+ asz
, stesc
+ 3, rsz
);
1576 nbuf
= mandoc_realloc(buf
->buf
, buf
->sz
);
1578 stesc
= nbuf
+ (stesc
- buf
->buf
);
1581 memmove(stesc
+ asz
, stesc
+ 3, rsz
);
1583 for (npos
= 0; npos
< ctx
->argc
; npos
++) {
1588 cp
= ctx
->argv
[npos
];
1597 ubuf
[0] = arg_complete
&&
1598 roff_evalnum(r
, ln
, stnam
, &npos
,
1599 NULL
, ROFFNUM_SCALE
) &&
1600 stnam
+ npos
+ 1 == cp
? '1' : '0';
1605 (void)snprintf(ubuf
, sizeof(ubuf
), "%d",
1606 roff_getregn(r
, stnam
, naml
, sign
));
1611 /* use even incomplete args */
1612 (void)snprintf(ubuf
, sizeof(ubuf
), "%d",
1619 mandoc_msg(MANDOCERR_STR_UNDEF
,
1620 ln
, (int)(stesc
- buf
->buf
),
1621 "%.*s", (int)naml
, stnam
);
1623 } else if (buf
->sz
+ strlen(res
) > SHRT_MAX
) {
1624 mandoc_msg(MANDOCERR_ROFFLOOP
,
1625 ln
, (int)(stesc
- buf
->buf
), NULL
);
1629 /* Replace the escape sequence by the string. */
1632 buf
->sz
= mandoc_asprintf(&nbuf
, "%s%s%s",
1633 buf
->buf
, res
, cp
) + 1;
1635 /* Prepare for the next replacement. */
1638 stesc
= nbuf
+ (stesc
- buf
->buf
) + strlen(res
);
1646 * Parse a quoted or unquoted roff-style request or macro argument.
1647 * Return a pointer to the parsed argument, which is either the original
1648 * pointer or advanced by one byte in case the argument is quoted.
1649 * NUL-terminate the argument in place.
1650 * Collapse pairs of quotes inside quoted arguments.
1651 * Advance the argument pointer to the next argument,
1652 * or to the NUL byte terminating the argument line.
1655 roff_getarg(struct roff
*r
, char **cpp
, int ln
, int *pos
)
1659 int newesc
, pairs
, quoted
, white
;
1661 /* Quoting can only start with a new word. */
1664 if ('"' == *start
) {
1669 newesc
= pairs
= white
= 0;
1670 for (cp
= start
; '\0' != *cp
; cp
++) {
1673 * Move the following text left
1674 * after quoted quotes and after "\\" and "\t".
1679 if ('\\' == cp
[0]) {
1681 * In copy mode, translate double to single
1682 * backslashes and backslash-t to literal tabs.
1693 cp
[-pairs
] = ASCII_ESC
;
1698 /* Skip escaped blanks. */
1705 } else if (0 == quoted
) {
1707 /* Unescaped blanks end unquoted args. */
1711 } else if ('"' == cp
[0]) {
1713 /* Quoted quotes collapse. */
1717 /* Unquoted quotes end quoted args. */
1724 /* Quoted argument without a closing quote. */
1726 mandoc_msg(MANDOCERR_ARG_QUOTE
, ln
, *pos
, NULL
);
1728 /* NUL-terminate this argument and move to the next one. */
1736 *pos
+= (int)(cp
- start
) + (quoted
? 1 : 0);
1739 if ('\0' == *cp
&& (white
|| ' ' == cp
[-1]))
1740 mandoc_msg(MANDOCERR_SPACE_EOL
, ln
, *pos
, NULL
);
1742 start
= mandoc_strdup(start
);
1747 buf
.sz
= strlen(start
) + 1;
1749 if (roff_expand(r
, &buf
, ln
, 0, ASCII_ESC
) & ROFF_IGN
) {
1751 buf
.buf
= mandoc_strdup("");
1758 * Process text streams.
1761 roff_parsetext(struct roff
*r
, struct buf
*buf
, int pos
, int *offs
)
1767 enum mandoc_esc esc
;
1769 /* Spring the input line trap. */
1771 if (roffit_lines
== 1) {
1772 isz
= mandoc_asprintf(&p
, "%s\n.%s", buf
->buf
, roffit_macro
);
1779 return ROFF_REPARSE
;
1780 } else if (roffit_lines
> 1)
1783 if (roffce_node
!= NULL
&& buf
->buf
[pos
] != '\0') {
1784 if (roffce_lines
< 1) {
1785 r
->man
->last
= roffce_node
;
1786 r
->man
->next
= ROFF_NEXT_SIBLING
;
1793 /* Convert all breakable hyphens into ASCII_HYPH. */
1795 start
= p
= buf
->buf
+ pos
;
1797 while (*p
!= '\0') {
1798 sz
= strcspn(p
, "-\\");
1805 /* Skip over escapes. */
1807 esc
= mandoc_escape((const char **)&p
, NULL
, NULL
);
1808 if (esc
== ESCAPE_ERROR
)
1813 } else if (p
== start
) {
1818 if (isalpha((unsigned char)p
[-1]) &&
1819 isalpha((unsigned char)p
[1]))
1827 roff_parseln(struct roff
*r
, int ln
, struct buf
*buf
, int *offs
, size_t len
)
1831 int pos
; /* parse point */
1832 int spos
; /* saved parse point for messages */
1833 int ppos
; /* original offset in buf->buf */
1834 int ctl
; /* macro line (boolean) */
1838 if (len
> 80 && r
->tbl
== NULL
&& r
->eqn
== NULL
&&
1839 (r
->man
->flags
& ROFF_NOFILL
) == 0 &&
1840 strchr(" .\\", buf
->buf
[pos
]) == NULL
&&
1841 buf
->buf
[pos
] != r
->control
&&
1842 strcspn(buf
->buf
, " ") < 80)
1843 mandoc_msg(MANDOCERR_TEXT_LONG
, ln
, (int)len
- 1,
1844 "%.20s...", buf
->buf
+ pos
);
1846 /* Handle in-line equation delimiters. */
1848 if (r
->tbl
== NULL
&&
1849 r
->last_eqn
!= NULL
&& r
->last_eqn
->delim
&&
1850 (r
->eqn
== NULL
|| r
->eqn_inline
)) {
1851 e
= roff_eqndelim(r
, buf
, pos
);
1852 if (e
== ROFF_REPARSE
)
1854 assert(e
== ROFF_CONT
);
1857 /* Expand some escape sequences. */
1859 e
= roff_expand(r
, buf
, ln
, pos
, r
->escape
);
1860 if ((e
& ROFF_MASK
) == ROFF_IGN
)
1862 assert(e
== ROFF_CONT
);
1864 ctl
= roff_getcontrol(r
, buf
->buf
, &pos
);
1867 * First, if a scope is open and we're not a macro, pass the
1868 * text through the macro's filter.
1869 * Equations process all content themselves.
1870 * Tables process almost all content themselves, but we want
1871 * to warn about macros before passing it there.
1874 if (r
->last
!= NULL
&& ! ctl
) {
1876 e
= (*roffs
[t
].text
)(r
, t
, buf
, ln
, pos
, pos
, offs
);
1877 if ((e
& ROFF_MASK
) == ROFF_IGN
)
1882 if (r
->eqn
!= NULL
&& strncmp(buf
->buf
+ ppos
, ".EN", 3)) {
1883 eqn_read(r
->eqn
, buf
->buf
+ ppos
);
1886 if (r
->tbl
!= NULL
&& (ctl
== 0 || buf
->buf
[pos
] == '\0')) {
1887 tbl_read(r
->tbl
, ln
, buf
->buf
, ppos
);
1888 roff_addtbl(r
->man
, ln
, r
->tbl
);
1892 r
->options
&= ~MPARSE_COMMENT
;
1893 return roff_parsetext(r
, buf
, pos
, offs
) | e
;
1896 /* Skip empty request lines. */
1898 if (buf
->buf
[pos
] == '"') {
1899 mandoc_msg(MANDOCERR_COMMENT_BAD
, ln
, pos
, NULL
);
1901 } else if (buf
->buf
[pos
] == '\0')
1905 * If a scope is open, go to the child handler for that macro,
1906 * as it may want to preprocess before doing anything with it.
1907 * Don't do so if an equation is open.
1912 return (*roffs
[t
].sub
)(r
, t
, buf
, ln
, ppos
, pos
, offs
);
1915 /* No scope is open. This is a new request or macro. */
1917 r
->options
&= ~MPARSE_COMMENT
;
1919 t
= roff_parse(r
, buf
->buf
, &pos
, ln
, ppos
);
1921 /* Tables ignore most macros. */
1923 if (r
->tbl
!= NULL
&& (t
== TOKEN_NONE
|| t
== ROFF_TS
||
1924 t
== ROFF_br
|| t
== ROFF_ce
|| t
== ROFF_rj
|| t
== ROFF_sp
)) {
1925 mandoc_msg(MANDOCERR_TBLMACRO
,
1926 ln
, pos
, "%s", buf
->buf
+ spos
);
1927 if (t
!= TOKEN_NONE
)
1929 while (buf
->buf
[pos
] != '\0' && buf
->buf
[pos
] != ' ')
1931 while (buf
->buf
[pos
] == ' ')
1933 tbl_read(r
->tbl
, ln
, buf
->buf
, pos
);
1934 roff_addtbl(r
->man
, ln
, r
->tbl
);
1938 /* For now, let high level macros abort .ce mode. */
1940 if (ctl
&& roffce_node
!= NULL
&&
1941 (t
== TOKEN_NONE
|| t
== ROFF_Dd
|| t
== ROFF_EQ
||
1942 t
== ROFF_TH
|| t
== ROFF_TS
)) {
1943 r
->man
->last
= roffce_node
;
1944 r
->man
->next
= ROFF_NEXT_SIBLING
;
1950 * This is neither a roff request nor a user-defined macro.
1951 * Let the standard macro set parsers handle it.
1954 if (t
== TOKEN_NONE
)
1957 /* Execute a roff request or a user defined macro. */
1959 return (*roffs
[t
].proc
)(r
, t
, buf
, ln
, spos
, pos
, offs
);
1963 * Internal interface function to tell the roff parser that execution
1964 * of the current macro ended. This is required because macro
1965 * definitions usually do not end with a .return request.
1968 roff_userret(struct roff
*r
)
1973 assert(r
->mstackpos
>= 0);
1974 ctx
= r
->mstack
+ r
->mstackpos
;
1975 for (i
= 0; i
< ctx
->argc
; i
++)
1982 roff_endparse(struct roff
*r
)
1984 if (r
->last
!= NULL
)
1985 mandoc_msg(MANDOCERR_BLK_NOEND
, r
->last
->line
,
1986 r
->last
->col
, "%s", roff_name
[r
->last
->tok
]);
1988 if (r
->eqn
!= NULL
) {
1989 mandoc_msg(MANDOCERR_BLK_NOEND
,
1990 r
->eqn
->node
->line
, r
->eqn
->node
->pos
, "EQ");
1995 if (r
->tbl
!= NULL
) {
2002 * Parse a roff node's type from the input buffer. This must be in the
2003 * form of ".foo xxx" in the usual way.
2005 static enum roff_tok
2006 roff_parse(struct roff
*r
, char *buf
, int *pos
, int ln
, int ppos
)
2016 if ('\0' == *cp
|| '"' == *cp
|| '\t' == *cp
|| ' ' == *cp
)
2020 maclen
= roff_getname(r
, &cp
, ln
, ppos
);
2022 deftype
= ROFFDEF_USER
| ROFFDEF_REN
;
2023 r
->current_string
= roff_getstrn(r
, mac
, maclen
, &deftype
);
2032 t
= roffhash_find(r
->reqtab
, mac
, maclen
);
2035 if (t
!= TOKEN_NONE
)
2037 else if (deftype
== ROFFDEF_UNDEF
) {
2038 /* Using an undefined macro defines it to be empty. */
2039 roff_setstrn(&r
->strtab
, mac
, maclen
, "", 0, 0);
2040 roff_setstrn(&r
->rentab
, mac
, maclen
, NULL
, 0, 0);
2045 /* --- handling of request blocks ----------------------------------------- */
2048 * Close a macro definition block or an "ignore" block.
2051 roff_cblock(ROFF_ARGS
)
2055 if (r
->last
== NULL
) {
2056 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "..");
2060 switch (r
->last
->tok
) {
2069 /* Remapped in roff_block(). */
2072 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "..");
2077 roffnode_cleanscope(r
);
2080 * If a conditional block with braces is still open,
2081 * check for "\}" block end markers.
2084 if (r
->last
!= NULL
&& r
->last
->endspan
< 0) {
2085 rr
= 1; /* If arguments follow "\}", warn about them. */
2086 roff_cond_checkend(r
, tok
, buf
, ln
, ppos
, pos
, &rr
);
2089 if (buf
->buf
[pos
] != '\0')
2090 mandoc_msg(MANDOCERR_ARG_SKIP
, ln
, pos
,
2091 ".. %s", buf
->buf
+ pos
);
2097 * Pop all nodes ending at the end of the current input line.
2098 * Return the number of loops ended.
2101 roffnode_cleanscope(struct roff
*r
)
2106 while (r
->last
!= NULL
&& r
->last
->endspan
> 0) {
2107 if (--r
->last
->endspan
!= 0)
2109 inloop
+= roffnode_pop(r
);
2115 * Handle the closing "\}" of a conditional block.
2116 * Apart from generating warnings, this only pops nodes.
2117 * Return the number of loops ended.
2120 roff_ccond(struct roff
*r
, int ln
, int ppos
)
2122 if (NULL
== r
->last
) {
2123 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "\\}");
2127 switch (r
->last
->tok
) {
2134 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "\\}");
2138 if (r
->last
->endspan
> -1) {
2139 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "\\}");
2143 return roffnode_pop(r
) + roffnode_cleanscope(r
);
2147 roff_block(ROFF_ARGS
)
2149 const char *name
, *value
;
2150 char *call
, *cp
, *iname
, *rname
;
2151 size_t csz
, namesz
, rsz
;
2154 /* Ignore groff compatibility mode for now. */
2156 if (tok
== ROFF_de1
)
2158 else if (tok
== ROFF_dei1
)
2160 else if (tok
== ROFF_am1
)
2162 else if (tok
== ROFF_ami1
)
2165 /* Parse the macro name argument. */
2167 cp
= buf
->buf
+ pos
;
2168 if (tok
== ROFF_ig
) {
2173 namesz
= roff_getname(r
, &cp
, ln
, ppos
);
2174 iname
[namesz
] = '\0';
2177 /* Resolve the macro name argument if it is indirect. */
2179 if (namesz
&& (tok
== ROFF_dei
|| tok
== ROFF_ami
)) {
2180 deftype
= ROFFDEF_USER
;
2181 name
= roff_getstrn(r
, iname
, namesz
, &deftype
);
2183 mandoc_msg(MANDOCERR_STR_UNDEF
,
2184 ln
, (int)(iname
- buf
->buf
),
2185 "%.*s", (int)namesz
, iname
);
2188 namesz
= strlen(name
);
2192 if (namesz
== 0 && tok
!= ROFF_ig
) {
2193 mandoc_msg(MANDOCERR_REQ_EMPTY
,
2194 ln
, ppos
, "%s", roff_name
[tok
]);
2198 roffnode_push(r
, tok
, name
, ln
, ppos
);
2201 * At the beginning of a `de' macro, clear the existing string
2202 * with the same name, if there is one. New content will be
2203 * appended from roff_block_text() in multiline mode.
2206 if (tok
== ROFF_de
|| tok
== ROFF_dei
) {
2207 roff_setstrn(&r
->strtab
, name
, namesz
, "", 0, 0);
2208 roff_setstrn(&r
->rentab
, name
, namesz
, NULL
, 0, 0);
2209 } else if (tok
== ROFF_am
|| tok
== ROFF_ami
) {
2210 deftype
= ROFFDEF_ANY
;
2211 value
= roff_getstrn(r
, iname
, namesz
, &deftype
);
2212 switch (deftype
) { /* Before appending, ... */
2213 case ROFFDEF_PRE
: /* copy predefined to user-defined. */
2214 roff_setstrn(&r
->strtab
, name
, namesz
,
2215 value
, strlen(value
), 0);
2217 case ROFFDEF_REN
: /* call original standard macro. */
2218 csz
= mandoc_asprintf(&call
, ".%.*s \\$* \\\"\n",
2219 (int)strlen(value
), value
);
2220 roff_setstrn(&r
->strtab
, name
, namesz
, call
, csz
, 0);
2221 roff_setstrn(&r
->rentab
, name
, namesz
, NULL
, 0, 0);
2224 case ROFFDEF_STD
: /* rename and call standard macro. */
2225 rsz
= mandoc_asprintf(&rname
, "__%s_renamed", name
);
2226 roff_setstrn(&r
->rentab
, rname
, rsz
, name
, namesz
, 0);
2227 csz
= mandoc_asprintf(&call
, ".%.*s \\$* \\\"\n",
2229 roff_setstrn(&r
->strtab
, name
, namesz
, call
, csz
, 0);
2241 /* Get the custom end marker. */
2244 namesz
= roff_getname(r
, &cp
, ln
, ppos
);
2246 /* Resolve the end marker if it is indirect. */
2248 if (namesz
&& (tok
== ROFF_dei
|| tok
== ROFF_ami
)) {
2249 deftype
= ROFFDEF_USER
;
2250 name
= roff_getstrn(r
, iname
, namesz
, &deftype
);
2252 mandoc_msg(MANDOCERR_STR_UNDEF
,
2253 ln
, (int)(iname
- buf
->buf
),
2254 "%.*s", (int)namesz
, iname
);
2257 namesz
= strlen(name
);
2262 r
->last
->end
= mandoc_strndup(name
, namesz
);
2265 mandoc_msg(MANDOCERR_ARG_EXCESS
,
2266 ln
, pos
, ".%s ... %s", roff_name
[tok
], cp
);
2272 roff_block_sub(ROFF_ARGS
)
2278 * First check whether a custom macro exists at this level. If
2279 * it does, then check against it. This is some of groff's
2280 * stranger behaviours. If we encountered a custom end-scope
2281 * tag and that tag also happens to be a "real" macro, then we
2282 * need to try interpreting it again as a real macro. If it's
2283 * not, then return ignore. Else continue.
2287 for (i
= pos
, j
= 0; r
->last
->end
[j
]; j
++, i
++)
2288 if (buf
->buf
[i
] != r
->last
->end
[j
])
2291 if (r
->last
->end
[j
] == '\0' &&
2292 (buf
->buf
[i
] == '\0' ||
2293 buf
->buf
[i
] == ' ' ||
2294 buf
->buf
[i
] == '\t')) {
2296 roffnode_cleanscope(r
);
2298 while (buf
->buf
[i
] == ' ' || buf
->buf
[i
] == '\t')
2302 if (roff_parse(r
, buf
->buf
, &pos
, ln
, ppos
) !=
2310 * If we have no custom end-query or lookup failed, then try
2311 * pulling it out of the hashtable.
2314 t
= roff_parse(r
, buf
->buf
, &pos
, ln
, ppos
);
2316 if (t
!= ROFF_cblock
) {
2318 roff_setstr(r
, r
->last
->name
, buf
->buf
+ ppos
, 2);
2322 return (*roffs
[t
].proc
)(r
, t
, buf
, ln
, ppos
, pos
, offs
);
2326 roff_block_text(ROFF_ARGS
)
2330 roff_setstr(r
, r
->last
->name
, buf
->buf
+ pos
, 2);
2336 * Check for a closing "\}" and handle it.
2337 * In this function, the final "int *offs" argument is used for
2338 * different purposes than elsewhere:
2339 * Input: *offs == 0: caller wants to discard arguments following \}
2340 * *offs == 1: caller wants to preserve text following \}
2341 * Output: *offs = 0: tell caller to discard input line
2342 * *offs = 1: tell caller to use input line
2345 roff_cond_checkend(ROFF_ARGS
)
2348 int endloop
, irc
, rr
;
2352 endloop
= tok
!= ROFF_while
? ROFF_IGN
:
2353 rr
? ROFF_LOOPCONT
: ROFF_LOOPEXIT
;
2354 if (roffnode_cleanscope(r
))
2358 * If "\}" occurs on a macro line without a preceding macro or
2359 * a text line contains nothing else, drop the line completely.
2362 ep
= buf
->buf
+ pos
;
2363 if (ep
[0] == '\\' && ep
[1] == '}' && (ep
[2] == '\0' || *offs
== 0))
2367 * The closing delimiter "\}" rewinds the conditional scope
2368 * but is otherwise ignored when interpreting the line.
2371 while ((ep
= strchr(ep
, '\\')) != NULL
) {
2379 memmove(ep
, ep
+ 2, strlen(ep
+ 2) + 1);
2380 if (roff_ccond(r
, ln
, ep
- buf
->buf
))
2396 * Parse and process a request or macro line in conditional scope.
2399 roff_cond_sub(ROFF_ARGS
)
2401 struct roffnode
*bl
;
2405 rr
= 0; /* If arguments follow "\}", skip them. */
2406 irc
= roff_cond_checkend(r
, tok
, buf
, ln
, ppos
, pos
, &rr
);
2407 t
= roff_parse(r
, buf
->buf
, &pos
, ln
, ppos
);
2409 /* For now, let high level macros abort .ce mode. */
2411 if (roffce_node
!= NULL
&&
2412 (t
== TOKEN_NONE
|| t
== ROFF_Dd
|| t
== ROFF_EQ
||
2413 t
== ROFF_TH
|| t
== ROFF_TS
)) {
2414 r
->man
->last
= roffce_node
;
2415 r
->man
->next
= ROFF_NEXT_SIBLING
;
2421 * Fully handle known macros when they are structurally
2422 * required or when the conditional evaluated to true.
2425 if (t
== ROFF_break
) {
2426 if (irc
& ROFF_LOOPMASK
)
2427 irc
= ROFF_IGN
| ROFF_LOOPEXIT
;
2429 for (bl
= r
->last
; bl
!= NULL
; bl
= bl
->parent
) {
2431 if (bl
->tok
== ROFF_while
)
2435 } else if (t
!= TOKEN_NONE
&&
2436 (rr
|| roffs
[t
].flags
& ROFFMAC_STRUCT
)) {
2437 irc
|= (*roffs
[t
].proc
)(r
, t
, buf
, ln
, ppos
, pos
, offs
);
2438 if (irc
& ROFF_WHILE
)
2439 irc
&= ~(ROFF_LOOPCONT
| ROFF_LOOPEXIT
);
2441 irc
|= rr
? ROFF_CONT
: ROFF_IGN
;
2446 * Parse and process a text line in conditional scope.
2449 roff_cond_text(ROFF_ARGS
)
2453 rr
= 1; /* If arguments follow "\}", preserve them. */
2454 irc
= roff_cond_checkend(r
, tok
, buf
, ln
, ppos
, pos
, &rr
);
2460 /* --- handling of numeric and conditional expressions -------------------- */
2463 * Parse a single signed integer number. Stop at the first non-digit.
2464 * If there is at least one digit, return success and advance the
2465 * parse point, else return failure and leave the parse point unchanged.
2466 * Ignore overflows, treat them just like the C language.
2469 roff_getnum(const char *v
, int *pos
, int *res
, int flags
)
2471 int myres
, scaled
, n
, p
;
2478 if (n
|| v
[p
] == '+')
2481 if (flags
& ROFFNUM_WHITE
)
2482 while (isspace((unsigned char)v
[p
]))
2485 for (*res
= 0; isdigit((unsigned char)v
[p
]); p
++)
2486 *res
= 10 * *res
+ v
[p
] - '0';
2493 /* Each number may be followed by one optional scaling unit. */
2497 scaled
= *res
* 65536;
2500 scaled
= *res
* 240;
2503 scaled
= *res
* 240 / 2.54;
2514 scaled
= *res
* 10 / 3;
2520 scaled
= *res
* 6 / 25;
2527 if (flags
& ROFFNUM_SCALE
)
2535 * Evaluate a string comparison condition.
2536 * The first character is the delimiter.
2537 * Succeed if the string up to its second occurrence
2538 * matches the string up to its third occurrence.
2539 * Advance the cursor after the third occurrence
2540 * or lacking that, to the end of the line.
2543 roff_evalstrcond(const char *v
, int *pos
)
2545 const char *s1
, *s2
, *s3
;
2549 s1
= v
+ *pos
; /* initial delimiter */
2550 s2
= s1
+ 1; /* for scanning the first string */
2551 s3
= strchr(s2
, *s1
); /* for scanning the second string */
2553 if (NULL
== s3
) /* found no middle delimiter */
2556 while ('\0' != *++s3
) {
2557 if (*s2
!= *s3
) { /* mismatch */
2558 s3
= strchr(s3
, *s1
);
2561 if (*s3
== *s1
) { /* found the final delimiter */
2570 s3
= strchr(s2
, '\0');
2571 else if (*s3
!= '\0')
2578 * Evaluate an optionally negated single character, numerical,
2579 * or string condition.
2582 roff_evalcond(struct roff
*r
, int ln
, char *v
, int *pos
)
2584 const char *start
, *end
;
2587 int deftype
, len
, number
, savepos
, istrue
, wanttrue
;
2589 if ('!' == v
[*pos
]) {
2610 } while (v
[*pos
] == ' ');
2613 * Quirk for groff compatibility:
2614 * The horizontal tab is neither available nor unavailable.
2617 if (v
[*pos
] == '\t') {
2622 /* Printable ASCII characters are available. */
2624 if (v
[*pos
] != '\\') {
2630 switch (mandoc_escape(&end
, &start
, &len
)) {
2631 case ESCAPE_SPECIAL
:
2632 istrue
= mchars_spec2cp(start
, len
) != -1;
2634 case ESCAPE_UNICODE
:
2637 case ESCAPE_NUMBERED
:
2638 istrue
= mchars_num2char(start
, len
) != -1;
2645 return istrue
== wanttrue
;
2652 sz
= roff_getname(r
, &cp
, ln
, cp
- v
);
2655 else if (v
[*pos
] == 'r')
2656 istrue
= roff_hasregn(r
, name
, sz
);
2658 deftype
= ROFFDEF_ANY
;
2659 roff_getstrn(r
, name
, sz
, &deftype
);
2662 *pos
= (name
+ sz
) - v
;
2663 return istrue
== wanttrue
;
2669 if (roff_evalnum(r
, ln
, v
, pos
, &number
, ROFFNUM_SCALE
))
2670 return (number
> 0) == wanttrue
;
2671 else if (*pos
== savepos
)
2672 return roff_evalstrcond(v
, pos
) == wanttrue
;
2678 roff_line_ignore(ROFF_ARGS
)
2685 roff_insec(ROFF_ARGS
)
2688 mandoc_msg(MANDOCERR_REQ_INSEC
, ln
, ppos
, "%s", roff_name
[tok
]);
2693 roff_unsupp(ROFF_ARGS
)
2696 mandoc_msg(MANDOCERR_REQ_UNSUPP
, ln
, ppos
, "%s", roff_name
[tok
]);
2701 roff_cond(ROFF_ARGS
)
2705 roffnode_push(r
, tok
, NULL
, ln
, ppos
);
2708 * An `.el' has no conditional body: it will consume the value
2709 * of the current rstack entry set in prior `ie' calls or
2712 * If we're not an `el', however, then evaluate the conditional.
2715 r
->last
->rule
= tok
== ROFF_el
?
2716 (r
->rstackpos
< 0 ? 0 : r
->rstack
[r
->rstackpos
--]) :
2717 roff_evalcond(r
, ln
, buf
->buf
, &pos
);
2720 * An if-else will put the NEGATION of the current evaluated
2721 * conditional into the stack of rules.
2724 if (tok
== ROFF_ie
) {
2725 if (r
->rstackpos
+ 1 == r
->rstacksz
) {
2727 r
->rstack
= mandoc_reallocarray(r
->rstack
,
2728 r
->rstacksz
, sizeof(int));
2730 r
->rstack
[++r
->rstackpos
] = !r
->last
->rule
;
2733 /* If the parent has false as its rule, then so do we. */
2735 if (r
->last
->parent
&& !r
->last
->parent
->rule
)
2740 * If there is nothing on the line after the conditional,
2741 * not even whitespace, use next-line scope.
2742 * Except that .while does not support next-line scope.
2745 if (buf
->buf
[pos
] == '\0' && tok
!= ROFF_while
) {
2746 r
->last
->endspan
= 2;
2750 while (buf
->buf
[pos
] == ' ')
2753 /* An opening brace requests multiline scope. */
2755 if (buf
->buf
[pos
] == '\\' && buf
->buf
[pos
+ 1] == '{') {
2756 r
->last
->endspan
= -1;
2758 while (buf
->buf
[pos
] == ' ')
2764 * Anything else following the conditional causes
2765 * single-line scope. Warn if the scope contains
2766 * nothing but trailing whitespace.
2769 if (buf
->buf
[pos
] == '\0')
2770 mandoc_msg(MANDOCERR_COND_EMPTY
,
2771 ln
, ppos
, "%s", roff_name
[tok
]);
2773 r
->last
->endspan
= 1;
2778 if (tok
== ROFF_while
)
2790 /* Ignore groff compatibility mode for now. */
2792 if (tok
== ROFF_ds1
)
2794 else if (tok
== ROFF_as1
)
2798 * The first word is the name of the string.
2799 * If it is empty or terminated by an escape sequence,
2800 * abort the `ds' request without defining anything.
2803 name
= string
= buf
->buf
+ pos
;
2807 namesz
= roff_getname(r
, &string
, ln
, pos
);
2808 switch (name
[namesz
]) {
2812 string
= buf
->buf
+ pos
+ namesz
;
2818 /* Read past the initial double-quote, if any. */
2822 /* The rest is the value. */
2823 roff_setstrn(&r
->strtab
, name
, namesz
, string
, strlen(string
),
2825 roff_setstrn(&r
->rentab
, name
, namesz
, NULL
, 0, 0);
2830 * Parse a single operator, one or two characters long.
2831 * If the operator is recognized, return success and advance the
2832 * parse point, else return failure and leave the parse point unchanged.
2835 roff_getop(const char *v
, int *pos
, char *res
)
2850 switch (v
[*pos
+ 1]) {
2868 switch (v
[*pos
+ 1]) {
2882 if ('=' == v
[*pos
+ 1])
2894 * Evaluate either a parenthesized numeric expression
2895 * or a single signed integer number.
2898 roff_evalpar(struct roff
*r
, int ln
,
2899 const char *v
, int *pos
, int *res
, int flags
)
2903 return roff_getnum(v
, pos
, res
, flags
);
2906 if ( ! roff_evalnum(r
, ln
, v
, pos
, res
, flags
| ROFFNUM_WHITE
))
2910 * Omission of the closing parenthesis
2911 * is an error in validation mode,
2912 * but ignored in evaluation mode.
2917 else if (NULL
== res
)
2924 * Evaluate a complete numeric expression.
2925 * Proceed left to right, there is no concept of precedence.
2928 roff_evalnum(struct roff
*r
, int ln
, const char *v
,
2929 int *pos
, int *res
, int flags
)
2931 int mypos
, operand2
;
2939 if (flags
& ROFFNUM_WHITE
)
2940 while (isspace((unsigned char)v
[*pos
]))
2943 if ( ! roff_evalpar(r
, ln
, v
, pos
, res
, flags
))
2947 if (flags
& ROFFNUM_WHITE
)
2948 while (isspace((unsigned char)v
[*pos
]))
2951 if ( ! roff_getop(v
, pos
, &operator))
2954 if (flags
& ROFFNUM_WHITE
)
2955 while (isspace((unsigned char)v
[*pos
]))
2958 if ( ! roff_evalpar(r
, ln
, v
, pos
, &operand2
, flags
))
2961 if (flags
& ROFFNUM_WHITE
)
2962 while (isspace((unsigned char)v
[*pos
]))
2979 if (operand2
== 0) {
2980 mandoc_msg(MANDOCERR_DIVZERO
,
2988 if (operand2
== 0) {
2989 mandoc_msg(MANDOCERR_DIVZERO
,
2997 *res
= *res
< operand2
;
3000 *res
= *res
> operand2
;
3003 *res
= *res
<= operand2
;
3006 *res
= *res
>= operand2
;
3009 *res
= *res
== operand2
;
3012 *res
= *res
!= operand2
;
3015 *res
= *res
&& operand2
;
3018 *res
= *res
|| operand2
;
3021 if (operand2
< *res
)
3025 if (operand2
> *res
)
3035 /* --- register management ------------------------------------------------ */
3038 roff_setreg(struct roff
*r
, const char *name
, int val
, char sign
)
3040 roff_setregn(r
, name
, strlen(name
), val
, sign
, INT_MIN
);
3044 roff_setregn(struct roff
*r
, const char *name
, size_t len
,
3045 int val
, char sign
, int step
)
3047 struct roffreg
*reg
;
3049 /* Search for an existing register with the same name. */
3052 while (reg
!= NULL
&& (reg
->key
.sz
!= len
||
3053 strncmp(reg
->key
.p
, name
, len
) != 0))
3057 /* Create a new register. */
3058 reg
= mandoc_malloc(sizeof(struct roffreg
));
3059 reg
->key
.p
= mandoc_strndup(name
, len
);
3063 reg
->next
= r
->regtab
;
3069 else if ('-' == sign
)
3073 if (step
!= INT_MIN
)
3078 * Handle some predefined read-only number registers.
3079 * For now, return -1 if the requested register is not predefined;
3080 * in case a predefined read-only register having the value -1
3081 * were to turn up, another special value would have to be chosen.
3084 roff_getregro(const struct roff
*r
, const char *name
)
3088 case '$': /* Number of arguments of the last macro evaluated. */
3089 return r
->mstackpos
< 0 ? 0 : r
->mstack
[r
->mstackpos
].argc
;
3090 case 'A': /* ASCII approximation mode is always off. */
3092 case 'g': /* Groff compatibility mode is always on. */
3094 case 'H': /* Fixed horizontal resolution. */
3096 case 'j': /* Always adjust left margin only. */
3098 case 'T': /* Some output device is always defined. */
3100 case 'V': /* Fixed vertical resolution. */
3108 roff_getreg(struct roff
*r
, const char *name
)
3110 return roff_getregn(r
, name
, strlen(name
), '\0');
3114 roff_getregn(struct roff
*r
, const char *name
, size_t len
, char sign
)
3116 struct roffreg
*reg
;
3119 if ('.' == name
[0] && 2 == len
) {
3120 val
= roff_getregro(r
, name
+ 1);
3125 for (reg
= r
->regtab
; reg
; reg
= reg
->next
) {
3126 if (len
== reg
->key
.sz
&&
3127 0 == strncmp(name
, reg
->key
.p
, len
)) {
3130 reg
->val
+= reg
->step
;
3133 reg
->val
-= reg
->step
;
3142 roff_setregn(r
, name
, len
, 0, '\0', INT_MIN
);
3147 roff_hasregn(const struct roff
*r
, const char *name
, size_t len
)
3149 struct roffreg
*reg
;
3152 if ('.' == name
[0] && 2 == len
) {
3153 val
= roff_getregro(r
, name
+ 1);
3158 for (reg
= r
->regtab
; reg
; reg
= reg
->next
)
3159 if (len
== reg
->key
.sz
&&
3160 0 == strncmp(name
, reg
->key
.p
, len
))
3167 roff_freereg(struct roffreg
*reg
)
3169 struct roffreg
*old_reg
;
3171 while (NULL
!= reg
) {
3182 char *key
, *val
, *step
;
3187 key
= val
= buf
->buf
+ pos
;
3191 keysz
= roff_getname(r
, &val
, ln
, pos
);
3192 if (key
[keysz
] == '\\' || key
[keysz
] == '\t')
3196 if (sign
== '+' || sign
== '-')
3200 if (roff_evalnum(r
, ln
, val
, &len
, &iv
, ROFFNUM_SCALE
) == 0)
3204 while (isspace((unsigned char)*step
))
3206 if (roff_evalnum(r
, ln
, step
, NULL
, &is
, 0) == 0)
3209 roff_setregn(r
, key
, keysz
, iv
, sign
, is
);
3216 struct roffreg
*reg
, **prev
;
3220 name
= cp
= buf
->buf
+ pos
;
3223 namesz
= roff_getname(r
, &cp
, ln
, pos
);
3224 name
[namesz
] = '\0';
3229 if (reg
== NULL
|| !strcmp(name
, reg
->key
.p
))
3241 /* --- handler functions for roff requests -------------------------------- */
3250 cp
= buf
->buf
+ pos
;
3251 while (*cp
!= '\0') {
3253 namesz
= roff_getname(r
, &cp
, ln
, (int)(cp
- buf
->buf
));
3254 roff_setstrn(&r
->strtab
, name
, namesz
, NULL
, 0, 0);
3255 roff_setstrn(&r
->rentab
, name
, namesz
, NULL
, 0, 0);
3256 if (name
[namesz
] == '\\' || name
[namesz
] == '\t')
3267 /* Parse the number of lines. */
3269 if ( ! roff_evalnum(r
, ln
, buf
->buf
, &pos
, &iv
, 0)) {
3270 mandoc_msg(MANDOCERR_IT_NONUM
,
3271 ln
, ppos
, "%s", buf
->buf
+ 1);
3275 while (isspace((unsigned char)buf
->buf
[pos
]))
3279 * Arm the input line trap.
3280 * Special-casing "an-trap" is an ugly workaround to cope
3281 * with DocBook stupidly fiddling with man(7) internals.
3285 roffit_macro
= mandoc_strdup(iv
!= 1 ||
3286 strcmp(buf
->buf
+ pos
, "an-trap") ?
3287 buf
->buf
+ pos
: "br");
3295 enum roff_tok t
, te
;
3302 r
->format
= MPARSE_MDOC
;
3303 mask
= MPARSE_MDOC
| MPARSE_QUICK
;
3309 r
->format
= MPARSE_MAN
;
3310 mask
= MPARSE_QUICK
;
3315 if ((r
->options
& mask
) == 0)
3316 for (t
= tok
; t
< te
; t
++)
3317 roff_setstr(r
, roff_name
[t
], NULL
, 0);
3324 r
->man
->flags
&= ~ROFF_NONOFILL
;
3325 if (r
->tbl
== NULL
) {
3326 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "TE");
3329 if (tbl_end(r
->tbl
, 0) == 0) {
3332 buf
->buf
= mandoc_strdup(".sp");
3335 return ROFF_REPARSE
;
3346 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "T&");
3348 tbl_restart(ln
, ppos
, r
->tbl
);
3354 * Handle in-line equation delimiters.
3357 roff_eqndelim(struct roff
*r
, struct buf
*buf
, int pos
)
3360 const char *bef_pr
, *bef_nl
, *mac
, *aft_nl
, *aft_pr
;
3363 * Outside equations, look for an opening delimiter.
3364 * If we are inside an equation, we already know it is
3365 * in-line, or this function wouldn't have been called;
3366 * so look for a closing delimiter.
3369 cp1
= buf
->buf
+ pos
;
3370 cp2
= strchr(cp1
, r
->eqn
== NULL
?
3371 r
->last_eqn
->odelim
: r
->last_eqn
->cdelim
);
3376 bef_pr
= bef_nl
= aft_nl
= aft_pr
= "";
3378 /* Handle preceding text, protecting whitespace. */
3380 if (*buf
->buf
!= '\0') {
3387 * Prepare replacing the delimiter with an equation macro
3388 * and drop leading white space from the equation.
3391 if (r
->eqn
== NULL
) {
3398 /* Handle following text, protecting whitespace. */
3406 /* Do the actual replacement. */
3408 buf
->sz
= mandoc_asprintf(&cp1
, "%s%s%s%s%s%s%s", buf
->buf
,
3409 bef_pr
, bef_nl
, mac
, aft_nl
, aft_pr
, cp2
) + 1;
3413 /* Toggle the in-line state of the eqn subsystem. */
3415 r
->eqn_inline
= r
->eqn
== NULL
;
3416 return ROFF_REPARSE
;
3422 struct roff_node
*n
;
3424 if (r
->man
->meta
.macroset
== MACROSET_MAN
)
3425 man_breakscope(r
->man
, ROFF_EQ
);
3426 n
= roff_node_alloc(r
->man
, ln
, ppos
, ROFFT_EQN
, TOKEN_NONE
);
3427 if (ln
> r
->man
->last
->line
)
3428 n
->flags
|= NODE_LINE
;
3429 n
->eqn
= eqn_box_new();
3430 roff_node_append(r
->man
, n
);
3431 r
->man
->next
= ROFF_NEXT_SIBLING
;
3433 assert(r
->eqn
== NULL
);
3434 if (r
->last_eqn
== NULL
)
3435 r
->last_eqn
= eqn_alloc();
3437 eqn_reset(r
->last_eqn
);
3438 r
->eqn
= r
->last_eqn
;
3441 if (buf
->buf
[pos
] != '\0')
3442 mandoc_msg(MANDOCERR_ARG_SKIP
, ln
, pos
,
3443 ".EQ %s", buf
->buf
+ pos
);
3451 if (r
->eqn
!= NULL
) {
3455 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "EN");
3456 if (buf
->buf
[pos
] != '\0')
3457 mandoc_msg(MANDOCERR_ARG_SKIP
, ln
, pos
,
3458 "EN %s", buf
->buf
+ pos
);
3465 if (r
->tbl
!= NULL
) {
3466 mandoc_msg(MANDOCERR_BLK_BROKEN
, ln
, ppos
, "TS breaks TS");
3469 r
->man
->flags
|= ROFF_NONOFILL
;
3470 r
->tbl
= tbl_alloc(ppos
, ln
, r
->last_tbl
);
3471 if (r
->last_tbl
== NULL
)
3472 r
->first_tbl
= r
->tbl
;
3473 r
->last_tbl
= r
->tbl
;
3478 roff_noarg(ROFF_ARGS
)
3480 if (r
->man
->flags
& (MAN_BLINE
| MAN_ELINE
))
3481 man_breakscope(r
->man
, tok
);
3482 if (tok
== ROFF_brp
)
3484 roff_elem_alloc(r
->man
, ln
, ppos
, tok
);
3485 if (buf
->buf
[pos
] != '\0')
3486 mandoc_msg(MANDOCERR_ARG_SKIP
, ln
, pos
,
3487 "%s %s", roff_name
[tok
], buf
->buf
+ pos
);
3489 r
->man
->flags
|= ROFF_NOFILL
;
3490 else if (tok
== ROFF_fi
)
3491 r
->man
->flags
&= ~ROFF_NOFILL
;
3492 r
->man
->last
->flags
|= NODE_LINE
| NODE_VALID
| NODE_ENDED
;
3493 r
->man
->next
= ROFF_NEXT_SIBLING
;
3498 roff_onearg(ROFF_ARGS
)
3500 struct roff_node
*n
;
3504 if (r
->man
->flags
& (MAN_BLINE
| MAN_ELINE
) &&
3505 (tok
== ROFF_ce
|| tok
== ROFF_rj
|| tok
== ROFF_sp
||
3507 man_breakscope(r
->man
, tok
);
3509 if (roffce_node
!= NULL
&& (tok
== ROFF_ce
|| tok
== ROFF_rj
)) {
3510 r
->man
->last
= roffce_node
;
3511 r
->man
->next
= ROFF_NEXT_SIBLING
;
3514 roff_elem_alloc(r
->man
, ln
, ppos
, tok
);
3517 cp
= buf
->buf
+ pos
;
3519 while (*cp
!= '\0' && *cp
!= ' ')
3524 mandoc_msg(MANDOCERR_ARG_EXCESS
,
3525 ln
, (int)(cp
- buf
->buf
),
3526 "%s ... %s", roff_name
[tok
], cp
);
3527 roff_word_alloc(r
->man
, ln
, pos
, buf
->buf
+ pos
);
3530 if (tok
== ROFF_ce
|| tok
== ROFF_rj
) {
3531 if (r
->man
->last
->type
== ROFFT_ELEM
) {
3532 roff_word_alloc(r
->man
, ln
, pos
, "1");
3533 r
->man
->last
->flags
|= NODE_NOSRC
;
3536 if (roff_evalnum(r
, ln
, r
->man
->last
->string
, &npos
,
3537 &roffce_lines
, 0) == 0) {
3538 mandoc_msg(MANDOCERR_CE_NONUM
,
3539 ln
, pos
, "ce %s", buf
->buf
+ pos
);
3542 if (roffce_lines
< 1) {
3543 r
->man
->last
= r
->man
->last
->parent
;
3547 roffce_node
= r
->man
->last
->parent
;
3549 n
->flags
|= NODE_VALID
| NODE_ENDED
;
3552 n
->flags
|= NODE_LINE
;
3553 r
->man
->next
= ROFF_NEXT_SIBLING
;
3558 roff_manyarg(ROFF_ARGS
)
3560 struct roff_node
*n
;
3563 roff_elem_alloc(r
->man
, ln
, ppos
, tok
);
3566 for (sp
= ep
= buf
->buf
+ pos
; *sp
!= '\0'; sp
= ep
) {
3567 while (*ep
!= '\0' && *ep
!= ' ')
3571 roff_word_alloc(r
->man
, ln
, sp
- buf
->buf
, sp
);
3574 n
->flags
|= NODE_LINE
| NODE_VALID
| NODE_ENDED
;
3576 r
->man
->next
= ROFF_NEXT_SIBLING
;
3583 char *oldn
, *newn
, *end
, *value
;
3584 size_t oldsz
, newsz
, valsz
;
3586 newn
= oldn
= buf
->buf
+ pos
;
3590 newsz
= roff_getname(r
, &oldn
, ln
, pos
);
3591 if (newn
[newsz
] == '\\' || newn
[newsz
] == '\t' || *oldn
== '\0')
3595 oldsz
= roff_getname(r
, &end
, ln
, oldn
- buf
->buf
);
3599 valsz
= mandoc_asprintf(&value
, ".%.*s \\$@\\\"\n",
3601 roff_setstrn(&r
->strtab
, newn
, newsz
, value
, valsz
, 0);
3602 roff_setstrn(&r
->rentab
, newn
, newsz
, NULL
, 0, 0);
3608 * The .break request only makes sense inside conditionals,
3609 * and that case is already handled in roff_cond_sub().
3612 roff_break(ROFF_ARGS
)
3614 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, pos
, "break");
3625 if (*p
== '\0' || (r
->control
= *p
++) == '.')
3629 mandoc_msg(MANDOCERR_ARG_EXCESS
,
3630 ln
, p
- buf
->buf
, "cc ... %s", p
);
3636 roff_char(ROFF_ARGS
)
3638 const char *p
, *kp
, *vp
;
3642 /* Parse the character to be replaced. */
3644 kp
= buf
->buf
+ pos
;
3646 if (*kp
== '\0' || (*kp
== '\\' &&
3647 mandoc_escape(&p
, NULL
, NULL
) != ESCAPE_SPECIAL
) ||
3648 (*p
!= ' ' && *p
!= '\0')) {
3649 mandoc_msg(MANDOCERR_CHAR_ARG
, ln
, pos
, "char %s", kp
);
3657 * If the replacement string contains a font escape sequence,
3658 * we have to restore the font at the end.
3664 while (*p
!= '\0') {
3667 switch (mandoc_escape(&p
, NULL
, NULL
)) {
3669 case ESCAPE_FONTROMAN
:
3670 case ESCAPE_FONTITALIC
:
3671 case ESCAPE_FONTBOLD
:
3676 case ESCAPE_FONTPREV
:
3684 mandoc_msg(MANDOCERR_CHAR_FONT
,
3685 ln
, (int)(vp
- buf
->buf
), "%s", vp
);
3688 * Approximate the effect of .char using the .tr tables.
3689 * XXX In groff, .char and .tr interact differently.
3693 if (r
->xtab
== NULL
)
3694 r
->xtab
= mandoc_calloc(128, sizeof(*r
->xtab
));
3695 assert((unsigned int)*kp
< 128);
3696 free(r
->xtab
[(int)*kp
].p
);
3697 r
->xtab
[(int)*kp
].sz
= mandoc_asprintf(&r
->xtab
[(int)*kp
].p
,
3698 "%s%s", vp
, font
? "\fP" : "");
3700 roff_setstrn(&r
->xmbtab
, kp
, ksz
, vp
, vsz
, 0);
3702 roff_setstrn(&r
->xmbtab
, kp
, ksz
, "\\fP", 3, 1);
3718 mandoc_msg(MANDOCERR_ARG_EXCESS
, ln
,
3719 (int)(p
- buf
->buf
), "ec ... %s", p
);
3728 if (buf
->buf
[pos
] != '\0')
3729 mandoc_msg(MANDOCERR_ARG_SKIP
,
3730 ln
, pos
, "eo %s", buf
->buf
+ pos
);
3737 while (buf
->buf
[pos
] == ' ')
3746 const char *p
, *first
, *second
;
3748 enum mandoc_esc esc
;
3753 mandoc_msg(MANDOCERR_REQ_EMPTY
, ln
, ppos
, "tr");
3757 while (*p
!= '\0') {
3761 if (*first
== '\\') {
3762 esc
= mandoc_escape(&p
, NULL
, NULL
);
3763 if (esc
== ESCAPE_ERROR
) {
3764 mandoc_msg(MANDOCERR_ESC_BAD
, ln
,
3765 (int)(p
- buf
->buf
), "%s", first
);
3768 fsz
= (size_t)(p
- first
);
3772 if (*second
== '\\') {
3773 esc
= mandoc_escape(&p
, NULL
, NULL
);
3774 if (esc
== ESCAPE_ERROR
) {
3775 mandoc_msg(MANDOCERR_ESC_BAD
, ln
,
3776 (int)(p
- buf
->buf
), "%s", second
);
3779 ssz
= (size_t)(p
- second
);
3780 } else if (*second
== '\0') {
3781 mandoc_msg(MANDOCERR_TR_ODD
, ln
,
3782 (int)(first
- buf
->buf
), "tr %s", first
);
3788 roff_setstrn(&r
->xmbtab
, first
, fsz
,
3793 if (r
->xtab
== NULL
)
3794 r
->xtab
= mandoc_calloc(128,
3795 sizeof(struct roffstr
));
3797 free(r
->xtab
[(int)*first
].p
);
3798 r
->xtab
[(int)*first
].p
= mandoc_strndup(second
, ssz
);
3799 r
->xtab
[(int)*first
].sz
= ssz
;
3806 * Implementation of the .return request.
3807 * There is no need to call roff_userret() from here.
3808 * The read module will call that after rewinding the reader stack
3809 * to the place from where the current macro was called.
3812 roff_return(ROFF_ARGS
)
3814 if (r
->mstackpos
>= 0)
3815 return ROFF_IGN
| ROFF_USERRET
;
3817 mandoc_msg(MANDOCERR_REQ_NOMAC
, ln
, ppos
, "return");
3825 char *oldn
, *newn
, *end
;
3826 size_t oldsz
, newsz
;
3829 oldn
= newn
= buf
->buf
+ pos
;
3833 oldsz
= roff_getname(r
, &newn
, ln
, pos
);
3834 if (oldn
[oldsz
] == '\\' || oldn
[oldsz
] == '\t' || *newn
== '\0')
3838 newsz
= roff_getname(r
, &end
, ln
, newn
- buf
->buf
);
3842 deftype
= ROFFDEF_ANY
;
3843 value
= roff_getstrn(r
, oldn
, oldsz
, &deftype
);
3846 roff_setstrn(&r
->strtab
, newn
, newsz
, value
, strlen(value
), 0);
3847 roff_setstrn(&r
->strtab
, oldn
, oldsz
, NULL
, 0, 0);
3848 roff_setstrn(&r
->rentab
, newn
, newsz
, NULL
, 0, 0);
3851 roff_setstrn(&r
->strtab
, newn
, newsz
, value
, strlen(value
), 0);
3852 roff_setstrn(&r
->rentab
, newn
, newsz
, NULL
, 0, 0);
3855 roff_setstrn(&r
->rentab
, newn
, newsz
, value
, strlen(value
), 0);
3856 roff_setstrn(&r
->rentab
, oldn
, oldsz
, NULL
, 0, 0);
3857 roff_setstrn(&r
->strtab
, newn
, newsz
, NULL
, 0, 0);
3860 roff_setstrn(&r
->rentab
, newn
, newsz
, oldn
, oldsz
, 0);
3861 roff_setstrn(&r
->strtab
, newn
, newsz
, NULL
, 0, 0);
3864 roff_setstrn(&r
->strtab
, newn
, newsz
, NULL
, 0, 0);
3865 roff_setstrn(&r
->rentab
, newn
, newsz
, NULL
, 0, 0);
3872 roff_shift(ROFF_ARGS
)
3875 int argpos
, levels
, i
;
3879 if (buf
->buf
[pos
] != '\0' &&
3880 roff_evalnum(r
, ln
, buf
->buf
, &pos
, &levels
, 0) == 0) {
3881 mandoc_msg(MANDOCERR_CE_NONUM
,
3882 ln
, pos
, "shift %s", buf
->buf
+ pos
);
3885 if (r
->mstackpos
< 0) {
3886 mandoc_msg(MANDOCERR_REQ_NOMAC
, ln
, ppos
, "shift");
3889 ctx
= r
->mstack
+ r
->mstackpos
;
3890 if (levels
> ctx
->argc
) {
3891 mandoc_msg(MANDOCERR_SHIFT
,
3892 ln
, argpos
, "%d, but max is %d", levels
, ctx
->argc
);
3896 mandoc_msg(MANDOCERR_ARG_NEG
, ln
, argpos
, "shift %d", levels
);
3901 for (i
= 0; i
< levels
; i
++)
3903 ctx
->argc
-= levels
;
3904 for (i
= 0; i
< ctx
->argc
; i
++)
3905 ctx
->argv
[i
] = ctx
->argv
[i
+ levels
];
3914 name
= buf
->buf
+ pos
;
3915 mandoc_msg(MANDOCERR_SO
, ln
, ppos
, "so %s", name
);
3918 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3919 * opening anything that's not in our cwd or anything beneath
3920 * it. Thus, explicitly disallow traversing up the file-system
3921 * or using absolute paths.
3924 if (*name
== '/' || strstr(name
, "../") || strstr(name
, "/..")) {
3925 mandoc_msg(MANDOCERR_SO_PATH
, ln
, ppos
, ".so %s", name
);
3926 buf
->sz
= mandoc_asprintf(&cp
,
3927 ".sp\nSee the file %s.\n.sp", name
) + 1;
3931 return ROFF_REPARSE
;
3938 /* --- user defined strings and macros ------------------------------------ */
3941 roff_userdef(ROFF_ARGS
)
3944 char *arg
, *ap
, *dst
, *src
;
3947 /* If the macro is empty, ignore it altogether. */
3949 if (*r
->current_string
== '\0')
3952 /* Initialize a new macro stack context. */
3954 if (++r
->mstackpos
== r
->mstacksz
) {
3955 r
->mstack
= mandoc_recallocarray(r
->mstack
,
3956 r
->mstacksz
, r
->mstacksz
+ 8, sizeof(*r
->mstack
));
3959 ctx
= r
->mstack
+ r
->mstackpos
;
3963 * Collect pointers to macro argument strings,
3964 * NUL-terminating them and escaping quotes.
3967 src
= buf
->buf
+ pos
;
3968 while (*src
!= '\0') {
3969 if (ctx
->argc
== ctx
->argsz
) {
3971 ctx
->argv
= mandoc_reallocarray(ctx
->argv
,
3972 ctx
->argsz
, sizeof(*ctx
->argv
));
3974 arg
= roff_getarg(r
, &src
, ln
, &pos
);
3975 sz
= 1; /* For the terminating NUL. */
3976 for (ap
= arg
; *ap
!= '\0'; ap
++)
3977 sz
+= *ap
== '"' ? 4 : 1;
3978 ctx
->argv
[ctx
->argc
++] = dst
= mandoc_malloc(sz
);
3979 for (ap
= arg
; *ap
!= '\0'; ap
++) {
3981 memcpy(dst
, "\\(dq", 4);
3990 /* Replace the macro invocation by the macro definition. */
3993 buf
->buf
= mandoc_strdup(r
->current_string
);
3994 buf
->sz
= strlen(buf
->buf
) + 1;
3997 return buf
->buf
[buf
->sz
- 2] == '\n' ?
3998 ROFF_REPARSE
| ROFF_USERCALL
: ROFF_IGN
| ROFF_APPEND
;
4002 * Calling a high-level macro that was renamed with .rn.
4003 * r->current_string has already been set up by roff_parse().
4006 roff_renamed(ROFF_ARGS
)
4010 buf
->sz
= mandoc_asprintf(&nbuf
, ".%s%s%s", r
->current_string
,
4011 buf
->buf
[pos
] == '\0' ? "" : " ", buf
->buf
+ pos
) + 1;
4019 * Measure the length in bytes of the roff identifier at *cpp
4020 * and advance the pointer to the next word.
4023 roff_getname(struct roff
*r
, char **cpp
, int ln
, int pos
)
4032 /* Advance cp to the byte after the end of the name. */
4034 for (cp
= name
; 1; cp
++) {
4038 if (*cp
== ' ' || *cp
== '\t') {
4044 if (cp
[1] == '{' || cp
[1] == '}')
4048 mandoc_msg(MANDOCERR_NAMESC
, ln
, pos
,
4049 "%.*s", (int)(cp
- name
+ 1), name
);
4050 mandoc_escape((const char **)&cp
, NULL
, NULL
);
4054 /* Read past spaces. */
4064 * Store *string into the user-defined string called *name.
4065 * To clear an existing entry, call with (*r, *name, NULL, 0).
4066 * append == 0: replace mode
4067 * append == 1: single-line append mode
4068 * append == 2: multiline append mode, append '\n' after each call
4071 roff_setstr(struct roff
*r
, const char *name
, const char *string
,
4076 namesz
= strlen(name
);
4077 roff_setstrn(&r
->strtab
, name
, namesz
, string
,
4078 string
? strlen(string
) : 0, append
);
4079 roff_setstrn(&r
->rentab
, name
, namesz
, NULL
, 0, 0);
4083 roff_setstrn(struct roffkv
**r
, const char *name
, size_t namesz
,
4084 const char *string
, size_t stringsz
, int append
)
4089 size_t oldch
, newch
;
4091 /* Search for an existing string with the same name. */
4094 while (n
&& (namesz
!= n
->key
.sz
||
4095 strncmp(n
->key
.p
, name
, namesz
)))
4099 /* Create a new string table entry. */
4100 n
= mandoc_malloc(sizeof(struct roffkv
));
4101 n
->key
.p
= mandoc_strndup(name
, namesz
);
4107 } else if (0 == append
) {
4117 * One additional byte for the '\n' in multiline mode,
4118 * and one for the terminating '\0'.
4120 newch
= stringsz
+ (1 < append
? 2u : 1u);
4122 if (NULL
== n
->val
.p
) {
4123 n
->val
.p
= mandoc_malloc(newch
);
4128 n
->val
.p
= mandoc_realloc(n
->val
.p
, oldch
+ newch
);
4131 /* Skip existing content in the destination buffer. */
4132 c
= n
->val
.p
+ (int)oldch
;
4134 /* Append new content to the destination buffer. */
4136 while (i
< (int)stringsz
) {
4138 * Rudimentary roff copy mode:
4139 * Handle escaped backslashes.
4141 if ('\\' == string
[i
] && '\\' == string
[i
+ 1])
4146 /* Append terminating bytes. */
4151 n
->val
.sz
= (int)(c
- n
->val
.p
);
4155 roff_getstrn(struct roff
*r
, const char *name
, size_t len
,
4158 const struct roffkv
*n
;
4163 for (n
= r
->strtab
; n
!= NULL
; n
= n
->next
) {
4164 if (strncmp(name
, n
->key
.p
, len
) != 0 ||
4165 n
->key
.p
[len
] != '\0' || n
->val
.p
== NULL
)
4167 if (*deftype
& ROFFDEF_USER
) {
4168 *deftype
= ROFFDEF_USER
;
4175 for (n
= r
->rentab
; n
!= NULL
; n
= n
->next
) {
4176 if (strncmp(name
, n
->key
.p
, len
) != 0 ||
4177 n
->key
.p
[len
] != '\0' || n
->val
.p
== NULL
)
4179 if (*deftype
& ROFFDEF_REN
) {
4180 *deftype
= ROFFDEF_REN
;
4187 for (i
= 0; i
< PREDEFS_MAX
; i
++) {
4188 if (strncmp(name
, predefs
[i
].name
, len
) != 0 ||
4189 predefs
[i
].name
[len
] != '\0')
4191 if (*deftype
& ROFFDEF_PRE
) {
4192 *deftype
= ROFFDEF_PRE
;
4193 return predefs
[i
].str
;
4199 if (r
->man
->meta
.macroset
!= MACROSET_MAN
) {
4200 for (tok
= MDOC_Dd
; tok
< MDOC_MAX
; tok
++) {
4201 if (strncmp(name
, roff_name
[tok
], len
) != 0 ||
4202 roff_name
[tok
][len
] != '\0')
4204 if (*deftype
& ROFFDEF_STD
) {
4205 *deftype
= ROFFDEF_STD
;
4213 if (r
->man
->meta
.macroset
!= MACROSET_MDOC
) {
4214 for (tok
= MAN_TH
; tok
< MAN_MAX
; tok
++) {
4215 if (strncmp(name
, roff_name
[tok
], len
) != 0 ||
4216 roff_name
[tok
][len
] != '\0')
4218 if (*deftype
& ROFFDEF_STD
) {
4219 *deftype
= ROFFDEF_STD
;
4228 if (found
== 0 && *deftype
!= ROFFDEF_ANY
) {
4229 if (*deftype
& ROFFDEF_REN
) {
4231 * This might still be a request,
4232 * so do not treat it as undefined yet.
4234 *deftype
= ROFFDEF_UNDEF
;
4238 /* Using an undefined string defines it to be empty. */
4240 roff_setstrn(&r
->strtab
, name
, len
, "", 0, 0);
4241 roff_setstrn(&r
->rentab
, name
, len
, NULL
, 0, 0);
4249 roff_freestr(struct roffkv
*r
)
4251 struct roffkv
*n
, *nn
;
4253 for (n
= r
; n
; n
= nn
) {
4261 /* --- accessors and utility functions ------------------------------------ */
4264 * Duplicate an input string, making the appropriate character
4265 * conversations (as stipulated by `tr') along the way.
4266 * Returns a heap-allocated string with all the replacements made.
4269 roff_strdup(const struct roff
*r
, const char *p
)
4271 const struct roffkv
*cp
;
4275 enum mandoc_esc esc
;
4277 if (NULL
== r
->xmbtab
&& NULL
== r
->xtab
)
4278 return mandoc_strdup(p
);
4279 else if ('\0' == *p
)
4280 return mandoc_strdup("");
4283 * Step through each character looking for term matches
4284 * (remember that a `tr' can be invoked with an escape, which is
4285 * a glyph but the escape is multi-character).
4286 * We only do this if the character hash has been initialised
4287 * and the string is >0 length.
4293 while ('\0' != *p
) {
4294 assert((unsigned int)*p
< 128);
4295 if ('\\' != *p
&& r
->xtab
&& r
->xtab
[(unsigned int)*p
].p
) {
4296 sz
= r
->xtab
[(int)*p
].sz
;
4297 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
4298 memcpy(res
+ ssz
, r
->xtab
[(int)*p
].p
, sz
);
4302 } else if ('\\' != *p
) {
4303 res
= mandoc_realloc(res
, ssz
+ 2);
4308 /* Search for term matches. */
4309 for (cp
= r
->xmbtab
; cp
; cp
= cp
->next
)
4310 if (0 == strncmp(p
, cp
->key
.p
, cp
->key
.sz
))
4315 * A match has been found.
4316 * Append the match to the array and move
4317 * forward by its keysize.
4319 res
= mandoc_realloc(res
,
4320 ssz
+ cp
->val
.sz
+ 1);
4321 memcpy(res
+ ssz
, cp
->val
.p
, cp
->val
.sz
);
4323 p
+= (int)cp
->key
.sz
;
4328 * Handle escapes carefully: we need to copy
4329 * over just the escape itself, or else we might
4330 * do replacements within the escape itself.
4331 * Make sure to pass along the bogus string.
4334 esc
= mandoc_escape(&p
, NULL
, NULL
);
4335 if (ESCAPE_ERROR
== esc
) {
4337 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
4338 memcpy(res
+ ssz
, pp
, sz
);
4342 * We bail out on bad escapes.
4343 * No need to warn: we already did so when
4344 * roff_expand() was called.
4347 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
4348 memcpy(res
+ ssz
, pp
, sz
);
4352 res
[(int)ssz
] = '\0';
4357 roff_getformat(const struct roff
*r
)
4364 * Find out whether a line is a macro line or not.
4365 * If it is, adjust the current position and return one; if it isn't,
4366 * return zero and don't change the current position.
4367 * If the control character has been set with `.cc', then let that grain
4369 * This is slighly contrary to groff, where using the non-breaking
4370 * control character when `cc' has been invoked will cause the
4371 * non-breaking macro contents to be printed verbatim.
4374 roff_getcontrol(const struct roff
*r
, const char *cp
, int *ppos
)
4380 if (r
->control
!= '\0' && cp
[pos
] == r
->control
)
4382 else if (r
->control
!= '\0')
4384 else if ('\\' == cp
[pos
] && '.' == cp
[pos
+ 1])
4386 else if ('.' == cp
[pos
] || '\'' == cp
[pos
])
4391 while (' ' == cp
[pos
] || '\t' == cp
[pos
])