]>
git.cameronkatri.com Git - mandoc.git/blob - roff.c
1 /* $Id: roff.c,v 1.366 2019/07/01 22:56:24 schwarze Exp $ */
3 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
20 #include <sys/types.h>
31 #include "mandoc_aux.h"
32 #include "mandoc_ohash.h"
35 #include "mandoc_parse.h"
36 #include "libmandoc.h"
38 #include "tbl_parse.h"
39 #include "eqn_parse.h"
42 * ASCII_ESC is used to signal from roff_getarg() to roff_expand()
43 * that an escape sequence resulted from copy-in processing and
44 * needs to be checked or interpolated. As it is used nowhere
45 * else, it is defined here rather than in a header file.
49 /* Maximum number of string expansions per line, to break infinite loops. */
50 #define EXPAND_LIMIT 1000
52 /* Types of definitions of macros and strings. */
53 #define ROFFDEF_USER (1 << 1) /* User-defined. */
54 #define ROFFDEF_PRE (1 << 2) /* Predefined. */
55 #define ROFFDEF_REN (1 << 3) /* Renamed standard macro. */
56 #define ROFFDEF_STD (1 << 4) /* mdoc(7) or man(7) macro. */
57 #define ROFFDEF_ANY (ROFFDEF_USER | ROFFDEF_PRE | \
58 ROFFDEF_REN | ROFFDEF_STD)
59 #define ROFFDEF_UNDEF (1 << 5) /* Completely undefined. */
61 /* --- data types --------------------------------------------------------- */
64 * An incredibly-simple string buffer.
67 char *p
; /* nil-terminated buffer */
68 size_t sz
; /* saved strlen(p) */
72 * A key-value roffstr pair as part of a singly-linked list.
77 struct roffkv
*next
; /* next in list */
81 * A single number register as part of a singly-linked list.
91 * Association of request and macro names with token IDs.
99 * A macro processing context.
100 * More than one is needed when macro calls are nested.
109 struct roff_man
*man
; /* mdoc or man parser */
110 struct roffnode
*last
; /* leaf of stack */
111 struct mctx
*mstack
; /* stack of macro contexts */
112 int *rstack
; /* stack of inverted `ie' values */
113 struct ohash
*reqtab
; /* request lookup table */
114 struct roffreg
*regtab
; /* number registers */
115 struct roffkv
*strtab
; /* user-defined strings & macros */
116 struct roffkv
*rentab
; /* renamed strings & macros */
117 struct roffkv
*xmbtab
; /* multi-byte trans table (`tr') */
118 struct roffstr
*xtab
; /* single-byte trans table (`tr') */
119 const char *current_string
; /* value of last called user macro */
120 struct tbl_node
*first_tbl
; /* first table parsed */
121 struct tbl_node
*last_tbl
; /* last table parsed */
122 struct tbl_node
*tbl
; /* current table being parsed */
123 struct eqn_node
*last_eqn
; /* equation parser */
124 struct eqn_node
*eqn
; /* active equation parser */
125 int eqn_inline
; /* current equation is inline */
126 int options
; /* parse options */
127 int mstacksz
; /* current size of mstack */
128 int mstackpos
; /* position in mstack */
129 int rstacksz
; /* current size limit of rstack */
130 int rstackpos
; /* position in rstack */
131 int format
; /* current file in mdoc or man format */
132 char control
; /* control character */
133 char escape
; /* escape character */
137 * A macro definition, condition, or ignored block.
140 enum roff_tok tok
; /* type of node */
141 struct roffnode
*parent
; /* up one in stack */
142 int line
; /* parse line */
143 int col
; /* parse col */
144 char *name
; /* node name, e.g. macro name */
145 char *end
; /* custom end macro of the block */
146 int endspan
; /* scope to: 1=eol 2=next line -1=\} */
147 int rule
; /* content is: 1=evaluated 0=skipped */
150 #define ROFF_ARGS struct roff *r, /* parse ctx */ \
151 enum roff_tok tok, /* tok of macro */ \
152 struct buf *buf, /* input buffer */ \
153 int ln, /* parse line */ \
154 int ppos, /* original pos in buffer */ \
155 int pos, /* current pos in buffer */ \
156 int *offs /* reset offset of buffer data */
158 typedef int (*roffproc
)(ROFF_ARGS
);
161 roffproc proc
; /* process new macro */
162 roffproc text
; /* process as child text of macro */
163 roffproc sub
; /* process as child of macro */
165 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
169 const char *name
; /* predefined input name */
170 const char *str
; /* replacement symbol */
173 #define PREDEF(__name, __str) \
174 { (__name), (__str) },
176 /* --- function prototypes ------------------------------------------------ */
178 static int roffnode_cleanscope(struct roff
*);
179 static int roffnode_pop(struct roff
*);
180 static void roffnode_push(struct roff
*, enum roff_tok
,
181 const char *, int, int);
182 static void roff_addtbl(struct roff_man
*, int, struct tbl_node
*);
183 static int roff_als(ROFF_ARGS
);
184 static int roff_block(ROFF_ARGS
);
185 static int roff_block_text(ROFF_ARGS
);
186 static int roff_block_sub(ROFF_ARGS
);
187 static int roff_break(ROFF_ARGS
);
188 static int roff_cblock(ROFF_ARGS
);
189 static int roff_cc(ROFF_ARGS
);
190 static int roff_ccond(struct roff
*, int, int);
191 static int roff_char(ROFF_ARGS
);
192 static int roff_cond(ROFF_ARGS
);
193 static int roff_cond_text(ROFF_ARGS
);
194 static int roff_cond_sub(ROFF_ARGS
);
195 static int roff_ds(ROFF_ARGS
);
196 static int roff_ec(ROFF_ARGS
);
197 static int roff_eo(ROFF_ARGS
);
198 static int roff_eqndelim(struct roff
*, struct buf
*, int);
199 static int roff_evalcond(struct roff
*r
, int, char *, int *);
200 static int roff_evalnum(struct roff
*, int,
201 const char *, int *, int *, int);
202 static int roff_evalpar(struct roff
*, int,
203 const char *, int *, int *, int);
204 static int roff_evalstrcond(const char *, int *);
205 static int roff_expand(struct roff
*, struct buf
*,
207 static void roff_free1(struct roff
*);
208 static void roff_freereg(struct roffreg
*);
209 static void roff_freestr(struct roffkv
*);
210 static size_t roff_getname(struct roff
*, char **, int, int);
211 static int roff_getnum(const char *, int *, int *, int);
212 static int roff_getop(const char *, int *, char *);
213 static int roff_getregn(struct roff
*,
214 const char *, size_t, char);
215 static int roff_getregro(const struct roff
*,
217 static const char *roff_getstrn(struct roff
*,
218 const char *, size_t, int *);
219 static int roff_hasregn(const struct roff
*,
220 const char *, size_t);
221 static int roff_insec(ROFF_ARGS
);
222 static int roff_it(ROFF_ARGS
);
223 static int roff_line_ignore(ROFF_ARGS
);
224 static void roff_man_alloc1(struct roff_man
*);
225 static void roff_man_free1(struct roff_man
*);
226 static int roff_manyarg(ROFF_ARGS
);
227 static int roff_noarg(ROFF_ARGS
);
228 static int roff_nop(ROFF_ARGS
);
229 static int roff_nr(ROFF_ARGS
);
230 static int roff_onearg(ROFF_ARGS
);
231 static enum roff_tok
roff_parse(struct roff
*, char *, int *,
233 static int roff_parsetext(struct roff
*, struct buf
*,
235 static int roff_renamed(ROFF_ARGS
);
236 static int roff_return(ROFF_ARGS
);
237 static int roff_rm(ROFF_ARGS
);
238 static int roff_rn(ROFF_ARGS
);
239 static int roff_rr(ROFF_ARGS
);
240 static void roff_setregn(struct roff
*, const char *,
241 size_t, int, char, int);
242 static void roff_setstr(struct roff
*,
243 const char *, const char *, int);
244 static void roff_setstrn(struct roffkv
**, const char *,
245 size_t, const char *, size_t, int);
246 static int roff_shift(ROFF_ARGS
);
247 static int roff_so(ROFF_ARGS
);
248 static int roff_tr(ROFF_ARGS
);
249 static int roff_Dd(ROFF_ARGS
);
250 static int roff_TE(ROFF_ARGS
);
251 static int roff_TS(ROFF_ARGS
);
252 static int roff_EQ(ROFF_ARGS
);
253 static int roff_EN(ROFF_ARGS
);
254 static int roff_T_(ROFF_ARGS
);
255 static int roff_unsupp(ROFF_ARGS
);
256 static int roff_userdef(ROFF_ARGS
);
258 /* --- constant data ------------------------------------------------------ */
260 #define ROFFNUM_SCALE (1 << 0) /* Honour scaling in roff_getnum(). */
261 #define ROFFNUM_WHITE (1 << 1) /* Skip whitespace in roff_evalnum(). */
263 const char *__roff_name
[MAN_MAX
+ 1] = {
264 "br", "ce", "fi", "ft",
268 "ab", "ad", "af", "aln",
269 "als", "am", "am1", "ami",
270 "ami1", "as", "as1", "asciify",
271 "backtrace", "bd", "bleedat", "blm",
272 "box", "boxa", "bp", "BP",
273 "break", "breakchar", "brnl", "brp",
275 "cf", "cflags", "ch", "char",
276 "chop", "class", "close", "CL",
277 "color", "composite", "continue", "cp",
278 "cropat", "cs", "cu", "da",
279 "dch", "Dd", "de", "de1",
280 "defcolor", "dei", "dei1", "device",
281 "devicem", "di", "do", "ds",
282 "ds1", "dwh", "dt", "ec",
283 "ecr", "ecs", "el", "em",
284 "EN", "eo", "EP", "EQ",
285 "errprint", "ev", "evc", "ex",
286 "fallback", "fam", "fc", "fchar",
287 "fcolor", "fdeferlig", "feature", "fkern",
288 "fl", "flig", "fp", "fps",
289 "fschar", "fspacewidth", "fspecial", "ftr",
290 "fzoom", "gcolor", "hc", "hcode",
291 "hidechar", "hla", "hlm", "hpf",
292 "hpfa", "hpfcode", "hw", "hy",
293 "hylang", "hylen", "hym", "hypp",
294 "hys", "ie", "if", "ig",
295 "index", "it", "itc", "IX",
296 "kern", "kernafter", "kernbefore", "kernpair",
297 "lc", "lc_ctype", "lds", "length",
298 "letadj", "lf", "lg", "lhang",
299 "linetabs", "lnr", "lnrf", "lpfx",
301 "mediasize", "minss", "mk", "mso",
302 "na", "ne", "nh", "nhychar",
303 "nm", "nn", "nop", "nr",
304 "nrf", "nroff", "ns", "nx",
305 "open", "opena", "os", "output",
306 "padj", "papersize", "pc", "pev",
307 "pi", "PI", "pl", "pm",
309 "psbb", "pshape", "pso", "ptr",
310 "pvs", "rchar", "rd", "recursionlimit",
311 "return", "rfschar", "rhang",
312 "rm", "rn", "rnn", "rr",
313 "rs", "rt", "schar", "sentchar",
314 "shc", "shift", "sizes", "so",
315 "spacewidth", "special", "spreadwarn", "ss",
316 "sty", "substring", "sv", "sy",
319 "tm", "tm1", "tmc", "tr",
320 "track", "transchar", "trf", "trimat",
321 "trin", "trnt", "troff", "TS",
322 "uf", "ul", "unformat", "unwatch",
323 "unwatchn", "vpt", "vs", "warn",
324 "warnscale", "watch", "watchlength", "watchn",
325 "wh", "while", "write", "writec",
326 "writem", "xflag", ".", NULL
,
328 "Dd", "Dt", "Os", "Sh",
329 "Ss", "Pp", "D1", "Dl",
330 "Bd", "Ed", "Bl", "El",
331 "It", "Ad", "An", "Ap",
332 "Ar", "Cd", "Cm", "Dv",
333 "Er", "Ev", "Ex", "Fa",
334 "Fd", "Fl", "Fn", "Ft",
335 "Ic", "In", "Li", "Nd",
336 "Nm", "Op", "Ot", "Pa",
337 "Rv", "St", "Va", "Vt",
338 "Xr", "%A", "%B", "%D",
339 "%I", "%J", "%N", "%O",
340 "%P", "%R", "%T", "%V",
341 "Ac", "Ao", "Aq", "At",
342 "Bc", "Bf", "Bo", "Bq",
343 "Bsx", "Bx", "Db", "Dc",
344 "Do", "Dq", "Ec", "Ef",
345 "Em", "Eo", "Fx", "Ms",
346 "No", "Ns", "Nx", "Ox",
347 "Pc", "Pf", "Po", "Pq",
348 "Qc", "Ql", "Qo", "Qq",
349 "Re", "Rs", "Sc", "So",
350 "Sq", "Sm", "Sx", "Sy",
351 "Tn", "Ux", "Xc", "Xo",
352 "Fo", "Fc", "Oo", "Oc",
353 "Bk", "Ek", "Bt", "Hf",
354 "Fr", "Ud", "Lb", "Lp",
355 "Lk", "Mt", "Brq", "Bro",
356 "Brc", "%C", "Es", "En",
357 "Dx", "%Q", "%U", "Ta",
359 "TH", "SH", "SS", "TP",
361 "LP", "PP", "P", "IP",
362 "HP", "SM", "SB", "BI",
363 "IB", "BR", "RB", "R",
364 "B", "I", "IR", "RI",
365 "RE", "RS", "DT", "UC",
369 "UE", "MT", "ME", NULL
371 const char *const *roff_name
= __roff_name
;
373 static struct roffmac roffs
[TOKEN_NONE
] = {
374 { roff_noarg
, NULL
, NULL
, 0 }, /* br */
375 { roff_onearg
, NULL
, NULL
, 0 }, /* ce */
376 { roff_noarg
, NULL
, NULL
, 0 }, /* fi */
377 { roff_onearg
, NULL
, NULL
, 0 }, /* ft */
378 { roff_onearg
, NULL
, NULL
, 0 }, /* ll */
379 { roff_onearg
, NULL
, NULL
, 0 }, /* mc */
380 { roff_noarg
, NULL
, NULL
, 0 }, /* nf */
381 { roff_onearg
, NULL
, NULL
, 0 }, /* po */
382 { roff_onearg
, NULL
, NULL
, 0 }, /* rj */
383 { roff_onearg
, NULL
, NULL
, 0 }, /* sp */
384 { roff_manyarg
, NULL
, NULL
, 0 }, /* ta */
385 { roff_onearg
, NULL
, NULL
, 0 }, /* ti */
386 { NULL
, NULL
, NULL
, 0 }, /* ROFF_MAX */
387 { roff_unsupp
, NULL
, NULL
, 0 }, /* ab */
388 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ad */
389 { roff_line_ignore
, NULL
, NULL
, 0 }, /* af */
390 { roff_unsupp
, NULL
, NULL
, 0 }, /* aln */
391 { roff_als
, NULL
, NULL
, 0 }, /* als */
392 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* am */
393 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* am1 */
394 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* ami */
395 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* ami1 */
396 { roff_ds
, NULL
, NULL
, 0 }, /* as */
397 { roff_ds
, NULL
, NULL
, 0 }, /* as1 */
398 { roff_unsupp
, NULL
, NULL
, 0 }, /* asciify */
399 { roff_line_ignore
, NULL
, NULL
, 0 }, /* backtrace */
400 { roff_line_ignore
, NULL
, NULL
, 0 }, /* bd */
401 { roff_line_ignore
, NULL
, NULL
, 0 }, /* bleedat */
402 { roff_unsupp
, NULL
, NULL
, 0 }, /* blm */
403 { roff_unsupp
, NULL
, NULL
, 0 }, /* box */
404 { roff_unsupp
, NULL
, NULL
, 0 }, /* boxa */
405 { roff_line_ignore
, NULL
, NULL
, 0 }, /* bp */
406 { roff_unsupp
, NULL
, NULL
, 0 }, /* BP */
407 { roff_break
, NULL
, NULL
, 0 }, /* break */
408 { roff_line_ignore
, NULL
, NULL
, 0 }, /* breakchar */
409 { roff_line_ignore
, NULL
, NULL
, 0 }, /* brnl */
410 { roff_noarg
, NULL
, NULL
, 0 }, /* brp */
411 { roff_line_ignore
, NULL
, NULL
, 0 }, /* brpnl */
412 { roff_unsupp
, NULL
, NULL
, 0 }, /* c2 */
413 { roff_cc
, NULL
, NULL
, 0 }, /* cc */
414 { roff_insec
, NULL
, NULL
, 0 }, /* cf */
415 { roff_line_ignore
, NULL
, NULL
, 0 }, /* cflags */
416 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ch */
417 { roff_char
, NULL
, NULL
, 0 }, /* char */
418 { roff_unsupp
, NULL
, NULL
, 0 }, /* chop */
419 { roff_line_ignore
, NULL
, NULL
, 0 }, /* class */
420 { roff_insec
, NULL
, NULL
, 0 }, /* close */
421 { roff_unsupp
, NULL
, NULL
, 0 }, /* CL */
422 { roff_line_ignore
, NULL
, NULL
, 0 }, /* color */
423 { roff_unsupp
, NULL
, NULL
, 0 }, /* composite */
424 { roff_unsupp
, NULL
, NULL
, 0 }, /* continue */
425 { roff_line_ignore
, NULL
, NULL
, 0 }, /* cp */
426 { roff_line_ignore
, NULL
, NULL
, 0 }, /* cropat */
427 { roff_line_ignore
, NULL
, NULL
, 0 }, /* cs */
428 { roff_line_ignore
, NULL
, NULL
, 0 }, /* cu */
429 { roff_unsupp
, NULL
, NULL
, 0 }, /* da */
430 { roff_unsupp
, NULL
, NULL
, 0 }, /* dch */
431 { roff_Dd
, NULL
, NULL
, 0 }, /* Dd */
432 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* de */
433 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* de1 */
434 { roff_line_ignore
, NULL
, NULL
, 0 }, /* defcolor */
435 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* dei */
436 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* dei1 */
437 { roff_unsupp
, NULL
, NULL
, 0 }, /* device */
438 { roff_unsupp
, NULL
, NULL
, 0 }, /* devicem */
439 { roff_unsupp
, NULL
, NULL
, 0 }, /* di */
440 { roff_unsupp
, NULL
, NULL
, 0 }, /* do */
441 { roff_ds
, NULL
, NULL
, 0 }, /* ds */
442 { roff_ds
, NULL
, NULL
, 0 }, /* ds1 */
443 { roff_unsupp
, NULL
, NULL
, 0 }, /* dwh */
444 { roff_unsupp
, NULL
, NULL
, 0 }, /* dt */
445 { roff_ec
, NULL
, NULL
, 0 }, /* ec */
446 { roff_unsupp
, NULL
, NULL
, 0 }, /* ecr */
447 { roff_unsupp
, NULL
, NULL
, 0 }, /* ecs */
448 { roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
}, /* el */
449 { roff_unsupp
, NULL
, NULL
, 0 }, /* em */
450 { roff_EN
, NULL
, NULL
, 0 }, /* EN */
451 { roff_eo
, NULL
, NULL
, 0 }, /* eo */
452 { roff_unsupp
, NULL
, NULL
, 0 }, /* EP */
453 { roff_EQ
, NULL
, NULL
, 0 }, /* EQ */
454 { roff_line_ignore
, NULL
, NULL
, 0 }, /* errprint */
455 { roff_unsupp
, NULL
, NULL
, 0 }, /* ev */
456 { roff_unsupp
, NULL
, NULL
, 0 }, /* evc */
457 { roff_unsupp
, NULL
, NULL
, 0 }, /* ex */
458 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fallback */
459 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fam */
460 { roff_unsupp
, NULL
, NULL
, 0 }, /* fc */
461 { roff_unsupp
, NULL
, NULL
, 0 }, /* fchar */
462 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fcolor */
463 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fdeferlig */
464 { roff_line_ignore
, NULL
, NULL
, 0 }, /* feature */
465 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fkern */
466 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fl */
467 { roff_line_ignore
, NULL
, NULL
, 0 }, /* flig */
468 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fp */
469 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fps */
470 { roff_unsupp
, NULL
, NULL
, 0 }, /* fschar */
471 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fspacewidth */
472 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fspecial */
473 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ftr */
474 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fzoom */
475 { roff_line_ignore
, NULL
, NULL
, 0 }, /* gcolor */
476 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hc */
477 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hcode */
478 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hidechar */
479 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hla */
480 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hlm */
481 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hpf */
482 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hpfa */
483 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hpfcode */
484 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hw */
485 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hy */
486 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hylang */
487 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hylen */
488 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hym */
489 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hypp */
490 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hys */
491 { roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
}, /* ie */
492 { roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
}, /* if */
493 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* ig */
494 { roff_unsupp
, NULL
, NULL
, 0 }, /* index */
495 { roff_it
, NULL
, NULL
, 0 }, /* it */
496 { roff_unsupp
, NULL
, NULL
, 0 }, /* itc */
497 { roff_line_ignore
, NULL
, NULL
, 0 }, /* IX */
498 { roff_line_ignore
, NULL
, NULL
, 0 }, /* kern */
499 { roff_line_ignore
, NULL
, NULL
, 0 }, /* kernafter */
500 { roff_line_ignore
, NULL
, NULL
, 0 }, /* kernbefore */
501 { roff_line_ignore
, NULL
, NULL
, 0 }, /* kernpair */
502 { roff_unsupp
, NULL
, NULL
, 0 }, /* lc */
503 { roff_unsupp
, NULL
, NULL
, 0 }, /* lc_ctype */
504 { roff_unsupp
, NULL
, NULL
, 0 }, /* lds */
505 { roff_unsupp
, NULL
, NULL
, 0 }, /* length */
506 { roff_line_ignore
, NULL
, NULL
, 0 }, /* letadj */
507 { roff_insec
, NULL
, NULL
, 0 }, /* lf */
508 { roff_line_ignore
, NULL
, NULL
, 0 }, /* lg */
509 { roff_line_ignore
, NULL
, NULL
, 0 }, /* lhang */
510 { roff_unsupp
, NULL
, NULL
, 0 }, /* linetabs */
511 { roff_unsupp
, NULL
, NULL
, 0 }, /* lnr */
512 { roff_unsupp
, NULL
, NULL
, 0 }, /* lnrf */
513 { roff_unsupp
, NULL
, NULL
, 0 }, /* lpfx */
514 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ls */
515 { roff_unsupp
, NULL
, NULL
, 0 }, /* lsm */
516 { roff_line_ignore
, NULL
, NULL
, 0 }, /* lt */
517 { roff_line_ignore
, NULL
, NULL
, 0 }, /* mediasize */
518 { roff_line_ignore
, NULL
, NULL
, 0 }, /* minss */
519 { roff_line_ignore
, NULL
, NULL
, 0 }, /* mk */
520 { roff_insec
, NULL
, NULL
, 0 }, /* mso */
521 { roff_line_ignore
, NULL
, NULL
, 0 }, /* na */
522 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ne */
523 { roff_line_ignore
, NULL
, NULL
, 0 }, /* nh */
524 { roff_line_ignore
, NULL
, NULL
, 0 }, /* nhychar */
525 { roff_unsupp
, NULL
, NULL
, 0 }, /* nm */
526 { roff_unsupp
, NULL
, NULL
, 0 }, /* nn */
527 { roff_nop
, NULL
, NULL
, 0 }, /* nop */
528 { roff_nr
, NULL
, NULL
, 0 }, /* nr */
529 { roff_unsupp
, NULL
, NULL
, 0 }, /* nrf */
530 { roff_line_ignore
, NULL
, NULL
, 0 }, /* nroff */
531 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ns */
532 { roff_insec
, NULL
, NULL
, 0 }, /* nx */
533 { roff_insec
, NULL
, NULL
, 0 }, /* open */
534 { roff_insec
, NULL
, NULL
, 0 }, /* opena */
535 { roff_line_ignore
, NULL
, NULL
, 0 }, /* os */
536 { roff_unsupp
, NULL
, NULL
, 0 }, /* output */
537 { roff_line_ignore
, NULL
, NULL
, 0 }, /* padj */
538 { roff_line_ignore
, NULL
, NULL
, 0 }, /* papersize */
539 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pc */
540 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pev */
541 { roff_insec
, NULL
, NULL
, 0 }, /* pi */
542 { roff_unsupp
, NULL
, NULL
, 0 }, /* PI */
543 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pl */
544 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pm */
545 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pn */
546 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pnr */
547 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ps */
548 { roff_unsupp
, NULL
, NULL
, 0 }, /* psbb */
549 { roff_unsupp
, NULL
, NULL
, 0 }, /* pshape */
550 { roff_insec
, NULL
, NULL
, 0 }, /* pso */
551 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ptr */
552 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pvs */
553 { roff_unsupp
, NULL
, NULL
, 0 }, /* rchar */
554 { roff_line_ignore
, NULL
, NULL
, 0 }, /* rd */
555 { roff_line_ignore
, NULL
, NULL
, 0 }, /* recursionlimit */
556 { roff_return
, NULL
, NULL
, 0 }, /* return */
557 { roff_unsupp
, NULL
, NULL
, 0 }, /* rfschar */
558 { roff_line_ignore
, NULL
, NULL
, 0 }, /* rhang */
559 { roff_rm
, NULL
, NULL
, 0 }, /* rm */
560 { roff_rn
, NULL
, NULL
, 0 }, /* rn */
561 { roff_unsupp
, NULL
, NULL
, 0 }, /* rnn */
562 { roff_rr
, NULL
, NULL
, 0 }, /* rr */
563 { roff_line_ignore
, NULL
, NULL
, 0 }, /* rs */
564 { roff_line_ignore
, NULL
, NULL
, 0 }, /* rt */
565 { roff_unsupp
, NULL
, NULL
, 0 }, /* schar */
566 { roff_line_ignore
, NULL
, NULL
, 0 }, /* sentchar */
567 { roff_line_ignore
, NULL
, NULL
, 0 }, /* shc */
568 { roff_shift
, NULL
, NULL
, 0 }, /* shift */
569 { roff_line_ignore
, NULL
, NULL
, 0 }, /* sizes */
570 { roff_so
, NULL
, NULL
, 0 }, /* so */
571 { roff_line_ignore
, NULL
, NULL
, 0 }, /* spacewidth */
572 { roff_line_ignore
, NULL
, NULL
, 0 }, /* special */
573 { roff_line_ignore
, NULL
, NULL
, 0 }, /* spreadwarn */
574 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ss */
575 { roff_line_ignore
, NULL
, NULL
, 0 }, /* sty */
576 { roff_unsupp
, NULL
, NULL
, 0 }, /* substring */
577 { roff_line_ignore
, NULL
, NULL
, 0 }, /* sv */
578 { roff_insec
, NULL
, NULL
, 0 }, /* sy */
579 { roff_T_
, NULL
, NULL
, 0 }, /* T& */
580 { roff_unsupp
, NULL
, NULL
, 0 }, /* tc */
581 { roff_TE
, NULL
, NULL
, 0 }, /* TE */
582 { roff_Dd
, NULL
, NULL
, 0 }, /* TH */
583 { roff_line_ignore
, NULL
, NULL
, 0 }, /* tkf */
584 { roff_unsupp
, NULL
, NULL
, 0 }, /* tl */
585 { roff_line_ignore
, NULL
, NULL
, 0 }, /* tm */
586 { roff_line_ignore
, NULL
, NULL
, 0 }, /* tm1 */
587 { roff_line_ignore
, NULL
, NULL
, 0 }, /* tmc */
588 { roff_tr
, NULL
, NULL
, 0 }, /* tr */
589 { roff_line_ignore
, NULL
, NULL
, 0 }, /* track */
590 { roff_line_ignore
, NULL
, NULL
, 0 }, /* transchar */
591 { roff_insec
, NULL
, NULL
, 0 }, /* trf */
592 { roff_line_ignore
, NULL
, NULL
, 0 }, /* trimat */
593 { roff_unsupp
, NULL
, NULL
, 0 }, /* trin */
594 { roff_unsupp
, NULL
, NULL
, 0 }, /* trnt */
595 { roff_line_ignore
, NULL
, NULL
, 0 }, /* troff */
596 { roff_TS
, NULL
, NULL
, 0 }, /* TS */
597 { roff_line_ignore
, NULL
, NULL
, 0 }, /* uf */
598 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ul */
599 { roff_unsupp
, NULL
, NULL
, 0 }, /* unformat */
600 { roff_line_ignore
, NULL
, NULL
, 0 }, /* unwatch */
601 { roff_line_ignore
, NULL
, NULL
, 0 }, /* unwatchn */
602 { roff_line_ignore
, NULL
, NULL
, 0 }, /* vpt */
603 { roff_line_ignore
, NULL
, NULL
, 0 }, /* vs */
604 { roff_line_ignore
, NULL
, NULL
, 0 }, /* warn */
605 { roff_line_ignore
, NULL
, NULL
, 0 }, /* warnscale */
606 { roff_line_ignore
, NULL
, NULL
, 0 }, /* watch */
607 { roff_line_ignore
, NULL
, NULL
, 0 }, /* watchlength */
608 { roff_line_ignore
, NULL
, NULL
, 0 }, /* watchn */
609 { roff_unsupp
, NULL
, NULL
, 0 }, /* wh */
610 { roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
}, /*while*/
611 { roff_insec
, NULL
, NULL
, 0 }, /* write */
612 { roff_insec
, NULL
, NULL
, 0 }, /* writec */
613 { roff_insec
, NULL
, NULL
, 0 }, /* writem */
614 { roff_line_ignore
, NULL
, NULL
, 0 }, /* xflag */
615 { roff_cblock
, NULL
, NULL
, 0 }, /* . */
616 { roff_renamed
, NULL
, NULL
, 0 },
617 { roff_userdef
, NULL
, NULL
, 0 }
620 /* Array of injected predefined strings. */
621 #define PREDEFS_MAX 38
622 static const struct predef predefs
[PREDEFS_MAX
] = {
623 #include "predefs.in"
626 static int roffce_lines
; /* number of input lines to center */
627 static struct roff_node
*roffce_node
; /* active request */
628 static int roffit_lines
; /* number of lines to delay */
629 static char *roffit_macro
; /* nil-terminated macro line */
632 /* --- request table ------------------------------------------------------ */
635 roffhash_alloc(enum roff_tok mintok
, enum roff_tok maxtok
)
643 htab
= mandoc_malloc(sizeof(*htab
));
644 mandoc_ohash_init(htab
, 8, offsetof(struct roffreq
, name
));
646 for (tok
= mintok
; tok
< maxtok
; tok
++) {
647 if (roff_name
[tok
] == NULL
)
649 sz
= strlen(roff_name
[tok
]);
650 req
= mandoc_malloc(sizeof(*req
) + sz
+ 1);
652 memcpy(req
->name
, roff_name
[tok
], sz
+ 1);
653 slot
= ohash_qlookup(htab
, req
->name
);
654 ohash_insert(htab
, slot
, req
);
660 roffhash_free(struct ohash
*htab
)
667 for (req
= ohash_first(htab
, &slot
); req
!= NULL
;
668 req
= ohash_next(htab
, &slot
))
675 roffhash_find(struct ohash
*htab
, const char *name
, size_t sz
)
682 req
= ohash_find(htab
, ohash_qlookupi(htab
, name
, &end
));
684 req
= ohash_find(htab
, ohash_qlookup(htab
, name
));
685 return req
== NULL
? TOKEN_NONE
: req
->tok
;
688 /* --- stack of request blocks -------------------------------------------- */
691 * Pop the current node off of the stack of roff instructions currently
692 * pending. Return 1 if it is a loop or 0 otherwise.
695 roffnode_pop(struct roff
*r
)
701 inloop
= p
->tok
== ROFF_while
;
710 * Push a roff node onto the instruction stack. This must later be
711 * removed with roffnode_pop().
714 roffnode_push(struct roff
*r
, enum roff_tok tok
, const char *name
,
719 p
= mandoc_calloc(1, sizeof(struct roffnode
));
722 p
->name
= mandoc_strdup(name
);
726 p
->rule
= p
->parent
? p
->parent
->rule
: 0;
731 /* --- roff parser state data management ---------------------------------- */
734 roff_free1(struct roff
*r
)
738 tbl_free(r
->first_tbl
);
739 r
->first_tbl
= r
->last_tbl
= r
->tbl
= NULL
;
741 eqn_free(r
->last_eqn
);
742 r
->last_eqn
= r
->eqn
= NULL
;
744 while (r
->mstackpos
>= 0)
755 roff_freereg(r
->regtab
);
758 roff_freestr(r
->strtab
);
759 roff_freestr(r
->rentab
);
760 roff_freestr(r
->xmbtab
);
761 r
->strtab
= r
->rentab
= r
->xmbtab
= NULL
;
764 for (i
= 0; i
< 128; i
++)
771 roff_reset(struct roff
*r
)
774 r
->format
= r
->options
& (MPARSE_MDOC
| MPARSE_MAN
);
784 roff_free(struct roff
*r
)
789 for (i
= 0; i
< r
->mstacksz
; i
++)
790 free(r
->mstack
[i
].argv
);
792 roffhash_free(r
->reqtab
);
797 roff_alloc(int options
)
801 r
= mandoc_calloc(1, sizeof(struct roff
));
802 r
->reqtab
= roffhash_alloc(0, ROFF_RENAMED
);
803 r
->options
= options
;
804 r
->format
= options
& (MPARSE_MDOC
| MPARSE_MAN
);
811 /* --- syntax tree state data management ---------------------------------- */
814 roff_man_free1(struct roff_man
*man
)
816 if (man
->meta
.first
!= NULL
)
817 roff_node_delete(man
, man
->meta
.first
);
818 free(man
->meta
.msec
);
821 free(man
->meta
.arch
);
822 free(man
->meta
.title
);
823 free(man
->meta
.name
);
824 free(man
->meta
.date
);
825 free(man
->meta
.sodest
);
829 roff_state_reset(struct roff_man
*man
)
831 man
->last
= man
->meta
.first
;
834 man
->lastsec
= man
->lastnamed
= SEC_NONE
;
835 man
->next
= ROFF_NEXT_CHILD
;
836 roff_setreg(man
->roff
, "nS", 0, '=');
840 roff_man_alloc1(struct roff_man
*man
)
842 memset(&man
->meta
, 0, sizeof(man
->meta
));
843 man
->meta
.first
= mandoc_calloc(1, sizeof(*man
->meta
.first
));
844 man
->meta
.first
->type
= ROFFT_ROOT
;
845 man
->meta
.macroset
= MACROSET_NONE
;
846 roff_state_reset(man
);
850 roff_man_reset(struct roff_man
*man
)
853 roff_man_alloc1(man
);
857 roff_man_free(struct roff_man
*man
)
864 roff_man_alloc(struct roff
*roff
, const char *os_s
, int quick
)
866 struct roff_man
*man
;
868 man
= mandoc_calloc(1, sizeof(*man
));
872 roff_man_alloc1(man
);
877 /* --- syntax tree handling ----------------------------------------------- */
880 roff_node_alloc(struct roff_man
*man
, int line
, int pos
,
881 enum roff_type type
, int tok
)
885 n
= mandoc_calloc(1, sizeof(*n
));
890 n
->sec
= man
->lastsec
;
892 if (man
->flags
& MDOC_SYNOPSIS
)
893 n
->flags
|= NODE_SYNPRETTY
;
895 n
->flags
&= ~NODE_SYNPRETTY
;
896 if ((man
->flags
& (ROFF_NOFILL
| ROFF_NONOFILL
)) == ROFF_NOFILL
)
897 n
->flags
|= NODE_NOFILL
;
899 n
->flags
&= ~NODE_NOFILL
;
900 if (man
->flags
& MDOC_NEWLINE
)
901 n
->flags
|= NODE_LINE
;
902 man
->flags
&= ~MDOC_NEWLINE
;
908 roff_node_append(struct roff_man
*man
, struct roff_node
*n
)
912 case ROFF_NEXT_SIBLING
:
913 if (man
->last
->next
!= NULL
) {
914 n
->next
= man
->last
->next
;
915 man
->last
->next
->prev
= n
;
917 man
->last
->parent
->last
= n
;
920 n
->parent
= man
->last
->parent
;
922 case ROFF_NEXT_CHILD
:
923 if (man
->last
->child
!= NULL
) {
924 n
->next
= man
->last
->child
;
925 man
->last
->child
->prev
= n
;
928 man
->last
->child
= n
;
929 n
->parent
= man
->last
;
941 if (n
->end
!= ENDBODY_NOT
)
953 * Copy over the normalised-data pointer of our parent. Not
954 * everybody has one, but copying a null pointer is fine.
957 n
->norm
= n
->parent
->norm
;
958 assert(n
->parent
->type
== ROFFT_BLOCK
);
962 roff_word_alloc(struct roff_man
*man
, int line
, int pos
, const char *word
)
966 n
= roff_node_alloc(man
, line
, pos
, ROFFT_TEXT
, TOKEN_NONE
);
967 n
->string
= roff_strdup(man
->roff
, word
);
968 roff_node_append(man
, n
);
969 n
->flags
|= NODE_VALID
| NODE_ENDED
;
970 man
->next
= ROFF_NEXT_SIBLING
;
974 roff_word_append(struct roff_man
*man
, const char *word
)
977 char *addstr
, *newstr
;
980 addstr
= roff_strdup(man
->roff
, word
);
981 mandoc_asprintf(&newstr
, "%s %s", n
->string
, addstr
);
985 man
->next
= ROFF_NEXT_SIBLING
;
989 roff_elem_alloc(struct roff_man
*man
, int line
, int pos
, int tok
)
993 n
= roff_node_alloc(man
, line
, pos
, ROFFT_ELEM
, tok
);
994 roff_node_append(man
, n
);
995 man
->next
= ROFF_NEXT_CHILD
;
999 roff_block_alloc(struct roff_man
*man
, int line
, int pos
, int tok
)
1001 struct roff_node
*n
;
1003 n
= roff_node_alloc(man
, line
, pos
, ROFFT_BLOCK
, tok
);
1004 roff_node_append(man
, n
);
1005 man
->next
= ROFF_NEXT_CHILD
;
1010 roff_head_alloc(struct roff_man
*man
, int line
, int pos
, int tok
)
1012 struct roff_node
*n
;
1014 n
= roff_node_alloc(man
, line
, pos
, ROFFT_HEAD
, tok
);
1015 roff_node_append(man
, n
);
1016 man
->next
= ROFF_NEXT_CHILD
;
1021 roff_body_alloc(struct roff_man
*man
, int line
, int pos
, int tok
)
1023 struct roff_node
*n
;
1025 n
= roff_node_alloc(man
, line
, pos
, ROFFT_BODY
, tok
);
1026 roff_node_append(man
, n
);
1027 man
->next
= ROFF_NEXT_CHILD
;
1032 roff_addtbl(struct roff_man
*man
, int line
, struct tbl_node
*tbl
)
1034 struct roff_node
*n
;
1035 struct tbl_span
*span
;
1037 if (man
->meta
.macroset
== MACROSET_MAN
)
1038 man_breakscope(man
, ROFF_TS
);
1039 while ((span
= tbl_span(tbl
)) != NULL
) {
1040 n
= roff_node_alloc(man
, line
, 0, ROFFT_TBL
, TOKEN_NONE
);
1042 roff_node_append(man
, n
);
1043 n
->flags
|= NODE_VALID
| NODE_ENDED
;
1044 man
->next
= ROFF_NEXT_SIBLING
;
1049 roff_node_unlink(struct roff_man
*man
, struct roff_node
*n
)
1052 /* Adjust siblings. */
1055 n
->prev
->next
= n
->next
;
1057 n
->next
->prev
= n
->prev
;
1059 /* Adjust parent. */
1061 if (n
->parent
!= NULL
) {
1062 if (n
->parent
->child
== n
)
1063 n
->parent
->child
= n
->next
;
1064 if (n
->parent
->last
== n
)
1065 n
->parent
->last
= n
->prev
;
1068 /* Adjust parse point. */
1072 if (man
->last
== n
) {
1073 if (n
->prev
== NULL
) {
1074 man
->last
= n
->parent
;
1075 man
->next
= ROFF_NEXT_CHILD
;
1077 man
->last
= n
->prev
;
1078 man
->next
= ROFF_NEXT_SIBLING
;
1081 if (man
->meta
.first
== n
)
1082 man
->meta
.first
= NULL
;
/*
 * Move the node n to the current parse point:
 * detach it with roff_node_unlink(), clear its sibling pointers,
 * and attach it again with roff_node_append().
 */
1086 roff_node_relink(struct roff_man
*man
, struct roff_node
*n
)
1088 roff_node_unlink(man
, n
);
/* Reset both sibling links to NULL before the node is appended again. */
1089 n
->prev
= n
->next
= NULL
;
1090 roff_node_append(man
, n
);
1094 roff_node_free(struct roff_node
*n
)
1097 if (n
->args
!= NULL
)
1098 mdoc_argv_free(n
->args
);
1099 if (n
->type
== ROFFT_BLOCK
|| n
->type
== ROFFT_ELEM
)
1101 eqn_box_free(n
->eqn
);
/*
 * Remove the node n and its whole subtree from the syntax tree.
 * Children are deleted first, by recursing on n->child until no
 * child is left; afterwards n itself is detached with
 * roff_node_unlink().
 */
1107 roff_node_delete(struct roff_man
*man
, struct roff_node
*n
)
/* Each recursive call is expected to unlink the first child, so the loop ends. */
1110 while (n
->child
!= NULL
)
1111 roff_node_delete(man
, n
->child
);
1112 roff_node_unlink(man
, n
);
1117 deroff(char **dest
, const struct roff_node
*n
)
1122 if (n
->type
!= ROFFT_TEXT
) {
1123 for (n
= n
->child
; n
!= NULL
; n
= n
->next
)
1128 /* Skip leading whitespace. */
1130 for (cp
= n
->string
; *cp
!= '\0'; cp
++) {
1131 if (cp
[0] == '\\' && cp
[1] != '\0' &&
1132 strchr(" %&0^|~", cp
[1]) != NULL
)
1134 else if ( ! isspace((unsigned char)*cp
))
1138 /* Skip trailing backslash. */
1141 if (sz
> 0 && cp
[sz
- 1] == '\\')
1144 /* Skip trailing whitespace. */
1147 if ( ! isspace((unsigned char)cp
[sz
-1]))
1150 /* Skip empty strings. */
1155 if (*dest
== NULL
) {
1156 *dest
= mandoc_strndup(cp
, sz
);
1160 mandoc_asprintf(&cp
, "%s %*s", *dest
, (int)sz
, cp
);
1165 /* --- main functions of the roff parser ---------------------------------- */
1168 * In the current line, expand escape sequences that produce parsable
1169 * input text. Also check the syntax of the remaining escape sequences,
1170 * which typically produce output glyphs or change formatter state.
1173 roff_expand(struct roff
*r
, struct buf
*buf
, int ln
, int pos
, char newesc
)
1175 struct mctx
*ctx
; /* current macro call context */
1176 char ubuf
[24]; /* buffer to print the number */
1177 struct roff_node
*n
; /* used for header comments */
1178 const char *start
; /* start of the string to process */
1179 char *stesc
; /* start of an escape sequence ('\\') */
1180 const char *esct
; /* type of esccape sequence */
1181 char *ep
; /* end of comment string */
1182 const char *stnam
; /* start of the name, after "[(*" */
1183 const char *cp
; /* end of the name, e.g. before ']' */
1184 const char *res
; /* the string to be substituted */
1185 char *nbuf
; /* new buffer to copy buf->buf to */
1186 size_t maxl
; /* expected length of the escape name */
1187 size_t naml
; /* actual length of the escape name */
1188 size_t asz
; /* length of the replacement */
1189 size_t rsz
; /* length of the rest of the string */
1190 int inaml
; /* length returned from mandoc_escape() */
1191 int expand_count
; /* to avoid infinite loops */
1192 int npos
; /* position in numeric expression */
1193 int arg_complete
; /* argument not interrupted by eol */
1194 int quote_args
; /* true for \\$@, false for \\$* */
1195 int done
; /* no more input available */
1196 int deftype
; /* type of definition to paste */
1197 int rcsid
; /* kind of RCS id seen */
1198 enum mandocerr err
; /* for escape sequence problems */
1199 char sign
; /* increment number register */
1200 char term
; /* character terminating the escape */
1202 /* Search forward for comments. */
1205 start
= buf
->buf
+ pos
;
1206 for (stesc
= buf
->buf
+ pos
; *stesc
!= '\0'; stesc
++) {
1207 if (stesc
[0] != newesc
|| stesc
[1] == '\0')
1210 if (*stesc
!= '"' && *stesc
!= '#')
1213 /* Comment found, look for RCS id. */
1216 if ((cp
= strstr(stesc
, "$" "OpenBSD")) != NULL
) {
1217 rcsid
= 1 << MANDOC_OS_OPENBSD
;
1219 } else if ((cp
= strstr(stesc
, "$" "NetBSD")) != NULL
) {
1220 rcsid
= 1 << MANDOC_OS_NETBSD
;
1224 isalnum((unsigned char)*cp
) == 0 &&
1225 strchr(cp
, '$') != NULL
) {
1226 if (r
->man
->meta
.rcsids
& rcsid
)
1227 mandoc_msg(MANDOCERR_RCS_REP
, ln
,
1228 (int)(stesc
- buf
->buf
) + 1,
1230 r
->man
->meta
.rcsids
|= rcsid
;
1233 /* Handle trailing whitespace. */
1235 ep
= strchr(stesc
--, '\0') - 1;
1240 if (*ep
== ' ' || *ep
== '\t')
1241 mandoc_msg(MANDOCERR_SPACE_EOL
,
1242 ln
, (int)(ep
- buf
->buf
), NULL
);
1245 * Save comments preceding the title macro
1246 * in the syntax tree.
1249 if (newesc
!= ASCII_ESC
&& r
->format
== 0) {
1250 while (*ep
== ' ' || *ep
== '\t')
1253 n
= roff_node_alloc(r
->man
,
1254 ln
, stesc
+ 1 - buf
->buf
,
1255 ROFFT_COMMENT
, TOKEN_NONE
);
1256 n
->string
= mandoc_strdup(stesc
+ 2);
1257 roff_node_append(r
->man
, n
);
1258 n
->flags
|= NODE_VALID
| NODE_ENDED
;
1259 r
->man
->next
= ROFF_NEXT_SIBLING
;
1262 /* Line continuation with comment. */
1264 if (stesc
[1] == '#') {
1266 return ROFF_IGN
| ROFF_APPEND
;
1269 /* Discard normal comments. */
1271 while (stesc
> start
&& stesc
[-1] == ' ' &&
1272 (stesc
== start
+ 1 || stesc
[-2] != '\\'))
1281 /* Notice the end of the input. */
1283 if (*stesc
== '\n') {
1289 while (stesc
>= start
) {
1290 if (*stesc
!= newesc
) {
1293 * If we have a non-standard escape character,
1294 * escape literal backslashes because all
1295 * processing in subsequent functions uses
1296 * the standard escaping rules.
1299 if (newesc
!= ASCII_ESC
&& *stesc
== '\\') {
1301 buf
->sz
= mandoc_asprintf(&nbuf
, "%s\\e%s",
1302 buf
->buf
, stesc
+ 1) + 1;
1304 stesc
= nbuf
+ (stesc
- buf
->buf
);
1309 /* Search backwards for the next escape. */
1315 /* If it is escaped, skip it. */
1317 for (cp
= stesc
- 1; cp
>= start
; cp
--)
1318 if (*cp
!= r
->escape
)
1321 if ((stesc
- cp
) % 2 == 0) {
1325 } else if (stesc
[1] != '\0') {
1332 return ROFF_IGN
| ROFF_APPEND
;
1335 /* Decide whether to expand or to check only. */
1353 if (sign
== '+' || sign
== '-')
1359 switch(mandoc_escape(&cp
, &stnam
, &inaml
)) {
1360 case ESCAPE_SPECIAL
:
1361 if (mchars_spec2cp(stnam
, inaml
) >= 0)
1365 err
= MANDOCERR_ESC_BAD
;
1368 err
= MANDOCERR_ESC_UNDEF
;
1371 err
= MANDOCERR_ESC_UNSUPP
;
1376 if (err
!= MANDOCERR_OK
)
1377 mandoc_msg(err
, ln
, (int)(stesc
- buf
->buf
),
1378 "%.*s", (int)(cp
- stesc
), stesc
);
1383 if (EXPAND_LIMIT
< ++expand_count
) {
1384 mandoc_msg(MANDOCERR_ROFFLOOP
,
1385 ln
, (int)(stesc
- buf
->buf
), NULL
);
1390 * The third character decides the length
1391 * of the name of the string or register.
1392 * Save a pointer to the name.
1419 /* Advance to the end of the name. */
1423 while (maxl
== 0 || naml
< maxl
) {
1425 mandoc_msg(MANDOCERR_ESC_BAD
, ln
,
1426 (int)(stesc
- buf
->buf
), "%s", stesc
);
1430 if (maxl
== 0 && *cp
== term
) {
1434 if (*cp
++ != '\\' || *esct
!= 'w') {
1438 switch (mandoc_escape(&cp
, NULL
, NULL
)) {
1439 case ESCAPE_SPECIAL
:
1440 case ESCAPE_UNICODE
:
1441 case ESCAPE_NUMBERED
:
1443 case ESCAPE_OVERSTRIKE
:
1452 * Retrieve the replacement string; if it is
1453 * undefined, resume searching for escapes.
1459 deftype
= ROFFDEF_USER
| ROFFDEF_PRE
;
1460 res
= roff_getstrn(r
, stnam
, naml
, &deftype
);
1463 * If not overridden, let \*(.T
1464 * through to the formatters.
1467 if (res
== NULL
&& naml
== 2 &&
1468 stnam
[0] == '.' && stnam
[1] == 'T') {
1469 roff_setstrn(&r
->strtab
,
1470 ".T", 2, NULL
, 0, 0);
1477 if (r
->mstackpos
< 0) {
1478 mandoc_msg(MANDOCERR_ARG_UNDEF
, ln
,
1479 (int)(stesc
- buf
->buf
), "%.3s", stesc
);
1482 ctx
= r
->mstack
+ r
->mstackpos
;
1483 npos
= esct
[1] - '1';
1484 if (npos
>= 0 && npos
<= 8) {
1485 res
= npos
< ctx
->argc
?
1486 ctx
->argv
[npos
] : "";
1491 else if (esct
[1] == '@')
1494 mandoc_msg(MANDOCERR_ARG_NONUM
, ln
,
1495 (int)(stesc
- buf
->buf
), "%.3s", stesc
);
1499 for (npos
= 0; npos
< ctx
->argc
; npos
++) {
1503 asz
+= 2; /* quotes */
1504 asz
+= strlen(ctx
->argv
[npos
]);
1507 rsz
= buf
->sz
- (stesc
- buf
->buf
) - 3;
1509 memmove(stesc
+ asz
, stesc
+ 3, rsz
);
1511 nbuf
= mandoc_realloc(buf
->buf
, buf
->sz
);
1513 stesc
= nbuf
+ (stesc
- buf
->buf
);
1516 memmove(stesc
+ asz
, stesc
+ 3, rsz
);
1518 for (npos
= 0; npos
< ctx
->argc
; npos
++) {
1523 cp
= ctx
->argv
[npos
];
1532 ubuf
[0] = arg_complete
&&
1533 roff_evalnum(r
, ln
, stnam
, &npos
,
1534 NULL
, ROFFNUM_SCALE
) &&
1535 stnam
+ npos
+ 1 == cp
? '1' : '0';
1540 (void)snprintf(ubuf
, sizeof(ubuf
), "%d",
1541 roff_getregn(r
, stnam
, naml
, sign
));
1546 /* use even incomplete args */
1547 (void)snprintf(ubuf
, sizeof(ubuf
), "%d",
1554 mandoc_msg(MANDOCERR_STR_UNDEF
,
1555 ln
, (int)(stesc
- buf
->buf
),
1556 "%.*s", (int)naml
, stnam
);
1558 } else if (buf
->sz
+ strlen(res
) > SHRT_MAX
) {
1559 mandoc_msg(MANDOCERR_ROFFLOOP
,
1560 ln
, (int)(stesc
- buf
->buf
), NULL
);
1564 /* Replace the escape sequence by the string. */
1567 buf
->sz
= mandoc_asprintf(&nbuf
, "%s%s%s",
1568 buf
->buf
, res
, cp
) + 1;
1570 /* Prepare for the next replacement. */
1573 stesc
= nbuf
+ (stesc
- buf
->buf
) + strlen(res
);
1581 * Parse a quoted or unquoted roff-style request or macro argument.
1582 * Return a pointer to the parsed argument, which is either the original
1583 * pointer or advanced by one byte in case the argument is quoted.
1584 * NUL-terminate the argument in place.
1585 * Collapse pairs of quotes inside quoted arguments.
1586 * Advance the argument pointer to the next argument,
1587 * or to the NUL byte terminating the argument line.
1590 roff_getarg(struct roff
*r
, char **cpp
, int ln
, int *pos
)
1594 int newesc
, pairs
, quoted
, white
;
1596 /* Quoting can only start with a new word. */
1599 if ('"' == *start
) {
1604 newesc
= pairs
= white
= 0;
1605 for (cp
= start
; '\0' != *cp
; cp
++) {
1608 * Move the following text left
1609 * after quoted quotes and after "\\" and "\t".
1614 if ('\\' == cp
[0]) {
1616 * In copy mode, translate double to single
1617 * backslashes and backslash-t to literal tabs.
1628 cp
[-pairs
] = ASCII_ESC
;
1633 /* Skip escaped blanks. */
1640 } else if (0 == quoted
) {
1642 /* Unescaped blanks end unquoted args. */
1646 } else if ('"' == cp
[0]) {
1648 /* Quoted quotes collapse. */
1652 /* Unquoted quotes end quoted args. */
1659 /* Quoted argument without a closing quote. */
1661 mandoc_msg(MANDOCERR_ARG_QUOTE
, ln
, *pos
, NULL
);
1663 /* NUL-terminate this argument and move to the next one. */
1671 *pos
+= (int)(cp
- start
) + (quoted
? 1 : 0);
1674 if ('\0' == *cp
&& (white
|| ' ' == cp
[-1]))
1675 mandoc_msg(MANDOCERR_SPACE_EOL
, ln
, *pos
, NULL
);
1677 start
= mandoc_strdup(start
);
1682 buf
.sz
= strlen(start
) + 1;
1684 if (roff_expand(r
, &buf
, ln
, 0, ASCII_ESC
) & ROFF_IGN
) {
1686 buf
.buf
= mandoc_strdup("");
1693 * Process text streams.
1696 roff_parsetext(struct roff
*r
, struct buf
*buf
, int pos
, int *offs
)
1702 enum mandoc_esc esc
;
1704 /* Spring the input line trap. */
1706 if (roffit_lines
== 1) {
1707 isz
= mandoc_asprintf(&p
, "%s\n.%s", buf
->buf
, roffit_macro
);
1714 return ROFF_REPARSE
;
1715 } else if (roffit_lines
> 1)
1718 if (roffce_node
!= NULL
&& buf
->buf
[pos
] != '\0') {
1719 if (roffce_lines
< 1) {
1720 r
->man
->last
= roffce_node
;
1721 r
->man
->next
= ROFF_NEXT_SIBLING
;
1728 /* Convert all breakable hyphens into ASCII_HYPH. */
1730 start
= p
= buf
->buf
+ pos
;
1732 while (*p
!= '\0') {
1733 sz
= strcspn(p
, "-\\");
1740 /* Skip over escapes. */
1742 esc
= mandoc_escape((const char **)&p
, NULL
, NULL
);
1743 if (esc
== ESCAPE_ERROR
)
1748 } else if (p
== start
) {
1753 if (isalpha((unsigned char)p
[-1]) &&
1754 isalpha((unsigned char)p
[1]))
1762 roff_parseln(struct roff
*r
, int ln
, struct buf
*buf
, int *offs
)
1766 int pos
; /* parse point */
1767 int spos
; /* saved parse point for messages */
1768 int ppos
; /* original offset in buf->buf */
1769 int ctl
; /* macro line (boolean) */
1773 /* Handle in-line equation delimiters. */
1775 if (r
->tbl
== NULL
&&
1776 r
->last_eqn
!= NULL
&& r
->last_eqn
->delim
&&
1777 (r
->eqn
== NULL
|| r
->eqn_inline
)) {
1778 e
= roff_eqndelim(r
, buf
, pos
);
1779 if (e
== ROFF_REPARSE
)
1781 assert(e
== ROFF_CONT
);
1784 /* Expand some escape sequences. */
1786 e
= roff_expand(r
, buf
, ln
, pos
, r
->escape
);
1787 if ((e
& ROFF_MASK
) == ROFF_IGN
)
1789 assert(e
== ROFF_CONT
);
1791 ctl
= roff_getcontrol(r
, buf
->buf
, &pos
);
1794 * First, if a scope is open and we're not a macro, pass the
1795 * text through the macro's filter.
1796 * Equations process all content themselves.
1797 * Tables process almost all content themselves, but we want
1798 * to warn about macros before passing it there.
1801 if (r
->last
!= NULL
&& ! ctl
) {
1803 e
= (*roffs
[t
].text
)(r
, t
, buf
, ln
, pos
, pos
, offs
);
1804 if ((e
& ROFF_MASK
) == ROFF_IGN
)
1809 if (r
->eqn
!= NULL
&& strncmp(buf
->buf
+ ppos
, ".EN", 3)) {
1810 eqn_read(r
->eqn
, buf
->buf
+ ppos
);
1813 if (r
->tbl
!= NULL
&& (ctl
== 0 || buf
->buf
[pos
] == '\0')) {
1814 tbl_read(r
->tbl
, ln
, buf
->buf
, ppos
);
1815 roff_addtbl(r
->man
, ln
, r
->tbl
);
1819 return roff_parsetext(r
, buf
, pos
, offs
) | e
;
1821 /* Skip empty request lines. */
1823 if (buf
->buf
[pos
] == '"') {
1824 mandoc_msg(MANDOCERR_COMMENT_BAD
, ln
, pos
, NULL
);
1826 } else if (buf
->buf
[pos
] == '\0')
1830 * If a scope is open, go to the child handler for that macro,
1831 * as it may want to preprocess before doing anything with it.
1832 * Don't do so if an equation is open.
1837 return (*roffs
[t
].sub
)(r
, t
, buf
, ln
, ppos
, pos
, offs
);
1840 /* No scope is open. This is a new request or macro. */
1843 t
= roff_parse(r
, buf
->buf
, &pos
, ln
, ppos
);
1845 /* Tables ignore most macros. */
1847 if (r
->tbl
!= NULL
&& (t
== TOKEN_NONE
|| t
== ROFF_TS
||
1848 t
== ROFF_br
|| t
== ROFF_ce
|| t
== ROFF_rj
|| t
== ROFF_sp
)) {
1849 mandoc_msg(MANDOCERR_TBLMACRO
,
1850 ln
, pos
, "%s", buf
->buf
+ spos
);
1851 if (t
!= TOKEN_NONE
)
1853 while (buf
->buf
[pos
] != '\0' && buf
->buf
[pos
] != ' ')
1855 while (buf
->buf
[pos
] == ' ')
1857 tbl_read(r
->tbl
, ln
, buf
->buf
, pos
);
1858 roff_addtbl(r
->man
, ln
, r
->tbl
);
1862 /* For now, let high level macros abort .ce mode. */
1864 if (ctl
&& roffce_node
!= NULL
&&
1865 (t
== TOKEN_NONE
|| t
== ROFF_Dd
|| t
== ROFF_EQ
||
1866 t
== ROFF_TH
|| t
== ROFF_TS
)) {
1867 r
->man
->last
= roffce_node
;
1868 r
->man
->next
= ROFF_NEXT_SIBLING
;
1874 * This is neither a roff request nor a user-defined macro.
1875 * Let the standard macro set parsers handle it.
1878 if (t
== TOKEN_NONE
)
1881 /* Execute a roff request or a user defined macro. */
1883 return (*roffs
[t
].proc
)(r
, t
, buf
, ln
, spos
, pos
, offs
);
1887 * Internal interface function to tell the roff parser that execution
1888 * of the current macro ended. This is required because macro
1889 * definitions usually do not end with a .return request.
1892 roff_userret(struct roff
*r
)
1897 assert(r
->mstackpos
>= 0);
1898 ctx
= r
->mstack
+ r
->mstackpos
;
1899 for (i
= 0; i
< ctx
->argc
; i
++)
1906 roff_endparse(struct roff
*r
)
1908 if (r
->last
!= NULL
)
1909 mandoc_msg(MANDOCERR_BLK_NOEND
, r
->last
->line
,
1910 r
->last
->col
, "%s", roff_name
[r
->last
->tok
]);
1912 if (r
->eqn
!= NULL
) {
1913 mandoc_msg(MANDOCERR_BLK_NOEND
,
1914 r
->eqn
->node
->line
, r
->eqn
->node
->pos
, "EQ");
1919 if (r
->tbl
!= NULL
) {
1926 * Parse a roff node's type from the input buffer. This must be in the
1927 * form of ".foo xxx" in the usual way.
1929 static enum roff_tok
1930 roff_parse(struct roff
*r
, char *buf
, int *pos
, int ln
, int ppos
)
1940 if ('\0' == *cp
|| '"' == *cp
|| '\t' == *cp
|| ' ' == *cp
)
1944 maclen
= roff_getname(r
, &cp
, ln
, ppos
);
1946 deftype
= ROFFDEF_USER
| ROFFDEF_REN
;
1947 r
->current_string
= roff_getstrn(r
, mac
, maclen
, &deftype
);
1956 t
= roffhash_find(r
->reqtab
, mac
, maclen
);
1959 if (t
!= TOKEN_NONE
)
1961 else if (deftype
== ROFFDEF_UNDEF
) {
1962 /* Using an undefined macro defines it to be empty. */
1963 roff_setstrn(&r
->strtab
, mac
, maclen
, "", 0, 0);
1964 roff_setstrn(&r
->rentab
, mac
, maclen
, NULL
, 0, 0);
1969 /* --- handling of request blocks ----------------------------------------- */
1972 roff_cblock(ROFF_ARGS
)
1976 * A block-close `..' should only be invoked as a child of an
1977 * ignore macro, otherwise raise a warning and just ignore it.
1980 if (r
->last
== NULL
) {
1981 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "..");
1985 switch (r
->last
->tok
) {
1987 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1990 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1995 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "..");
1999 if (buf
->buf
[pos
] != '\0')
2000 mandoc_msg(MANDOCERR_ARG_SKIP
, ln
, pos
,
2001 ".. %s", buf
->buf
+ pos
);
2004 roffnode_cleanscope(r
);
2010 * Pop all nodes ending at the end of the current input line.
2011 * Return the number of loops ended.
2014 roffnode_cleanscope(struct roff
*r
)
2019 while (r
->last
!= NULL
) {
2020 if (--r
->last
->endspan
!= 0)
2022 inloop
+= roffnode_pop(r
);
2028 * Handle the closing \} of a conditional block.
2029 * Apart from generating warnings, this only pops nodes.
2030 * Return the number of loops ended.
2033 roff_ccond(struct roff
*r
, int ln
, int ppos
)
2035 if (NULL
== r
->last
) {
2036 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "\\}");
2040 switch (r
->last
->tok
) {
2047 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "\\}");
2051 if (r
->last
->endspan
> -1) {
2052 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "\\}");
2056 return roffnode_pop(r
) + roffnode_cleanscope(r
);
2060 roff_block(ROFF_ARGS
)
2062 const char *name
, *value
;
2063 char *call
, *cp
, *iname
, *rname
;
2064 size_t csz
, namesz
, rsz
;
2067 /* Ignore groff compatibility mode for now. */
2069 if (tok
== ROFF_de1
)
2071 else if (tok
== ROFF_dei1
)
2073 else if (tok
== ROFF_am1
)
2075 else if (tok
== ROFF_ami1
)
2078 /* Parse the macro name argument. */
2080 cp
= buf
->buf
+ pos
;
2081 if (tok
== ROFF_ig
) {
2086 namesz
= roff_getname(r
, &cp
, ln
, ppos
);
2087 iname
[namesz
] = '\0';
2090 /* Resolve the macro name argument if it is indirect. */
2092 if (namesz
&& (tok
== ROFF_dei
|| tok
== ROFF_ami
)) {
2093 deftype
= ROFFDEF_USER
;
2094 name
= roff_getstrn(r
, iname
, namesz
, &deftype
);
2096 mandoc_msg(MANDOCERR_STR_UNDEF
,
2097 ln
, (int)(iname
- buf
->buf
),
2098 "%.*s", (int)namesz
, iname
);
2101 namesz
= strlen(name
);
2105 if (namesz
== 0 && tok
!= ROFF_ig
) {
2106 mandoc_msg(MANDOCERR_REQ_EMPTY
,
2107 ln
, ppos
, "%s", roff_name
[tok
]);
2111 roffnode_push(r
, tok
, name
, ln
, ppos
);
2114 * At the beginning of a `de' macro, clear the existing string
2115 * with the same name, if there is one. New content will be
2116 * appended from roff_block_text() in multiline mode.
2119 if (tok
== ROFF_de
|| tok
== ROFF_dei
) {
2120 roff_setstrn(&r
->strtab
, name
, namesz
, "", 0, 0);
2121 roff_setstrn(&r
->rentab
, name
, namesz
, NULL
, 0, 0);
2122 } else if (tok
== ROFF_am
|| tok
== ROFF_ami
) {
2123 deftype
= ROFFDEF_ANY
;
2124 value
= roff_getstrn(r
, iname
, namesz
, &deftype
);
2125 switch (deftype
) { /* Before appending, ... */
2126 case ROFFDEF_PRE
: /* copy predefined to user-defined. */
2127 roff_setstrn(&r
->strtab
, name
, namesz
,
2128 value
, strlen(value
), 0);
2130 case ROFFDEF_REN
: /* call original standard macro. */
2131 csz
= mandoc_asprintf(&call
, ".%.*s \\$* \\\"\n",
2132 (int)strlen(value
), value
);
2133 roff_setstrn(&r
->strtab
, name
, namesz
, call
, csz
, 0);
2134 roff_setstrn(&r
->rentab
, name
, namesz
, NULL
, 0, 0);
2137 case ROFFDEF_STD
: /* rename and call standard macro. */
2138 rsz
= mandoc_asprintf(&rname
, "__%s_renamed", name
);
2139 roff_setstrn(&r
->rentab
, rname
, rsz
, name
, namesz
, 0);
2140 csz
= mandoc_asprintf(&call
, ".%.*s \\$* \\\"\n",
2142 roff_setstrn(&r
->strtab
, name
, namesz
, call
, csz
, 0);
2154 /* Get the custom end marker. */
2157 namesz
= roff_getname(r
, &cp
, ln
, ppos
);
2159 /* Resolve the end marker if it is indirect. */
2161 if (namesz
&& (tok
== ROFF_dei
|| tok
== ROFF_ami
)) {
2162 deftype
= ROFFDEF_USER
;
2163 name
= roff_getstrn(r
, iname
, namesz
, &deftype
);
2165 mandoc_msg(MANDOCERR_STR_UNDEF
,
2166 ln
, (int)(iname
- buf
->buf
),
2167 "%.*s", (int)namesz
, iname
);
2170 namesz
= strlen(name
);
2175 r
->last
->end
= mandoc_strndup(name
, namesz
);
2178 mandoc_msg(MANDOCERR_ARG_EXCESS
,
2179 ln
, pos
, ".%s ... %s", roff_name
[tok
], cp
);
2185 roff_block_sub(ROFF_ARGS
)
2191 * First check whether a custom macro exists at this level. If
2192 * it does, then check against it. This is some of groff's
2193 * stranger behaviours. If we encountered a custom end-scope
2194 * tag and that tag also happens to be a "real" macro, then we
2195 * need to try interpreting it again as a real macro. If it's
2196 * not, then return ignore. Else continue.
2200 for (i
= pos
, j
= 0; r
->last
->end
[j
]; j
++, i
++)
2201 if (buf
->buf
[i
] != r
->last
->end
[j
])
2204 if (r
->last
->end
[j
] == '\0' &&
2205 (buf
->buf
[i
] == '\0' ||
2206 buf
->buf
[i
] == ' ' ||
2207 buf
->buf
[i
] == '\t')) {
2209 roffnode_cleanscope(r
);
2211 while (buf
->buf
[i
] == ' ' || buf
->buf
[i
] == '\t')
2215 if (roff_parse(r
, buf
->buf
, &pos
, ln
, ppos
) !=
2223 * If we have no custom end-query or lookup failed, then try
2224 * pulling it out of the hashtable.
2227 t
= roff_parse(r
, buf
->buf
, &pos
, ln
, ppos
);
2229 if (t
!= ROFF_cblock
) {
2231 roff_setstr(r
, r
->last
->name
, buf
->buf
+ ppos
, 2);
2235 return (*roffs
[t
].proc
)(r
, t
, buf
, ln
, ppos
, pos
, offs
);
2239 roff_block_text(ROFF_ARGS
)
2243 roff_setstr(r
, r
->last
->name
, buf
->buf
+ pos
, 2);
2249 roff_cond_sub(ROFF_ARGS
)
2251 struct roffnode
*bl
;
2253 int endloop
, irc
, rr
;
2258 endloop
= tok
!= ROFF_while
? ROFF_IGN
:
2259 rr
? ROFF_LOOPCONT
: ROFF_LOOPEXIT
;
2260 if (roffnode_cleanscope(r
))
2264 * If `\}' occurs on a macro line without a preceding macro,
2265 * drop the line completely.
2268 ep
= buf
->buf
+ pos
;
2269 if (ep
[0] == '\\' && ep
[1] == '}')
2273 * The closing delimiter `\}' rewinds the conditional scope
2274 * but is otherwise ignored when interpreting the line.
2277 while ((ep
= strchr(ep
, '\\')) != NULL
) {
2280 memmove(ep
, ep
+ 2, strlen(ep
+ 2) + 1);
2281 if (roff_ccond(r
, ln
, ep
- buf
->buf
))
2294 * Fully handle known macros when they are structurally
2295 * required or when the conditional evaluated to true.
2298 t
= roff_parse(r
, buf
->buf
, &pos
, ln
, ppos
);
2299 if (t
== ROFF_break
) {
2300 if (irc
& ROFF_LOOPMASK
)
2301 irc
= ROFF_IGN
| ROFF_LOOPEXIT
;
2303 for (bl
= r
->last
; bl
!= NULL
; bl
= bl
->parent
) {
2305 if (bl
->tok
== ROFF_while
)
2309 } else if (t
!= TOKEN_NONE
&&
2310 (rr
|| roffs
[t
].flags
& ROFFMAC_STRUCT
))
2311 irc
|= (*roffs
[t
].proc
)(r
, t
, buf
, ln
, ppos
, pos
, offs
);
2313 irc
|= rr
? ROFF_CONT
: ROFF_IGN
;
2318 roff_cond_text(ROFF_ARGS
)
2321 int endloop
, irc
, rr
;
2325 endloop
= tok
!= ROFF_while
? ROFF_IGN
:
2326 rr
? ROFF_LOOPCONT
: ROFF_LOOPEXIT
;
2327 if (roffnode_cleanscope(r
))
2331 * If `\}' occurs on a text line with neither preceding
2332 * nor following characters, drop the line completely.
2335 ep
= buf
->buf
+ pos
;
2336 if (strcmp(ep
, "\\}") == 0)
2340 * The closing delimiter `\}' rewinds the conditional scope
2341 * but is otherwise ignored when interpreting the line.
2344 while ((ep
= strchr(ep
, '\\')) != NULL
) {
2347 memmove(ep
, ep
+ 2, strlen(ep
+ 2) + 1);
2348 if (roff_ccond(r
, ln
, ep
- buf
->buf
))
2364 /* --- handling of numeric and conditional expressions -------------------- */
2367 * Parse a single signed integer number. Stop at the first non-digit.
2368 * If there is at least one digit, return success and advance the
2369 * parse point, else return failure and leave the parse point unchanged.
2370 * Ignore overflows, treat them just like the C language.
2373 roff_getnum(const char *v
, int *pos
, int *res
, int flags
)
2375 int myres
, scaled
, n
, p
;
2382 if (n
|| v
[p
] == '+')
2385 if (flags
& ROFFNUM_WHITE
)
2386 while (isspace((unsigned char)v
[p
]))
2389 for (*res
= 0; isdigit((unsigned char)v
[p
]); p
++)
2390 *res
= 10 * *res
+ v
[p
] - '0';
2397 /* Each number may be followed by one optional scaling unit. */
2401 scaled
= *res
* 65536;
2404 scaled
= *res
* 240;
2407 scaled
= *res
* 240 / 2.54;
2418 scaled
= *res
* 10 / 3;
2424 scaled
= *res
* 6 / 25;
2431 if (flags
& ROFFNUM_SCALE
)
2439 * Evaluate a string comparison condition.
2440 * The first character is the delimiter.
2441 * Succeed if the string up to its second occurrence
2442 * matches the string up to its third occurrence.
2443 * Advance the cursor after the third occurrence
2444 * or lacking that, to the end of the line.
2447 roff_evalstrcond(const char *v
, int *pos
)
2449 const char *s1
, *s2
, *s3
;
2453 s1
= v
+ *pos
; /* initial delimiter */
2454 s2
= s1
+ 1; /* for scanning the first string */
2455 s3
= strchr(s2
, *s1
); /* for scanning the second string */
2457 if (NULL
== s3
) /* found no middle delimiter */
2460 while ('\0' != *++s3
) {
2461 if (*s2
!= *s3
) { /* mismatch */
2462 s3
= strchr(s3
, *s1
);
2465 if (*s3
== *s1
) { /* found the final delimiter */
2474 s3
= strchr(s2
, '\0');
2475 else if (*s3
!= '\0')
2482 * Evaluate an optionally negated single character, numerical,
2483 * or string condition.
2486 roff_evalcond(struct roff
*r
, int ln
, char *v
, int *pos
)
2488 const char *start
, *end
;
2491 int deftype
, len
, number
, savepos
, istrue
, wanttrue
;
2493 if ('!' == v
[*pos
]) {
2514 } while (v
[*pos
] == ' ');
2517 * Quirk for groff compatibility:
2518 * The horizontal tab is neither available nor unavailable.
2521 if (v
[*pos
] == '\t') {
2526 /* Printable ASCII characters are available. */
2528 if (v
[*pos
] != '\\') {
2534 switch (mandoc_escape(&end
, &start
, &len
)) {
2535 case ESCAPE_SPECIAL
:
2536 istrue
= mchars_spec2cp(start
, len
) != -1;
2538 case ESCAPE_UNICODE
:
2541 case ESCAPE_NUMBERED
:
2542 istrue
= mchars_num2char(start
, len
) != -1;
2549 return istrue
== wanttrue
;
2556 sz
= roff_getname(r
, &cp
, ln
, cp
- v
);
2559 else if (v
[*pos
] == 'r')
2560 istrue
= roff_hasregn(r
, name
, sz
);
2562 deftype
= ROFFDEF_ANY
;
2563 roff_getstrn(r
, name
, sz
, &deftype
);
2566 *pos
= (name
+ sz
) - v
;
2567 return istrue
== wanttrue
;
2573 if (roff_evalnum(r
, ln
, v
, pos
, &number
, ROFFNUM_SCALE
))
2574 return (number
> 0) == wanttrue
;
2575 else if (*pos
== savepos
)
2576 return roff_evalstrcond(v
, pos
) == wanttrue
;
2582 roff_line_ignore(ROFF_ARGS
)
/*
 * Request handler (ROFF_ARGS signature) that reports
 * MANDOCERR_REQ_INSEC at ln, ppos, naming the request via
 * roff_name[tok].
 */
2589 roff_insec(ROFF_ARGS
)
2592 mandoc_msg(MANDOCERR_REQ_INSEC
, ln
, ppos
, "%s", roff_name
[tok
]);
/*
 * Request handler (ROFF_ARGS signature) that reports
 * MANDOCERR_REQ_UNSUPP at ln, ppos, naming the request via
 * roff_name[tok].
 */
2597 roff_unsupp(ROFF_ARGS
)
2600 mandoc_msg(MANDOCERR_REQ_UNSUPP
, ln
, ppos
, "%s", roff_name
[tok
]);
2605 roff_cond(ROFF_ARGS
)
2609 roffnode_push(r
, tok
, NULL
, ln
, ppos
);
2612 * An `.el' has no conditional body: it will consume the value
2613 * of the current rstack entry set in prior `ie' calls or
2616 * If we're not an `el', however, then evaluate the conditional.
2619 r
->last
->rule
= tok
== ROFF_el
?
2620 (r
->rstackpos
< 0 ? 0 : r
->rstack
[r
->rstackpos
--]) :
2621 roff_evalcond(r
, ln
, buf
->buf
, &pos
);
2624 * An if-else will put the NEGATION of the current evaluated
2625 * conditional into the stack of rules.
2628 if (tok
== ROFF_ie
) {
2629 if (r
->rstackpos
+ 1 == r
->rstacksz
) {
2631 r
->rstack
= mandoc_reallocarray(r
->rstack
,
2632 r
->rstacksz
, sizeof(int));
2634 r
->rstack
[++r
->rstackpos
] = !r
->last
->rule
;
2637 /* If the parent has false as its rule, then so do we. */
2639 if (r
->last
->parent
&& !r
->last
->parent
->rule
)
2644 * If there is nothing on the line after the conditional,
2645 * not even whitespace, use next-line scope.
2646 * Except that .while does not support next-line scope.
2649 if (buf
->buf
[pos
] == '\0' && tok
!= ROFF_while
) {
2650 r
->last
->endspan
= 2;
2654 while (buf
->buf
[pos
] == ' ')
2657 /* An opening brace requests multiline scope. */
2659 if (buf
->buf
[pos
] == '\\' && buf
->buf
[pos
+ 1] == '{') {
2660 r
->last
->endspan
= -1;
2662 while (buf
->buf
[pos
] == ' ')
2668 * Anything else following the conditional causes
2669 * single-line scope. Warn if the scope contains
2670 * nothing but trailing whitespace.
2673 if (buf
->buf
[pos
] == '\0')
2674 mandoc_msg(MANDOCERR_COND_EMPTY
,
2675 ln
, ppos
, "%s", roff_name
[tok
]);
2677 r
->last
->endspan
= 1;
2682 if (tok
== ROFF_while
)
2694 /* Ignore groff compatibility mode for now. */
2696 if (tok
== ROFF_ds1
)
2698 else if (tok
== ROFF_as1
)
2702 * The first word is the name of the string.
2703 * If it is empty or terminated by an escape sequence,
2704 * abort the `ds' request without defining anything.
2707 name
= string
= buf
->buf
+ pos
;
2711 namesz
= roff_getname(r
, &string
, ln
, pos
);
2712 switch (name
[namesz
]) {
2716 string
= buf
->buf
+ pos
+ namesz
;
2722 /* Read past the initial double-quote, if any. */
2726 /* The rest is the value. */
2727 roff_setstrn(&r
->strtab
, name
, namesz
, string
, strlen(string
),
2729 roff_setstrn(&r
->rentab
, name
, namesz
, NULL
, 0, 0);
2734 * Parse a single operator, one or two characters long.
2735 * If the operator is recognized, return success and advance the
2736 * parse point, else return failure and leave the parse point unchanged.
2739 roff_getop(const char *v
, int *pos
, char *res
)
2754 switch (v
[*pos
+ 1]) {
2772 switch (v
[*pos
+ 1]) {
2786 if ('=' == v
[*pos
+ 1])
2798 * Evaluate either a parenthesized numeric expression
2799 * or a single signed integer number.
2802 roff_evalpar(struct roff
*r
, int ln
,
2803 const char *v
, int *pos
, int *res
, int flags
)
2807 return roff_getnum(v
, pos
, res
, flags
);
2810 if ( ! roff_evalnum(r
, ln
, v
, pos
, res
, flags
| ROFFNUM_WHITE
))
2814 * Omission of the closing parenthesis
2815 * is an error in validation mode,
2816 * but ignored in evaluation mode.
2821 else if (NULL
== res
)
2828 * Evaluate a complete numeric expression.
2829 * Proceed left to right, there is no concept of precedence.
2832 roff_evalnum(struct roff
*r
, int ln
, const char *v
,
2833 int *pos
, int *res
, int flags
)
2835 int mypos
, operand2
;
2843 if (flags
& ROFFNUM_WHITE
)
2844 while (isspace((unsigned char)v
[*pos
]))
2847 if ( ! roff_evalpar(r
, ln
, v
, pos
, res
, flags
))
2851 if (flags
& ROFFNUM_WHITE
)
2852 while (isspace((unsigned char)v
[*pos
]))
2855 if ( ! roff_getop(v
, pos
, &operator))
2858 if (flags
& ROFFNUM_WHITE
)
2859 while (isspace((unsigned char)v
[*pos
]))
2862 if ( ! roff_evalpar(r
, ln
, v
, pos
, &operand2
, flags
))
2865 if (flags
& ROFFNUM_WHITE
)
2866 while (isspace((unsigned char)v
[*pos
]))
2883 if (operand2
== 0) {
2884 mandoc_msg(MANDOCERR_DIVZERO
,
2892 if (operand2
== 0) {
2893 mandoc_msg(MANDOCERR_DIVZERO
,
2901 *res
= *res
< operand2
;
2904 *res
= *res
> operand2
;
2907 *res
= *res
<= operand2
;
2910 *res
= *res
>= operand2
;
2913 *res
= *res
== operand2
;
2916 *res
= *res
!= operand2
;
2919 *res
= *res
&& operand2
;
2922 *res
= *res
|| operand2
;
2925 if (operand2
< *res
)
2929 if (operand2
> *res
)
2939 /* --- register management ------------------------------------------------ */
/*
 * Convenience wrapper around roff_setregn() for a NUL-terminated
 * register name: the length is taken with strlen(name), val and
 * sign are passed through, and INT_MIN is passed as the step.
 * NOTE(review): INT_MIN looks like a "no step given" sentinel,
 * since roff_setregn() tests step != INT_MIN - confirm there.
 */
2942 roff_setreg(struct roff
*r
, const char *name
, int val
, char sign
)
2944 roff_setregn(r
, name
, strlen(name
), val
, sign
, INT_MIN
);
2948 roff_setregn(struct roff
*r
, const char *name
, size_t len
,
2949 int val
, char sign
, int step
)
2951 struct roffreg
*reg
;
2953 /* Search for an existing register with the same name. */
2956 while (reg
!= NULL
&& (reg
->key
.sz
!= len
||
2957 strncmp(reg
->key
.p
, name
, len
) != 0))
2961 /* Create a new register. */
2962 reg
= mandoc_malloc(sizeof(struct roffreg
));
2963 reg
->key
.p
= mandoc_strndup(name
, len
);
2967 reg
->next
= r
->regtab
;
2973 else if ('-' == sign
)
2977 if (step
!= INT_MIN
)
2982 * Handle some predefined read-only number registers.
2983 * For now, return -1 if the requested register is not predefined;
2984 * in case a predefined read-only register having the value -1
2985 * were to turn up, another special value would have to be chosen.
2988 roff_getregro(const struct roff
*r
, const char *name
)
2992 case '$': /* Number of arguments of the last macro evaluated. */
2993 return r
->mstackpos
< 0 ? 0 : r
->mstack
[r
->mstackpos
].argc
;
2994 case 'A': /* ASCII approximation mode is always off. */
2996 case 'g': /* Groff compatibility mode is always on. */
2998 case 'H': /* Fixed horizontal resolution. */
3000 case 'j': /* Always adjust left margin only. */
3002 case 'T': /* Some output device is always defined. */
3004 case 'V': /* Fixed vertical resolution. */
3012 roff_getreg(struct roff
*r
, const char *name
)
3014 return roff_getregn(r
, name
, strlen(name
), '\0');
3018 roff_getregn(struct roff
*r
, const char *name
, size_t len
, char sign
)
3020 struct roffreg
*reg
;
3023 if ('.' == name
[0] && 2 == len
) {
3024 val
= roff_getregro(r
, name
+ 1);
3029 for (reg
= r
->regtab
; reg
; reg
= reg
->next
) {
3030 if (len
== reg
->key
.sz
&&
3031 0 == strncmp(name
, reg
->key
.p
, len
)) {
3034 reg
->val
+= reg
->step
;
3037 reg
->val
-= reg
->step
;
3046 roff_setregn(r
, name
, len
, 0, '\0', INT_MIN
);
3051 roff_hasregn(const struct roff
*r
, const char *name
, size_t len
)
3053 struct roffreg
*reg
;
3056 if ('.' == name
[0] && 2 == len
) {
3057 val
= roff_getregro(r
, name
+ 1);
3062 for (reg
= r
->regtab
; reg
; reg
= reg
->next
)
3063 if (len
== reg
->key
.sz
&&
3064 0 == strncmp(name
, reg
->key
.p
, len
))
3071 roff_freereg(struct roffreg
*reg
)
3073 struct roffreg
*old_reg
;
3075 while (NULL
!= reg
) {
3086 char *key
, *val
, *step
;
3091 key
= val
= buf
->buf
+ pos
;
3095 keysz
= roff_getname(r
, &val
, ln
, pos
);
3096 if (key
[keysz
] == '\\' || key
[keysz
] == '\t')
3100 if (sign
== '+' || sign
== '-')
3104 if (roff_evalnum(r
, ln
, val
, &len
, &iv
, ROFFNUM_SCALE
) == 0)
3108 while (isspace((unsigned char)*step
))
3110 if (roff_evalnum(r
, ln
, step
, NULL
, &is
, 0) == 0)
3113 roff_setregn(r
, key
, keysz
, iv
, sign
, is
);
3120 struct roffreg
*reg
, **prev
;
3124 name
= cp
= buf
->buf
+ pos
;
3127 namesz
= roff_getname(r
, &cp
, ln
, pos
);
3128 name
[namesz
] = '\0';
3133 if (reg
== NULL
|| !strcmp(name
, reg
->key
.p
))
3145 /* --- handler functions for roff requests -------------------------------- */
3154 cp
= buf
->buf
+ pos
;
3155 while (*cp
!= '\0') {
3157 namesz
= roff_getname(r
, &cp
, ln
, (int)(cp
- buf
->buf
));
3158 roff_setstrn(&r
->strtab
, name
, namesz
, NULL
, 0, 0);
3159 roff_setstrn(&r
->rentab
, name
, namesz
, NULL
, 0, 0);
3160 if (name
[namesz
] == '\\' || name
[namesz
] == '\t')
3171 /* Parse the number of lines. */
3173 if ( ! roff_evalnum(r
, ln
, buf
->buf
, &pos
, &iv
, 0)) {
3174 mandoc_msg(MANDOCERR_IT_NONUM
,
3175 ln
, ppos
, "%s", buf
->buf
+ 1);
3179 while (isspace((unsigned char)buf
->buf
[pos
]))
3183 * Arm the input line trap.
3184 * Special-casing "an-trap" is an ugly workaround to cope
3185 * with DocBook stupidly fiddling with man(7) internals.
3189 roffit_macro
= mandoc_strdup(iv
!= 1 ||
3190 strcmp(buf
->buf
+ pos
, "an-trap") ?
3191 buf
->buf
+ pos
: "br");
3199 enum roff_tok t
, te
;
3206 r
->format
= MPARSE_MDOC
;
3207 mask
= MPARSE_MDOC
| MPARSE_QUICK
;
3213 r
->format
= MPARSE_MAN
;
3214 mask
= MPARSE_QUICK
;
3219 if ((r
->options
& mask
) == 0)
3220 for (t
= tok
; t
< te
; t
++)
3221 roff_setstr(r
, roff_name
[t
], NULL
, 0);
3228 r
->man
->flags
&= ~ROFF_NONOFILL
;
3229 if (r
->tbl
== NULL
) {
3230 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "TE");
3233 if (tbl_end(r
->tbl
, 0) == 0) {
3236 buf
->buf
= mandoc_strdup(".sp");
3239 return ROFF_REPARSE
;
3250 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "T&");
3252 tbl_restart(ln
, ppos
, r
->tbl
);
3258 * Handle in-line equation delimiters.
3261 roff_eqndelim(struct roff
*r
, struct buf
*buf
, int pos
)
3264 const char *bef_pr
, *bef_nl
, *mac
, *aft_nl
, *aft_pr
;
3267 * Outside equations, look for an opening delimiter.
3268 * If we are inside an equation, we already know it is
3269 * in-line, or this function wouldn't have been called;
3270 * so look for a closing delimiter.
3273 cp1
= buf
->buf
+ pos
;
3274 cp2
= strchr(cp1
, r
->eqn
== NULL
?
3275 r
->last_eqn
->odelim
: r
->last_eqn
->cdelim
);
3280 bef_pr
= bef_nl
= aft_nl
= aft_pr
= "";
3282 /* Handle preceding text, protecting whitespace. */
3284 if (*buf
->buf
!= '\0') {
3291 * Prepare replacing the delimiter with an equation macro
3292 * and drop leading white space from the equation.
3295 if (r
->eqn
== NULL
) {
3302 /* Handle following text, protecting whitespace. */
3310 /* Do the actual replacement. */
3312 buf
->sz
= mandoc_asprintf(&cp1
, "%s%s%s%s%s%s%s", buf
->buf
,
3313 bef_pr
, bef_nl
, mac
, aft_nl
, aft_pr
, cp2
) + 1;
3317 /* Toggle the in-line state of the eqn subsystem. */
3319 r
->eqn_inline
= r
->eqn
== NULL
;
3320 return ROFF_REPARSE
;
3326 struct roff_node
*n
;
3328 if (r
->man
->meta
.macroset
== MACROSET_MAN
)
3329 man_breakscope(r
->man
, ROFF_EQ
);
3330 n
= roff_node_alloc(r
->man
, ln
, ppos
, ROFFT_EQN
, TOKEN_NONE
);
3331 if (ln
> r
->man
->last
->line
)
3332 n
->flags
|= NODE_LINE
;
3333 n
->eqn
= eqn_box_new();
3334 roff_node_append(r
->man
, n
);
3335 r
->man
->next
= ROFF_NEXT_SIBLING
;
3337 assert(r
->eqn
== NULL
);
3338 if (r
->last_eqn
== NULL
)
3339 r
->last_eqn
= eqn_alloc();
3341 eqn_reset(r
->last_eqn
);
3342 r
->eqn
= r
->last_eqn
;
3345 if (buf
->buf
[pos
] != '\0')
3346 mandoc_msg(MANDOCERR_ARG_SKIP
, ln
, pos
,
3347 ".EQ %s", buf
->buf
+ pos
);
3355 if (r
->eqn
!= NULL
) {
3359 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "EN");
3360 if (buf
->buf
[pos
] != '\0')
3361 mandoc_msg(MANDOCERR_ARG_SKIP
, ln
, pos
,
3362 "EN %s", buf
->buf
+ pos
);
3369 if (r
->tbl
!= NULL
) {
3370 mandoc_msg(MANDOCERR_BLK_BROKEN
, ln
, ppos
, "TS breaks TS");
3373 r
->man
->flags
|= ROFF_NONOFILL
;
3374 r
->tbl
= tbl_alloc(ppos
, ln
, r
->last_tbl
);
3375 if (r
->last_tbl
== NULL
)
3376 r
->first_tbl
= r
->tbl
;
3377 r
->last_tbl
= r
->tbl
;
3382 roff_noarg(ROFF_ARGS
)
3384 if (r
->man
->flags
& (MAN_BLINE
| MAN_ELINE
))
3385 man_breakscope(r
->man
, tok
);
3386 if (tok
== ROFF_brp
)
3388 roff_elem_alloc(r
->man
, ln
, ppos
, tok
);
3389 if (buf
->buf
[pos
] != '\0')
3390 mandoc_msg(MANDOCERR_ARG_SKIP
, ln
, pos
,
3391 "%s %s", roff_name
[tok
], buf
->buf
+ pos
);
3393 r
->man
->flags
|= ROFF_NOFILL
;
3394 else if (tok
== ROFF_fi
)
3395 r
->man
->flags
&= ~ROFF_NOFILL
;
3396 r
->man
->last
->flags
|= NODE_LINE
| NODE_VALID
| NODE_ENDED
;
3397 r
->man
->next
= ROFF_NEXT_SIBLING
;
3402 roff_onearg(ROFF_ARGS
)
3404 struct roff_node
*n
;
3408 if (r
->man
->flags
& (MAN_BLINE
| MAN_ELINE
) &&
3409 (tok
== ROFF_ce
|| tok
== ROFF_rj
|| tok
== ROFF_sp
||
3411 man_breakscope(r
->man
, tok
);
3413 if (roffce_node
!= NULL
&& (tok
== ROFF_ce
|| tok
== ROFF_rj
)) {
3414 r
->man
->last
= roffce_node
;
3415 r
->man
->next
= ROFF_NEXT_SIBLING
;
3418 roff_elem_alloc(r
->man
, ln
, ppos
, tok
);
3421 cp
= buf
->buf
+ pos
;
3423 while (*cp
!= '\0' && *cp
!= ' ')
3428 mandoc_msg(MANDOCERR_ARG_EXCESS
,
3429 ln
, (int)(cp
- buf
->buf
),
3430 "%s ... %s", roff_name
[tok
], cp
);
3431 roff_word_alloc(r
->man
, ln
, pos
, buf
->buf
+ pos
);
3434 if (tok
== ROFF_ce
|| tok
== ROFF_rj
) {
3435 if (r
->man
->last
->type
== ROFFT_ELEM
) {
3436 roff_word_alloc(r
->man
, ln
, pos
, "1");
3437 r
->man
->last
->flags
|= NODE_NOSRC
;
3440 if (roff_evalnum(r
, ln
, r
->man
->last
->string
, &npos
,
3441 &roffce_lines
, 0) == 0) {
3442 mandoc_msg(MANDOCERR_CE_NONUM
,
3443 ln
, pos
, "ce %s", buf
->buf
+ pos
);
3446 if (roffce_lines
< 1) {
3447 r
->man
->last
= r
->man
->last
->parent
;
3451 roffce_node
= r
->man
->last
->parent
;
3453 n
->flags
|= NODE_VALID
| NODE_ENDED
;
3456 n
->flags
|= NODE_LINE
;
3457 r
->man
->next
= ROFF_NEXT_SIBLING
;
3462 roff_manyarg(ROFF_ARGS
)
3464 struct roff_node
*n
;
3467 roff_elem_alloc(r
->man
, ln
, ppos
, tok
);
3470 for (sp
= ep
= buf
->buf
+ pos
; *sp
!= '\0'; sp
= ep
) {
3471 while (*ep
!= '\0' && *ep
!= ' ')
3475 roff_word_alloc(r
->man
, ln
, sp
- buf
->buf
, sp
);
3478 n
->flags
|= NODE_LINE
| NODE_VALID
| NODE_ENDED
;
3480 r
->man
->next
= ROFF_NEXT_SIBLING
;
3487 char *oldn
, *newn
, *end
, *value
;
3488 size_t oldsz
, newsz
, valsz
;
3490 newn
= oldn
= buf
->buf
+ pos
;
3494 newsz
= roff_getname(r
, &oldn
, ln
, pos
);
3495 if (newn
[newsz
] == '\\' || newn
[newsz
] == '\t' || *oldn
== '\0')
3499 oldsz
= roff_getname(r
, &end
, ln
, oldn
- buf
->buf
);
3503 valsz
= mandoc_asprintf(&value
, ".%.*s \\$@\\\"\n",
3505 roff_setstrn(&r
->strtab
, newn
, newsz
, value
, valsz
, 0);
3506 roff_setstrn(&r
->rentab
, newn
, newsz
, NULL
, 0, 0);
3512 * The .break request only makes sense inside conditionals,
3513 * and that case is already handled in roff_cond_sub().
3516 roff_break(ROFF_ARGS
)
3518 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, pos
, "break");
3529 if (*p
== '\0' || (r
->control
= *p
++) == '.')
3533 mandoc_msg(MANDOCERR_ARG_EXCESS
,
3534 ln
, p
- buf
->buf
, "cc ... %s", p
);
3540 roff_char(ROFF_ARGS
)
3542 const char *p
, *kp
, *vp
;
3546 /* Parse the character to be replaced. */
3548 kp
= buf
->buf
+ pos
;
3550 if (*kp
== '\0' || (*kp
== '\\' &&
3551 mandoc_escape(&p
, NULL
, NULL
) != ESCAPE_SPECIAL
) ||
3552 (*p
!= ' ' && *p
!= '\0')) {
3553 mandoc_msg(MANDOCERR_CHAR_ARG
, ln
, pos
, "char %s", kp
);
3561 * If the replacement string contains a font escape sequence,
3562 * we have to restore the font at the end.
3568 while (*p
!= '\0') {
3571 switch (mandoc_escape(&p
, NULL
, NULL
)) {
3573 case ESCAPE_FONTROMAN
:
3574 case ESCAPE_FONTITALIC
:
3575 case ESCAPE_FONTBOLD
:
3578 case ESCAPE_FONTPREV
:
3586 mandoc_msg(MANDOCERR_CHAR_FONT
,
3587 ln
, (int)(vp
- buf
->buf
), "%s", vp
);
3590 * Approximate the effect of .char using the .tr tables.
3591 * XXX In groff, .char and .tr interact differently.
3595 if (r
->xtab
== NULL
)
3596 r
->xtab
= mandoc_calloc(128, sizeof(*r
->xtab
));
3597 assert((unsigned int)*kp
< 128);
3598 free(r
->xtab
[(int)*kp
].p
);
3599 r
->xtab
[(int)*kp
].sz
= mandoc_asprintf(&r
->xtab
[(int)*kp
].p
,
3600 "%s%s", vp
, font
? "\fP" : "");
3602 roff_setstrn(&r
->xmbtab
, kp
, ksz
, vp
, vsz
, 0);
3604 roff_setstrn(&r
->xmbtab
, kp
, ksz
, "\\fP", 3, 1);
3620 mandoc_msg(MANDOCERR_ARG_EXCESS
, ln
,
3621 (int)(p
- buf
->buf
), "ec ... %s", p
);
3630 if (buf
->buf
[pos
] != '\0')
3631 mandoc_msg(MANDOCERR_ARG_SKIP
,
3632 ln
, pos
, "eo %s", buf
->buf
+ pos
);
3639 while (buf
->buf
[pos
] == ' ')
3648 const char *p
, *first
, *second
;
3650 enum mandoc_esc esc
;
3655 mandoc_msg(MANDOCERR_REQ_EMPTY
, ln
, ppos
, "tr");
3659 while (*p
!= '\0') {
3663 if (*first
== '\\') {
3664 esc
= mandoc_escape(&p
, NULL
, NULL
);
3665 if (esc
== ESCAPE_ERROR
) {
3666 mandoc_msg(MANDOCERR_ESC_BAD
, ln
,
3667 (int)(p
- buf
->buf
), "%s", first
);
3670 fsz
= (size_t)(p
- first
);
3674 if (*second
== '\\') {
3675 esc
= mandoc_escape(&p
, NULL
, NULL
);
3676 if (esc
== ESCAPE_ERROR
) {
3677 mandoc_msg(MANDOCERR_ESC_BAD
, ln
,
3678 (int)(p
- buf
->buf
), "%s", second
);
3681 ssz
= (size_t)(p
- second
);
3682 } else if (*second
== '\0') {
3683 mandoc_msg(MANDOCERR_TR_ODD
, ln
,
3684 (int)(first
- buf
->buf
), "tr %s", first
);
3690 roff_setstrn(&r
->xmbtab
, first
, fsz
,
3695 if (r
->xtab
== NULL
)
3696 r
->xtab
= mandoc_calloc(128,
3697 sizeof(struct roffstr
));
3699 free(r
->xtab
[(int)*first
].p
);
3700 r
->xtab
[(int)*first
].p
= mandoc_strndup(second
, ssz
);
3701 r
->xtab
[(int)*first
].sz
= ssz
;
3708 * Implementation of the .return request.
3709 * There is no need to call roff_userret() from here.
3710 * The read module will call that after rewinding the reader stack
3711 * to the place from where the current macro was called.
3714 roff_return(ROFF_ARGS
)
3716 if (r
->mstackpos
>= 0)
3717 return ROFF_IGN
| ROFF_USERRET
;
3719 mandoc_msg(MANDOCERR_REQ_NOMAC
, ln
, ppos
, "return");
3727 char *oldn
, *newn
, *end
;
3728 size_t oldsz
, newsz
;
3731 oldn
= newn
= buf
->buf
+ pos
;
3735 oldsz
= roff_getname(r
, &newn
, ln
, pos
);
3736 if (oldn
[oldsz
] == '\\' || oldn
[oldsz
] == '\t' || *newn
== '\0')
3740 newsz
= roff_getname(r
, &end
, ln
, newn
- buf
->buf
);
3744 deftype
= ROFFDEF_ANY
;
3745 value
= roff_getstrn(r
, oldn
, oldsz
, &deftype
);
3748 roff_setstrn(&r
->strtab
, newn
, newsz
, value
, strlen(value
), 0);
3749 roff_setstrn(&r
->strtab
, oldn
, oldsz
, NULL
, 0, 0);
3750 roff_setstrn(&r
->rentab
, newn
, newsz
, NULL
, 0, 0);
3753 roff_setstrn(&r
->strtab
, newn
, newsz
, value
, strlen(value
), 0);
3754 roff_setstrn(&r
->rentab
, newn
, newsz
, NULL
, 0, 0);
3757 roff_setstrn(&r
->rentab
, newn
, newsz
, value
, strlen(value
), 0);
3758 roff_setstrn(&r
->rentab
, oldn
, oldsz
, NULL
, 0, 0);
3759 roff_setstrn(&r
->strtab
, newn
, newsz
, NULL
, 0, 0);
3762 roff_setstrn(&r
->rentab
, newn
, newsz
, oldn
, oldsz
, 0);
3763 roff_setstrn(&r
->strtab
, newn
, newsz
, NULL
, 0, 0);
3766 roff_setstrn(&r
->strtab
, newn
, newsz
, NULL
, 0, 0);
3767 roff_setstrn(&r
->rentab
, newn
, newsz
, NULL
, 0, 0);
3774 roff_shift(ROFF_ARGS
)
3780 if (buf
->buf
[pos
] != '\0' &&
3781 roff_evalnum(r
, ln
, buf
->buf
, &pos
, &levels
, 0) == 0) {
3782 mandoc_msg(MANDOCERR_CE_NONUM
,
3783 ln
, pos
, "shift %s", buf
->buf
+ pos
);
3786 if (r
->mstackpos
< 0) {
3787 mandoc_msg(MANDOCERR_REQ_NOMAC
, ln
, ppos
, "shift");
3790 ctx
= r
->mstack
+ r
->mstackpos
;
3791 if (levels
> ctx
->argc
) {
3792 mandoc_msg(MANDOCERR_SHIFT
,
3793 ln
, pos
, "%d, but max is %d", levels
, ctx
->argc
);
3798 for (i
= 0; i
< levels
; i
++)
3800 ctx
->argc
-= levels
;
3801 for (i
= 0; i
< ctx
->argc
; i
++)
3802 ctx
->argv
[i
] = ctx
->argv
[i
+ levels
];
3811 name
= buf
->buf
+ pos
;
3812 mandoc_msg(MANDOCERR_SO
, ln
, ppos
, "so %s", name
);
3815 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3816 * opening anything that's not in our cwd or anything beneath
3817 * it. Thus, explicitly disallow traversing up the file-system
3818 * or using absolute paths.
3821 if (*name
== '/' || strstr(name
, "../") || strstr(name
, "/..")) {
3822 mandoc_msg(MANDOCERR_SO_PATH
, ln
, ppos
, ".so %s", name
);
3823 buf
->sz
= mandoc_asprintf(&cp
,
3824 ".sp\nSee the file %s.\n.sp", name
) + 1;
3828 return ROFF_REPARSE
;
3835 /* --- user defined strings and macros ------------------------------------ */
3838 roff_userdef(ROFF_ARGS
)
3841 char *arg
, *ap
, *dst
, *src
;
3844 /* If the macro is empty, ignore it altogether. */
3846 if (*r
->current_string
== '\0')
3849 /* Initialize a new macro stack context. */
3851 if (++r
->mstackpos
== r
->mstacksz
) {
3852 r
->mstack
= mandoc_recallocarray(r
->mstack
,
3853 r
->mstacksz
, r
->mstacksz
+ 8, sizeof(*r
->mstack
));
3856 ctx
= r
->mstack
+ r
->mstackpos
;
3862 * Collect pointers to macro argument strings,
3863 * NUL-terminating them and escaping quotes.
3866 src
= buf
->buf
+ pos
;
3867 while (*src
!= '\0') {
3868 if (ctx
->argc
== ctx
->argsz
) {
3870 ctx
->argv
= mandoc_reallocarray(ctx
->argv
,
3871 ctx
->argsz
, sizeof(*ctx
->argv
));
3873 arg
= roff_getarg(r
, &src
, ln
, &pos
);
3874 sz
= 1; /* For the terminating NUL. */
3875 for (ap
= arg
; *ap
!= '\0'; ap
++)
3876 sz
+= *ap
== '"' ? 4 : 1;
3877 ctx
->argv
[ctx
->argc
++] = dst
= mandoc_malloc(sz
);
3878 for (ap
= arg
; *ap
!= '\0'; ap
++) {
3880 memcpy(dst
, "\\(dq", 4);
3889 /* Replace the macro invocation by the macro definition. */
3892 buf
->buf
= mandoc_strdup(r
->current_string
);
3893 buf
->sz
= strlen(buf
->buf
) + 1;
3896 return buf
->buf
[buf
->sz
- 2] == '\n' ?
3897 ROFF_REPARSE
| ROFF_USERCALL
: ROFF_IGN
| ROFF_APPEND
;
3901 * Calling a high-level macro that was renamed with .rn.
3902 * r->current_string has already been set up by roff_parse().
3905 roff_renamed(ROFF_ARGS
)
3909 buf
->sz
= mandoc_asprintf(&nbuf
, ".%s%s%s", r
->current_string
,
3910 buf
->buf
[pos
] == '\0' ? "" : " ", buf
->buf
+ pos
) + 1;
3918 * Measure the length in bytes of the roff identifier at *cpp
3919 * and advance the pointer to the next word.
3922 roff_getname(struct roff
*r
, char **cpp
, int ln
, int pos
)
3931 /* Advance cp to the byte after the end of the name. */
3933 for (cp
= name
; 1; cp
++) {
3937 if (*cp
== ' ' || *cp
== '\t') {
3943 if (cp
[1] == '{' || cp
[1] == '}')
3947 mandoc_msg(MANDOCERR_NAMESC
, ln
, pos
,
3948 "%.*s", (int)(cp
- name
+ 1), name
);
3949 mandoc_escape((const char **)&cp
, NULL
, NULL
);
3953 /* Read past spaces. */
3963 * Store *string into the user-defined string called *name.
3964 * To clear an existing entry, call with (*r, *name, NULL, 0).
3965 * append == 0: replace mode
3966 * append == 1: single-line append mode
3967 * append == 2: multiline append mode, append '\n' after each call
3970 roff_setstr(struct roff
*r
, const char *name
, const char *string
,
3975 namesz
= strlen(name
);
3976 roff_setstrn(&r
->strtab
, name
, namesz
, string
,
3977 string
? strlen(string
) : 0, append
);
3978 roff_setstrn(&r
->rentab
, name
, namesz
, NULL
, 0, 0);
3982 roff_setstrn(struct roffkv
**r
, const char *name
, size_t namesz
,
3983 const char *string
, size_t stringsz
, int append
)
3988 size_t oldch
, newch
;
3990 /* Search for an existing string with the same name. */
3993 while (n
&& (namesz
!= n
->key
.sz
||
3994 strncmp(n
->key
.p
, name
, namesz
)))
3998 /* Create a new string table entry. */
3999 n
= mandoc_malloc(sizeof(struct roffkv
));
4000 n
->key
.p
= mandoc_strndup(name
, namesz
);
4006 } else if (0 == append
) {
4016 * One additional byte for the '\n' in multiline mode,
4017 * and one for the terminating '\0'.
4019 newch
= stringsz
+ (1 < append
? 2u : 1u);
4021 if (NULL
== n
->val
.p
) {
4022 n
->val
.p
= mandoc_malloc(newch
);
4027 n
->val
.p
= mandoc_realloc(n
->val
.p
, oldch
+ newch
);
4030 /* Skip existing content in the destination buffer. */
4031 c
= n
->val
.p
+ (int)oldch
;
4033 /* Append new content to the destination buffer. */
4035 while (i
< (int)stringsz
) {
4037 * Rudimentary roff copy mode:
4038 * Handle escaped backslashes.
4040 if ('\\' == string
[i
] && '\\' == string
[i
+ 1])
4045 /* Append terminating bytes. */
4050 n
->val
.sz
= (int)(c
- n
->val
.p
);
4054 roff_getstrn(struct roff
*r
, const char *name
, size_t len
,
4057 const struct roffkv
*n
;
4062 for (n
= r
->strtab
; n
!= NULL
; n
= n
->next
) {
4063 if (strncmp(name
, n
->key
.p
, len
) != 0 ||
4064 n
->key
.p
[len
] != '\0' || n
->val
.p
== NULL
)
4066 if (*deftype
& ROFFDEF_USER
) {
4067 *deftype
= ROFFDEF_USER
;
4074 for (n
= r
->rentab
; n
!= NULL
; n
= n
->next
) {
4075 if (strncmp(name
, n
->key
.p
, len
) != 0 ||
4076 n
->key
.p
[len
] != '\0' || n
->val
.p
== NULL
)
4078 if (*deftype
& ROFFDEF_REN
) {
4079 *deftype
= ROFFDEF_REN
;
4086 for (i
= 0; i
< PREDEFS_MAX
; i
++) {
4087 if (strncmp(name
, predefs
[i
].name
, len
) != 0 ||
4088 predefs
[i
].name
[len
] != '\0')
4090 if (*deftype
& ROFFDEF_PRE
) {
4091 *deftype
= ROFFDEF_PRE
;
4092 return predefs
[i
].str
;
4098 if (r
->man
->meta
.macroset
!= MACROSET_MAN
) {
4099 for (tok
= MDOC_Dd
; tok
< MDOC_MAX
; tok
++) {
4100 if (strncmp(name
, roff_name
[tok
], len
) != 0 ||
4101 roff_name
[tok
][len
] != '\0')
4103 if (*deftype
& ROFFDEF_STD
) {
4104 *deftype
= ROFFDEF_STD
;
4112 if (r
->man
->meta
.macroset
!= MACROSET_MDOC
) {
4113 for (tok
= MAN_TH
; tok
< MAN_MAX
; tok
++) {
4114 if (strncmp(name
, roff_name
[tok
], len
) != 0 ||
4115 roff_name
[tok
][len
] != '\0')
4117 if (*deftype
& ROFFDEF_STD
) {
4118 *deftype
= ROFFDEF_STD
;
4127 if (found
== 0 && *deftype
!= ROFFDEF_ANY
) {
4128 if (*deftype
& ROFFDEF_REN
) {
4130 * This might still be a request,
4131 * so do not treat it as undefined yet.
4133 *deftype
= ROFFDEF_UNDEF
;
4137 /* Using an undefined string defines it to be empty. */
4139 roff_setstrn(&r
->strtab
, name
, len
, "", 0, 0);
4140 roff_setstrn(&r
->rentab
, name
, len
, NULL
, 0, 0);
4148 roff_freestr(struct roffkv
*r
)
4150 struct roffkv
*n
, *nn
;
4152 for (n
= r
; n
; n
= nn
) {
4160 /* --- accessors and utility functions ------------------------------------ */
4163 * Duplicate an input string, making the appropriate character
4164 * conversations (as stipulated by `tr') along the way.
4165 * Returns a heap-allocated string with all the replacements made.
4168 roff_strdup(const struct roff
*r
, const char *p
)
4170 const struct roffkv
*cp
;
4174 enum mandoc_esc esc
;
4176 if (NULL
== r
->xmbtab
&& NULL
== r
->xtab
)
4177 return mandoc_strdup(p
);
4178 else if ('\0' == *p
)
4179 return mandoc_strdup("");
4182 * Step through each character looking for term matches
4183 * (remember that a `tr' can be invoked with an escape, which is
4184 * a glyph but the escape is multi-character).
4185 * We only do this if the character hash has been initialised
4186 * and the string is >0 length.
4192 while ('\0' != *p
) {
4193 assert((unsigned int)*p
< 128);
4194 if ('\\' != *p
&& r
->xtab
&& r
->xtab
[(unsigned int)*p
].p
) {
4195 sz
= r
->xtab
[(int)*p
].sz
;
4196 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
4197 memcpy(res
+ ssz
, r
->xtab
[(int)*p
].p
, sz
);
4201 } else if ('\\' != *p
) {
4202 res
= mandoc_realloc(res
, ssz
+ 2);
4207 /* Search for term matches. */
4208 for (cp
= r
->xmbtab
; cp
; cp
= cp
->next
)
4209 if (0 == strncmp(p
, cp
->key
.p
, cp
->key
.sz
))
4214 * A match has been found.
4215 * Append the match to the array and move
4216 * forward by its keysize.
4218 res
= mandoc_realloc(res
,
4219 ssz
+ cp
->val
.sz
+ 1);
4220 memcpy(res
+ ssz
, cp
->val
.p
, cp
->val
.sz
);
4222 p
+= (int)cp
->key
.sz
;
4227 * Handle escapes carefully: we need to copy
4228 * over just the escape itself, or else we might
4229 * do replacements within the escape itself.
4230 * Make sure to pass along the bogus string.
4233 esc
= mandoc_escape(&p
, NULL
, NULL
);
4234 if (ESCAPE_ERROR
== esc
) {
4236 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
4237 memcpy(res
+ ssz
, pp
, sz
);
4241 * We bail out on bad escapes.
4242 * No need to warn: we already did so when
4243 * roff_expand() was called.
4246 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
4247 memcpy(res
+ ssz
, pp
, sz
);
4251 res
[(int)ssz
] = '\0';
4256 roff_getformat(const struct roff
*r
)
4263 * Find out whether a line is a macro line or not.
4264 * If it is, adjust the current position and return one; if it isn't,
4265 * return zero and don't change the current position.
4266 * If the control character has been set with `.cc', then let that grain
4268 * This is slighly contrary to groff, where using the non-breaking
4269 * control character when `cc' has been invoked will cause the
4270 * non-breaking macro contents to be printed verbatim.
4273 roff_getcontrol(const struct roff
*r
, const char *cp
, int *ppos
)
4279 if (r
->control
!= '\0' && cp
[pos
] == r
->control
)
4281 else if (r
->control
!= '\0')
4283 else if ('\\' == cp
[pos
] && '.' == cp
[pos
+ 1])
4285 else if ('.' == cp
[pos
] || '\'' == cp
[pos
])
4290 while (' ' == cp
[pos
] || '\t' == cp
[pos
])