]>
git.cameronkatri.com Git - mandoc.git/blob - roff.c
1 /* $Id: roff.c,v 1.368 2019/12/26 19:51:51 schwarze Exp $ */
3 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
20 #include <sys/types.h>
31 #include "mandoc_aux.h"
32 #include "mandoc_ohash.h"
35 #include "mandoc_parse.h"
36 #include "libmandoc.h"
38 #include "tbl_parse.h"
39 #include "eqn_parse.h"
42 * ASCII_ESC is used to signal from roff_getarg() to roff_expand()
43 * that an escape sequence resulted from copy-in processing and
44 * needs to be checked or interpolated. As it is used nowhere
45 * else, it is defined here rather than in a header file.
49 /* Maximum number of string expansions per line, to break infinite loops. */
50 #define EXPAND_LIMIT 1000
52 /* Types of definitions of macros and strings. */
/*
 * Each kind below is a distinct bit, so kinds can be OR-ed into a mask.
 * ROFFDEF_ANY is the mask of the four defined kinds (bits 1 to 4);
 * ROFFDEF_UNDEF is a separate bit that ROFFDEF_ANY does not include.
 * Bit 0 is not assigned here.
 */
53 #define ROFFDEF_USER (1 << 1) /* User-defined. */
54 #define ROFFDEF_PRE (1 << 2) /* Predefined. */
55 #define ROFFDEF_REN (1 << 3) /* Renamed standard macro. */
56 #define ROFFDEF_STD (1 << 4) /* mdoc(7) or man(7) macro. */
57 #define ROFFDEF_ANY (ROFFDEF_USER | ROFFDEF_PRE | \
58 ROFFDEF_REN | ROFFDEF_STD)
59 #define ROFFDEF_UNDEF (1 << 5) /* Completely undefined. */
61 /* --- data types --------------------------------------------------------- */
64 * An incredibly-simple string buffer.
67 char *p
; /* nil-terminated buffer */
68 size_t sz
; /* saved strlen(p) */
72 * A key-value roffstr pair as part of a singly-linked list.
77 struct roffkv
*next
; /* next in list */
81 * A single number register as part of a singly-linked list.
91 * Association of request and macro names with token IDs.
99 * A macro processing context.
100 * More than one is needed when macro calls are nested.
109 struct roff_man
*man
; /* mdoc or man parser */
110 struct roffnode
*last
; /* leaf of stack */
111 struct mctx
*mstack
; /* stack of macro contexts */
112 int *rstack
; /* stack of inverted `ie' values */
113 struct ohash
*reqtab
; /* request lookup table */
114 struct roffreg
*regtab
; /* number registers */
115 struct roffkv
*strtab
; /* user-defined strings & macros */
116 struct roffkv
*rentab
; /* renamed strings & macros */
117 struct roffkv
*xmbtab
; /* multi-byte trans table (`tr') */
118 struct roffstr
*xtab
; /* single-byte trans table (`tr') */
119 const char *current_string
; /* value of last called user macro */
120 struct tbl_node
*first_tbl
; /* first table parsed */
121 struct tbl_node
*last_tbl
; /* last table parsed */
122 struct tbl_node
*tbl
; /* current table being parsed */
123 struct eqn_node
*last_eqn
; /* equation parser */
124 struct eqn_node
*eqn
; /* active equation parser */
125 int eqn_inline
; /* current equation is inline */
126 int options
; /* parse options */
127 int mstacksz
; /* current size of mstack */
128 int mstackpos
; /* position in mstack */
129 int rstacksz
; /* current size limit of rstack */
130 int rstackpos
; /* position in rstack */
131 int format
; /* current file in mdoc or man format */
132 char control
; /* control character */
133 char escape
; /* escape character */
137 * A macro definition, condition, or ignored block.
140 enum roff_tok tok
; /* type of node */
141 struct roffnode
*parent
; /* up one in stack */
142 int line
; /* parse line */
143 int col
; /* parse col */
144 char *name
; /* node name, e.g. macro name */
145 char *end
; /* custom end macro of the block */
146 int endspan
; /* scope to: 1=eol 2=next line -1=\} */
147 int rule
; /* content is: 1=evaluated 0=skipped */
/*
 * Parameter list shared by the request handlers that this file
 * declares with ROFF_ARGS; the trailing comment on each line
 * describes that parameter.
 */
150 #define ROFF_ARGS struct roff *r, /* parse ctx */ \
151 enum roff_tok tok, /* tok of macro */ \
152 struct buf *buf, /* input buffer */ \
153 int ln, /* parse line */ \
154 int ppos, /* original pos in buffer */ \
155 int pos, /* current pos in buffer */ \
156 int *offs /* reset offset of buffer data */
/*
 * Pointer to a handler function: it takes exactly the ROFF_ARGS
 * parameters and returns an int.
 */
158 typedef int (*roffproc
)(ROFF_ARGS
);
161 roffproc proc
; /* process new macro */
162 roffproc text
; /* process as child text of macro */
163 roffproc sub
; /* process as child of macro */
165 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
169 const char *name
; /* predefined input name */
170 const char *str
; /* replacement symbol */
/*
 * Expands to one brace-enclosed { name, str } initializer followed by
 * a comma, so consecutive PREDEF() invocations form an initializer list.
 * NOTE(review): presumably invoked by the entries of "predefs.in", which
 * is included inside the predefs[] initializer -- confirm.
 */
173 #define PREDEF(__name, __str) \
174 { (__name), (__str) },
176 /* --- function prototypes ------------------------------------------------ */
178 static int roffnode_cleanscope(struct roff
*);
179 static int roffnode_pop(struct roff
*);
180 static void roffnode_push(struct roff
*, enum roff_tok
,
181 const char *, int, int);
182 static void roff_addtbl(struct roff_man
*, int, struct tbl_node
*);
183 static int roff_als(ROFF_ARGS
);
184 static int roff_block(ROFF_ARGS
);
185 static int roff_block_text(ROFF_ARGS
);
186 static int roff_block_sub(ROFF_ARGS
);
187 static int roff_break(ROFF_ARGS
);
188 static int roff_cblock(ROFF_ARGS
);
189 static int roff_cc(ROFF_ARGS
);
190 static int roff_ccond(struct roff
*, int, int);
191 static int roff_char(ROFF_ARGS
);
192 static int roff_cond(ROFF_ARGS
);
193 static int roff_cond_text(ROFF_ARGS
);
194 static int roff_cond_sub(ROFF_ARGS
);
195 static int roff_ds(ROFF_ARGS
);
196 static int roff_ec(ROFF_ARGS
);
197 static int roff_eo(ROFF_ARGS
);
198 static int roff_eqndelim(struct roff
*, struct buf
*, int);
199 static int roff_evalcond(struct roff
*r
, int, char *, int *);
200 static int roff_evalnum(struct roff
*, int,
201 const char *, int *, int *, int);
202 static int roff_evalpar(struct roff
*, int,
203 const char *, int *, int *, int);
204 static int roff_evalstrcond(const char *, int *);
205 static int roff_expand(struct roff
*, struct buf
*,
207 static void roff_free1(struct roff
*);
208 static void roff_freereg(struct roffreg
*);
209 static void roff_freestr(struct roffkv
*);
210 static size_t roff_getname(struct roff
*, char **, int, int);
211 static int roff_getnum(const char *, int *, int *, int);
212 static int roff_getop(const char *, int *, char *);
213 static int roff_getregn(struct roff
*,
214 const char *, size_t, char);
215 static int roff_getregro(const struct roff
*,
217 static const char *roff_getstrn(struct roff
*,
218 const char *, size_t, int *);
219 static int roff_hasregn(const struct roff
*,
220 const char *, size_t);
221 static int roff_insec(ROFF_ARGS
);
222 static int roff_it(ROFF_ARGS
);
223 static int roff_line_ignore(ROFF_ARGS
);
224 static void roff_man_alloc1(struct roff_man
*);
225 static void roff_man_free1(struct roff_man
*);
226 static int roff_manyarg(ROFF_ARGS
);
227 static int roff_noarg(ROFF_ARGS
);
228 static int roff_nop(ROFF_ARGS
);
229 static int roff_nr(ROFF_ARGS
);
230 static int roff_onearg(ROFF_ARGS
);
231 static enum roff_tok
roff_parse(struct roff
*, char *, int *,
233 static int roff_parsetext(struct roff
*, struct buf
*,
235 static int roff_renamed(ROFF_ARGS
);
236 static int roff_return(ROFF_ARGS
);
237 static int roff_rm(ROFF_ARGS
);
238 static int roff_rn(ROFF_ARGS
);
239 static int roff_rr(ROFF_ARGS
);
240 static void roff_setregn(struct roff
*, const char *,
241 size_t, int, char, int);
242 static void roff_setstr(struct roff
*,
243 const char *, const char *, int);
244 static void roff_setstrn(struct roffkv
**, const char *,
245 size_t, const char *, size_t, int);
246 static int roff_shift(ROFF_ARGS
);
247 static int roff_so(ROFF_ARGS
);
248 static int roff_tr(ROFF_ARGS
);
249 static int roff_Dd(ROFF_ARGS
);
250 static int roff_TE(ROFF_ARGS
);
251 static int roff_TS(ROFF_ARGS
);
252 static int roff_EQ(ROFF_ARGS
);
253 static int roff_EN(ROFF_ARGS
);
254 static int roff_T_(ROFF_ARGS
);
255 static int roff_unsupp(ROFF_ARGS
);
256 static int roff_userdef(ROFF_ARGS
);
258 /* --- constant data ------------------------------------------------------ */
/*
 * Two independent option bits for number parsing; they occupy
 * different bits and can therefore be combined with bitwise OR.
 */
260 #define ROFFNUM_SCALE (1 << 0) /* Honour scaling in roff_getnum(). */
261 #define ROFFNUM_WHITE (1 << 1) /* Skip whitespace in roff_evalnum(). */
263 const char *__roff_name
[MAN_MAX
+ 1] = {
264 "br", "ce", "fi", "ft",
268 "ab", "ad", "af", "aln",
269 "als", "am", "am1", "ami",
270 "ami1", "as", "as1", "asciify",
271 "backtrace", "bd", "bleedat", "blm",
272 "box", "boxa", "bp", "BP",
273 "break", "breakchar", "brnl", "brp",
275 "cf", "cflags", "ch", "char",
276 "chop", "class", "close", "CL",
277 "color", "composite", "continue", "cp",
278 "cropat", "cs", "cu", "da",
279 "dch", "Dd", "de", "de1",
280 "defcolor", "dei", "dei1", "device",
281 "devicem", "di", "do", "ds",
282 "ds1", "dwh", "dt", "ec",
283 "ecr", "ecs", "el", "em",
284 "EN", "eo", "EP", "EQ",
285 "errprint", "ev", "evc", "ex",
286 "fallback", "fam", "fc", "fchar",
287 "fcolor", "fdeferlig", "feature", "fkern",
288 "fl", "flig", "fp", "fps",
289 "fschar", "fspacewidth", "fspecial", "ftr",
290 "fzoom", "gcolor", "hc", "hcode",
291 "hidechar", "hla", "hlm", "hpf",
292 "hpfa", "hpfcode", "hw", "hy",
293 "hylang", "hylen", "hym", "hypp",
294 "hys", "ie", "if", "ig",
295 "index", "it", "itc", "IX",
296 "kern", "kernafter", "kernbefore", "kernpair",
297 "lc", "lc_ctype", "lds", "length",
298 "letadj", "lf", "lg", "lhang",
299 "linetabs", "lnr", "lnrf", "lpfx",
301 "mediasize", "minss", "mk", "mso",
302 "na", "ne", "nh", "nhychar",
303 "nm", "nn", "nop", "nr",
304 "nrf", "nroff", "ns", "nx",
305 "open", "opena", "os", "output",
306 "padj", "papersize", "pc", "pev",
307 "pi", "PI", "pl", "pm",
309 "psbb", "pshape", "pso", "ptr",
310 "pvs", "rchar", "rd", "recursionlimit",
311 "return", "rfschar", "rhang",
312 "rm", "rn", "rnn", "rr",
313 "rs", "rt", "schar", "sentchar",
314 "shc", "shift", "sizes", "so",
315 "spacewidth", "special", "spreadwarn", "ss",
316 "sty", "substring", "sv", "sy",
319 "tm", "tm1", "tmc", "tr",
320 "track", "transchar", "trf", "trimat",
321 "trin", "trnt", "troff", "TS",
322 "uf", "ul", "unformat", "unwatch",
323 "unwatchn", "vpt", "vs", "warn",
324 "warnscale", "watch", "watchlength", "watchn",
325 "wh", "while", "write", "writec",
326 "writem", "xflag", ".", NULL
,
328 "Dd", "Dt", "Os", "Sh",
329 "Ss", "Pp", "D1", "Dl",
330 "Bd", "Ed", "Bl", "El",
331 "It", "Ad", "An", "Ap",
332 "Ar", "Cd", "Cm", "Dv",
333 "Er", "Ev", "Ex", "Fa",
334 "Fd", "Fl", "Fn", "Ft",
335 "Ic", "In", "Li", "Nd",
336 "Nm", "Op", "Ot", "Pa",
337 "Rv", "St", "Va", "Vt",
338 "Xr", "%A", "%B", "%D",
339 "%I", "%J", "%N", "%O",
340 "%P", "%R", "%T", "%V",
341 "Ac", "Ao", "Aq", "At",
342 "Bc", "Bf", "Bo", "Bq",
343 "Bsx", "Bx", "Db", "Dc",
344 "Do", "Dq", "Ec", "Ef",
345 "Em", "Eo", "Fx", "Ms",
346 "No", "Ns", "Nx", "Ox",
347 "Pc", "Pf", "Po", "Pq",
348 "Qc", "Ql", "Qo", "Qq",
349 "Re", "Rs", "Sc", "So",
350 "Sq", "Sm", "Sx", "Sy",
351 "Tn", "Ux", "Xc", "Xo",
352 "Fo", "Fc", "Oo", "Oc",
353 "Bk", "Ek", "Bt", "Hf",
354 "Fr", "Ud", "Lb", "Lp",
355 "Lk", "Mt", "Brq", "Bro",
356 "Brc", "%C", "Es", "En",
357 "Dx", "%Q", "%U", "Ta",
359 "TH", "SH", "SS", "TP",
361 "LP", "PP", "P", "IP",
362 "HP", "SM", "SB", "BI",
363 "IB", "BR", "RB", "R",
364 "B", "I", "IR", "RI",
365 "RE", "RS", "DT", "UC",
369 "UE", "MT", "ME", NULL
/*
 * Read-only view of the __roff_name table with external linkage:
 * neither the element pointers nor the strings they point to can be
 * modified through roff_name.
 */
371 const char *const *roff_name
= __roff_name
;
373 static struct roffmac roffs
[TOKEN_NONE
] = {
374 { roff_noarg
, NULL
, NULL
, 0 }, /* br */
375 { roff_onearg
, NULL
, NULL
, 0 }, /* ce */
376 { roff_noarg
, NULL
, NULL
, 0 }, /* fi */
377 { roff_onearg
, NULL
, NULL
, 0 }, /* ft */
378 { roff_onearg
, NULL
, NULL
, 0 }, /* ll */
379 { roff_onearg
, NULL
, NULL
, 0 }, /* mc */
380 { roff_noarg
, NULL
, NULL
, 0 }, /* nf */
381 { roff_onearg
, NULL
, NULL
, 0 }, /* po */
382 { roff_onearg
, NULL
, NULL
, 0 }, /* rj */
383 { roff_onearg
, NULL
, NULL
, 0 }, /* sp */
384 { roff_manyarg
, NULL
, NULL
, 0 }, /* ta */
385 { roff_onearg
, NULL
, NULL
, 0 }, /* ti */
386 { NULL
, NULL
, NULL
, 0 }, /* ROFF_MAX */
387 { roff_unsupp
, NULL
, NULL
, 0 }, /* ab */
388 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ad */
389 { roff_line_ignore
, NULL
, NULL
, 0 }, /* af */
390 { roff_unsupp
, NULL
, NULL
, 0 }, /* aln */
391 { roff_als
, NULL
, NULL
, 0 }, /* als */
392 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* am */
393 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* am1 */
394 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* ami */
395 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* ami1 */
396 { roff_ds
, NULL
, NULL
, 0 }, /* as */
397 { roff_ds
, NULL
, NULL
, 0 }, /* as1 */
398 { roff_unsupp
, NULL
, NULL
, 0 }, /* asciify */
399 { roff_line_ignore
, NULL
, NULL
, 0 }, /* backtrace */
400 { roff_line_ignore
, NULL
, NULL
, 0 }, /* bd */
401 { roff_line_ignore
, NULL
, NULL
, 0 }, /* bleedat */
402 { roff_unsupp
, NULL
, NULL
, 0 }, /* blm */
403 { roff_unsupp
, NULL
, NULL
, 0 }, /* box */
404 { roff_unsupp
, NULL
, NULL
, 0 }, /* boxa */
405 { roff_line_ignore
, NULL
, NULL
, 0 }, /* bp */
406 { roff_unsupp
, NULL
, NULL
, 0 }, /* BP */
407 { roff_break
, NULL
, NULL
, 0 }, /* break */
408 { roff_line_ignore
, NULL
, NULL
, 0 }, /* breakchar */
409 { roff_line_ignore
, NULL
, NULL
, 0 }, /* brnl */
410 { roff_noarg
, NULL
, NULL
, 0 }, /* brp */
411 { roff_line_ignore
, NULL
, NULL
, 0 }, /* brpnl */
412 { roff_unsupp
, NULL
, NULL
, 0 }, /* c2 */
413 { roff_cc
, NULL
, NULL
, 0 }, /* cc */
414 { roff_insec
, NULL
, NULL
, 0 }, /* cf */
415 { roff_line_ignore
, NULL
, NULL
, 0 }, /* cflags */
416 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ch */
417 { roff_char
, NULL
, NULL
, 0 }, /* char */
418 { roff_unsupp
, NULL
, NULL
, 0 }, /* chop */
419 { roff_line_ignore
, NULL
, NULL
, 0 }, /* class */
420 { roff_insec
, NULL
, NULL
, 0 }, /* close */
421 { roff_unsupp
, NULL
, NULL
, 0 }, /* CL */
422 { roff_line_ignore
, NULL
, NULL
, 0 }, /* color */
423 { roff_unsupp
, NULL
, NULL
, 0 }, /* composite */
424 { roff_unsupp
, NULL
, NULL
, 0 }, /* continue */
425 { roff_line_ignore
, NULL
, NULL
, 0 }, /* cp */
426 { roff_line_ignore
, NULL
, NULL
, 0 }, /* cropat */
427 { roff_line_ignore
, NULL
, NULL
, 0 }, /* cs */
428 { roff_line_ignore
, NULL
, NULL
, 0 }, /* cu */
429 { roff_unsupp
, NULL
, NULL
, 0 }, /* da */
430 { roff_unsupp
, NULL
, NULL
, 0 }, /* dch */
431 { roff_Dd
, NULL
, NULL
, 0 }, /* Dd */
432 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* de */
433 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* de1 */
434 { roff_line_ignore
, NULL
, NULL
, 0 }, /* defcolor */
435 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* dei */
436 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* dei1 */
437 { roff_unsupp
, NULL
, NULL
, 0 }, /* device */
438 { roff_unsupp
, NULL
, NULL
, 0 }, /* devicem */
439 { roff_unsupp
, NULL
, NULL
, 0 }, /* di */
440 { roff_unsupp
, NULL
, NULL
, 0 }, /* do */
441 { roff_ds
, NULL
, NULL
, 0 }, /* ds */
442 { roff_ds
, NULL
, NULL
, 0 }, /* ds1 */
443 { roff_unsupp
, NULL
, NULL
, 0 }, /* dwh */
444 { roff_unsupp
, NULL
, NULL
, 0 }, /* dt */
445 { roff_ec
, NULL
, NULL
, 0 }, /* ec */
446 { roff_unsupp
, NULL
, NULL
, 0 }, /* ecr */
447 { roff_unsupp
, NULL
, NULL
, 0 }, /* ecs */
448 { roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
}, /* el */
449 { roff_unsupp
, NULL
, NULL
, 0 }, /* em */
450 { roff_EN
, NULL
, NULL
, 0 }, /* EN */
451 { roff_eo
, NULL
, NULL
, 0 }, /* eo */
452 { roff_unsupp
, NULL
, NULL
, 0 }, /* EP */
453 { roff_EQ
, NULL
, NULL
, 0 }, /* EQ */
454 { roff_line_ignore
, NULL
, NULL
, 0 }, /* errprint */
455 { roff_unsupp
, NULL
, NULL
, 0 }, /* ev */
456 { roff_unsupp
, NULL
, NULL
, 0 }, /* evc */
457 { roff_unsupp
, NULL
, NULL
, 0 }, /* ex */
458 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fallback */
459 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fam */
460 { roff_unsupp
, NULL
, NULL
, 0 }, /* fc */
461 { roff_unsupp
, NULL
, NULL
, 0 }, /* fchar */
462 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fcolor */
463 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fdeferlig */
464 { roff_line_ignore
, NULL
, NULL
, 0 }, /* feature */
465 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fkern */
466 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fl */
467 { roff_line_ignore
, NULL
, NULL
, 0 }, /* flig */
468 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fp */
469 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fps */
470 { roff_unsupp
, NULL
, NULL
, 0 }, /* fschar */
471 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fspacewidth */
472 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fspecial */
473 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ftr */
474 { roff_line_ignore
, NULL
, NULL
, 0 }, /* fzoom */
475 { roff_line_ignore
, NULL
, NULL
, 0 }, /* gcolor */
476 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hc */
477 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hcode */
478 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hidechar */
479 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hla */
480 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hlm */
481 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hpf */
482 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hpfa */
483 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hpfcode */
484 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hw */
485 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hy */
486 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hylang */
487 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hylen */
488 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hym */
489 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hypp */
490 { roff_line_ignore
, NULL
, NULL
, 0 }, /* hys */
491 { roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
}, /* ie */
492 { roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
}, /* if */
493 { roff_block
, roff_block_text
, roff_block_sub
, 0 }, /* ig */
494 { roff_unsupp
, NULL
, NULL
, 0 }, /* index */
495 { roff_it
, NULL
, NULL
, 0 }, /* it */
496 { roff_unsupp
, NULL
, NULL
, 0 }, /* itc */
497 { roff_line_ignore
, NULL
, NULL
, 0 }, /* IX */
498 { roff_line_ignore
, NULL
, NULL
, 0 }, /* kern */
499 { roff_line_ignore
, NULL
, NULL
, 0 }, /* kernafter */
500 { roff_line_ignore
, NULL
, NULL
, 0 }, /* kernbefore */
501 { roff_line_ignore
, NULL
, NULL
, 0 }, /* kernpair */
502 { roff_unsupp
, NULL
, NULL
, 0 }, /* lc */
503 { roff_unsupp
, NULL
, NULL
, 0 }, /* lc_ctype */
504 { roff_unsupp
, NULL
, NULL
, 0 }, /* lds */
505 { roff_unsupp
, NULL
, NULL
, 0 }, /* length */
506 { roff_line_ignore
, NULL
, NULL
, 0 }, /* letadj */
507 { roff_insec
, NULL
, NULL
, 0 }, /* lf */
508 { roff_line_ignore
, NULL
, NULL
, 0 }, /* lg */
509 { roff_line_ignore
, NULL
, NULL
, 0 }, /* lhang */
510 { roff_unsupp
, NULL
, NULL
, 0 }, /* linetabs */
511 { roff_unsupp
, NULL
, NULL
, 0 }, /* lnr */
512 { roff_unsupp
, NULL
, NULL
, 0 }, /* lnrf */
513 { roff_unsupp
, NULL
, NULL
, 0 }, /* lpfx */
514 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ls */
515 { roff_unsupp
, NULL
, NULL
, 0 }, /* lsm */
516 { roff_line_ignore
, NULL
, NULL
, 0 }, /* lt */
517 { roff_line_ignore
, NULL
, NULL
, 0 }, /* mediasize */
518 { roff_line_ignore
, NULL
, NULL
, 0 }, /* minss */
519 { roff_line_ignore
, NULL
, NULL
, 0 }, /* mk */
520 { roff_insec
, NULL
, NULL
, 0 }, /* mso */
521 { roff_line_ignore
, NULL
, NULL
, 0 }, /* na */
522 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ne */
523 { roff_line_ignore
, NULL
, NULL
, 0 }, /* nh */
524 { roff_line_ignore
, NULL
, NULL
, 0 }, /* nhychar */
525 { roff_unsupp
, NULL
, NULL
, 0 }, /* nm */
526 { roff_unsupp
, NULL
, NULL
, 0 }, /* nn */
527 { roff_nop
, NULL
, NULL
, 0 }, /* nop */
528 { roff_nr
, NULL
, NULL
, 0 }, /* nr */
529 { roff_unsupp
, NULL
, NULL
, 0 }, /* nrf */
530 { roff_line_ignore
, NULL
, NULL
, 0 }, /* nroff */
531 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ns */
532 { roff_insec
, NULL
, NULL
, 0 }, /* nx */
533 { roff_insec
, NULL
, NULL
, 0 }, /* open */
534 { roff_insec
, NULL
, NULL
, 0 }, /* opena */
535 { roff_line_ignore
, NULL
, NULL
, 0 }, /* os */
536 { roff_unsupp
, NULL
, NULL
, 0 }, /* output */
537 { roff_line_ignore
, NULL
, NULL
, 0 }, /* padj */
538 { roff_line_ignore
, NULL
, NULL
, 0 }, /* papersize */
539 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pc */
540 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pev */
541 { roff_insec
, NULL
, NULL
, 0 }, /* pi */
542 { roff_unsupp
, NULL
, NULL
, 0 }, /* PI */
543 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pl */
544 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pm */
545 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pn */
546 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pnr */
547 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ps */
548 { roff_unsupp
, NULL
, NULL
, 0 }, /* psbb */
549 { roff_unsupp
, NULL
, NULL
, 0 }, /* pshape */
550 { roff_insec
, NULL
, NULL
, 0 }, /* pso */
551 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ptr */
552 { roff_line_ignore
, NULL
, NULL
, 0 }, /* pvs */
553 { roff_unsupp
, NULL
, NULL
, 0 }, /* rchar */
554 { roff_line_ignore
, NULL
, NULL
, 0 }, /* rd */
555 { roff_line_ignore
, NULL
, NULL
, 0 }, /* recursionlimit */
556 { roff_return
, NULL
, NULL
, 0 }, /* return */
557 { roff_unsupp
, NULL
, NULL
, 0 }, /* rfschar */
558 { roff_line_ignore
, NULL
, NULL
, 0 }, /* rhang */
559 { roff_rm
, NULL
, NULL
, 0 }, /* rm */
560 { roff_rn
, NULL
, NULL
, 0 }, /* rn */
561 { roff_unsupp
, NULL
, NULL
, 0 }, /* rnn */
562 { roff_rr
, NULL
, NULL
, 0 }, /* rr */
563 { roff_line_ignore
, NULL
, NULL
, 0 }, /* rs */
564 { roff_line_ignore
, NULL
, NULL
, 0 }, /* rt */
565 { roff_unsupp
, NULL
, NULL
, 0 }, /* schar */
566 { roff_line_ignore
, NULL
, NULL
, 0 }, /* sentchar */
567 { roff_line_ignore
, NULL
, NULL
, 0 }, /* shc */
568 { roff_shift
, NULL
, NULL
, 0 }, /* shift */
569 { roff_line_ignore
, NULL
, NULL
, 0 }, /* sizes */
570 { roff_so
, NULL
, NULL
, 0 }, /* so */
571 { roff_line_ignore
, NULL
, NULL
, 0 }, /* spacewidth */
572 { roff_line_ignore
, NULL
, NULL
, 0 }, /* special */
573 { roff_line_ignore
, NULL
, NULL
, 0 }, /* spreadwarn */
574 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ss */
575 { roff_line_ignore
, NULL
, NULL
, 0 }, /* sty */
576 { roff_unsupp
, NULL
, NULL
, 0 }, /* substring */
577 { roff_line_ignore
, NULL
, NULL
, 0 }, /* sv */
578 { roff_insec
, NULL
, NULL
, 0 }, /* sy */
579 { roff_T_
, NULL
, NULL
, 0 }, /* T& */
580 { roff_unsupp
, NULL
, NULL
, 0 }, /* tc */
581 { roff_TE
, NULL
, NULL
, 0 }, /* TE */
582 { roff_Dd
, NULL
, NULL
, 0 }, /* TH */
583 { roff_line_ignore
, NULL
, NULL
, 0 }, /* tkf */
584 { roff_unsupp
, NULL
, NULL
, 0 }, /* tl */
585 { roff_line_ignore
, NULL
, NULL
, 0 }, /* tm */
586 { roff_line_ignore
, NULL
, NULL
, 0 }, /* tm1 */
587 { roff_line_ignore
, NULL
, NULL
, 0 }, /* tmc */
588 { roff_tr
, NULL
, NULL
, 0 }, /* tr */
589 { roff_line_ignore
, NULL
, NULL
, 0 }, /* track */
590 { roff_line_ignore
, NULL
, NULL
, 0 }, /* transchar */
591 { roff_insec
, NULL
, NULL
, 0 }, /* trf */
592 { roff_line_ignore
, NULL
, NULL
, 0 }, /* trimat */
593 { roff_unsupp
, NULL
, NULL
, 0 }, /* trin */
594 { roff_unsupp
, NULL
, NULL
, 0 }, /* trnt */
595 { roff_line_ignore
, NULL
, NULL
, 0 }, /* troff */
596 { roff_TS
, NULL
, NULL
, 0 }, /* TS */
597 { roff_line_ignore
, NULL
, NULL
, 0 }, /* uf */
598 { roff_line_ignore
, NULL
, NULL
, 0 }, /* ul */
599 { roff_unsupp
, NULL
, NULL
, 0 }, /* unformat */
600 { roff_line_ignore
, NULL
, NULL
, 0 }, /* unwatch */
601 { roff_line_ignore
, NULL
, NULL
, 0 }, /* unwatchn */
602 { roff_line_ignore
, NULL
, NULL
, 0 }, /* vpt */
603 { roff_line_ignore
, NULL
, NULL
, 0 }, /* vs */
604 { roff_line_ignore
, NULL
, NULL
, 0 }, /* warn */
605 { roff_line_ignore
, NULL
, NULL
, 0 }, /* warnscale */
606 { roff_line_ignore
, NULL
, NULL
, 0 }, /* watch */
607 { roff_line_ignore
, NULL
, NULL
, 0 }, /* watchlength */
608 { roff_line_ignore
, NULL
, NULL
, 0 }, /* watchn */
609 { roff_unsupp
, NULL
, NULL
, 0 }, /* wh */
610 { roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
}, /*while*/
611 { roff_insec
, NULL
, NULL
, 0 }, /* write */
612 { roff_insec
, NULL
, NULL
, 0 }, /* writec */
613 { roff_insec
, NULL
, NULL
, 0 }, /* writem */
614 { roff_line_ignore
, NULL
, NULL
, 0 }, /* xflag */
615 { roff_cblock
, NULL
, NULL
, 0 }, /* . */
616 { roff_renamed
, NULL
, NULL
, 0 },
617 { roff_userdef
, NULL
, NULL
, 0 }
620 /* Array of injected predefined strings. */
621 #define PREDEFS_MAX 38
622 static const struct predef predefs
[PREDEFS_MAX
] = {
623 #include "predefs.in"
/*
 * NOTE(review): the four variables below are file-scope statics, not
 * members of struct roff, so there is a single copy of each for the
 * whole process rather than one per parser context.
 */
626 static int roffce_lines
; /* number of input lines to center */
627 static struct roff_node
*roffce_node
; /* active request */
628 static int roffit_lines
; /* number of lines to delay */
629 static char *roffit_macro
; /* nil-terminated macro line */
632 /* --- request table ------------------------------------------------------ */
635 roffhash_alloc(enum roff_tok mintok
, enum roff_tok maxtok
)
643 htab
= mandoc_malloc(sizeof(*htab
));
644 mandoc_ohash_init(htab
, 8, offsetof(struct roffreq
, name
));
646 for (tok
= mintok
; tok
< maxtok
; tok
++) {
647 if (roff_name
[tok
] == NULL
)
649 sz
= strlen(roff_name
[tok
]);
650 req
= mandoc_malloc(sizeof(*req
) + sz
+ 1);
652 memcpy(req
->name
, roff_name
[tok
], sz
+ 1);
653 slot
= ohash_qlookup(htab
, req
->name
);
654 ohash_insert(htab
, slot
, req
);
660 roffhash_free(struct ohash
*htab
)
667 for (req
= ohash_first(htab
, &slot
); req
!= NULL
;
668 req
= ohash_next(htab
, &slot
))
675 roffhash_find(struct ohash
*htab
, const char *name
, size_t sz
)
682 req
= ohash_find(htab
, ohash_qlookupi(htab
, name
, &end
));
684 req
= ohash_find(htab
, ohash_qlookup(htab
, name
));
685 return req
== NULL
? TOKEN_NONE
: req
->tok
;
688 /* --- stack of request blocks -------------------------------------------- */
691 * Pop the current node off of the stack of roff instructions currently
692 * pending. Return 1 if it is a loop or 0 otherwise.
695 roffnode_pop(struct roff
*r
)
701 inloop
= p
->tok
== ROFF_while
;
710 * Push a roff node onto the instruction stack. This must later be
711 * removed with roffnode_pop().
714 roffnode_push(struct roff
*r
, enum roff_tok tok
, const char *name
,
719 p
= mandoc_calloc(1, sizeof(struct roffnode
));
722 p
->name
= mandoc_strdup(name
);
726 p
->rule
= p
->parent
? p
->parent
->rule
: 0;
731 /* --- roff parser state data management ---------------------------------- */
734 roff_free1(struct roff
*r
)
738 tbl_free(r
->first_tbl
);
739 r
->first_tbl
= r
->last_tbl
= r
->tbl
= NULL
;
741 eqn_free(r
->last_eqn
);
742 r
->last_eqn
= r
->eqn
= NULL
;
744 while (r
->mstackpos
>= 0)
755 roff_freereg(r
->regtab
);
758 roff_freestr(r
->strtab
);
759 roff_freestr(r
->rentab
);
760 roff_freestr(r
->xmbtab
);
761 r
->strtab
= r
->rentab
= r
->xmbtab
= NULL
;
764 for (i
= 0; i
< 128; i
++)
771 roff_reset(struct roff
*r
)
774 r
->options
|= MPARSE_COMMENT
;
775 r
->format
= r
->options
& (MPARSE_MDOC
| MPARSE_MAN
);
785 roff_free(struct roff
*r
)
790 for (i
= 0; i
< r
->mstacksz
; i
++)
791 free(r
->mstack
[i
].argv
);
793 roffhash_free(r
->reqtab
);
798 roff_alloc(int options
)
802 r
= mandoc_calloc(1, sizeof(struct roff
));
803 r
->reqtab
= roffhash_alloc(0, ROFF_RENAMED
);
804 r
->options
= options
| MPARSE_COMMENT
;
805 r
->format
= options
& (MPARSE_MDOC
| MPARSE_MAN
);
812 /* --- syntax tree state data management ---------------------------------- */
815 roff_man_free1(struct roff_man
*man
)
817 if (man
->meta
.first
!= NULL
)
818 roff_node_delete(man
, man
->meta
.first
);
819 free(man
->meta
.msec
);
822 free(man
->meta
.arch
);
823 free(man
->meta
.title
);
824 free(man
->meta
.name
);
825 free(man
->meta
.date
);
826 free(man
->meta
.sodest
);
830 roff_state_reset(struct roff_man
*man
)
832 man
->last
= man
->meta
.first
;
835 man
->lastsec
= man
->lastnamed
= SEC_NONE
;
836 man
->next
= ROFF_NEXT_CHILD
;
837 roff_setreg(man
->roff
, "nS", 0, '=');
841 roff_man_alloc1(struct roff_man
*man
)
843 memset(&man
->meta
, 0, sizeof(man
->meta
));
844 man
->meta
.first
= mandoc_calloc(1, sizeof(*man
->meta
.first
));
845 man
->meta
.first
->type
= ROFFT_ROOT
;
846 man
->meta
.macroset
= MACROSET_NONE
;
847 roff_state_reset(man
);
851 roff_man_reset(struct roff_man
*man
)
854 roff_man_alloc1(man
);
858 roff_man_free(struct roff_man
*man
)
865 roff_man_alloc(struct roff
*roff
, const char *os_s
, int quick
)
867 struct roff_man
*man
;
869 man
= mandoc_calloc(1, sizeof(*man
));
873 roff_man_alloc1(man
);
878 /* --- syntax tree handling ----------------------------------------------- */
881 roff_node_alloc(struct roff_man
*man
, int line
, int pos
,
882 enum roff_type type
, int tok
)
886 n
= mandoc_calloc(1, sizeof(*n
));
891 n
->sec
= man
->lastsec
;
893 if (man
->flags
& MDOC_SYNOPSIS
)
894 n
->flags
|= NODE_SYNPRETTY
;
896 n
->flags
&= ~NODE_SYNPRETTY
;
897 if ((man
->flags
& (ROFF_NOFILL
| ROFF_NONOFILL
)) == ROFF_NOFILL
)
898 n
->flags
|= NODE_NOFILL
;
900 n
->flags
&= ~NODE_NOFILL
;
901 if (man
->flags
& MDOC_NEWLINE
)
902 n
->flags
|= NODE_LINE
;
903 man
->flags
&= ~MDOC_NEWLINE
;
909 roff_node_append(struct roff_man
*man
, struct roff_node
*n
)
913 case ROFF_NEXT_SIBLING
:
914 if (man
->last
->next
!= NULL
) {
915 n
->next
= man
->last
->next
;
916 man
->last
->next
->prev
= n
;
918 man
->last
->parent
->last
= n
;
921 n
->parent
= man
->last
->parent
;
923 case ROFF_NEXT_CHILD
:
924 if (man
->last
->child
!= NULL
) {
925 n
->next
= man
->last
->child
;
926 man
->last
->child
->prev
= n
;
929 man
->last
->child
= n
;
930 n
->parent
= man
->last
;
942 if (n
->end
!= ENDBODY_NOT
)
954 * Copy over the normalised-data pointer of our parent. Not
955 * everybody has one, but copying a null pointer is fine.
958 n
->norm
= n
->parent
->norm
;
959 assert(n
->parent
->type
== ROFFT_BLOCK
);
963 roff_word_alloc(struct roff_man
*man
, int line
, int pos
, const char *word
)
967 n
= roff_node_alloc(man
, line
, pos
, ROFFT_TEXT
, TOKEN_NONE
);
968 n
->string
= roff_strdup(man
->roff
, word
);
969 roff_node_append(man
, n
);
970 n
->flags
|= NODE_VALID
| NODE_ENDED
;
971 man
->next
= ROFF_NEXT_SIBLING
;
975 roff_word_append(struct roff_man
*man
, const char *word
)
978 char *addstr
, *newstr
;
981 addstr
= roff_strdup(man
->roff
, word
);
982 mandoc_asprintf(&newstr
, "%s %s", n
->string
, addstr
);
986 man
->next
= ROFF_NEXT_SIBLING
;
990 roff_elem_alloc(struct roff_man
*man
, int line
, int pos
, int tok
)
994 n
= roff_node_alloc(man
, line
, pos
, ROFFT_ELEM
, tok
);
995 roff_node_append(man
, n
);
996 man
->next
= ROFF_NEXT_CHILD
;
1000 roff_block_alloc(struct roff_man
*man
, int line
, int pos
, int tok
)
1002 struct roff_node
*n
;
1004 n
= roff_node_alloc(man
, line
, pos
, ROFFT_BLOCK
, tok
);
1005 roff_node_append(man
, n
);
1006 man
->next
= ROFF_NEXT_CHILD
;
1011 roff_head_alloc(struct roff_man
*man
, int line
, int pos
, int tok
)
1013 struct roff_node
*n
;
1015 n
= roff_node_alloc(man
, line
, pos
, ROFFT_HEAD
, tok
);
1016 roff_node_append(man
, n
);
1017 man
->next
= ROFF_NEXT_CHILD
;
1022 roff_body_alloc(struct roff_man
*man
, int line
, int pos
, int tok
)
1024 struct roff_node
*n
;
1026 n
= roff_node_alloc(man
, line
, pos
, ROFFT_BODY
, tok
);
1027 roff_node_append(man
, n
);
1028 man
->next
= ROFF_NEXT_CHILD
;
1033 roff_addtbl(struct roff_man
*man
, int line
, struct tbl_node
*tbl
)
1035 struct roff_node
*n
;
1036 struct tbl_span
*span
;
1038 if (man
->meta
.macroset
== MACROSET_MAN
)
1039 man_breakscope(man
, ROFF_TS
);
1040 while ((span
= tbl_span(tbl
)) != NULL
) {
1041 n
= roff_node_alloc(man
, line
, 0, ROFFT_TBL
, TOKEN_NONE
);
1043 roff_node_append(man
, n
);
1044 n
->flags
|= NODE_VALID
| NODE_ENDED
;
1045 man
->next
= ROFF_NEXT_SIBLING
;
1050 roff_node_unlink(struct roff_man
*man
, struct roff_node
*n
)
1053 /* Adjust siblings. */
1056 n
->prev
->next
= n
->next
;
1058 n
->next
->prev
= n
->prev
;
1060 /* Adjust parent. */
1062 if (n
->parent
!= NULL
) {
1063 if (n
->parent
->child
== n
)
1064 n
->parent
->child
= n
->next
;
1065 if (n
->parent
->last
== n
)
1066 n
->parent
->last
= n
->prev
;
1069 /* Adjust parse point. */
1073 if (man
->last
== n
) {
1074 if (n
->prev
== NULL
) {
1075 man
->last
= n
->parent
;
1076 man
->next
= ROFF_NEXT_CHILD
;
1078 man
->last
= n
->prev
;
1079 man
->next
= ROFF_NEXT_SIBLING
;
1082 if (man
->meta
.first
== n
)
1083 man
->meta
.first
= NULL
;
1087 roff_node_relink(struct roff_man
*man
, struct roff_node
*n
)
1089 roff_node_unlink(man
, n
);
1090 n
->prev
= n
->next
= NULL
;
1091 roff_node_append(man
, n
);
1095 roff_node_free(struct roff_node
*n
)
1098 if (n
->args
!= NULL
)
1099 mdoc_argv_free(n
->args
);
1100 if (n
->type
== ROFFT_BLOCK
|| n
->type
== ROFFT_ELEM
)
1102 eqn_box_free(n
->eqn
);
1108 roff_node_delete(struct roff_man
*man
, struct roff_node
*n
)
1111 while (n
->child
!= NULL
)
1112 roff_node_delete(man
, n
->child
);
1113 roff_node_unlink(man
, n
);
1118 deroff(char **dest
, const struct roff_node
*n
)
1123 if (n
->type
!= ROFFT_TEXT
) {
1124 for (n
= n
->child
; n
!= NULL
; n
= n
->next
)
1129 /* Skip leading whitespace. */
1131 for (cp
= n
->string
; *cp
!= '\0'; cp
++) {
1132 if (cp
[0] == '\\' && cp
[1] != '\0' &&
1133 strchr(" %&0^|~", cp
[1]) != NULL
)
1135 else if ( ! isspace((unsigned char)*cp
))
1139 /* Skip trailing backslash. */
1142 if (sz
> 0 && cp
[sz
- 1] == '\\')
1145 /* Skip trailing whitespace. */
1148 if ( ! isspace((unsigned char)cp
[sz
-1]))
1151 /* Skip empty strings. */
1156 if (*dest
== NULL
) {
1157 *dest
= mandoc_strndup(cp
, sz
);
1161 mandoc_asprintf(&cp
, "%s %*s", *dest
, (int)sz
, cp
);
1166 /* --- main functions of the roff parser ---------------------------------- */
1169 * In the current line, expand escape sequences that produce parsable
1170 * input text. Also check the syntax of the remaining escape sequences,
1171 * which typically produce output glyphs or change formatter state.
1174 roff_expand(struct roff
*r
, struct buf
*buf
, int ln
, int pos
, char newesc
)
1176 struct mctx
*ctx
; /* current macro call context */
1177 char ubuf
[24]; /* buffer to print the number */
1178 struct roff_node
*n
; /* used for header comments */
1179 const char *start
; /* start of the string to process */
1180 char *stesc
; /* start of an escape sequence ('\\') */
1181 const char *esct
; /* type of escape sequence */
1182 char *ep
; /* end of comment string */
1183 const char *stnam
; /* start of the name, after "[(*" */
1184 const char *cp
; /* end of the name, e.g. before ']' */
1185 const char *res
; /* the string to be substituted */
1186 char *nbuf
; /* new buffer to copy buf->buf to */
1187 size_t maxl
; /* expected length of the escape name */
1188 size_t naml
; /* actual length of the escape name */
1189 size_t asz
; /* length of the replacement */
1190 size_t rsz
; /* length of the rest of the string */
1191 int inaml
; /* length returned from mandoc_escape() */
1192 int expand_count
; /* to avoid infinite loops */
1193 int npos
; /* position in numeric expression */
1194 int arg_complete
; /* argument not interrupted by eol */
1195 int quote_args
; /* true for \\$@, false for \\$* */
1196 int done
; /* no more input available */
1197 int deftype
; /* type of definition to paste */
1198 int rcsid
; /* kind of RCS id seen */
1199 enum mandocerr err
; /* for escape sequence problems */
1200 char sign
; /* increment number register */
1201 char term
; /* character terminating the escape */
1203 /* Search forward for comments. */
1206 start
= buf
->buf
+ pos
;
1207 for (stesc
= buf
->buf
+ pos
; *stesc
!= '\0'; stesc
++) {
1208 if (stesc
[0] != newesc
|| stesc
[1] == '\0')
1211 if (*stesc
!= '"' && *stesc
!= '#')
1214 /* Comment found, look for RCS id. */
1217 if ((cp
= strstr(stesc
, "$" "OpenBSD")) != NULL
) {
1218 rcsid
= 1 << MANDOC_OS_OPENBSD
;
1220 } else if ((cp
= strstr(stesc
, "$" "NetBSD")) != NULL
) {
1221 rcsid
= 1 << MANDOC_OS_NETBSD
;
1225 isalnum((unsigned char)*cp
) == 0 &&
1226 strchr(cp
, '$') != NULL
) {
1227 if (r
->man
->meta
.rcsids
& rcsid
)
1228 mandoc_msg(MANDOCERR_RCS_REP
, ln
,
1229 (int)(stesc
- buf
->buf
) + 1,
1231 r
->man
->meta
.rcsids
|= rcsid
;
1234 /* Handle trailing whitespace. */
1236 ep
= strchr(stesc
--, '\0') - 1;
1241 if (*ep
== ' ' || *ep
== '\t')
1242 mandoc_msg(MANDOCERR_SPACE_EOL
,
1243 ln
, (int)(ep
- buf
->buf
), NULL
);
1246 * Save comments preceding the title macro
1247 * in the syntax tree.
1250 if (newesc
!= ASCII_ESC
&& r
->options
& MPARSE_COMMENT
) {
1251 while (*ep
== ' ' || *ep
== '\t')
1254 n
= roff_node_alloc(r
->man
,
1255 ln
, stesc
+ 1 - buf
->buf
,
1256 ROFFT_COMMENT
, TOKEN_NONE
);
1257 n
->string
= mandoc_strdup(stesc
+ 2);
1258 roff_node_append(r
->man
, n
);
1259 n
->flags
|= NODE_VALID
| NODE_ENDED
;
1260 r
->man
->next
= ROFF_NEXT_SIBLING
;
1263 /* Line continuation with comment. */
1265 if (stesc
[1] == '#') {
1267 return ROFF_IGN
| ROFF_APPEND
;
1270 /* Discard normal comments. */
1272 while (stesc
> start
&& stesc
[-1] == ' ' &&
1273 (stesc
== start
+ 1 || stesc
[-2] != '\\'))
1282 /* Notice the end of the input. */
1284 if (*stesc
== '\n') {
1290 while (stesc
>= start
) {
1291 if (*stesc
!= newesc
) {
1294 * If we have a non-standard escape character,
1295 * escape literal backslashes because all
1296 * processing in subsequent functions uses
1297 * the standard escaping rules.
1300 if (newesc
!= ASCII_ESC
&& *stesc
== '\\') {
1302 buf
->sz
= mandoc_asprintf(&nbuf
, "%s\\e%s",
1303 buf
->buf
, stesc
+ 1) + 1;
1305 stesc
= nbuf
+ (stesc
- buf
->buf
);
1310 /* Search backwards for the next escape. */
1316 /* If it is escaped, skip it. */
1318 for (cp
= stesc
- 1; cp
>= start
; cp
--)
1319 if (*cp
!= r
->escape
)
1322 if ((stesc
- cp
) % 2 == 0) {
1326 } else if (stesc
[1] != '\0') {
1333 return ROFF_IGN
| ROFF_APPEND
;
1336 /* Decide whether to expand or to check only. */
1354 if (sign
== '+' || sign
== '-')
1360 switch(mandoc_escape(&cp
, &stnam
, &inaml
)) {
1361 case ESCAPE_SPECIAL
:
1362 if (mchars_spec2cp(stnam
, inaml
) >= 0)
1366 err
= MANDOCERR_ESC_BAD
;
1369 err
= MANDOCERR_ESC_UNDEF
;
1372 err
= MANDOCERR_ESC_UNSUPP
;
1377 if (err
!= MANDOCERR_OK
)
1378 mandoc_msg(err
, ln
, (int)(stesc
- buf
->buf
),
1379 "%.*s", (int)(cp
- stesc
), stesc
);
1384 if (EXPAND_LIMIT
< ++expand_count
) {
1385 mandoc_msg(MANDOCERR_ROFFLOOP
,
1386 ln
, (int)(stesc
- buf
->buf
), NULL
);
1391 * The third character decides the length
1392 * of the name of the string or register.
1393 * Save a pointer to the name.
1420 /* Advance to the end of the name. */
1424 while (maxl
== 0 || naml
< maxl
) {
1426 mandoc_msg(MANDOCERR_ESC_BAD
, ln
,
1427 (int)(stesc
- buf
->buf
), "%s", stesc
);
1431 if (maxl
== 0 && *cp
== term
) {
1435 if (*cp
++ != '\\' || *esct
!= 'w') {
1439 switch (mandoc_escape(&cp
, NULL
, NULL
)) {
1440 case ESCAPE_SPECIAL
:
1441 case ESCAPE_UNICODE
:
1442 case ESCAPE_NUMBERED
:
1444 case ESCAPE_OVERSTRIKE
:
1453 * Retrieve the replacement string; if it is
1454 * undefined, resume searching for escapes.
1460 deftype
= ROFFDEF_USER
| ROFFDEF_PRE
;
1461 res
= roff_getstrn(r
, stnam
, naml
, &deftype
);
1464 * If not overridden, let \*(.T
1465 * through to the formatters.
1468 if (res
== NULL
&& naml
== 2 &&
1469 stnam
[0] == '.' && stnam
[1] == 'T') {
1470 roff_setstrn(&r
->strtab
,
1471 ".T", 2, NULL
, 0, 0);
1478 if (r
->mstackpos
< 0) {
1479 mandoc_msg(MANDOCERR_ARG_UNDEF
, ln
,
1480 (int)(stesc
- buf
->buf
), "%.3s", stesc
);
1483 ctx
= r
->mstack
+ r
->mstackpos
;
1484 npos
= esct
[1] - '1';
1485 if (npos
>= 0 && npos
<= 8) {
1486 res
= npos
< ctx
->argc
?
1487 ctx
->argv
[npos
] : "";
1492 else if (esct
[1] == '@')
1495 mandoc_msg(MANDOCERR_ARG_NONUM
, ln
,
1496 (int)(stesc
- buf
->buf
), "%.3s", stesc
);
1500 for (npos
= 0; npos
< ctx
->argc
; npos
++) {
1504 asz
+= 2; /* quotes */
1505 asz
+= strlen(ctx
->argv
[npos
]);
1508 rsz
= buf
->sz
- (stesc
- buf
->buf
) - 3;
1510 memmove(stesc
+ asz
, stesc
+ 3, rsz
);
1512 nbuf
= mandoc_realloc(buf
->buf
, buf
->sz
);
1514 stesc
= nbuf
+ (stesc
- buf
->buf
);
1517 memmove(stesc
+ asz
, stesc
+ 3, rsz
);
1519 for (npos
= 0; npos
< ctx
->argc
; npos
++) {
1524 cp
= ctx
->argv
[npos
];
1533 ubuf
[0] = arg_complete
&&
1534 roff_evalnum(r
, ln
, stnam
, &npos
,
1535 NULL
, ROFFNUM_SCALE
) &&
1536 stnam
+ npos
+ 1 == cp
? '1' : '0';
1541 (void)snprintf(ubuf
, sizeof(ubuf
), "%d",
1542 roff_getregn(r
, stnam
, naml
, sign
));
1547 /* use even incomplete args */
1548 (void)snprintf(ubuf
, sizeof(ubuf
), "%d",
1555 mandoc_msg(MANDOCERR_STR_UNDEF
,
1556 ln
, (int)(stesc
- buf
->buf
),
1557 "%.*s", (int)naml
, stnam
);
1559 } else if (buf
->sz
+ strlen(res
) > SHRT_MAX
) {
1560 mandoc_msg(MANDOCERR_ROFFLOOP
,
1561 ln
, (int)(stesc
- buf
->buf
), NULL
);
1565 /* Replace the escape sequence by the string. */
1568 buf
->sz
= mandoc_asprintf(&nbuf
, "%s%s%s",
1569 buf
->buf
, res
, cp
) + 1;
1571 /* Prepare for the next replacement. */
1574 stesc
= nbuf
+ (stesc
- buf
->buf
) + strlen(res
);
1582 * Parse a quoted or unquoted roff-style request or macro argument.
1583 * Return a pointer to the parsed argument, which is either the original
1584 * pointer or advanced by one byte in case the argument is quoted.
1585 * NUL-terminate the argument in place.
1586 * Collapse pairs of quotes inside quoted arguments.
1587 * Advance the argument pointer to the next argument,
1588 * or to the NUL byte terminating the argument line.
1591 roff_getarg(struct roff
*r
, char **cpp
, int ln
, int *pos
)
1595 int newesc
, pairs
, quoted
, white
;
1597 /* Quoting can only start with a new word. */
1600 if ('"' == *start
) {
1605 newesc
= pairs
= white
= 0;
1606 for (cp
= start
; '\0' != *cp
; cp
++) {
1609 * Move the following text left
1610 * after quoted quotes and after "\\" and "\t".
1615 if ('\\' == cp
[0]) {
1617 * In copy mode, translate double to single
1618 * backslashes and backslash-t to literal tabs.
1629 cp
[-pairs
] = ASCII_ESC
;
1634 /* Skip escaped blanks. */
1641 } else if (0 == quoted
) {
1643 /* Unescaped blanks end unquoted args. */
1647 } else if ('"' == cp
[0]) {
1649 /* Quoted quotes collapse. */
1653 /* Unquoted quotes end quoted args. */
1660 /* Quoted argument without a closing quote. */
1662 mandoc_msg(MANDOCERR_ARG_QUOTE
, ln
, *pos
, NULL
);
1664 /* NUL-terminate this argument and move to the next one. */
1672 *pos
+= (int)(cp
- start
) + (quoted
? 1 : 0);
1675 if ('\0' == *cp
&& (white
|| ' ' == cp
[-1]))
1676 mandoc_msg(MANDOCERR_SPACE_EOL
, ln
, *pos
, NULL
);
1678 start
= mandoc_strdup(start
);
1683 buf
.sz
= strlen(start
) + 1;
1685 if (roff_expand(r
, &buf
, ln
, 0, ASCII_ESC
) & ROFF_IGN
) {
1687 buf
.buf
= mandoc_strdup("");
1694 * Process text streams.
1697 roff_parsetext(struct roff
*r
, struct buf
*buf
, int pos
, int *offs
)
1703 enum mandoc_esc esc
;
1705 /* Spring the input line trap. */
1707 if (roffit_lines
== 1) {
1708 isz
= mandoc_asprintf(&p
, "%s\n.%s", buf
->buf
, roffit_macro
);
1715 return ROFF_REPARSE
;
1716 } else if (roffit_lines
> 1)
1719 if (roffce_node
!= NULL
&& buf
->buf
[pos
] != '\0') {
1720 if (roffce_lines
< 1) {
1721 r
->man
->last
= roffce_node
;
1722 r
->man
->next
= ROFF_NEXT_SIBLING
;
1729 /* Convert all breakable hyphens into ASCII_HYPH. */
1731 start
= p
= buf
->buf
+ pos
;
1733 while (*p
!= '\0') {
1734 sz
= strcspn(p
, "-\\");
1741 /* Skip over escapes. */
1743 esc
= mandoc_escape((const char **)&p
, NULL
, NULL
);
1744 if (esc
== ESCAPE_ERROR
)
1749 } else if (p
== start
) {
1754 if (isalpha((unsigned char)p
[-1]) &&
1755 isalpha((unsigned char)p
[1]))
1763 roff_parseln(struct roff
*r
, int ln
, struct buf
*buf
, int *offs
)
1767 int pos
; /* parse point */
1768 int spos
; /* saved parse point for messages */
1769 int ppos
; /* original offset in buf->buf */
1770 int ctl
; /* macro line (boolean) */
1774 /* Handle in-line equation delimiters. */
1776 if (r
->tbl
== NULL
&&
1777 r
->last_eqn
!= NULL
&& r
->last_eqn
->delim
&&
1778 (r
->eqn
== NULL
|| r
->eqn_inline
)) {
1779 e
= roff_eqndelim(r
, buf
, pos
);
1780 if (e
== ROFF_REPARSE
)
1782 assert(e
== ROFF_CONT
);
1785 /* Expand some escape sequences. */
1787 e
= roff_expand(r
, buf
, ln
, pos
, r
->escape
);
1788 if ((e
& ROFF_MASK
) == ROFF_IGN
)
1790 assert(e
== ROFF_CONT
);
1792 ctl
= roff_getcontrol(r
, buf
->buf
, &pos
);
1795 * First, if a scope is open and we're not a macro, pass the
1796 * text through the macro's filter.
1797 * Equations process all content themselves.
1798 * Tables process almost all content themselves, but we want
1799 * to warn about macros before passing it there.
1802 if (r
->last
!= NULL
&& ! ctl
) {
1804 e
= (*roffs
[t
].text
)(r
, t
, buf
, ln
, pos
, pos
, offs
);
1805 if ((e
& ROFF_MASK
) == ROFF_IGN
)
1810 if (r
->eqn
!= NULL
&& strncmp(buf
->buf
+ ppos
, ".EN", 3)) {
1811 eqn_read(r
->eqn
, buf
->buf
+ ppos
);
1814 if (r
->tbl
!= NULL
&& (ctl
== 0 || buf
->buf
[pos
] == '\0')) {
1815 tbl_read(r
->tbl
, ln
, buf
->buf
, ppos
);
1816 roff_addtbl(r
->man
, ln
, r
->tbl
);
1820 r
->options
&= ~MPARSE_COMMENT
;
1821 return roff_parsetext(r
, buf
, pos
, offs
) | e
;
1824 /* Skip empty request lines. */
1826 if (buf
->buf
[pos
] == '"') {
1827 mandoc_msg(MANDOCERR_COMMENT_BAD
, ln
, pos
, NULL
);
1829 } else if (buf
->buf
[pos
] == '\0')
1833 * If a scope is open, go to the child handler for that macro,
1834 * as it may want to preprocess before doing anything with it.
1835 * Don't do so if an equation is open.
1840 return (*roffs
[t
].sub
)(r
, t
, buf
, ln
, ppos
, pos
, offs
);
1843 /* No scope is open. This is a new request or macro. */
1845 r
->options
&= ~MPARSE_COMMENT
;
1847 t
= roff_parse(r
, buf
->buf
, &pos
, ln
, ppos
);
1849 /* Tables ignore most macros. */
1851 if (r
->tbl
!= NULL
&& (t
== TOKEN_NONE
|| t
== ROFF_TS
||
1852 t
== ROFF_br
|| t
== ROFF_ce
|| t
== ROFF_rj
|| t
== ROFF_sp
)) {
1853 mandoc_msg(MANDOCERR_TBLMACRO
,
1854 ln
, pos
, "%s", buf
->buf
+ spos
);
1855 if (t
!= TOKEN_NONE
)
1857 while (buf
->buf
[pos
] != '\0' && buf
->buf
[pos
] != ' ')
1859 while (buf
->buf
[pos
] == ' ')
1861 tbl_read(r
->tbl
, ln
, buf
->buf
, pos
);
1862 roff_addtbl(r
->man
, ln
, r
->tbl
);
1866 /* For now, let high level macros abort .ce mode. */
1868 if (ctl
&& roffce_node
!= NULL
&&
1869 (t
== TOKEN_NONE
|| t
== ROFF_Dd
|| t
== ROFF_EQ
||
1870 t
== ROFF_TH
|| t
== ROFF_TS
)) {
1871 r
->man
->last
= roffce_node
;
1872 r
->man
->next
= ROFF_NEXT_SIBLING
;
1878 * This is neither a roff request nor a user-defined macro.
1879 * Let the standard macro set parsers handle it.
1882 if (t
== TOKEN_NONE
)
1885 /* Execute a roff request or a user defined macro. */
1887 return (*roffs
[t
].proc
)(r
, t
, buf
, ln
, spos
, pos
, offs
);
1891 * Internal interface function to tell the roff parser that execution
1892 * of the current macro ended. This is required because macro
1893 * definitions usually do not end with a .return request.
1896 roff_userret(struct roff
*r
)
1901 assert(r
->mstackpos
>= 0);
1902 ctx
= r
->mstack
+ r
->mstackpos
;
1903 for (i
= 0; i
< ctx
->argc
; i
++)
1910 roff_endparse(struct roff
*r
)
1912 if (r
->last
!= NULL
)
1913 mandoc_msg(MANDOCERR_BLK_NOEND
, r
->last
->line
,
1914 r
->last
->col
, "%s", roff_name
[r
->last
->tok
]);
1916 if (r
->eqn
!= NULL
) {
1917 mandoc_msg(MANDOCERR_BLK_NOEND
,
1918 r
->eqn
->node
->line
, r
->eqn
->node
->pos
, "EQ");
1923 if (r
->tbl
!= NULL
) {
1930 * Parse a roff node's type from the input buffer. This must be in the
1931 * form of ".foo xxx" in the usual way.
1933 static enum roff_tok
1934 roff_parse(struct roff
*r
, char *buf
, int *pos
, int ln
, int ppos
)
1944 if ('\0' == *cp
|| '"' == *cp
|| '\t' == *cp
|| ' ' == *cp
)
1948 maclen
= roff_getname(r
, &cp
, ln
, ppos
);
1950 deftype
= ROFFDEF_USER
| ROFFDEF_REN
;
1951 r
->current_string
= roff_getstrn(r
, mac
, maclen
, &deftype
);
1960 t
= roffhash_find(r
->reqtab
, mac
, maclen
);
1963 if (t
!= TOKEN_NONE
)
1965 else if (deftype
== ROFFDEF_UNDEF
) {
1966 /* Using an undefined macro defines it to be empty. */
1967 roff_setstrn(&r
->strtab
, mac
, maclen
, "", 0, 0);
1968 roff_setstrn(&r
->rentab
, mac
, maclen
, NULL
, 0, 0);
1973 /* --- handling of request blocks ----------------------------------------- */
1976 roff_cblock(ROFF_ARGS
)
1980 * A block-close `..' should only be invoked as a child of an
1981 * ignore macro, otherwise raise a warning and just ignore it.
1984 if (r
->last
== NULL
) {
1985 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "..");
1989 switch (r
->last
->tok
) {
1991 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1994 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1999 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "..");
2003 if (buf
->buf
[pos
] != '\0')
2004 mandoc_msg(MANDOCERR_ARG_SKIP
, ln
, pos
,
2005 ".. %s", buf
->buf
+ pos
);
2008 roffnode_cleanscope(r
);
2014 * Pop all nodes ending at the end of the current input line.
2015 * Return the number of loops ended.
2018 roffnode_cleanscope(struct roff
*r
)
2023 while (r
->last
!= NULL
) {
2024 if (--r
->last
->endspan
!= 0)
2026 inloop
+= roffnode_pop(r
);
2032 * Handle the closing \} of a conditional block.
2033 * Apart from generating warnings, this only pops nodes.
2034 * Return the number of loops ended.
2037 roff_ccond(struct roff
*r
, int ln
, int ppos
)
2039 if (NULL
== r
->last
) {
2040 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "\\}");
2044 switch (r
->last
->tok
) {
2051 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "\\}");
2055 if (r
->last
->endspan
> -1) {
2056 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "\\}");
2060 return roffnode_pop(r
) + roffnode_cleanscope(r
);
2064 roff_block(ROFF_ARGS
)
2066 const char *name
, *value
;
2067 char *call
, *cp
, *iname
, *rname
;
2068 size_t csz
, namesz
, rsz
;
2071 /* Ignore groff compatibility mode for now. */
2073 if (tok
== ROFF_de1
)
2075 else if (tok
== ROFF_dei1
)
2077 else if (tok
== ROFF_am1
)
2079 else if (tok
== ROFF_ami1
)
2082 /* Parse the macro name argument. */
2084 cp
= buf
->buf
+ pos
;
2085 if (tok
== ROFF_ig
) {
2090 namesz
= roff_getname(r
, &cp
, ln
, ppos
);
2091 iname
[namesz
] = '\0';
2094 /* Resolve the macro name argument if it is indirect. */
2096 if (namesz
&& (tok
== ROFF_dei
|| tok
== ROFF_ami
)) {
2097 deftype
= ROFFDEF_USER
;
2098 name
= roff_getstrn(r
, iname
, namesz
, &deftype
);
2100 mandoc_msg(MANDOCERR_STR_UNDEF
,
2101 ln
, (int)(iname
- buf
->buf
),
2102 "%.*s", (int)namesz
, iname
);
2105 namesz
= strlen(name
);
2109 if (namesz
== 0 && tok
!= ROFF_ig
) {
2110 mandoc_msg(MANDOCERR_REQ_EMPTY
,
2111 ln
, ppos
, "%s", roff_name
[tok
]);
2115 roffnode_push(r
, tok
, name
, ln
, ppos
);
2118 * At the beginning of a `de' macro, clear the existing string
2119 * with the same name, if there is one. New content will be
2120 * appended from roff_block_text() in multiline mode.
2123 if (tok
== ROFF_de
|| tok
== ROFF_dei
) {
2124 roff_setstrn(&r
->strtab
, name
, namesz
, "", 0, 0);
2125 roff_setstrn(&r
->rentab
, name
, namesz
, NULL
, 0, 0);
2126 } else if (tok
== ROFF_am
|| tok
== ROFF_ami
) {
2127 deftype
= ROFFDEF_ANY
;
2128 value
= roff_getstrn(r
, iname
, namesz
, &deftype
);
2129 switch (deftype
) { /* Before appending, ... */
2130 case ROFFDEF_PRE
: /* copy predefined to user-defined. */
2131 roff_setstrn(&r
->strtab
, name
, namesz
,
2132 value
, strlen(value
), 0);
2134 case ROFFDEF_REN
: /* call original standard macro. */
2135 csz
= mandoc_asprintf(&call
, ".%.*s \\$* \\\"\n",
2136 (int)strlen(value
), value
);
2137 roff_setstrn(&r
->strtab
, name
, namesz
, call
, csz
, 0);
2138 roff_setstrn(&r
->rentab
, name
, namesz
, NULL
, 0, 0);
2141 case ROFFDEF_STD
: /* rename and call standard macro. */
2142 rsz
= mandoc_asprintf(&rname
, "__%s_renamed", name
);
2143 roff_setstrn(&r
->rentab
, rname
, rsz
, name
, namesz
, 0);
2144 csz
= mandoc_asprintf(&call
, ".%.*s \\$* \\\"\n",
2146 roff_setstrn(&r
->strtab
, name
, namesz
, call
, csz
, 0);
2158 /* Get the custom end marker. */
2161 namesz
= roff_getname(r
, &cp
, ln
, ppos
);
2163 /* Resolve the end marker if it is indirect. */
2165 if (namesz
&& (tok
== ROFF_dei
|| tok
== ROFF_ami
)) {
2166 deftype
= ROFFDEF_USER
;
2167 name
= roff_getstrn(r
, iname
, namesz
, &deftype
);
2169 mandoc_msg(MANDOCERR_STR_UNDEF
,
2170 ln
, (int)(iname
- buf
->buf
),
2171 "%.*s", (int)namesz
, iname
);
2174 namesz
= strlen(name
);
2179 r
->last
->end
= mandoc_strndup(name
, namesz
);
2182 mandoc_msg(MANDOCERR_ARG_EXCESS
,
2183 ln
, pos
, ".%s ... %s", roff_name
[tok
], cp
);
2189 roff_block_sub(ROFF_ARGS
)
2195 * First check whether a custom macro exists at this level. If
2196 * it does, then check against it. This is one of groff's
2197 * stranger behaviours. If we encountered a custom end-scope
2198 * tag and that tag also happens to be a "real" macro, then we
2199 * need to try interpreting it again as a real macro. If it's
2200 * not, then return ignore. Else continue.
2204 for (i
= pos
, j
= 0; r
->last
->end
[j
]; j
++, i
++)
2205 if (buf
->buf
[i
] != r
->last
->end
[j
])
2208 if (r
->last
->end
[j
] == '\0' &&
2209 (buf
->buf
[i
] == '\0' ||
2210 buf
->buf
[i
] == ' ' ||
2211 buf
->buf
[i
] == '\t')) {
2213 roffnode_cleanscope(r
);
2215 while (buf
->buf
[i
] == ' ' || buf
->buf
[i
] == '\t')
2219 if (roff_parse(r
, buf
->buf
, &pos
, ln
, ppos
) !=
2227 * If we have no custom end-query or lookup failed, then try
2228 * pulling it out of the hashtable.
2231 t
= roff_parse(r
, buf
->buf
, &pos
, ln
, ppos
);
2233 if (t
!= ROFF_cblock
) {
2235 roff_setstr(r
, r
->last
->name
, buf
->buf
+ ppos
, 2);
2239 return (*roffs
[t
].proc
)(r
, t
, buf
, ln
, ppos
, pos
, offs
);
2243 roff_block_text(ROFF_ARGS
)
2247 roff_setstr(r
, r
->last
->name
, buf
->buf
+ pos
, 2);
2253 roff_cond_sub(ROFF_ARGS
)
2255 struct roffnode
*bl
;
2257 int endloop
, irc
, rr
;
2262 endloop
= tok
!= ROFF_while
? ROFF_IGN
:
2263 rr
? ROFF_LOOPCONT
: ROFF_LOOPEXIT
;
2264 if (roffnode_cleanscope(r
))
2268 * If `\}' occurs on a macro line without a preceding macro,
2269 * drop the line completely.
2272 ep
= buf
->buf
+ pos
;
2273 if (ep
[0] == '\\' && ep
[1] == '}')
2277 * The closing delimiter `\}' rewinds the conditional scope
2278 * but is otherwise ignored when interpreting the line.
2281 while ((ep
= strchr(ep
, '\\')) != NULL
) {
2284 memmove(ep
, ep
+ 2, strlen(ep
+ 2) + 1);
2285 if (roff_ccond(r
, ln
, ep
- buf
->buf
))
2297 t
= roff_parse(r
, buf
->buf
, &pos
, ln
, ppos
);
2299 /* For now, let high level macros abort .ce mode. */
2301 if (roffce_node
!= NULL
&&
2302 (t
== TOKEN_NONE
|| t
== ROFF_Dd
|| t
== ROFF_EQ
||
2303 t
== ROFF_TH
|| t
== ROFF_TS
)) {
2304 r
->man
->last
= roffce_node
;
2305 r
->man
->next
= ROFF_NEXT_SIBLING
;
2311 * Fully handle known macros when they are structurally
2312 * required or when the conditional evaluated to true.
2315 if (t
== ROFF_break
) {
2316 if (irc
& ROFF_LOOPMASK
)
2317 irc
= ROFF_IGN
| ROFF_LOOPEXIT
;
2319 for (bl
= r
->last
; bl
!= NULL
; bl
= bl
->parent
) {
2321 if (bl
->tok
== ROFF_while
)
2325 } else if (t
!= TOKEN_NONE
&&
2326 (rr
|| roffs
[t
].flags
& ROFFMAC_STRUCT
))
2327 irc
|= (*roffs
[t
].proc
)(r
, t
, buf
, ln
, ppos
, pos
, offs
);
2329 irc
|= rr
? ROFF_CONT
: ROFF_IGN
;
2334 roff_cond_text(ROFF_ARGS
)
2337 int endloop
, irc
, rr
;
2341 endloop
= tok
!= ROFF_while
? ROFF_IGN
:
2342 rr
? ROFF_LOOPCONT
: ROFF_LOOPEXIT
;
2343 if (roffnode_cleanscope(r
))
2347 * If `\}' occurs on a text line with neither preceding
2348 * nor following characters, drop the line completely.
2351 ep
= buf
->buf
+ pos
;
2352 if (strcmp(ep
, "\\}") == 0)
2356 * The closing delimiter `\}' rewinds the conditional scope
2357 * but is otherwise ignored when interpreting the line.
2360 while ((ep
= strchr(ep
, '\\')) != NULL
) {
2363 memmove(ep
, ep
+ 2, strlen(ep
+ 2) + 1);
2364 if (roff_ccond(r
, ln
, ep
- buf
->buf
))
2380 /* --- handling of numeric and conditional expressions -------------------- */
2383 * Parse a single signed integer number. Stop at the first non-digit.
2384 * If there is at least one digit, return success and advance the
2385 * parse point, else return failure and leave the parse point unchanged.
2386 * Ignore overflows, treat them just like the C language.
2389 roff_getnum(const char *v
, int *pos
, int *res
, int flags
)
2391 int myres
, scaled
, n
, p
;
2398 if (n
|| v
[p
] == '+')
2401 if (flags
& ROFFNUM_WHITE
)
2402 while (isspace((unsigned char)v
[p
]))
2405 for (*res
= 0; isdigit((unsigned char)v
[p
]); p
++)
2406 *res
= 10 * *res
+ v
[p
] - '0';
2413 /* Each number may be followed by one optional scaling unit. */
2417 scaled
= *res
* 65536;
2420 scaled
= *res
* 240;
2423 scaled
= *res
* 240 / 2.54;
2434 scaled
= *res
* 10 / 3;
2440 scaled
= *res
* 6 / 25;
2447 if (flags
& ROFFNUM_SCALE
)
2455 * Evaluate a string comparison condition.
2456 * The first character is the delimiter.
2457 * Succeed if the string up to its second occurrence
2458 * matches the string up to its third occurrence.
2459 * Advance the cursor after the third occurrence
2460 * or lacking that, to the end of the line.
2463 roff_evalstrcond(const char *v
, int *pos
)
2465 const char *s1
, *s2
, *s3
;
2469 s1
= v
+ *pos
; /* initial delimiter */
2470 s2
= s1
+ 1; /* for scanning the first string */
2471 s3
= strchr(s2
, *s1
); /* for scanning the second string */
2473 if (NULL
== s3
) /* found no middle delimiter */
2476 while ('\0' != *++s3
) {
2477 if (*s2
!= *s3
) { /* mismatch */
2478 s3
= strchr(s3
, *s1
);
2481 if (*s3
== *s1
) { /* found the final delimiter */
2490 s3
= strchr(s2
, '\0');
2491 else if (*s3
!= '\0')
2498 * Evaluate an optionally negated single character, numerical,
2499 * or string condition.
2502 roff_evalcond(struct roff
*r
, int ln
, char *v
, int *pos
)
2504 const char *start
, *end
;
2507 int deftype
, len
, number
, savepos
, istrue
, wanttrue
;
2509 if ('!' == v
[*pos
]) {
2530 } while (v
[*pos
] == ' ');
2533 * Quirk for groff compatibility:
2534 * The horizontal tab is neither available nor unavailable.
2537 if (v
[*pos
] == '\t') {
2542 /* Printable ASCII characters are available. */
2544 if (v
[*pos
] != '\\') {
2550 switch (mandoc_escape(&end
, &start
, &len
)) {
2551 case ESCAPE_SPECIAL
:
2552 istrue
= mchars_spec2cp(start
, len
) != -1;
2554 case ESCAPE_UNICODE
:
2557 case ESCAPE_NUMBERED
:
2558 istrue
= mchars_num2char(start
, len
) != -1;
2565 return istrue
== wanttrue
;
2572 sz
= roff_getname(r
, &cp
, ln
, cp
- v
);
2575 else if (v
[*pos
] == 'r')
2576 istrue
= roff_hasregn(r
, name
, sz
);
2578 deftype
= ROFFDEF_ANY
;
2579 roff_getstrn(r
, name
, sz
, &deftype
);
2582 *pos
= (name
+ sz
) - v
;
2583 return istrue
== wanttrue
;
2589 if (roff_evalnum(r
, ln
, v
, pos
, &number
, ROFFNUM_SCALE
))
2590 return (number
> 0) == wanttrue
;
2591 else if (*pos
== savepos
)
2592 return roff_evalstrcond(v
, pos
) == wanttrue
;
2598 roff_line_ignore(ROFF_ARGS
)
2605 roff_insec(ROFF_ARGS
)
2608 mandoc_msg(MANDOCERR_REQ_INSEC
, ln
, ppos
, "%s", roff_name
[tok
]);
2613 roff_unsupp(ROFF_ARGS
)
2616 mandoc_msg(MANDOCERR_REQ_UNSUPP
, ln
, ppos
, "%s", roff_name
[tok
]);
2621 roff_cond(ROFF_ARGS
)
2625 roffnode_push(r
, tok
, NULL
, ln
, ppos
);
2628 * An `.el' has no conditional body: it will consume the value
2629 * of the current rstack entry set in prior `ie' calls or
2632 * If we're not an `el', however, then evaluate the conditional.
2635 r
->last
->rule
= tok
== ROFF_el
?
2636 (r
->rstackpos
< 0 ? 0 : r
->rstack
[r
->rstackpos
--]) :
2637 roff_evalcond(r
, ln
, buf
->buf
, &pos
);
2640 * An if-else will put the NEGATION of the current evaluated
2641 * conditional into the stack of rules.
2644 if (tok
== ROFF_ie
) {
2645 if (r
->rstackpos
+ 1 == r
->rstacksz
) {
2647 r
->rstack
= mandoc_reallocarray(r
->rstack
,
2648 r
->rstacksz
, sizeof(int));
2650 r
->rstack
[++r
->rstackpos
] = !r
->last
->rule
;
2653 /* If the parent has false as its rule, then so do we. */
2655 if (r
->last
->parent
&& !r
->last
->parent
->rule
)
2660 * If there is nothing on the line after the conditional,
2661 * not even whitespace, use next-line scope.
2662 * Except that .while does not support next-line scope.
2665 if (buf
->buf
[pos
] == '\0' && tok
!= ROFF_while
) {
2666 r
->last
->endspan
= 2;
2670 while (buf
->buf
[pos
] == ' ')
2673 /* An opening brace requests multiline scope. */
2675 if (buf
->buf
[pos
] == '\\' && buf
->buf
[pos
+ 1] == '{') {
2676 r
->last
->endspan
= -1;
2678 while (buf
->buf
[pos
] == ' ')
2684 * Anything else following the conditional causes
2685 * single-line scope. Warn if the scope contains
2686 * nothing but trailing whitespace.
2689 if (buf
->buf
[pos
] == '\0')
2690 mandoc_msg(MANDOCERR_COND_EMPTY
,
2691 ln
, ppos
, "%s", roff_name
[tok
]);
2693 r
->last
->endspan
= 1;
2698 if (tok
== ROFF_while
)
2710 /* Ignore groff compatibility mode for now. */
2712 if (tok
== ROFF_ds1
)
2714 else if (tok
== ROFF_as1
)
2718 * The first word is the name of the string.
2719 * If it is empty or terminated by an escape sequence,
2720 * abort the `ds' request without defining anything.
2723 name
= string
= buf
->buf
+ pos
;
2727 namesz
= roff_getname(r
, &string
, ln
, pos
);
2728 switch (name
[namesz
]) {
2732 string
= buf
->buf
+ pos
+ namesz
;
2738 /* Read past the initial double-quote, if any. */
2742 /* The rest is the value. */
2743 roff_setstrn(&r
->strtab
, name
, namesz
, string
, strlen(string
),
2745 roff_setstrn(&r
->rentab
, name
, namesz
, NULL
, 0, 0);
2750 * Parse a single operator, one or two characters long.
2751 * If the operator is recognized, return success and advance the
2752 * parse point, else return failure and leave the parse point unchanged.
2755 roff_getop(const char *v
, int *pos
, char *res
)
2770 switch (v
[*pos
+ 1]) {
2788 switch (v
[*pos
+ 1]) {
2802 if ('=' == v
[*pos
+ 1])
2814 * Evaluate either a parenthesized numeric expression
2815 * or a single signed integer number.
2818 roff_evalpar(struct roff
*r
, int ln
,
2819 const char *v
, int *pos
, int *res
, int flags
)
2823 return roff_getnum(v
, pos
, res
, flags
);
2826 if ( ! roff_evalnum(r
, ln
, v
, pos
, res
, flags
| ROFFNUM_WHITE
))
2830 * Omission of the closing parenthesis
2831 * is an error in validation mode,
2832 * but ignored in evaluation mode.
2837 else if (NULL
== res
)
2844 * Evaluate a complete numeric expression.
2845 * Proceed left to right, there is no concept of precedence.
2848 roff_evalnum(struct roff
*r
, int ln
, const char *v
,
2849 int *pos
, int *res
, int flags
)
2851 int mypos
, operand2
;
2859 if (flags
& ROFFNUM_WHITE
)
2860 while (isspace((unsigned char)v
[*pos
]))
2863 if ( ! roff_evalpar(r
, ln
, v
, pos
, res
, flags
))
2867 if (flags
& ROFFNUM_WHITE
)
2868 while (isspace((unsigned char)v
[*pos
]))
2871 if ( ! roff_getop(v
, pos
, &operator))
2874 if (flags
& ROFFNUM_WHITE
)
2875 while (isspace((unsigned char)v
[*pos
]))
2878 if ( ! roff_evalpar(r
, ln
, v
, pos
, &operand2
, flags
))
2881 if (flags
& ROFFNUM_WHITE
)
2882 while (isspace((unsigned char)v
[*pos
]))
2899 if (operand2
== 0) {
2900 mandoc_msg(MANDOCERR_DIVZERO
,
2908 if (operand2
== 0) {
2909 mandoc_msg(MANDOCERR_DIVZERO
,
2917 *res
= *res
< operand2
;
2920 *res
= *res
> operand2
;
2923 *res
= *res
<= operand2
;
2926 *res
= *res
>= operand2
;
2929 *res
= *res
== operand2
;
2932 *res
= *res
!= operand2
;
2935 *res
= *res
&& operand2
;
2938 *res
= *res
|| operand2
;
2941 if (operand2
< *res
)
2945 if (operand2
> *res
)
2955 /* --- register management ------------------------------------------------ */
2958 roff_setreg(struct roff
*r
, const char *name
, int val
, char sign
)
2960 roff_setregn(r
, name
, strlen(name
), val
, sign
, INT_MIN
);
2964 roff_setregn(struct roff
*r
, const char *name
, size_t len
,
2965 int val
, char sign
, int step
)
2967 struct roffreg
*reg
;
2969 /* Search for an existing register with the same name. */
2972 while (reg
!= NULL
&& (reg
->key
.sz
!= len
||
2973 strncmp(reg
->key
.p
, name
, len
) != 0))
2977 /* Create a new register. */
2978 reg
= mandoc_malloc(sizeof(struct roffreg
));
2979 reg
->key
.p
= mandoc_strndup(name
, len
);
2983 reg
->next
= r
->regtab
;
2989 else if ('-' == sign
)
2993 if (step
!= INT_MIN
)
2998 * Handle some predefined read-only number registers.
2999 * For now, return -1 if the requested register is not predefined;
3000 * in case a predefined read-only register having the value -1
3001 * were to turn up, another special value would have to be chosen.
3004 roff_getregro(const struct roff
*r
, const char *name
)
3008 case '$': /* Number of arguments of the last macro evaluated. */
3009 return r
->mstackpos
< 0 ? 0 : r
->mstack
[r
->mstackpos
].argc
;
3010 case 'A': /* ASCII approximation mode is always off. */
3012 case 'g': /* Groff compatibility mode is always on. */
3014 case 'H': /* Fixed horizontal resolution. */
3016 case 'j': /* Always adjust left margin only. */
3018 case 'T': /* Some output device is always defined. */
3020 case 'V': /* Fixed vertical resolution. */
3028 roff_getreg(struct roff
*r
, const char *name
)
3030 return roff_getregn(r
, name
, strlen(name
), '\0');
3034 roff_getregn(struct roff
*r
, const char *name
, size_t len
, char sign
)
3036 struct roffreg
*reg
;
3039 if ('.' == name
[0] && 2 == len
) {
3040 val
= roff_getregro(r
, name
+ 1);
3045 for (reg
= r
->regtab
; reg
; reg
= reg
->next
) {
3046 if (len
== reg
->key
.sz
&&
3047 0 == strncmp(name
, reg
->key
.p
, len
)) {
3050 reg
->val
+= reg
->step
;
3053 reg
->val
-= reg
->step
;
3062 roff_setregn(r
, name
, len
, 0, '\0', INT_MIN
);
3067 roff_hasregn(const struct roff
*r
, const char *name
, size_t len
)
3069 struct roffreg
*reg
;
3072 if ('.' == name
[0] && 2 == len
) {
3073 val
= roff_getregro(r
, name
+ 1);
3078 for (reg
= r
->regtab
; reg
; reg
= reg
->next
)
3079 if (len
== reg
->key
.sz
&&
3080 0 == strncmp(name
, reg
->key
.p
, len
))
3087 roff_freereg(struct roffreg
*reg
)
3089 struct roffreg
*old_reg
;
3091 while (NULL
!= reg
) {
3102 char *key
, *val
, *step
;
3107 key
= val
= buf
->buf
+ pos
;
3111 keysz
= roff_getname(r
, &val
, ln
, pos
);
3112 if (key
[keysz
] == '\\' || key
[keysz
] == '\t')
3116 if (sign
== '+' || sign
== '-')
3120 if (roff_evalnum(r
, ln
, val
, &len
, &iv
, ROFFNUM_SCALE
) == 0)
3124 while (isspace((unsigned char)*step
))
3126 if (roff_evalnum(r
, ln
, step
, NULL
, &is
, 0) == 0)
3129 roff_setregn(r
, key
, keysz
, iv
, sign
, is
);
3136 struct roffreg
*reg
, **prev
;
3140 name
= cp
= buf
->buf
+ pos
;
3143 namesz
= roff_getname(r
, &cp
, ln
, pos
);
3144 name
[namesz
] = '\0';
3149 if (reg
== NULL
|| !strcmp(name
, reg
->key
.p
))
3161 /* --- handler functions for roff requests -------------------------------- */
3170 cp
= buf
->buf
+ pos
;
3171 while (*cp
!= '\0') {
3173 namesz
= roff_getname(r
, &cp
, ln
, (int)(cp
- buf
->buf
));
3174 roff_setstrn(&r
->strtab
, name
, namesz
, NULL
, 0, 0);
3175 roff_setstrn(&r
->rentab
, name
, namesz
, NULL
, 0, 0);
3176 if (name
[namesz
] == '\\' || name
[namesz
] == '\t')
3187 /* Parse the number of lines. */
3189 if ( ! roff_evalnum(r
, ln
, buf
->buf
, &pos
, &iv
, 0)) {
3190 mandoc_msg(MANDOCERR_IT_NONUM
,
3191 ln
, ppos
, "%s", buf
->buf
+ 1);
3195 while (isspace((unsigned char)buf
->buf
[pos
]))
3199 * Arm the input line trap.
3200 * Special-casing "an-trap" is an ugly workaround to cope
3201 * with DocBook stupidly fiddling with man(7) internals.
3205 roffit_macro
= mandoc_strdup(iv
!= 1 ||
3206 strcmp(buf
->buf
+ pos
, "an-trap") ?
3207 buf
->buf
+ pos
: "br");
3215 enum roff_tok t
, te
;
3222 r
->format
= MPARSE_MDOC
;
3223 mask
= MPARSE_MDOC
| MPARSE_QUICK
;
3229 r
->format
= MPARSE_MAN
;
3230 mask
= MPARSE_QUICK
;
3235 if ((r
->options
& mask
) == 0)
3236 for (t
= tok
; t
< te
; t
++)
3237 roff_setstr(r
, roff_name
[t
], NULL
, 0);
3244 r
->man
->flags
&= ~ROFF_NONOFILL
;
3245 if (r
->tbl
== NULL
) {
3246 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "TE");
3249 if (tbl_end(r
->tbl
, 0) == 0) {
3252 buf
->buf
= mandoc_strdup(".sp");
3255 return ROFF_REPARSE
;
3266 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "T&");
3268 tbl_restart(ln
, ppos
, r
->tbl
);
3274 * Handle in-line equation delimiters.
3277 roff_eqndelim(struct roff
*r
, struct buf
*buf
, int pos
)
3280 const char *bef_pr
, *bef_nl
, *mac
, *aft_nl
, *aft_pr
;
3283 * Outside equations, look for an opening delimiter.
3284 * If we are inside an equation, we already know it is
3285 * in-line, or this function wouldn't have been called;
3286 * so look for a closing delimiter.
3289 cp1
= buf
->buf
+ pos
;
3290 cp2
= strchr(cp1
, r
->eqn
== NULL
?
3291 r
->last_eqn
->odelim
: r
->last_eqn
->cdelim
);
3296 bef_pr
= bef_nl
= aft_nl
= aft_pr
= "";
3298 /* Handle preceding text, protecting whitespace. */
3300 if (*buf
->buf
!= '\0') {
3307 * Prepare replacing the delimiter with an equation macro
3308 * and drop leading white space from the equation.
3311 if (r
->eqn
== NULL
) {
3318 /* Handle following text, protecting whitespace. */
3326 /* Do the actual replacement. */
3328 buf
->sz
= mandoc_asprintf(&cp1
, "%s%s%s%s%s%s%s", buf
->buf
,
3329 bef_pr
, bef_nl
, mac
, aft_nl
, aft_pr
, cp2
) + 1;
3333 /* Toggle the in-line state of the eqn subsystem. */
3335 r
->eqn_inline
= r
->eqn
== NULL
;
3336 return ROFF_REPARSE
;
3342 struct roff_node
*n
;
3344 if (r
->man
->meta
.macroset
== MACROSET_MAN
)
3345 man_breakscope(r
->man
, ROFF_EQ
);
3346 n
= roff_node_alloc(r
->man
, ln
, ppos
, ROFFT_EQN
, TOKEN_NONE
);
3347 if (ln
> r
->man
->last
->line
)
3348 n
->flags
|= NODE_LINE
;
3349 n
->eqn
= eqn_box_new();
3350 roff_node_append(r
->man
, n
);
3351 r
->man
->next
= ROFF_NEXT_SIBLING
;
3353 assert(r
->eqn
== NULL
);
3354 if (r
->last_eqn
== NULL
)
3355 r
->last_eqn
= eqn_alloc();
3357 eqn_reset(r
->last_eqn
);
3358 r
->eqn
= r
->last_eqn
;
3361 if (buf
->buf
[pos
] != '\0')
3362 mandoc_msg(MANDOCERR_ARG_SKIP
, ln
, pos
,
3363 ".EQ %s", buf
->buf
+ pos
);
3371 if (r
->eqn
!= NULL
) {
3375 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, ppos
, "EN");
3376 if (buf
->buf
[pos
] != '\0')
3377 mandoc_msg(MANDOCERR_ARG_SKIP
, ln
, pos
,
3378 "EN %s", buf
->buf
+ pos
);
3385 if (r
->tbl
!= NULL
) {
3386 mandoc_msg(MANDOCERR_BLK_BROKEN
, ln
, ppos
, "TS breaks TS");
3389 r
->man
->flags
|= ROFF_NONOFILL
;
3390 r
->tbl
= tbl_alloc(ppos
, ln
, r
->last_tbl
);
3391 if (r
->last_tbl
== NULL
)
3392 r
->first_tbl
= r
->tbl
;
3393 r
->last_tbl
= r
->tbl
;
3398 roff_noarg(ROFF_ARGS
)
3400 if (r
->man
->flags
& (MAN_BLINE
| MAN_ELINE
))
3401 man_breakscope(r
->man
, tok
);
3402 if (tok
== ROFF_brp
)
3404 roff_elem_alloc(r
->man
, ln
, ppos
, tok
);
3405 if (buf
->buf
[pos
] != '\0')
3406 mandoc_msg(MANDOCERR_ARG_SKIP
, ln
, pos
,
3407 "%s %s", roff_name
[tok
], buf
->buf
+ pos
);
3409 r
->man
->flags
|= ROFF_NOFILL
;
3410 else if (tok
== ROFF_fi
)
3411 r
->man
->flags
&= ~ROFF_NOFILL
;
3412 r
->man
->last
->flags
|= NODE_LINE
| NODE_VALID
| NODE_ENDED
;
3413 r
->man
->next
= ROFF_NEXT_SIBLING
;
3418 roff_onearg(ROFF_ARGS
)
3420 struct roff_node
*n
;
3424 if (r
->man
->flags
& (MAN_BLINE
| MAN_ELINE
) &&
3425 (tok
== ROFF_ce
|| tok
== ROFF_rj
|| tok
== ROFF_sp
||
3427 man_breakscope(r
->man
, tok
);
3429 if (roffce_node
!= NULL
&& (tok
== ROFF_ce
|| tok
== ROFF_rj
)) {
3430 r
->man
->last
= roffce_node
;
3431 r
->man
->next
= ROFF_NEXT_SIBLING
;
3434 roff_elem_alloc(r
->man
, ln
, ppos
, tok
);
3437 cp
= buf
->buf
+ pos
;
3439 while (*cp
!= '\0' && *cp
!= ' ')
3444 mandoc_msg(MANDOCERR_ARG_EXCESS
,
3445 ln
, (int)(cp
- buf
->buf
),
3446 "%s ... %s", roff_name
[tok
], cp
);
3447 roff_word_alloc(r
->man
, ln
, pos
, buf
->buf
+ pos
);
3450 if (tok
== ROFF_ce
|| tok
== ROFF_rj
) {
3451 if (r
->man
->last
->type
== ROFFT_ELEM
) {
3452 roff_word_alloc(r
->man
, ln
, pos
, "1");
3453 r
->man
->last
->flags
|= NODE_NOSRC
;
3456 if (roff_evalnum(r
, ln
, r
->man
->last
->string
, &npos
,
3457 &roffce_lines
, 0) == 0) {
3458 mandoc_msg(MANDOCERR_CE_NONUM
,
3459 ln
, pos
, "ce %s", buf
->buf
+ pos
);
3462 if (roffce_lines
< 1) {
3463 r
->man
->last
= r
->man
->last
->parent
;
3467 roffce_node
= r
->man
->last
->parent
;
3469 n
->flags
|= NODE_VALID
| NODE_ENDED
;
3472 n
->flags
|= NODE_LINE
;
3473 r
->man
->next
= ROFF_NEXT_SIBLING
;
3478 roff_manyarg(ROFF_ARGS
)
3480 struct roff_node
*n
;
3483 roff_elem_alloc(r
->man
, ln
, ppos
, tok
);
3486 for (sp
= ep
= buf
->buf
+ pos
; *sp
!= '\0'; sp
= ep
) {
3487 while (*ep
!= '\0' && *ep
!= ' ')
3491 roff_word_alloc(r
->man
, ln
, sp
- buf
->buf
, sp
);
3494 n
->flags
|= NODE_LINE
| NODE_VALID
| NODE_ENDED
;
3496 r
->man
->next
= ROFF_NEXT_SIBLING
;
3503 char *oldn
, *newn
, *end
, *value
;
3504 size_t oldsz
, newsz
, valsz
;
3506 newn
= oldn
= buf
->buf
+ pos
;
3510 newsz
= roff_getname(r
, &oldn
, ln
, pos
);
3511 if (newn
[newsz
] == '\\' || newn
[newsz
] == '\t' || *oldn
== '\0')
3515 oldsz
= roff_getname(r
, &end
, ln
, oldn
- buf
->buf
);
3519 valsz
= mandoc_asprintf(&value
, ".%.*s \\$@\\\"\n",
3521 roff_setstrn(&r
->strtab
, newn
, newsz
, value
, valsz
, 0);
3522 roff_setstrn(&r
->rentab
, newn
, newsz
, NULL
, 0, 0);
3528 * The .break request only makes sense inside conditionals,
3529 * and that case is already handled in roff_cond_sub().
3532 roff_break(ROFF_ARGS
)
3534 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, ln
, pos
, "break");
3545 if (*p
== '\0' || (r
->control
= *p
++) == '.')
3549 mandoc_msg(MANDOCERR_ARG_EXCESS
,
3550 ln
, p
- buf
->buf
, "cc ... %s", p
);
3556 roff_char(ROFF_ARGS
)
3558 const char *p
, *kp
, *vp
;
3562 /* Parse the character to be replaced. */
3564 kp
= buf
->buf
+ pos
;
3566 if (*kp
== '\0' || (*kp
== '\\' &&
3567 mandoc_escape(&p
, NULL
, NULL
) != ESCAPE_SPECIAL
) ||
3568 (*p
!= ' ' && *p
!= '\0')) {
3569 mandoc_msg(MANDOCERR_CHAR_ARG
, ln
, pos
, "char %s", kp
);
3577 * If the replacement string contains a font escape sequence,
3578 * we have to restore the font at the end.
3584 while (*p
!= '\0') {
3587 switch (mandoc_escape(&p
, NULL
, NULL
)) {
3589 case ESCAPE_FONTROMAN
:
3590 case ESCAPE_FONTITALIC
:
3591 case ESCAPE_FONTBOLD
:
3594 case ESCAPE_FONTPREV
:
3602 mandoc_msg(MANDOCERR_CHAR_FONT
,
3603 ln
, (int)(vp
- buf
->buf
), "%s", vp
);
3606 * Approximate the effect of .char using the .tr tables.
3607 * XXX In groff, .char and .tr interact differently.
3611 if (r
->xtab
== NULL
)
3612 r
->xtab
= mandoc_calloc(128, sizeof(*r
->xtab
));
3613 assert((unsigned int)*kp
< 128);
3614 free(r
->xtab
[(int)*kp
].p
);
3615 r
->xtab
[(int)*kp
].sz
= mandoc_asprintf(&r
->xtab
[(int)*kp
].p
,
3616 "%s%s", vp
, font
? "\fP" : "");
3618 roff_setstrn(&r
->xmbtab
, kp
, ksz
, vp
, vsz
, 0);
3620 roff_setstrn(&r
->xmbtab
, kp
, ksz
, "\\fP", 3, 1);
3636 mandoc_msg(MANDOCERR_ARG_EXCESS
, ln
,
3637 (int)(p
- buf
->buf
), "ec ... %s", p
);
3646 if (buf
->buf
[pos
] != '\0')
3647 mandoc_msg(MANDOCERR_ARG_SKIP
,
3648 ln
, pos
, "eo %s", buf
->buf
+ pos
);
3655 while (buf
->buf
[pos
] == ' ')
3664 const char *p
, *first
, *second
;
3666 enum mandoc_esc esc
;
3671 mandoc_msg(MANDOCERR_REQ_EMPTY
, ln
, ppos
, "tr");
3675 while (*p
!= '\0') {
3679 if (*first
== '\\') {
3680 esc
= mandoc_escape(&p
, NULL
, NULL
);
3681 if (esc
== ESCAPE_ERROR
) {
3682 mandoc_msg(MANDOCERR_ESC_BAD
, ln
,
3683 (int)(p
- buf
->buf
), "%s", first
);
3686 fsz
= (size_t)(p
- first
);
3690 if (*second
== '\\') {
3691 esc
= mandoc_escape(&p
, NULL
, NULL
);
3692 if (esc
== ESCAPE_ERROR
) {
3693 mandoc_msg(MANDOCERR_ESC_BAD
, ln
,
3694 (int)(p
- buf
->buf
), "%s", second
);
3697 ssz
= (size_t)(p
- second
);
3698 } else if (*second
== '\0') {
3699 mandoc_msg(MANDOCERR_TR_ODD
, ln
,
3700 (int)(first
- buf
->buf
), "tr %s", first
);
3706 roff_setstrn(&r
->xmbtab
, first
, fsz
,
3711 if (r
->xtab
== NULL
)
3712 r
->xtab
= mandoc_calloc(128,
3713 sizeof(struct roffstr
));
3715 free(r
->xtab
[(int)*first
].p
);
3716 r
->xtab
[(int)*first
].p
= mandoc_strndup(second
, ssz
);
3717 r
->xtab
[(int)*first
].sz
= ssz
;
3724 * Implementation of the .return request.
3725 * There is no need to call roff_userret() from here.
3726 * The read module will call that after rewinding the reader stack
3727 * to the place from where the current macro was called.
3730 roff_return(ROFF_ARGS
)
3732 if (r
->mstackpos
>= 0)
3733 return ROFF_IGN
| ROFF_USERRET
;
3735 mandoc_msg(MANDOCERR_REQ_NOMAC
, ln
, ppos
, "return");
3743 char *oldn
, *newn
, *end
;
3744 size_t oldsz
, newsz
;
3747 oldn
= newn
= buf
->buf
+ pos
;
3751 oldsz
= roff_getname(r
, &newn
, ln
, pos
);
3752 if (oldn
[oldsz
] == '\\' || oldn
[oldsz
] == '\t' || *newn
== '\0')
3756 newsz
= roff_getname(r
, &end
, ln
, newn
- buf
->buf
);
3760 deftype
= ROFFDEF_ANY
;
3761 value
= roff_getstrn(r
, oldn
, oldsz
, &deftype
);
3764 roff_setstrn(&r
->strtab
, newn
, newsz
, value
, strlen(value
), 0);
3765 roff_setstrn(&r
->strtab
, oldn
, oldsz
, NULL
, 0, 0);
3766 roff_setstrn(&r
->rentab
, newn
, newsz
, NULL
, 0, 0);
3769 roff_setstrn(&r
->strtab
, newn
, newsz
, value
, strlen(value
), 0);
3770 roff_setstrn(&r
->rentab
, newn
, newsz
, NULL
, 0, 0);
3773 roff_setstrn(&r
->rentab
, newn
, newsz
, value
, strlen(value
), 0);
3774 roff_setstrn(&r
->rentab
, oldn
, oldsz
, NULL
, 0, 0);
3775 roff_setstrn(&r
->strtab
, newn
, newsz
, NULL
, 0, 0);
3778 roff_setstrn(&r
->rentab
, newn
, newsz
, oldn
, oldsz
, 0);
3779 roff_setstrn(&r
->strtab
, newn
, newsz
, NULL
, 0, 0);
3782 roff_setstrn(&r
->strtab
, newn
, newsz
, NULL
, 0, 0);
3783 roff_setstrn(&r
->rentab
, newn
, newsz
, NULL
, 0, 0);
3790 roff_shift(ROFF_ARGS
)
3796 if (buf
->buf
[pos
] != '\0' &&
3797 roff_evalnum(r
, ln
, buf
->buf
, &pos
, &levels
, 0) == 0) {
3798 mandoc_msg(MANDOCERR_CE_NONUM
,
3799 ln
, pos
, "shift %s", buf
->buf
+ pos
);
3802 if (r
->mstackpos
< 0) {
3803 mandoc_msg(MANDOCERR_REQ_NOMAC
, ln
, ppos
, "shift");
3806 ctx
= r
->mstack
+ r
->mstackpos
;
3807 if (levels
> ctx
->argc
) {
3808 mandoc_msg(MANDOCERR_SHIFT
,
3809 ln
, pos
, "%d, but max is %d", levels
, ctx
->argc
);
3814 for (i
= 0; i
< levels
; i
++)
3816 ctx
->argc
-= levels
;
3817 for (i
= 0; i
< ctx
->argc
; i
++)
3818 ctx
->argv
[i
] = ctx
->argv
[i
+ levels
];
3827 name
= buf
->buf
+ pos
;
3828 mandoc_msg(MANDOCERR_SO
, ln
, ppos
, "so %s", name
);
3831 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3832 * opening anything that's not in our cwd or anything beneath
3833 * it. Thus, explicitly disallow traversing up the file-system
3834 * or using absolute paths.
3837 if (*name
== '/' || strstr(name
, "../") || strstr(name
, "/..")) {
3838 mandoc_msg(MANDOCERR_SO_PATH
, ln
, ppos
, ".so %s", name
);
3839 buf
->sz
= mandoc_asprintf(&cp
,
3840 ".sp\nSee the file %s.\n.sp", name
) + 1;
3844 return ROFF_REPARSE
;
3851 /* --- user defined strings and macros ------------------------------------ */
3854 roff_userdef(ROFF_ARGS
)
3857 char *arg
, *ap
, *dst
, *src
;
3860 /* If the macro is empty, ignore it altogether. */
3862 if (*r
->current_string
== '\0')
3865 /* Initialize a new macro stack context. */
3867 if (++r
->mstackpos
== r
->mstacksz
) {
3868 r
->mstack
= mandoc_recallocarray(r
->mstack
,
3869 r
->mstacksz
, r
->mstacksz
+ 8, sizeof(*r
->mstack
));
3872 ctx
= r
->mstack
+ r
->mstackpos
;
3878 * Collect pointers to macro argument strings,
3879 * NUL-terminating them and escaping quotes.
3882 src
= buf
->buf
+ pos
;
3883 while (*src
!= '\0') {
3884 if (ctx
->argc
== ctx
->argsz
) {
3886 ctx
->argv
= mandoc_reallocarray(ctx
->argv
,
3887 ctx
->argsz
, sizeof(*ctx
->argv
));
3889 arg
= roff_getarg(r
, &src
, ln
, &pos
);
3890 sz
= 1; /* For the terminating NUL. */
3891 for (ap
= arg
; *ap
!= '\0'; ap
++)
3892 sz
+= *ap
== '"' ? 4 : 1;
3893 ctx
->argv
[ctx
->argc
++] = dst
= mandoc_malloc(sz
);
3894 for (ap
= arg
; *ap
!= '\0'; ap
++) {
3896 memcpy(dst
, "\\(dq", 4);
3905 /* Replace the macro invocation by the macro definition. */
3908 buf
->buf
= mandoc_strdup(r
->current_string
);
3909 buf
->sz
= strlen(buf
->buf
) + 1;
3912 return buf
->buf
[buf
->sz
- 2] == '\n' ?
3913 ROFF_REPARSE
| ROFF_USERCALL
: ROFF_IGN
| ROFF_APPEND
;
3917 * Calling a high-level macro that was renamed with .rn.
3918 * r->current_string has already been set up by roff_parse().
3921 roff_renamed(ROFF_ARGS
)
3925 buf
->sz
= mandoc_asprintf(&nbuf
, ".%s%s%s", r
->current_string
,
3926 buf
->buf
[pos
] == '\0' ? "" : " ", buf
->buf
+ pos
) + 1;
3934 * Measure the length in bytes of the roff identifier at *cpp
3935 * and advance the pointer to the next word.
3938 roff_getname(struct roff
*r
, char **cpp
, int ln
, int pos
)
3947 /* Advance cp to the byte after the end of the name. */
3949 for (cp
= name
; 1; cp
++) {
3953 if (*cp
== ' ' || *cp
== '\t') {
3959 if (cp
[1] == '{' || cp
[1] == '}')
3963 mandoc_msg(MANDOCERR_NAMESC
, ln
, pos
,
3964 "%.*s", (int)(cp
- name
+ 1), name
);
3965 mandoc_escape((const char **)&cp
, NULL
, NULL
);
3969 /* Read past spaces. */
3979 * Store *string into the user-defined string called *name.
3980 * To clear an existing entry, call with (*r, *name, NULL, 0).
3981 * append == 0: replace mode
3982 * append == 1: single-line append mode
3983 * append == 2: multiline append mode, append '\n' after each call
3986 roff_setstr(struct roff
*r
, const char *name
, const char *string
,
3991 namesz
= strlen(name
);
3992 roff_setstrn(&r
->strtab
, name
, namesz
, string
,
3993 string
? strlen(string
) : 0, append
);
3994 roff_setstrn(&r
->rentab
, name
, namesz
, NULL
, 0, 0);
3998 roff_setstrn(struct roffkv
**r
, const char *name
, size_t namesz
,
3999 const char *string
, size_t stringsz
, int append
)
4004 size_t oldch
, newch
;
4006 /* Search for an existing string with the same name. */
4009 while (n
&& (namesz
!= n
->key
.sz
||
4010 strncmp(n
->key
.p
, name
, namesz
)))
4014 /* Create a new string table entry. */
4015 n
= mandoc_malloc(sizeof(struct roffkv
));
4016 n
->key
.p
= mandoc_strndup(name
, namesz
);
4022 } else if (0 == append
) {
4032 * One additional byte for the '\n' in multiline mode,
4033 * and one for the terminating '\0'.
4035 newch
= stringsz
+ (1 < append
? 2u : 1u);
4037 if (NULL
== n
->val
.p
) {
4038 n
->val
.p
= mandoc_malloc(newch
);
4043 n
->val
.p
= mandoc_realloc(n
->val
.p
, oldch
+ newch
);
4046 /* Skip existing content in the destination buffer. */
4047 c
= n
->val
.p
+ (int)oldch
;
4049 /* Append new content to the destination buffer. */
4051 while (i
< (int)stringsz
) {
4053 * Rudimentary roff copy mode:
4054 * Handle escaped backslashes.
4056 if ('\\' == string
[i
] && '\\' == string
[i
+ 1])
4061 /* Append terminating bytes. */
4066 n
->val
.sz
= (int)(c
- n
->val
.p
);
4070 roff_getstrn(struct roff
*r
, const char *name
, size_t len
,
4073 const struct roffkv
*n
;
4078 for (n
= r
->strtab
; n
!= NULL
; n
= n
->next
) {
4079 if (strncmp(name
, n
->key
.p
, len
) != 0 ||
4080 n
->key
.p
[len
] != '\0' || n
->val
.p
== NULL
)
4082 if (*deftype
& ROFFDEF_USER
) {
4083 *deftype
= ROFFDEF_USER
;
4090 for (n
= r
->rentab
; n
!= NULL
; n
= n
->next
) {
4091 if (strncmp(name
, n
->key
.p
, len
) != 0 ||
4092 n
->key
.p
[len
] != '\0' || n
->val
.p
== NULL
)
4094 if (*deftype
& ROFFDEF_REN
) {
4095 *deftype
= ROFFDEF_REN
;
4102 for (i
= 0; i
< PREDEFS_MAX
; i
++) {
4103 if (strncmp(name
, predefs
[i
].name
, len
) != 0 ||
4104 predefs
[i
].name
[len
] != '\0')
4106 if (*deftype
& ROFFDEF_PRE
) {
4107 *deftype
= ROFFDEF_PRE
;
4108 return predefs
[i
].str
;
4114 if (r
->man
->meta
.macroset
!= MACROSET_MAN
) {
4115 for (tok
= MDOC_Dd
; tok
< MDOC_MAX
; tok
++) {
4116 if (strncmp(name
, roff_name
[tok
], len
) != 0 ||
4117 roff_name
[tok
][len
] != '\0')
4119 if (*deftype
& ROFFDEF_STD
) {
4120 *deftype
= ROFFDEF_STD
;
4128 if (r
->man
->meta
.macroset
!= MACROSET_MDOC
) {
4129 for (tok
= MAN_TH
; tok
< MAN_MAX
; tok
++) {
4130 if (strncmp(name
, roff_name
[tok
], len
) != 0 ||
4131 roff_name
[tok
][len
] != '\0')
4133 if (*deftype
& ROFFDEF_STD
) {
4134 *deftype
= ROFFDEF_STD
;
4143 if (found
== 0 && *deftype
!= ROFFDEF_ANY
) {
4144 if (*deftype
& ROFFDEF_REN
) {
4146 * This might still be a request,
4147 * so do not treat it as undefined yet.
4149 *deftype
= ROFFDEF_UNDEF
;
4153 /* Using an undefined string defines it to be empty. */
4155 roff_setstrn(&r
->strtab
, name
, len
, "", 0, 0);
4156 roff_setstrn(&r
->rentab
, name
, len
, NULL
, 0, 0);
4164 roff_freestr(struct roffkv
*r
)
4166 struct roffkv
*n
, *nn
;
4168 for (n
= r
; n
; n
= nn
) {
4176 /* --- accessors and utility functions ------------------------------------ */
4179 * Duplicate an input string, making the appropriate character
4180 * conversions (as stipulated by `tr') along the way.
4181 * Returns a heap-allocated string with all the replacements made.
4184 roff_strdup(const struct roff
*r
, const char *p
)
4186 const struct roffkv
*cp
;
4190 enum mandoc_esc esc
;
4192 if (NULL
== r
->xmbtab
&& NULL
== r
->xtab
)
4193 return mandoc_strdup(p
);
4194 else if ('\0' == *p
)
4195 return mandoc_strdup("");
4198 * Step through each character looking for term matches
4199 * (remember that a `tr' can be invoked with an escape, which is
4200 * a glyph but the escape is multi-character).
4201 * We only do this if the character hash has been initialised
4202 * and the string is >0 length.
4208 while ('\0' != *p
) {
4209 assert((unsigned int)*p
< 128);
4210 if ('\\' != *p
&& r
->xtab
&& r
->xtab
[(unsigned int)*p
].p
) {
4211 sz
= r
->xtab
[(int)*p
].sz
;
4212 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
4213 memcpy(res
+ ssz
, r
->xtab
[(int)*p
].p
, sz
);
4217 } else if ('\\' != *p
) {
4218 res
= mandoc_realloc(res
, ssz
+ 2);
4223 /* Search for term matches. */
4224 for (cp
= r
->xmbtab
; cp
; cp
= cp
->next
)
4225 if (0 == strncmp(p
, cp
->key
.p
, cp
->key
.sz
))
4230 * A match has been found.
4231 * Append the match to the array and move
4232 * forward by its keysize.
4234 res
= mandoc_realloc(res
,
4235 ssz
+ cp
->val
.sz
+ 1);
4236 memcpy(res
+ ssz
, cp
->val
.p
, cp
->val
.sz
);
4238 p
+= (int)cp
->key
.sz
;
4243 * Handle escapes carefully: we need to copy
4244 * over just the escape itself, or else we might
4245 * do replacements within the escape itself.
4246 * Make sure to pass along the bogus string.
4249 esc
= mandoc_escape(&p
, NULL
, NULL
);
4250 if (ESCAPE_ERROR
== esc
) {
4252 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
4253 memcpy(res
+ ssz
, pp
, sz
);
4257 * We bail out on bad escapes.
4258 * No need to warn: we already did so when
4259 * roff_expand() was called.
4262 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
4263 memcpy(res
+ ssz
, pp
, sz
);
4267 res
[(int)ssz
] = '\0';
4272 roff_getformat(const struct roff
*r
)
4279 * Find out whether a line is a macro line or not.
4280 * If it is, adjust the current position and return one; if it isn't,
4281 * return zero and don't change the current position.
4282 * If the control character has been set with `.cc', then let that grain
4284 * This is slightly contrary to groff, where using the non-breaking
4285 * control character when `cc' has been invoked will cause the
4286 * non-breaking macro contents to be printed verbatim.
4289 roff_getcontrol(const struct roff
*r
, const char *cp
, int *ppos
)
4295 if (r
->control
!= '\0' && cp
[pos
] == r
->control
)
4297 else if (r
->control
!= '\0')
4299 else if ('\\' == cp
[pos
] && '.' == cp
[pos
+ 1])
4301 else if ('.' == cp
[pos
] || '\'' == cp
[pos
])
4306 while (' ' == cp
[pos
] || '\t' == cp
[pos
])