]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
replace the last legacy generic message type, "argument count wrong",
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.260 2015/02/06 16:06:25 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010, 2011, 2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28
29 #include "mandoc.h"
30 #include "mandoc_aux.h"
31 #include "libmandoc.h"
32 #include "libroff.h"
33
34 /* Maximum number of nested if-else conditionals. */
/* NOTE(review): presumably bounds the rstack array of struct roff - confirm. */
35 #define RSTACK_MAX 128
36
37 /* Maximum number of string expansions per line, to break infinite loops. */
/*
 * roff_res() counts its expansions per call and, once the count exceeds
 * this limit, reports MANDOCERR_ROFFLOOP and returns ROFF_IGN.
 */
38 #define EXPAND_LIMIT 1000
39
/*
 * Tokens for the roff requests and macros known to this file.
 *
 * The enumerators must stay in exactly the same order as the entries
 * of the roffs[] table: roffhash_find() turns the position of a table
 * entry into an enum rofft value by pointer subtraction.
 * The commented-out names mark requests that are deliberately absent.
 */
40 enum rofft {
41 ROFF_ab,
42 ROFF_ad,
43 ROFF_af,
44 ROFF_aln,
45 ROFF_als,
46 ROFF_am,
47 ROFF_am1,
48 ROFF_ami,
49 ROFF_ami1,
50 ROFF_as,
51 ROFF_as1,
52 ROFF_asciify,
53 ROFF_backtrace,
54 ROFF_bd,
55 ROFF_bleedat,
56 ROFF_blm,
57 ROFF_box,
58 ROFF_boxa,
59 ROFF_bp,
60 ROFF_BP,
61 /* MAN_br, MDOC_br */
62 ROFF_break,
63 ROFF_breakchar,
64 ROFF_brnl,
65 ROFF_brp,
66 ROFF_brpnl,
67 ROFF_c2,
68 ROFF_cc,
69 ROFF_ce,
70 ROFF_cf,
71 ROFF_cflags,
72 ROFF_ch,
73 ROFF_char,
74 ROFF_chop,
75 ROFF_class,
76 ROFF_close,
77 ROFF_CL,
78 ROFF_color,
79 ROFF_composite,
80 ROFF_continue,
81 ROFF_cp,
82 ROFF_cropat,
83 ROFF_cs,
84 ROFF_cu,
85 ROFF_da,
86 ROFF_dch,
87 ROFF_Dd,
88 ROFF_de,
89 ROFF_de1,
90 ROFF_defcolor,
91 ROFF_dei,
92 ROFF_dei1,
93 ROFF_device,
94 ROFF_devicem,
95 ROFF_di,
96 ROFF_do,
97 ROFF_ds,
98 ROFF_ds1,
99 ROFF_dwh,
100 ROFF_dt,
101 ROFF_ec,
102 ROFF_ecr,
103 ROFF_ecs,
104 ROFF_el,
105 ROFF_em,
106 ROFF_EN,
107 ROFF_eo,
108 ROFF_EP,
109 ROFF_EQ,
110 ROFF_errprint,
111 ROFF_ev,
112 ROFF_evc,
113 ROFF_ex,
114 ROFF_fallback,
115 ROFF_fam,
116 ROFF_fc,
117 ROFF_fchar,
118 ROFF_fcolor,
119 ROFF_fdeferlig,
120 ROFF_feature,
121 /* MAN_fi; ignored in mdoc(7) */
122 ROFF_fkern,
123 ROFF_fl,
124 ROFF_flig,
125 ROFF_fp,
126 ROFF_fps,
127 ROFF_fschar,
128 ROFF_fspacewidth,
129 ROFF_fspecial,
130 /* MAN_ft; ignored in mdoc(7) */
131 ROFF_ftr,
132 ROFF_fzoom,
133 ROFF_gcolor,
134 ROFF_hc,
135 ROFF_hcode,
136 ROFF_hidechar,
137 ROFF_hla,
138 ROFF_hlm,
139 ROFF_hpf,
140 ROFF_hpfa,
141 ROFF_hpfcode,
142 ROFF_hw,
143 ROFF_hy,
144 ROFF_hylang,
145 ROFF_hylen,
146 ROFF_hym,
147 ROFF_hypp,
148 ROFF_hys,
149 ROFF_ie,
150 ROFF_if,
151 ROFF_ig,
152 /* MAN_in; ignored in mdoc(7) */
153 ROFF_index,
154 ROFF_it,
155 ROFF_itc,
156 ROFF_IX,
157 ROFF_kern,
158 ROFF_kernafter,
159 ROFF_kernbefore,
160 ROFF_kernpair,
161 ROFF_lc,
162 ROFF_lc_ctype,
163 ROFF_lds,
164 ROFF_length,
165 ROFF_letadj,
166 ROFF_lf,
167 ROFF_lg,
168 ROFF_lhang,
169 ROFF_linetabs,
170 /* MAN_ll, MDOC_ll */
171 ROFF_lnr,
172 ROFF_lnrf,
173 ROFF_lpfx,
174 ROFF_ls,
175 ROFF_lsm,
176 ROFF_lt,
177 ROFF_mc,
178 ROFF_mediasize,
179 ROFF_minss,
180 ROFF_mk,
181 ROFF_mso,
182 ROFF_na,
183 ROFF_ne,
184 /* MAN_nf; ignored in mdoc(7) */
185 ROFF_nh,
186 ROFF_nhychar,
187 ROFF_nm,
188 ROFF_nn,
189 ROFF_nop,
190 ROFF_nr,
191 ROFF_nrf,
192 ROFF_nroff,
193 ROFF_ns,
194 ROFF_nx,
195 ROFF_open,
196 ROFF_opena,
197 ROFF_os,
198 ROFF_output,
199 ROFF_padj,
200 ROFF_papersize,
201 ROFF_pc,
202 ROFF_pev,
203 ROFF_pi,
204 ROFF_PI,
205 ROFF_pl,
206 ROFF_pm,
207 ROFF_pn,
208 ROFF_pnr,
209 ROFF_po,
210 ROFF_ps,
211 ROFF_psbb,
212 ROFF_pshape,
213 ROFF_pso,
214 ROFF_ptr,
215 ROFF_pvs,
216 ROFF_rchar,
217 ROFF_rd,
218 ROFF_recursionlimit,
219 ROFF_return,
220 ROFF_rfschar,
221 ROFF_rhang,
222 ROFF_rj,
223 ROFF_rm,
224 ROFF_rn,
225 ROFF_rnn,
226 ROFF_rr,
227 ROFF_rs,
228 ROFF_rt,
229 ROFF_schar,
230 ROFF_sentchar,
231 ROFF_shc,
232 ROFF_shift,
233 ROFF_sizes,
234 ROFF_so,
235 /* MAN_sp, MDOC_sp */
236 ROFF_spacewidth,
237 ROFF_special,
238 ROFF_spreadwarn,
239 ROFF_ss,
240 ROFF_sty,
241 ROFF_substring,
242 ROFF_sv,
243 ROFF_sy,
244 ROFF_T_,
245 ROFF_ta,
246 ROFF_tc,
247 ROFF_TE,
248 ROFF_TH,
249 ROFF_ti,
250 ROFF_tkf,
251 ROFF_tl,
252 ROFF_tm,
253 ROFF_tm1,
254 ROFF_tmc,
255 ROFF_tr,
256 ROFF_track,
257 ROFF_transchar,
258 ROFF_trf,
259 ROFF_trimat,
260 ROFF_trin,
261 ROFF_trnt,
262 ROFF_troff,
263 ROFF_TS,
264 ROFF_uf,
265 ROFF_ul,
266 ROFF_unformat,
267 ROFF_unwatch,
268 ROFF_unwatchn,
269 ROFF_vpt,
270 ROFF_vs,
271 ROFF_warn,
272 ROFF_warnscale,
273 ROFF_watch,
274 ROFF_watchlength,
275 ROFF_watchn,
276 ROFF_wh,
277 ROFF_while,
278 ROFF_write,
279 ROFF_writec,
280 ROFF_writem,
281 ROFF_xflag,
282 ROFF_cblock, /* the "." entry of roffs[], handled by roff_cblock() */
283 ROFF_USERDEF, /* the nameless roffs[] entry, handled by roff_userdef() */
284 ROFF_MAX /* size of roffs[]; roffhash_find() returns it for unknown names */
285 };
286
287 /*
288 * An incredibly-simple string buffer.
 * It is the building block for the keys and values of struct roffkv,
 * for the keys of struct roffreg and for the xtab array of struct roff.
289 */
290 struct roffstr {
291 char *p; /* nil-terminated buffer */
292 size_t sz; /* saved strlen(p) */
293 };
294
295 /*
296 * A key-value roffstr pair as part of a singly-linked list.
 * struct roff keeps two such lists: strtab and xmbtab.
297 */
298 struct roffkv {
299 struct roffstr key; /* name to look up */
300 struct roffstr val; /* string stored under that name */
301 struct roffkv *next; /* next in list */
302 };
303
304 /*
305 * A single number register as part of a singly-linked list.
 * The head of the list is the regtab member of struct roff.
306 */
307 struct roffreg {
308 struct roffstr key; /* register name */
309 int val; /* register value */
310 struct roffreg *next; /* next in list */
311 };
312
/*
 * State of one roff parser.
 * Allocated by roff_alloc(), returned to its initial state by
 * roff_reset() and released by roff_free().
 */
313 struct roff {
314 struct mparse *parse; /* parse point */
315 const struct mchars *mchars; /* character table */
316 struct roffnode *last; /* leaf of stack */
317 int *rstack; /* stack of inverted `ie' values */
318 struct roffreg *regtab; /* number registers */
319 struct roffkv *strtab; /* user-defined strings & macros */
320 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */
321 struct roffstr *xtab; /* single-byte trans table (`tr') */
322 const char *current_string; /* value of last called user macro */
323 struct tbl_node *first_tbl; /* first table parsed */
324 struct tbl_node *last_tbl; /* last table parsed */
325 struct tbl_node *tbl; /* current table being parsed */
326 struct eqn_node *last_eqn; /* last equation parsed */
327 struct eqn_node *first_eqn; /* first equation parsed */
328 struct eqn_node *eqn; /* current equation being parsed */
329 int eqn_inline; /* current equation is inline */
330 int options; /* parse options */
331 int rstacksz; /* current size limit of rstack */
332 int rstackpos; /* position in rstack */ /* -1 after alloc and reset */
333 int format; /* current file in mdoc or man format */
334 char control; /* control character */ /* roff_reset() sets it to 0 */
335 };
336
/*
 * One entry on the stack of pending roff instructions.
 * Entries are created by roffnode_push() and destroyed by
 * roffnode_pop(), which also frees the name and end strings.
 */
337 struct roffnode {
338 enum rofft tok; /* type of node */
339 struct roffnode *parent; /* up one in stack */
340 int line; /* parse line */
341 int col; /* parse col */
342 char *name; /* node name, e.g. macro name */
343 char *end; /* end-rules: custom token */
344 int endspan; /* end-rules: next-line or infty */
345 int rule; /* current evaluation rule */ /* copied from the parent on push */
346 };
347
/*
 * Common parameter list of all request handlers; it is used both in the
 * forward declarations below and in the roffproc function pointer type.
 */
348 #define ROFF_ARGS struct roff *r, /* parse ctx */ \
349 enum rofft tok, /* tok of macro */ \
350 struct buf *buf, /* input buffer */ \
351 int ln, /* parse line */ \
352 int ppos, /* original pos in buffer */ \
353 int pos, /* current pos in buffer */ \
354 int *offs /* reset offset of buffer data */
355
/* Signature of a request handler as stored in struct roffmac. */
356 typedef enum rofferr (*roffproc)(ROFF_ARGS);
357
/*
 * Description of one built-in request: its name and its handlers.
 * The roffs[] table initializes these members positionally, so their
 * order must not change.
 */
358 struct roffmac {
359 const char *name; /* macro name */
360 roffproc proc; /* process new macro */
361 roffproc text; /* process as child text of macro */
362 roffproc sub; /* process as child of macro */
363 int flags; /* 0 or ROFFMAC_STRUCT */
364 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
365 struct roffmac *next; /* next entry in the same hash bucket, set by roffhash_init() */
366 };
367
/* One predefined string: element type of the predefs[] array. */
368 struct predef {
369 const char *name; /* predefined input name */
370 const char *str; /* replacement symbol */
371 };
372
/*
 * Expands to one struct predef initializer, including the trailing comma.
 * NOTE(review): presumably invoked once per line of predefs.in - confirm.
 */
373 #define PREDEF(__name, __str) \
374 { (__name), (__str) },
375
/*
 * Forward declarations of the file-local functions.
 * Those declared with ROFF_ARGS are request handlers that can be
 * stored in the roffproc members of struct roffmac.
 */
376 static enum rofft roffhash_find(const char *, size_t);
377 static void roffhash_init(void);
378 static void roffnode_cleanscope(struct roff *);
379 static void roffnode_pop(struct roff *);
380 static void roffnode_push(struct roff *, enum rofft,
381 const char *, int, int);
382 static enum rofferr roff_block(ROFF_ARGS);
383 static enum rofferr roff_block_text(ROFF_ARGS);
384 static enum rofferr roff_block_sub(ROFF_ARGS);
385 static enum rofferr roff_brp(ROFF_ARGS);
386 static enum rofferr roff_cblock(ROFF_ARGS);
387 static enum rofferr roff_cc(ROFF_ARGS);
388 static void roff_ccond(struct roff *, int, int);
389 static enum rofferr roff_cond(ROFF_ARGS);
390 static enum rofferr roff_cond_text(ROFF_ARGS);
391 static enum rofferr roff_cond_sub(ROFF_ARGS);
392 static enum rofferr roff_ds(ROFF_ARGS);
393 static enum rofferr roff_eqndelim(struct roff *, struct buf *, int);
394 static int roff_evalcond(struct roff *r, int,
395 const char *, int *);
396 static int roff_evalnum(struct roff *, int,
397 const char *, int *, int *, int);
398 static int roff_evalpar(struct roff *, int,
399 const char *, int *, int *);
400 static int roff_evalstrcond(const char *, int *);
401 static void roff_free1(struct roff *);
402 static void roff_freereg(struct roffreg *);
403 static void roff_freestr(struct roffkv *);
404 static size_t roff_getname(struct roff *, char **, int, int);
405 static int roff_getnum(const char *, int *, int *);
406 static int roff_getop(const char *, int *, char *);
407 static int roff_getregn(const struct roff *,
408 const char *, size_t);
409 static int roff_getregro(const char *name);
410 static const char *roff_getstrn(const struct roff *,
411 const char *, size_t);
412 static enum rofferr roff_insec(ROFF_ARGS);
413 static enum rofferr roff_it(ROFF_ARGS);
414 static enum rofferr roff_line_ignore(ROFF_ARGS);
415 static enum rofferr roff_nr(ROFF_ARGS);
416 static enum rofft roff_parse(struct roff *, char *, int *,
417 int, int);
418 static enum rofferr roff_parsetext(struct buf *, int, int *);
419 static enum rofferr roff_res(struct roff *, struct buf *, int, int);
420 static enum rofferr roff_rm(ROFF_ARGS);
421 static enum rofferr roff_rr(ROFF_ARGS);
422 static void roff_setstr(struct roff *,
423 const char *, const char *, int);
424 static void roff_setstrn(struct roffkv **, const char *,
425 size_t, const char *, size_t, int);
426 static enum rofferr roff_so(ROFF_ARGS);
427 static enum rofferr roff_tr(ROFF_ARGS);
428 static enum rofferr roff_Dd(ROFF_ARGS);
429 static enum rofferr roff_TH(ROFF_ARGS);
430 static enum rofferr roff_TE(ROFF_ARGS);
431 static enum rofferr roff_TS(ROFF_ARGS);
432 static enum rofferr roff_EQ(ROFF_ARGS);
433 static enum rofferr roff_EN(ROFF_ARGS);
434 static enum rofferr roff_T_(ROFF_ARGS);
435 static enum rofferr roff_unsupp(ROFF_ARGS);
436 static enum rofferr roff_userdef(ROFF_ARGS);
437
438 /* See roffhash_find() */
/*
 * The hash table has one bucket per byte value from ASCII_LO to
 * ASCII_HI inclusive; a request name is filed under its first byte.
 */
439
440 #define ASCII_HI 126
441 #define ASCII_LO 33
442 #define HASHWIDTH (ASCII_HI - ASCII_LO + 1)
443
/* Bucket heads; each chain is linked through struct roffmac.next. */
444 static struct roffmac *hash[HASHWIDTH];
445
/*
 * Table of the built-in requests, indexed by enum rofft; the entries
 * must be kept in the same order as the enumerators.
 * Each initializer gives, in order: name, proc, text, sub, flags, next.
 * The next members start out NULL and are filled in by roffhash_init().
 * The last entry has no name; roffhash_init() leaves it out of the hash
 * table because it only processes the entries before ROFF_USERDEF.
 */
446 static struct roffmac roffs[ROFF_MAX] = {
447 { "ab", roff_unsupp, NULL, NULL, 0, NULL },
448 { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
449 { "af", roff_line_ignore, NULL, NULL, 0, NULL },
450 { "aln", roff_unsupp, NULL, NULL, 0, NULL },
451 { "als", roff_unsupp, NULL, NULL, 0, NULL },
452 { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
453 { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
454 { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
455 { "ami1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
456 { "as", roff_ds, NULL, NULL, 0, NULL },
457 { "as1", roff_ds, NULL, NULL, 0, NULL },
458 { "asciify", roff_unsupp, NULL, NULL, 0, NULL },
459 { "backtrace", roff_line_ignore, NULL, NULL, 0, NULL },
460 { "bd", roff_line_ignore, NULL, NULL, 0, NULL },
461 { "bleedat", roff_line_ignore, NULL, NULL, 0, NULL },
462 { "blm", roff_unsupp, NULL, NULL, 0, NULL },
463 { "box", roff_unsupp, NULL, NULL, 0, NULL },
464 { "boxa", roff_unsupp, NULL, NULL, 0, NULL },
465 { "bp", roff_line_ignore, NULL, NULL, 0, NULL },
466 { "BP", roff_unsupp, NULL, NULL, 0, NULL },
467 { "break", roff_unsupp, NULL, NULL, 0, NULL },
468 { "breakchar", roff_line_ignore, NULL, NULL, 0, NULL },
469 { "brnl", roff_line_ignore, NULL, NULL, 0, NULL },
470 { "brp", roff_brp, NULL, NULL, 0, NULL },
471 { "brpnl", roff_line_ignore, NULL, NULL, 0, NULL },
472 { "c2", roff_unsupp, NULL, NULL, 0, NULL },
473 { "cc", roff_cc, NULL, NULL, 0, NULL },
474 { "ce", roff_line_ignore, NULL, NULL, 0, NULL },
475 { "cf", roff_insec, NULL, NULL, 0, NULL },
476 { "cflags", roff_line_ignore, NULL, NULL, 0, NULL },
477 { "ch", roff_line_ignore, NULL, NULL, 0, NULL },
478 { "char", roff_unsupp, NULL, NULL, 0, NULL },
479 { "chop", roff_unsupp, NULL, NULL, 0, NULL },
480 { "class", roff_line_ignore, NULL, NULL, 0, NULL },
481 { "close", roff_insec, NULL, NULL, 0, NULL },
482 { "CL", roff_unsupp, NULL, NULL, 0, NULL },
483 { "color", roff_line_ignore, NULL, NULL, 0, NULL },
484 { "composite", roff_unsupp, NULL, NULL, 0, NULL },
485 { "continue", roff_unsupp, NULL, NULL, 0, NULL },
486 { "cp", roff_line_ignore, NULL, NULL, 0, NULL },
487 { "cropat", roff_line_ignore, NULL, NULL, 0, NULL },
488 { "cs", roff_line_ignore, NULL, NULL, 0, NULL },
489 { "cu", roff_line_ignore, NULL, NULL, 0, NULL },
490 { "da", roff_unsupp, NULL, NULL, 0, NULL },
491 { "dch", roff_unsupp, NULL, NULL, 0, NULL },
492 { "Dd", roff_Dd, NULL, NULL, 0, NULL },
493 { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
494 { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
495 { "defcolor", roff_line_ignore, NULL, NULL, 0, NULL },
496 { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
497 { "dei1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
498 { "device", roff_unsupp, NULL, NULL, 0, NULL },
499 { "devicem", roff_unsupp, NULL, NULL, 0, NULL },
500 { "di", roff_unsupp, NULL, NULL, 0, NULL },
501 { "do", roff_unsupp, NULL, NULL, 0, NULL },
502 { "ds", roff_ds, NULL, NULL, 0, NULL },
503 { "ds1", roff_ds, NULL, NULL, 0, NULL },
504 { "dwh", roff_unsupp, NULL, NULL, 0, NULL },
505 { "dt", roff_unsupp, NULL, NULL, 0, NULL },
506 { "ec", roff_unsupp, NULL, NULL, 0, NULL },
507 { "ecr", roff_unsupp, NULL, NULL, 0, NULL },
508 { "ecs", roff_unsupp, NULL, NULL, 0, NULL },
509 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
510 { "em", roff_unsupp, NULL, NULL, 0, NULL },
511 { "EN", roff_EN, NULL, NULL, 0, NULL },
512 { "eo", roff_unsupp, NULL, NULL, 0, NULL },
513 { "EP", roff_unsupp, NULL, NULL, 0, NULL },
514 { "EQ", roff_EQ, NULL, NULL, 0, NULL },
515 { "errprint", roff_line_ignore, NULL, NULL, 0, NULL },
516 { "ev", roff_unsupp, NULL, NULL, 0, NULL },
517 { "evc", roff_unsupp, NULL, NULL, 0, NULL },
518 { "ex", roff_unsupp, NULL, NULL, 0, NULL },
519 { "fallback", roff_line_ignore, NULL, NULL, 0, NULL },
520 { "fam", roff_line_ignore, NULL, NULL, 0, NULL },
521 { "fc", roff_unsupp, NULL, NULL, 0, NULL },
522 { "fchar", roff_unsupp, NULL, NULL, 0, NULL },
523 { "fcolor", roff_line_ignore, NULL, NULL, 0, NULL },
524 { "fdeferlig", roff_line_ignore, NULL, NULL, 0, NULL },
525 { "feature", roff_line_ignore, NULL, NULL, 0, NULL },
526 { "fkern", roff_line_ignore, NULL, NULL, 0, NULL },
527 { "fl", roff_line_ignore, NULL, NULL, 0, NULL },
528 { "flig", roff_line_ignore, NULL, NULL, 0, NULL },
529 { "fp", roff_line_ignore, NULL, NULL, 0, NULL },
530 { "fps", roff_line_ignore, NULL, NULL, 0, NULL },
531 { "fschar", roff_unsupp, NULL, NULL, 0, NULL },
532 { "fspacewidth", roff_line_ignore, NULL, NULL, 0, NULL },
533 { "fspecial", roff_line_ignore, NULL, NULL, 0, NULL },
534 { "ftr", roff_line_ignore, NULL, NULL, 0, NULL },
535 { "fzoom", roff_line_ignore, NULL, NULL, 0, NULL },
536 { "gcolor", roff_line_ignore, NULL, NULL, 0, NULL },
537 { "hc", roff_line_ignore, NULL, NULL, 0, NULL },
538 { "hcode", roff_line_ignore, NULL, NULL, 0, NULL },
539 { "hidechar", roff_line_ignore, NULL, NULL, 0, NULL },
540 { "hla", roff_line_ignore, NULL, NULL, 0, NULL },
541 { "hlm", roff_line_ignore, NULL, NULL, 0, NULL },
542 { "hpf", roff_line_ignore, NULL, NULL, 0, NULL },
543 { "hpfa", roff_line_ignore, NULL, NULL, 0, NULL },
544 { "hpfcode", roff_line_ignore, NULL, NULL, 0, NULL },
545 { "hw", roff_line_ignore, NULL, NULL, 0, NULL },
546 { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
547 { "hylang", roff_line_ignore, NULL, NULL, 0, NULL },
548 { "hylen", roff_line_ignore, NULL, NULL, 0, NULL },
549 { "hym", roff_line_ignore, NULL, NULL, 0, NULL },
550 { "hypp", roff_line_ignore, NULL, NULL, 0, NULL },
551 { "hys", roff_line_ignore, NULL, NULL, 0, NULL },
552 { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
553 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
554 { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
555 { "index", roff_unsupp, NULL, NULL, 0, NULL },
556 { "it", roff_it, NULL, NULL, 0, NULL },
557 { "itc", roff_unsupp, NULL, NULL, 0, NULL },
558 { "IX", roff_line_ignore, NULL, NULL, 0, NULL },
559 { "kern", roff_line_ignore, NULL, NULL, 0, NULL },
560 { "kernafter", roff_line_ignore, NULL, NULL, 0, NULL },
561 { "kernbefore", roff_line_ignore, NULL, NULL, 0, NULL },
562 { "kernpair", roff_line_ignore, NULL, NULL, 0, NULL },
563 { "lc", roff_unsupp, NULL, NULL, 0, NULL },
564 { "lc_ctype", roff_unsupp, NULL, NULL, 0, NULL },
565 { "lds", roff_unsupp, NULL, NULL, 0, NULL },
566 { "length", roff_unsupp, NULL, NULL, 0, NULL },
567 { "letadj", roff_line_ignore, NULL, NULL, 0, NULL },
568 { "lf", roff_insec, NULL, NULL, 0, NULL },
569 { "lg", roff_line_ignore, NULL, NULL, 0, NULL },
570 { "lhang", roff_line_ignore, NULL, NULL, 0, NULL },
571 { "linetabs", roff_unsupp, NULL, NULL, 0, NULL },
572 { "lnr", roff_unsupp, NULL, NULL, 0, NULL },
573 { "lnrf", roff_unsupp, NULL, NULL, 0, NULL },
574 { "lpfx", roff_unsupp, NULL, NULL, 0, NULL },
575 { "ls", roff_line_ignore, NULL, NULL, 0, NULL },
576 { "lsm", roff_unsupp, NULL, NULL, 0, NULL },
577 { "lt", roff_line_ignore, NULL, NULL, 0, NULL },
578 { "mc", roff_line_ignore, NULL, NULL, 0, NULL },
579 { "mediasize", roff_line_ignore, NULL, NULL, 0, NULL },
580 { "minss", roff_line_ignore, NULL, NULL, 0, NULL },
581 { "mk", roff_line_ignore, NULL, NULL, 0, NULL },
582 { "mso", roff_insec, NULL, NULL, 0, NULL },
583 { "na", roff_line_ignore, NULL, NULL, 0, NULL },
584 { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
585 { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
586 { "nhychar", roff_line_ignore, NULL, NULL, 0, NULL },
587 { "nm", roff_unsupp, NULL, NULL, 0, NULL },
588 { "nn", roff_unsupp, NULL, NULL, 0, NULL },
589 { "nop", roff_unsupp, NULL, NULL, 0, NULL },
590 { "nr", roff_nr, NULL, NULL, 0, NULL },
591 { "nrf", roff_unsupp, NULL, NULL, 0, NULL },
592 { "nroff", roff_line_ignore, NULL, NULL, 0, NULL },
593 { "ns", roff_line_ignore, NULL, NULL, 0, NULL },
594 { "nx", roff_insec, NULL, NULL, 0, NULL },
595 { "open", roff_insec, NULL, NULL, 0, NULL },
596 { "opena", roff_insec, NULL, NULL, 0, NULL },
597 { "os", roff_line_ignore, NULL, NULL, 0, NULL },
598 { "output", roff_unsupp, NULL, NULL, 0, NULL },
599 { "padj", roff_line_ignore, NULL, NULL, 0, NULL },
600 { "papersize", roff_line_ignore, NULL, NULL, 0, NULL },
601 { "pc", roff_line_ignore, NULL, NULL, 0, NULL },
602 { "pev", roff_line_ignore, NULL, NULL, 0, NULL },
603 { "pi", roff_insec, NULL, NULL, 0, NULL },
604 { "PI", roff_unsupp, NULL, NULL, 0, NULL },
605 { "pl", roff_line_ignore, NULL, NULL, 0, NULL },
606 { "pm", roff_line_ignore, NULL, NULL, 0, NULL },
607 { "pn", roff_line_ignore, NULL, NULL, 0, NULL },
608 { "pnr", roff_line_ignore, NULL, NULL, 0, NULL },
609 { "po", roff_line_ignore, NULL, NULL, 0, NULL },
610 { "ps", roff_line_ignore, NULL, NULL, 0, NULL },
611 { "psbb", roff_unsupp, NULL, NULL, 0, NULL },
612 { "pshape", roff_unsupp, NULL, NULL, 0, NULL },
613 { "pso", roff_insec, NULL, NULL, 0, NULL },
614 { "ptr", roff_line_ignore, NULL, NULL, 0, NULL },
615 { "pvs", roff_line_ignore, NULL, NULL, 0, NULL },
616 { "rchar", roff_unsupp, NULL, NULL, 0, NULL },
617 { "rd", roff_line_ignore, NULL, NULL, 0, NULL },
618 { "recursionlimit", roff_line_ignore, NULL, NULL, 0, NULL },
619 { "return", roff_unsupp, NULL, NULL, 0, NULL },
620 { "rfschar", roff_unsupp, NULL, NULL, 0, NULL },
621 { "rhang", roff_line_ignore, NULL, NULL, 0, NULL },
622 { "rj", roff_line_ignore, NULL, NULL, 0, NULL },
623 { "rm", roff_rm, NULL, NULL, 0, NULL },
624 { "rn", roff_unsupp, NULL, NULL, 0, NULL },
625 { "rnn", roff_unsupp, NULL, NULL, 0, NULL },
626 { "rr", roff_rr, NULL, NULL, 0, NULL },
627 { "rs", roff_line_ignore, NULL, NULL, 0, NULL },
628 { "rt", roff_line_ignore, NULL, NULL, 0, NULL },
629 { "schar", roff_unsupp, NULL, NULL, 0, NULL },
630 { "sentchar", roff_line_ignore, NULL, NULL, 0, NULL },
631 { "shc", roff_line_ignore, NULL, NULL, 0, NULL },
632 { "shift", roff_unsupp, NULL, NULL, 0, NULL },
633 { "sizes", roff_line_ignore, NULL, NULL, 0, NULL },
634 { "so", roff_so, NULL, NULL, 0, NULL },
635 { "spacewidth", roff_line_ignore, NULL, NULL, 0, NULL },
636 { "special", roff_line_ignore, NULL, NULL, 0, NULL },
637 { "spreadwarn", roff_line_ignore, NULL, NULL, 0, NULL },
638 { "ss", roff_line_ignore, NULL, NULL, 0, NULL },
639 { "sty", roff_line_ignore, NULL, NULL, 0, NULL },
640 { "substring", roff_unsupp, NULL, NULL, 0, NULL },
641 { "sv", roff_line_ignore, NULL, NULL, 0, NULL },
642 { "sy", roff_insec, NULL, NULL, 0, NULL },
643 { "T&", roff_T_, NULL, NULL, 0, NULL },
644 { "ta", roff_unsupp, NULL, NULL, 0, NULL },
645 { "tc", roff_unsupp, NULL, NULL, 0, NULL },
646 { "TE", roff_TE, NULL, NULL, 0, NULL },
647 { "TH", roff_TH, NULL, NULL, 0, NULL },
648 { "ti", roff_unsupp, NULL, NULL, 0, NULL },
649 { "tkf", roff_line_ignore, NULL, NULL, 0, NULL },
650 { "tl", roff_unsupp, NULL, NULL, 0, NULL },
651 { "tm", roff_line_ignore, NULL, NULL, 0, NULL },
652 { "tm1", roff_line_ignore, NULL, NULL, 0, NULL },
653 { "tmc", roff_line_ignore, NULL, NULL, 0, NULL },
654 { "tr", roff_tr, NULL, NULL, 0, NULL },
655 { "track", roff_line_ignore, NULL, NULL, 0, NULL },
656 { "transchar", roff_line_ignore, NULL, NULL, 0, NULL },
657 { "trf", roff_insec, NULL, NULL, 0, NULL },
658 { "trimat", roff_line_ignore, NULL, NULL, 0, NULL },
659 { "trin", roff_unsupp, NULL, NULL, 0, NULL },
660 { "trnt", roff_unsupp, NULL, NULL, 0, NULL },
661 { "troff", roff_line_ignore, NULL, NULL, 0, NULL },
662 { "TS", roff_TS, NULL, NULL, 0, NULL },
663 { "uf", roff_line_ignore, NULL, NULL, 0, NULL },
664 { "ul", roff_line_ignore, NULL, NULL, 0, NULL },
665 { "unformat", roff_unsupp, NULL, NULL, 0, NULL },
666 { "unwatch", roff_line_ignore, NULL, NULL, 0, NULL },
667 { "unwatchn", roff_line_ignore, NULL, NULL, 0, NULL },
668 { "vpt", roff_line_ignore, NULL, NULL, 0, NULL },
669 { "vs", roff_line_ignore, NULL, NULL, 0, NULL },
670 { "warn", roff_line_ignore, NULL, NULL, 0, NULL },
671 { "warnscale", roff_line_ignore, NULL, NULL, 0, NULL },
672 { "watch", roff_line_ignore, NULL, NULL, 0, NULL },
673 { "watchlength", roff_line_ignore, NULL, NULL, 0, NULL },
674 { "watchn", roff_line_ignore, NULL, NULL, 0, NULL },
675 { "wh", roff_unsupp, NULL, NULL, 0, NULL },
676 { "while", roff_unsupp, NULL, NULL, 0, NULL },
677 { "write", roff_insec, NULL, NULL, 0, NULL },
678 { "writec", roff_insec, NULL, NULL, 0, NULL },
679 { "writem", roff_insec, NULL, NULL, 0, NULL },
680 { "xflag", roff_line_ignore, NULL, NULL, 0, NULL },
681 { ".", roff_cblock, NULL, NULL, 0, NULL },
682 { NULL, roff_userdef, NULL, NULL, 0, NULL },
683 };
684
/*
 * NULL-terminated list of mdoc macro names.
 * NOTE(review): presumably consulted elsewhere to recognize names that
 * belong to mdoc(7); the code using it is not visible here - confirm.
 */
685 /* not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH */
686 const char *const __mdoc_reserved[] = {
687 "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
688 "Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
689 "Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
690 "Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
691 "Dt", "Dv", "Dx", "D1",
692 "Ec", "Ed", "Ef", "Ek", "El", "Em",
693 "En", "Eo", "Er", "Es", "Ev", "Ex",
694 "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
695 "Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp",
696 "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
697 "Oc", "Oo", "Op", "Os", "Ot", "Ox",
698 "Pa", "Pc", "Pf", "Po", "Pp", "Pq",
699 "Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv",
700 "Sc", "Sh", "Sm", "So", "Sq",
701 "Ss", "St", "Sx", "Sy",
702 "Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
703 "%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
704 "%P", "%Q", "%R", "%T", "%U", "%V",
705 NULL
706 };
707
/*
 * NULL-terminated list of man macro names.
 * NOTE(review): presumably consulted elsewhere to recognize names that
 * belong to man(7); the code using it is not visible here - confirm.
 */
708 /* not currently implemented: BT DE DS ME MT PT SY TQ YS */
709 const char *const __man_reserved[] = {
710 "AT", "B", "BI", "BR", "DT",
711 "EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR",
712 "LP", "OP", "P", "PD", "PP",
713 "R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS",
714 "TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR",
715 NULL
716 };
717
718 /* Array of injected predefined strings. */
/*
 * The initializers come from the included file predefs.in.
 * NOTE(review): PREDEFS_MAX presumably has to match the number of
 * PREDEF() lines in that file - confirm when editing either.
 */
719 #define PREDEFS_MAX 38
720 static const struct predef predefs[PREDEFS_MAX] = {
721 #include "predefs.in"
722 };
723
/*
 * Bucket index for a request name, see roffhash_find(): the first
 * byte of the name, shifted so that ASCII_LO maps to bucket 0.
 * The argument is parenthesized so that pointer expressions such as
 * ROFF_HASH(s + 1) expand correctly.
 */
#define	ROFF_HASH(p) ((p)[0] - ASCII_LO)

static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
729
730
731 static void
732 roffhash_init(void)
733 {
734 struct roffmac *n;
735 int buc, i;
736
737 for (i = 0; i < (int)ROFF_USERDEF; i++) {
738 assert(roffs[i].name[0] >= ASCII_LO);
739 assert(roffs[i].name[0] <= ASCII_HI);
740
741 buc = ROFF_HASH(roffs[i].name);
742
743 if (NULL != (n = hash[buc])) {
744 for ( ; n->next; n = n->next)
745 /* Do nothing. */ ;
746 n->next = &roffs[i];
747 } else
748 hash[buc] = &roffs[i];
749 }
750 }
751
752 /*
753 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
754 * the nil-terminated string name could be found.
755 */
756 static enum rofft
757 roffhash_find(const char *p, size_t s)
758 {
759 int buc;
760 struct roffmac *n;
761
762 /*
763 * libroff has an extremely simple hashtable, for the time
764 * being, which simply keys on the first character, which must
765 * be printable, then walks a chain. It works well enough until
766 * optimised.
767 */
768
769 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
770 return(ROFF_MAX);
771
772 buc = ROFF_HASH(p);
773
774 if (NULL == (n = hash[buc]))
775 return(ROFF_MAX);
776 for ( ; n; n = n->next)
777 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
778 return((enum rofft)(n - roffs));
779
780 return(ROFF_MAX);
781 }
782
783 /*
784 * Pop the current node off of the stack of roff instructions currently
785 * pending.
786 */
787 static void
788 roffnode_pop(struct roff *r)
789 {
790 struct roffnode *p;
791
792 assert(r->last);
793 p = r->last;
794
795 r->last = r->last->parent;
796 free(p->name);
797 free(p->end);
798 free(p);
799 }
800
801 /*
802 * Push a roff node onto the instruction stack. This must later be
803 * removed with roffnode_pop().
804 */
805 static void
806 roffnode_push(struct roff *r, enum rofft tok, const char *name,
807 int line, int col)
808 {
809 struct roffnode *p;
810
811 p = mandoc_calloc(1, sizeof(struct roffnode));
812 p->tok = tok;
813 if (name)
814 p->name = mandoc_strdup(name);
815 p->parent = r->last;
816 p->line = line;
817 p->col = col;
818 p->rule = p->parent ? p->parent->rule : 0;
819
820 r->last = p;
821 }
822
823 static void
824 roff_free1(struct roff *r)
825 {
826 struct tbl_node *tbl;
827 struct eqn_node *e;
828 int i;
829
830 while (NULL != (tbl = r->first_tbl)) {
831 r->first_tbl = tbl->next;
832 tbl_free(tbl);
833 }
834 r->first_tbl = r->last_tbl = r->tbl = NULL;
835
836 while (NULL != (e = r->first_eqn)) {
837 r->first_eqn = e->next;
838 eqn_free(e);
839 }
840 r->first_eqn = r->last_eqn = r->eqn = NULL;
841
842 while (r->last)
843 roffnode_pop(r);
844
845 free (r->rstack);
846 r->rstack = NULL;
847 r->rstacksz = 0;
848 r->rstackpos = -1;
849
850 roff_freereg(r->regtab);
851 r->regtab = NULL;
852
853 roff_freestr(r->strtab);
854 roff_freestr(r->xmbtab);
855 r->strtab = r->xmbtab = NULL;
856
857 if (r->xtab)
858 for (i = 0; i < 128; i++)
859 free(r->xtab[i].p);
860 free(r->xtab);
861 r->xtab = NULL;
862 }
863
864 void
865 roff_reset(struct roff *r)
866 {
867
868 roff_free1(r);
869 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
870 r->control = 0;
871 }
872
/*
 * Destroy a parser: release everything it owns and then the
 * struct roff itself.  Like free(3), a NULL pointer is accepted
 * and ignored, so cleanup paths need not check before calling.
 */
void
roff_free(struct roff *r)
{

	if (r == NULL)
		return;

	roff_free1(r);
	free(r);
}
880
881 struct roff *
882 roff_alloc(struct mparse *parse, const struct mchars *mchars, int options)
883 {
884 struct roff *r;
885
886 r = mandoc_calloc(1, sizeof(struct roff));
887 r->parse = parse;
888 r->mchars = mchars;
889 r->options = options;
890 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
891 r->rstackpos = -1;
892
893 roffhash_init();
894
895 return(r);
896 }
897
898 /*
899 * In the current line, expand escape sequences that tend to get
900 * used in numerical expressions and conditional requests.
901 * Also check the syntax of the remaining escape sequences.
902 */
903 static enum rofferr
904 roff_res(struct roff *r, struct buf *buf, int ln, int pos)
905 {
906 char ubuf[24]; /* buffer to print the number */
907 const char *start; /* start of the string to process */
908 char *stesc; /* start of an escape sequence ('\\') */
909 const char *stnam; /* start of the name, after "[(*" */
910 const char *cp; /* end of the name, e.g. before ']' */
911 const char *res; /* the string to be substituted */
912 char *nbuf; /* new buffer to copy buf->buf to */
913 size_t maxl; /* expected length of the escape name */
914 size_t naml; /* actual length of the escape name */
915 enum mandoc_esc esc; /* type of the escape sequence */
916 int inaml; /* length returned from mandoc_escape() */
917 int expand_count; /* to avoid infinite loops */
918 int npos; /* position in numeric expression */
919 int arg_complete; /* argument not interrupted by eol */
920 char term; /* character terminating the escape */
921
922 expand_count = 0;
923 start = buf->buf + pos;
924 stesc = strchr(start, '\0') - 1;
925 while (stesc-- > start) {
926
927 /* Search backwards for the next backslash. */
928
929 if (*stesc != '\\')
930 continue;
931
932 /* If it is escaped, skip it. */
933
934 for (cp = stesc - 1; cp >= start; cp--)
935 if (*cp != '\\')
936 break;
937
938 if ((stesc - cp) % 2 == 0) {
939 stesc = (char *)cp;
940 continue;
941 }
942
943 /* Decide whether to expand or to check only. */
944
945 term = '\0';
946 cp = stesc + 1;
947 switch (*cp) {
948 case '*':
949 res = NULL;
950 break;
951 case 'B':
952 /* FALLTHROUGH */
953 case 'w':
954 term = cp[1];
955 /* FALLTHROUGH */
956 case 'n':
957 res = ubuf;
958 break;
959 default:
960 esc = mandoc_escape(&cp, &stnam, &inaml);
961 if (esc == ESCAPE_ERROR ||
962 (esc == ESCAPE_SPECIAL &&
963 mchars_spec2cp(r->mchars, stnam, inaml) < 0))
964 mandoc_vmsg(MANDOCERR_ESC_BAD,
965 r->parse, ln, (int)(stesc - buf->buf),
966 "%.*s", (int)(cp - stesc), stesc);
967 continue;
968 }
969
970 if (EXPAND_LIMIT < ++expand_count) {
971 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
972 ln, (int)(stesc - buf->buf), NULL);
973 return(ROFF_IGN);
974 }
975
976 /*
977 * The third character decides the length
978 * of the name of the string or register.
979 * Save a pointer to the name.
980 */
981
982 if (term == '\0') {
983 switch (*++cp) {
984 case '\0':
985 maxl = 0;
986 break;
987 case '(':
988 cp++;
989 maxl = 2;
990 break;
991 case '[':
992 cp++;
993 term = ']';
994 maxl = 0;
995 break;
996 default:
997 maxl = 1;
998 break;
999 }
1000 } else {
1001 cp += 2;
1002 maxl = 0;
1003 }
1004 stnam = cp;
1005
1006 /* Advance to the end of the name. */
1007
1008 naml = 0;
1009 arg_complete = 1;
1010 while (maxl == 0 || naml < maxl) {
1011 if (*cp == '\0') {
1012 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
1013 ln, (int)(stesc - buf->buf), stesc);
1014 arg_complete = 0;
1015 break;
1016 }
1017 if (maxl == 0 && *cp == term) {
1018 cp++;
1019 break;
1020 }
1021 if (*cp++ != '\\' || stesc[1] != 'w') {
1022 naml++;
1023 continue;
1024 }
1025 switch (mandoc_escape(&cp, NULL, NULL)) {
1026 case ESCAPE_SPECIAL:
1027 /* FALLTHROUGH */
1028 case ESCAPE_UNICODE:
1029 /* FALLTHROUGH */
1030 case ESCAPE_NUMBERED:
1031 /* FALLTHROUGH */
1032 case ESCAPE_OVERSTRIKE:
1033 naml++;
1034 break;
1035 default:
1036 break;
1037 }
1038 }
1039
1040 /*
1041 * Retrieve the replacement string; if it is
1042 * undefined, resume searching for escapes.
1043 */
1044
1045 switch (stesc[1]) {
1046 case '*':
1047 if (arg_complete)
1048 res = roff_getstrn(r, stnam, naml);
1049 break;
1050 case 'B':
1051 npos = 0;
1052 ubuf[0] = arg_complete &&
1053 roff_evalnum(r, ln, stnam, &npos, NULL, 0) &&
1054 stnam + npos + 1 == cp ? '1' : '0';
1055 ubuf[1] = '\0';
1056 break;
1057 case 'n':
1058 if (arg_complete)
1059 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1060 roff_getregn(r, stnam, naml));
1061 else
1062 ubuf[0] = '\0';
1063 break;
1064 case 'w':
1065 /* use even incomplete args */
1066 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1067 24 * (int)naml);
1068 break;
1069 }
1070
1071 if (res == NULL) {
1072 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1073 r->parse, ln, (int)(stesc - buf->buf),
1074 "%.*s", (int)naml, stnam);
1075 res = "";
1076 } else if (buf->sz + strlen(res) > SHRT_MAX) {
1077 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1078 ln, (int)(stesc - buf->buf), NULL);
1079 return(ROFF_IGN);
1080 }
1081
1082 /* Replace the escape sequence by the string. */
1083
1084 *stesc = '\0';
1085 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
1086 buf->buf, res, cp) + 1;
1087
1088 /* Prepare for the next replacement. */
1089
1090 start = nbuf + pos;
1091 stesc = nbuf + (stesc - buf->buf) + strlen(res);
1092 free(buf->buf);
1093 buf->buf = nbuf;
1094 }
1095 return(ROFF_CONT);
1096 }
1097
1098 /*
1099 * Process text streams:
1100 * Convert all breakable hyphens into ASCII_HYPH.
1101 * Decrement and spring input line trap.
1102 */
1103 static enum rofferr
1104 roff_parsetext(struct buf *buf, int pos, int *offs)
1105 {
1106 size_t sz;
1107 const char *start;
1108 char *p;
1109 int isz;
1110 enum mandoc_esc esc;
1111
1112 start = p = buf->buf + pos;
1113
1114 while (*p != '\0') {
1115 sz = strcspn(p, "-\\");
1116 p += sz;
1117
1118 if (*p == '\0')
1119 break;
1120
1121 if (*p == '\\') {
1122 /* Skip over escapes. */
1123 p++;
1124 esc = mandoc_escape((const char **)&p, NULL, NULL);
1125 if (esc == ESCAPE_ERROR)
1126 break;
1127 continue;
1128 } else if (p == start) {
1129 p++;
1130 continue;
1131 }
1132
1133 if (isalpha((unsigned char)p[-1]) &&
1134 isalpha((unsigned char)p[1]))
1135 *p = ASCII_HYPH;
1136 p++;
1137 }
1138
1139 /* Spring the input line trap. */
1140 if (roffit_lines == 1) {
1141 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1142 free(buf->buf);
1143 buf->buf = p;
1144 buf->sz = isz + 1;
1145 *offs = 0;
1146 free(roffit_macro);
1147 roffit_lines = 0;
1148 return(ROFF_REPARSE);
1149 } else if (roffit_lines > 1)
1150 --roffit_lines;
1151 return(ROFF_CONT);
1152 }
1153
1154 enum rofferr
1155 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
1156 {
1157 enum rofft t;
1158 enum rofferr e;
1159 int pos; /* parse point */
1160 int spos; /* saved parse point for messages */
1161 int ppos; /* original offset in buf->buf */
1162 int ctl; /* macro line (boolean) */
1163
1164 ppos = pos = *offs;
1165
1166 /* Handle in-line equation delimiters. */
1167
1168 if (r->tbl == NULL &&
1169 r->last_eqn != NULL && r->last_eqn->delim &&
1170 (r->eqn == NULL || r->eqn_inline)) {
1171 e = roff_eqndelim(r, buf, pos);
1172 if (e == ROFF_REPARSE)
1173 return(e);
1174 assert(e == ROFF_CONT);
1175 }
1176
1177 /* Expand some escape sequences. */
1178
1179 e = roff_res(r, buf, ln, pos);
1180 if (e == ROFF_IGN)
1181 return(e);
1182 assert(e == ROFF_CONT);
1183
1184 ctl = roff_getcontrol(r, buf->buf, &pos);
1185
1186 /*
1187 * First, if a scope is open and we're not a macro, pass the
1188 * text through the macro's filter.
1189 * Equations process all content themselves.
1190 * Tables process almost all content themselves, but we want
1191 * to warn about macros before passing it there.
1192 */
1193
1194 if (r->last != NULL && ! ctl) {
1195 t = r->last->tok;
1196 assert(roffs[t].text);
1197 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1198 assert(e == ROFF_IGN || e == ROFF_CONT);
1199 if (e != ROFF_CONT)
1200 return(e);
1201 }
1202 if (r->eqn != NULL)
1203 return(eqn_read(&r->eqn, ln, buf->buf, ppos, offs));
1204 if (r->tbl != NULL && ( ! ctl || buf->buf[pos] == '\0'))
1205 return(tbl_read(r->tbl, ln, buf->buf, ppos));
1206 if ( ! ctl)
1207 return(roff_parsetext(buf, pos, offs));
1208
1209 /* Skip empty request lines. */
1210
1211 if (buf->buf[pos] == '"') {
1212 mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
1213 ln, pos, NULL);
1214 return(ROFF_IGN);
1215 } else if (buf->buf[pos] == '\0')
1216 return(ROFF_IGN);
1217
1218 /*
1219 * If a scope is open, go to the child handler for that macro,
1220 * as it may want to preprocess before doing anything with it.
1221 * Don't do so if an equation is open.
1222 */
1223
1224 if (r->last) {
1225 t = r->last->tok;
1226 assert(roffs[t].sub);
1227 return((*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs));
1228 }
1229
1230 /* No scope is open. This is a new request or macro. */
1231
1232 spos = pos;
1233 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1234
1235 /* Tables ignore most macros. */
1236
1237 if (r->tbl != NULL && (t == ROFF_MAX || t == ROFF_TS)) {
1238 mandoc_msg(MANDOCERR_TBLMACRO, r->parse,
1239 ln, pos, buf->buf + spos);
1240 if (t == ROFF_TS)
1241 return(ROFF_IGN);
1242 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1243 pos++;
1244 while (buf->buf[pos] != '\0' && buf->buf[pos] == ' ')
1245 pos++;
1246 return(tbl_read(r->tbl, ln, buf->buf, pos));
1247 }
1248
1249 /*
1250 * This is neither a roff request nor a user-defined macro.
1251 * Let the standard macro set parsers handle it.
1252 */
1253
1254 if (t == ROFF_MAX)
1255 return(ROFF_CONT);
1256
1257 /* Execute a roff request or a user defined macro. */
1258
1259 assert(roffs[t].proc);
1260 return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs));
1261 }
1262
1263 void
1264 roff_endparse(struct roff *r)
1265 {
1266
1267 if (r->last)
1268 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1269 r->last->line, r->last->col,
1270 roffs[r->last->tok].name);
1271
1272 if (r->eqn) {
1273 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1274 r->eqn->eqn.ln, r->eqn->eqn.pos, "EQ");
1275 eqn_end(&r->eqn);
1276 }
1277
1278 if (r->tbl) {
1279 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1280 r->tbl->line, r->tbl->pos, "TS");
1281 tbl_end(&r->tbl);
1282 }
1283 }
1284
1285 /*
1286 * Parse a roff node's type from the input buffer. This must be in the
1287 * form of ".foo xxx" in the usual way.
1288 */
1289 static enum rofft
1290 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1291 {
1292 char *cp;
1293 const char *mac;
1294 size_t maclen;
1295 enum rofft t;
1296
1297 cp = buf + *pos;
1298
1299 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1300 return(ROFF_MAX);
1301
1302 mac = cp;
1303 maclen = roff_getname(r, &cp, ln, ppos);
1304
1305 t = (r->current_string = roff_getstrn(r, mac, maclen))
1306 ? ROFF_USERDEF : roffhash_find(mac, maclen);
1307
1308 if (ROFF_MAX != t)
1309 *pos = cp - buf;
1310
1311 return(t);
1312 }
1313
1314 static enum rofferr
1315 roff_cblock(ROFF_ARGS)
1316 {
1317
1318 /*
1319 * A block-close `..' should only be invoked as a child of an
1320 * ignore macro, otherwise raise a warning and just ignore it.
1321 */
1322
1323 if (r->last == NULL) {
1324 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1325 ln, ppos, "..");
1326 return(ROFF_IGN);
1327 }
1328
1329 switch (r->last->tok) {
1330 case ROFF_am:
1331 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1332 /* FALLTHROUGH */
1333 case ROFF_ami:
1334 /* FALLTHROUGH */
1335 case ROFF_de:
1336 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1337 /* FALLTHROUGH */
1338 case ROFF_dei:
1339 /* FALLTHROUGH */
1340 case ROFF_ig:
1341 break;
1342 default:
1343 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1344 ln, ppos, "..");
1345 return(ROFF_IGN);
1346 }
1347
1348 if (buf->buf[pos] != '\0')
1349 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1350 ".. %s", buf->buf + pos);
1351
1352 roffnode_pop(r);
1353 roffnode_cleanscope(r);
1354 return(ROFF_IGN);
1355
1356 }
1357
1358 static void
1359 roffnode_cleanscope(struct roff *r)
1360 {
1361
1362 while (r->last) {
1363 if (--r->last->endspan != 0)
1364 break;
1365 roffnode_pop(r);
1366 }
1367 }
1368
1369 static void
1370 roff_ccond(struct roff *r, int ln, int ppos)
1371 {
1372
1373 if (NULL == r->last) {
1374 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1375 ln, ppos, "\\}");
1376 return;
1377 }
1378
1379 switch (r->last->tok) {
1380 case ROFF_el:
1381 /* FALLTHROUGH */
1382 case ROFF_ie:
1383 /* FALLTHROUGH */
1384 case ROFF_if:
1385 break;
1386 default:
1387 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1388 ln, ppos, "\\}");
1389 return;
1390 }
1391
1392 if (r->last->endspan > -1) {
1393 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1394 ln, ppos, "\\}");
1395 return;
1396 }
1397
1398 roffnode_pop(r);
1399 roffnode_cleanscope(r);
1400 return;
1401 }
1402
1403 static enum rofferr
1404 roff_block(ROFF_ARGS)
1405 {
1406 const char *name;
1407 char *iname, *cp;
1408 size_t namesz;
1409
1410 /* Ignore groff compatibility mode for now. */
1411
1412 if (tok == ROFF_de1)
1413 tok = ROFF_de;
1414 else if (tok == ROFF_dei1)
1415 tok = ROFF_dei;
1416 else if (tok == ROFF_am1)
1417 tok = ROFF_am;
1418 else if (tok == ROFF_ami1)
1419 tok = ROFF_ami;
1420
1421 /* Parse the macro name argument. */
1422
1423 cp = buf->buf + pos;
1424 if (tok == ROFF_ig) {
1425 iname = NULL;
1426 namesz = 0;
1427 } else {
1428 iname = cp;
1429 namesz = roff_getname(r, &cp, ln, ppos);
1430 iname[namesz] = '\0';
1431 }
1432
1433 /* Resolve the macro name argument if it is indirect. */
1434
1435 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1436 if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1437 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1438 r->parse, ln, (int)(iname - buf->buf),
1439 "%.*s", (int)namesz, iname);
1440 namesz = 0;
1441 } else
1442 namesz = strlen(name);
1443 } else
1444 name = iname;
1445
1446 if (namesz == 0 && tok != ROFF_ig) {
1447 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
1448 ln, ppos, roffs[tok].name);
1449 return(ROFF_IGN);
1450 }
1451
1452 roffnode_push(r, tok, name, ln, ppos);
1453
1454 /*
1455 * At the beginning of a `de' macro, clear the existing string
1456 * with the same name, if there is one. New content will be
1457 * appended from roff_block_text() in multiline mode.
1458 */
1459
1460 if (tok == ROFF_de || tok == ROFF_dei)
1461 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1462
1463 if (*cp == '\0')
1464 return(ROFF_IGN);
1465
1466 /* Get the custom end marker. */
1467
1468 iname = cp;
1469 namesz = roff_getname(r, &cp, ln, ppos);
1470
1471 /* Resolve the end marker if it is indirect. */
1472
1473 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1474 if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1475 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1476 r->parse, ln, (int)(iname - buf->buf),
1477 "%.*s", (int)namesz, iname);
1478 namesz = 0;
1479 } else
1480 namesz = strlen(name);
1481 } else
1482 name = iname;
1483
1484 if (namesz)
1485 r->last->end = mandoc_strndup(name, namesz);
1486
1487 if (*cp != '\0')
1488 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
1489 ln, pos, ".%s ... %s", roffs[tok].name, cp);
1490
1491 return(ROFF_IGN);
1492 }
1493
1494 static enum rofferr
1495 roff_block_sub(ROFF_ARGS)
1496 {
1497 enum rofft t;
1498 int i, j;
1499
1500 /*
1501 * First check whether a custom macro exists at this level. If
1502 * it does, then check against it. This is some of groff's
1503 * stranger behaviours. If we encountered a custom end-scope
1504 * tag and that tag also happens to be a "real" macro, then we
1505 * need to try interpreting it again as a real macro. If it's
1506 * not, then return ignore. Else continue.
1507 */
1508
1509 if (r->last->end) {
1510 for (i = pos, j = 0; r->last->end[j]; j++, i++)
1511 if (buf->buf[i] != r->last->end[j])
1512 break;
1513
1514 if (r->last->end[j] == '\0' &&
1515 (buf->buf[i] == '\0' ||
1516 buf->buf[i] == ' ' ||
1517 buf->buf[i] == '\t')) {
1518 roffnode_pop(r);
1519 roffnode_cleanscope(r);
1520
1521 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
1522 i++;
1523
1524 pos = i;
1525 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
1526 ROFF_MAX)
1527 return(ROFF_RERUN);
1528 return(ROFF_IGN);
1529 }
1530 }
1531
1532 /*
1533 * If we have no custom end-query or lookup failed, then try
1534 * pulling it out of the hashtable.
1535 */
1536
1537 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1538
1539 if (t != ROFF_cblock) {
1540 if (tok != ROFF_ig)
1541 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
1542 return(ROFF_IGN);
1543 }
1544
1545 assert(roffs[t].proc);
1546 return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs));
1547 }
1548
1549 static enum rofferr
1550 roff_block_text(ROFF_ARGS)
1551 {
1552
1553 if (tok != ROFF_ig)
1554 roff_setstr(r, r->last->name, buf->buf + pos, 2);
1555
1556 return(ROFF_IGN);
1557 }
1558
1559 static enum rofferr
1560 roff_cond_sub(ROFF_ARGS)
1561 {
1562 enum rofft t;
1563 char *ep;
1564 int rr;
1565
1566 rr = r->last->rule;
1567 roffnode_cleanscope(r);
1568 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1569
1570 /*
1571 * Fully handle known macros when they are structurally
1572 * required or when the conditional evaluated to true.
1573 */
1574
1575 if ((t != ROFF_MAX) &&
1576 (rr || roffs[t].flags & ROFFMAC_STRUCT)) {
1577 assert(roffs[t].proc);
1578 return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs));
1579 }
1580
1581 /*
1582 * If `\}' occurs on a macro line without a preceding macro,
1583 * drop the line completely.
1584 */
1585
1586 ep = buf->buf + pos;
1587 if (ep[0] == '\\' && ep[1] == '}')
1588 rr = 0;
1589
1590 /* Always check for the closing delimiter `\}'. */
1591
1592 while ((ep = strchr(ep, '\\')) != NULL) {
1593 if (*(++ep) == '}') {
1594 *ep = '&';
1595 roff_ccond(r, ln, ep - buf->buf - 1);
1596 }
1597 if (*ep != '\0')
1598 ++ep;
1599 }
1600 return(rr ? ROFF_CONT : ROFF_IGN);
1601 }
1602
1603 static enum rofferr
1604 roff_cond_text(ROFF_ARGS)
1605 {
1606 char *ep;
1607 int rr;
1608
1609 rr = r->last->rule;
1610 roffnode_cleanscope(r);
1611
1612 ep = buf->buf + pos;
1613 while ((ep = strchr(ep, '\\')) != NULL) {
1614 if (*(++ep) == '}') {
1615 *ep = '&';
1616 roff_ccond(r, ln, ep - buf->buf - 1);
1617 }
1618 if (*ep != '\0')
1619 ++ep;
1620 }
1621 return(rr ? ROFF_CONT : ROFF_IGN);
1622 }
1623
/*
 * Parse a single signed integer number at v + *pos, optionally
 * followed by one scaling unit.  Stop at the first non-digit.
 * If there is at least one digit, store the scaled value in *res
 * (unless res is NULL), advance *pos past the number and its unit,
 * and return 1.  Otherwise, return 0 and leave *pos unchanged.
 * Overflows are not detected.
 */
static int
roff_getnum(const char *v, int *pos, int *res)
{
	int	 scratch;	/* stands in for a NULL res */
	int	 negative;	/* there is a leading minus sign */
	int	 cur;		/* scanning position */
	int	 digits;	/* position of the first digit */
	int	 unitlen;	/* 1 if a scaling unit follows */

	if (res == NULL)
		res = &scratch;

	cur = *pos;
	negative = v[cur] == '-';
	if (negative)
		cur++;
	digits = cur;

	*res = 0;
	while (isdigit((unsigned char)v[cur])) {
		*res = 10 * *res + v[cur] - '0';
		cur++;
	}
	if (cur == digits)
		return(0);

	if (negative)
		*res = -*res;

	/* Each number may be followed by one optional scaling unit. */

	unitlen = 1;
	switch (v[cur]) {
	case 'f':
		*res *= 65536;
		break;
	case 'i':
		*res *= 240;
		break;
	case 'c':
		*res *= 240;
		*res /= 2.54;
		break;
	case 'v':
		/* FALLTHROUGH */
	case 'P':
		*res *= 40;
		break;
	case 'm':
		/* FALLTHROUGH */
	case 'n':
		*res *= 24;
		break;
	case 'p':
		*res *= 10;
		*res /= 3;
		break;
	case 'u':
		break;
	case 'M':
		*res *= 6;
		*res /= 25;
		break;
	default:
		/* Not a unit: do not consume this character. */
		unitlen = 0;
		break;
	}

	*pos = cur + unitlen;
	return(1);
}
1692
/*
 * Evaluate a string comparison condition.
 * The character at v + *pos is the delimiter.
 * Succeed if the string up to its second occurrence
 * matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or, lacking that, to the end of the line.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim;	/* the initial delimiter */
	const char	*lhs;	/* scans the first string */
	const char	*rhs;	/* scans the second string */
	int		 equal;

	equal = 0;
	delim = v + *pos;
	lhs = delim + 1;
	rhs = strchr(lhs, *delim);

	/* Without a middle delimiter, there is nothing to compare. */

	if (rhs != NULL) {
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: look for the final delimiter. */
				rhs = strchr(rhs, *delim);
				break;
			}
			if (*rhs == *delim) {
				/* Both strings ended at the same time. */
				equal = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return(equal);
}
1735
/*
 * Evaluate an optionally negated single character, numerical,
 * or string condition starting at v + *pos, advancing *pos.
 * Return 1 if the condition holds, 0 otherwise.
 */
static int
roff_evalcond(struct roff *r, int ln, const char *v, int *pos)
{
	int	 number, savepos, wanttrue;
	char	 c;

	/* A leading exclamation mark negates the condition. */

	wanttrue = v[*pos] != '!';
	if ( ! wanttrue)
		(*pos)++;

	c = v[*pos];
	if (c == '\0')
		return(0);

	/* Single character conditions that always hold. */

	if (strchr("no", c) != NULL) {
		(*pos)++;
		return(wanttrue);
	}

	/* Single character conditions that never hold. */

	if (strchr("cdertv", c) != NULL) {
		(*pos)++;
		return(!wanttrue);
	}

	/*
	 * Try a numerical expression; only if it fails without
	 * consuming anything, try a string comparison.
	 */

	savepos = *pos;
	if (roff_evalnum(r, ln, v, pos, &number, 0))
		return((number > 0) == wanttrue);
	if (*pos != savepos)
		return (0);
	return(roff_evalstrcond(v, pos) == wanttrue);
}
1784
1785 static enum rofferr
1786 roff_line_ignore(ROFF_ARGS)
1787 {
1788
1789 return(ROFF_IGN);
1790 }
1791
1792 static enum rofferr
1793 roff_insec(ROFF_ARGS)
1794 {
1795
1796 mandoc_msg(MANDOCERR_REQ_INSEC, r->parse,
1797 ln, ppos, roffs[tok].name);
1798 return(ROFF_IGN);
1799 }
1800
1801 static enum rofferr
1802 roff_unsupp(ROFF_ARGS)
1803 {
1804
1805 mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse,
1806 ln, ppos, roffs[tok].name);
1807 return(ROFF_IGN);
1808 }
1809
1810 static enum rofferr
1811 roff_cond(ROFF_ARGS)
1812 {
1813
1814 roffnode_push(r, tok, NULL, ln, ppos);
1815
1816 /*
1817 * An `.el' has no conditional body: it will consume the value
1818 * of the current rstack entry set in prior `ie' calls or
1819 * defaults to DENY.
1820 *
1821 * If we're not an `el', however, then evaluate the conditional.
1822 */
1823
1824 r->last->rule = tok == ROFF_el ?
1825 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
1826 roff_evalcond(r, ln, buf->buf, &pos);
1827
1828 /*
1829 * An if-else will put the NEGATION of the current evaluated
1830 * conditional into the stack of rules.
1831 */
1832
1833 if (tok == ROFF_ie) {
1834 if (r->rstackpos + 1 == r->rstacksz) {
1835 r->rstacksz += 16;
1836 r->rstack = mandoc_reallocarray(r->rstack,
1837 r->rstacksz, sizeof(int));
1838 }
1839 r->rstack[++r->rstackpos] = !r->last->rule;
1840 }
1841
1842 /* If the parent has false as its rule, then so do we. */
1843
1844 if (r->last->parent && !r->last->parent->rule)
1845 r->last->rule = 0;
1846
1847 /*
1848 * Determine scope.
1849 * If there is nothing on the line after the conditional,
1850 * not even whitespace, use next-line scope.
1851 */
1852
1853 if (buf->buf[pos] == '\0') {
1854 r->last->endspan = 2;
1855 goto out;
1856 }
1857
1858 while (buf->buf[pos] == ' ')
1859 pos++;
1860
1861 /* An opening brace requests multiline scope. */
1862
1863 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
1864 r->last->endspan = -1;
1865 pos += 2;
1866 goto out;
1867 }
1868
1869 /*
1870 * Anything else following the conditional causes
1871 * single-line scope. Warn if the scope contains
1872 * nothing but trailing whitespace.
1873 */
1874
1875 if (buf->buf[pos] == '\0')
1876 mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
1877 ln, ppos, roffs[tok].name);
1878
1879 r->last->endspan = 1;
1880
1881 out:
1882 *offs = pos;
1883 return(ROFF_RERUN);
1884 }
1885
1886 static enum rofferr
1887 roff_ds(ROFF_ARGS)
1888 {
1889 char *string;
1890 const char *name;
1891 size_t namesz;
1892
1893 /* Ignore groff compatibility mode for now. */
1894
1895 if (tok == ROFF_ds1)
1896 tok = ROFF_ds;
1897 else if (tok == ROFF_as1)
1898 tok = ROFF_as;
1899
1900 /*
1901 * The first word is the name of the string.
1902 * If it is empty or terminated by an escape sequence,
1903 * abort the `ds' request without defining anything.
1904 */
1905
1906 name = string = buf->buf + pos;
1907 if (*name == '\0')
1908 return(ROFF_IGN);
1909
1910 namesz = roff_getname(r, &string, ln, pos);
1911 if (name[namesz] == '\\')
1912 return(ROFF_IGN);
1913
1914 /* Read past the initial double-quote, if any. */
1915 if (*string == '"')
1916 string++;
1917
1918 /* The rest is the value. */
1919 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
1920 ROFF_as == tok);
1921 return(ROFF_IGN);
1922 }
1923
/*
 * Parse a single operator, one or two characters long.
 * If the operator is recognized, store a one-character code for
 * it in *res, advance the parse point, and return that code;
 * the two-character operators are coded as
 * 'l' (<=), '!' (<>), 'i' (<?), 'g' (>=), 'a' (>?), and '=' (==).
 * Otherwise, return 0 and leave the parse point unchanged;
 * *res then holds the unrecognized character.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	char	 second;

	*res = v[*pos];

	if (*res == '<') {
		second = v[*pos + 1];
		if (second == '=')
			*res = 'l';
		else if (second == '>')
			*res = '!';
		else if (second == '?')
			*res = 'i';
		if (*res != '<')
			(*pos)++;
	} else if (*res == '>') {
		second = v[*pos + 1];
		if (second == '=')
			*res = 'g';
		else if (second == '?')
			*res = 'a';
		if (*res != '>')
			(*pos)++;
	} else if (*res == '=') {
		/* The doubled form means the same as the single one. */
		if (v[*pos + 1] == '=')
			(*pos)++;
	} else if (*res == '\0' || strchr("+-*/%&:", *res) == NULL)
		return(0);

	/* Account for the first character of the operator. */

	(*pos)++;
	return(*res);
}
1993
/*
 * Evaluate either a parenthesized numeric expression
 * or a single signed integer number.
 * Return 1 on success, 0 on failure.
 */
static int
roff_evalpar(struct roff *r, int ln,
	const char *v, int *pos, int *res)
{

	/* Without an opening parenthesis, expect a plain number. */

	if (v[*pos] != '(')
		return(roff_getnum(v, pos, res));

	(*pos)++;
	if ( ! roff_evalnum(r, ln, v, pos, res, 1))
		return(0);

	if (v[*pos] == ')') {
		(*pos)++;
		return(1);
	}

	/*
	 * Omission of the closing parenthesis
	 * is an error in validation mode (res == NULL),
	 * but ignored in evaluation mode.
	 */

	return(res != NULL);
}
2023
2024 /*
2025 * Evaluate a complete numeric expression.
2026 * Proceed left to right, there is no concept of precedence.
2027 */
2028 static int
2029 roff_evalnum(struct roff *r, int ln, const char *v,
2030 int *pos, int *res, int skipwhite)
2031 {
2032 int mypos, operand2;
2033 char operator;
2034
2035 if (NULL == pos) {
2036 mypos = 0;
2037 pos = &mypos;
2038 }
2039
2040 if (skipwhite)
2041 while (isspace((unsigned char)v[*pos]))
2042 (*pos)++;
2043
2044 if ( ! roff_evalpar(r, ln, v, pos, res))
2045 return(0);
2046
2047 while (1) {
2048 if (skipwhite)
2049 while (isspace((unsigned char)v[*pos]))
2050 (*pos)++;
2051
2052 if ( ! roff_getop(v, pos, &operator))
2053 break;
2054
2055 if (skipwhite)
2056 while (isspace((unsigned char)v[*pos]))
2057 (*pos)++;
2058
2059 if ( ! roff_evalpar(r, ln, v, pos, &operand2))
2060 return(0);
2061
2062 if (skipwhite)
2063 while (isspace((unsigned char)v[*pos]))
2064 (*pos)++;
2065
2066 if (NULL == res)
2067 continue;
2068
2069 switch (operator) {
2070 case '+':
2071 *res += operand2;
2072 break;
2073 case '-':
2074 *res -= operand2;
2075 break;
2076 case '*':
2077 *res *= operand2;
2078 break;
2079 case '/':
2080 if (operand2 == 0) {
2081 mandoc_msg(MANDOCERR_DIVZERO,
2082 r->parse, ln, *pos, v);
2083 *res = 0;
2084 break;
2085 }
2086 *res /= operand2;
2087 break;
2088 case '%':
2089 if (operand2 == 0) {
2090 mandoc_msg(MANDOCERR_DIVZERO,
2091 r->parse, ln, *pos, v);
2092 *res = 0;
2093 break;
2094 }
2095 *res %= operand2;
2096 break;
2097 case '<':
2098 *res = *res < operand2;
2099 break;
2100 case '>':
2101 *res = *res > operand2;
2102 break;
2103 case 'l':
2104 *res = *res <= operand2;
2105 break;
2106 case 'g':
2107 *res = *res >= operand2;
2108 break;
2109 case '=':
2110 *res = *res == operand2;
2111 break;
2112 case '!':
2113 *res = *res != operand2;
2114 break;
2115 case '&':
2116 *res = *res && operand2;
2117 break;
2118 case ':':
2119 *res = *res || operand2;
2120 break;
2121 case 'i':
2122 if (operand2 < *res)
2123 *res = operand2;
2124 break;
2125 case 'a':
2126 if (operand2 > *res)
2127 *res = operand2;
2128 break;
2129 default:
2130 abort();
2131 }
2132 }
2133 return(1);
2134 }
2135
2136 void
2137 roff_setreg(struct roff *r, const char *name, int val, char sign)
2138 {
2139 struct roffreg *reg;
2140
2141 /* Search for an existing register with the same name. */
2142 reg = r->regtab;
2143
2144 while (reg && strcmp(name, reg->key.p))
2145 reg = reg->next;
2146
2147 if (NULL == reg) {
2148 /* Create a new register. */
2149 reg = mandoc_malloc(sizeof(struct roffreg));
2150 reg->key.p = mandoc_strdup(name);
2151 reg->key.sz = strlen(name);
2152 reg->val = 0;
2153 reg->next = r->regtab;
2154 r->regtab = reg;
2155 }
2156
2157 if ('+' == sign)
2158 reg->val += val;
2159 else if ('-' == sign)
2160 reg->val -= val;
2161 else
2162 reg->val = val;
2163 }
2164
/*
 * Handle some predefined read-only number registers.
 * Only the first character of name is inspected.
 * For now, return -1 if the requested register is not predefined;
 * in case a predefined read-only register having the value -1
 * were to turn up, another special value would have to be chosen.
 */
static int
roff_getregro(const char *name)
{
	static const struct {
		char	 key;
		int	 val;
	} ro[] = {
		{ 'A', 0 },  /* ASCII approximation mode is always off. */
		{ 'g', 1 },  /* Groff compatibility mode is always on. */
		{ 'H', 24 }, /* Fixed horizontal resolution. */
		{ 'j', 0 },  /* Always adjust left margin only. */
		{ 'T', 1 },  /* Some output device is always defined. */
		{ 'V', 40 }, /* Fixed vertical resolution. */
	};
	size_t	 i;

	for (i = 0; i < sizeof(ro) / sizeof(ro[0]); i++)
		if (ro[i].key == *name)
			return(ro[i].val);

	return(-1);
}
2192
2193 int
2194 roff_getreg(const struct roff *r, const char *name)
2195 {
2196 struct roffreg *reg;
2197 int val;
2198
2199 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
2200 val = roff_getregro(name + 1);
2201 if (-1 != val)
2202 return (val);
2203 }
2204
2205 for (reg = r->regtab; reg; reg = reg->next)
2206 if (0 == strcmp(name, reg->key.p))
2207 return(reg->val);
2208
2209 return(0);
2210 }
2211
2212 static int
2213 roff_getregn(const struct roff *r, const char *name, size_t len)
2214 {
2215 struct roffreg *reg;
2216 int val;
2217
2218 if ('.' == name[0] && 2 == len) {
2219 val = roff_getregro(name + 1);
2220 if (-1 != val)
2221 return (val);
2222 }
2223
2224 for (reg = r->regtab; reg; reg = reg->next)
2225 if (len == reg->key.sz &&
2226 0 == strncmp(name, reg->key.p, len))
2227 return(reg->val);
2228
2229 return(0);
2230 }
2231
2232 static void
2233 roff_freereg(struct roffreg *reg)
2234 {
2235 struct roffreg *old_reg;
2236
2237 while (NULL != reg) {
2238 free(reg->key.p);
2239 old_reg = reg;
2240 reg = reg->next;
2241 free(old_reg);
2242 }
2243 }
2244
2245 static enum rofferr
2246 roff_nr(ROFF_ARGS)
2247 {
2248 char *key, *val;
2249 size_t keysz;
2250 int iv;
2251 char sign;
2252
2253 key = val = buf->buf + pos;
2254 if (*key == '\0')
2255 return(ROFF_IGN);
2256
2257 keysz = roff_getname(r, &val, ln, pos);
2258 if (key[keysz] == '\\')
2259 return(ROFF_IGN);
2260 key[keysz] = '\0';
2261
2262 sign = *val;
2263 if (sign == '+' || sign == '-')
2264 val++;
2265
2266 if (roff_evalnum(r, ln, val, NULL, &iv, 0))
2267 roff_setreg(r, key, iv, sign);
2268
2269 return(ROFF_IGN);
2270 }
2271
2272 static enum rofferr
2273 roff_rr(ROFF_ARGS)
2274 {
2275 struct roffreg *reg, **prev;
2276 char *name, *cp;
2277 size_t namesz;
2278
2279 name = cp = buf->buf + pos;
2280 if (*name == '\0')
2281 return(ROFF_IGN);
2282 namesz = roff_getname(r, &cp, ln, pos);
2283 name[namesz] = '\0';
2284
2285 prev = &r->regtab;
2286 while (1) {
2287 reg = *prev;
2288 if (reg == NULL || !strcmp(name, reg->key.p))
2289 break;
2290 prev = &reg->next;
2291 }
2292 if (reg != NULL) {
2293 *prev = reg->next;
2294 free(reg->key.p);
2295 free(reg);
2296 }
2297 return(ROFF_IGN);
2298 }
2299
2300 static enum rofferr
2301 roff_rm(ROFF_ARGS)
2302 {
2303 const char *name;
2304 char *cp;
2305 size_t namesz;
2306
2307 cp = buf->buf + pos;
2308 while (*cp != '\0') {
2309 name = cp;
2310 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
2311 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
2312 if (name[namesz] == '\\')
2313 break;
2314 }
2315 return(ROFF_IGN);
2316 }
2317
2318 static enum rofferr
2319 roff_it(ROFF_ARGS)
2320 {
2321 char *cp;
2322 size_t len;
2323 int iv;
2324
2325 /* Parse the number of lines. */
2326 cp = buf->buf + pos;
2327 len = strcspn(cp, " \t");
2328 cp[len] = '\0';
2329 if ((iv = mandoc_strntoi(cp, len, 10)) <= 0) {
2330 mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
2331 ln, ppos, buf->buf + 1);
2332 return(ROFF_IGN);
2333 }
2334 cp += len + 1;
2335
2336 /* Arm the input line trap. */
2337 roffit_lines = iv;
2338 roffit_macro = mandoc_strdup(cp);
2339 return(ROFF_IGN);
2340 }
2341
2342 static enum rofferr
2343 roff_Dd(ROFF_ARGS)
2344 {
2345 const char *const *cp;
2346
2347 if ((r->options & (MPARSE_MDOC | MPARSE_QUICK)) == 0)
2348 for (cp = __mdoc_reserved; *cp; cp++)
2349 roff_setstr(r, *cp, NULL, 0);
2350
2351 if (r->format == 0)
2352 r->format = MPARSE_MDOC;
2353
2354 return(ROFF_CONT);
2355 }
2356
2357 static enum rofferr
2358 roff_TH(ROFF_ARGS)
2359 {
2360 const char *const *cp;
2361
2362 if ((r->options & MPARSE_QUICK) == 0)
2363 for (cp = __man_reserved; *cp; cp++)
2364 roff_setstr(r, *cp, NULL, 0);
2365
2366 if (r->format == 0)
2367 r->format = MPARSE_MAN;
2368
2369 return(ROFF_CONT);
2370 }
2371
2372 static enum rofferr
2373 roff_TE(ROFF_ARGS)
2374 {
2375
2376 if (NULL == r->tbl)
2377 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2378 ln, ppos, "TE");
2379 else if ( ! tbl_end(&r->tbl)) {
2380 free(buf->buf);
2381 buf->buf = mandoc_strdup(".sp");
2382 buf->sz = 4;
2383 return(ROFF_REPARSE);
2384 }
2385 return(ROFF_IGN);
2386 }
2387
2388 static enum rofferr
2389 roff_T_(ROFF_ARGS)
2390 {
2391
2392 if (NULL == r->tbl)
2393 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2394 ln, ppos, "T&");
2395 else
2396 tbl_restart(ppos, ln, r->tbl);
2397
2398 return(ROFF_IGN);
2399 }
2400
2401 /*
2402 * Handle in-line equation delimiters.
2403 */
2404 static enum rofferr
2405 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
2406 {
2407 char *cp1, *cp2;
2408 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
2409
2410 /*
2411 * Outside equations, look for an opening delimiter.
2412 * If we are inside an equation, we already know it is
2413 * in-line, or this function wouldn't have been called;
2414 * so look for a closing delimiter.
2415 */
2416
2417 cp1 = buf->buf + pos;
2418 cp2 = strchr(cp1, r->eqn == NULL ?
2419 r->last_eqn->odelim : r->last_eqn->cdelim);
2420 if (cp2 == NULL)
2421 return(ROFF_CONT);
2422
2423 *cp2++ = '\0';
2424 bef_pr = bef_nl = aft_nl = aft_pr = "";
2425
2426 /* Handle preceding text, protecting whitespace. */
2427
2428 if (*buf->buf != '\0') {
2429 if (r->eqn == NULL)
2430 bef_pr = "\\&";
2431 bef_nl = "\n";
2432 }
2433
2434 /*
2435 * Prepare replacing the delimiter with an equation macro
2436 * and drop leading white space from the equation.
2437 */
2438
2439 if (r->eqn == NULL) {
2440 while (*cp2 == ' ')
2441 cp2++;
2442 mac = ".EQ";
2443 } else
2444 mac = ".EN";
2445
2446 /* Handle following text, protecting whitespace. */
2447
2448 if (*cp2 != '\0') {
2449 aft_nl = "\n";
2450 if (r->eqn != NULL)
2451 aft_pr = "\\&";
2452 }
2453
2454 /* Do the actual replacement. */
2455
2456 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
2457 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
2458 free(buf->buf);
2459 buf->buf = cp1;
2460
2461 /* Toggle the in-line state of the eqn subsystem. */
2462
2463 r->eqn_inline = r->eqn == NULL;
2464 return(ROFF_REPARSE);
2465 }
2466
2467 static enum rofferr
2468 roff_EQ(ROFF_ARGS)
2469 {
2470 struct eqn_node *e;
2471
2472 assert(r->eqn == NULL);
2473 e = eqn_alloc(ppos, ln, r->parse);
2474
2475 if (r->last_eqn) {
2476 r->last_eqn->next = e;
2477 e->delim = r->last_eqn->delim;
2478 e->odelim = r->last_eqn->odelim;
2479 e->cdelim = r->last_eqn->cdelim;
2480 } else
2481 r->first_eqn = r->last_eqn = e;
2482
2483 r->eqn = r->last_eqn = e;
2484
2485 if (buf->buf[pos] != '\0')
2486 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2487 ".EQ %s", buf->buf + pos);
2488
2489 return(ROFF_IGN);
2490 }
2491
2492 static enum rofferr
2493 roff_EN(ROFF_ARGS)
2494 {
2495
2496 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
2497 return(ROFF_IGN);
2498 }
2499
2500 static enum rofferr
2501 roff_TS(ROFF_ARGS)
2502 {
2503 struct tbl_node *tbl;
2504
2505 if (r->tbl) {
2506 mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
2507 ln, ppos, "TS breaks TS");
2508 tbl_end(&r->tbl);
2509 }
2510
2511 tbl = tbl_alloc(ppos, ln, r->parse);
2512
2513 if (r->last_tbl)
2514 r->last_tbl->next = tbl;
2515 else
2516 r->first_tbl = r->last_tbl = tbl;
2517
2518 r->tbl = r->last_tbl = tbl;
2519 return(ROFF_IGN);
2520 }
2521
2522 static enum rofferr
2523 roff_brp(ROFF_ARGS)
2524 {
2525
2526 buf->buf[pos - 1] = '\0';
2527 return(ROFF_CONT);
2528 }
2529
2530 static enum rofferr
2531 roff_cc(ROFF_ARGS)
2532 {
2533 const char *p;
2534
2535 p = buf->buf + pos;
2536
2537 if (*p == '\0' || (r->control = *p++) == '.')
2538 r->control = 0;
2539
2540 if (*p != '\0')
2541 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
2542 ln, p - buf->buf, "cc ... %s", p);
2543
2544 return(ROFF_IGN);
2545 }
2546
2547 static enum rofferr
2548 roff_tr(ROFF_ARGS)
2549 {
2550 const char *p, *first, *second;
2551 size_t fsz, ssz;
2552 enum mandoc_esc esc;
2553
2554 p = buf->buf + pos;
2555
2556 if (*p == '\0') {
2557 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr");
2558 return(ROFF_IGN);
2559 }
2560
2561 while (*p != '\0') {
2562 fsz = ssz = 1;
2563
2564 first = p++;
2565 if (*first == '\\') {
2566 esc = mandoc_escape(&p, NULL, NULL);
2567 if (esc == ESCAPE_ERROR) {
2568 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2569 ln, (int)(p - buf->buf), first);
2570 return(ROFF_IGN);
2571 }
2572 fsz = (size_t)(p - first);
2573 }
2574
2575 second = p++;
2576 if (*second == '\\') {
2577 esc = mandoc_escape(&p, NULL, NULL);
2578 if (esc == ESCAPE_ERROR) {
2579 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2580 ln, (int)(p - buf->buf), second);
2581 return(ROFF_IGN);
2582 }
2583 ssz = (size_t)(p - second);
2584 } else if (*second == '\0') {
2585 mandoc_vmsg(MANDOCERR_TR_ODD, r->parse,
2586 ln, first - buf->buf, "tr %s", first);
2587 second = " ";
2588 p--;
2589 }
2590
2591 if (fsz > 1) {
2592 roff_setstrn(&r->xmbtab, first, fsz,
2593 second, ssz, 0);
2594 continue;
2595 }
2596
2597 if (r->xtab == NULL)
2598 r->xtab = mandoc_calloc(128,
2599 sizeof(struct roffstr));
2600
2601 free(r->xtab[(int)*first].p);
2602 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
2603 r->xtab[(int)*first].sz = ssz;
2604 }
2605
2606 return(ROFF_IGN);
2607 }
2608
2609 static enum rofferr
2610 roff_so(ROFF_ARGS)
2611 {
2612 char *name, *cp;
2613
2614 name = buf->buf + pos;
2615 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
2616
2617 /*
2618 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
2619 * opening anything that's not in our cwd or anything beneath
2620 * it. Thus, explicitly disallow traversing up the file-system
2621 * or using absolute paths.
2622 */
2623
2624 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
2625 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
2626 ".so %s", name);
2627 buf->sz = mandoc_asprintf(&cp,
2628 ".sp\nSee the file %s.\n.sp", name) + 1;
2629 free(buf->buf);
2630 buf->buf = cp;
2631 *offs = 0;
2632 return(ROFF_REPARSE);
2633 }
2634
2635 *offs = pos;
2636 return(ROFF_SO);
2637 }
2638
2639 static enum rofferr
2640 roff_userdef(ROFF_ARGS)
2641 {
2642 const char *arg[9];
2643 char *cp, *n1, *n2;
2644 int i;
2645
2646 /*
2647 * Collect pointers to macro argument strings
2648 * and NUL-terminate them.
2649 */
2650 cp = buf->buf + pos;
2651 for (i = 0; i < 9; i++)
2652 arg[i] = *cp == '\0' ? "" :
2653 mandoc_getarg(r->parse, &cp, ln, &pos);
2654
2655 /*
2656 * Expand macro arguments.
2657 */
2658 buf->sz = 0;
2659 n1 = cp = mandoc_strdup(r->current_string);
2660 while ((cp = strstr(cp, "\\$")) != NULL) {
2661 i = cp[2] - '1';
2662 if (0 > i || 8 < i) {
2663 /* Not an argument invocation. */
2664 cp += 2;
2665 continue;
2666 }
2667 *cp = '\0';
2668 buf->sz = mandoc_asprintf(&n2, "%s%s%s",
2669 n1, arg[i], cp + 3) + 1;
2670 cp = n2 + (cp - n1);
2671 free(n1);
2672 n1 = n2;
2673 }
2674
2675 /*
2676 * Replace the macro invocation
2677 * by the expanded macro.
2678 */
2679 free(buf->buf);
2680 buf->buf = n1;
2681 if (buf->sz == 0)
2682 buf->sz = strlen(buf->buf) + 1;
2683 *offs = 0;
2684
2685 return(buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
2686 ROFF_REPARSE : ROFF_APPEND);
2687 }
2688
2689 static size_t
2690 roff_getname(struct roff *r, char **cpp, int ln, int pos)
2691 {
2692 char *name, *cp;
2693 size_t namesz;
2694
2695 name = *cpp;
2696 if ('\0' == *name)
2697 return(0);
2698
2699 /* Read until end of name and terminate it with NUL. */
2700 for (cp = name; 1; cp++) {
2701 if ('\0' == *cp || ' ' == *cp) {
2702 namesz = cp - name;
2703 break;
2704 }
2705 if ('\\' != *cp)
2706 continue;
2707 namesz = cp - name;
2708 if ('{' == cp[1] || '}' == cp[1])
2709 break;
2710 cp++;
2711 if ('\\' == *cp)
2712 continue;
2713 mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
2714 "%.*s", (int)(cp - name + 1), name);
2715 mandoc_escape((const char **)&cp, NULL, NULL);
2716 break;
2717 }
2718
2719 /* Read past spaces. */
2720 while (' ' == *cp)
2721 cp++;
2722
2723 *cpp = cp;
2724 return(namesz);
2725 }
2726
2727 /*
2728 * Store *string into the user-defined string called *name.
2729 * To clear an existing entry, call with (*r, *name, NULL, 0).
2730 * append == 0: replace mode
2731 * append == 1: single-line append mode
2732 * append == 2: multiline append mode, append '\n' after each call
2733 */
2734 static void
2735 roff_setstr(struct roff *r, const char *name, const char *string,
2736 int append)
2737 {
2738
2739 roff_setstrn(&r->strtab, name, strlen(name), string,
2740 string ? strlen(string) : 0, append);
2741 }
2742
2743 static void
2744 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
2745 const char *string, size_t stringsz, int append)
2746 {
2747 struct roffkv *n;
2748 char *c;
2749 int i;
2750 size_t oldch, newch;
2751
2752 /* Search for an existing string with the same name. */
2753 n = *r;
2754
2755 while (n && (namesz != n->key.sz ||
2756 strncmp(n->key.p, name, namesz)))
2757 n = n->next;
2758
2759 if (NULL == n) {
2760 /* Create a new string table entry. */
2761 n = mandoc_malloc(sizeof(struct roffkv));
2762 n->key.p = mandoc_strndup(name, namesz);
2763 n->key.sz = namesz;
2764 n->val.p = NULL;
2765 n->val.sz = 0;
2766 n->next = *r;
2767 *r = n;
2768 } else if (0 == append) {
2769 free(n->val.p);
2770 n->val.p = NULL;
2771 n->val.sz = 0;
2772 }
2773
2774 if (NULL == string)
2775 return;
2776
2777 /*
2778 * One additional byte for the '\n' in multiline mode,
2779 * and one for the terminating '\0'.
2780 */
2781 newch = stringsz + (1 < append ? 2u : 1u);
2782
2783 if (NULL == n->val.p) {
2784 n->val.p = mandoc_malloc(newch);
2785 *n->val.p = '\0';
2786 oldch = 0;
2787 } else {
2788 oldch = n->val.sz;
2789 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
2790 }
2791
2792 /* Skip existing content in the destination buffer. */
2793 c = n->val.p + (int)oldch;
2794
2795 /* Append new content to the destination buffer. */
2796 i = 0;
2797 while (i < (int)stringsz) {
2798 /*
2799 * Rudimentary roff copy mode:
2800 * Handle escaped backslashes.
2801 */
2802 if ('\\' == string[i] && '\\' == string[i + 1])
2803 i++;
2804 *c++ = string[i++];
2805 }
2806
2807 /* Append terminating bytes. */
2808 if (1 < append)
2809 *c++ = '\n';
2810
2811 *c = '\0';
2812 n->val.sz = (int)(c - n->val.p);
2813 }
2814
2815 static const char *
2816 roff_getstrn(const struct roff *r, const char *name, size_t len)
2817 {
2818 const struct roffkv *n;
2819 int i;
2820
2821 for (n = r->strtab; n; n = n->next)
2822 if (0 == strncmp(name, n->key.p, len) &&
2823 '\0' == n->key.p[(int)len])
2824 return(n->val.p);
2825
2826 for (i = 0; i < PREDEFS_MAX; i++)
2827 if (0 == strncmp(name, predefs[i].name, len) &&
2828 '\0' == predefs[i].name[(int)len])
2829 return(predefs[i].str);
2830
2831 return(NULL);
2832 }
2833
2834 static void
2835 roff_freestr(struct roffkv *r)
2836 {
2837 struct roffkv *n, *nn;
2838
2839 for (n = r; n; n = nn) {
2840 free(n->key.p);
2841 free(n->val.p);
2842 nn = n->next;
2843 free(n);
2844 }
2845 }
2846
2847 const struct tbl_span *
2848 roff_span(const struct roff *r)
2849 {
2850
2851 return(r->tbl ? tbl_span(r->tbl) : NULL);
2852 }
2853
2854 const struct eqn *
2855 roff_eqn(const struct roff *r)
2856 {
2857
2858 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
2859 }
2860
2861 /*
2862 * Duplicate an input string, making the appropriate character
2863 * conversations (as stipulated by `tr') along the way.
2864 * Returns a heap-allocated string with all the replacements made.
2865 */
2866 char *
2867 roff_strdup(const struct roff *r, const char *p)
2868 {
2869 const struct roffkv *cp;
2870 char *res;
2871 const char *pp;
2872 size_t ssz, sz;
2873 enum mandoc_esc esc;
2874
2875 if (NULL == r->xmbtab && NULL == r->xtab)
2876 return(mandoc_strdup(p));
2877 else if ('\0' == *p)
2878 return(mandoc_strdup(""));
2879
2880 /*
2881 * Step through each character looking for term matches
2882 * (remember that a `tr' can be invoked with an escape, which is
2883 * a glyph but the escape is multi-character).
2884 * We only do this if the character hash has been initialised
2885 * and the string is >0 length.
2886 */
2887
2888 res = NULL;
2889 ssz = 0;
2890
2891 while ('\0' != *p) {
2892 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
2893 sz = r->xtab[(int)*p].sz;
2894 res = mandoc_realloc(res, ssz + sz + 1);
2895 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
2896 ssz += sz;
2897 p++;
2898 continue;
2899 } else if ('\\' != *p) {
2900 res = mandoc_realloc(res, ssz + 2);
2901 res[ssz++] = *p++;
2902 continue;
2903 }
2904
2905 /* Search for term matches. */
2906 for (cp = r->xmbtab; cp; cp = cp->next)
2907 if (0 == strncmp(p, cp->key.p, cp->key.sz))
2908 break;
2909
2910 if (NULL != cp) {
2911 /*
2912 * A match has been found.
2913 * Append the match to the array and move
2914 * forward by its keysize.
2915 */
2916 res = mandoc_realloc(res,
2917 ssz + cp->val.sz + 1);
2918 memcpy(res + ssz, cp->val.p, cp->val.sz);
2919 ssz += cp->val.sz;
2920 p += (int)cp->key.sz;
2921 continue;
2922 }
2923
2924 /*
2925 * Handle escapes carefully: we need to copy
2926 * over just the escape itself, or else we might
2927 * do replacements within the escape itself.
2928 * Make sure to pass along the bogus string.
2929 */
2930 pp = p++;
2931 esc = mandoc_escape(&p, NULL, NULL);
2932 if (ESCAPE_ERROR == esc) {
2933 sz = strlen(pp);
2934 res = mandoc_realloc(res, ssz + sz + 1);
2935 memcpy(res + ssz, pp, sz);
2936 break;
2937 }
2938 /*
2939 * We bail out on bad escapes.
2940 * No need to warn: we already did so when
2941 * roff_res() was called.
2942 */
2943 sz = (int)(p - pp);
2944 res = mandoc_realloc(res, ssz + sz + 1);
2945 memcpy(res + ssz, pp, sz);
2946 ssz += sz;
2947 }
2948
2949 res[(int)ssz] = '\0';
2950 return(res);
2951 }
2952
2953 int
2954 roff_getformat(const struct roff *r)
2955 {
2956
2957 return(r->format);
2958 }
2959
2960 /*
2961 * Find out whether a line is a macro line or not.
2962 * If it is, adjust the current position and return one; if it isn't,
2963 * return zero and don't change the current position.
2964 * If the control character has been set with `.cc', then let that grain
2965 * precedence.
2966 * This is slighly contrary to groff, where using the non-breaking
2967 * control character when `cc' has been invoked will cause the
2968 * non-breaking macro contents to be printed verbatim.
2969 */
2970 int
2971 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
2972 {
2973 int pos;
2974
2975 pos = *ppos;
2976
2977 if (0 != r->control && cp[pos] == r->control)
2978 pos++;
2979 else if (0 != r->control)
2980 return(0);
2981 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
2982 pos += 2;
2983 else if ('.' == cp[pos] || '\'' == cp[pos])
2984 pos++;
2985 else
2986 return(0);
2987
2988 while (' ' == cp[pos] || '\t' == cp[pos])
2989 pos++;
2990
2991 *ppos = pos;
2992 return(1);
2993 }