git.cameronkatri.com Git - mandoc.git/blob - roff.c
More than one data field may follow T} on the same input line.
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.268 2015/04/19 14:57:38 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28
29 #include "mandoc.h"
30 #include "mandoc_aux.h"
31 #include "roff.h"
32 #include "libmandoc.h"
33 #include "roff_int.h"
34 #include "libroff.h"
35
36 /* Maximum number of nested if-else conditionals. */
37 #define RSTACK_MAX 128
38
39 /* Maximum number of string expansions per line, to break infinite loops. */
40 #define EXPAND_LIMIT 1000
41
42 /* --- data types --------------------------------------------------------- */
43
/*
 * Tokens for the roff requests known to this file.
 * The value of each token is the index of its entry in the roffs[]
 * table (roffhash_find() computes the token as an offset into that
 * table), so both lists have to be kept in the same order.
 * ROFF_cblock belongs to the "." entry, ROFF_USERDEF to the final
 * table entry that has no name, and ROFF_MAX serves both as the size
 * of the table and as the "not found" result of roffhash_find().
 */
44 enum rofft {
45 ROFF_ab,
46 ROFF_ad,
47 ROFF_af,
48 ROFF_aln,
49 ROFF_als,
50 ROFF_am,
51 ROFF_am1,
52 ROFF_ami,
53 ROFF_ami1,
54 ROFF_as,
55 ROFF_as1,
56 ROFF_asciify,
57 ROFF_backtrace,
58 ROFF_bd,
59 ROFF_bleedat,
60 ROFF_blm,
61 ROFF_box,
62 ROFF_boxa,
63 ROFF_bp,
64 ROFF_BP,
65 /* MAN_br, MDOC_br */
66 ROFF_break,
67 ROFF_breakchar,
68 ROFF_brnl,
69 ROFF_brp,
70 ROFF_brpnl,
71 ROFF_c2,
72 ROFF_cc,
73 ROFF_ce,
74 ROFF_cf,
75 ROFF_cflags,
76 ROFF_ch,
77 ROFF_char,
78 ROFF_chop,
79 ROFF_class,
80 ROFF_close,
81 ROFF_CL,
82 ROFF_color,
83 ROFF_composite,
84 ROFF_continue,
85 ROFF_cp,
86 ROFF_cropat,
87 ROFF_cs,
88 ROFF_cu,
89 ROFF_da,
90 ROFF_dch,
91 ROFF_Dd,
92 ROFF_de,
93 ROFF_de1,
94 ROFF_defcolor,
95 ROFF_dei,
96 ROFF_dei1,
97 ROFF_device,
98 ROFF_devicem,
99 ROFF_di,
100 ROFF_do,
101 ROFF_ds,
102 ROFF_ds1,
103 ROFF_dwh,
104 ROFF_dt,
105 ROFF_ec,
106 ROFF_ecr,
107 ROFF_ecs,
108 ROFF_el,
109 ROFF_em,
110 ROFF_EN,
111 ROFF_eo,
112 ROFF_EP,
113 ROFF_EQ,
114 ROFF_errprint,
115 ROFF_ev,
116 ROFF_evc,
117 ROFF_ex,
118 ROFF_fallback,
119 ROFF_fam,
120 ROFF_fc,
121 ROFF_fchar,
122 ROFF_fcolor,
123 ROFF_fdeferlig,
124 ROFF_feature,
125 /* MAN_fi; ignored in mdoc(7) */
126 ROFF_fkern,
127 ROFF_fl,
128 ROFF_flig,
129 ROFF_fp,
130 ROFF_fps,
131 ROFF_fschar,
132 ROFF_fspacewidth,
133 ROFF_fspecial,
134 /* MAN_ft; ignored in mdoc(7) */
135 ROFF_ftr,
136 ROFF_fzoom,
137 ROFF_gcolor,
138 ROFF_hc,
139 ROFF_hcode,
140 ROFF_hidechar,
141 ROFF_hla,
142 ROFF_hlm,
143 ROFF_hpf,
144 ROFF_hpfa,
145 ROFF_hpfcode,
146 ROFF_hw,
147 ROFF_hy,
148 ROFF_hylang,
149 ROFF_hylen,
150 ROFF_hym,
151 ROFF_hypp,
152 ROFF_hys,
153 ROFF_ie,
154 ROFF_if,
155 ROFF_ig,
156 /* MAN_in; ignored in mdoc(7) */
157 ROFF_index,
158 ROFF_it,
159 ROFF_itc,
160 ROFF_IX,
161 ROFF_kern,
162 ROFF_kernafter,
163 ROFF_kernbefore,
164 ROFF_kernpair,
165 ROFF_lc,
166 ROFF_lc_ctype,
167 ROFF_lds,
168 ROFF_length,
169 ROFF_letadj,
170 ROFF_lf,
171 ROFF_lg,
172 ROFF_lhang,
173 ROFF_linetabs,
174 /* MAN_ll, MDOC_ll */
175 ROFF_lnr,
176 ROFF_lnrf,
177 ROFF_lpfx,
178 ROFF_ls,
179 ROFF_lsm,
180 ROFF_lt,
181 ROFF_mc,
182 ROFF_mediasize,
183 ROFF_minss,
184 ROFF_mk,
185 ROFF_mso,
186 ROFF_na,
187 ROFF_ne,
188 /* MAN_nf; ignored in mdoc(7) */
189 ROFF_nh,
190 ROFF_nhychar,
191 ROFF_nm,
192 ROFF_nn,
193 ROFF_nop,
194 ROFF_nr,
195 ROFF_nrf,
196 ROFF_nroff,
197 ROFF_ns,
198 ROFF_nx,
199 ROFF_open,
200 ROFF_opena,
201 ROFF_os,
202 ROFF_output,
203 ROFF_padj,
204 ROFF_papersize,
205 ROFF_pc,
206 ROFF_pev,
207 ROFF_pi,
208 ROFF_PI,
209 ROFF_pl,
210 ROFF_pm,
211 ROFF_pn,
212 ROFF_pnr,
213 ROFF_po,
214 ROFF_ps,
215 ROFF_psbb,
216 ROFF_pshape,
217 ROFF_pso,
218 ROFF_ptr,
219 ROFF_pvs,
220 ROFF_rchar,
221 ROFF_rd,
222 ROFF_recursionlimit,
223 ROFF_return,
224 ROFF_rfschar,
225 ROFF_rhang,
226 ROFF_rj,
227 ROFF_rm,
228 ROFF_rn,
229 ROFF_rnn,
230 ROFF_rr,
231 ROFF_rs,
232 ROFF_rt,
233 ROFF_schar,
234 ROFF_sentchar,
235 ROFF_shc,
236 ROFF_shift,
237 ROFF_sizes,
238 ROFF_so,
239 /* MAN_sp, MDOC_sp */
240 ROFF_spacewidth,
241 ROFF_special,
242 ROFF_spreadwarn,
243 ROFF_ss,
244 ROFF_sty,
245 ROFF_substring,
246 ROFF_sv,
247 ROFF_sy,
248 ROFF_T_,
249 ROFF_ta,
250 ROFF_tc,
251 ROFF_TE,
252 ROFF_TH,
253 ROFF_ti,
254 ROFF_tkf,
255 ROFF_tl,
256 ROFF_tm,
257 ROFF_tm1,
258 ROFF_tmc,
259 ROFF_tr,
260 ROFF_track,
261 ROFF_transchar,
262 ROFF_trf,
263 ROFF_trimat,
264 ROFF_trin,
265 ROFF_trnt,
266 ROFF_troff,
267 ROFF_TS,
268 ROFF_uf,
269 ROFF_ul,
270 ROFF_unformat,
271 ROFF_unwatch,
272 ROFF_unwatchn,
273 ROFF_vpt,
274 ROFF_vs,
275 ROFF_warn,
276 ROFF_warnscale,
277 ROFF_watch,
278 ROFF_watchlength,
279 ROFF_watchn,
280 ROFF_wh,
281 ROFF_while,
282 ROFF_write,
283 ROFF_writec,
284 ROFF_writem,
285 ROFF_xflag,
286 ROFF_cblock,
287 ROFF_USERDEF,
288 ROFF_MAX
289 };
290
291 /*
292 * An incredibly-simple string buffer.
 * It is the key and value type of struct roffkv, the key type of
 * struct roffreg, and the element type of the xtab array in struct roff.
293 */
294 struct roffstr {
295 char *p; /* nil-terminated buffer */
296 size_t sz; /* saved strlen(p) */
297 };
298
299 /*
300 * A key-value roffstr pair as part of a singly-linked list.
 * struct roff keeps two such lists, strtab and xmbtab; roff_free1()
 * hands both of them to roff_freestr().
301 */
302 struct roffkv {
303 struct roffstr key;
304 struct roffstr val;
305 struct roffkv *next; /* next in list */
306 };
307
308 /*
309 * A single number register as part of a singly-linked list.
 * The list head is the regtab member of struct roff; roff_free1()
 * hands it to roff_freereg().
310 */
311 struct roffreg {
312 struct roffstr key;
313 int val;
314 struct roffreg *next;
315 };
316
/*
 * State of one roff parser.  It is created by roff_alloc(); the data
 * it owns (tables, equations, block stack, rstack, registers, strings
 * and translation tables) is released by roff_free1(), which both
 * roff_reset() and roff_free() call.
 */
317 struct roff {
318 struct mparse *parse; /* parse point */
319 const struct mchars *mchars; /* character table */
320 struct roffnode *last; /* leaf of stack */
321 int *rstack; /* stack of inverted `ie' values */
322 struct roffreg *regtab; /* number registers */
323 struct roffkv *strtab; /* user-defined strings & macros */
324 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */
325 struct roffstr *xtab; /* single-byte trans table (`tr'); roff_free1() releases 128 entries */
326 const char *current_string; /* value of last called user macro */
327 struct tbl_node *first_tbl; /* first table parsed */
328 struct tbl_node *last_tbl; /* last table parsed */
329 struct tbl_node *tbl; /* current table being parsed */
330 struct eqn_node *last_eqn; /* last equation parsed */
331 struct eqn_node *first_eqn; /* first equation parsed */
332 struct eqn_node *eqn; /* current equation being parsed */
333 int eqn_inline; /* current equation is inline */
334 int options; /* parse options */
335 int rstacksz; /* current size limit of rstack */
336 int rstackpos; /* position in rstack; -1 after roff_alloc() and roff_free1() */
337 int format; /* current file in mdoc or man format; starts as options & (MPARSE_MDOC | MPARSE_MAN) */
338 char control; /* control character; roff_reset() sets it to 0 */
339 };
340
/*
 * One entry of the stack of pending roff blocks whose top is the
 * last member of struct roff.  Entries are created by roffnode_push()
 * and destroyed by roffnode_pop(), which also frees name and end.
 */
341 struct roffnode {
342 enum rofft tok; /* type of node */
343 struct roffnode *parent; /* up one in stack */
344 int line; /* parse line */
345 int col; /* parse col */
346 char *name; /* node name, e.g. macro name */
347 char *end; /* end-rules: custom token */
348 int endspan; /* end-rules: next-line or infty */
349 int rule; /* current evaluation rule; copied from the parent on push, else 0 */
350 };
351
/*
 * Parameter list shared by all request handlers, and roffproc, the
 * matching function pointer type stored in struct roffmac.
 */
352 #define ROFF_ARGS struct roff *r, /* parse ctx */ \
353 enum rofft tok, /* tok of macro */ \
354 struct buf *buf, /* input buffer */ \
355 int ln, /* parse line */ \
356 int ppos, /* original pos in buffer */ \
357 int pos, /* current pos in buffer */ \
358 int *offs /* reset offset of buffer data */
359
360 typedef enum rofferr (*roffproc)(ROFF_ARGS);
361
/*
 * Description of one request: its name, its handlers and its flags.
 * The roffs[] table holds one of these per token.
 */
362 struct roffmac {
363 const char *name; /* macro name */
364 roffproc proc; /* process new macro */
365 roffproc text; /* process as child text of macro */
366 roffproc sub; /* process as child of macro */
367 int flags;
368 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
369 struct roffmac *next; /* next entry in the same hash bucket, set by roffhash_init() */
370 };
371
/* One predefined string; the element type of the predefs[] table. */
372 struct predef {
373 const char *name; /* predefined input name */
374 const char *str; /* replacement symbol */
375 };
376
/*
 * Expands to one struct predef initializer followed by a comma.
 * Presumably this is the form of the lines in predefs.in, which is
 * included inside the initializer of predefs[] -- confirm there.
 */
377 #define PREDEF(__name, __str) \
378 { (__name), (__str) },
379
380 /* --- function prototypes ------------------------------------------------ */
381
382 static enum rofft roffhash_find(const char *, size_t);
383 static void roffhash_init(void);
384 static void roffnode_cleanscope(struct roff *);
385 static void roffnode_pop(struct roff *);
386 static void roffnode_push(struct roff *, enum rofft,
387 const char *, int, int);
388 static enum rofferr roff_block(ROFF_ARGS);
389 static enum rofferr roff_block_text(ROFF_ARGS);
390 static enum rofferr roff_block_sub(ROFF_ARGS);
391 static enum rofferr roff_brp(ROFF_ARGS);
392 static enum rofferr roff_cblock(ROFF_ARGS);
393 static enum rofferr roff_cc(ROFF_ARGS);
394 static void roff_ccond(struct roff *, int, int);
395 static enum rofferr roff_cond(ROFF_ARGS);
396 static enum rofferr roff_cond_text(ROFF_ARGS);
397 static enum rofferr roff_cond_sub(ROFF_ARGS);
398 static enum rofferr roff_ds(ROFF_ARGS);
399 static enum rofferr roff_eqndelim(struct roff *, struct buf *, int);
400 static int roff_evalcond(struct roff *r, int,
401 const char *, int *);
402 static int roff_evalnum(struct roff *, int,
403 const char *, int *, int *, int);
404 static int roff_evalpar(struct roff *, int,
405 const char *, int *, int *, int);
406 static int roff_evalstrcond(const char *, int *);
407 static void roff_free1(struct roff *);
408 static void roff_freereg(struct roffreg *);
409 static void roff_freestr(struct roffkv *);
410 static size_t roff_getname(struct roff *, char **, int, int);
411 static int roff_getnum(const char *, int *, int *, int);
412 static int roff_getop(const char *, int *, char *);
413 static int roff_getregn(const struct roff *,
414 const char *, size_t);
415 static int roff_getregro(const char *name);
416 static const char *roff_getstrn(const struct roff *,
417 const char *, size_t);
418 static enum rofferr roff_insec(ROFF_ARGS);
419 static enum rofferr roff_it(ROFF_ARGS);
420 static enum rofferr roff_line_ignore(ROFF_ARGS);
421 static void roff_man_alloc1(struct roff_man *);
422 static void roff_man_free1(struct roff_man *);
423 static enum rofferr roff_nr(ROFF_ARGS);
424 static enum rofft roff_parse(struct roff *, char *, int *,
425 int, int);
426 static enum rofferr roff_parsetext(struct buf *, int, int *);
427 static enum rofferr roff_res(struct roff *, struct buf *, int, int);
428 static enum rofferr roff_rm(ROFF_ARGS);
429 static enum rofferr roff_rr(ROFF_ARGS);
430 static void roff_setstr(struct roff *,
431 const char *, const char *, int);
432 static void roff_setstrn(struct roffkv **, const char *,
433 size_t, const char *, size_t, int);
434 static enum rofferr roff_so(ROFF_ARGS);
435 static enum rofferr roff_tr(ROFF_ARGS);
436 static enum rofferr roff_Dd(ROFF_ARGS);
437 static enum rofferr roff_TH(ROFF_ARGS);
438 static enum rofferr roff_TE(ROFF_ARGS);
439 static enum rofferr roff_TS(ROFF_ARGS);
440 static enum rofferr roff_EQ(ROFF_ARGS);
441 static enum rofferr roff_EN(ROFF_ARGS);
442 static enum rofferr roff_T_(ROFF_ARGS);
443 static enum rofferr roff_unsupp(ROFF_ARGS);
444 static enum rofferr roff_userdef(ROFF_ARGS);
445
446 /* --- constant data ------------------------------------------------------ */
447
448 /* See roffhash_find() */
449
/*
 * Range of characters a request name may start with: the ASCII codes
 * from 33 ('!') to 126 ('~').  HASHWIDTH is the size of that range and
 * thus the number of hash buckets.
 */
450 #define ASCII_HI 126
451 #define ASCII_LO 33
452 #define HASHWIDTH (ASCII_HI - ASCII_LO + 1)
453
454 #define ROFFNUM_SCALE (1 << 0) /* Honour scaling in roff_getnum(). */
455 #define ROFFNUM_WHITE (1 << 1) /* Skip whitespace in roff_evalnum(). */
456
/*
 * Bucket heads of the request lookup table, indexed by ROFF_HASH().
 * roffhash_init() fills them in and links the entries of each bucket
 * through the next member of struct roffmac.
 */
457 static struct roffmac *hash[HASHWIDTH];
458
/*
 * The request table: one entry per token, in the order of enum rofft.
 * The initializer fields are name, proc, text, sub, flags and next
 * (see struct roffmac).  All next pointers start out as NULL and are
 * set by roffhash_init(), which is why the table is not const.
 * The last entry belongs to ROFF_USERDEF; it has no name and is not
 * entered into the hash table.
 */
459 static struct roffmac roffs[ROFF_MAX] = {
460 { "ab", roff_unsupp, NULL, NULL, 0, NULL },
461 { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
462 { "af", roff_line_ignore, NULL, NULL, 0, NULL },
463 { "aln", roff_unsupp, NULL, NULL, 0, NULL },
464 { "als", roff_unsupp, NULL, NULL, 0, NULL },
465 { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
466 { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
467 { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
468 { "ami1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
469 { "as", roff_ds, NULL, NULL, 0, NULL },
470 { "as1", roff_ds, NULL, NULL, 0, NULL },
471 { "asciify", roff_unsupp, NULL, NULL, 0, NULL },
472 { "backtrace", roff_line_ignore, NULL, NULL, 0, NULL },
473 { "bd", roff_line_ignore, NULL, NULL, 0, NULL },
474 { "bleedat", roff_line_ignore, NULL, NULL, 0, NULL },
475 { "blm", roff_unsupp, NULL, NULL, 0, NULL },
476 { "box", roff_unsupp, NULL, NULL, 0, NULL },
477 { "boxa", roff_unsupp, NULL, NULL, 0, NULL },
478 { "bp", roff_line_ignore, NULL, NULL, 0, NULL },
479 { "BP", roff_unsupp, NULL, NULL, 0, NULL },
480 { "break", roff_unsupp, NULL, NULL, 0, NULL },
481 { "breakchar", roff_line_ignore, NULL, NULL, 0, NULL },
482 { "brnl", roff_line_ignore, NULL, NULL, 0, NULL },
483 { "brp", roff_brp, NULL, NULL, 0, NULL },
484 { "brpnl", roff_line_ignore, NULL, NULL, 0, NULL },
485 { "c2", roff_unsupp, NULL, NULL, 0, NULL },
486 { "cc", roff_cc, NULL, NULL, 0, NULL },
487 { "ce", roff_line_ignore, NULL, NULL, 0, NULL },
488 { "cf", roff_insec, NULL, NULL, 0, NULL },
489 { "cflags", roff_line_ignore, NULL, NULL, 0, NULL },
490 { "ch", roff_line_ignore, NULL, NULL, 0, NULL },
491 { "char", roff_unsupp, NULL, NULL, 0, NULL },
492 { "chop", roff_unsupp, NULL, NULL, 0, NULL },
493 { "class", roff_line_ignore, NULL, NULL, 0, NULL },
494 { "close", roff_insec, NULL, NULL, 0, NULL },
495 { "CL", roff_unsupp, NULL, NULL, 0, NULL },
496 { "color", roff_line_ignore, NULL, NULL, 0, NULL },
497 { "composite", roff_unsupp, NULL, NULL, 0, NULL },
498 { "continue", roff_unsupp, NULL, NULL, 0, NULL },
499 { "cp", roff_line_ignore, NULL, NULL, 0, NULL },
500 { "cropat", roff_line_ignore, NULL, NULL, 0, NULL },
501 { "cs", roff_line_ignore, NULL, NULL, 0, NULL },
502 { "cu", roff_line_ignore, NULL, NULL, 0, NULL },
503 { "da", roff_unsupp, NULL, NULL, 0, NULL },
504 { "dch", roff_unsupp, NULL, NULL, 0, NULL },
505 { "Dd", roff_Dd, NULL, NULL, 0, NULL },
506 { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
507 { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
508 { "defcolor", roff_line_ignore, NULL, NULL, 0, NULL },
509 { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
510 { "dei1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
511 { "device", roff_unsupp, NULL, NULL, 0, NULL },
512 { "devicem", roff_unsupp, NULL, NULL, 0, NULL },
513 { "di", roff_unsupp, NULL, NULL, 0, NULL },
514 { "do", roff_unsupp, NULL, NULL, 0, NULL },
515 { "ds", roff_ds, NULL, NULL, 0, NULL },
516 { "ds1", roff_ds, NULL, NULL, 0, NULL },
517 { "dwh", roff_unsupp, NULL, NULL, 0, NULL },
518 { "dt", roff_unsupp, NULL, NULL, 0, NULL },
519 { "ec", roff_unsupp, NULL, NULL, 0, NULL },
520 { "ecr", roff_unsupp, NULL, NULL, 0, NULL },
521 { "ecs", roff_unsupp, NULL, NULL, 0, NULL },
522 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
523 { "em", roff_unsupp, NULL, NULL, 0, NULL },
524 { "EN", roff_EN, NULL, NULL, 0, NULL },
525 { "eo", roff_unsupp, NULL, NULL, 0, NULL },
526 { "EP", roff_unsupp, NULL, NULL, 0, NULL },
527 { "EQ", roff_EQ, NULL, NULL, 0, NULL },
528 { "errprint", roff_line_ignore, NULL, NULL, 0, NULL },
529 { "ev", roff_unsupp, NULL, NULL, 0, NULL },
530 { "evc", roff_unsupp, NULL, NULL, 0, NULL },
531 { "ex", roff_unsupp, NULL, NULL, 0, NULL },
532 { "fallback", roff_line_ignore, NULL, NULL, 0, NULL },
533 { "fam", roff_line_ignore, NULL, NULL, 0, NULL },
534 { "fc", roff_unsupp, NULL, NULL, 0, NULL },
535 { "fchar", roff_unsupp, NULL, NULL, 0, NULL },
536 { "fcolor", roff_line_ignore, NULL, NULL, 0, NULL },
537 { "fdeferlig", roff_line_ignore, NULL, NULL, 0, NULL },
538 { "feature", roff_line_ignore, NULL, NULL, 0, NULL },
539 { "fkern", roff_line_ignore, NULL, NULL, 0, NULL },
540 { "fl", roff_line_ignore, NULL, NULL, 0, NULL },
541 { "flig", roff_line_ignore, NULL, NULL, 0, NULL },
542 { "fp", roff_line_ignore, NULL, NULL, 0, NULL },
543 { "fps", roff_line_ignore, NULL, NULL, 0, NULL },
544 { "fschar", roff_unsupp, NULL, NULL, 0, NULL },
545 { "fspacewidth", roff_line_ignore, NULL, NULL, 0, NULL },
546 { "fspecial", roff_line_ignore, NULL, NULL, 0, NULL },
547 { "ftr", roff_line_ignore, NULL, NULL, 0, NULL },
548 { "fzoom", roff_line_ignore, NULL, NULL, 0, NULL },
549 { "gcolor", roff_line_ignore, NULL, NULL, 0, NULL },
550 { "hc", roff_line_ignore, NULL, NULL, 0, NULL },
551 { "hcode", roff_line_ignore, NULL, NULL, 0, NULL },
552 { "hidechar", roff_line_ignore, NULL, NULL, 0, NULL },
553 { "hla", roff_line_ignore, NULL, NULL, 0, NULL },
554 { "hlm", roff_line_ignore, NULL, NULL, 0, NULL },
555 { "hpf", roff_line_ignore, NULL, NULL, 0, NULL },
556 { "hpfa", roff_line_ignore, NULL, NULL, 0, NULL },
557 { "hpfcode", roff_line_ignore, NULL, NULL, 0, NULL },
558 { "hw", roff_line_ignore, NULL, NULL, 0, NULL },
559 { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
560 { "hylang", roff_line_ignore, NULL, NULL, 0, NULL },
561 { "hylen", roff_line_ignore, NULL, NULL, 0, NULL },
562 { "hym", roff_line_ignore, NULL, NULL, 0, NULL },
563 { "hypp", roff_line_ignore, NULL, NULL, 0, NULL },
564 { "hys", roff_line_ignore, NULL, NULL, 0, NULL },
565 { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
566 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
567 { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
568 { "index", roff_unsupp, NULL, NULL, 0, NULL },
569 { "it", roff_it, NULL, NULL, 0, NULL },
570 { "itc", roff_unsupp, NULL, NULL, 0, NULL },
571 { "IX", roff_line_ignore, NULL, NULL, 0, NULL },
572 { "kern", roff_line_ignore, NULL, NULL, 0, NULL },
573 { "kernafter", roff_line_ignore, NULL, NULL, 0, NULL },
574 { "kernbefore", roff_line_ignore, NULL, NULL, 0, NULL },
575 { "kernpair", roff_line_ignore, NULL, NULL, 0, NULL },
576 { "lc", roff_unsupp, NULL, NULL, 0, NULL },
577 { "lc_ctype", roff_unsupp, NULL, NULL, 0, NULL },
578 { "lds", roff_unsupp, NULL, NULL, 0, NULL },
579 { "length", roff_unsupp, NULL, NULL, 0, NULL },
580 { "letadj", roff_line_ignore, NULL, NULL, 0, NULL },
581 { "lf", roff_insec, NULL, NULL, 0, NULL },
582 { "lg", roff_line_ignore, NULL, NULL, 0, NULL },
583 { "lhang", roff_line_ignore, NULL, NULL, 0, NULL },
584 { "linetabs", roff_unsupp, NULL, NULL, 0, NULL },
585 { "lnr", roff_unsupp, NULL, NULL, 0, NULL },
586 { "lnrf", roff_unsupp, NULL, NULL, 0, NULL },
587 { "lpfx", roff_unsupp, NULL, NULL, 0, NULL },
588 { "ls", roff_line_ignore, NULL, NULL, 0, NULL },
589 { "lsm", roff_unsupp, NULL, NULL, 0, NULL },
590 { "lt", roff_line_ignore, NULL, NULL, 0, NULL },
591 { "mc", roff_line_ignore, NULL, NULL, 0, NULL },
592 { "mediasize", roff_line_ignore, NULL, NULL, 0, NULL },
593 { "minss", roff_line_ignore, NULL, NULL, 0, NULL },
594 { "mk", roff_line_ignore, NULL, NULL, 0, NULL },
595 { "mso", roff_insec, NULL, NULL, 0, NULL },
596 { "na", roff_line_ignore, NULL, NULL, 0, NULL },
597 { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
598 { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
599 { "nhychar", roff_line_ignore, NULL, NULL, 0, NULL },
600 { "nm", roff_unsupp, NULL, NULL, 0, NULL },
601 { "nn", roff_unsupp, NULL, NULL, 0, NULL },
602 { "nop", roff_unsupp, NULL, NULL, 0, NULL },
603 { "nr", roff_nr, NULL, NULL, 0, NULL },
604 { "nrf", roff_unsupp, NULL, NULL, 0, NULL },
605 { "nroff", roff_line_ignore, NULL, NULL, 0, NULL },
606 { "ns", roff_line_ignore, NULL, NULL, 0, NULL },
607 { "nx", roff_insec, NULL, NULL, 0, NULL },
608 { "open", roff_insec, NULL, NULL, 0, NULL },
609 { "opena", roff_insec, NULL, NULL, 0, NULL },
610 { "os", roff_line_ignore, NULL, NULL, 0, NULL },
611 { "output", roff_unsupp, NULL, NULL, 0, NULL },
612 { "padj", roff_line_ignore, NULL, NULL, 0, NULL },
613 { "papersize", roff_line_ignore, NULL, NULL, 0, NULL },
614 { "pc", roff_line_ignore, NULL, NULL, 0, NULL },
615 { "pev", roff_line_ignore, NULL, NULL, 0, NULL },
616 { "pi", roff_insec, NULL, NULL, 0, NULL },
617 { "PI", roff_unsupp, NULL, NULL, 0, NULL },
618 { "pl", roff_line_ignore, NULL, NULL, 0, NULL },
619 { "pm", roff_line_ignore, NULL, NULL, 0, NULL },
620 { "pn", roff_line_ignore, NULL, NULL, 0, NULL },
621 { "pnr", roff_line_ignore, NULL, NULL, 0, NULL },
622 { "po", roff_line_ignore, NULL, NULL, 0, NULL },
623 { "ps", roff_line_ignore, NULL, NULL, 0, NULL },
624 { "psbb", roff_unsupp, NULL, NULL, 0, NULL },
625 { "pshape", roff_unsupp, NULL, NULL, 0, NULL },
626 { "pso", roff_insec, NULL, NULL, 0, NULL },
627 { "ptr", roff_line_ignore, NULL, NULL, 0, NULL },
628 { "pvs", roff_line_ignore, NULL, NULL, 0, NULL },
629 { "rchar", roff_unsupp, NULL, NULL, 0, NULL },
630 { "rd", roff_line_ignore, NULL, NULL, 0, NULL },
631 { "recursionlimit", roff_line_ignore, NULL, NULL, 0, NULL },
632 { "return", roff_unsupp, NULL, NULL, 0, NULL },
633 { "rfschar", roff_unsupp, NULL, NULL, 0, NULL },
634 { "rhang", roff_line_ignore, NULL, NULL, 0, NULL },
635 { "rj", roff_line_ignore, NULL, NULL, 0, NULL },
636 { "rm", roff_rm, NULL, NULL, 0, NULL },
637 { "rn", roff_unsupp, NULL, NULL, 0, NULL },
638 { "rnn", roff_unsupp, NULL, NULL, 0, NULL },
639 { "rr", roff_rr, NULL, NULL, 0, NULL },
640 { "rs", roff_line_ignore, NULL, NULL, 0, NULL },
641 { "rt", roff_line_ignore, NULL, NULL, 0, NULL },
642 { "schar", roff_unsupp, NULL, NULL, 0, NULL },
643 { "sentchar", roff_line_ignore, NULL, NULL, 0, NULL },
644 { "shc", roff_line_ignore, NULL, NULL, 0, NULL },
645 { "shift", roff_unsupp, NULL, NULL, 0, NULL },
646 { "sizes", roff_line_ignore, NULL, NULL, 0, NULL },
647 { "so", roff_so, NULL, NULL, 0, NULL },
648 { "spacewidth", roff_line_ignore, NULL, NULL, 0, NULL },
649 { "special", roff_line_ignore, NULL, NULL, 0, NULL },
650 { "spreadwarn", roff_line_ignore, NULL, NULL, 0, NULL },
651 { "ss", roff_line_ignore, NULL, NULL, 0, NULL },
652 { "sty", roff_line_ignore, NULL, NULL, 0, NULL },
653 { "substring", roff_unsupp, NULL, NULL, 0, NULL },
654 { "sv", roff_line_ignore, NULL, NULL, 0, NULL },
655 { "sy", roff_insec, NULL, NULL, 0, NULL },
656 { "T&", roff_T_, NULL, NULL, 0, NULL },
657 { "ta", roff_unsupp, NULL, NULL, 0, NULL },
658 { "tc", roff_unsupp, NULL, NULL, 0, NULL },
659 { "TE", roff_TE, NULL, NULL, 0, NULL },
660 { "TH", roff_TH, NULL, NULL, 0, NULL },
661 { "ti", roff_unsupp, NULL, NULL, 0, NULL },
662 { "tkf", roff_line_ignore, NULL, NULL, 0, NULL },
663 { "tl", roff_unsupp, NULL, NULL, 0, NULL },
664 { "tm", roff_line_ignore, NULL, NULL, 0, NULL },
665 { "tm1", roff_line_ignore, NULL, NULL, 0, NULL },
666 { "tmc", roff_line_ignore, NULL, NULL, 0, NULL },
667 { "tr", roff_tr, NULL, NULL, 0, NULL },
668 { "track", roff_line_ignore, NULL, NULL, 0, NULL },
669 { "transchar", roff_line_ignore, NULL, NULL, 0, NULL },
670 { "trf", roff_insec, NULL, NULL, 0, NULL },
671 { "trimat", roff_line_ignore, NULL, NULL, 0, NULL },
672 { "trin", roff_unsupp, NULL, NULL, 0, NULL },
673 { "trnt", roff_unsupp, NULL, NULL, 0, NULL },
674 { "troff", roff_line_ignore, NULL, NULL, 0, NULL },
675 { "TS", roff_TS, NULL, NULL, 0, NULL },
676 { "uf", roff_line_ignore, NULL, NULL, 0, NULL },
677 { "ul", roff_line_ignore, NULL, NULL, 0, NULL },
678 { "unformat", roff_unsupp, NULL, NULL, 0, NULL },
679 { "unwatch", roff_line_ignore, NULL, NULL, 0, NULL },
680 { "unwatchn", roff_line_ignore, NULL, NULL, 0, NULL },
681 { "vpt", roff_line_ignore, NULL, NULL, 0, NULL },
682 { "vs", roff_line_ignore, NULL, NULL, 0, NULL },
683 { "warn", roff_line_ignore, NULL, NULL, 0, NULL },
684 { "warnscale", roff_line_ignore, NULL, NULL, 0, NULL },
685 { "watch", roff_line_ignore, NULL, NULL, 0, NULL },
686 { "watchlength", roff_line_ignore, NULL, NULL, 0, NULL },
687 { "watchn", roff_line_ignore, NULL, NULL, 0, NULL },
688 { "wh", roff_unsupp, NULL, NULL, 0, NULL },
689 { "while", roff_unsupp, NULL, NULL, 0, NULL },
690 { "write", roff_insec, NULL, NULL, 0, NULL },
691 { "writec", roff_insec, NULL, NULL, 0, NULL },
692 { "writem", roff_insec, NULL, NULL, 0, NULL },
693 { "xflag", roff_line_ignore, NULL, NULL, 0, NULL },
694 { ".", roff_cblock, NULL, NULL, 0, NULL },
695 { NULL, roff_userdef, NULL, NULL, 0, NULL },
696 };
697
698 /* not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH */
/*
 * NULL-terminated list of names, presumably the mdoc(7) macro names
 * that must not be redefined -- no use of the array is visible in
 * this part of the file, so confirm with its users.
 * NOTE(review): the array has external linkage and its name starts
 * with two underscores, which C reserves for the implementation;
 * renaming it would require updating every user.
 */
699 const char *const __mdoc_reserved[] = {
700 "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
701 "Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
702 "Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
703 "Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
704 "Dt", "Dv", "Dx", "D1",
705 "Ec", "Ed", "Ef", "Ek", "El", "Em",
706 "En", "Eo", "Er", "Es", "Ev", "Ex",
707 "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
708 "Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp",
709 "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
710 "Oc", "Oo", "Op", "Os", "Ot", "Ox",
711 "Pa", "Pc", "Pf", "Po", "Pp", "Pq",
712 "Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv",
713 "Sc", "Sh", "Sm", "So", "Sq",
714 "Ss", "St", "Sx", "Sy",
715 "Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
716 "%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
717 "%P", "%Q", "%R", "%T", "%U", "%V",
718 NULL
719 };
720
721 /* not currently implemented: BT DE DS ME MT PT SY TQ YS */
/*
 * NULL-terminated list of names, presumably the man(7) macro names
 * that must not be redefined -- no use of the array is visible in
 * this part of the file, so confirm with its users.
 * NOTE(review): the array has external linkage and its name starts
 * with two underscores, which C reserves for the implementation;
 * renaming it would require updating every user.
 */
722 const char *const __man_reserved[] = {
723 "AT", "B", "BI", "BR", "DT",
724 "EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR",
725 "LP", "OP", "P", "PD", "PP",
726 "R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS",
727 "TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR",
728 NULL
729 };
730
731 /* Array of injected predefined strings. */
/*
 * The initializers are pulled in from predefs.in, presumably as one
 * PREDEF(name, str) line per string -- confirm in that file.
 * PREDEFS_MAX has to be at least the number of entries found there.
 */
732 #define PREDEFS_MAX 38
733 static const struct predef predefs[PREDEFS_MAX] = {
734 #include "predefs.in"
735 };
736
/*
 * Bucket index for a request name: the offset of its first character
 * from ASCII_LO.  Callers have to make sure that this character lies
 * in the range ASCII_LO..ASCII_HI.  See roffhash_find().
 * The argument is parenthesized so that pointer expressions such as
 * "name + 1" index the intended string.
 */
#define ROFF_HASH(p) ((p)[0] - ASCII_LO)
739
/*
 * NOTE(review): these two are file-scope variables rather than members
 * of struct roff, so they are shared by all parsers and roff_free1()
 * does not touch them.
 */
740 static int roffit_lines; /* number of lines to delay */
741 static char *roffit_macro; /* nil-terminated macro line */
742
743
744 /* --- request table ------------------------------------------------------ */
745
746 static void
747 roffhash_init(void)
748 {
749 struct roffmac *n;
750 int buc, i;
751
752 for (i = 0; i < (int)ROFF_USERDEF; i++) {
753 assert(roffs[i].name[0] >= ASCII_LO);
754 assert(roffs[i].name[0] <= ASCII_HI);
755
756 buc = ROFF_HASH(roffs[i].name);
757
758 if (NULL != (n = hash[buc])) {
759 for ( ; n->next; n = n->next)
760 /* Do nothing. */ ;
761 n->next = &roffs[i];
762 } else
763 hash[buc] = &roffs[i];
764 }
765 }
766
767 /*
768 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
769 * the nil-terminated string name could be found.
770 */
771 static enum rofft
772 roffhash_find(const char *p, size_t s)
773 {
774 int buc;
775 struct roffmac *n;
776
777 /*
778 * libroff has an extremely simple hashtable, for the time
779 * being, which simply keys on the first character, which must
780 * be printable, then walks a chain. It works well enough until
781 * optimised.
782 */
783
784 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
785 return(ROFF_MAX);
786
787 buc = ROFF_HASH(p);
788
789 if (NULL == (n = hash[buc]))
790 return(ROFF_MAX);
791 for ( ; n; n = n->next)
792 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
793 return((enum rofft)(n - roffs));
794
795 return(ROFF_MAX);
796 }
797
798 /* --- stack of request blocks -------------------------------------------- */
799
800 /*
801 * Pop the current node off of the stack of roff instructions currently
802 * pending.
803 */
804 static void
805 roffnode_pop(struct roff *r)
806 {
807 struct roffnode *p;
808
809 assert(r->last);
810 p = r->last;
811
812 r->last = r->last->parent;
813 free(p->name);
814 free(p->end);
815 free(p);
816 }
817
818 /*
819 * Push a roff node onto the instruction stack. This must later be
820 * removed with roffnode_pop().
821 */
822 static void
823 roffnode_push(struct roff *r, enum rofft tok, const char *name,
824 int line, int col)
825 {
826 struct roffnode *p;
827
828 p = mandoc_calloc(1, sizeof(struct roffnode));
829 p->tok = tok;
830 if (name)
831 p->name = mandoc_strdup(name);
832 p->parent = r->last;
833 p->line = line;
834 p->col = col;
835 p->rule = p->parent ? p->parent->rule : 0;
836
837 r->last = p;
838 }
839
840 /* --- roff parser state data management ---------------------------------- */
841
842 static void
843 roff_free1(struct roff *r)
844 {
845 struct tbl_node *tbl;
846 struct eqn_node *e;
847 int i;
848
849 while (NULL != (tbl = r->first_tbl)) {
850 r->first_tbl = tbl->next;
851 tbl_free(tbl);
852 }
853 r->first_tbl = r->last_tbl = r->tbl = NULL;
854
855 while (NULL != (e = r->first_eqn)) {
856 r->first_eqn = e->next;
857 eqn_free(e);
858 }
859 r->first_eqn = r->last_eqn = r->eqn = NULL;
860
861 while (r->last)
862 roffnode_pop(r);
863
864 free (r->rstack);
865 r->rstack = NULL;
866 r->rstacksz = 0;
867 r->rstackpos = -1;
868
869 roff_freereg(r->regtab);
870 r->regtab = NULL;
871
872 roff_freestr(r->strtab);
873 roff_freestr(r->xmbtab);
874 r->strtab = r->xmbtab = NULL;
875
876 if (r->xtab)
877 for (i = 0; i < 128; i++)
878 free(r->xtab[i].p);
879 free(r->xtab);
880 r->xtab = NULL;
881 }
882
883 void
884 roff_reset(struct roff *r)
885 {
886
887 roff_free1(r);
888 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
889 r->control = 0;
890 }
891
/*
 * Destroy a parser: release the data it owns, then the struct itself.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);	/* contents first */
	free(r);
}
899
900 struct roff *
901 roff_alloc(struct mparse *parse, const struct mchars *mchars, int options)
902 {
903 struct roff *r;
904
905 r = mandoc_calloc(1, sizeof(struct roff));
906 r->parse = parse;
907 r->mchars = mchars;
908 r->options = options;
909 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
910 r->rstackpos = -1;
911
912 roffhash_init();
913
914 return(r);
915 }
916
917 /* --- syntax tree state data management ---------------------------------- */
918
919 static void
920 roff_man_free1(struct roff_man *man)
921 {
922
923 if (man->first != NULL)
924 roff_node_delete(man, man->first);
925 free(man->meta.msec);
926 free(man->meta.vol);
927 free(man->meta.os);
928 free(man->meta.arch);
929 free(man->meta.title);
930 free(man->meta.name);
931 free(man->meta.date);
932 }
933
934 static void
935 roff_man_alloc1(struct roff_man *man)
936 {
937
938 memset(&man->meta, 0, sizeof(man->meta));
939 man->first = mandoc_calloc(1, sizeof(*man->first));
940 man->first->type = ROFFT_ROOT;
941 man->last = man->first;
942 man->last_es = NULL;
943 man->flags = 0;
944 man->macroset = MACROSET_NONE;
945 man->lastsec = man->lastnamed = SEC_NONE;
946 man->next = ROFF_NEXT_CHILD;
947 }
948
/*
 * Throw away the current document and start over with an empty one.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);	/* old tree and meta data */
	roff_man_alloc1(man);	/* new root node */
}
956
/*
 * Destroy a document parser state: release its tree and meta data,
 * then the struct itself.
 */
void
roff_man_free(struct roff_man *man)
{
	roff_man_free1(man);	/* contents first */
	free(man);
}
964
965 struct roff_man *
966 roff_man_alloc(struct roff *roff, struct mparse *parse,
967 const char *defos, int quick)
968 {
969 struct roff_man *man;
970
971 man = mandoc_calloc(1, sizeof(*man));
972 man->parse = parse;
973 man->roff = roff;
974 man->defos = defos;
975 man->quick = quick;
976 roff_man_alloc1(man);
977 return(man);
978 }
979
980 /* --- syntax tree handling ----------------------------------------------- */
981
982 struct roff_node *
983 roff_node_alloc(struct roff_man *man, int line, int pos,
984 enum roff_type type, int tok)
985 {
986 struct roff_node *n;
987
988 n = mandoc_calloc(1, sizeof(*n));
989 n->line = line;
990 n->pos = pos;
991 n->tok = tok;
992 n->type = type;
993 n->sec = man->lastsec;
994
995 if (man->flags & MDOC_SYNOPSIS)
996 n->flags |= MDOC_SYNPRETTY;
997 else
998 n->flags &= ~MDOC_SYNPRETTY;
999 if (man->flags & MDOC_NEWLINE)
1000 n->flags |= MDOC_LINE;
1001 man->flags &= ~MDOC_NEWLINE;
1002
1003 return(n);
1004 }
1005
1006 void
1007 roff_node_append(struct roff_man *man, struct roff_node *n)
1008 {
1009
1010 switch (man->next) {
1011 case ROFF_NEXT_SIBLING:
1012 man->last->next = n;
1013 n->prev = man->last;
1014 n->parent = man->last->parent;
1015 break;
1016 case ROFF_NEXT_CHILD:
1017 man->last->child = n;
1018 n->parent = man->last;
1019 break;
1020 default:
1021 abort();
1022 /* NOTREACHED */
1023 }
1024 n->parent->nchild++;
1025
1026 /*
1027 * Copy over the normalised-data pointer of our parent. Not
1028 * everybody has one, but copying a null pointer is fine.
1029 */
1030
1031 switch (n->type) {
1032 case ROFFT_BODY:
1033 if (n->end != ENDBODY_NOT)
1034 break;
1035 /* FALLTHROUGH */
1036 case ROFFT_TAIL:
1037 /* FALLTHROUGH */
1038 case ROFFT_HEAD:
1039 n->norm = n->parent->norm;
1040 break;
1041 default:
1042 break;
1043 }
1044
1045 if (man->macroset == MACROSET_MDOC)
1046 mdoc_valid_pre(man, n);
1047
1048 switch (n->type) {
1049 case ROFFT_HEAD:
1050 assert(n->parent->type == ROFFT_BLOCK);
1051 n->parent->head = n;
1052 break;
1053 case ROFFT_BODY:
1054 if (n->end)
1055 break;
1056 assert(n->parent->type == ROFFT_BLOCK);
1057 n->parent->body = n;
1058 break;
1059 case ROFFT_TAIL:
1060 assert(n->parent->type == ROFFT_BLOCK);
1061 n->parent->tail = n;
1062 break;
1063 default:
1064 break;
1065 }
1066 man->last = n;
1067 }
1068
1069 void
1070 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
1071 {
1072 struct roff_node *n;
1073
1074 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
1075 n->string = roff_strdup(man->roff, word);
1076 roff_node_append(man, n);
1077 if (man->macroset == MACROSET_MDOC)
1078 mdoc_valid_post(man);
1079 else
1080 man_valid_post(man);
1081 man->next = ROFF_NEXT_SIBLING;
1082 }
1083
1084 void
1085 roff_word_append(struct roff_man *man, const char *word)
1086 {
1087 struct roff_node *n;
1088 char *addstr, *newstr;
1089
1090 n = man->last;
1091 addstr = roff_strdup(man->roff, word);
1092 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
1093 free(addstr);
1094 free(n->string);
1095 n->string = newstr;
1096 man->next = ROFF_NEXT_SIBLING;
1097 }
1098
1099 void
1100 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
1101 {
1102 struct roff_node *n;
1103
1104 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
1105 roff_node_append(man, n);
1106 man->next = ROFF_NEXT_CHILD;
1107 }
1108
1109 struct roff_node *
1110 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
1111 {
1112 struct roff_node *n;
1113
1114 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1115 roff_node_append(man, n);
1116 man->next = ROFF_NEXT_CHILD;
1117 return(n);
1118 }
1119
1120 struct roff_node *
1121 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1122 {
1123 struct roff_node *n;
1124
1125 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1126 roff_node_append(man, n);
1127 man->next = ROFF_NEXT_CHILD;
1128 return(n);
1129 }
1130
1131 struct roff_node *
1132 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1133 {
1134 struct roff_node *n;
1135
1136 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1137 roff_node_append(man, n);
1138 man->next = ROFF_NEXT_CHILD;
1139 return(n);
1140 }
1141
1142 void
1143 roff_addeqn(struct roff_man *man, const struct eqn *eqn)
1144 {
1145 struct roff_node *n;
1146
1147 n = roff_node_alloc(man, eqn->ln, eqn->pos, ROFFT_EQN, TOKEN_NONE);
1148 n->eqn = eqn;
1149 if (eqn->ln > man->last->line)
1150 n->flags |= MDOC_LINE;
1151 roff_node_append(man, n);
1152 man->next = ROFF_NEXT_SIBLING;
1153 }
1154
1155 void
1156 roff_addtbl(struct roff_man *man, const struct tbl_span *tbl)
1157 {
1158 struct roff_node *n;
1159
1160 if (man->macroset == MACROSET_MAN)
1161 man_breakscope(man, TOKEN_NONE);
1162 n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE);
1163 n->span = tbl;
1164 roff_node_append(man, n);
1165 if (man->macroset == MACROSET_MDOC)
1166 mdoc_valid_post(man);
1167 else
1168 man_valid_post(man);
1169 man->next = ROFF_NEXT_SIBLING;
1170 }
1171
1172 void
1173 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1174 {
1175
1176 /* Adjust siblings. */
1177
1178 if (n->prev)
1179 n->prev->next = n->next;
1180 if (n->next)
1181 n->next->prev = n->prev;
1182
1183 /* Adjust parent. */
1184
1185 if (n->parent != NULL) {
1186 n->parent->nchild--;
1187 if (n->parent->child == n)
1188 n->parent->child = n->next;
1189 if (n->parent->last == n)
1190 n->parent->last = n->prev;
1191 }
1192
1193 /* Adjust parse point. */
1194
1195 if (man == NULL)
1196 return;
1197 if (man->last == n) {
1198 if (n->prev == NULL) {
1199 man->last = n->parent;
1200 man->next = ROFF_NEXT_CHILD;
1201 } else {
1202 man->last = n->prev;
1203 man->next = ROFF_NEXT_SIBLING;
1204 }
1205 }
1206 if (man->first == n)
1207 man->first = NULL;
1208 }
1209
1210 void
1211 roff_node_free(struct roff_node *n)
1212 {
1213
1214 if (n->args != NULL)
1215 mdoc_argv_free(n->args);
1216 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1217 free(n->norm);
1218 free(n->string);
1219 free(n);
1220 }
1221
1222 void
1223 roff_node_delete(struct roff_man *man, struct roff_node *n)
1224 {
1225
1226 while (n->child != NULL)
1227 roff_node_delete(man, n->child);
1228 assert(n->nchild == 0);
1229 roff_node_unlink(man, n);
1230 roff_node_free(n);
1231 }
1232
1233 /* --- main functions of the roff parser ---------------------------------- */
1234
1235 /*
1236 * In the current line, expand escape sequences that tend to get
1237 * used in numerical expressions and conditional requests.
1238 * Also check the syntax of the remaining escape sequences.
 *
 * The part of the line starting at pos is scanned backwards.
 * Unescaped \* (string), \n (register), \B (numeric expression
 * check) and \w (width) sequences are replaced in buf, which is
 * reallocated for each replacement; all other escapes are only
 * checked.  Returns ROFF_IGN if more than EXPAND_LIMIT expansions
 * are needed or the line would grow beyond SHRT_MAX bytes,
 * ROFF_CONT otherwise.
1239 */
1240 static enum rofferr
1241 roff_res(struct roff *r, struct buf *buf, int ln, int pos)
1242 {
1243 char ubuf[24]; /* buffer to print the number */
1244 const char *start; /* start of the string to process */
1245 char *stesc; /* start of an escape sequence ('\\') */
1246 const char *stnam; /* start of the name, after "[(*" */
1247 const char *cp; /* end of the name, e.g. before ']' */
1248 const char *res; /* the string to be substituted */
1249 char *nbuf; /* new buffer to copy buf->buf to */
1250 size_t maxl; /* expected length of the escape name */
1251 size_t naml; /* actual length of the escape name */
1252 enum mandoc_esc esc; /* type of the escape sequence */
1253 int inaml; /* length returned from mandoc_escape() */
1254 int expand_count; /* to avoid infinite loops */
1255 int npos; /* position in numeric expression */
1256 int arg_complete; /* argument not interrupted by eol */
1257 char term; /* character terminating the escape */
1258
1259 expand_count = 0;
1260 start = buf->buf + pos;
/*
 * Begin at the last byte of the line.  The post-decrement in the
 * loop condition steps one byte to the left before each iteration,
 * so the very last byte is never tested for being a backslash.
 */
1261 stesc = strchr(start, '\0') - 1;
1262 while (stesc-- > start) {
1263
1264 /* Search backwards for the next backslash. */
1265
1266 if (*stesc != '\\')
1267 continue;
1268
1269 /* If it is escaped, skip it. */
1270
1271 for (cp = stesc - 1; cp >= start; cp--)
1272 if (*cp != '\\')
1273 break;
1274
/*
 * cp now sits just left of the run of backslashes ending at stesc,
 * so stesc - cp is the length of that run.  An even length means
 * the backslash at stesc is itself escaped; skip the whole run.
 */
1275 if ((stesc - cp) % 2 == 0) {
1276 stesc = (char *)cp;
1277 continue;
1278 }
1279
1280 /* Decide whether to expand or to check only. */
1281
1282 term = '\0';
1283 cp = stesc + 1;
1284 switch (*cp) {
/* \* is the only case starting with res == NULL (not yet looked up). */
1285 case '*':
1286 res = NULL;
1287 break;
/* \B and \w take a delimited argument; remember the delimiter. */
1288 case 'B':
1289 /* FALLTHROUGH */
1290 case 'w':
1291 term = cp[1];
1292 /* FALLTHROUGH */
1293 case 'n':
1294 res = ubuf;
1295 break;
/* Any other escape is only syntax-checked, never replaced. */
1296 default:
1297 esc = mandoc_escape(&cp, &stnam, &inaml);
1298 if (esc == ESCAPE_ERROR ||
1299 (esc == ESCAPE_SPECIAL &&
1300 mchars_spec2cp(r->mchars, stnam, inaml) < 0))
1301 mandoc_vmsg(MANDOCERR_ESC_BAD,
1302 r->parse, ln, (int)(stesc - buf->buf),
1303 "%.*s", (int)(cp - stesc), stesc);
1304 continue;
1305 }
1306
1307 if (EXPAND_LIMIT < ++expand_count) {
1308 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1309 ln, (int)(stesc - buf->buf), NULL);
1310 return(ROFF_IGN);
1311 }
1312
1313 /*
1314 * The third character decides the length
1315 * of the name of the string or register.
1316 * Save a pointer to the name.
1317 */
1318
/*
 * maxl == 0 means "no fixed length": the name then ends at term,
 * or at the end of the line if term is not found.
 */
1319 if (term == '\0') {
1320 switch (*++cp) {
1321 case '\0':
1322 maxl = 0;
1323 break;
1324 case '(':
1325 cp++;
1326 maxl = 2;
1327 break;
1328 case '[':
1329 cp++;
1330 term = ']';
1331 maxl = 0;
1332 break;
1333 default:
1334 maxl = 1;
1335 break;
1336 }
1337 } else {
/* \B and \w: skip the escape letter and the opening delimiter. */
1338 cp += 2;
1339 maxl = 0;
1340 }
1341 stnam = cp;
1342
1343 /* Advance to the end of the name. */
1344
1345 naml = 0;
1346 arg_complete = 1;
1347 while (maxl == 0 || naml < maxl) {
1348 if (*cp == '\0') {
1349 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
1350 ln, (int)(stesc - buf->buf), stesc);
1351 arg_complete = 0;
1352 break;
1353 }
1354 if (maxl == 0 && *cp == term) {
1355 cp++;
1356 break;
1357 }
/*
 * Outside of \w arguments, every byte counts as one character.
 * Inside \w arguments, an escape sequence is skipped as a whole by
 * mandoc_escape() and counted as one character only for the escape
 * types listed below.
 */
1358 if (*cp++ != '\\' || stesc[1] != 'w') {
1359 naml++;
1360 continue;
1361 }
1362 switch (mandoc_escape(&cp, NULL, NULL)) {
1363 case ESCAPE_SPECIAL:
1364 /* FALLTHROUGH */
1365 case ESCAPE_UNICODE:
1366 /* FALLTHROUGH */
1367 case ESCAPE_NUMBERED:
1368 /* FALLTHROUGH */
1369 case ESCAPE_OVERSTRIKE:
1370 naml++;
1371 break;
1372 default:
1373 break;
1374 }
1375 }
1376
1377 /*
1378 * Retrieve the replacement string; if it is
1379 * undefined, resume searching for escapes.
1380 */
1381
1382 switch (stesc[1]) {
1383 case '*':
1384 if (arg_complete)
1385 res = roff_getstrn(r, stnam, naml);
1386 break;
/*
 * \B yields "1" only if the argument is complete, parses as a
 * numeric expression, and that expression extends exactly up to
 * the closing delimiter; else it yields "0".
 */
1387 case 'B':
1388 npos = 0;
1389 ubuf[0] = arg_complete &&
1390 roff_evalnum(r, ln, stnam, &npos,
1391 NULL, ROFFNUM_SCALE) &&
1392 stnam + npos + 1 == cp ? '1' : '0';
1393 ubuf[1] = '\0';
1394 break;
1395 case 'n':
1396 if (arg_complete)
1397 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1398 roff_getregn(r, stnam, naml));
1399 else
1400 ubuf[0] = '\0';
1401 break;
/* The width is computed as 24 units per counted character. */
1402 case 'w':
1403 /* use even incomplete args */
1404 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1405 24 * (int)naml);
1406 break;
1407 }
1408
/*
 * res can only still be NULL for \*, when the argument was
 * incomplete or roff_getstrn() returned NULL; the escape is
 * then replaced by the empty string.
 */
1409 if (res == NULL) {
1410 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1411 r->parse, ln, (int)(stesc - buf->buf),
1412 "%.*s", (int)naml, stnam);
1413 res = "";
1414 } else if (buf->sz + strlen(res) > SHRT_MAX) {
1415 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1416 ln, (int)(stesc - buf->buf), NULL);
1417 return(ROFF_IGN);
1418 }
1419
1420 /* Replace the escape sequence by the string. */
1421
/*
 * Cut the old buffer at the backslash, such that one format string
 * can glue together: text before the escape, replacement, and the
 * text after the escape (cp).
 */
1422 *stesc = '\0';
1423 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
1424 buf->buf, res, cp) + 1;
1425
1426 /* Prepare for the next replacement. */
1427
/*
 * Point stesc just past the inserted text in the new buffer.
 * The backward scan thus continues through the replacement,
 * so escapes contained in it get processed as well.
 */
1428 start = nbuf + pos;
1429 stesc = nbuf + (stesc - buf->buf) + strlen(res);
1430 free(buf->buf);
1431 buf->buf = nbuf;
1432 }
1433 return(ROFF_CONT);
1434 }
1435
1436 /*
1437 * Process text streams:
1438 * Convert all breakable hyphens into ASCII_HYPH.
1439 * Decrement and spring input line trap.
1440 */
1441 static enum rofferr
1442 roff_parsetext(struct buf *buf, int pos, int *offs)
1443 {
1444 size_t sz;
1445 const char *start;
1446 char *p;
1447 int isz;
1448 enum mandoc_esc esc;
1449
1450 start = p = buf->buf + pos;
1451
1452 while (*p != '\0') {
1453 sz = strcspn(p, "-\\");
1454 p += sz;
1455
1456 if (*p == '\0')
1457 break;
1458
1459 if (*p == '\\') {
1460 /* Skip over escapes. */
1461 p++;
1462 esc = mandoc_escape((const char **)&p, NULL, NULL);
1463 if (esc == ESCAPE_ERROR)
1464 break;
1465 while (*p == '-')
1466 p++;
1467 continue;
1468 } else if (p == start) {
1469 p++;
1470 continue;
1471 }
1472
1473 if (isalpha((unsigned char)p[-1]) &&
1474 isalpha((unsigned char)p[1]))
1475 *p = ASCII_HYPH;
1476 p++;
1477 }
1478
1479 /* Spring the input line trap. */
1480 if (roffit_lines == 1) {
1481 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1482 free(buf->buf);
1483 buf->buf = p;
1484 buf->sz = isz + 1;
1485 *offs = 0;
1486 free(roffit_macro);
1487 roffit_lines = 0;
1488 return(ROFF_REPARSE);
1489 } else if (roffit_lines > 1)
1490 --roffit_lines;
1491 return(ROFF_CONT);
1492 }
1493
1494 enum rofferr
1495 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
1496 {
1497 enum rofft t;
1498 enum rofferr e;
1499 int pos; /* parse point */
1500 int spos; /* saved parse point for messages */
1501 int ppos; /* original offset in buf->buf */
1502 int ctl; /* macro line (boolean) */
1503
1504 ppos = pos = *offs;
1505
1506 /* Handle in-line equation delimiters. */
1507
1508 if (r->tbl == NULL &&
1509 r->last_eqn != NULL && r->last_eqn->delim &&
1510 (r->eqn == NULL || r->eqn_inline)) {
1511 e = roff_eqndelim(r, buf, pos);
1512 if (e == ROFF_REPARSE)
1513 return(e);
1514 assert(e == ROFF_CONT);
1515 }
1516
1517 /* Expand some escape sequences. */
1518
1519 e = roff_res(r, buf, ln, pos);
1520 if (e == ROFF_IGN)
1521 return(e);
1522 assert(e == ROFF_CONT);
1523
1524 ctl = roff_getcontrol(r, buf->buf, &pos);
1525
1526 /*
1527 * First, if a scope is open and we're not a macro, pass the
1528 * text through the macro's filter.
1529 * Equations process all content themselves.
1530 * Tables process almost all content themselves, but we want
1531 * to warn about macros before passing it there.
1532 */
1533
1534 if (r->last != NULL && ! ctl) {
1535 t = r->last->tok;
1536 assert(roffs[t].text);
1537 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1538 assert(e == ROFF_IGN || e == ROFF_CONT);
1539 if (e != ROFF_CONT)
1540 return(e);
1541 }
1542 if (r->eqn != NULL)
1543 return(eqn_read(&r->eqn, ln, buf->buf, ppos, offs));
1544 if (r->tbl != NULL && ( ! ctl || buf->buf[pos] == '\0'))
1545 return(tbl_read(r->tbl, ln, buf->buf, ppos));
1546 if ( ! ctl)
1547 return(roff_parsetext(buf, pos, offs));
1548
1549 /* Skip empty request lines. */
1550
1551 if (buf->buf[pos] == '"') {
1552 mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
1553 ln, pos, NULL);
1554 return(ROFF_IGN);
1555 } else if (buf->buf[pos] == '\0')
1556 return(ROFF_IGN);
1557
1558 /*
1559 * If a scope is open, go to the child handler for that macro,
1560 * as it may want to preprocess before doing anything with it.
1561 * Don't do so if an equation is open.
1562 */
1563
1564 if (r->last) {
1565 t = r->last->tok;
1566 assert(roffs[t].sub);
1567 return((*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs));
1568 }
1569
1570 /* No scope is open. This is a new request or macro. */
1571
1572 spos = pos;
1573 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1574
1575 /* Tables ignore most macros. */
1576
1577 if (r->tbl != NULL && (t == ROFF_MAX || t == ROFF_TS)) {
1578 mandoc_msg(MANDOCERR_TBLMACRO, r->parse,
1579 ln, pos, buf->buf + spos);
1580 if (t == ROFF_TS)
1581 return(ROFF_IGN);
1582 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1583 pos++;
1584 while (buf->buf[pos] != '\0' && buf->buf[pos] == ' ')
1585 pos++;
1586 return(tbl_read(r->tbl, ln, buf->buf, pos));
1587 }
1588
1589 /*
1590 * This is neither a roff request nor a user-defined macro.
1591 * Let the standard macro set parsers handle it.
1592 */
1593
1594 if (t == ROFF_MAX)
1595 return(ROFF_CONT);
1596
1597 /* Execute a roff request or a user defined macro. */
1598
1599 assert(roffs[t].proc);
1600 return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs));
1601 }
1602
1603 void
1604 roff_endparse(struct roff *r)
1605 {
1606
1607 if (r->last)
1608 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1609 r->last->line, r->last->col,
1610 roffs[r->last->tok].name);
1611
1612 if (r->eqn) {
1613 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1614 r->eqn->eqn.ln, r->eqn->eqn.pos, "EQ");
1615 eqn_end(&r->eqn);
1616 }
1617
1618 if (r->tbl) {
1619 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1620 r->tbl->line, r->tbl->pos, "TS");
1621 tbl_end(&r->tbl);
1622 }
1623 }
1624
1625 /*
1626 * Parse a roff node's type from the input buffer. This must be in the
1627 * form of ".foo xxx" in the usual way.
1628 */
1629 static enum rofft
1630 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1631 {
1632 char *cp;
1633 const char *mac;
1634 size_t maclen;
1635 enum rofft t;
1636
1637 cp = buf + *pos;
1638
1639 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1640 return(ROFF_MAX);
1641
1642 mac = cp;
1643 maclen = roff_getname(r, &cp, ln, ppos);
1644
1645 t = (r->current_string = roff_getstrn(r, mac, maclen))
1646 ? ROFF_USERDEF : roffhash_find(mac, maclen);
1647
1648 if (ROFF_MAX != t)
1649 *pos = cp - buf;
1650
1651 return(t);
1652 }
1653
1654 /* --- handling of request blocks ----------------------------------------- */
1655
1656 static enum rofferr
1657 roff_cblock(ROFF_ARGS)
1658 {
1659
1660 /*
1661 * A block-close `..' should only be invoked as a child of an
1662 * ignore macro, otherwise raise a warning and just ignore it.
1663 */
1664
1665 if (r->last == NULL) {
1666 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1667 ln, ppos, "..");
1668 return(ROFF_IGN);
1669 }
1670
1671 switch (r->last->tok) {
1672 case ROFF_am:
1673 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1674 /* FALLTHROUGH */
1675 case ROFF_ami:
1676 /* FALLTHROUGH */
1677 case ROFF_de:
1678 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1679 /* FALLTHROUGH */
1680 case ROFF_dei:
1681 /* FALLTHROUGH */
1682 case ROFF_ig:
1683 break;
1684 default:
1685 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1686 ln, ppos, "..");
1687 return(ROFF_IGN);
1688 }
1689
1690 if (buf->buf[pos] != '\0')
1691 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1692 ".. %s", buf->buf + pos);
1693
1694 roffnode_pop(r);
1695 roffnode_cleanscope(r);
1696 return(ROFF_IGN);
1697
1698 }
1699
1700 static void
1701 roffnode_cleanscope(struct roff *r)
1702 {
1703
1704 while (r->last) {
1705 if (--r->last->endspan != 0)
1706 break;
1707 roffnode_pop(r);
1708 }
1709 }
1710
1711 static void
1712 roff_ccond(struct roff *r, int ln, int ppos)
1713 {
1714
1715 if (NULL == r->last) {
1716 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1717 ln, ppos, "\\}");
1718 return;
1719 }
1720
1721 switch (r->last->tok) {
1722 case ROFF_el:
1723 /* FALLTHROUGH */
1724 case ROFF_ie:
1725 /* FALLTHROUGH */
1726 case ROFF_if:
1727 break;
1728 default:
1729 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1730 ln, ppos, "\\}");
1731 return;
1732 }
1733
1734 if (r->last->endspan > -1) {
1735 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1736 ln, ppos, "\\}");
1737 return;
1738 }
1739
1740 roffnode_pop(r);
1741 roffnode_cleanscope(r);
1742 return;
1743 }
1744
1745 static enum rofferr
1746 roff_block(ROFF_ARGS)
1747 {
1748 const char *name;
1749 char *iname, *cp;
1750 size_t namesz;
1751
1752 /* Ignore groff compatibility mode for now. */
1753
1754 if (tok == ROFF_de1)
1755 tok = ROFF_de;
1756 else if (tok == ROFF_dei1)
1757 tok = ROFF_dei;
1758 else if (tok == ROFF_am1)
1759 tok = ROFF_am;
1760 else if (tok == ROFF_ami1)
1761 tok = ROFF_ami;
1762
1763 /* Parse the macro name argument. */
1764
1765 cp = buf->buf + pos;
1766 if (tok == ROFF_ig) {
1767 iname = NULL;
1768 namesz = 0;
1769 } else {
1770 iname = cp;
1771 namesz = roff_getname(r, &cp, ln, ppos);
1772 iname[namesz] = '\0';
1773 }
1774
1775 /* Resolve the macro name argument if it is indirect. */
1776
1777 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1778 if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1779 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1780 r->parse, ln, (int)(iname - buf->buf),
1781 "%.*s", (int)namesz, iname);
1782 namesz = 0;
1783 } else
1784 namesz = strlen(name);
1785 } else
1786 name = iname;
1787
1788 if (namesz == 0 && tok != ROFF_ig) {
1789 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
1790 ln, ppos, roffs[tok].name);
1791 return(ROFF_IGN);
1792 }
1793
1794 roffnode_push(r, tok, name, ln, ppos);
1795
1796 /*
1797 * At the beginning of a `de' macro, clear the existing string
1798 * with the same name, if there is one. New content will be
1799 * appended from roff_block_text() in multiline mode.
1800 */
1801
1802 if (tok == ROFF_de || tok == ROFF_dei)
1803 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1804
1805 if (*cp == '\0')
1806 return(ROFF_IGN);
1807
1808 /* Get the custom end marker. */
1809
1810 iname = cp;
1811 namesz = roff_getname(r, &cp, ln, ppos);
1812
1813 /* Resolve the end marker if it is indirect. */
1814
1815 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1816 if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1817 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1818 r->parse, ln, (int)(iname - buf->buf),
1819 "%.*s", (int)namesz, iname);
1820 namesz = 0;
1821 } else
1822 namesz = strlen(name);
1823 } else
1824 name = iname;
1825
1826 if (namesz)
1827 r->last->end = mandoc_strndup(name, namesz);
1828
1829 if (*cp != '\0')
1830 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
1831 ln, pos, ".%s ... %s", roffs[tok].name, cp);
1832
1833 return(ROFF_IGN);
1834 }
1835
1836 static enum rofferr
1837 roff_block_sub(ROFF_ARGS)
1838 {
1839 enum rofft t;
1840 int i, j;
1841
1842 /*
1843 * First check whether a custom macro exists at this level. If
1844 * it does, then check against it. This is some of groff's
1845 * stranger behaviours. If we encountered a custom end-scope
1846 * tag and that tag also happens to be a "real" macro, then we
1847 * need to try interpreting it again as a real macro. If it's
1848 * not, then return ignore. Else continue.
1849 */
1850
1851 if (r->last->end) {
1852 for (i = pos, j = 0; r->last->end[j]; j++, i++)
1853 if (buf->buf[i] != r->last->end[j])
1854 break;
1855
1856 if (r->last->end[j] == '\0' &&
1857 (buf->buf[i] == '\0' ||
1858 buf->buf[i] == ' ' ||
1859 buf->buf[i] == '\t')) {
1860 roffnode_pop(r);
1861 roffnode_cleanscope(r);
1862
1863 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
1864 i++;
1865
1866 pos = i;
1867 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
1868 ROFF_MAX)
1869 return(ROFF_RERUN);
1870 return(ROFF_IGN);
1871 }
1872 }
1873
1874 /*
1875 * If we have no custom end-query or lookup failed, then try
1876 * pulling it out of the hashtable.
1877 */
1878
1879 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1880
1881 if (t != ROFF_cblock) {
1882 if (tok != ROFF_ig)
1883 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
1884 return(ROFF_IGN);
1885 }
1886
1887 assert(roffs[t].proc);
1888 return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs));
1889 }
1890
1891 static enum rofferr
1892 roff_block_text(ROFF_ARGS)
1893 {
1894
1895 if (tok != ROFF_ig)
1896 roff_setstr(r, r->last->name, buf->buf + pos, 2);
1897
1898 return(ROFF_IGN);
1899 }
1900
1901 static enum rofferr
1902 roff_cond_sub(ROFF_ARGS)
1903 {
1904 enum rofft t;
1905 char *ep;
1906 int rr;
1907
1908 rr = r->last->rule;
1909 roffnode_cleanscope(r);
1910 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1911
1912 /*
1913 * Fully handle known macros when they are structurally
1914 * required or when the conditional evaluated to true.
1915 */
1916
1917 if ((t != ROFF_MAX) &&
1918 (rr || roffs[t].flags & ROFFMAC_STRUCT)) {
1919 assert(roffs[t].proc);
1920 return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs));
1921 }
1922
1923 /*
1924 * If `\}' occurs on a macro line without a preceding macro,
1925 * drop the line completely.
1926 */
1927
1928 ep = buf->buf + pos;
1929 if (ep[0] == '\\' && ep[1] == '}')
1930 rr = 0;
1931
1932 /* Always check for the closing delimiter `\}'. */
1933
1934 while ((ep = strchr(ep, '\\')) != NULL) {
1935 if (*(++ep) == '}') {
1936 *ep = '&';
1937 roff_ccond(r, ln, ep - buf->buf - 1);
1938 }
1939 if (*ep != '\0')
1940 ++ep;
1941 }
1942 return(rr ? ROFF_CONT : ROFF_IGN);
1943 }
1944
1945 static enum rofferr
1946 roff_cond_text(ROFF_ARGS)
1947 {
1948 char *ep;
1949 int rr;
1950
1951 rr = r->last->rule;
1952 roffnode_cleanscope(r);
1953
1954 ep = buf->buf + pos;
1955 while ((ep = strchr(ep, '\\')) != NULL) {
1956 if (*(++ep) == '}') {
1957 *ep = '&';
1958 roff_ccond(r, ln, ep - buf->buf - 1);
1959 }
1960 if (*ep != '\0')
1961 ++ep;
1962 }
1963 return(rr ? ROFF_CONT : ROFF_IGN);
1964 }
1965
1966 /* --- handling of numeric and conditional expressions -------------------- */
1967
1968 /*
1969 * Parse a single signed integer number. Stop at the first non-digit.
1970 * If there is at least one digit, return success and advance the
1971 * parse point, else return failure and let the parse point unchanged.
1972 * Ignore overflows, treat them just like the C language.
1973 */
1974 static int
1975 roff_getnum(const char *v, int *pos, int *res, int flags)
1976 {
1977 int myres, scaled, n, p;
1978
1979 if (NULL == res)
1980 res = &myres;
1981
1982 p = *pos;
1983 n = v[p] == '-';
1984 if (n || v[p] == '+')
1985 p++;
1986
1987 if (flags & ROFFNUM_WHITE)
1988 while (isspace((unsigned char)v[p]))
1989 p++;
1990
1991 for (*res = 0; isdigit((unsigned char)v[p]); p++)
1992 *res = 10 * *res + v[p] - '0';
1993 if (p == *pos + n)
1994 return 0;
1995
1996 if (n)
1997 *res = -*res;
1998
1999 /* Each number may be followed by one optional scaling unit. */
2000
2001 switch (v[p]) {
2002 case 'f':
2003 scaled = *res * 65536;
2004 break;
2005 case 'i':
2006 scaled = *res * 240;
2007 break;
2008 case 'c':
2009 scaled = *res * 240 / 2.54;
2010 break;
2011 case 'v':
2012 /* FALLTROUGH */
2013 case 'P':
2014 scaled = *res * 40;
2015 break;
2016 case 'm':
2017 /* FALLTROUGH */
2018 case 'n':
2019 scaled = *res * 24;
2020 break;
2021 case 'p':
2022 scaled = *res * 10 / 3;
2023 break;
2024 case 'u':
2025 scaled = *res;
2026 break;
2027 case 'M':
2028 scaled = *res * 6 / 25;
2029 break;
2030 default:
2031 scaled = *res;
2032 p--;
2033 break;
2034 }
2035 if (flags & ROFFNUM_SCALE)
2036 *res = scaled;
2037
2038 *pos = p + 1;
2039 return(1);
2040 }
2041
/*
 * Evaluate a string comparison condition.
 * The character at the parse point is the delimiter.
 * Succeed if the string between its first and second occurrence
 * equals the string between its second and third occurrence.
 * Advance the parse point after the third occurrence
 * or, lacking that, to the end of the line.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim;	/* the initial delimiter */
	const char	*lhs;	/* cursor in the first string */
	const char	*rhs;	/* cursor in the second string */
	int		 match;

	delim = v + *pos;
	lhs = delim + 1;
	rhs = strchr(lhs, *delim);
	match = 0;

	/* Compare only if there is a middle delimiter. */

	if (rhs != NULL) {
		while (*++rhs != '\0') {
			if (*lhs != *rhs) {
				/* Mismatch: find the end anyway. */
				rhs = strchr(rhs, *delim);
				break;
			}
			if (*rhs == *delim) {
				/* Both strings ended together. */
				match = 1;
				break;
			}
			lhs++;
		}
	}

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return(match);
}
2084
2085 /*
2086 * Evaluate an optionally negated single character, numerical,
2087 * or string condition.
2088 */
2089 static int
2090 roff_evalcond(struct roff *r, int ln, const char *v, int *pos)
2091 {
2092 int number, savepos, wanttrue;
2093
2094 if ('!' == v[*pos]) {
2095 wanttrue = 0;
2096 (*pos)++;
2097 } else
2098 wanttrue = 1;
2099
2100 switch (v[*pos]) {
2101 case '\0':
2102 return(0);
2103 case 'n':
2104 /* FALLTHROUGH */
2105 case 'o':
2106 (*pos)++;
2107 return(wanttrue);
2108 case 'c':
2109 /* FALLTHROUGH */
2110 case 'd':
2111 /* FALLTHROUGH */
2112 case 'e':
2113 /* FALLTHROUGH */
2114 case 'r':
2115 /* FALLTHROUGH */
2116 case 't':
2117 /* FALLTHROUGH */
2118 case 'v':
2119 (*pos)++;
2120 return(!wanttrue);
2121 default:
2122 break;
2123 }
2124
2125 savepos = *pos;
2126 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2127 return((number > 0) == wanttrue);
2128 else if (*pos == savepos)
2129 return(roff_evalstrcond(v, pos) == wanttrue);
2130 else
2131 return (0);
2132 }
2133
2134 static enum rofferr
2135 roff_line_ignore(ROFF_ARGS)
2136 {
2137
2138 return(ROFF_IGN);
2139 }
2140
2141 static enum rofferr
2142 roff_insec(ROFF_ARGS)
2143 {
2144
2145 mandoc_msg(MANDOCERR_REQ_INSEC, r->parse,
2146 ln, ppos, roffs[tok].name);
2147 return(ROFF_IGN);
2148 }
2149
2150 static enum rofferr
2151 roff_unsupp(ROFF_ARGS)
2152 {
2153
2154 mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse,
2155 ln, ppos, roffs[tok].name);
2156 return(ROFF_IGN);
2157 }
2158
2159 static enum rofferr
2160 roff_cond(ROFF_ARGS)
2161 {
2162
2163 roffnode_push(r, tok, NULL, ln, ppos);
2164
2165 /*
2166 * An `.el' has no conditional body: it will consume the value
2167 * of the current rstack entry set in prior `ie' calls or
2168 * defaults to DENY.
2169 *
2170 * If we're not an `el', however, then evaluate the conditional.
2171 */
2172
2173 r->last->rule = tok == ROFF_el ?
2174 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2175 roff_evalcond(r, ln, buf->buf, &pos);
2176
2177 /*
2178 * An if-else will put the NEGATION of the current evaluated
2179 * conditional into the stack of rules.
2180 */
2181
2182 if (tok == ROFF_ie) {
2183 if (r->rstackpos + 1 == r->rstacksz) {
2184 r->rstacksz += 16;
2185 r->rstack = mandoc_reallocarray(r->rstack,
2186 r->rstacksz, sizeof(int));
2187 }
2188 r->rstack[++r->rstackpos] = !r->last->rule;
2189 }
2190
2191 /* If the parent has false as its rule, then so do we. */
2192
2193 if (r->last->parent && !r->last->parent->rule)
2194 r->last->rule = 0;
2195
2196 /*
2197 * Determine scope.
2198 * If there is nothing on the line after the conditional,
2199 * not even whitespace, use next-line scope.
2200 */
2201
2202 if (buf->buf[pos] == '\0') {
2203 r->last->endspan = 2;
2204 goto out;
2205 }
2206
2207 while (buf->buf[pos] == ' ')
2208 pos++;
2209
2210 /* An opening brace requests multiline scope. */
2211
2212 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2213 r->last->endspan = -1;
2214 pos += 2;
2215 goto out;
2216 }
2217
2218 /*
2219 * Anything else following the conditional causes
2220 * single-line scope. Warn if the scope contains
2221 * nothing but trailing whitespace.
2222 */
2223
2224 if (buf->buf[pos] == '\0')
2225 mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
2226 ln, ppos, roffs[tok].name);
2227
2228 r->last->endspan = 1;
2229
2230 out:
2231 *offs = pos;
2232 return(ROFF_RERUN);
2233 }
2234
2235 static enum rofferr
2236 roff_ds(ROFF_ARGS)
2237 {
2238 char *string;
2239 const char *name;
2240 size_t namesz;
2241
2242 /* Ignore groff compatibility mode for now. */
2243
2244 if (tok == ROFF_ds1)
2245 tok = ROFF_ds;
2246 else if (tok == ROFF_as1)
2247 tok = ROFF_as;
2248
2249 /*
2250 * The first word is the name of the string.
2251 * If it is empty or terminated by an escape sequence,
2252 * abort the `ds' request without defining anything.
2253 */
2254
2255 name = string = buf->buf + pos;
2256 if (*name == '\0')
2257 return(ROFF_IGN);
2258
2259 namesz = roff_getname(r, &string, ln, pos);
2260 if (name[namesz] == '\\')
2261 return(ROFF_IGN);
2262
2263 /* Read past the initial double-quote, if any. */
2264 if (*string == '"')
2265 string++;
2266
2267 /* The rest is the value. */
2268 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2269 ROFF_as == tok);
2270 return(ROFF_IGN);
2271 }
2272
/*
 * Parse a single operator, one or two characters long.
 * If the operator is recognized, store and return its code and
 * advance the parse point, else return failure and let the parse
 * point unchanged.  One-character operators and == are coded as
 * their first character; the codes of the others are:
 * <= 'l', <> '!', <? 'i', >= 'g', >? 'a'.
 * The character at the parse point is stored in *res even on failure.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	int	 len;	/* of the operator */

	*res = v[*pos];
	len = 1;

	switch (*res) {
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case ':':
		break;
	case '<':
		len = 2;
		if (v[*pos + 1] == '=')
			*res = 'l';
		else if (v[*pos + 1] == '>')
			*res = '!';
		else if (v[*pos + 1] == '?')
			*res = 'i';
		else
			len = 1;
		break;
	case '>':
		len = 2;
		if (v[*pos + 1] == '=')
			*res = 'g';
		else if (v[*pos + 1] == '?')
			*res = 'a';
		else
			len = 1;
		break;
	case '=':
		if (v[*pos + 1] == '=')
			len = 2;
		break;
	default:
		return(0);
	}

	*pos += len;
	return(*res);
}
2342
2343 /*
2344 * Evaluate either a parenthesized numeric expression
2345 * or a single signed integer number.
2346 */
2347 static int
2348 roff_evalpar(struct roff *r, int ln,
2349 const char *v, int *pos, int *res, int flags)
2350 {
2351
2352 if ('(' != v[*pos])
2353 return(roff_getnum(v, pos, res, flags));
2354
2355 (*pos)++;
2356 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2357 return(0);
2358
2359 /*
2360 * Omission of the closing parenthesis
2361 * is an error in validation mode,
2362 * but ignored in evaluation mode.
2363 */
2364
2365 if (')' == v[*pos])
2366 (*pos)++;
2367 else if (NULL == res)
2368 return(0);
2369
2370 return(1);
2371 }
2372
2373 /*
2374 * Evaluate a complete numeric expression.
2375 * Proceed left to right, there is no concept of precedence.
2376 */
2377 static int
2378 roff_evalnum(struct roff *r, int ln, const char *v,
2379 int *pos, int *res, int flags)
2380 {
2381 int mypos, operand2;
2382 char operator;
2383
2384 if (NULL == pos) {
2385 mypos = 0;
2386 pos = &mypos;
2387 }
2388
2389 if (flags & ROFFNUM_WHITE)
2390 while (isspace((unsigned char)v[*pos]))
2391 (*pos)++;
2392
2393 if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2394 return(0);
2395
2396 while (1) {
2397 if (flags & ROFFNUM_WHITE)
2398 while (isspace((unsigned char)v[*pos]))
2399 (*pos)++;
2400
2401 if ( ! roff_getop(v, pos, &operator))
2402 break;
2403
2404 if (flags & ROFFNUM_WHITE)
2405 while (isspace((unsigned char)v[*pos]))
2406 (*pos)++;
2407
2408 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2409 return(0);
2410
2411 if (flags & ROFFNUM_WHITE)
2412 while (isspace((unsigned char)v[*pos]))
2413 (*pos)++;
2414
2415 if (NULL == res)
2416 continue;
2417
2418 switch (operator) {
2419 case '+':
2420 *res += operand2;
2421 break;
2422 case '-':
2423 *res -= operand2;
2424 break;
2425 case '*':
2426 *res *= operand2;
2427 break;
2428 case '/':
2429 if (operand2 == 0) {
2430 mandoc_msg(MANDOCERR_DIVZERO,
2431 r->parse, ln, *pos, v);
2432 *res = 0;
2433 break;
2434 }
2435 *res /= operand2;
2436 break;
2437 case '%':
2438 if (operand2 == 0) {
2439 mandoc_msg(MANDOCERR_DIVZERO,
2440 r->parse, ln, *pos, v);
2441 *res = 0;
2442 break;
2443 }
2444 *res %= operand2;
2445 break;
2446 case '<':
2447 *res = *res < operand2;
2448 break;
2449 case '>':
2450 *res = *res > operand2;
2451 break;
2452 case 'l':
2453 *res = *res <= operand2;
2454 break;
2455 case 'g':
2456 *res = *res >= operand2;
2457 break;
2458 case '=':
2459 *res = *res == operand2;
2460 break;
2461 case '!':
2462 *res = *res != operand2;
2463 break;
2464 case '&':
2465 *res = *res && operand2;
2466 break;
2467 case ':':
2468 *res = *res || operand2;
2469 break;
2470 case 'i':
2471 if (operand2 < *res)
2472 *res = operand2;
2473 break;
2474 case 'a':
2475 if (operand2 > *res)
2476 *res = operand2;
2477 break;
2478 default:
2479 abort();
2480 }
2481 }
2482 return(1);
2483 }
2484
2485 /* --- register management ------------------------------------------------ */
2486
2487 void
2488 roff_setreg(struct roff *r, const char *name, int val, char sign)
2489 {
2490 struct roffreg *reg;
2491
2492 /* Search for an existing register with the same name. */
2493 reg = r->regtab;
2494
2495 while (reg && strcmp(name, reg->key.p))
2496 reg = reg->next;
2497
2498 if (NULL == reg) {
2499 /* Create a new register. */
2500 reg = mandoc_malloc(sizeof(struct roffreg));
2501 reg->key.p = mandoc_strdup(name);
2502 reg->key.sz = strlen(name);
2503 reg->val = 0;
2504 reg->next = r->regtab;
2505 r->regtab = reg;
2506 }
2507
2508 if ('+' == sign)
2509 reg->val += val;
2510 else if ('-' == sign)
2511 reg->val -= val;
2512 else
2513 reg->val = val;
2514 }
2515
/*
 * Look up a predefined read-only number register.
 * Only the first character of name is inspected.
 * Return the register value, or -1 if no such register is
 * predefined; should a predefined register ever need the
 * value -1, a different marker would have to be chosen.
 */
static int
roff_getregro(const char *name)
{
	static const struct {
		char	 id;
		int	 val;
	} ro[] = {
		{ 'A', 0 },	/* ASCII approximation mode is always off. */
		{ 'g', 1 },	/* Groff compatibility mode is always on. */
		{ 'H', 24 },	/* Fixed horizontal resolution. */
		{ 'j', 0 },	/* Always adjust left margin only. */
		{ 'T', 1 },	/* Some output device is always defined. */
		{ 'V', 40 }	/* Fixed vertical resolution. */
	};
	size_t	 i;

	for (i = 0; i < sizeof(ro) / sizeof(ro[0]); i++)
		if (ro[i].id == *name)
			return(ro[i].val);

	return(-1);
}
2543
2544 int
2545 roff_getreg(const struct roff *r, const char *name)
2546 {
2547 struct roffreg *reg;
2548 int val;
2549
2550 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
2551 val = roff_getregro(name + 1);
2552 if (-1 != val)
2553 return (val);
2554 }
2555
2556 for (reg = r->regtab; reg; reg = reg->next)
2557 if (0 == strcmp(name, reg->key.p))
2558 return(reg->val);
2559
2560 return(0);
2561 }
2562
2563 static int
2564 roff_getregn(const struct roff *r, const char *name, size_t len)
2565 {
2566 struct roffreg *reg;
2567 int val;
2568
2569 if ('.' == name[0] && 2 == len) {
2570 val = roff_getregro(name + 1);
2571 if (-1 != val)
2572 return (val);
2573 }
2574
2575 for (reg = r->regtab; reg; reg = reg->next)
2576 if (len == reg->key.sz &&
2577 0 == strncmp(name, reg->key.p, len))
2578 return(reg->val);
2579
2580 return(0);
2581 }
2582
2583 static void
2584 roff_freereg(struct roffreg *reg)
2585 {
2586 struct roffreg *old_reg;
2587
2588 while (NULL != reg) {
2589 free(reg->key.p);
2590 old_reg = reg;
2591 reg = reg->next;
2592 free(old_reg);
2593 }
2594 }
2595
2596 static enum rofferr
2597 roff_nr(ROFF_ARGS)
2598 {
2599 char *key, *val;
2600 size_t keysz;
2601 int iv;
2602 char sign;
2603
2604 key = val = buf->buf + pos;
2605 if (*key == '\0')
2606 return(ROFF_IGN);
2607
2608 keysz = roff_getname(r, &val, ln, pos);
2609 if (key[keysz] == '\\')
2610 return(ROFF_IGN);
2611 key[keysz] = '\0';
2612
2613 sign = *val;
2614 if (sign == '+' || sign == '-')
2615 val++;
2616
2617 if (roff_evalnum(r, ln, val, NULL, &iv, ROFFNUM_SCALE))
2618 roff_setreg(r, key, iv, sign);
2619
2620 return(ROFF_IGN);
2621 }
2622
2623 static enum rofferr
2624 roff_rr(ROFF_ARGS)
2625 {
2626 struct roffreg *reg, **prev;
2627 char *name, *cp;
2628 size_t namesz;
2629
2630 name = cp = buf->buf + pos;
2631 if (*name == '\0')
2632 return(ROFF_IGN);
2633 namesz = roff_getname(r, &cp, ln, pos);
2634 name[namesz] = '\0';
2635
2636 prev = &r->regtab;
2637 while (1) {
2638 reg = *prev;
2639 if (reg == NULL || !strcmp(name, reg->key.p))
2640 break;
2641 prev = &reg->next;
2642 }
2643 if (reg != NULL) {
2644 *prev = reg->next;
2645 free(reg->key.p);
2646 free(reg);
2647 }
2648 return(ROFF_IGN);
2649 }
2650
2651 /* --- handler functions for roff requests -------------------------------- */
2652
2653 static enum rofferr
2654 roff_rm(ROFF_ARGS)
2655 {
2656 const char *name;
2657 char *cp;
2658 size_t namesz;
2659
2660 cp = buf->buf + pos;
2661 while (*cp != '\0') {
2662 name = cp;
2663 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
2664 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
2665 if (name[namesz] == '\\')
2666 break;
2667 }
2668 return(ROFF_IGN);
2669 }
2670
2671 static enum rofferr
2672 roff_it(ROFF_ARGS)
2673 {
2674 int iv;
2675
2676 /* Parse the number of lines. */
2677
2678 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
2679 mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
2680 ln, ppos, buf->buf + 1);
2681 return(ROFF_IGN);
2682 }
2683
2684 while (isspace((unsigned char)buf->buf[pos]))
2685 pos++;
2686
2687 /*
2688 * Arm the input line trap.
2689 * Special-casing "an-trap" is an ugly workaround to cope
2690 * with DocBook stupidly fiddling with man(7) internals.
2691 */
2692
2693 roffit_lines = iv;
2694 roffit_macro = mandoc_strdup(iv != 1 ||
2695 strcmp(buf->buf + pos, "an-trap") ?
2696 buf->buf + pos : "br");
2697 return(ROFF_IGN);
2698 }
2699
2700 static enum rofferr
2701 roff_Dd(ROFF_ARGS)
2702 {
2703 const char *const *cp;
2704
2705 if ((r->options & (MPARSE_MDOC | MPARSE_QUICK)) == 0)
2706 for (cp = __mdoc_reserved; *cp; cp++)
2707 roff_setstr(r, *cp, NULL, 0);
2708
2709 if (r->format == 0)
2710 r->format = MPARSE_MDOC;
2711
2712 return(ROFF_CONT);
2713 }
2714
2715 static enum rofferr
2716 roff_TH(ROFF_ARGS)
2717 {
2718 const char *const *cp;
2719
2720 if ((r->options & MPARSE_QUICK) == 0)
2721 for (cp = __man_reserved; *cp; cp++)
2722 roff_setstr(r, *cp, NULL, 0);
2723
2724 if (r->format == 0)
2725 r->format = MPARSE_MAN;
2726
2727 return(ROFF_CONT);
2728 }
2729
2730 static enum rofferr
2731 roff_TE(ROFF_ARGS)
2732 {
2733
2734 if (NULL == r->tbl)
2735 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2736 ln, ppos, "TE");
2737 else if ( ! tbl_end(&r->tbl)) {
2738 free(buf->buf);
2739 buf->buf = mandoc_strdup(".sp");
2740 buf->sz = 4;
2741 return(ROFF_REPARSE);
2742 }
2743 return(ROFF_IGN);
2744 }
2745
2746 static enum rofferr
2747 roff_T_(ROFF_ARGS)
2748 {
2749
2750 if (NULL == r->tbl)
2751 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2752 ln, ppos, "T&");
2753 else
2754 tbl_restart(ppos, ln, r->tbl);
2755
2756 return(ROFF_IGN);
2757 }
2758
2759 /*
2760 * Handle in-line equation delimiters.
2761 */
2762 static enum rofferr
2763 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
2764 {
2765 char *cp1, *cp2;
2766 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
2767
2768 /*
2769 * Outside equations, look for an opening delimiter.
2770 * If we are inside an equation, we already know it is
2771 * in-line, or this function wouldn't have been called;
2772 * so look for a closing delimiter.
2773 */
2774
2775 cp1 = buf->buf + pos;
2776 cp2 = strchr(cp1, r->eqn == NULL ?
2777 r->last_eqn->odelim : r->last_eqn->cdelim);
2778 if (cp2 == NULL)
2779 return(ROFF_CONT);
2780
2781 *cp2++ = '\0';
2782 bef_pr = bef_nl = aft_nl = aft_pr = "";
2783
2784 /* Handle preceding text, protecting whitespace. */
2785
2786 if (*buf->buf != '\0') {
2787 if (r->eqn == NULL)
2788 bef_pr = "\\&";
2789 bef_nl = "\n";
2790 }
2791
2792 /*
2793 * Prepare replacing the delimiter with an equation macro
2794 * and drop leading white space from the equation.
2795 */
2796
2797 if (r->eqn == NULL) {
2798 while (*cp2 == ' ')
2799 cp2++;
2800 mac = ".EQ";
2801 } else
2802 mac = ".EN";
2803
2804 /* Handle following text, protecting whitespace. */
2805
2806 if (*cp2 != '\0') {
2807 aft_nl = "\n";
2808 if (r->eqn != NULL)
2809 aft_pr = "\\&";
2810 }
2811
2812 /* Do the actual replacement. */
2813
2814 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
2815 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
2816 free(buf->buf);
2817 buf->buf = cp1;
2818
2819 /* Toggle the in-line state of the eqn subsystem. */
2820
2821 r->eqn_inline = r->eqn == NULL;
2822 return(ROFF_REPARSE);
2823 }
2824
2825 static enum rofferr
2826 roff_EQ(ROFF_ARGS)
2827 {
2828 struct eqn_node *e;
2829
2830 assert(r->eqn == NULL);
2831 e = eqn_alloc(ppos, ln, r->parse);
2832
2833 if (r->last_eqn) {
2834 r->last_eqn->next = e;
2835 e->delim = r->last_eqn->delim;
2836 e->odelim = r->last_eqn->odelim;
2837 e->cdelim = r->last_eqn->cdelim;
2838 } else
2839 r->first_eqn = r->last_eqn = e;
2840
2841 r->eqn = r->last_eqn = e;
2842
2843 if (buf->buf[pos] != '\0')
2844 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2845 ".EQ %s", buf->buf + pos);
2846
2847 return(ROFF_IGN);
2848 }
2849
2850 static enum rofferr
2851 roff_EN(ROFF_ARGS)
2852 {
2853
2854 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
2855 return(ROFF_IGN);
2856 }
2857
2858 static enum rofferr
2859 roff_TS(ROFF_ARGS)
2860 {
2861 struct tbl_node *tbl;
2862
2863 if (r->tbl) {
2864 mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
2865 ln, ppos, "TS breaks TS");
2866 tbl_end(&r->tbl);
2867 }
2868
2869 tbl = tbl_alloc(ppos, ln, r->parse);
2870
2871 if (r->last_tbl)
2872 r->last_tbl->next = tbl;
2873 else
2874 r->first_tbl = r->last_tbl = tbl;
2875
2876 r->tbl = r->last_tbl = tbl;
2877 return(ROFF_IGN);
2878 }
2879
2880 static enum rofferr
2881 roff_brp(ROFF_ARGS)
2882 {
2883
2884 buf->buf[pos - 1] = '\0';
2885 return(ROFF_CONT);
2886 }
2887
2888 static enum rofferr
2889 roff_cc(ROFF_ARGS)
2890 {
2891 const char *p;
2892
2893 p = buf->buf + pos;
2894
2895 if (*p == '\0' || (r->control = *p++) == '.')
2896 r->control = 0;
2897
2898 if (*p != '\0')
2899 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
2900 ln, p - buf->buf, "cc ... %s", p);
2901
2902 return(ROFF_IGN);
2903 }
2904
2905 static enum rofferr
2906 roff_tr(ROFF_ARGS)
2907 {
2908 const char *p, *first, *second;
2909 size_t fsz, ssz;
2910 enum mandoc_esc esc;
2911
2912 p = buf->buf + pos;
2913
2914 if (*p == '\0') {
2915 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr");
2916 return(ROFF_IGN);
2917 }
2918
2919 while (*p != '\0') {
2920 fsz = ssz = 1;
2921
2922 first = p++;
2923 if (*first == '\\') {
2924 esc = mandoc_escape(&p, NULL, NULL);
2925 if (esc == ESCAPE_ERROR) {
2926 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2927 ln, (int)(p - buf->buf), first);
2928 return(ROFF_IGN);
2929 }
2930 fsz = (size_t)(p - first);
2931 }
2932
2933 second = p++;
2934 if (*second == '\\') {
2935 esc = mandoc_escape(&p, NULL, NULL);
2936 if (esc == ESCAPE_ERROR) {
2937 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2938 ln, (int)(p - buf->buf), second);
2939 return(ROFF_IGN);
2940 }
2941 ssz = (size_t)(p - second);
2942 } else if (*second == '\0') {
2943 mandoc_vmsg(MANDOCERR_TR_ODD, r->parse,
2944 ln, first - buf->buf, "tr %s", first);
2945 second = " ";
2946 p--;
2947 }
2948
2949 if (fsz > 1) {
2950 roff_setstrn(&r->xmbtab, first, fsz,
2951 second, ssz, 0);
2952 continue;
2953 }
2954
2955 if (r->xtab == NULL)
2956 r->xtab = mandoc_calloc(128,
2957 sizeof(struct roffstr));
2958
2959 free(r->xtab[(int)*first].p);
2960 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
2961 r->xtab[(int)*first].sz = ssz;
2962 }
2963
2964 return(ROFF_IGN);
2965 }
2966
2967 static enum rofferr
2968 roff_so(ROFF_ARGS)
2969 {
2970 char *name, *cp;
2971
2972 name = buf->buf + pos;
2973 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
2974
2975 /*
2976 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
2977 * opening anything that's not in our cwd or anything beneath
2978 * it. Thus, explicitly disallow traversing up the file-system
2979 * or using absolute paths.
2980 */
2981
2982 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
2983 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
2984 ".so %s", name);
2985 buf->sz = mandoc_asprintf(&cp,
2986 ".sp\nSee the file %s.\n.sp", name) + 1;
2987 free(buf->buf);
2988 buf->buf = cp;
2989 *offs = 0;
2990 return(ROFF_REPARSE);
2991 }
2992
2993 *offs = pos;
2994 return(ROFF_SO);
2995 }
2996
2997 /* --- user defined strings and macros ------------------------------------ */
2998
2999 static enum rofferr
3000 roff_userdef(ROFF_ARGS)
3001 {
3002 const char *arg[9], *ap;
3003 char *cp, *n1, *n2;
3004 int i;
3005 size_t asz, rsz;
3006
3007 /*
3008 * Collect pointers to macro argument strings
3009 * and NUL-terminate them.
3010 */
3011
3012 cp = buf->buf + pos;
3013 for (i = 0; i < 9; i++)
3014 arg[i] = *cp == '\0' ? "" :
3015 mandoc_getarg(r->parse, &cp, ln, &pos);
3016
3017 /*
3018 * Expand macro arguments.
3019 */
3020
3021 buf->sz = strlen(r->current_string) + 1;
3022 n1 = cp = mandoc_malloc(buf->sz);
3023 memcpy(n1, r->current_string, buf->sz);
3024 while (*cp != '\0') {
3025
3026 /* Scan ahead for the next argument invocation. */
3027
3028 if (*cp++ != '\\')
3029 continue;
3030 if (*cp++ != '$')
3031 continue;
3032 i = *cp - '1';
3033 if (0 > i || 8 < i)
3034 continue;
3035 cp -= 2;
3036
3037 /*
3038 * Determine the size of the expanded argument,
3039 * taking escaping of quotes into account.
3040 */
3041
3042 asz = 0;
3043 for (ap = arg[i]; *ap != '\0'; ap++) {
3044 asz++;
3045 if (*ap == '"')
3046 asz += 3;
3047 }
3048 if (asz != 3) {
3049
3050 /*
3051 * Determine the size of the rest of the
3052 * unexpanded macro, including the NUL.
3053 */
3054
3055 rsz = buf->sz - (cp - n1) - 3;
3056
3057 /*
3058 * When shrinking, move before
3059 * releasing the storage.
3060 */
3061
3062 if (asz < 3)
3063 memmove(cp + asz, cp + 3, rsz);
3064
3065 /*
3066 * Resize the storage for the macro
3067 * and readjust the parse pointer.
3068 */
3069
3070 buf->sz += asz - 3;
3071 n2 = mandoc_realloc(n1, buf->sz);
3072 cp = n2 + (cp - n1);
3073 n1 = n2;
3074
3075 /*
3076 * When growing, make room
3077 * for the expanded argument.
3078 */
3079
3080 if (asz > 3)
3081 memmove(cp + asz, cp + 3, rsz);
3082 }
3083
3084 /* Copy the expanded argument, escaping quotes. */
3085
3086 n2 = cp;
3087 for (ap = arg[i]; *ap != '\0'; ap++) {
3088 if (*ap == '"') {
3089 memcpy(n2, "\\(dq", 4);
3090 n2 += 4;
3091 } else
3092 *n2++ = *ap;
3093 }
3094 }
3095
3096 /*
3097 * Replace the macro invocation
3098 * by the expanded macro.
3099 */
3100
3101 free(buf->buf);
3102 buf->buf = n1;
3103 *offs = 0;
3104
3105 return(buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
3106 ROFF_REPARSE : ROFF_APPEND);
3107 }
3108
3109 static size_t
3110 roff_getname(struct roff *r, char **cpp, int ln, int pos)
3111 {
3112 char *name, *cp;
3113 size_t namesz;
3114
3115 name = *cpp;
3116 if ('\0' == *name)
3117 return(0);
3118
3119 /* Read until end of name and terminate it with NUL. */
3120 for (cp = name; 1; cp++) {
3121 if ('\0' == *cp || ' ' == *cp) {
3122 namesz = cp - name;
3123 break;
3124 }
3125 if ('\\' != *cp)
3126 continue;
3127 namesz = cp - name;
3128 if ('{' == cp[1] || '}' == cp[1])
3129 break;
3130 cp++;
3131 if ('\\' == *cp)
3132 continue;
3133 mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
3134 "%.*s", (int)(cp - name + 1), name);
3135 mandoc_escape((const char **)&cp, NULL, NULL);
3136 break;
3137 }
3138
3139 /* Read past spaces. */
3140 while (' ' == *cp)
3141 cp++;
3142
3143 *cpp = cp;
3144 return(namesz);
3145 }
3146
3147 /*
3148 * Store *string into the user-defined string called *name.
3149 * To clear an existing entry, call with (*r, *name, NULL, 0).
3150 * append == 0: replace mode
3151 * append == 1: single-line append mode
3152 * append == 2: multiline append mode, append '\n' after each call
3153 */
3154 static void
3155 roff_setstr(struct roff *r, const char *name, const char *string,
3156 int append)
3157 {
3158
3159 roff_setstrn(&r->strtab, name, strlen(name), string,
3160 string ? strlen(string) : 0, append);
3161 }
3162
3163 static void
3164 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
3165 const char *string, size_t stringsz, int append)
3166 {
3167 struct roffkv *n;
3168 char *c;
3169 int i;
3170 size_t oldch, newch;
3171
3172 /* Search for an existing string with the same name. */
3173 n = *r;
3174
3175 while (n && (namesz != n->key.sz ||
3176 strncmp(n->key.p, name, namesz)))
3177 n = n->next;
3178
3179 if (NULL == n) {
3180 /* Create a new string table entry. */
3181 n = mandoc_malloc(sizeof(struct roffkv));
3182 n->key.p = mandoc_strndup(name, namesz);
3183 n->key.sz = namesz;
3184 n->val.p = NULL;
3185 n->val.sz = 0;
3186 n->next = *r;
3187 *r = n;
3188 } else if (0 == append) {
3189 free(n->val.p);
3190 n->val.p = NULL;
3191 n->val.sz = 0;
3192 }
3193
3194 if (NULL == string)
3195 return;
3196
3197 /*
3198 * One additional byte for the '\n' in multiline mode,
3199 * and one for the terminating '\0'.
3200 */
3201 newch = stringsz + (1 < append ? 2u : 1u);
3202
3203 if (NULL == n->val.p) {
3204 n->val.p = mandoc_malloc(newch);
3205 *n->val.p = '\0';
3206 oldch = 0;
3207 } else {
3208 oldch = n->val.sz;
3209 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
3210 }
3211
3212 /* Skip existing content in the destination buffer. */
3213 c = n->val.p + (int)oldch;
3214
3215 /* Append new content to the destination buffer. */
3216 i = 0;
3217 while (i < (int)stringsz) {
3218 /*
3219 * Rudimentary roff copy mode:
3220 * Handle escaped backslashes.
3221 */
3222 if ('\\' == string[i] && '\\' == string[i + 1])
3223 i++;
3224 *c++ = string[i++];
3225 }
3226
3227 /* Append terminating bytes. */
3228 if (1 < append)
3229 *c++ = '\n';
3230
3231 *c = '\0';
3232 n->val.sz = (int)(c - n->val.p);
3233 }
3234
3235 static const char *
3236 roff_getstrn(const struct roff *r, const char *name, size_t len)
3237 {
3238 const struct roffkv *n;
3239 int i;
3240
3241 for (n = r->strtab; n; n = n->next)
3242 if (0 == strncmp(name, n->key.p, len) &&
3243 '\0' == n->key.p[(int)len])
3244 return(n->val.p);
3245
3246 for (i = 0; i < PREDEFS_MAX; i++)
3247 if (0 == strncmp(name, predefs[i].name, len) &&
3248 '\0' == predefs[i].name[(int)len])
3249 return(predefs[i].str);
3250
3251 return(NULL);
3252 }
3253
3254 static void
3255 roff_freestr(struct roffkv *r)
3256 {
3257 struct roffkv *n, *nn;
3258
3259 for (n = r; n; n = nn) {
3260 free(n->key.p);
3261 free(n->val.p);
3262 nn = n->next;
3263 free(n);
3264 }
3265 }
3266
3267 /* --- accessors and utility functions ------------------------------------ */
3268
3269 const struct tbl_span *
3270 roff_span(const struct roff *r)
3271 {
3272
3273 return(r->tbl ? tbl_span(r->tbl) : NULL);
3274 }
3275
3276 const struct eqn *
3277 roff_eqn(const struct roff *r)
3278 {
3279
3280 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
3281 }
3282
3283 /*
3284 * Duplicate an input string, making the appropriate character
3285 * conversations (as stipulated by `tr') along the way.
3286 * Returns a heap-allocated string with all the replacements made.
3287 */
3288 char *
3289 roff_strdup(const struct roff *r, const char *p)
3290 {
3291 const struct roffkv *cp;
3292 char *res;
3293 const char *pp;
3294 size_t ssz, sz;
3295 enum mandoc_esc esc;
3296
3297 if (NULL == r->xmbtab && NULL == r->xtab)
3298 return(mandoc_strdup(p));
3299 else if ('\0' == *p)
3300 return(mandoc_strdup(""));
3301
3302 /*
3303 * Step through each character looking for term matches
3304 * (remember that a `tr' can be invoked with an escape, which is
3305 * a glyph but the escape is multi-character).
3306 * We only do this if the character hash has been initialised
3307 * and the string is >0 length.
3308 */
3309
3310 res = NULL;
3311 ssz = 0;
3312
3313 while ('\0' != *p) {
3314 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
3315 sz = r->xtab[(int)*p].sz;
3316 res = mandoc_realloc(res, ssz + sz + 1);
3317 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
3318 ssz += sz;
3319 p++;
3320 continue;
3321 } else if ('\\' != *p) {
3322 res = mandoc_realloc(res, ssz + 2);
3323 res[ssz++] = *p++;
3324 continue;
3325 }
3326
3327 /* Search for term matches. */
3328 for (cp = r->xmbtab; cp; cp = cp->next)
3329 if (0 == strncmp(p, cp->key.p, cp->key.sz))
3330 break;
3331
3332 if (NULL != cp) {
3333 /*
3334 * A match has been found.
3335 * Append the match to the array and move
3336 * forward by its keysize.
3337 */
3338 res = mandoc_realloc(res,
3339 ssz + cp->val.sz + 1);
3340 memcpy(res + ssz, cp->val.p, cp->val.sz);
3341 ssz += cp->val.sz;
3342 p += (int)cp->key.sz;
3343 continue;
3344 }
3345
3346 /*
3347 * Handle escapes carefully: we need to copy
3348 * over just the escape itself, or else we might
3349 * do replacements within the escape itself.
3350 * Make sure to pass along the bogus string.
3351 */
3352 pp = p++;
3353 esc = mandoc_escape(&p, NULL, NULL);
3354 if (ESCAPE_ERROR == esc) {
3355 sz = strlen(pp);
3356 res = mandoc_realloc(res, ssz + sz + 1);
3357 memcpy(res + ssz, pp, sz);
3358 break;
3359 }
3360 /*
3361 * We bail out on bad escapes.
3362 * No need to warn: we already did so when
3363 * roff_res() was called.
3364 */
3365 sz = (int)(p - pp);
3366 res = mandoc_realloc(res, ssz + sz + 1);
3367 memcpy(res + ssz, pp, sz);
3368 ssz += sz;
3369 }
3370
3371 res[(int)ssz] = '\0';
3372 return(res);
3373 }
3374
3375 int
3376 roff_getformat(const struct roff *r)
3377 {
3378
3379 return(r->format);
3380 }
3381
3382 /*
3383 * Find out whether a line is a macro line or not.
3384 * If it is, adjust the current position and return one; if it isn't,
3385 * return zero and don't change the current position.
3386 * If the control character has been set with `.cc', then let that grain
3387 * precedence.
3388 * This is slighly contrary to groff, where using the non-breaking
3389 * control character when `cc' has been invoked will cause the
3390 * non-breaking macro contents to be printed verbatim.
3391 */
3392 int
3393 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
3394 {
3395 int pos;
3396
3397 pos = *ppos;
3398
3399 if (0 != r->control && cp[pos] == r->control)
3400 pos++;
3401 else if (0 != r->control)
3402 return(0);
3403 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
3404 pos += 2;
3405 else if ('.' == cp[pos] || '\'' == cp[pos])
3406 pos++;
3407 else
3408 return(0);
3409
3410 while (' ' == cp[pos] || '\t' == cp[pos])
3411 pos++;
3412
3413 *ppos = pos;
3414 return(1);
3415 }