git.cameronkatri.com Git - mandoc.git/blob - roff.c
Ignore blank characters at the beginning of a conditional block,
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.272 2015/06/27 13:29:14 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28
29 #include "mandoc.h"
30 #include "mandoc_aux.h"
31 #include "roff.h"
32 #include "libmandoc.h"
33 #include "roff_int.h"
34 #include "libroff.h"
35
/* Upper bound on the nesting depth of if-else conditionals. */
#define	RSTACK_MAX	128

/* Cap on string expansions per input line; breaks infinite loops. */
#define	EXPAND_LIMIT	1000
41
42 /* --- data types --------------------------------------------------------- */
43
/*
 * Identifiers of the roff requests known to this parser.
 * The enumerators are listed in the same order as the rows of the
 * roffs[] table; roffhash_find() turns a table offset back into one
 * of these values, so the two must be kept in step.
 * ROFF_cblock is the "." row, ROFF_USERDEF the row without a name,
 * and ROFF_MAX doubles as the "not found" result.
 */
enum	rofft {
	ROFF_ab,
	ROFF_ad,
	ROFF_af,
	ROFF_aln,
	ROFF_als,
	ROFF_am,
	ROFF_am1,
	ROFF_ami,
	ROFF_ami1,
	ROFF_as,
	ROFF_as1,
	ROFF_asciify,
	ROFF_backtrace,
	ROFF_bd,
	ROFF_bleedat,
	ROFF_blm,
	ROFF_box,
	ROFF_boxa,
	ROFF_bp,
	ROFF_BP,
	/* MAN_br, MDOC_br */
	ROFF_break,
	ROFF_breakchar,
	ROFF_brnl,
	ROFF_brp,
	ROFF_brpnl,
	ROFF_c2,
	ROFF_cc,
	ROFF_ce,
	ROFF_cf,
	ROFF_cflags,
	ROFF_ch,
	ROFF_char,
	ROFF_chop,
	ROFF_class,
	ROFF_close,
	ROFF_CL,
	ROFF_color,
	ROFF_composite,
	ROFF_continue,
	ROFF_cp,
	ROFF_cropat,
	ROFF_cs,
	ROFF_cu,
	ROFF_da,
	ROFF_dch,
	ROFF_Dd,
	ROFF_de,
	ROFF_de1,
	ROFF_defcolor,
	ROFF_dei,
	ROFF_dei1,
	ROFF_device,
	ROFF_devicem,
	ROFF_di,
	ROFF_do,
	ROFF_ds,
	ROFF_ds1,
	ROFF_dwh,
	ROFF_dt,
	ROFF_ec,
	ROFF_ecr,
	ROFF_ecs,
	ROFF_el,
	ROFF_em,
	ROFF_EN,
	ROFF_eo,
	ROFF_EP,
	ROFF_EQ,
	ROFF_errprint,
	ROFF_ev,
	ROFF_evc,
	ROFF_ex,
	ROFF_fallback,
	ROFF_fam,
	ROFF_fc,
	ROFF_fchar,
	ROFF_fcolor,
	ROFF_fdeferlig,
	ROFF_feature,
	/* MAN_fi; ignored in mdoc(7) */
	ROFF_fkern,
	ROFF_fl,
	ROFF_flig,
	ROFF_fp,
	ROFF_fps,
	ROFF_fschar,
	ROFF_fspacewidth,
	ROFF_fspecial,
	/* MAN_ft; ignored in mdoc(7) */
	ROFF_ftr,
	ROFF_fzoom,
	ROFF_gcolor,
	ROFF_hc,
	ROFF_hcode,
	ROFF_hidechar,
	ROFF_hla,
	ROFF_hlm,
	ROFF_hpf,
	ROFF_hpfa,
	ROFF_hpfcode,
	ROFF_hw,
	ROFF_hy,
	ROFF_hylang,
	ROFF_hylen,
	ROFF_hym,
	ROFF_hypp,
	ROFF_hys,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	/* MAN_in; ignored in mdoc(7) */
	ROFF_index,
	ROFF_it,
	ROFF_itc,
	ROFF_IX,
	ROFF_kern,
	ROFF_kernafter,
	ROFF_kernbefore,
	ROFF_kernpair,
	ROFF_lc,
	ROFF_lc_ctype,
	ROFF_lds,
	ROFF_length,
	ROFF_letadj,
	ROFF_lf,
	ROFF_lg,
	ROFF_lhang,
	ROFF_linetabs,
	/* MAN_ll, MDOC_ll */
	ROFF_lnr,
	ROFF_lnrf,
	ROFF_lpfx,
	ROFF_ls,
	ROFF_lsm,
	ROFF_lt,
	ROFF_mc,
	ROFF_mediasize,
	ROFF_minss,
	ROFF_mk,
	ROFF_mso,
	ROFF_na,
	ROFF_ne,
	/* MAN_nf; ignored in mdoc(7) */
	ROFF_nh,
	ROFF_nhychar,
	ROFF_nm,
	ROFF_nn,
	ROFF_nop,
	ROFF_nr,
	ROFF_nrf,
	ROFF_nroff,
	ROFF_ns,
	ROFF_nx,
	ROFF_open,
	ROFF_opena,
	ROFF_os,
	ROFF_output,
	ROFF_padj,
	ROFF_papersize,
	ROFF_pc,
	ROFF_pev,
	ROFF_pi,
	ROFF_PI,
	ROFF_pl,
	ROFF_pm,
	ROFF_pn,
	ROFF_pnr,
	ROFF_po,
	ROFF_ps,
	ROFF_psbb,
	ROFF_pshape,
	ROFF_pso,
	ROFF_ptr,
	ROFF_pvs,
	ROFF_rchar,
	ROFF_rd,
	ROFF_recursionlimit,
	ROFF_return,
	ROFF_rfschar,
	ROFF_rhang,
	ROFF_rj,
	ROFF_rm,
	ROFF_rn,
	ROFF_rnn,
	ROFF_rr,
	ROFF_rs,
	ROFF_rt,
	ROFF_schar,
	ROFF_sentchar,
	ROFF_shc,
	ROFF_shift,
	ROFF_sizes,
	ROFF_so,
	/* MAN_sp, MDOC_sp */
	ROFF_spacewidth,
	ROFF_special,
	ROFF_spreadwarn,
	ROFF_ss,
	ROFF_sty,
	ROFF_substring,
	ROFF_sv,
	ROFF_sy,
	ROFF_T_,
	ROFF_ta,
	ROFF_tc,
	ROFF_TE,
	ROFF_TH,
	ROFF_ti,
	ROFF_tkf,
	ROFF_tl,
	ROFF_tm,
	ROFF_tm1,
	ROFF_tmc,
	ROFF_tr,
	ROFF_track,
	ROFF_transchar,
	ROFF_trf,
	ROFF_trimat,
	ROFF_trin,
	ROFF_trnt,
	ROFF_troff,
	ROFF_TS,
	ROFF_uf,
	ROFF_ul,
	ROFF_unformat,
	ROFF_unwatch,
	ROFF_unwatchn,
	ROFF_vpt,
	ROFF_vs,
	ROFF_warn,
	ROFF_warnscale,
	ROFF_watch,
	ROFF_watchlength,
	ROFF_watchn,
	ROFF_wh,
	ROFF_while,
	ROFF_write,
	ROFF_writec,
	ROFF_writem,
	ROFF_xflag,
	ROFF_cblock,
	ROFF_USERDEF,
	ROFF_MAX
};
290
/*
 * Minimal string buffer: a pointer plus its cached length.
 */
struct	roffstr {
	char		*p;  /* NUL-terminated buffer */
	size_t		 sz; /* cached strlen(p) */
};
298
299 /*
300 * A key-value roffstr pair as part of a singly-linked list.
301 */
302 struct roffkv {
303 struct roffstr key;
304 struct roffstr val;
305 struct roffkv *next; /* next in list */
306 };
307
308 /*
309 * A single number register as part of a singly-linked list.
310 */
311 struct roffreg {
312 struct roffstr key;
313 int val;
314 struct roffreg *next;
315 };
316
/*
 * State of one roff parser instance.
 */
struct	roff {
	struct mparse	*parse; /* parse point */
	const struct mchars *mchars; /* character table */
	struct roffnode	*last; /* leaf of the node stack */
	int		*rstack; /* stack of inverted `ie' values */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*xmbtab; /* multi-byte translation table (`tr') */
	struct roffstr	*xtab; /* single-byte translation table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* table currently being parsed */
	struct eqn_node	*last_eqn; /* last equation parsed */
	struct eqn_node	*first_eqn; /* first equation parsed */
	struct eqn_node	*eqn; /* equation currently being parsed */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack */
	int		 format; /* current file in mdoc or man format */
	char		 control; /* control character */
};
340
341 struct roffnode {
342 enum rofft tok; /* type of node */
343 struct roffnode *parent; /* up one in stack */
344 int line; /* parse line */
345 int col; /* parse col */
346 char *name; /* node name, e.g. macro name */
347 char *end; /* end-rules: custom token */
348 int endspan; /* end-rules: next-line or infty */
349 int rule; /* current evaluation rule */
350 };
351
/* Parameter list shared by every request handler (see roffproc). */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum rofft tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Signature of a request handler. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
361
362 struct roffmac {
363 const char *name; /* macro name */
364 roffproc proc; /* process new macro */
365 roffproc text; /* process as child text of macro */
366 roffproc sub; /* process as child of macro */
367 int flags;
368 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
369 struct roffmac *next;
370 };
371
/* A predefined string: its input name and the symbol replacing it. */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/* Expands to one struct predef initializer, trailing comma included. */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
379
380 /* --- function prototypes ------------------------------------------------ */
381
/* Forward declarations of all file-local helpers and request handlers. */
static	enum rofft	 roffhash_find(const char *, size_t);
static	void		 roffhash_init(void);
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum rofft,
				const char *, int, int);
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_brp(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_cc(ROFF_ARGS);
static	void		 roff_ccond(struct roff *, int, int);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum rofferr	 roff_eqndelim(struct roff *, struct buf *, int);
static	int		 roff_evalcond(struct roff *r, int, char *, int *);
static	int		 roff_evalnum(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalpar(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalstrcond(const char *, int *);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	size_t		 roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *, int);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(const struct roff *,
				const char *, size_t);
static	int		 roff_getregro(const char *name);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	int		 roff_hasregn(const struct roff *,
				const char *, size_t);
static	enum rofferr	 roff_insec(ROFF_ARGS);
static	enum rofferr	 roff_it(ROFF_ARGS);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	void		 roff_man_alloc1(struct roff_man *);
static	void		 roff_man_free1(struct roff_man *);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	enum rofft	 roff_parse(struct roff *, char *, int *,
				int, int);
static	enum rofferr	 roff_parsetext(struct buf *, int, int *);
static	enum rofferr	 roff_res(struct roff *, struct buf *, int, int);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	enum rofferr	 roff_rr(ROFF_ARGS);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_tr(ROFF_ARGS);
static	enum rofferr	 roff_Dd(ROFF_ARGS);
static	enum rofferr	 roff_TH(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_unsupp(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
446
447 /* --- constant data ------------------------------------------------------ */
448
/*
 * Range of first characters accepted as a hash key, and the resulting
 * number of buckets.  See roffhash_find().
 */
#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */

/* Bucket heads; rows of roffs[] are chained through their next member. */
static	struct roffmac	*hash[HASHWIDTH];
459
460 static struct roffmac roffs[ROFF_MAX] = {
461 { "ab", roff_unsupp, NULL, NULL, 0, NULL },
462 { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
463 { "af", roff_line_ignore, NULL, NULL, 0, NULL },
464 { "aln", roff_unsupp, NULL, NULL, 0, NULL },
465 { "als", roff_unsupp, NULL, NULL, 0, NULL },
466 { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
467 { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
468 { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
469 { "ami1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
470 { "as", roff_ds, NULL, NULL, 0, NULL },
471 { "as1", roff_ds, NULL, NULL, 0, NULL },
472 { "asciify", roff_unsupp, NULL, NULL, 0, NULL },
473 { "backtrace", roff_line_ignore, NULL, NULL, 0, NULL },
474 { "bd", roff_line_ignore, NULL, NULL, 0, NULL },
475 { "bleedat", roff_line_ignore, NULL, NULL, 0, NULL },
476 { "blm", roff_unsupp, NULL, NULL, 0, NULL },
477 { "box", roff_unsupp, NULL, NULL, 0, NULL },
478 { "boxa", roff_unsupp, NULL, NULL, 0, NULL },
479 { "bp", roff_line_ignore, NULL, NULL, 0, NULL },
480 { "BP", roff_unsupp, NULL, NULL, 0, NULL },
481 { "break", roff_unsupp, NULL, NULL, 0, NULL },
482 { "breakchar", roff_line_ignore, NULL, NULL, 0, NULL },
483 { "brnl", roff_line_ignore, NULL, NULL, 0, NULL },
484 { "brp", roff_brp, NULL, NULL, 0, NULL },
485 { "brpnl", roff_line_ignore, NULL, NULL, 0, NULL },
486 { "c2", roff_unsupp, NULL, NULL, 0, NULL },
487 { "cc", roff_cc, NULL, NULL, 0, NULL },
488 { "ce", roff_line_ignore, NULL, NULL, 0, NULL },
489 { "cf", roff_insec, NULL, NULL, 0, NULL },
490 { "cflags", roff_line_ignore, NULL, NULL, 0, NULL },
491 { "ch", roff_line_ignore, NULL, NULL, 0, NULL },
492 { "char", roff_unsupp, NULL, NULL, 0, NULL },
493 { "chop", roff_unsupp, NULL, NULL, 0, NULL },
494 { "class", roff_line_ignore, NULL, NULL, 0, NULL },
495 { "close", roff_insec, NULL, NULL, 0, NULL },
496 { "CL", roff_unsupp, NULL, NULL, 0, NULL },
497 { "color", roff_line_ignore, NULL, NULL, 0, NULL },
498 { "composite", roff_unsupp, NULL, NULL, 0, NULL },
499 { "continue", roff_unsupp, NULL, NULL, 0, NULL },
500 { "cp", roff_line_ignore, NULL, NULL, 0, NULL },
501 { "cropat", roff_line_ignore, NULL, NULL, 0, NULL },
502 { "cs", roff_line_ignore, NULL, NULL, 0, NULL },
503 { "cu", roff_line_ignore, NULL, NULL, 0, NULL },
504 { "da", roff_unsupp, NULL, NULL, 0, NULL },
505 { "dch", roff_unsupp, NULL, NULL, 0, NULL },
506 { "Dd", roff_Dd, NULL, NULL, 0, NULL },
507 { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
508 { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
509 { "defcolor", roff_line_ignore, NULL, NULL, 0, NULL },
510 { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
511 { "dei1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
512 { "device", roff_unsupp, NULL, NULL, 0, NULL },
513 { "devicem", roff_unsupp, NULL, NULL, 0, NULL },
514 { "di", roff_unsupp, NULL, NULL, 0, NULL },
515 { "do", roff_unsupp, NULL, NULL, 0, NULL },
516 { "ds", roff_ds, NULL, NULL, 0, NULL },
517 { "ds1", roff_ds, NULL, NULL, 0, NULL },
518 { "dwh", roff_unsupp, NULL, NULL, 0, NULL },
519 { "dt", roff_unsupp, NULL, NULL, 0, NULL },
520 { "ec", roff_unsupp, NULL, NULL, 0, NULL },
521 { "ecr", roff_unsupp, NULL, NULL, 0, NULL },
522 { "ecs", roff_unsupp, NULL, NULL, 0, NULL },
523 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
524 { "em", roff_unsupp, NULL, NULL, 0, NULL },
525 { "EN", roff_EN, NULL, NULL, 0, NULL },
526 { "eo", roff_unsupp, NULL, NULL, 0, NULL },
527 { "EP", roff_unsupp, NULL, NULL, 0, NULL },
528 { "EQ", roff_EQ, NULL, NULL, 0, NULL },
529 { "errprint", roff_line_ignore, NULL, NULL, 0, NULL },
530 { "ev", roff_unsupp, NULL, NULL, 0, NULL },
531 { "evc", roff_unsupp, NULL, NULL, 0, NULL },
532 { "ex", roff_unsupp, NULL, NULL, 0, NULL },
533 { "fallback", roff_line_ignore, NULL, NULL, 0, NULL },
534 { "fam", roff_line_ignore, NULL, NULL, 0, NULL },
535 { "fc", roff_unsupp, NULL, NULL, 0, NULL },
536 { "fchar", roff_unsupp, NULL, NULL, 0, NULL },
537 { "fcolor", roff_line_ignore, NULL, NULL, 0, NULL },
538 { "fdeferlig", roff_line_ignore, NULL, NULL, 0, NULL },
539 { "feature", roff_line_ignore, NULL, NULL, 0, NULL },
540 { "fkern", roff_line_ignore, NULL, NULL, 0, NULL },
541 { "fl", roff_line_ignore, NULL, NULL, 0, NULL },
542 { "flig", roff_line_ignore, NULL, NULL, 0, NULL },
543 { "fp", roff_line_ignore, NULL, NULL, 0, NULL },
544 { "fps", roff_line_ignore, NULL, NULL, 0, NULL },
545 { "fschar", roff_unsupp, NULL, NULL, 0, NULL },
546 { "fspacewidth", roff_line_ignore, NULL, NULL, 0, NULL },
547 { "fspecial", roff_line_ignore, NULL, NULL, 0, NULL },
548 { "ftr", roff_line_ignore, NULL, NULL, 0, NULL },
549 { "fzoom", roff_line_ignore, NULL, NULL, 0, NULL },
550 { "gcolor", roff_line_ignore, NULL, NULL, 0, NULL },
551 { "hc", roff_line_ignore, NULL, NULL, 0, NULL },
552 { "hcode", roff_line_ignore, NULL, NULL, 0, NULL },
553 { "hidechar", roff_line_ignore, NULL, NULL, 0, NULL },
554 { "hla", roff_line_ignore, NULL, NULL, 0, NULL },
555 { "hlm", roff_line_ignore, NULL, NULL, 0, NULL },
556 { "hpf", roff_line_ignore, NULL, NULL, 0, NULL },
557 { "hpfa", roff_line_ignore, NULL, NULL, 0, NULL },
558 { "hpfcode", roff_line_ignore, NULL, NULL, 0, NULL },
559 { "hw", roff_line_ignore, NULL, NULL, 0, NULL },
560 { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
561 { "hylang", roff_line_ignore, NULL, NULL, 0, NULL },
562 { "hylen", roff_line_ignore, NULL, NULL, 0, NULL },
563 { "hym", roff_line_ignore, NULL, NULL, 0, NULL },
564 { "hypp", roff_line_ignore, NULL, NULL, 0, NULL },
565 { "hys", roff_line_ignore, NULL, NULL, 0, NULL },
566 { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
567 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
568 { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
569 { "index", roff_unsupp, NULL, NULL, 0, NULL },
570 { "it", roff_it, NULL, NULL, 0, NULL },
571 { "itc", roff_unsupp, NULL, NULL, 0, NULL },
572 { "IX", roff_line_ignore, NULL, NULL, 0, NULL },
573 { "kern", roff_line_ignore, NULL, NULL, 0, NULL },
574 { "kernafter", roff_line_ignore, NULL, NULL, 0, NULL },
575 { "kernbefore", roff_line_ignore, NULL, NULL, 0, NULL },
576 { "kernpair", roff_line_ignore, NULL, NULL, 0, NULL },
577 { "lc", roff_unsupp, NULL, NULL, 0, NULL },
578 { "lc_ctype", roff_unsupp, NULL, NULL, 0, NULL },
579 { "lds", roff_unsupp, NULL, NULL, 0, NULL },
580 { "length", roff_unsupp, NULL, NULL, 0, NULL },
581 { "letadj", roff_line_ignore, NULL, NULL, 0, NULL },
582 { "lf", roff_insec, NULL, NULL, 0, NULL },
583 { "lg", roff_line_ignore, NULL, NULL, 0, NULL },
584 { "lhang", roff_line_ignore, NULL, NULL, 0, NULL },
585 { "linetabs", roff_unsupp, NULL, NULL, 0, NULL },
586 { "lnr", roff_unsupp, NULL, NULL, 0, NULL },
587 { "lnrf", roff_unsupp, NULL, NULL, 0, NULL },
588 { "lpfx", roff_unsupp, NULL, NULL, 0, NULL },
589 { "ls", roff_line_ignore, NULL, NULL, 0, NULL },
590 { "lsm", roff_unsupp, NULL, NULL, 0, NULL },
591 { "lt", roff_line_ignore, NULL, NULL, 0, NULL },
592 { "mc", roff_line_ignore, NULL, NULL, 0, NULL },
593 { "mediasize", roff_line_ignore, NULL, NULL, 0, NULL },
594 { "minss", roff_line_ignore, NULL, NULL, 0, NULL },
595 { "mk", roff_line_ignore, NULL, NULL, 0, NULL },
596 { "mso", roff_insec, NULL, NULL, 0, NULL },
597 { "na", roff_line_ignore, NULL, NULL, 0, NULL },
598 { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
599 { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
600 { "nhychar", roff_line_ignore, NULL, NULL, 0, NULL },
601 { "nm", roff_unsupp, NULL, NULL, 0, NULL },
602 { "nn", roff_unsupp, NULL, NULL, 0, NULL },
603 { "nop", roff_unsupp, NULL, NULL, 0, NULL },
604 { "nr", roff_nr, NULL, NULL, 0, NULL },
605 { "nrf", roff_unsupp, NULL, NULL, 0, NULL },
606 { "nroff", roff_line_ignore, NULL, NULL, 0, NULL },
607 { "ns", roff_line_ignore, NULL, NULL, 0, NULL },
608 { "nx", roff_insec, NULL, NULL, 0, NULL },
609 { "open", roff_insec, NULL, NULL, 0, NULL },
610 { "opena", roff_insec, NULL, NULL, 0, NULL },
611 { "os", roff_line_ignore, NULL, NULL, 0, NULL },
612 { "output", roff_unsupp, NULL, NULL, 0, NULL },
613 { "padj", roff_line_ignore, NULL, NULL, 0, NULL },
614 { "papersize", roff_line_ignore, NULL, NULL, 0, NULL },
615 { "pc", roff_line_ignore, NULL, NULL, 0, NULL },
616 { "pev", roff_line_ignore, NULL, NULL, 0, NULL },
617 { "pi", roff_insec, NULL, NULL, 0, NULL },
618 { "PI", roff_unsupp, NULL, NULL, 0, NULL },
619 { "pl", roff_line_ignore, NULL, NULL, 0, NULL },
620 { "pm", roff_line_ignore, NULL, NULL, 0, NULL },
621 { "pn", roff_line_ignore, NULL, NULL, 0, NULL },
622 { "pnr", roff_line_ignore, NULL, NULL, 0, NULL },
623 { "po", roff_line_ignore, NULL, NULL, 0, NULL },
624 { "ps", roff_line_ignore, NULL, NULL, 0, NULL },
625 { "psbb", roff_unsupp, NULL, NULL, 0, NULL },
626 { "pshape", roff_unsupp, NULL, NULL, 0, NULL },
627 { "pso", roff_insec, NULL, NULL, 0, NULL },
628 { "ptr", roff_line_ignore, NULL, NULL, 0, NULL },
629 { "pvs", roff_line_ignore, NULL, NULL, 0, NULL },
630 { "rchar", roff_unsupp, NULL, NULL, 0, NULL },
631 { "rd", roff_line_ignore, NULL, NULL, 0, NULL },
632 { "recursionlimit", roff_line_ignore, NULL, NULL, 0, NULL },
633 { "return", roff_unsupp, NULL, NULL, 0, NULL },
634 { "rfschar", roff_unsupp, NULL, NULL, 0, NULL },
635 { "rhang", roff_line_ignore, NULL, NULL, 0, NULL },
636 { "rj", roff_line_ignore, NULL, NULL, 0, NULL },
637 { "rm", roff_rm, NULL, NULL, 0, NULL },
638 { "rn", roff_unsupp, NULL, NULL, 0, NULL },
639 { "rnn", roff_unsupp, NULL, NULL, 0, NULL },
640 { "rr", roff_rr, NULL, NULL, 0, NULL },
641 { "rs", roff_line_ignore, NULL, NULL, 0, NULL },
642 { "rt", roff_line_ignore, NULL, NULL, 0, NULL },
643 { "schar", roff_unsupp, NULL, NULL, 0, NULL },
644 { "sentchar", roff_line_ignore, NULL, NULL, 0, NULL },
645 { "shc", roff_line_ignore, NULL, NULL, 0, NULL },
646 { "shift", roff_unsupp, NULL, NULL, 0, NULL },
647 { "sizes", roff_line_ignore, NULL, NULL, 0, NULL },
648 { "so", roff_so, NULL, NULL, 0, NULL },
649 { "spacewidth", roff_line_ignore, NULL, NULL, 0, NULL },
650 { "special", roff_line_ignore, NULL, NULL, 0, NULL },
651 { "spreadwarn", roff_line_ignore, NULL, NULL, 0, NULL },
652 { "ss", roff_line_ignore, NULL, NULL, 0, NULL },
653 { "sty", roff_line_ignore, NULL, NULL, 0, NULL },
654 { "substring", roff_unsupp, NULL, NULL, 0, NULL },
655 { "sv", roff_line_ignore, NULL, NULL, 0, NULL },
656 { "sy", roff_insec, NULL, NULL, 0, NULL },
657 { "T&", roff_T_, NULL, NULL, 0, NULL },
658 { "ta", roff_unsupp, NULL, NULL, 0, NULL },
659 { "tc", roff_unsupp, NULL, NULL, 0, NULL },
660 { "TE", roff_TE, NULL, NULL, 0, NULL },
661 { "TH", roff_TH, NULL, NULL, 0, NULL },
662 { "ti", roff_unsupp, NULL, NULL, 0, NULL },
663 { "tkf", roff_line_ignore, NULL, NULL, 0, NULL },
664 { "tl", roff_unsupp, NULL, NULL, 0, NULL },
665 { "tm", roff_line_ignore, NULL, NULL, 0, NULL },
666 { "tm1", roff_line_ignore, NULL, NULL, 0, NULL },
667 { "tmc", roff_line_ignore, NULL, NULL, 0, NULL },
668 { "tr", roff_tr, NULL, NULL, 0, NULL },
669 { "track", roff_line_ignore, NULL, NULL, 0, NULL },
670 { "transchar", roff_line_ignore, NULL, NULL, 0, NULL },
671 { "trf", roff_insec, NULL, NULL, 0, NULL },
672 { "trimat", roff_line_ignore, NULL, NULL, 0, NULL },
673 { "trin", roff_unsupp, NULL, NULL, 0, NULL },
674 { "trnt", roff_unsupp, NULL, NULL, 0, NULL },
675 { "troff", roff_line_ignore, NULL, NULL, 0, NULL },
676 { "TS", roff_TS, NULL, NULL, 0, NULL },
677 { "uf", roff_line_ignore, NULL, NULL, 0, NULL },
678 { "ul", roff_line_ignore, NULL, NULL, 0, NULL },
679 { "unformat", roff_unsupp, NULL, NULL, 0, NULL },
680 { "unwatch", roff_line_ignore, NULL, NULL, 0, NULL },
681 { "unwatchn", roff_line_ignore, NULL, NULL, 0, NULL },
682 { "vpt", roff_line_ignore, NULL, NULL, 0, NULL },
683 { "vs", roff_line_ignore, NULL, NULL, 0, NULL },
684 { "warn", roff_line_ignore, NULL, NULL, 0, NULL },
685 { "warnscale", roff_line_ignore, NULL, NULL, 0, NULL },
686 { "watch", roff_line_ignore, NULL, NULL, 0, NULL },
687 { "watchlength", roff_line_ignore, NULL, NULL, 0, NULL },
688 { "watchn", roff_line_ignore, NULL, NULL, 0, NULL },
689 { "wh", roff_unsupp, NULL, NULL, 0, NULL },
690 { "while", roff_unsupp, NULL, NULL, 0, NULL },
691 { "write", roff_insec, NULL, NULL, 0, NULL },
692 { "writec", roff_insec, NULL, NULL, 0, NULL },
693 { "writem", roff_insec, NULL, NULL, 0, NULL },
694 { "xflag", roff_line_ignore, NULL, NULL, 0, NULL },
695 { ".", roff_cblock, NULL, NULL, 0, NULL },
696 { NULL, roff_userdef, NULL, NULL, 0, NULL },
697 };
698
/*
 * NULL-terminated list of reserved mdoc macro names.
 * Not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH
 */
const	char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
	"Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
	"Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em",
	"En", "Eo", "Er", "Es", "Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp",
	"Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "Pq",
	"Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv",
	"Sc", "Sh", "Sm", "So", "Sq",
	"Ss", "St", "Sx", "Sy",
	"Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
	"%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
721
/*
 * NULL-terminated list of reserved man macro names.
 * Not currently implemented: BT DE DS ME MT PT SY TQ YS
 */
const	char *const __man_reserved[] = {
	"AT", "B", "BI", "BR", "DT",
	"EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR",
	"LP", "OP", "P", "PD", "PP",
	"R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS",
	"TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR",
	NULL
};
731
732 /* Array of injected predefined strings. */
733 #define PREDEFS_MAX 38
734 static const struct predef predefs[PREDEFS_MAX] = {
735 #include "predefs.in"
736 };
737
/*
 * Bucket index for a request name: the offset of its first byte from
 * ASCII_LO.  The argument is parenthesized so that pointer expressions
 * such as "s + 1" expand correctly.  See roffhash_find().
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)

static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
743
744
745 /* --- request table ------------------------------------------------------ */
746
747 static void
748 roffhash_init(void)
749 {
750 struct roffmac *n;
751 int buc, i;
752
753 for (i = 0; i < (int)ROFF_USERDEF; i++) {
754 assert(roffs[i].name[0] >= ASCII_LO);
755 assert(roffs[i].name[0] <= ASCII_HI);
756
757 buc = ROFF_HASH(roffs[i].name);
758
759 if (NULL != (n = hash[buc])) {
760 for ( ; n->next; n = n->next)
761 /* Do nothing. */ ;
762 n->next = &roffs[i];
763 } else
764 hash[buc] = &roffs[i];
765 }
766 }
767
768 /*
769 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
770 * the nil-terminated string name could be found.
771 */
772 static enum rofft
773 roffhash_find(const char *p, size_t s)
774 {
775 int buc;
776 struct roffmac *n;
777
778 /*
779 * libroff has an extremely simple hashtable, for the time
780 * being, which simply keys on the first character, which must
781 * be printable, then walks a chain. It works well enough until
782 * optimised.
783 */
784
785 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
786 return(ROFF_MAX);
787
788 buc = ROFF_HASH(p);
789
790 if (NULL == (n = hash[buc]))
791 return(ROFF_MAX);
792 for ( ; n; n = n->next)
793 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
794 return((enum rofft)(n - roffs));
795
796 return(ROFF_MAX);
797 }
798
799 /* --- stack of request blocks -------------------------------------------- */
800
801 /*
802 * Pop the current node off of the stack of roff instructions currently
803 * pending.
804 */
805 static void
806 roffnode_pop(struct roff *r)
807 {
808 struct roffnode *p;
809
810 assert(r->last);
811 p = r->last;
812
813 r->last = r->last->parent;
814 free(p->name);
815 free(p->end);
816 free(p);
817 }
818
819 /*
820 * Push a roff node onto the instruction stack. This must later be
821 * removed with roffnode_pop().
822 */
823 static void
824 roffnode_push(struct roff *r, enum rofft tok, const char *name,
825 int line, int col)
826 {
827 struct roffnode *p;
828
829 p = mandoc_calloc(1, sizeof(struct roffnode));
830 p->tok = tok;
831 if (name)
832 p->name = mandoc_strdup(name);
833 p->parent = r->last;
834 p->line = line;
835 p->col = col;
836 p->rule = p->parent ? p->parent->rule : 0;
837
838 r->last = p;
839 }
840
841 /* --- roff parser state data management ---------------------------------- */
842
843 static void
844 roff_free1(struct roff *r)
845 {
846 struct tbl_node *tbl;
847 struct eqn_node *e;
848 int i;
849
850 while (NULL != (tbl = r->first_tbl)) {
851 r->first_tbl = tbl->next;
852 tbl_free(tbl);
853 }
854 r->first_tbl = r->last_tbl = r->tbl = NULL;
855
856 while (NULL != (e = r->first_eqn)) {
857 r->first_eqn = e->next;
858 eqn_free(e);
859 }
860 r->first_eqn = r->last_eqn = r->eqn = NULL;
861
862 while (r->last)
863 roffnode_pop(r);
864
865 free (r->rstack);
866 r->rstack = NULL;
867 r->rstacksz = 0;
868 r->rstackpos = -1;
869
870 roff_freereg(r->regtab);
871 r->regtab = NULL;
872
873 roff_freestr(r->strtab);
874 roff_freestr(r->xmbtab);
875 r->strtab = r->xmbtab = NULL;
876
877 if (r->xtab)
878 for (i = 0; i < 128; i++)
879 free(r->xtab[i].p);
880 free(r->xtab);
881 r->xtab = NULL;
882 }
883
884 void
885 roff_reset(struct roff *r)
886 {
887
888 roff_free1(r);
889 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
890 r->control = 0;
891 }
892
/*
 * Destroy a parser: release its contents, then the object itself.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);
	free(r);
}
900
901 struct roff *
902 roff_alloc(struct mparse *parse, const struct mchars *mchars, int options)
903 {
904 struct roff *r;
905
906 r = mandoc_calloc(1, sizeof(struct roff));
907 r->parse = parse;
908 r->mchars = mchars;
909 r->options = options;
910 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
911 r->rstackpos = -1;
912
913 roffhash_init();
914
915 return(r);
916 }
917
918 /* --- syntax tree state data management ---------------------------------- */
919
920 static void
921 roff_man_free1(struct roff_man *man)
922 {
923
924 if (man->first != NULL)
925 roff_node_delete(man, man->first);
926 free(man->meta.msec);
927 free(man->meta.vol);
928 free(man->meta.os);
929 free(man->meta.arch);
930 free(man->meta.title);
931 free(man->meta.name);
932 free(man->meta.date);
933 }
934
935 static void
936 roff_man_alloc1(struct roff_man *man)
937 {
938
939 memset(&man->meta, 0, sizeof(man->meta));
940 man->first = mandoc_calloc(1, sizeof(*man->first));
941 man->first->type = ROFFT_ROOT;
942 man->last = man->first;
943 man->last_es = NULL;
944 man->flags = 0;
945 man->macroset = MACROSET_NONE;
946 man->lastsec = man->lastnamed = SEC_NONE;
947 man->next = ROFF_NEXT_CHILD;
948 }
949
/*
 * Discard the current tree and metadata and start over with an
 * empty tree; the roff_man object itself is kept.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);
	roff_man_alloc1(man);
}
957
/*
 * Destroy a roff_man: release its contents, then the object itself.
 */
void
roff_man_free(struct roff_man *man)
{
	roff_man_free1(man);
	free(man);
}
965
966 struct roff_man *
967 roff_man_alloc(struct roff *roff, struct mparse *parse,
968 const char *defos, int quick)
969 {
970 struct roff_man *man;
971
972 man = mandoc_calloc(1, sizeof(*man));
973 man->parse = parse;
974 man->roff = roff;
975 man->defos = defos;
976 man->quick = quick;
977 roff_man_alloc1(man);
978 return(man);
979 }
980
981 /* --- syntax tree handling ----------------------------------------------- */
982
983 struct roff_node *
984 roff_node_alloc(struct roff_man *man, int line, int pos,
985 enum roff_type type, int tok)
986 {
987 struct roff_node *n;
988
989 n = mandoc_calloc(1, sizeof(*n));
990 n->line = line;
991 n->pos = pos;
992 n->tok = tok;
993 n->type = type;
994 n->sec = man->lastsec;
995
996 if (man->flags & MDOC_SYNOPSIS)
997 n->flags |= MDOC_SYNPRETTY;
998 else
999 n->flags &= ~MDOC_SYNPRETTY;
1000 if (man->flags & MDOC_NEWLINE)
1001 n->flags |= MDOC_LINE;
1002 man->flags &= ~MDOC_NEWLINE;
1003
1004 return(n);
1005 }
1006
1007 void
1008 roff_node_append(struct roff_man *man, struct roff_node *n)
1009 {
1010
1011 switch (man->next) {
1012 case ROFF_NEXT_SIBLING:
1013 man->last->next = n;
1014 n->prev = man->last;
1015 n->parent = man->last->parent;
1016 break;
1017 case ROFF_NEXT_CHILD:
1018 man->last->child = n;
1019 n->parent = man->last;
1020 break;
1021 default:
1022 abort();
1023 /* NOTREACHED */
1024 }
1025 n->parent->nchild++;
1026 n->parent->last = n;
1027
1028 /*
1029 * Copy over the normalised-data pointer of our parent. Not
1030 * everybody has one, but copying a null pointer is fine.
1031 */
1032
1033 switch (n->type) {
1034 case ROFFT_BODY:
1035 if (n->end != ENDBODY_NOT)
1036 break;
1037 /* FALLTHROUGH */
1038 case ROFFT_TAIL:
1039 /* FALLTHROUGH */
1040 case ROFFT_HEAD:
1041 n->norm = n->parent->norm;
1042 break;
1043 default:
1044 break;
1045 }
1046
1047 if (man->macroset == MACROSET_MDOC)
1048 mdoc_valid_pre(man, n);
1049
1050 switch (n->type) {
1051 case ROFFT_HEAD:
1052 assert(n->parent->type == ROFFT_BLOCK);
1053 n->parent->head = n;
1054 break;
1055 case ROFFT_BODY:
1056 if (n->end)
1057 break;
1058 assert(n->parent->type == ROFFT_BLOCK);
1059 n->parent->body = n;
1060 break;
1061 case ROFFT_TAIL:
1062 assert(n->parent->type == ROFFT_BLOCK);
1063 n->parent->tail = n;
1064 break;
1065 default:
1066 break;
1067 }
1068 man->last = n;
1069 }
1070
1071 void
1072 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
1073 {
1074 struct roff_node *n;
1075
1076 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
1077 n->string = roff_strdup(man->roff, word);
1078 roff_node_append(man, n);
1079 if (man->macroset == MACROSET_MDOC)
1080 mdoc_valid_post(man);
1081 else
1082 man_valid_post(man);
1083 man->next = ROFF_NEXT_SIBLING;
1084 }
1085
1086 void
1087 roff_word_append(struct roff_man *man, const char *word)
1088 {
1089 struct roff_node *n;
1090 char *addstr, *newstr;
1091
1092 n = man->last;
1093 addstr = roff_strdup(man->roff, word);
1094 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
1095 free(addstr);
1096 free(n->string);
1097 n->string = newstr;
1098 man->next = ROFF_NEXT_SIBLING;
1099 }
1100
1101 void
1102 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
1103 {
1104 struct roff_node *n;
1105
1106 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
1107 roff_node_append(man, n);
1108 man->next = ROFF_NEXT_CHILD;
1109 }
1110
1111 struct roff_node *
1112 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
1113 {
1114 struct roff_node *n;
1115
1116 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1117 roff_node_append(man, n);
1118 man->next = ROFF_NEXT_CHILD;
1119 return(n);
1120 }
1121
1122 struct roff_node *
1123 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1124 {
1125 struct roff_node *n;
1126
1127 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1128 roff_node_append(man, n);
1129 man->next = ROFF_NEXT_CHILD;
1130 return(n);
1131 }
1132
1133 struct roff_node *
1134 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1135 {
1136 struct roff_node *n;
1137
1138 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1139 roff_node_append(man, n);
1140 man->next = ROFF_NEXT_CHILD;
1141 return(n);
1142 }
1143
1144 void
1145 roff_addeqn(struct roff_man *man, const struct eqn *eqn)
1146 {
1147 struct roff_node *n;
1148
1149 n = roff_node_alloc(man, eqn->ln, eqn->pos, ROFFT_EQN, TOKEN_NONE);
1150 n->eqn = eqn;
1151 if (eqn->ln > man->last->line)
1152 n->flags |= MDOC_LINE;
1153 roff_node_append(man, n);
1154 man->next = ROFF_NEXT_SIBLING;
1155 }
1156
1157 void
1158 roff_addtbl(struct roff_man *man, const struct tbl_span *tbl)
1159 {
1160 struct roff_node *n;
1161
1162 if (man->macroset == MACROSET_MAN)
1163 man_breakscope(man, TOKEN_NONE);
1164 n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE);
1165 n->span = tbl;
1166 roff_node_append(man, n);
1167 if (man->macroset == MACROSET_MDOC)
1168 mdoc_valid_post(man);
1169 else
1170 man_valid_post(man);
1171 man->next = ROFF_NEXT_SIBLING;
1172 }
1173
1174 void
1175 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1176 {
1177
1178 /* Adjust siblings. */
1179
1180 if (n->prev)
1181 n->prev->next = n->next;
1182 if (n->next)
1183 n->next->prev = n->prev;
1184
1185 /* Adjust parent. */
1186
1187 if (n->parent != NULL) {
1188 n->parent->nchild--;
1189 if (n->parent->child == n)
1190 n->parent->child = n->next;
1191 if (n->parent->last == n)
1192 n->parent->last = n->prev;
1193 }
1194
1195 /* Adjust parse point. */
1196
1197 if (man == NULL)
1198 return;
1199 if (man->last == n) {
1200 if (n->prev == NULL) {
1201 man->last = n->parent;
1202 man->next = ROFF_NEXT_CHILD;
1203 } else {
1204 man->last = n->prev;
1205 man->next = ROFF_NEXT_SIBLING;
1206 }
1207 }
1208 if (man->first == n)
1209 man->first = NULL;
1210 }
1211
1212 void
1213 roff_node_free(struct roff_node *n)
1214 {
1215
1216 if (n->args != NULL)
1217 mdoc_argv_free(n->args);
1218 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1219 free(n->norm);
1220 free(n->string);
1221 free(n);
1222 }
1223
1224 void
1225 roff_node_delete(struct roff_man *man, struct roff_node *n)
1226 {
1227
1228 while (n->child != NULL)
1229 roff_node_delete(man, n->child);
1230 assert(n->nchild == 0);
1231 roff_node_unlink(man, n);
1232 roff_node_free(n);
1233 }
1234
1235 void
1236 deroff(char **dest, const struct roff_node *n)
1237 {
1238 char *cp;
1239 size_t sz;
1240
1241 if (n->type != ROFFT_TEXT) {
1242 for (n = n->child; n != NULL; n = n->next)
1243 deroff(dest, n);
1244 return;
1245 }
1246
1247 /* Skip leading whitespace and escape sequences. */
1248
1249 cp = n->string;
1250 while (*cp != '\0') {
1251 if ('\\' == *cp) {
1252 cp++;
1253 mandoc_escape((const char **)&cp, NULL, NULL);
1254 } else if (isspace((unsigned char)*cp))
1255 cp++;
1256 else
1257 break;
1258 }
1259
1260 /* Skip trailing whitespace. */
1261
1262 for (sz = strlen(cp); sz; sz--)
1263 if ( ! isspace((unsigned char)cp[sz-1]))
1264 break;
1265
1266 /* Skip empty strings. */
1267
1268 if (sz == 0)
1269 return;
1270
1271 if (*dest == NULL) {
1272 *dest = mandoc_strndup(cp, sz);
1273 return;
1274 }
1275
1276 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1277 free(*dest);
1278 *dest = cp;
1279 }
1280
1281 /* --- main functions of the roff parser ---------------------------------- */
1282
/*
 * In the current line, expand escape sequences that tend to get
 * used in numerical expressions and conditional requests.
 * Also check the syntax of the remaining escape sequences.
 *
 * The line is scanned backwards from its end down to buf->buf + pos.
 * Expanded in place are \* (user-defined string), \n (register
 * value), \B (replaced by 1 or 0 depending on whether the argument
 * is a valid numeric expression) and \w (replaced by 24 times the
 * counted length of the argument).  Each expansion reallocates
 * buf->buf and updates buf->sz.
 *
 * Returns ROFF_IGN if more than EXPAND_LIMIT expansions occur on
 * the line or if the buffer would grow beyond SHRT_MAX,
 * and ROFF_CONT otherwise.
 */
static enum rofferr
roff_res(struct roff *r, struct buf *buf, int ln, int pos)
{
	char		 ubuf[24]; /* buffer to print the number */
	const char	*start;	/* start of the string to process */
	char		*stesc;	/* start of an escape sequence ('\\') */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy buf->buf to */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	enum mandoc_esc	 esc;	/* type of the escape sequence */
	int		 inaml;	/* length returned from mandoc_escape() */
	int		 expand_count;	/* to avoid infinite loops */
	int		 npos;	/* position in numeric expression */
	int		 arg_complete; /* argument not interrupted by eol */
	char		 term;	/* character terminating the escape */

	expand_count = 0;
	start = buf->buf + pos;
	/* Begin at the last character; the loop test steps back first. */
	stesc = strchr(start, '\0') - 1;
	while (stesc-- > start) {

		/* Search backwards for the next backslash. */

		if (*stesc != '\\')
			continue;

		/* If it is escaped, skip it. */

		for (cp = stesc - 1; cp >= start; cp--)
			if (*cp != '\\')
				break;

		/*
		 * cp now points just before the run of backslashes
		 * ending at stesc.  An even run length means that
		 * this backslash is itself escaped; skip the run.
		 */

		if ((stesc - cp) % 2 == 0) {
			stesc = (char *)cp;
			continue;
		}

		/* Decide whether to expand or to check only. */

		term = '\0';
		cp = stesc + 1;
		switch (*cp) {
		case '*':
			/* Looked up below; NULL means undefined. */
			res = NULL;
			break;
		case 'B':
			/* FALLTHROUGH */
		case 'w':
			/* The character after the letter delimits the argument. */
			term = cp[1];
			/* FALLTHROUGH */
		case 'n':
			res = ubuf;
			break;
		default:
			/* Not expanded here: only report malformed escapes. */
			esc = mandoc_escape(&cp, &stnam, &inaml);
			if (esc == ESCAPE_ERROR ||
			    (esc == ESCAPE_SPECIAL &&
			     mchars_spec2cp(r->mchars, stnam, inaml) < 0))
				mandoc_vmsg(MANDOCERR_ESC_BAD,
				    r->parse, ln, (int)(stesc - buf->buf),
				    "%.*s", (int)(cp - stesc), stesc);
			continue;
		}

		if (EXPAND_LIMIT < ++expand_count) {
			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
			    ln, (int)(stesc - buf->buf), NULL);
			return(ROFF_IGN);
		}

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 */

		if (term == '\0') {
			switch (*++cp) {
			case '\0':
				maxl = 0;
				break;
			case '(':
				cp++;
				maxl = 2;
				break;
			case '[':
				cp++;
				term = ']';
				maxl = 0;
				break;
			default:
				maxl = 1;
				break;
			}
		} else {
			/* Skip the escape letter and the opening delimiter. */
			cp += 2;
			maxl = 0;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		/* A maxl of zero means: scan up to the terminator. */

		naml = 0;
		arg_complete = 1;
		while (maxl == 0 || naml < maxl) {
			if (*cp == '\0') {
				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
				    ln, (int)(stesc - buf->buf), stesc);
				arg_complete = 0;
				break;
			}
			if (maxl == 0 && *cp == term) {
				cp++;
				break;
			}
			/*
			 * Except inside \w arguments, every byte counts.
			 * Inside \w, a backslash starts a nested escape
			 * that counts once for the classes listed below
			 * and not at all otherwise.
			 */
			if (*cp++ != '\\' || stesc[1] != 'w') {
				naml++;
				continue;
			}
			switch (mandoc_escape(&cp, NULL, NULL)) {
			case ESCAPE_SPECIAL:
				/* FALLTHROUGH */
			case ESCAPE_UNICODE:
				/* FALLTHROUGH */
			case ESCAPE_NUMBERED:
				/* FALLTHROUGH */
			case ESCAPE_OVERSTRIKE:
				naml++;
				break;
			default:
				break;
			}
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		switch (stesc[1]) {
		case '*':
			if (arg_complete)
				res = roff_getstrn(r, stnam, naml);
			break;
		case 'B':
			/*
			 * Valid only if the expression parses and
			 * ends right before the closing delimiter.
			 */
			npos = 0;
			ubuf[0] = arg_complete &&
			    roff_evalnum(r, ln, stnam, &npos,
			      NULL, ROFFNUM_SCALE) &&
			    stnam + npos + 1 == cp ? '1' : '0';
			ubuf[1] = '\0';
			break;
		case 'n':
			if (arg_complete)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, stnam, naml));
			else
				ubuf[0] = '\0';
			break;
		case 'w':
			/* use even incomplete args */
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
			    24 * (int)naml);
			break;
		}

		if (res == NULL) {
			mandoc_vmsg(MANDOCERR_STR_UNDEF,
			    r->parse, ln, (int)(stesc - buf->buf),
			    "%.*s", (int)naml, stnam);
			res = "";
		} else if (buf->sz + strlen(res) > SHRT_MAX) {
			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
			    ln, (int)(stesc - buf->buf), NULL);
			return(ROFF_IGN);
		}

		/* Replace the escape sequence by the string. */

		/* Cut the buffer at the backslash, then join the pieces. */

		*stesc = '\0';
		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
		    buf->buf, res, cp) + 1;

		/* Prepare for the next replacement. */

		/*
		 * Resume right after the inserted text, such that
		 * escapes contained in the replacement get seen, too.
		 */

		start = nbuf + pos;
		stesc = nbuf + (stesc - buf->buf) + strlen(res);
		free(buf->buf);
		buf->buf = nbuf;
	}
	return(ROFF_CONT);
}
1483
1484 /*
1485 * Process text streams:
1486 * Convert all breakable hyphens into ASCII_HYPH.
1487 * Decrement and spring input line trap.
1488 */
1489 static enum rofferr
1490 roff_parsetext(struct buf *buf, int pos, int *offs)
1491 {
1492 size_t sz;
1493 const char *start;
1494 char *p;
1495 int isz;
1496 enum mandoc_esc esc;
1497
1498 start = p = buf->buf + pos;
1499
1500 while (*p != '\0') {
1501 sz = strcspn(p, "-\\");
1502 p += sz;
1503
1504 if (*p == '\0')
1505 break;
1506
1507 if (*p == '\\') {
1508 /* Skip over escapes. */
1509 p++;
1510 esc = mandoc_escape((const char **)&p, NULL, NULL);
1511 if (esc == ESCAPE_ERROR)
1512 break;
1513 while (*p == '-')
1514 p++;
1515 continue;
1516 } else if (p == start) {
1517 p++;
1518 continue;
1519 }
1520
1521 if (isalpha((unsigned char)p[-1]) &&
1522 isalpha((unsigned char)p[1]))
1523 *p = ASCII_HYPH;
1524 p++;
1525 }
1526
1527 /* Spring the input line trap. */
1528 if (roffit_lines == 1) {
1529 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1530 free(buf->buf);
1531 buf->buf = p;
1532 buf->sz = isz + 1;
1533 *offs = 0;
1534 free(roffit_macro);
1535 roffit_lines = 0;
1536 return(ROFF_REPARSE);
1537 } else if (roffit_lines > 1)
1538 --roffit_lines;
1539 return(ROFF_CONT);
1540 }
1541
/*
 * Process one input line, starting at offset *offs in buf->buf.
 *
 * In this order: handle in-line equation delimiters, expand
 * escape sequences with roff_res(), give text lines to the text
 * handler of an open roff block, hand the line to an open equation
 * or table, and finally run the request or user-defined macro
 * found on a control line.
 *
 * The result comes from whichever handler took the line.
 * ROFF_CONT is returned directly for control lines that are
 * neither roff requests nor user-defined macros, ROFF_IGN for
 * empty or comment-only request lines.
 */
enum rofferr
roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
{
	enum rofft	 t;
	enum rofferr	 e;
	int		 pos;	/* parse point */
	int		 spos;	/* saved parse point for messages */
	int		 ppos;	/* original offset in buf->buf */
	int		 ctl;	/* macro line (boolean) */

	ppos = pos = *offs;

	/* Handle in-line equation delimiters. */

	if (r->tbl == NULL &&
	    r->last_eqn != NULL && r->last_eqn->delim &&
	    (r->eqn == NULL || r->eqn_inline)) {
		e = roff_eqndelim(r, buf, pos);
		if (e == ROFF_REPARSE)
			return(e);
		assert(e == ROFF_CONT);
	}

	/* Expand some escape sequences. */

	e = roff_res(r, buf, ln, pos);
	if (e == ROFF_IGN)
		return(e);
	assert(e == ROFF_CONT);

	/* May advance pos; the result tells whether this is a control line. */

	ctl = roff_getcontrol(r, buf->buf, &pos);

	/*
	 * First, if a scope is open and we're not a macro, pass the
	 * text through the macro's filter.
	 * Equations process all content themselves.
	 * Tables process almost all content themselves, but we want
	 * to warn about macros before passing it there.
	 */

	if (r->last != NULL && ! ctl) {
		t = r->last->tok;
		assert(roffs[t].text);
		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
		assert(e == ROFF_IGN || e == ROFF_CONT);
		if (e != ROFF_CONT)
			return(e);
	}
	if (r->eqn != NULL)
		return(eqn_read(&r->eqn, ln, buf->buf, ppos, offs));
	if (r->tbl != NULL && ( ! ctl || buf->buf[pos] == '\0'))
		return(tbl_read(r->tbl, ln, buf->buf, ppos));
	if ( ! ctl)
		return(roff_parsetext(buf, pos, offs));

	/* Skip empty request lines. */

	if (buf->buf[pos] == '"') {
		mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
		    ln, pos, NULL);
		return(ROFF_IGN);
	} else if (buf->buf[pos] == '\0')
		return(ROFF_IGN);

	/*
	 * If a scope is open, go to the child handler for that macro,
	 * as it may want to preprocess before doing anything with it.
	 * Don't do so if an equation is open.
	 */

	if (r->last) {
		t = r->last->tok;
		assert(roffs[t].sub);
		return((*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs));
	}

	/* No scope is open.  This is a new request or macro. */

	spos = pos;
	t = roff_parse(r, buf->buf, &pos, ln, ppos);

	/* Tables ignore most macros. */

	if (r->tbl != NULL && (t == ROFF_MAX || t == ROFF_TS)) {
		mandoc_msg(MANDOCERR_TBLMACRO, r->parse,
		    ln, pos, buf->buf + spos);
		if (t == ROFF_TS)
			return(ROFF_IGN);
		/* Skip the unknown macro name and the blanks after it. */
		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
			pos++;
		while (buf->buf[pos] != '\0' && buf->buf[pos] == ' ')
			pos++;
		return(tbl_read(r->tbl, ln, buf->buf, pos));
	}

	/*
	 * This is neither a roff request nor a user-defined macro.
	 * Let the standard macro set parsers handle it.
	 */

	if (t == ROFF_MAX)
		return(ROFF_CONT);

	/* Execute a roff request or a user defined macro. */

	assert(roffs[t].proc);
	return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs));
}
1650
1651 void
1652 roff_endparse(struct roff *r)
1653 {
1654
1655 if (r->last)
1656 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1657 r->last->line, r->last->col,
1658 roffs[r->last->tok].name);
1659
1660 if (r->eqn) {
1661 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1662 r->eqn->eqn.ln, r->eqn->eqn.pos, "EQ");
1663 eqn_end(&r->eqn);
1664 }
1665
1666 if (r->tbl) {
1667 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1668 r->tbl->line, r->tbl->pos, "TS");
1669 tbl_end(&r->tbl);
1670 }
1671 }
1672
1673 /*
1674 * Parse a roff node's type from the input buffer. This must be in the
1675 * form of ".foo xxx" in the usual way.
1676 */
1677 static enum rofft
1678 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1679 {
1680 char *cp;
1681 const char *mac;
1682 size_t maclen;
1683 enum rofft t;
1684
1685 cp = buf + *pos;
1686
1687 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1688 return(ROFF_MAX);
1689
1690 mac = cp;
1691 maclen = roff_getname(r, &cp, ln, ppos);
1692
1693 t = (r->current_string = roff_getstrn(r, mac, maclen))
1694 ? ROFF_USERDEF : roffhash_find(mac, maclen);
1695
1696 if (ROFF_MAX != t)
1697 *pos = cp - buf;
1698
1699 return(t);
1700 }
1701
1702 /* --- handling of request blocks ----------------------------------------- */
1703
1704 static enum rofferr
1705 roff_cblock(ROFF_ARGS)
1706 {
1707
1708 /*
1709 * A block-close `..' should only be invoked as a child of an
1710 * ignore macro, otherwise raise a warning and just ignore it.
1711 */
1712
1713 if (r->last == NULL) {
1714 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1715 ln, ppos, "..");
1716 return(ROFF_IGN);
1717 }
1718
1719 switch (r->last->tok) {
1720 case ROFF_am:
1721 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1722 /* FALLTHROUGH */
1723 case ROFF_ami:
1724 /* FALLTHROUGH */
1725 case ROFF_de:
1726 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1727 /* FALLTHROUGH */
1728 case ROFF_dei:
1729 /* FALLTHROUGH */
1730 case ROFF_ig:
1731 break;
1732 default:
1733 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1734 ln, ppos, "..");
1735 return(ROFF_IGN);
1736 }
1737
1738 if (buf->buf[pos] != '\0')
1739 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1740 ".. %s", buf->buf + pos);
1741
1742 roffnode_pop(r);
1743 roffnode_cleanscope(r);
1744 return(ROFF_IGN);
1745
1746 }
1747
1748 static void
1749 roffnode_cleanscope(struct roff *r)
1750 {
1751
1752 while (r->last) {
1753 if (--r->last->endspan != 0)
1754 break;
1755 roffnode_pop(r);
1756 }
1757 }
1758
1759 static void
1760 roff_ccond(struct roff *r, int ln, int ppos)
1761 {
1762
1763 if (NULL == r->last) {
1764 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1765 ln, ppos, "\\}");
1766 return;
1767 }
1768
1769 switch (r->last->tok) {
1770 case ROFF_el:
1771 /* FALLTHROUGH */
1772 case ROFF_ie:
1773 /* FALLTHROUGH */
1774 case ROFF_if:
1775 break;
1776 default:
1777 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1778 ln, ppos, "\\}");
1779 return;
1780 }
1781
1782 if (r->last->endspan > -1) {
1783 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1784 ln, ppos, "\\}");
1785 return;
1786 }
1787
1788 roffnode_pop(r);
1789 roffnode_cleanscope(r);
1790 return;
1791 }
1792
1793 static enum rofferr
1794 roff_block(ROFF_ARGS)
1795 {
1796 const char *name;
1797 char *iname, *cp;
1798 size_t namesz;
1799
1800 /* Ignore groff compatibility mode for now. */
1801
1802 if (tok == ROFF_de1)
1803 tok = ROFF_de;
1804 else if (tok == ROFF_dei1)
1805 tok = ROFF_dei;
1806 else if (tok == ROFF_am1)
1807 tok = ROFF_am;
1808 else if (tok == ROFF_ami1)
1809 tok = ROFF_ami;
1810
1811 /* Parse the macro name argument. */
1812
1813 cp = buf->buf + pos;
1814 if (tok == ROFF_ig) {
1815 iname = NULL;
1816 namesz = 0;
1817 } else {
1818 iname = cp;
1819 namesz = roff_getname(r, &cp, ln, ppos);
1820 iname[namesz] = '\0';
1821 }
1822
1823 /* Resolve the macro name argument if it is indirect. */
1824
1825 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1826 if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1827 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1828 r->parse, ln, (int)(iname - buf->buf),
1829 "%.*s", (int)namesz, iname);
1830 namesz = 0;
1831 } else
1832 namesz = strlen(name);
1833 } else
1834 name = iname;
1835
1836 if (namesz == 0 && tok != ROFF_ig) {
1837 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
1838 ln, ppos, roffs[tok].name);
1839 return(ROFF_IGN);
1840 }
1841
1842 roffnode_push(r, tok, name, ln, ppos);
1843
1844 /*
1845 * At the beginning of a `de' macro, clear the existing string
1846 * with the same name, if there is one. New content will be
1847 * appended from roff_block_text() in multiline mode.
1848 */
1849
1850 if (tok == ROFF_de || tok == ROFF_dei)
1851 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1852
1853 if (*cp == '\0')
1854 return(ROFF_IGN);
1855
1856 /* Get the custom end marker. */
1857
1858 iname = cp;
1859 namesz = roff_getname(r, &cp, ln, ppos);
1860
1861 /* Resolve the end marker if it is indirect. */
1862
1863 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1864 if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1865 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1866 r->parse, ln, (int)(iname - buf->buf),
1867 "%.*s", (int)namesz, iname);
1868 namesz = 0;
1869 } else
1870 namesz = strlen(name);
1871 } else
1872 name = iname;
1873
1874 if (namesz)
1875 r->last->end = mandoc_strndup(name, namesz);
1876
1877 if (*cp != '\0')
1878 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
1879 ln, pos, ".%s ... %s", roffs[tok].name, cp);
1880
1881 return(ROFF_IGN);
1882 }
1883
1884 static enum rofferr
1885 roff_block_sub(ROFF_ARGS)
1886 {
1887 enum rofft t;
1888 int i, j;
1889
1890 /*
1891 * First check whether a custom macro exists at this level. If
1892 * it does, then check against it. This is some of groff's
1893 * stranger behaviours. If we encountered a custom end-scope
1894 * tag and that tag also happens to be a "real" macro, then we
1895 * need to try interpreting it again as a real macro. If it's
1896 * not, then return ignore. Else continue.
1897 */
1898
1899 if (r->last->end) {
1900 for (i = pos, j = 0; r->last->end[j]; j++, i++)
1901 if (buf->buf[i] != r->last->end[j])
1902 break;
1903
1904 if (r->last->end[j] == '\0' &&
1905 (buf->buf[i] == '\0' ||
1906 buf->buf[i] == ' ' ||
1907 buf->buf[i] == '\t')) {
1908 roffnode_pop(r);
1909 roffnode_cleanscope(r);
1910
1911 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
1912 i++;
1913
1914 pos = i;
1915 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
1916 ROFF_MAX)
1917 return(ROFF_RERUN);
1918 return(ROFF_IGN);
1919 }
1920 }
1921
1922 /*
1923 * If we have no custom end-query or lookup failed, then try
1924 * pulling it out of the hashtable.
1925 */
1926
1927 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1928
1929 if (t != ROFF_cblock) {
1930 if (tok != ROFF_ig)
1931 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
1932 return(ROFF_IGN);
1933 }
1934
1935 assert(roffs[t].proc);
1936 return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs));
1937 }
1938
1939 static enum rofferr
1940 roff_block_text(ROFF_ARGS)
1941 {
1942
1943 if (tok != ROFF_ig)
1944 roff_setstr(r, r->last->name, buf->buf + pos, 2);
1945
1946 return(ROFF_IGN);
1947 }
1948
1949 static enum rofferr
1950 roff_cond_sub(ROFF_ARGS)
1951 {
1952 enum rofft t;
1953 char *ep;
1954 int rr;
1955
1956 rr = r->last->rule;
1957 roffnode_cleanscope(r);
1958 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1959
1960 /*
1961 * Fully handle known macros when they are structurally
1962 * required or when the conditional evaluated to true.
1963 */
1964
1965 if ((t != ROFF_MAX) &&
1966 (rr || roffs[t].flags & ROFFMAC_STRUCT)) {
1967 assert(roffs[t].proc);
1968 return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs));
1969 }
1970
1971 /*
1972 * If `\}' occurs on a macro line without a preceding macro,
1973 * drop the line completely.
1974 */
1975
1976 ep = buf->buf + pos;
1977 if (ep[0] == '\\' && ep[1] == '}')
1978 rr = 0;
1979
1980 /* Always check for the closing delimiter `\}'. */
1981
1982 while ((ep = strchr(ep, '\\')) != NULL) {
1983 if (*(++ep) == '}') {
1984 *ep = '&';
1985 roff_ccond(r, ln, ep - buf->buf - 1);
1986 }
1987 if (*ep != '\0')
1988 ++ep;
1989 }
1990 return(rr ? ROFF_CONT : ROFF_IGN);
1991 }
1992
1993 static enum rofferr
1994 roff_cond_text(ROFF_ARGS)
1995 {
1996 char *ep;
1997 int rr;
1998
1999 rr = r->last->rule;
2000 roffnode_cleanscope(r);
2001
2002 ep = buf->buf + pos;
2003 while ((ep = strchr(ep, '\\')) != NULL) {
2004 if (*(++ep) == '}') {
2005 *ep = '&';
2006 roff_ccond(r, ln, ep - buf->buf - 1);
2007 }
2008 if (*ep != '\0')
2009 ++ep;
2010 }
2011 return(rr ? ROFF_CONT : ROFF_IGN);
2012 }
2013
2014 /* --- handling of numeric and conditional expressions -------------------- */
2015
2016 /*
2017 * Parse a single signed integer number. Stop at the first non-digit.
2018 * If there is at least one digit, return success and advance the
2019 * parse point, else return failure and let the parse point unchanged.
2020 * Ignore overflows, treat them just like the C language.
2021 */
2022 static int
2023 roff_getnum(const char *v, int *pos, int *res, int flags)
2024 {
2025 int myres, scaled, n, p;
2026
2027 if (NULL == res)
2028 res = &myres;
2029
2030 p = *pos;
2031 n = v[p] == '-';
2032 if (n || v[p] == '+')
2033 p++;
2034
2035 if (flags & ROFFNUM_WHITE)
2036 while (isspace((unsigned char)v[p]))
2037 p++;
2038
2039 for (*res = 0; isdigit((unsigned char)v[p]); p++)
2040 *res = 10 * *res + v[p] - '0';
2041 if (p == *pos + n)
2042 return 0;
2043
2044 if (n)
2045 *res = -*res;
2046
2047 /* Each number may be followed by one optional scaling unit. */
2048
2049 switch (v[p]) {
2050 case 'f':
2051 scaled = *res * 65536;
2052 break;
2053 case 'i':
2054 scaled = *res * 240;
2055 break;
2056 case 'c':
2057 scaled = *res * 240 / 2.54;
2058 break;
2059 case 'v':
2060 /* FALLTROUGH */
2061 case 'P':
2062 scaled = *res * 40;
2063 break;
2064 case 'm':
2065 /* FALLTROUGH */
2066 case 'n':
2067 scaled = *res * 24;
2068 break;
2069 case 'p':
2070 scaled = *res * 10 / 3;
2071 break;
2072 case 'u':
2073 scaled = *res;
2074 break;
2075 case 'M':
2076 scaled = *res * 6 / 25;
2077 break;
2078 default:
2079 scaled = *res;
2080 p--;
2081 break;
2082 }
2083 if (flags & ROFFNUM_SCALE)
2084 *res = scaled;
2085
2086 *pos = p + 1;
2087 return(1);
2088 }
2089
/*
 * Evaluate a string comparison condition.
 * The character at the parse point serves as the delimiter.
 * Return 1 if the text between its first and second occurrence
 * equals the text between its second and third occurrence,
 * and 0 otherwise, including when a delimiter is missing.
 * Move the parse point behind the third occurrence,
 * or to the end of the line if there is none.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim;	/* the initial delimiter */
	const char	*lhs;	/* cursor in the first string */
	const char	*rhs;	/* cursor in the second string */
	int		 equal;

	equal = 0;
	delim = v + *pos;
	lhs = delim + 1;
	rhs = strchr(lhs, *delim);

	/*
	 * With a middle delimiter, walk both strings in parallel.
	 * The first string ends at the middle delimiter, so it only
	 * equals the second if both reach a delimiter together.
	 */

	if (rhs != NULL) {
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: look for the final delimiter. */
				rhs = strchr(rhs, *delim);
				break;
			}
			if (*rhs == *delim) {
				equal = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return(equal);
}
2132
2133 /*
2134 * Evaluate an optionally negated single character, numerical,
2135 * or string condition.
2136 */
2137 static int
2138 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2139 {
2140 char *cp, *name;
2141 size_t sz;
2142 int number, savepos, wanttrue;
2143
2144 if ('!' == v[*pos]) {
2145 wanttrue = 0;
2146 (*pos)++;
2147 } else
2148 wanttrue = 1;
2149
2150 switch (v[*pos]) {
2151 case '\0':
2152 return(0);
2153 case 'n':
2154 /* FALLTHROUGH */
2155 case 'o':
2156 (*pos)++;
2157 return(wanttrue);
2158 case 'c':
2159 /* FALLTHROUGH */
2160 case 'd':
2161 /* FALLTHROUGH */
2162 case 'e':
2163 /* FALLTHROUGH */
2164 case 't':
2165 /* FALLTHROUGH */
2166 case 'v':
2167 (*pos)++;
2168 return(!wanttrue);
2169 case 'r':
2170 cp = name = v + ++*pos;
2171 sz = roff_getname(r, &cp, ln, *pos);
2172 *pos = cp - v;
2173 return((sz && roff_hasregn(r, name, sz)) == wanttrue);
2174 default:
2175 break;
2176 }
2177
2178 savepos = *pos;
2179 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2180 return((number > 0) == wanttrue);
2181 else if (*pos == savepos)
2182 return(roff_evalstrcond(v, pos) == wanttrue);
2183 else
2184 return (0);
2185 }
2186
/*
 * Request handler that discards the line without doing anything:
 * it returns ROFF_IGN unconditionally.
 */
static enum rofferr
roff_line_ignore(ROFF_ARGS)
{

	return(ROFF_IGN);
}
2193
/*
 * Request handler that issues MANDOCERR_REQ_INSEC with the name
 * of the request, then ignores the line.
 */
static enum rofferr
roff_insec(ROFF_ARGS)
{

	mandoc_msg(MANDOCERR_REQ_INSEC, r->parse,
	    ln, ppos, roffs[tok].name);
	return(ROFF_IGN);
}
2202
/*
 * Request handler that issues MANDOCERR_REQ_UNSUPP with the name
 * of the request, then ignores the line.
 */
static enum rofferr
roff_unsupp(ROFF_ARGS)
{

	mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse,
	    ln, ppos, roffs[tok].name);
	return(ROFF_IGN);
}
2211
2212 static enum rofferr
2213 roff_cond(ROFF_ARGS)
2214 {
2215
2216 roffnode_push(r, tok, NULL, ln, ppos);
2217
2218 /*
2219 * An `.el' has no conditional body: it will consume the value
2220 * of the current rstack entry set in prior `ie' calls or
2221 * defaults to DENY.
2222 *
2223 * If we're not an `el', however, then evaluate the conditional.
2224 */
2225
2226 r->last->rule = tok == ROFF_el ?
2227 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2228 roff_evalcond(r, ln, buf->buf, &pos);
2229
2230 /*
2231 * An if-else will put the NEGATION of the current evaluated
2232 * conditional into the stack of rules.
2233 */
2234
2235 if (tok == ROFF_ie) {
2236 if (r->rstackpos + 1 == r->rstacksz) {
2237 r->rstacksz += 16;
2238 r->rstack = mandoc_reallocarray(r->rstack,
2239 r->rstacksz, sizeof(int));
2240 }
2241 r->rstack[++r->rstackpos] = !r->last->rule;
2242 }
2243
2244 /* If the parent has false as its rule, then so do we. */
2245
2246 if (r->last->parent && !r->last->parent->rule)
2247 r->last->rule = 0;
2248
2249 /*
2250 * Determine scope.
2251 * If there is nothing on the line after the conditional,
2252 * not even whitespace, use next-line scope.
2253 */
2254
2255 if (buf->buf[pos] == '\0') {
2256 r->last->endspan = 2;
2257 goto out;
2258 }
2259
2260 while (buf->buf[pos] == ' ')
2261 pos++;
2262
2263 /* An opening brace requests multiline scope. */
2264
2265 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2266 r->last->endspan = -1;
2267 pos += 2;
2268 while (buf->buf[pos] == ' ')
2269 pos++;
2270 goto out;
2271 }
2272
2273 /*
2274 * Anything else following the conditional causes
2275 * single-line scope. Warn if the scope contains
2276 * nothing but trailing whitespace.
2277 */
2278
2279 if (buf->buf[pos] == '\0')
2280 mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
2281 ln, ppos, roffs[tok].name);
2282
2283 r->last->endspan = 1;
2284
2285 out:
2286 *offs = pos;
2287 return(ROFF_RERUN);
2288 }
2289
2290 static enum rofferr
2291 roff_ds(ROFF_ARGS)
2292 {
2293 char *string;
2294 const char *name;
2295 size_t namesz;
2296
2297 /* Ignore groff compatibility mode for now. */
2298
2299 if (tok == ROFF_ds1)
2300 tok = ROFF_ds;
2301 else if (tok == ROFF_as1)
2302 tok = ROFF_as;
2303
2304 /*
2305 * The first word is the name of the string.
2306 * If it is empty or terminated by an escape sequence,
2307 * abort the `ds' request without defining anything.
2308 */
2309
2310 name = string = buf->buf + pos;
2311 if (*name == '\0')
2312 return(ROFF_IGN);
2313
2314 namesz = roff_getname(r, &string, ln, pos);
2315 if (name[namesz] == '\\')
2316 return(ROFF_IGN);
2317
2318 /* Read past the initial double-quote, if any. */
2319 if (*string == '"')
2320 string++;
2321
2322 /* The rest is the value. */
2323 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2324 ROFF_as == tok);
2325 return(ROFF_IGN);
2326 }
2327
/*
 * Parse a single operator, one or two characters long.
 * If the operator is recognized, store its code in *res, advance
 * the parse point past it and return the code, which is non-zero.
 * Otherwise, return 0 and leave the parse point unchanged;
 * *res then holds the character found at the parse point.
 *
 * Single-character operators are their own code.  Two-character
 * operators use these codes: <= is l, <> is !, <? is i,
 * >= is g, >? is a, and == is the same as =.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	char	 c, look;
	int	 used;	/* number of characters consumed */

	c = v[*pos];
	*res = c;
	used = 1;

	if (c == '<' || c == '>' || c == '=') {

		/* These may combine with the following character. */

		look = v[*pos + 1];
		used = 2;
		if (c == '<' && look == '=')
			*res = 'l';
		else if (c == '<' && look == '>')
			*res = '!';
		else if (c == '<' && look == '?')
			*res = 'i';
		else if (c == '>' && look == '=')
			*res = 'g';
		else if (c == '>' && look == '?')
			*res = 'a';
		else if (c != '=' || look != '=')
			used = 1;
	} else if (c != '+' && c != '-' && c != '*' && c != '/' &&
	    c != '%' && c != '&' && c != ':')
		return(0);

	*pos += used;
	return(*res);
}
2397
2398 /*
2399 * Evaluate either a parenthesized numeric expression
2400 * or a single signed integer number.
2401 */
2402 static int
2403 roff_evalpar(struct roff *r, int ln,
2404 const char *v, int *pos, int *res, int flags)
2405 {
2406
2407 if ('(' != v[*pos])
2408 return(roff_getnum(v, pos, res, flags));
2409
2410 (*pos)++;
2411 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2412 return(0);
2413
2414 /*
2415 * Omission of the closing parenthesis
2416 * is an error in validation mode,
2417 * but ignored in evaluation mode.
2418 */
2419
2420 if (')' == v[*pos])
2421 (*pos)++;
2422 else if (NULL == res)
2423 return(0);
2424
2425 return(1);
2426 }
2427
2428 /*
2429 * Evaluate a complete numeric expression.
2430 * Proceed left to right, there is no concept of precedence.
2431 */
2432 static int
2433 roff_evalnum(struct roff *r, int ln, const char *v,
2434 int *pos, int *res, int flags)
2435 {
2436 int mypos, operand2;
2437 char operator;
2438
2439 if (NULL == pos) {
2440 mypos = 0;
2441 pos = &mypos;
2442 }
2443
2444 if (flags & ROFFNUM_WHITE)
2445 while (isspace((unsigned char)v[*pos]))
2446 (*pos)++;
2447
2448 if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2449 return(0);
2450
2451 while (1) {
2452 if (flags & ROFFNUM_WHITE)
2453 while (isspace((unsigned char)v[*pos]))
2454 (*pos)++;
2455
2456 if ( ! roff_getop(v, pos, &operator))
2457 break;
2458
2459 if (flags & ROFFNUM_WHITE)
2460 while (isspace((unsigned char)v[*pos]))
2461 (*pos)++;
2462
2463 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2464 return(0);
2465
2466 if (flags & ROFFNUM_WHITE)
2467 while (isspace((unsigned char)v[*pos]))
2468 (*pos)++;
2469
2470 if (NULL == res)
2471 continue;
2472
2473 switch (operator) {
2474 case '+':
2475 *res += operand2;
2476 break;
2477 case '-':
2478 *res -= operand2;
2479 break;
2480 case '*':
2481 *res *= operand2;
2482 break;
2483 case '/':
2484 if (operand2 == 0) {
2485 mandoc_msg(MANDOCERR_DIVZERO,
2486 r->parse, ln, *pos, v);
2487 *res = 0;
2488 break;
2489 }
2490 *res /= operand2;
2491 break;
2492 case '%':
2493 if (operand2 == 0) {
2494 mandoc_msg(MANDOCERR_DIVZERO,
2495 r->parse, ln, *pos, v);
2496 *res = 0;
2497 break;
2498 }
2499 *res %= operand2;
2500 break;
2501 case '<':
2502 *res = *res < operand2;
2503 break;
2504 case '>':
2505 *res = *res > operand2;
2506 break;
2507 case 'l':
2508 *res = *res <= operand2;
2509 break;
2510 case 'g':
2511 *res = *res >= operand2;
2512 break;
2513 case '=':
2514 *res = *res == operand2;
2515 break;
2516 case '!':
2517 *res = *res != operand2;
2518 break;
2519 case '&':
2520 *res = *res && operand2;
2521 break;
2522 case ':':
2523 *res = *res || operand2;
2524 break;
2525 case 'i':
2526 if (operand2 < *res)
2527 *res = operand2;
2528 break;
2529 case 'a':
2530 if (operand2 > *res)
2531 *res = operand2;
2532 break;
2533 default:
2534 abort();
2535 }
2536 }
2537 return(1);
2538 }
2539
2540 /* --- register management ------------------------------------------------ */
2541
2542 void
2543 roff_setreg(struct roff *r, const char *name, int val, char sign)
2544 {
2545 struct roffreg *reg;
2546
2547 /* Search for an existing register with the same name. */
2548 reg = r->regtab;
2549
2550 while (reg && strcmp(name, reg->key.p))
2551 reg = reg->next;
2552
2553 if (NULL == reg) {
2554 /* Create a new register. */
2555 reg = mandoc_malloc(sizeof(struct roffreg));
2556 reg->key.p = mandoc_strdup(name);
2557 reg->key.sz = strlen(name);
2558 reg->val = 0;
2559 reg->next = r->regtab;
2560 r->regtab = reg;
2561 }
2562
2563 if ('+' == sign)
2564 reg->val += val;
2565 else if ('-' == sign)
2566 reg->val -= val;
2567 else
2568 reg->val = val;
2569 }
2570
/*
 * Look up a predefined read-only number register.
 * Only the first byte of name is inspected; the caller is expected
 * to have stripped the leading dot.
 * Return the value of the register, or -1 if no such register is
 * predefined.  Should a predefined register with the value -1 ever
 * be needed, a different "not found" value has to be chosen.
 */
static int
roff_getregro(const char *name)
{
	static const struct {
		char	 key;
		int	 val;
	} ro[] = {
		{ 'A',  0 },	/* ASCII approximation mode is always off. */
		{ 'g',  1 },	/* Groff compatibility mode is always on. */
		{ 'H', 24 },	/* Fixed horizontal resolution. */
		{ 'j',  0 },	/* Always adjust left margin only. */
		{ 'T',  1 },	/* Some output device is always defined. */
		{ 'V', 40 }	/* Fixed vertical resolution. */
	};
	size_t	 i;

	for (i = 0; i < sizeof(ro) / sizeof(ro[0]); i++)
		if (ro[i].key == *name)
			return ro[i].val;

	return -1;
}
2598
2599 int
2600 roff_getreg(const struct roff *r, const char *name)
2601 {
2602 struct roffreg *reg;
2603 int val;
2604
2605 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
2606 val = roff_getregro(name + 1);
2607 if (-1 != val)
2608 return (val);
2609 }
2610
2611 for (reg = r->regtab; reg; reg = reg->next)
2612 if (0 == strcmp(name, reg->key.p))
2613 return(reg->val);
2614
2615 return(0);
2616 }
2617
2618 static int
2619 roff_getregn(const struct roff *r, const char *name, size_t len)
2620 {
2621 struct roffreg *reg;
2622 int val;
2623
2624 if ('.' == name[0] && 2 == len) {
2625 val = roff_getregro(name + 1);
2626 if (-1 != val)
2627 return (val);
2628 }
2629
2630 for (reg = r->regtab; reg; reg = reg->next)
2631 if (len == reg->key.sz &&
2632 0 == strncmp(name, reg->key.p, len))
2633 return(reg->val);
2634
2635 return(0);
2636 }
2637
2638 static int
2639 roff_hasregn(const struct roff *r, const char *name, size_t len)
2640 {
2641 struct roffreg *reg;
2642 int val;
2643
2644 if ('.' == name[0] && 2 == len) {
2645 val = roff_getregro(name + 1);
2646 if (-1 != val)
2647 return(1);
2648 }
2649
2650 for (reg = r->regtab; reg; reg = reg->next)
2651 if (len == reg->key.sz &&
2652 0 == strncmp(name, reg->key.p, len))
2653 return(1);
2654
2655 return(0);
2656 }
2657
2658 static void
2659 roff_freereg(struct roffreg *reg)
2660 {
2661 struct roffreg *old_reg;
2662
2663 while (NULL != reg) {
2664 free(reg->key.p);
2665 old_reg = reg;
2666 reg = reg->next;
2667 free(old_reg);
2668 }
2669 }
2670
2671 static enum rofferr
2672 roff_nr(ROFF_ARGS)
2673 {
2674 char *key, *val;
2675 size_t keysz;
2676 int iv;
2677 char sign;
2678
2679 key = val = buf->buf + pos;
2680 if (*key == '\0')
2681 return(ROFF_IGN);
2682
2683 keysz = roff_getname(r, &val, ln, pos);
2684 if (key[keysz] == '\\')
2685 return(ROFF_IGN);
2686 key[keysz] = '\0';
2687
2688 sign = *val;
2689 if (sign == '+' || sign == '-')
2690 val++;
2691
2692 if (roff_evalnum(r, ln, val, NULL, &iv, ROFFNUM_SCALE))
2693 roff_setreg(r, key, iv, sign);
2694
2695 return(ROFF_IGN);
2696 }
2697
2698 static enum rofferr
2699 roff_rr(ROFF_ARGS)
2700 {
2701 struct roffreg *reg, **prev;
2702 char *name, *cp;
2703 size_t namesz;
2704
2705 name = cp = buf->buf + pos;
2706 if (*name == '\0')
2707 return(ROFF_IGN);
2708 namesz = roff_getname(r, &cp, ln, pos);
2709 name[namesz] = '\0';
2710
2711 prev = &r->regtab;
2712 while (1) {
2713 reg = *prev;
2714 if (reg == NULL || !strcmp(name, reg->key.p))
2715 break;
2716 prev = &reg->next;
2717 }
2718 if (reg != NULL) {
2719 *prev = reg->next;
2720 free(reg->key.p);
2721 free(reg);
2722 }
2723 return(ROFF_IGN);
2724 }
2725
2726 /* --- handler functions for roff requests -------------------------------- */
2727
2728 static enum rofferr
2729 roff_rm(ROFF_ARGS)
2730 {
2731 const char *name;
2732 char *cp;
2733 size_t namesz;
2734
2735 cp = buf->buf + pos;
2736 while (*cp != '\0') {
2737 name = cp;
2738 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
2739 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
2740 if (name[namesz] == '\\')
2741 break;
2742 }
2743 return(ROFF_IGN);
2744 }
2745
2746 static enum rofferr
2747 roff_it(ROFF_ARGS)
2748 {
2749 int iv;
2750
2751 /* Parse the number of lines. */
2752
2753 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
2754 mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
2755 ln, ppos, buf->buf + 1);
2756 return(ROFF_IGN);
2757 }
2758
2759 while (isspace((unsigned char)buf->buf[pos]))
2760 pos++;
2761
2762 /*
2763 * Arm the input line trap.
2764 * Special-casing "an-trap" is an ugly workaround to cope
2765 * with DocBook stupidly fiddling with man(7) internals.
2766 */
2767
2768 roffit_lines = iv;
2769 roffit_macro = mandoc_strdup(iv != 1 ||
2770 strcmp(buf->buf + pos, "an-trap") ?
2771 buf->buf + pos : "br");
2772 return(ROFF_IGN);
2773 }
2774
2775 static enum rofferr
2776 roff_Dd(ROFF_ARGS)
2777 {
2778 const char *const *cp;
2779
2780 if ((r->options & (MPARSE_MDOC | MPARSE_QUICK)) == 0)
2781 for (cp = __mdoc_reserved; *cp; cp++)
2782 roff_setstr(r, *cp, NULL, 0);
2783
2784 if (r->format == 0)
2785 r->format = MPARSE_MDOC;
2786
2787 return(ROFF_CONT);
2788 }
2789
2790 static enum rofferr
2791 roff_TH(ROFF_ARGS)
2792 {
2793 const char *const *cp;
2794
2795 if ((r->options & MPARSE_QUICK) == 0)
2796 for (cp = __man_reserved; *cp; cp++)
2797 roff_setstr(r, *cp, NULL, 0);
2798
2799 if (r->format == 0)
2800 r->format = MPARSE_MAN;
2801
2802 return(ROFF_CONT);
2803 }
2804
2805 static enum rofferr
2806 roff_TE(ROFF_ARGS)
2807 {
2808
2809 if (NULL == r->tbl)
2810 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2811 ln, ppos, "TE");
2812 else if ( ! tbl_end(&r->tbl)) {
2813 free(buf->buf);
2814 buf->buf = mandoc_strdup(".sp");
2815 buf->sz = 4;
2816 return(ROFF_REPARSE);
2817 }
2818 return(ROFF_IGN);
2819 }
2820
2821 static enum rofferr
2822 roff_T_(ROFF_ARGS)
2823 {
2824
2825 if (NULL == r->tbl)
2826 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2827 ln, ppos, "T&");
2828 else
2829 tbl_restart(ppos, ln, r->tbl);
2830
2831 return(ROFF_IGN);
2832 }
2833
2834 /*
2835 * Handle in-line equation delimiters.
2836 */
2837 static enum rofferr
2838 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
2839 {
2840 char *cp1, *cp2;
2841 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
2842
2843 /*
2844 * Outside equations, look for an opening delimiter.
2845 * If we are inside an equation, we already know it is
2846 * in-line, or this function wouldn't have been called;
2847 * so look for a closing delimiter.
2848 */
2849
2850 cp1 = buf->buf + pos;
2851 cp2 = strchr(cp1, r->eqn == NULL ?
2852 r->last_eqn->odelim : r->last_eqn->cdelim);
2853 if (cp2 == NULL)
2854 return(ROFF_CONT);
2855
2856 *cp2++ = '\0';
2857 bef_pr = bef_nl = aft_nl = aft_pr = "";
2858
2859 /* Handle preceding text, protecting whitespace. */
2860
2861 if (*buf->buf != '\0') {
2862 if (r->eqn == NULL)
2863 bef_pr = "\\&";
2864 bef_nl = "\n";
2865 }
2866
2867 /*
2868 * Prepare replacing the delimiter with an equation macro
2869 * and drop leading white space from the equation.
2870 */
2871
2872 if (r->eqn == NULL) {
2873 while (*cp2 == ' ')
2874 cp2++;
2875 mac = ".EQ";
2876 } else
2877 mac = ".EN";
2878
2879 /* Handle following text, protecting whitespace. */
2880
2881 if (*cp2 != '\0') {
2882 aft_nl = "\n";
2883 if (r->eqn != NULL)
2884 aft_pr = "\\&";
2885 }
2886
2887 /* Do the actual replacement. */
2888
2889 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
2890 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
2891 free(buf->buf);
2892 buf->buf = cp1;
2893
2894 /* Toggle the in-line state of the eqn subsystem. */
2895
2896 r->eqn_inline = r->eqn == NULL;
2897 return(ROFF_REPARSE);
2898 }
2899
2900 static enum rofferr
2901 roff_EQ(ROFF_ARGS)
2902 {
2903 struct eqn_node *e;
2904
2905 assert(r->eqn == NULL);
2906 e = eqn_alloc(ppos, ln, r->parse);
2907
2908 if (r->last_eqn) {
2909 r->last_eqn->next = e;
2910 e->delim = r->last_eqn->delim;
2911 e->odelim = r->last_eqn->odelim;
2912 e->cdelim = r->last_eqn->cdelim;
2913 } else
2914 r->first_eqn = r->last_eqn = e;
2915
2916 r->eqn = r->last_eqn = e;
2917
2918 if (buf->buf[pos] != '\0')
2919 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2920 ".EQ %s", buf->buf + pos);
2921
2922 return(ROFF_IGN);
2923 }
2924
2925 static enum rofferr
2926 roff_EN(ROFF_ARGS)
2927 {
2928
2929 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
2930 return(ROFF_IGN);
2931 }
2932
2933 static enum rofferr
2934 roff_TS(ROFF_ARGS)
2935 {
2936 struct tbl_node *tbl;
2937
2938 if (r->tbl) {
2939 mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
2940 ln, ppos, "TS breaks TS");
2941 tbl_end(&r->tbl);
2942 }
2943
2944 tbl = tbl_alloc(ppos, ln, r->parse);
2945
2946 if (r->last_tbl)
2947 r->last_tbl->next = tbl;
2948 else
2949 r->first_tbl = r->last_tbl = tbl;
2950
2951 r->tbl = r->last_tbl = tbl;
2952 return(ROFF_IGN);
2953 }
2954
2955 static enum rofferr
2956 roff_brp(ROFF_ARGS)
2957 {
2958
2959 buf->buf[pos - 1] = '\0';
2960 return(ROFF_CONT);
2961 }
2962
2963 static enum rofferr
2964 roff_cc(ROFF_ARGS)
2965 {
2966 const char *p;
2967
2968 p = buf->buf + pos;
2969
2970 if (*p == '\0' || (r->control = *p++) == '.')
2971 r->control = 0;
2972
2973 if (*p != '\0')
2974 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
2975 ln, p - buf->buf, "cc ... %s", p);
2976
2977 return(ROFF_IGN);
2978 }
2979
2980 static enum rofferr
2981 roff_tr(ROFF_ARGS)
2982 {
2983 const char *p, *first, *second;
2984 size_t fsz, ssz;
2985 enum mandoc_esc esc;
2986
2987 p = buf->buf + pos;
2988
2989 if (*p == '\0') {
2990 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr");
2991 return(ROFF_IGN);
2992 }
2993
2994 while (*p != '\0') {
2995 fsz = ssz = 1;
2996
2997 first = p++;
2998 if (*first == '\\') {
2999 esc = mandoc_escape(&p, NULL, NULL);
3000 if (esc == ESCAPE_ERROR) {
3001 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
3002 ln, (int)(p - buf->buf), first);
3003 return(ROFF_IGN);
3004 }
3005 fsz = (size_t)(p - first);
3006 }
3007
3008 second = p++;
3009 if (*second == '\\') {
3010 esc = mandoc_escape(&p, NULL, NULL);
3011 if (esc == ESCAPE_ERROR) {
3012 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
3013 ln, (int)(p - buf->buf), second);
3014 return(ROFF_IGN);
3015 }
3016 ssz = (size_t)(p - second);
3017 } else if (*second == '\0') {
3018 mandoc_vmsg(MANDOCERR_TR_ODD, r->parse,
3019 ln, first - buf->buf, "tr %s", first);
3020 second = " ";
3021 p--;
3022 }
3023
3024 if (fsz > 1) {
3025 roff_setstrn(&r->xmbtab, first, fsz,
3026 second, ssz, 0);
3027 continue;
3028 }
3029
3030 if (r->xtab == NULL)
3031 r->xtab = mandoc_calloc(128,
3032 sizeof(struct roffstr));
3033
3034 free(r->xtab[(int)*first].p);
3035 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3036 r->xtab[(int)*first].sz = ssz;
3037 }
3038
3039 return(ROFF_IGN);
3040 }
3041
3042 static enum rofferr
3043 roff_so(ROFF_ARGS)
3044 {
3045 char *name, *cp;
3046
3047 name = buf->buf + pos;
3048 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
3049
3050 /*
3051 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3052 * opening anything that's not in our cwd or anything beneath
3053 * it. Thus, explicitly disallow traversing up the file-system
3054 * or using absolute paths.
3055 */
3056
3057 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3058 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
3059 ".so %s", name);
3060 buf->sz = mandoc_asprintf(&cp,
3061 ".sp\nSee the file %s.\n.sp", name) + 1;
3062 free(buf->buf);
3063 buf->buf = cp;
3064 *offs = 0;
3065 return(ROFF_REPARSE);
3066 }
3067
3068 *offs = pos;
3069 return(ROFF_SO);
3070 }
3071
3072 /* --- user defined strings and macros ------------------------------------ */
3073
3074 static enum rofferr
3075 roff_userdef(ROFF_ARGS)
3076 {
3077 const char *arg[9], *ap;
3078 char *cp, *n1, *n2;
3079 int i;
3080 size_t asz, rsz;
3081
3082 /*
3083 * Collect pointers to macro argument strings
3084 * and NUL-terminate them.
3085 */
3086
3087 cp = buf->buf + pos;
3088 for (i = 0; i < 9; i++)
3089 arg[i] = *cp == '\0' ? "" :
3090 mandoc_getarg(r->parse, &cp, ln, &pos);
3091
3092 /*
3093 * Expand macro arguments.
3094 */
3095
3096 buf->sz = strlen(r->current_string) + 1;
3097 n1 = cp = mandoc_malloc(buf->sz);
3098 memcpy(n1, r->current_string, buf->sz);
3099 while (*cp != '\0') {
3100
3101 /* Scan ahead for the next argument invocation. */
3102
3103 if (*cp++ != '\\')
3104 continue;
3105 if (*cp++ != '$')
3106 continue;
3107 i = *cp - '1';
3108 if (0 > i || 8 < i)
3109 continue;
3110 cp -= 2;
3111
3112 /*
3113 * Determine the size of the expanded argument,
3114 * taking escaping of quotes into account.
3115 */
3116
3117 asz = 0;
3118 for (ap = arg[i]; *ap != '\0'; ap++) {
3119 asz++;
3120 if (*ap == '"')
3121 asz += 3;
3122 }
3123 if (asz != 3) {
3124
3125 /*
3126 * Determine the size of the rest of the
3127 * unexpanded macro, including the NUL.
3128 */
3129
3130 rsz = buf->sz - (cp - n1) - 3;
3131
3132 /*
3133 * When shrinking, move before
3134 * releasing the storage.
3135 */
3136
3137 if (asz < 3)
3138 memmove(cp + asz, cp + 3, rsz);
3139
3140 /*
3141 * Resize the storage for the macro
3142 * and readjust the parse pointer.
3143 */
3144
3145 buf->sz += asz - 3;
3146 n2 = mandoc_realloc(n1, buf->sz);
3147 cp = n2 + (cp - n1);
3148 n1 = n2;
3149
3150 /*
3151 * When growing, make room
3152 * for the expanded argument.
3153 */
3154
3155 if (asz > 3)
3156 memmove(cp + asz, cp + 3, rsz);
3157 }
3158
3159 /* Copy the expanded argument, escaping quotes. */
3160
3161 n2 = cp;
3162 for (ap = arg[i]; *ap != '\0'; ap++) {
3163 if (*ap == '"') {
3164 memcpy(n2, "\\(dq", 4);
3165 n2 += 4;
3166 } else
3167 *n2++ = *ap;
3168 }
3169 }
3170
3171 /*
3172 * Replace the macro invocation
3173 * by the expanded macro.
3174 */
3175
3176 free(buf->buf);
3177 buf->buf = n1;
3178 *offs = 0;
3179
3180 return(buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
3181 ROFF_REPARSE : ROFF_APPEND);
3182 }
3183
3184 static size_t
3185 roff_getname(struct roff *r, char **cpp, int ln, int pos)
3186 {
3187 char *name, *cp;
3188 size_t namesz;
3189
3190 name = *cpp;
3191 if ('\0' == *name)
3192 return(0);
3193
3194 /* Read until end of name and terminate it with NUL. */
3195 for (cp = name; 1; cp++) {
3196 if ('\0' == *cp || ' ' == *cp) {
3197 namesz = cp - name;
3198 break;
3199 }
3200 if ('\\' != *cp)
3201 continue;
3202 namesz = cp - name;
3203 if ('{' == cp[1] || '}' == cp[1])
3204 break;
3205 cp++;
3206 if ('\\' == *cp)
3207 continue;
3208 mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
3209 "%.*s", (int)(cp - name + 1), name);
3210 mandoc_escape((const char **)&cp, NULL, NULL);
3211 break;
3212 }
3213
3214 /* Read past spaces. */
3215 while (' ' == *cp)
3216 cp++;
3217
3218 *cpp = cp;
3219 return(namesz);
3220 }
3221
3222 /*
3223 * Store *string into the user-defined string called *name.
3224 * To clear an existing entry, call with (*r, *name, NULL, 0).
3225 * append == 0: replace mode
3226 * append == 1: single-line append mode
3227 * append == 2: multiline append mode, append '\n' after each call
3228 */
3229 static void
3230 roff_setstr(struct roff *r, const char *name, const char *string,
3231 int append)
3232 {
3233
3234 roff_setstrn(&r->strtab, name, strlen(name), string,
3235 string ? strlen(string) : 0, append);
3236 }
3237
3238 static void
3239 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
3240 const char *string, size_t stringsz, int append)
3241 {
3242 struct roffkv *n;
3243 char *c;
3244 int i;
3245 size_t oldch, newch;
3246
3247 /* Search for an existing string with the same name. */
3248 n = *r;
3249
3250 while (n && (namesz != n->key.sz ||
3251 strncmp(n->key.p, name, namesz)))
3252 n = n->next;
3253
3254 if (NULL == n) {
3255 /* Create a new string table entry. */
3256 n = mandoc_malloc(sizeof(struct roffkv));
3257 n->key.p = mandoc_strndup(name, namesz);
3258 n->key.sz = namesz;
3259 n->val.p = NULL;
3260 n->val.sz = 0;
3261 n->next = *r;
3262 *r = n;
3263 } else if (0 == append) {
3264 free(n->val.p);
3265 n->val.p = NULL;
3266 n->val.sz = 0;
3267 }
3268
3269 if (NULL == string)
3270 return;
3271
3272 /*
3273 * One additional byte for the '\n' in multiline mode,
3274 * and one for the terminating '\0'.
3275 */
3276 newch = stringsz + (1 < append ? 2u : 1u);
3277
3278 if (NULL == n->val.p) {
3279 n->val.p = mandoc_malloc(newch);
3280 *n->val.p = '\0';
3281 oldch = 0;
3282 } else {
3283 oldch = n->val.sz;
3284 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
3285 }
3286
3287 /* Skip existing content in the destination buffer. */
3288 c = n->val.p + (int)oldch;
3289
3290 /* Append new content to the destination buffer. */
3291 i = 0;
3292 while (i < (int)stringsz) {
3293 /*
3294 * Rudimentary roff copy mode:
3295 * Handle escaped backslashes.
3296 */
3297 if ('\\' == string[i] && '\\' == string[i + 1])
3298 i++;
3299 *c++ = string[i++];
3300 }
3301
3302 /* Append terminating bytes. */
3303 if (1 < append)
3304 *c++ = '\n';
3305
3306 *c = '\0';
3307 n->val.sz = (int)(c - n->val.p);
3308 }
3309
3310 static const char *
3311 roff_getstrn(const struct roff *r, const char *name, size_t len)
3312 {
3313 const struct roffkv *n;
3314 int i;
3315
3316 for (n = r->strtab; n; n = n->next)
3317 if (0 == strncmp(name, n->key.p, len) &&
3318 '\0' == n->key.p[(int)len])
3319 return(n->val.p);
3320
3321 for (i = 0; i < PREDEFS_MAX; i++)
3322 if (0 == strncmp(name, predefs[i].name, len) &&
3323 '\0' == predefs[i].name[(int)len])
3324 return(predefs[i].str);
3325
3326 return(NULL);
3327 }
3328
3329 static void
3330 roff_freestr(struct roffkv *r)
3331 {
3332 struct roffkv *n, *nn;
3333
3334 for (n = r; n; n = nn) {
3335 free(n->key.p);
3336 free(n->val.p);
3337 nn = n->next;
3338 free(n);
3339 }
3340 }
3341
3342 /* --- accessors and utility functions ------------------------------------ */
3343
3344 const struct tbl_span *
3345 roff_span(const struct roff *r)
3346 {
3347
3348 return(r->tbl ? tbl_span(r->tbl) : NULL);
3349 }
3350
3351 const struct eqn *
3352 roff_eqn(const struct roff *r)
3353 {
3354
3355 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
3356 }
3357
3358 /*
3359 * Duplicate an input string, making the appropriate character
3360 * conversations (as stipulated by `tr') along the way.
3361 * Returns a heap-allocated string with all the replacements made.
3362 */
3363 char *
3364 roff_strdup(const struct roff *r, const char *p)
3365 {
3366 const struct roffkv *cp;
3367 char *res;
3368 const char *pp;
3369 size_t ssz, sz;
3370 enum mandoc_esc esc;
3371
3372 if (NULL == r->xmbtab && NULL == r->xtab)
3373 return(mandoc_strdup(p));
3374 else if ('\0' == *p)
3375 return(mandoc_strdup(""));
3376
3377 /*
3378 * Step through each character looking for term matches
3379 * (remember that a `tr' can be invoked with an escape, which is
3380 * a glyph but the escape is multi-character).
3381 * We only do this if the character hash has been initialised
3382 * and the string is >0 length.
3383 */
3384
3385 res = NULL;
3386 ssz = 0;
3387
3388 while ('\0' != *p) {
3389 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
3390 sz = r->xtab[(int)*p].sz;
3391 res = mandoc_realloc(res, ssz + sz + 1);
3392 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
3393 ssz += sz;
3394 p++;
3395 continue;
3396 } else if ('\\' != *p) {
3397 res = mandoc_realloc(res, ssz + 2);
3398 res[ssz++] = *p++;
3399 continue;
3400 }
3401
3402 /* Search for term matches. */
3403 for (cp = r->xmbtab; cp; cp = cp->next)
3404 if (0 == strncmp(p, cp->key.p, cp->key.sz))
3405 break;
3406
3407 if (NULL != cp) {
3408 /*
3409 * A match has been found.
3410 * Append the match to the array and move
3411 * forward by its keysize.
3412 */
3413 res = mandoc_realloc(res,
3414 ssz + cp->val.sz + 1);
3415 memcpy(res + ssz, cp->val.p, cp->val.sz);
3416 ssz += cp->val.sz;
3417 p += (int)cp->key.sz;
3418 continue;
3419 }
3420
3421 /*
3422 * Handle escapes carefully: we need to copy
3423 * over just the escape itself, or else we might
3424 * do replacements within the escape itself.
3425 * Make sure to pass along the bogus string.
3426 */
3427 pp = p++;
3428 esc = mandoc_escape(&p, NULL, NULL);
3429 if (ESCAPE_ERROR == esc) {
3430 sz = strlen(pp);
3431 res = mandoc_realloc(res, ssz + sz + 1);
3432 memcpy(res + ssz, pp, sz);
3433 break;
3434 }
3435 /*
3436 * We bail out on bad escapes.
3437 * No need to warn: we already did so when
3438 * roff_res() was called.
3439 */
3440 sz = (int)(p - pp);
3441 res = mandoc_realloc(res, ssz + sz + 1);
3442 memcpy(res + ssz, pp, sz);
3443 ssz += sz;
3444 }
3445
3446 res[(int)ssz] = '\0';
3447 return(res);
3448 }
3449
3450 int
3451 roff_getformat(const struct roff *r)
3452 {
3453
3454 return(r->format);
3455 }
3456
3457 /*
3458 * Find out whether a line is a macro line or not.
3459 * If it is, adjust the current position and return one; if it isn't,
3460 * return zero and don't change the current position.
3461 * If the control character has been set with `.cc', then let that grain
3462 * precedence.
3463 * This is slighly contrary to groff, where using the non-breaking
3464 * control character when `cc' has been invoked will cause the
3465 * non-breaking macro contents to be printed verbatim.
3466 */
3467 int
3468 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
3469 {
3470 int pos;
3471
3472 pos = *ppos;
3473
3474 if (0 != r->control && cp[pos] == r->control)
3475 pos++;
3476 else if (0 != r->control)
3477 return(0);
3478 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
3479 pos += 2;
3480 else if ('.' == cp[pos] || '\'' == cp[pos])
3481 pos++;
3482 else
3483 return(0);
3484
3485 while (' ' == cp[pos] || '\t' == cp[pos])
3486 pos++;
3487
3488 *ppos = pos;
3489 return(1);
3490 }