]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
If we have to reparse the text line because we spring an input line trap,
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.275 2015/08/29 23:56:01 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28
29 #include "mandoc.h"
30 #include "mandoc_aux.h"
31 #include "roff.h"
32 #include "libmandoc.h"
33 #include "roff_int.h"
34 #include "libroff.h"
35
/*
 * Maximum number of nested if-else conditionals.
 * NOTE(review): presumably the upper bound for the rstack member of
 * struct roff; the use sites are outside this part of the file.
 */
#define	RSTACK_MAX	128

/*
 * Maximum number of string expansions per line, to break infinite loops.
 * NOTE(review): the expansion code is outside this part of the file.
 */
#define	EXPAND_LIMIT	1000
41
42 /* --- data types --------------------------------------------------------- */
43
/*
 * Identifiers of all roff requests known to this parser.
 *
 * The enumerators are used as indices into the roffs[] table:
 * roffhash_find() converts a table entry back into its identifier
 * by pointer subtraction, so the order here must match the order of
 * the roffs[] initializer exactly.
 *
 * ROFF_cblock corresponds to the "." entry of roffs[], ROFF_USERDEF
 * to the entry without a name.  ROFF_MAX is the number of entries
 * and doubles as the "not found" result of roffhash_find().
 */
enum	rofft {
	ROFF_ab,
	ROFF_ad,
	ROFF_af,
	ROFF_aln,
	ROFF_als,
	ROFF_am,
	ROFF_am1,
	ROFF_ami,
	ROFF_ami1,
	ROFF_as,
	ROFF_as1,
	ROFF_asciify,
	ROFF_backtrace,
	ROFF_bd,
	ROFF_bleedat,
	ROFF_blm,
	ROFF_box,
	ROFF_boxa,
	ROFF_bp,
	ROFF_BP,
	/* MAN_br, MDOC_br */
	ROFF_break,
	ROFF_breakchar,
	ROFF_brnl,
	ROFF_brp,
	ROFF_brpnl,
	ROFF_c2,
	ROFF_cc,
	ROFF_ce,
	ROFF_cf,
	ROFF_cflags,
	ROFF_ch,
	ROFF_char,
	ROFF_chop,
	ROFF_class,
	ROFF_close,
	ROFF_CL,
	ROFF_color,
	ROFF_composite,
	ROFF_continue,
	ROFF_cp,
	ROFF_cropat,
	ROFF_cs,
	ROFF_cu,
	ROFF_da,
	ROFF_dch,
	ROFF_Dd,
	ROFF_de,
	ROFF_de1,
	ROFF_defcolor,
	ROFF_dei,
	ROFF_dei1,
	ROFF_device,
	ROFF_devicem,
	ROFF_di,
	ROFF_do,
	ROFF_ds,
	ROFF_ds1,
	ROFF_dwh,
	ROFF_dt,
	ROFF_ec,
	ROFF_ecr,
	ROFF_ecs,
	ROFF_el,
	ROFF_em,
	ROFF_EN,
	ROFF_eo,
	ROFF_EP,
	ROFF_EQ,
	ROFF_errprint,
	ROFF_ev,
	ROFF_evc,
	ROFF_ex,
	ROFF_fallback,
	ROFF_fam,
	ROFF_fc,
	ROFF_fchar,
	ROFF_fcolor,
	ROFF_fdeferlig,
	ROFF_feature,
	/* MAN_fi; ignored in mdoc(7) */
	ROFF_fkern,
	ROFF_fl,
	ROFF_flig,
	ROFF_fp,
	ROFF_fps,
	ROFF_fschar,
	ROFF_fspacewidth,
	ROFF_fspecial,
	/* MAN_ft; ignored in mdoc(7) */
	ROFF_ftr,
	ROFF_fzoom,
	ROFF_gcolor,
	ROFF_hc,
	ROFF_hcode,
	ROFF_hidechar,
	ROFF_hla,
	ROFF_hlm,
	ROFF_hpf,
	ROFF_hpfa,
	ROFF_hpfcode,
	ROFF_hw,
	ROFF_hy,
	ROFF_hylang,
	ROFF_hylen,
	ROFF_hym,
	ROFF_hypp,
	ROFF_hys,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	/* MAN_in; ignored in mdoc(7) */
	ROFF_index,
	ROFF_it,
	ROFF_itc,
	ROFF_IX,
	ROFF_kern,
	ROFF_kernafter,
	ROFF_kernbefore,
	ROFF_kernpair,
	ROFF_lc,
	ROFF_lc_ctype,
	ROFF_lds,
	ROFF_length,
	ROFF_letadj,
	ROFF_lf,
	ROFF_lg,
	ROFF_lhang,
	ROFF_linetabs,
	/* MAN_ll, MDOC_ll */
	ROFF_lnr,
	ROFF_lnrf,
	ROFF_lpfx,
	ROFF_ls,
	ROFF_lsm,
	ROFF_lt,
	ROFF_mc,
	ROFF_mediasize,
	ROFF_minss,
	ROFF_mk,
	ROFF_mso,
	ROFF_na,
	ROFF_ne,
	/* MAN_nf; ignored in mdoc(7) */
	ROFF_nh,
	ROFF_nhychar,
	ROFF_nm,
	ROFF_nn,
	ROFF_nop,
	ROFF_nr,
	ROFF_nrf,
	ROFF_nroff,
	ROFF_ns,
	ROFF_nx,
	ROFF_open,
	ROFF_opena,
	ROFF_os,
	ROFF_output,
	ROFF_padj,
	ROFF_papersize,
	ROFF_pc,
	ROFF_pev,
	ROFF_pi,
	ROFF_PI,
	ROFF_pl,
	ROFF_pm,
	ROFF_pn,
	ROFF_pnr,
	ROFF_po,
	ROFF_ps,
	ROFF_psbb,
	ROFF_pshape,
	ROFF_pso,
	ROFF_ptr,
	ROFF_pvs,
	ROFF_rchar,
	ROFF_rd,
	ROFF_recursionlimit,
	ROFF_return,
	ROFF_rfschar,
	ROFF_rhang,
	ROFF_rj,
	ROFF_rm,
	ROFF_rn,
	ROFF_rnn,
	ROFF_rr,
	ROFF_rs,
	ROFF_rt,
	ROFF_schar,
	ROFF_sentchar,
	ROFF_shc,
	ROFF_shift,
	ROFF_sizes,
	ROFF_so,
	/* MAN_sp, MDOC_sp */
	ROFF_spacewidth,
	ROFF_special,
	ROFF_spreadwarn,
	ROFF_ss,
	ROFF_sty,
	ROFF_substring,
	ROFF_sv,
	ROFF_sy,
	ROFF_T_,
	ROFF_ta,
	ROFF_tc,
	ROFF_TE,
	ROFF_TH,
	ROFF_ti,
	ROFF_tkf,
	ROFF_tl,
	ROFF_tm,
	ROFF_tm1,
	ROFF_tmc,
	ROFF_tr,
	ROFF_track,
	ROFF_transchar,
	ROFF_trf,
	ROFF_trimat,
	ROFF_trin,
	ROFF_trnt,
	ROFF_troff,
	ROFF_TS,
	ROFF_uf,
	ROFF_ul,
	ROFF_unformat,
	ROFF_unwatch,
	ROFF_unwatchn,
	ROFF_vpt,
	ROFF_vs,
	ROFF_warn,
	ROFF_warnscale,
	ROFF_watch,
	ROFF_watchlength,
	ROFF_watchn,
	ROFF_wh,
	ROFF_while,
	ROFF_write,
	ROFF_writec,
	ROFF_writem,
	ROFF_xflag,
	ROFF_cblock,
	ROFF_USERDEF,
	ROFF_MAX
};
290
/*
 * An incredibly-simple string buffer: a pointer together with the
 * cached length of the string it points to.  Used for the keys and
 * values of struct roffkv and struct roffreg and for the entries of
 * the xtab array in struct roff.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
298
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * struct roff keeps two such lists: strtab and xmbtab.
 */
struct	roffkv {
	struct roffstr	 key;
	struct roffstr	 val;
	struct roffkv	*next; /* next in list */
};
307
/*
 * A single number register as part of a singly-linked list.
 * The head of the list is the regtab member of struct roff.
 */
struct	roffreg {
	struct roffstr	 key; /* register name */
	int		 val; /* current register value */
	struct roffreg	*next; /* next in list */
};
316
/*
 * State of one roff parser instance.
 * Allocated zero-filled by roff_alloc(), returned to its initial state
 * by roff_reset(), and released by roff_free().
 */
struct	roff {
	struct mparse	*parse; /* parse point */
	const struct mchars *mchars; /* character table */
	struct roffnode	*last; /* leaf of stack */
	int		*rstack; /* stack of inverted `ie' values */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* last equation parsed */
	struct eqn_node	*first_eqn; /* first equation parsed */
	struct eqn_node	*eqn; /* current equation being parsed */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack; -1 after alloc and reset */
	int		 format; /* current file in mdoc or man format */
	int		 argc; /* number of args of the last macro */
	char		 control; /* control character */
};
341
/*
 * One entry of the stack of pending roff block requests.
 * Entries are created by roffnode_push() and destroyed by
 * roffnode_pop(), which also frees the name and end strings.
 */
struct	roffnode {
	enum rofft	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* end-rules: custom token */
	int		 endspan; /* end-rules: next-line or infty */
	int		 rule; /* current evaluation rule; inherited from parent on push */
};
352
/*
 * Parameter list shared by all request handlers, so that every
 * handler has exactly the signature required by roffproc below.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum rofft tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Pointer to a request handler; see the proc, text and sub members of struct roffmac. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
362
/*
 * Description of one roff request: its name and the handlers
 * to call for it.  The roffs[] table holds one of these for each
 * enum rofft value; handlers that do not apply are NULL.
 */
struct	roffmac {
	const char	*name; /* macro name */
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
	struct roffmac	*next; /* next entry in the same hash bucket, set by roffhash_init() */
};
372
/*
 * A predefined string: a name and the text that replaces it.
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Expands to one struct predef initializer, including the trailing
 * comma.  NOTE(review): presumably used by the lines of "predefs.in",
 * which is included into the initializer of predefs[] - confirm there.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
380
381 /* --- function prototypes ------------------------------------------------ */
382
383 static enum rofft roffhash_find(const char *, size_t);
384 static void roffhash_init(void);
385 static void roffnode_cleanscope(struct roff *);
386 static void roffnode_pop(struct roff *);
387 static void roffnode_push(struct roff *, enum rofft,
388 const char *, int, int);
389 static enum rofferr roff_block(ROFF_ARGS);
390 static enum rofferr roff_block_text(ROFF_ARGS);
391 static enum rofferr roff_block_sub(ROFF_ARGS);
392 static enum rofferr roff_brp(ROFF_ARGS);
393 static enum rofferr roff_cblock(ROFF_ARGS);
394 static enum rofferr roff_cc(ROFF_ARGS);
395 static void roff_ccond(struct roff *, int, int);
396 static enum rofferr roff_cond(ROFF_ARGS);
397 static enum rofferr roff_cond_text(ROFF_ARGS);
398 static enum rofferr roff_cond_sub(ROFF_ARGS);
399 static enum rofferr roff_ds(ROFF_ARGS);
400 static enum rofferr roff_eqndelim(struct roff *, struct buf *, int);
401 static int roff_evalcond(struct roff *r, int, char *, int *);
402 static int roff_evalnum(struct roff *, int,
403 const char *, int *, int *, int);
404 static int roff_evalpar(struct roff *, int,
405 const char *, int *, int *, int);
406 static int roff_evalstrcond(const char *, int *);
407 static void roff_free1(struct roff *);
408 static void roff_freereg(struct roffreg *);
409 static void roff_freestr(struct roffkv *);
410 static size_t roff_getname(struct roff *, char **, int, int);
411 static int roff_getnum(const char *, int *, int *, int);
412 static int roff_getop(const char *, int *, char *);
413 static int roff_getregn(const struct roff *,
414 const char *, size_t);
415 static int roff_getregro(const struct roff *,
416 const char *name);
417 static const char *roff_getstrn(const struct roff *,
418 const char *, size_t);
419 static int roff_hasregn(const struct roff *,
420 const char *, size_t);
421 static enum rofferr roff_insec(ROFF_ARGS);
422 static enum rofferr roff_it(ROFF_ARGS);
423 static enum rofferr roff_line_ignore(ROFF_ARGS);
424 static void roff_man_alloc1(struct roff_man *);
425 static void roff_man_free1(struct roff_man *);
426 static enum rofferr roff_nr(ROFF_ARGS);
427 static enum rofft roff_parse(struct roff *, char *, int *,
428 int, int);
429 static enum rofferr roff_parsetext(struct buf *, int, int *);
430 static enum rofferr roff_res(struct roff *, struct buf *, int, int);
431 static enum rofferr roff_rm(ROFF_ARGS);
432 static enum rofferr roff_rr(ROFF_ARGS);
433 static void roff_setstr(struct roff *,
434 const char *, const char *, int);
435 static void roff_setstrn(struct roffkv **, const char *,
436 size_t, const char *, size_t, int);
437 static enum rofferr roff_so(ROFF_ARGS);
438 static enum rofferr roff_tr(ROFF_ARGS);
439 static enum rofferr roff_Dd(ROFF_ARGS);
440 static enum rofferr roff_TH(ROFF_ARGS);
441 static enum rofferr roff_TE(ROFF_ARGS);
442 static enum rofferr roff_TS(ROFF_ARGS);
443 static enum rofferr roff_EQ(ROFF_ARGS);
444 static enum rofferr roff_EN(ROFF_ARGS);
445 static enum rofferr roff_T_(ROFF_ARGS);
446 static enum rofferr roff_unsupp(ROFF_ARGS);
447 static enum rofferr roff_userdef(ROFF_ARGS);
448
449 /* --- constant data ------------------------------------------------------ */
450
/* See roffhash_find() */

/*
 * Range of first characters that a request name may have.
 * roffhash_find() rejects names starting outside this range and
 * roffhash_init() asserts that all built-in names start inside it.
 */
#define	ASCII_HI	 126
#define	ASCII_LO	 33
/* Number of hash buckets: one for each possible first character. */
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */

/*
 * Heads of the hash chains, indexed by ROFF_HASH(name).
 * Filled in by roffhash_init(), searched by roffhash_find().
 */
static	struct roffmac	*hash[HASHWIDTH];
461
/*
 * Table of all requests, indexed by enum rofft; the order of the
 * entries must match the order of the enumerators exactly.
 * Each entry is { name, proc, text, sub, flags, next }, see
 * struct roffmac.  The next member is NULL here and is filled in
 * at run time by roffhash_init().
 * The final entry (ROFF_USERDEF) has no name; roffhash_init() stops
 * before it, so it can never be found by name.
 */
static	struct roffmac	 roffs[ROFF_MAX] = {
	{ "ab", roff_unsupp, NULL, NULL, 0, NULL },
	{ "ad", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "af", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "aln", roff_unsupp, NULL, NULL, 0, NULL },
	{ "als", roff_unsupp, NULL, NULL, 0, NULL },
	{ "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ami1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "as", roff_ds, NULL, NULL, 0, NULL },
	{ "as1", roff_ds, NULL, NULL, 0, NULL },
	{ "asciify", roff_unsupp, NULL, NULL, 0, NULL },
	{ "backtrace", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "bd", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "bleedat", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "blm", roff_unsupp, NULL, NULL, 0, NULL },
	{ "box", roff_unsupp, NULL, NULL, 0, NULL },
	{ "boxa", roff_unsupp, NULL, NULL, 0, NULL },
	{ "bp", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "BP", roff_unsupp, NULL, NULL, 0, NULL },
	{ "break", roff_unsupp, NULL, NULL, 0, NULL },
	{ "breakchar", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "brnl", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "brp", roff_brp, NULL, NULL, 0, NULL },
	{ "brpnl", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "c2", roff_unsupp, NULL, NULL, 0, NULL },
	{ "cc", roff_cc, NULL, NULL, 0, NULL },
	{ "ce", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "cf", roff_insec, NULL, NULL, 0, NULL },
	{ "cflags", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ch", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "char", roff_unsupp, NULL, NULL, 0, NULL },
	{ "chop", roff_unsupp, NULL, NULL, 0, NULL },
	{ "class", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "close", roff_insec, NULL, NULL, 0, NULL },
	{ "CL", roff_unsupp, NULL, NULL, 0, NULL },
	{ "color", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "composite", roff_unsupp, NULL, NULL, 0, NULL },
	{ "continue", roff_unsupp, NULL, NULL, 0, NULL },
	{ "cp", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "cropat", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "cs", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "cu", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "da", roff_unsupp, NULL, NULL, 0, NULL },
	{ "dch", roff_unsupp, NULL, NULL, 0, NULL },
	{ "Dd", roff_Dd, NULL, NULL, 0, NULL },
	{ "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "defcolor", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "dei1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "device", roff_unsupp, NULL, NULL, 0, NULL },
	{ "devicem", roff_unsupp, NULL, NULL, 0, NULL },
	{ "di", roff_unsupp, NULL, NULL, 0, NULL },
	{ "do", roff_unsupp, NULL, NULL, 0, NULL },
	{ "ds", roff_ds, NULL, NULL, 0, NULL },
	{ "ds1", roff_ds, NULL, NULL, 0, NULL },
	{ "dwh", roff_unsupp, NULL, NULL, 0, NULL },
	{ "dt", roff_unsupp, NULL, NULL, 0, NULL },
	{ "ec", roff_unsupp, NULL, NULL, 0, NULL },
	{ "ecr", roff_unsupp, NULL, NULL, 0, NULL },
	{ "ecs", roff_unsupp, NULL, NULL, 0, NULL },
	{ "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "em", roff_unsupp, NULL, NULL, 0, NULL },
	{ "EN", roff_EN, NULL, NULL, 0, NULL },
	{ "eo", roff_unsupp, NULL, NULL, 0, NULL },
	{ "EP", roff_unsupp, NULL, NULL, 0, NULL },
	{ "EQ", roff_EQ, NULL, NULL, 0, NULL },
	{ "errprint", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ev", roff_unsupp, NULL, NULL, 0, NULL },
	{ "evc", roff_unsupp, NULL, NULL, 0, NULL },
	{ "ex", roff_unsupp, NULL, NULL, 0, NULL },
	{ "fallback", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fam", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fc", roff_unsupp, NULL, NULL, 0, NULL },
	{ "fchar", roff_unsupp, NULL, NULL, 0, NULL },
	{ "fcolor", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fdeferlig", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "feature", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fkern", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fl", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "flig", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fp", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fps", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fschar", roff_unsupp, NULL, NULL, 0, NULL },
	{ "fspacewidth", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fspecial", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ftr", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fzoom", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "gcolor", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hc", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hcode", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hidechar", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hla", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hlm", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hpf", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hpfa", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hpfcode", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hw", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hy", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hylang", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hylen", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hym", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hypp", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hys", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "index", roff_unsupp, NULL, NULL, 0, NULL },
	{ "it", roff_it, NULL, NULL, 0, NULL },
	{ "itc", roff_unsupp, NULL, NULL, 0, NULL },
	{ "IX", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "kern", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "kernafter", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "kernbefore", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "kernpair", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "lc", roff_unsupp, NULL, NULL, 0, NULL },
	{ "lc_ctype", roff_unsupp, NULL, NULL, 0, NULL },
	{ "lds", roff_unsupp, NULL, NULL, 0, NULL },
	{ "length", roff_unsupp, NULL, NULL, 0, NULL },
	{ "letadj", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "lf", roff_insec, NULL, NULL, 0, NULL },
	{ "lg", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "lhang", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "linetabs", roff_unsupp, NULL, NULL, 0, NULL },
	{ "lnr", roff_unsupp, NULL, NULL, 0, NULL },
	{ "lnrf", roff_unsupp, NULL, NULL, 0, NULL },
	{ "lpfx", roff_unsupp, NULL, NULL, 0, NULL },
	{ "ls", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "lsm", roff_unsupp, NULL, NULL, 0, NULL },
	{ "lt", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "mc", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "mediasize", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "minss", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "mk", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "mso", roff_insec, NULL, NULL, 0, NULL },
	{ "na", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ne", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nh", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nhychar", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nm", roff_unsupp, NULL, NULL, 0, NULL },
	{ "nn", roff_unsupp, NULL, NULL, 0, NULL },
	{ "nop", roff_unsupp, NULL, NULL, 0, NULL },
	{ "nr", roff_nr, NULL, NULL, 0, NULL },
	{ "nrf", roff_unsupp, NULL, NULL, 0, NULL },
	{ "nroff", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ns", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nx", roff_insec, NULL, NULL, 0, NULL },
	{ "open", roff_insec, NULL, NULL, 0, NULL },
	{ "opena", roff_insec, NULL, NULL, 0, NULL },
	{ "os", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "output", roff_unsupp, NULL, NULL, 0, NULL },
	{ "padj", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "papersize", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "pc", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "pev", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "pi", roff_insec, NULL, NULL, 0, NULL },
	{ "PI", roff_unsupp, NULL, NULL, 0, NULL },
	{ "pl", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "pm", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "pn", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "pnr", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "po", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ps", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "psbb", roff_unsupp, NULL, NULL, 0, NULL },
	{ "pshape", roff_unsupp, NULL, NULL, 0, NULL },
	{ "pso", roff_insec, NULL, NULL, 0, NULL },
	{ "ptr", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "pvs", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rchar", roff_unsupp, NULL, NULL, 0, NULL },
	{ "rd", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "recursionlimit", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "return", roff_unsupp, NULL, NULL, 0, NULL },
	{ "rfschar", roff_unsupp, NULL, NULL, 0, NULL },
	{ "rhang", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rj", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rm", roff_rm, NULL, NULL, 0, NULL },
	{ "rn", roff_unsupp, NULL, NULL, 0, NULL },
	{ "rnn", roff_unsupp, NULL, NULL, 0, NULL },
	{ "rr", roff_rr, NULL, NULL, 0, NULL },
	{ "rs", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rt", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "schar", roff_unsupp, NULL, NULL, 0, NULL },
	{ "sentchar", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "shc", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "shift", roff_unsupp, NULL, NULL, 0, NULL },
	{ "sizes", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "so", roff_so, NULL, NULL, 0, NULL },
	{ "spacewidth", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "special", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "spreadwarn", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ss", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "sty", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "substring", roff_unsupp, NULL, NULL, 0, NULL },
	{ "sv", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "sy", roff_insec, NULL, NULL, 0, NULL },
	{ "T&", roff_T_, NULL, NULL, 0, NULL },
	{ "ta", roff_unsupp, NULL, NULL, 0, NULL },
	{ "tc", roff_unsupp, NULL, NULL, 0, NULL },
	{ "TE", roff_TE, NULL, NULL, 0, NULL },
	{ "TH", roff_TH, NULL, NULL, 0, NULL },
	{ "ti", roff_unsupp, NULL, NULL, 0, NULL },
	{ "tkf", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tl", roff_unsupp, NULL, NULL, 0, NULL },
	{ "tm", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tm1", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tmc", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tr", roff_tr, NULL, NULL, 0, NULL },
	{ "track", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "transchar", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "trf", roff_insec, NULL, NULL, 0, NULL },
	{ "trimat", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "trin", roff_unsupp, NULL, NULL, 0, NULL },
	{ "trnt", roff_unsupp, NULL, NULL, 0, NULL },
	{ "troff", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "TS", roff_TS, NULL, NULL, 0, NULL },
	{ "uf", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ul", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "unformat", roff_unsupp, NULL, NULL, 0, NULL },
	{ "unwatch", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "unwatchn", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "vpt", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "vs", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "warn", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "warnscale", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "watch", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "watchlength", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "watchn", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "wh", roff_unsupp, NULL, NULL, 0, NULL },
	{ "while", roff_unsupp, NULL, NULL, 0, NULL },
	{ "write", roff_insec, NULL, NULL, 0, NULL },
	{ "writec", roff_insec, NULL, NULL, 0, NULL },
	{ "writem", roff_insec, NULL, NULL, 0, NULL },
	{ "xflag", roff_line_ignore, NULL, NULL, 0, NULL },
	{ ".", roff_cblock, NULL, NULL, 0, NULL },
	{ NULL, roff_userdef, NULL, NULL, 0, NULL },
};
700
/*
 * NULL-terminated list of mdoc macro names.  The array has external
 * linkage, so it is visible to other translation units.
 * NOTE(review): the code consulting this list is outside this part
 * of the file; confirm the exact purpose at the use sites.
 */
/* not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH */
const	char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
	"Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
	"Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em",
	"En", "Eo", "Er", "Es", "Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp",
	"Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "Pq",
	"Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv",
	"Sc", "Sh", "Sm", "So", "Sq",
	"Ss", "St", "Sx", "Sy",
	"Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
	"%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
723
/*
 * NULL-terminated list of man macro names.  The array has external
 * linkage, so it is visible to other translation units.
 * NOTE(review): the code consulting this list is outside this part
 * of the file; confirm the exact purpose at the use sites.
 */
/* not currently implemented: BT DE DS ME MT PT SY TQ YS */
const	char *const __man_reserved[] = {
	"AT", "B", "BI", "BR", "DT",
	"EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR",
	"LP", "OP", "P", "PD", "PP",
	"R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS",
	"TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR",
	NULL
};
733
/*
 * Array of injected predefined strings.
 * The initializers come from "predefs.in".  Because the array size
 * is given explicitly, PREDEFS_MAX has to be kept equal to the number
 * of entries in that file: with fewer entries the remaining elements
 * are zero-filled (NULL name and str), with more the build fails.
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
739
/*
 * Map a request name to its hash bucket: the offset of the first
 * character of the name from ASCII_LO.  The caller must make sure
 * that the first character is not below ASCII_LO; see roffhash_find().
 * The argument is parenthesized such that pointer expressions like
 * ROFF_HASH(s + 1) index the intended string.
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)
742
/*
 * File-scope state, not part of struct roff and therefore shared by
 * all parser instances.
 * NOTE(review): roff_reset() and roff_free() do not touch these two
 * variables; confirm that a pending value cannot leak from one input
 * file into the next.
 */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
745
746
747 /* --- request table ------------------------------------------------------ */
748
749 static void
750 roffhash_init(void)
751 {
752 struct roffmac *n;
753 int buc, i;
754
755 for (i = 0; i < (int)ROFF_USERDEF; i++) {
756 assert(roffs[i].name[0] >= ASCII_LO);
757 assert(roffs[i].name[0] <= ASCII_HI);
758
759 buc = ROFF_HASH(roffs[i].name);
760
761 if (NULL != (n = hash[buc])) {
762 for ( ; n->next; n = n->next)
763 /* Do nothing. */ ;
764 n->next = &roffs[i];
765 } else
766 hash[buc] = &roffs[i];
767 }
768 }
769
770 /*
771 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
772 * the nil-terminated string name could be found.
773 */
774 static enum rofft
775 roffhash_find(const char *p, size_t s)
776 {
777 int buc;
778 struct roffmac *n;
779
780 /*
781 * libroff has an extremely simple hashtable, for the time
782 * being, which simply keys on the first character, which must
783 * be printable, then walks a chain. It works well enough until
784 * optimised.
785 */
786
787 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
788 return(ROFF_MAX);
789
790 buc = ROFF_HASH(p);
791
792 if (NULL == (n = hash[buc]))
793 return(ROFF_MAX);
794 for ( ; n; n = n->next)
795 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
796 return((enum rofft)(n - roffs));
797
798 return(ROFF_MAX);
799 }
800
801 /* --- stack of request blocks -------------------------------------------- */
802
803 /*
804 * Pop the current node off of the stack of roff instructions currently
805 * pending.
806 */
807 static void
808 roffnode_pop(struct roff *r)
809 {
810 struct roffnode *p;
811
812 assert(r->last);
813 p = r->last;
814
815 r->last = r->last->parent;
816 free(p->name);
817 free(p->end);
818 free(p);
819 }
820
821 /*
822 * Push a roff node onto the instruction stack. This must later be
823 * removed with roffnode_pop().
824 */
825 static void
826 roffnode_push(struct roff *r, enum rofft tok, const char *name,
827 int line, int col)
828 {
829 struct roffnode *p;
830
831 p = mandoc_calloc(1, sizeof(struct roffnode));
832 p->tok = tok;
833 if (name)
834 p->name = mandoc_strdup(name);
835 p->parent = r->last;
836 p->line = line;
837 p->col = col;
838 p->rule = p->parent ? p->parent->rule : 0;
839
840 r->last = p;
841 }
842
843 /* --- roff parser state data management ---------------------------------- */
844
845 static void
846 roff_free1(struct roff *r)
847 {
848 struct tbl_node *tbl;
849 struct eqn_node *e;
850 int i;
851
852 while (NULL != (tbl = r->first_tbl)) {
853 r->first_tbl = tbl->next;
854 tbl_free(tbl);
855 }
856 r->first_tbl = r->last_tbl = r->tbl = NULL;
857
858 while (NULL != (e = r->first_eqn)) {
859 r->first_eqn = e->next;
860 eqn_free(e);
861 }
862 r->first_eqn = r->last_eqn = r->eqn = NULL;
863
864 while (r->last)
865 roffnode_pop(r);
866
867 free (r->rstack);
868 r->rstack = NULL;
869 r->rstacksz = 0;
870 r->rstackpos = -1;
871
872 roff_freereg(r->regtab);
873 r->regtab = NULL;
874
875 roff_freestr(r->strtab);
876 roff_freestr(r->xmbtab);
877 r->strtab = r->xmbtab = NULL;
878
879 if (r->xtab)
880 for (i = 0; i < 128; i++)
881 free(r->xtab[i].p);
882 free(r->xtab);
883 r->xtab = NULL;
884 }
885
886 void
887 roff_reset(struct roff *r)
888 {
889
890 roff_free1(r);
891 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
892 r->control = 0;
893 }
894
/*
 * Destroy a parser: release everything it owns, then the
 * struct roff itself.  The pointer must not be used afterwards.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);
	free(r);
}
902
903 struct roff *
904 roff_alloc(struct mparse *parse, const struct mchars *mchars, int options)
905 {
906 struct roff *r;
907
908 r = mandoc_calloc(1, sizeof(struct roff));
909 r->parse = parse;
910 r->mchars = mchars;
911 r->options = options;
912 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
913 r->rstackpos = -1;
914
915 roffhash_init();
916
917 return(r);
918 }
919
920 /* --- syntax tree state data management ---------------------------------- */
921
922 static void
923 roff_man_free1(struct roff_man *man)
924 {
925
926 if (man->first != NULL)
927 roff_node_delete(man, man->first);
928 free(man->meta.msec);
929 free(man->meta.vol);
930 free(man->meta.os);
931 free(man->meta.arch);
932 free(man->meta.title);
933 free(man->meta.name);
934 free(man->meta.date);
935 }
936
937 static void
938 roff_man_alloc1(struct roff_man *man)
939 {
940
941 memset(&man->meta, 0, sizeof(man->meta));
942 man->first = mandoc_calloc(1, sizeof(*man->first));
943 man->first->type = ROFFT_ROOT;
944 man->last = man->first;
945 man->last_es = NULL;
946 man->flags = 0;
947 man->macroset = MACROSET_NONE;
948 man->lastsec = man->lastnamed = SEC_NONE;
949 man->next = ROFF_NEXT_CHILD;
950 }
951
/*
 * Discard the current syntax tree and meta data and start over
 * with an empty tree.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);
	roff_man_alloc1(man);
}
959
/*
 * Destroy a struct roff_man: release the syntax tree and the meta
 * data, then the structure itself.
 */
void
roff_man_free(struct roff_man *man)
{
	roff_man_free1(man);
	free(man);
}
967
968 struct roff_man *
969 roff_man_alloc(struct roff *roff, struct mparse *parse,
970 const char *defos, int quick)
971 {
972 struct roff_man *man;
973
974 man = mandoc_calloc(1, sizeof(*man));
975 man->parse = parse;
976 man->roff = roff;
977 man->defos = defos;
978 man->quick = quick;
979 roff_man_alloc1(man);
980 return(man);
981 }
982
983 /* --- syntax tree handling ----------------------------------------------- */
984
985 struct roff_node *
986 roff_node_alloc(struct roff_man *man, int line, int pos,
987 enum roff_type type, int tok)
988 {
989 struct roff_node *n;
990
991 n = mandoc_calloc(1, sizeof(*n));
992 n->line = line;
993 n->pos = pos;
994 n->tok = tok;
995 n->type = type;
996 n->sec = man->lastsec;
997
998 if (man->flags & MDOC_SYNOPSIS)
999 n->flags |= MDOC_SYNPRETTY;
1000 else
1001 n->flags &= ~MDOC_SYNPRETTY;
1002 if (man->flags & MDOC_NEWLINE)
1003 n->flags |= MDOC_LINE;
1004 man->flags &= ~MDOC_NEWLINE;
1005
1006 return(n);
1007 }
1008
1009 void
1010 roff_node_append(struct roff_man *man, struct roff_node *n)
1011 {
1012
1013 switch (man->next) {
1014 case ROFF_NEXT_SIBLING:
1015 man->last->next = n;
1016 n->prev = man->last;
1017 n->parent = man->last->parent;
1018 break;
1019 case ROFF_NEXT_CHILD:
1020 man->last->child = n;
1021 n->parent = man->last;
1022 break;
1023 default:
1024 abort();
1025 /* NOTREACHED */
1026 }
1027 n->parent->nchild++;
1028 n->parent->last = n;
1029
1030 /*
1031 * Copy over the normalised-data pointer of our parent. Not
1032 * everybody has one, but copying a null pointer is fine.
1033 */
1034
1035 switch (n->type) {
1036 case ROFFT_BODY:
1037 if (n->end != ENDBODY_NOT)
1038 break;
1039 /* FALLTHROUGH */
1040 case ROFFT_TAIL:
1041 /* FALLTHROUGH */
1042 case ROFFT_HEAD:
1043 n->norm = n->parent->norm;
1044 break;
1045 default:
1046 break;
1047 }
1048
1049 if (man->macroset == MACROSET_MDOC)
1050 mdoc_valid_pre(man, n);
1051
1052 switch (n->type) {
1053 case ROFFT_HEAD:
1054 assert(n->parent->type == ROFFT_BLOCK);
1055 n->parent->head = n;
1056 break;
1057 case ROFFT_BODY:
1058 if (n->end)
1059 break;
1060 assert(n->parent->type == ROFFT_BLOCK);
1061 n->parent->body = n;
1062 break;
1063 case ROFFT_TAIL:
1064 assert(n->parent->type == ROFFT_BLOCK);
1065 n->parent->tail = n;
1066 break;
1067 default:
1068 break;
1069 }
1070 man->last = n;
1071 }
1072
1073 void
1074 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
1075 {
1076 struct roff_node *n;
1077
1078 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
1079 n->string = roff_strdup(man->roff, word);
1080 roff_node_append(man, n);
1081 if (man->macroset == MACROSET_MDOC)
1082 mdoc_valid_post(man);
1083 else
1084 man_valid_post(man);
1085 man->next = ROFF_NEXT_SIBLING;
1086 }
1087
1088 void
1089 roff_word_append(struct roff_man *man, const char *word)
1090 {
1091 struct roff_node *n;
1092 char *addstr, *newstr;
1093
1094 n = man->last;
1095 addstr = roff_strdup(man->roff, word);
1096 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
1097 free(addstr);
1098 free(n->string);
1099 n->string = newstr;
1100 man->next = ROFF_NEXT_SIBLING;
1101 }
1102
1103 void
1104 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
1105 {
1106 struct roff_node *n;
1107
1108 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
1109 roff_node_append(man, n);
1110 man->next = ROFF_NEXT_CHILD;
1111 }
1112
1113 struct roff_node *
1114 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
1115 {
1116 struct roff_node *n;
1117
1118 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1119 roff_node_append(man, n);
1120 man->next = ROFF_NEXT_CHILD;
1121 return(n);
1122 }
1123
1124 struct roff_node *
1125 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1126 {
1127 struct roff_node *n;
1128
1129 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1130 roff_node_append(man, n);
1131 man->next = ROFF_NEXT_CHILD;
1132 return(n);
1133 }
1134
1135 struct roff_node *
1136 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1137 {
1138 struct roff_node *n;
1139
1140 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1141 roff_node_append(man, n);
1142 man->next = ROFF_NEXT_CHILD;
1143 return(n);
1144 }
1145
1146 void
1147 roff_addeqn(struct roff_man *man, const struct eqn *eqn)
1148 {
1149 struct roff_node *n;
1150
1151 n = roff_node_alloc(man, eqn->ln, eqn->pos, ROFFT_EQN, TOKEN_NONE);
1152 n->eqn = eqn;
1153 if (eqn->ln > man->last->line)
1154 n->flags |= MDOC_LINE;
1155 roff_node_append(man, n);
1156 man->next = ROFF_NEXT_SIBLING;
1157 }
1158
1159 void
1160 roff_addtbl(struct roff_man *man, const struct tbl_span *tbl)
1161 {
1162 struct roff_node *n;
1163
1164 if (man->macroset == MACROSET_MAN)
1165 man_breakscope(man, TOKEN_NONE);
1166 n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE);
1167 n->span = tbl;
1168 roff_node_append(man, n);
1169 if (man->macroset == MACROSET_MDOC)
1170 mdoc_valid_post(man);
1171 else
1172 man_valid_post(man);
1173 man->next = ROFF_NEXT_SIBLING;
1174 }
1175
1176 void
1177 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1178 {
1179
1180 /* Adjust siblings. */
1181
1182 if (n->prev)
1183 n->prev->next = n->next;
1184 if (n->next)
1185 n->next->prev = n->prev;
1186
1187 /* Adjust parent. */
1188
1189 if (n->parent != NULL) {
1190 n->parent->nchild--;
1191 if (n->parent->child == n)
1192 n->parent->child = n->next;
1193 if (n->parent->last == n)
1194 n->parent->last = n->prev;
1195 }
1196
1197 /* Adjust parse point. */
1198
1199 if (man == NULL)
1200 return;
1201 if (man->last == n) {
1202 if (n->prev == NULL) {
1203 man->last = n->parent;
1204 man->next = ROFF_NEXT_CHILD;
1205 } else {
1206 man->last = n->prev;
1207 man->next = ROFF_NEXT_SIBLING;
1208 }
1209 }
1210 if (man->first == n)
1211 man->first = NULL;
1212 }
1213
1214 void
1215 roff_node_free(struct roff_node *n)
1216 {
1217
1218 if (n->args != NULL)
1219 mdoc_argv_free(n->args);
1220 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1221 free(n->norm);
1222 free(n->string);
1223 free(n);
1224 }
1225
1226 void
1227 roff_node_delete(struct roff_man *man, struct roff_node *n)
1228 {
1229
1230 while (n->child != NULL)
1231 roff_node_delete(man, n->child);
1232 assert(n->nchild == 0);
1233 roff_node_unlink(man, n);
1234 roff_node_free(n);
1235 }
1236
1237 void
1238 deroff(char **dest, const struct roff_node *n)
1239 {
1240 char *cp;
1241 size_t sz;
1242
1243 if (n->type != ROFFT_TEXT) {
1244 for (n = n->child; n != NULL; n = n->next)
1245 deroff(dest, n);
1246 return;
1247 }
1248
1249 /* Skip leading whitespace and escape sequences. */
1250
1251 cp = n->string;
1252 while (*cp != '\0') {
1253 if ('\\' == *cp) {
1254 cp++;
1255 mandoc_escape((const char **)&cp, NULL, NULL);
1256 } else if (isspace((unsigned char)*cp))
1257 cp++;
1258 else
1259 break;
1260 }
1261
1262 /* Skip trailing whitespace. */
1263
1264 for (sz = strlen(cp); sz; sz--)
1265 if ( ! isspace((unsigned char)cp[sz-1]))
1266 break;
1267
1268 /* Skip empty strings. */
1269
1270 if (sz == 0)
1271 return;
1272
1273 if (*dest == NULL) {
1274 *dest = mandoc_strndup(cp, sz);
1275 return;
1276 }
1277
1278 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1279 free(*dest);
1280 *dest = cp;
1281 }
1282
1283 /* --- main functions of the roff parser ---------------------------------- */
1284
1285 /*
1286 * In the current line, expand escape sequences that tend to get
1287 * used in numerical expressions and conditional requests.
1288 * Also check the syntax of the remaining escape sequences.
1289 */
1290 static enum rofferr
1291 roff_res(struct roff *r, struct buf *buf, int ln, int pos)
1292 {
1293 char ubuf[24]; /* buffer to print the number */
1294 const char *start; /* start of the string to process */
1295 char *stesc; /* start of an escape sequence ('\\') */
1296 const char *stnam; /* start of the name, after "[(*" */
1297 const char *cp; /* end of the name, e.g. before ']' */
1298 const char *res; /* the string to be substituted */
1299 char *nbuf; /* new buffer to copy buf->buf to */
1300 size_t maxl; /* expected length of the escape name */
1301 size_t naml; /* actual length of the escape name */
1302 enum mandoc_esc esc; /* type of the escape sequence */
1303 int inaml; /* length returned from mandoc_escape() */
1304 int expand_count; /* to avoid infinite loops */
1305 int npos; /* position in numeric expression */
1306 int arg_complete; /* argument not interrupted by eol */
1307 char term; /* character terminating the escape */
1308
1309 expand_count = 0;
1310 start = buf->buf + pos;
1311 stesc = strchr(start, '\0') - 1;
1312 while (stesc-- > start) {
1313
1314 /* Search backwards for the next backslash. */
1315
1316 if (*stesc != '\\')
1317 continue;
1318
1319 /* If it is escaped, skip it. */
1320
1321 for (cp = stesc - 1; cp >= start; cp--)
1322 if (*cp != '\\')
1323 break;
1324
1325 if ((stesc - cp) % 2 == 0) {
1326 stesc = (char *)cp;
1327 continue;
1328 }
1329
1330 /* Decide whether to expand or to check only. */
1331
1332 term = '\0';
1333 cp = stesc + 1;
1334 switch (*cp) {
1335 case '*':
1336 res = NULL;
1337 break;
1338 case 'B':
1339 /* FALLTHROUGH */
1340 case 'w':
1341 term = cp[1];
1342 /* FALLTHROUGH */
1343 case 'n':
1344 res = ubuf;
1345 break;
1346 default:
1347 esc = mandoc_escape(&cp, &stnam, &inaml);
1348 if (esc == ESCAPE_ERROR ||
1349 (esc == ESCAPE_SPECIAL &&
1350 mchars_spec2cp(r->mchars, stnam, inaml) < 0))
1351 mandoc_vmsg(MANDOCERR_ESC_BAD,
1352 r->parse, ln, (int)(stesc - buf->buf),
1353 "%.*s", (int)(cp - stesc), stesc);
1354 continue;
1355 }
1356
1357 if (EXPAND_LIMIT < ++expand_count) {
1358 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1359 ln, (int)(stesc - buf->buf), NULL);
1360 return(ROFF_IGN);
1361 }
1362
1363 /*
1364 * The third character decides the length
1365 * of the name of the string or register.
1366 * Save a pointer to the name.
1367 */
1368
1369 if (term == '\0') {
1370 switch (*++cp) {
1371 case '\0':
1372 maxl = 0;
1373 break;
1374 case '(':
1375 cp++;
1376 maxl = 2;
1377 break;
1378 case '[':
1379 cp++;
1380 term = ']';
1381 maxl = 0;
1382 break;
1383 default:
1384 maxl = 1;
1385 break;
1386 }
1387 } else {
1388 cp += 2;
1389 maxl = 0;
1390 }
1391 stnam = cp;
1392
1393 /* Advance to the end of the name. */
1394
1395 naml = 0;
1396 arg_complete = 1;
1397 while (maxl == 0 || naml < maxl) {
1398 if (*cp == '\0') {
1399 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
1400 ln, (int)(stesc - buf->buf), stesc);
1401 arg_complete = 0;
1402 break;
1403 }
1404 if (maxl == 0 && *cp == term) {
1405 cp++;
1406 break;
1407 }
1408 if (*cp++ != '\\' || stesc[1] != 'w') {
1409 naml++;
1410 continue;
1411 }
1412 switch (mandoc_escape(&cp, NULL, NULL)) {
1413 case ESCAPE_SPECIAL:
1414 /* FALLTHROUGH */
1415 case ESCAPE_UNICODE:
1416 /* FALLTHROUGH */
1417 case ESCAPE_NUMBERED:
1418 /* FALLTHROUGH */
1419 case ESCAPE_OVERSTRIKE:
1420 naml++;
1421 break;
1422 default:
1423 break;
1424 }
1425 }
1426
1427 /*
1428 * Retrieve the replacement string; if it is
1429 * undefined, resume searching for escapes.
1430 */
1431
1432 switch (stesc[1]) {
1433 case '*':
1434 if (arg_complete)
1435 res = roff_getstrn(r, stnam, naml);
1436 break;
1437 case 'B':
1438 npos = 0;
1439 ubuf[0] = arg_complete &&
1440 roff_evalnum(r, ln, stnam, &npos,
1441 NULL, ROFFNUM_SCALE) &&
1442 stnam + npos + 1 == cp ? '1' : '0';
1443 ubuf[1] = '\0';
1444 break;
1445 case 'n':
1446 if (arg_complete)
1447 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1448 roff_getregn(r, stnam, naml));
1449 else
1450 ubuf[0] = '\0';
1451 break;
1452 case 'w':
1453 /* use even incomplete args */
1454 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1455 24 * (int)naml);
1456 break;
1457 }
1458
1459 if (res == NULL) {
1460 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1461 r->parse, ln, (int)(stesc - buf->buf),
1462 "%.*s", (int)naml, stnam);
1463 res = "";
1464 } else if (buf->sz + strlen(res) > SHRT_MAX) {
1465 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1466 ln, (int)(stesc - buf->buf), NULL);
1467 return(ROFF_IGN);
1468 }
1469
1470 /* Replace the escape sequence by the string. */
1471
1472 *stesc = '\0';
1473 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
1474 buf->buf, res, cp) + 1;
1475
1476 /* Prepare for the next replacement. */
1477
1478 start = nbuf + pos;
1479 stesc = nbuf + (stesc - buf->buf) + strlen(res);
1480 free(buf->buf);
1481 buf->buf = nbuf;
1482 }
1483 return(ROFF_CONT);
1484 }
1485
1486 /*
1487 * Process text streams.
1488 */
1489 static enum rofferr
1490 roff_parsetext(struct buf *buf, int pos, int *offs)
1491 {
1492 size_t sz;
1493 const char *start;
1494 char *p;
1495 int isz;
1496 enum mandoc_esc esc;
1497
1498 /* Spring the input line trap. */
1499
1500 if (roffit_lines == 1) {
1501 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1502 free(buf->buf);
1503 buf->buf = p;
1504 buf->sz = isz + 1;
1505 *offs = 0;
1506 free(roffit_macro);
1507 roffit_lines = 0;
1508 return(ROFF_REPARSE);
1509 } else if (roffit_lines > 1)
1510 --roffit_lines;
1511
1512 /* Convert all breakable hyphens into ASCII_HYPH. */
1513
1514 start = p = buf->buf + pos;
1515
1516 while (*p != '\0') {
1517 sz = strcspn(p, "-\\");
1518 p += sz;
1519
1520 if (*p == '\0')
1521 break;
1522
1523 if (*p == '\\') {
1524 /* Skip over escapes. */
1525 p++;
1526 esc = mandoc_escape((const char **)&p, NULL, NULL);
1527 if (esc == ESCAPE_ERROR)
1528 break;
1529 while (*p == '-')
1530 p++;
1531 continue;
1532 } else if (p == start) {
1533 p++;
1534 continue;
1535 }
1536
1537 if (isalpha((unsigned char)p[-1]) &&
1538 isalpha((unsigned char)p[1]))
1539 *p = ASCII_HYPH;
1540 p++;
1541 }
1542 return(ROFF_CONT);
1543 }
1544
1545 enum rofferr
1546 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
1547 {
1548 enum rofft t;
1549 enum rofferr e;
1550 int pos; /* parse point */
1551 int spos; /* saved parse point for messages */
1552 int ppos; /* original offset in buf->buf */
1553 int ctl; /* macro line (boolean) */
1554
1555 ppos = pos = *offs;
1556
1557 /* Handle in-line equation delimiters. */
1558
1559 if (r->tbl == NULL &&
1560 r->last_eqn != NULL && r->last_eqn->delim &&
1561 (r->eqn == NULL || r->eqn_inline)) {
1562 e = roff_eqndelim(r, buf, pos);
1563 if (e == ROFF_REPARSE)
1564 return(e);
1565 assert(e == ROFF_CONT);
1566 }
1567
1568 /* Expand some escape sequences. */
1569
1570 e = roff_res(r, buf, ln, pos);
1571 if (e == ROFF_IGN)
1572 return(e);
1573 assert(e == ROFF_CONT);
1574
1575 ctl = roff_getcontrol(r, buf->buf, &pos);
1576
1577 /*
1578 * First, if a scope is open and we're not a macro, pass the
1579 * text through the macro's filter.
1580 * Equations process all content themselves.
1581 * Tables process almost all content themselves, but we want
1582 * to warn about macros before passing it there.
1583 */
1584
1585 if (r->last != NULL && ! ctl) {
1586 t = r->last->tok;
1587 assert(roffs[t].text);
1588 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1589 assert(e == ROFF_IGN || e == ROFF_CONT);
1590 if (e != ROFF_CONT)
1591 return(e);
1592 }
1593 if (r->eqn != NULL)
1594 return(eqn_read(&r->eqn, ln, buf->buf, ppos, offs));
1595 if (r->tbl != NULL && ( ! ctl || buf->buf[pos] == '\0'))
1596 return(tbl_read(r->tbl, ln, buf->buf, ppos));
1597 if ( ! ctl)
1598 return(roff_parsetext(buf, pos, offs));
1599
1600 /* Skip empty request lines. */
1601
1602 if (buf->buf[pos] == '"') {
1603 mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
1604 ln, pos, NULL);
1605 return(ROFF_IGN);
1606 } else if (buf->buf[pos] == '\0')
1607 return(ROFF_IGN);
1608
1609 /*
1610 * If a scope is open, go to the child handler for that macro,
1611 * as it may want to preprocess before doing anything with it.
1612 * Don't do so if an equation is open.
1613 */
1614
1615 if (r->last) {
1616 t = r->last->tok;
1617 assert(roffs[t].sub);
1618 return((*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs));
1619 }
1620
1621 /* No scope is open. This is a new request or macro. */
1622
1623 spos = pos;
1624 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1625
1626 /* Tables ignore most macros. */
1627
1628 if (r->tbl != NULL && (t == ROFF_MAX || t == ROFF_TS)) {
1629 mandoc_msg(MANDOCERR_TBLMACRO, r->parse,
1630 ln, pos, buf->buf + spos);
1631 if (t == ROFF_TS)
1632 return(ROFF_IGN);
1633 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1634 pos++;
1635 while (buf->buf[pos] != '\0' && buf->buf[pos] == ' ')
1636 pos++;
1637 return(tbl_read(r->tbl, ln, buf->buf, pos));
1638 }
1639
1640 /*
1641 * This is neither a roff request nor a user-defined macro.
1642 * Let the standard macro set parsers handle it.
1643 */
1644
1645 if (t == ROFF_MAX)
1646 return(ROFF_CONT);
1647
1648 /* Execute a roff request or a user defined macro. */
1649
1650 assert(roffs[t].proc);
1651 return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs));
1652 }
1653
1654 void
1655 roff_endparse(struct roff *r)
1656 {
1657
1658 if (r->last)
1659 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1660 r->last->line, r->last->col,
1661 roffs[r->last->tok].name);
1662
1663 if (r->eqn) {
1664 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1665 r->eqn->eqn.ln, r->eqn->eqn.pos, "EQ");
1666 eqn_end(&r->eqn);
1667 }
1668
1669 if (r->tbl) {
1670 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1671 r->tbl->line, r->tbl->pos, "TS");
1672 tbl_end(&r->tbl);
1673 }
1674 }
1675
1676 /*
1677 * Parse a roff node's type from the input buffer. This must be in the
1678 * form of ".foo xxx" in the usual way.
1679 */
1680 static enum rofft
1681 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1682 {
1683 char *cp;
1684 const char *mac;
1685 size_t maclen;
1686 enum rofft t;
1687
1688 cp = buf + *pos;
1689
1690 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1691 return(ROFF_MAX);
1692
1693 mac = cp;
1694 maclen = roff_getname(r, &cp, ln, ppos);
1695
1696 t = (r->current_string = roff_getstrn(r, mac, maclen))
1697 ? ROFF_USERDEF : roffhash_find(mac, maclen);
1698
1699 if (ROFF_MAX != t)
1700 *pos = cp - buf;
1701
1702 return(t);
1703 }
1704
1705 /* --- handling of request blocks ----------------------------------------- */
1706
1707 static enum rofferr
1708 roff_cblock(ROFF_ARGS)
1709 {
1710
1711 /*
1712 * A block-close `..' should only be invoked as a child of an
1713 * ignore macro, otherwise raise a warning and just ignore it.
1714 */
1715
1716 if (r->last == NULL) {
1717 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1718 ln, ppos, "..");
1719 return(ROFF_IGN);
1720 }
1721
1722 switch (r->last->tok) {
1723 case ROFF_am:
1724 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1725 /* FALLTHROUGH */
1726 case ROFF_ami:
1727 /* FALLTHROUGH */
1728 case ROFF_de:
1729 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1730 /* FALLTHROUGH */
1731 case ROFF_dei:
1732 /* FALLTHROUGH */
1733 case ROFF_ig:
1734 break;
1735 default:
1736 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1737 ln, ppos, "..");
1738 return(ROFF_IGN);
1739 }
1740
1741 if (buf->buf[pos] != '\0')
1742 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1743 ".. %s", buf->buf + pos);
1744
1745 roffnode_pop(r);
1746 roffnode_cleanscope(r);
1747 return(ROFF_IGN);
1748
1749 }
1750
1751 static void
1752 roffnode_cleanscope(struct roff *r)
1753 {
1754
1755 while (r->last) {
1756 if (--r->last->endspan != 0)
1757 break;
1758 roffnode_pop(r);
1759 }
1760 }
1761
1762 static void
1763 roff_ccond(struct roff *r, int ln, int ppos)
1764 {
1765
1766 if (NULL == r->last) {
1767 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1768 ln, ppos, "\\}");
1769 return;
1770 }
1771
1772 switch (r->last->tok) {
1773 case ROFF_el:
1774 /* FALLTHROUGH */
1775 case ROFF_ie:
1776 /* FALLTHROUGH */
1777 case ROFF_if:
1778 break;
1779 default:
1780 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1781 ln, ppos, "\\}");
1782 return;
1783 }
1784
1785 if (r->last->endspan > -1) {
1786 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1787 ln, ppos, "\\}");
1788 return;
1789 }
1790
1791 roffnode_pop(r);
1792 roffnode_cleanscope(r);
1793 return;
1794 }
1795
1796 static enum rofferr
1797 roff_block(ROFF_ARGS)
1798 {
1799 const char *name;
1800 char *iname, *cp;
1801 size_t namesz;
1802
1803 /* Ignore groff compatibility mode for now. */
1804
1805 if (tok == ROFF_de1)
1806 tok = ROFF_de;
1807 else if (tok == ROFF_dei1)
1808 tok = ROFF_dei;
1809 else if (tok == ROFF_am1)
1810 tok = ROFF_am;
1811 else if (tok == ROFF_ami1)
1812 tok = ROFF_ami;
1813
1814 /* Parse the macro name argument. */
1815
1816 cp = buf->buf + pos;
1817 if (tok == ROFF_ig) {
1818 iname = NULL;
1819 namesz = 0;
1820 } else {
1821 iname = cp;
1822 namesz = roff_getname(r, &cp, ln, ppos);
1823 iname[namesz] = '\0';
1824 }
1825
1826 /* Resolve the macro name argument if it is indirect. */
1827
1828 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1829 if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1830 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1831 r->parse, ln, (int)(iname - buf->buf),
1832 "%.*s", (int)namesz, iname);
1833 namesz = 0;
1834 } else
1835 namesz = strlen(name);
1836 } else
1837 name = iname;
1838
1839 if (namesz == 0 && tok != ROFF_ig) {
1840 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
1841 ln, ppos, roffs[tok].name);
1842 return(ROFF_IGN);
1843 }
1844
1845 roffnode_push(r, tok, name, ln, ppos);
1846
1847 /*
1848 * At the beginning of a `de' macro, clear the existing string
1849 * with the same name, if there is one. New content will be
1850 * appended from roff_block_text() in multiline mode.
1851 */
1852
1853 if (tok == ROFF_de || tok == ROFF_dei)
1854 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1855
1856 if (*cp == '\0')
1857 return(ROFF_IGN);
1858
1859 /* Get the custom end marker. */
1860
1861 iname = cp;
1862 namesz = roff_getname(r, &cp, ln, ppos);
1863
1864 /* Resolve the end marker if it is indirect. */
1865
1866 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1867 if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1868 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1869 r->parse, ln, (int)(iname - buf->buf),
1870 "%.*s", (int)namesz, iname);
1871 namesz = 0;
1872 } else
1873 namesz = strlen(name);
1874 } else
1875 name = iname;
1876
1877 if (namesz)
1878 r->last->end = mandoc_strndup(name, namesz);
1879
1880 if (*cp != '\0')
1881 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
1882 ln, pos, ".%s ... %s", roffs[tok].name, cp);
1883
1884 return(ROFF_IGN);
1885 }
1886
1887 static enum rofferr
1888 roff_block_sub(ROFF_ARGS)
1889 {
1890 enum rofft t;
1891 int i, j;
1892
1893 /*
1894 * First check whether a custom macro exists at this level. If
1895 * it does, then check against it. This is some of groff's
1896 * stranger behaviours. If we encountered a custom end-scope
1897 * tag and that tag also happens to be a "real" macro, then we
1898 * need to try interpreting it again as a real macro. If it's
1899 * not, then return ignore. Else continue.
1900 */
1901
1902 if (r->last->end) {
1903 for (i = pos, j = 0; r->last->end[j]; j++, i++)
1904 if (buf->buf[i] != r->last->end[j])
1905 break;
1906
1907 if (r->last->end[j] == '\0' &&
1908 (buf->buf[i] == '\0' ||
1909 buf->buf[i] == ' ' ||
1910 buf->buf[i] == '\t')) {
1911 roffnode_pop(r);
1912 roffnode_cleanscope(r);
1913
1914 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
1915 i++;
1916
1917 pos = i;
1918 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
1919 ROFF_MAX)
1920 return(ROFF_RERUN);
1921 return(ROFF_IGN);
1922 }
1923 }
1924
1925 /*
1926 * If we have no custom end-query or lookup failed, then try
1927 * pulling it out of the hashtable.
1928 */
1929
1930 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1931
1932 if (t != ROFF_cblock) {
1933 if (tok != ROFF_ig)
1934 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
1935 return(ROFF_IGN);
1936 }
1937
1938 assert(roffs[t].proc);
1939 return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs));
1940 }
1941
1942 static enum rofferr
1943 roff_block_text(ROFF_ARGS)
1944 {
1945
1946 if (tok != ROFF_ig)
1947 roff_setstr(r, r->last->name, buf->buf + pos, 2);
1948
1949 return(ROFF_IGN);
1950 }
1951
1952 static enum rofferr
1953 roff_cond_sub(ROFF_ARGS)
1954 {
1955 enum rofft t;
1956 char *ep;
1957 int rr;
1958
1959 rr = r->last->rule;
1960 roffnode_cleanscope(r);
1961 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1962
1963 /*
1964 * Fully handle known macros when they are structurally
1965 * required or when the conditional evaluated to true.
1966 */
1967
1968 if ((t != ROFF_MAX) &&
1969 (rr || roffs[t].flags & ROFFMAC_STRUCT)) {
1970 assert(roffs[t].proc);
1971 return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs));
1972 }
1973
1974 /*
1975 * If `\}' occurs on a macro line without a preceding macro,
1976 * drop the line completely.
1977 */
1978
1979 ep = buf->buf + pos;
1980 if (ep[0] == '\\' && ep[1] == '}')
1981 rr = 0;
1982
1983 /* Always check for the closing delimiter `\}'. */
1984
1985 while ((ep = strchr(ep, '\\')) != NULL) {
1986 if (*(++ep) == '}') {
1987 *ep = '&';
1988 roff_ccond(r, ln, ep - buf->buf - 1);
1989 }
1990 if (*ep != '\0')
1991 ++ep;
1992 }
1993 return(rr ? ROFF_CONT : ROFF_IGN);
1994 }
1995
1996 static enum rofferr
1997 roff_cond_text(ROFF_ARGS)
1998 {
1999 char *ep;
2000 int rr;
2001
2002 rr = r->last->rule;
2003 roffnode_cleanscope(r);
2004
2005 ep = buf->buf + pos;
2006 while ((ep = strchr(ep, '\\')) != NULL) {
2007 if (*(++ep) == '}') {
2008 *ep = '&';
2009 roff_ccond(r, ln, ep - buf->buf - 1);
2010 }
2011 if (*ep != '\0')
2012 ++ep;
2013 }
2014 return(rr ? ROFF_CONT : ROFF_IGN);
2015 }
2016
2017 /* --- handling of numeric and conditional expressions -------------------- */
2018
2019 /*
2020 * Parse a single signed integer number. Stop at the first non-digit.
2021 * If there is at least one digit, return success and advance the
2022 * parse point, else return failure and let the parse point unchanged.
2023 * Ignore overflows, treat them just like the C language.
2024 */
2025 static int
2026 roff_getnum(const char *v, int *pos, int *res, int flags)
2027 {
2028 int myres, scaled, n, p;
2029
2030 if (NULL == res)
2031 res = &myres;
2032
2033 p = *pos;
2034 n = v[p] == '-';
2035 if (n || v[p] == '+')
2036 p++;
2037
2038 if (flags & ROFFNUM_WHITE)
2039 while (isspace((unsigned char)v[p]))
2040 p++;
2041
2042 for (*res = 0; isdigit((unsigned char)v[p]); p++)
2043 *res = 10 * *res + v[p] - '0';
2044 if (p == *pos + n)
2045 return 0;
2046
2047 if (n)
2048 *res = -*res;
2049
2050 /* Each number may be followed by one optional scaling unit. */
2051
2052 switch (v[p]) {
2053 case 'f':
2054 scaled = *res * 65536;
2055 break;
2056 case 'i':
2057 scaled = *res * 240;
2058 break;
2059 case 'c':
2060 scaled = *res * 240 / 2.54;
2061 break;
2062 case 'v':
2063 /* FALLTROUGH */
2064 case 'P':
2065 scaled = *res * 40;
2066 break;
2067 case 'm':
2068 /* FALLTROUGH */
2069 case 'n':
2070 scaled = *res * 24;
2071 break;
2072 case 'p':
2073 scaled = *res * 10 / 3;
2074 break;
2075 case 'u':
2076 scaled = *res;
2077 break;
2078 case 'M':
2079 scaled = *res * 6 / 25;
2080 break;
2081 default:
2082 scaled = *res;
2083 p--;
2084 break;
2085 }
2086 if (flags & ROFFNUM_SCALE)
2087 *res = scaled;
2088
2089 *pos = p + 1;
2090 return(1);
2091 }
2092
/*
 * Evaluate a string comparison condition.
 * The character at v[*pos] is the delimiter.
 * Return 1 if the text between its first and second occurrence is
 * identical to the text between its second and third occurrence,
 * or 0 otherwise, including when a delimiter is missing.
 * The parse point is moved behind the third occurrence or,
 * lacking that, to the end of the line.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim;	/* the initial delimiter */
	const char	*lhs;	/* cursor in the first string */
	const char	*rhs;	/* cursor in the second string */
	int		 equal;

	equal = 0;
	delim = v + *pos;
	lhs = delim + 1;
	rhs = strchr(lhs, *delim);

	if (rhs != NULL) {
		/* Step over the middle delimiter, then compare. */
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: look for the final delimiter. */
				rhs = strchr(rhs, *delim);
				break;
			}
			if (*rhs == *delim) {
				/* Both strings ended simultaneously. */
				equal = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return(equal);
}
2135
2136 /*
2137 * Evaluate an optionally negated single character, numerical,
2138 * or string condition.
2139 */
2140 static int
2141 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2142 {
2143 char *cp, *name;
2144 size_t sz;
2145 int number, savepos, wanttrue;
2146
2147 if ('!' == v[*pos]) {
2148 wanttrue = 0;
2149 (*pos)++;
2150 } else
2151 wanttrue = 1;
2152
2153 switch (v[*pos]) {
2154 case '\0':
2155 return(0);
2156 case 'n':
2157 /* FALLTHROUGH */
2158 case 'o':
2159 (*pos)++;
2160 return(wanttrue);
2161 case 'c':
2162 /* FALLTHROUGH */
2163 case 'd':
2164 /* FALLTHROUGH */
2165 case 'e':
2166 /* FALLTHROUGH */
2167 case 't':
2168 /* FALLTHROUGH */
2169 case 'v':
2170 (*pos)++;
2171 return(!wanttrue);
2172 case 'r':
2173 cp = name = v + ++*pos;
2174 sz = roff_getname(r, &cp, ln, *pos);
2175 *pos = cp - v;
2176 return((sz && roff_hasregn(r, name, sz)) == wanttrue);
2177 default:
2178 break;
2179 }
2180
2181 savepos = *pos;
2182 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2183 return((number > 0) == wanttrue);
2184 else if (*pos == savepos)
2185 return(roff_evalstrcond(v, pos) == wanttrue);
2186 else
2187 return (0);
2188 }
2189
2190 static enum rofferr
2191 roff_line_ignore(ROFF_ARGS)
2192 {
2193
2194 return(ROFF_IGN);
2195 }
2196
2197 static enum rofferr
2198 roff_insec(ROFF_ARGS)
2199 {
2200
2201 mandoc_msg(MANDOCERR_REQ_INSEC, r->parse,
2202 ln, ppos, roffs[tok].name);
2203 return(ROFF_IGN);
2204 }
2205
2206 static enum rofferr
2207 roff_unsupp(ROFF_ARGS)
2208 {
2209
2210 mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse,
2211 ln, ppos, roffs[tok].name);
2212 return(ROFF_IGN);
2213 }
2214
2215 static enum rofferr
2216 roff_cond(ROFF_ARGS)
2217 {
2218
2219 roffnode_push(r, tok, NULL, ln, ppos);
2220
2221 /*
2222 * An `.el' has no conditional body: it will consume the value
2223 * of the current rstack entry set in prior `ie' calls or
2224 * defaults to DENY.
2225 *
2226 * If we're not an `el', however, then evaluate the conditional.
2227 */
2228
2229 r->last->rule = tok == ROFF_el ?
2230 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2231 roff_evalcond(r, ln, buf->buf, &pos);
2232
2233 /*
2234 * An if-else will put the NEGATION of the current evaluated
2235 * conditional into the stack of rules.
2236 */
2237
2238 if (tok == ROFF_ie) {
2239 if (r->rstackpos + 1 == r->rstacksz) {
2240 r->rstacksz += 16;
2241 r->rstack = mandoc_reallocarray(r->rstack,
2242 r->rstacksz, sizeof(int));
2243 }
2244 r->rstack[++r->rstackpos] = !r->last->rule;
2245 }
2246
2247 /* If the parent has false as its rule, then so do we. */
2248
2249 if (r->last->parent && !r->last->parent->rule)
2250 r->last->rule = 0;
2251
2252 /*
2253 * Determine scope.
2254 * If there is nothing on the line after the conditional,
2255 * not even whitespace, use next-line scope.
2256 */
2257
2258 if (buf->buf[pos] == '\0') {
2259 r->last->endspan = 2;
2260 goto out;
2261 }
2262
2263 while (buf->buf[pos] == ' ')
2264 pos++;
2265
2266 /* An opening brace requests multiline scope. */
2267
2268 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2269 r->last->endspan = -1;
2270 pos += 2;
2271 while (buf->buf[pos] == ' ')
2272 pos++;
2273 goto out;
2274 }
2275
2276 /*
2277 * Anything else following the conditional causes
2278 * single-line scope. Warn if the scope contains
2279 * nothing but trailing whitespace.
2280 */
2281
2282 if (buf->buf[pos] == '\0')
2283 mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
2284 ln, ppos, roffs[tok].name);
2285
2286 r->last->endspan = 1;
2287
2288 out:
2289 *offs = pos;
2290 return(ROFF_RERUN);
2291 }
2292
2293 static enum rofferr
2294 roff_ds(ROFF_ARGS)
2295 {
2296 char *string;
2297 const char *name;
2298 size_t namesz;
2299
2300 /* Ignore groff compatibility mode for now. */
2301
2302 if (tok == ROFF_ds1)
2303 tok = ROFF_ds;
2304 else if (tok == ROFF_as1)
2305 tok = ROFF_as;
2306
2307 /*
2308 * The first word is the name of the string.
2309 * If it is empty or terminated by an escape sequence,
2310 * abort the `ds' request without defining anything.
2311 */
2312
2313 name = string = buf->buf + pos;
2314 if (*name == '\0')
2315 return(ROFF_IGN);
2316
2317 namesz = roff_getname(r, &string, ln, pos);
2318 if (name[namesz] == '\\')
2319 return(ROFF_IGN);
2320
2321 /* Read past the initial double-quote, if any. */
2322 if (*string == '"')
2323 string++;
2324
2325 /* The rest is the value. */
2326 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2327 ROFF_as == tok);
2328 return(ROFF_IGN);
2329 }
2330
/*
 * Parse a single operator, one or two characters long, at v[*pos].
 * If the operator is recognized, store a one-character code for it
 * in *res, advance the parse point behind it, and return that code,
 * which is never zero.  Two-character operators get these codes:
 * <= 'l', <> '!', <? 'i', >= 'g', >? 'a', == '='.
 * Otherwise, return 0 and leave the parse point unchanged;
 * *res then holds the unrecognized character.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*cp;
	int		 len;	/* number of characters consumed */

	cp = v + *pos;
	*res = *cp;
	len = 1;

	switch (*cp) {
	case '+':
		/* FALLTHROUGH */
	case '-':
		/* FALLTHROUGH */
	case '*':
		/* FALLTHROUGH */
	case '/':
		/* FALLTHROUGH */
	case '%':
		/* FALLTHROUGH */
	case '&':
		/* FALLTHROUGH */
	case ':':
		break;
	case '<':
		if (cp[1] == '=') {
			*res = 'l';
			len = 2;
		} else if (cp[1] == '>') {
			*res = '!';
			len = 2;
		} else if (cp[1] == '?') {
			*res = 'i';
			len = 2;
		}
		break;
	case '>':
		if (cp[1] == '=') {
			*res = 'g';
			len = 2;
		} else if (cp[1] == '?') {
			*res = 'a';
			len = 2;
		}
		break;
	case '=':
		/* A doubled equal sign means the same as a single one. */
		if (cp[1] == '=')
			len = 2;
		break;
	default:
		return(0);
	}

	*pos += len;
	return(*res);
}
2400
2401 /*
2402 * Evaluate either a parenthesized numeric expression
2403 * or a single signed integer number.
2404 */
2405 static int
2406 roff_evalpar(struct roff *r, int ln,
2407 const char *v, int *pos, int *res, int flags)
2408 {
2409
2410 if ('(' != v[*pos])
2411 return(roff_getnum(v, pos, res, flags));
2412
2413 (*pos)++;
2414 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2415 return(0);
2416
2417 /*
2418 * Omission of the closing parenthesis
2419 * is an error in validation mode,
2420 * but ignored in evaluation mode.
2421 */
2422
2423 if (')' == v[*pos])
2424 (*pos)++;
2425 else if (NULL == res)
2426 return(0);
2427
2428 return(1);
2429 }
2430
2431 /*
2432 * Evaluate a complete numeric expression.
2433 * Proceed left to right, there is no concept of precedence.
2434 */
2435 static int
2436 roff_evalnum(struct roff *r, int ln, const char *v,
2437 int *pos, int *res, int flags)
2438 {
2439 int mypos, operand2;
2440 char operator;
2441
2442 if (NULL == pos) {
2443 mypos = 0;
2444 pos = &mypos;
2445 }
2446
2447 if (flags & ROFFNUM_WHITE)
2448 while (isspace((unsigned char)v[*pos]))
2449 (*pos)++;
2450
2451 if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2452 return(0);
2453
2454 while (1) {
2455 if (flags & ROFFNUM_WHITE)
2456 while (isspace((unsigned char)v[*pos]))
2457 (*pos)++;
2458
2459 if ( ! roff_getop(v, pos, &operator))
2460 break;
2461
2462 if (flags & ROFFNUM_WHITE)
2463 while (isspace((unsigned char)v[*pos]))
2464 (*pos)++;
2465
2466 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2467 return(0);
2468
2469 if (flags & ROFFNUM_WHITE)
2470 while (isspace((unsigned char)v[*pos]))
2471 (*pos)++;
2472
2473 if (NULL == res)
2474 continue;
2475
2476 switch (operator) {
2477 case '+':
2478 *res += operand2;
2479 break;
2480 case '-':
2481 *res -= operand2;
2482 break;
2483 case '*':
2484 *res *= operand2;
2485 break;
2486 case '/':
2487 if (operand2 == 0) {
2488 mandoc_msg(MANDOCERR_DIVZERO,
2489 r->parse, ln, *pos, v);
2490 *res = 0;
2491 break;
2492 }
2493 *res /= operand2;
2494 break;
2495 case '%':
2496 if (operand2 == 0) {
2497 mandoc_msg(MANDOCERR_DIVZERO,
2498 r->parse, ln, *pos, v);
2499 *res = 0;
2500 break;
2501 }
2502 *res %= operand2;
2503 break;
2504 case '<':
2505 *res = *res < operand2;
2506 break;
2507 case '>':
2508 *res = *res > operand2;
2509 break;
2510 case 'l':
2511 *res = *res <= operand2;
2512 break;
2513 case 'g':
2514 *res = *res >= operand2;
2515 break;
2516 case '=':
2517 *res = *res == operand2;
2518 break;
2519 case '!':
2520 *res = *res != operand2;
2521 break;
2522 case '&':
2523 *res = *res && operand2;
2524 break;
2525 case ':':
2526 *res = *res || operand2;
2527 break;
2528 case 'i':
2529 if (operand2 < *res)
2530 *res = operand2;
2531 break;
2532 case 'a':
2533 if (operand2 > *res)
2534 *res = operand2;
2535 break;
2536 default:
2537 abort();
2538 }
2539 }
2540 return(1);
2541 }
2542
2543 /* --- register management ------------------------------------------------ */
2544
2545 void
2546 roff_setreg(struct roff *r, const char *name, int val, char sign)
2547 {
2548 struct roffreg *reg;
2549
2550 /* Search for an existing register with the same name. */
2551 reg = r->regtab;
2552
2553 while (reg && strcmp(name, reg->key.p))
2554 reg = reg->next;
2555
2556 if (NULL == reg) {
2557 /* Create a new register. */
2558 reg = mandoc_malloc(sizeof(struct roffreg));
2559 reg->key.p = mandoc_strdup(name);
2560 reg->key.sz = strlen(name);
2561 reg->val = 0;
2562 reg->next = r->regtab;
2563 r->regtab = reg;
2564 }
2565
2566 if ('+' == sign)
2567 reg->val += val;
2568 else if ('-' == sign)
2569 reg->val -= val;
2570 else
2571 reg->val = val;
2572 }
2573
2574 /*
2575 * Handle some predefined read-only number registers.
2576 * For now, return -1 if the requested register is not predefined;
2577 * in case a predefined read-only register having the value -1
2578 * were to turn up, another special value would have to be chosen.
2579 */
2580 static int
2581 roff_getregro(const struct roff *r, const char *name)
2582 {
2583
2584 switch (*name) {
2585 case '$': /* Number of arguments of the last macro evaluated. */
2586 return(r->argc);
2587 case 'A': /* ASCII approximation mode is always off. */
2588 return(0);
2589 case 'g': /* Groff compatibility mode is always on. */
2590 return(1);
2591 case 'H': /* Fixed horizontal resolution. */
2592 return (24);
2593 case 'j': /* Always adjust left margin only. */
2594 return(0);
2595 case 'T': /* Some output device is always defined. */
2596 return(1);
2597 case 'V': /* Fixed vertical resolution. */
2598 return (40);
2599 default:
2600 return (-1);
2601 }
2602 }
2603
2604 int
2605 roff_getreg(const struct roff *r, const char *name)
2606 {
2607 struct roffreg *reg;
2608 int val;
2609
2610 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
2611 val = roff_getregro(r, name + 1);
2612 if (-1 != val)
2613 return (val);
2614 }
2615
2616 for (reg = r->regtab; reg; reg = reg->next)
2617 if (0 == strcmp(name, reg->key.p))
2618 return(reg->val);
2619
2620 return(0);
2621 }
2622
2623 static int
2624 roff_getregn(const struct roff *r, const char *name, size_t len)
2625 {
2626 struct roffreg *reg;
2627 int val;
2628
2629 if ('.' == name[0] && 2 == len) {
2630 val = roff_getregro(r, name + 1);
2631 if (-1 != val)
2632 return (val);
2633 }
2634
2635 for (reg = r->regtab; reg; reg = reg->next)
2636 if (len == reg->key.sz &&
2637 0 == strncmp(name, reg->key.p, len))
2638 return(reg->val);
2639
2640 return(0);
2641 }
2642
2643 static int
2644 roff_hasregn(const struct roff *r, const char *name, size_t len)
2645 {
2646 struct roffreg *reg;
2647 int val;
2648
2649 if ('.' == name[0] && 2 == len) {
2650 val = roff_getregro(r, name + 1);
2651 if (-1 != val)
2652 return(1);
2653 }
2654
2655 for (reg = r->regtab; reg; reg = reg->next)
2656 if (len == reg->key.sz &&
2657 0 == strncmp(name, reg->key.p, len))
2658 return(1);
2659
2660 return(0);
2661 }
2662
2663 static void
2664 roff_freereg(struct roffreg *reg)
2665 {
2666 struct roffreg *old_reg;
2667
2668 while (NULL != reg) {
2669 free(reg->key.p);
2670 old_reg = reg;
2671 reg = reg->next;
2672 free(old_reg);
2673 }
2674 }
2675
2676 static enum rofferr
2677 roff_nr(ROFF_ARGS)
2678 {
2679 char *key, *val;
2680 size_t keysz;
2681 int iv;
2682 char sign;
2683
2684 key = val = buf->buf + pos;
2685 if (*key == '\0')
2686 return(ROFF_IGN);
2687
2688 keysz = roff_getname(r, &val, ln, pos);
2689 if (key[keysz] == '\\')
2690 return(ROFF_IGN);
2691 key[keysz] = '\0';
2692
2693 sign = *val;
2694 if (sign == '+' || sign == '-')
2695 val++;
2696
2697 if (roff_evalnum(r, ln, val, NULL, &iv, ROFFNUM_SCALE))
2698 roff_setreg(r, key, iv, sign);
2699
2700 return(ROFF_IGN);
2701 }
2702
2703 static enum rofferr
2704 roff_rr(ROFF_ARGS)
2705 {
2706 struct roffreg *reg, **prev;
2707 char *name, *cp;
2708 size_t namesz;
2709
2710 name = cp = buf->buf + pos;
2711 if (*name == '\0')
2712 return(ROFF_IGN);
2713 namesz = roff_getname(r, &cp, ln, pos);
2714 name[namesz] = '\0';
2715
2716 prev = &r->regtab;
2717 while (1) {
2718 reg = *prev;
2719 if (reg == NULL || !strcmp(name, reg->key.p))
2720 break;
2721 prev = &reg->next;
2722 }
2723 if (reg != NULL) {
2724 *prev = reg->next;
2725 free(reg->key.p);
2726 free(reg);
2727 }
2728 return(ROFF_IGN);
2729 }
2730
2731 /* --- handler functions for roff requests -------------------------------- */
2732
2733 static enum rofferr
2734 roff_rm(ROFF_ARGS)
2735 {
2736 const char *name;
2737 char *cp;
2738 size_t namesz;
2739
2740 cp = buf->buf + pos;
2741 while (*cp != '\0') {
2742 name = cp;
2743 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
2744 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
2745 if (name[namesz] == '\\')
2746 break;
2747 }
2748 return(ROFF_IGN);
2749 }
2750
2751 static enum rofferr
2752 roff_it(ROFF_ARGS)
2753 {
2754 int iv;
2755
2756 /* Parse the number of lines. */
2757
2758 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
2759 mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
2760 ln, ppos, buf->buf + 1);
2761 return(ROFF_IGN);
2762 }
2763
2764 while (isspace((unsigned char)buf->buf[pos]))
2765 pos++;
2766
2767 /*
2768 * Arm the input line trap.
2769 * Special-casing "an-trap" is an ugly workaround to cope
2770 * with DocBook stupidly fiddling with man(7) internals.
2771 */
2772
2773 roffit_lines = iv;
2774 roffit_macro = mandoc_strdup(iv != 1 ||
2775 strcmp(buf->buf + pos, "an-trap") ?
2776 buf->buf + pos : "br");
2777 return(ROFF_IGN);
2778 }
2779
2780 static enum rofferr
2781 roff_Dd(ROFF_ARGS)
2782 {
2783 const char *const *cp;
2784
2785 if ((r->options & (MPARSE_MDOC | MPARSE_QUICK)) == 0)
2786 for (cp = __mdoc_reserved; *cp; cp++)
2787 roff_setstr(r, *cp, NULL, 0);
2788
2789 if (r->format == 0)
2790 r->format = MPARSE_MDOC;
2791
2792 return(ROFF_CONT);
2793 }
2794
2795 static enum rofferr
2796 roff_TH(ROFF_ARGS)
2797 {
2798 const char *const *cp;
2799
2800 if ((r->options & MPARSE_QUICK) == 0)
2801 for (cp = __man_reserved; *cp; cp++)
2802 roff_setstr(r, *cp, NULL, 0);
2803
2804 if (r->format == 0)
2805 r->format = MPARSE_MAN;
2806
2807 return(ROFF_CONT);
2808 }
2809
2810 static enum rofferr
2811 roff_TE(ROFF_ARGS)
2812 {
2813
2814 if (NULL == r->tbl)
2815 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2816 ln, ppos, "TE");
2817 else if ( ! tbl_end(&r->tbl)) {
2818 free(buf->buf);
2819 buf->buf = mandoc_strdup(".sp");
2820 buf->sz = 4;
2821 return(ROFF_REPARSE);
2822 }
2823 return(ROFF_IGN);
2824 }
2825
2826 static enum rofferr
2827 roff_T_(ROFF_ARGS)
2828 {
2829
2830 if (NULL == r->tbl)
2831 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2832 ln, ppos, "T&");
2833 else
2834 tbl_restart(ppos, ln, r->tbl);
2835
2836 return(ROFF_IGN);
2837 }
2838
2839 /*
2840 * Handle in-line equation delimiters.
2841 */
2842 static enum rofferr
2843 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
2844 {
2845 char *cp1, *cp2;
2846 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
2847
2848 /*
2849 * Outside equations, look for an opening delimiter.
2850 * If we are inside an equation, we already know it is
2851 * in-line, or this function wouldn't have been called;
2852 * so look for a closing delimiter.
2853 */
2854
2855 cp1 = buf->buf + pos;
2856 cp2 = strchr(cp1, r->eqn == NULL ?
2857 r->last_eqn->odelim : r->last_eqn->cdelim);
2858 if (cp2 == NULL)
2859 return(ROFF_CONT);
2860
2861 *cp2++ = '\0';
2862 bef_pr = bef_nl = aft_nl = aft_pr = "";
2863
2864 /* Handle preceding text, protecting whitespace. */
2865
2866 if (*buf->buf != '\0') {
2867 if (r->eqn == NULL)
2868 bef_pr = "\\&";
2869 bef_nl = "\n";
2870 }
2871
2872 /*
2873 * Prepare replacing the delimiter with an equation macro
2874 * and drop leading white space from the equation.
2875 */
2876
2877 if (r->eqn == NULL) {
2878 while (*cp2 == ' ')
2879 cp2++;
2880 mac = ".EQ";
2881 } else
2882 mac = ".EN";
2883
2884 /* Handle following text, protecting whitespace. */
2885
2886 if (*cp2 != '\0') {
2887 aft_nl = "\n";
2888 if (r->eqn != NULL)
2889 aft_pr = "\\&";
2890 }
2891
2892 /* Do the actual replacement. */
2893
2894 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
2895 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
2896 free(buf->buf);
2897 buf->buf = cp1;
2898
2899 /* Toggle the in-line state of the eqn subsystem. */
2900
2901 r->eqn_inline = r->eqn == NULL;
2902 return(ROFF_REPARSE);
2903 }
2904
2905 static enum rofferr
2906 roff_EQ(ROFF_ARGS)
2907 {
2908 struct eqn_node *e;
2909
2910 assert(r->eqn == NULL);
2911 e = eqn_alloc(ppos, ln, r->parse);
2912
2913 if (r->last_eqn) {
2914 r->last_eqn->next = e;
2915 e->delim = r->last_eqn->delim;
2916 e->odelim = r->last_eqn->odelim;
2917 e->cdelim = r->last_eqn->cdelim;
2918 } else
2919 r->first_eqn = r->last_eqn = e;
2920
2921 r->eqn = r->last_eqn = e;
2922
2923 if (buf->buf[pos] != '\0')
2924 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2925 ".EQ %s", buf->buf + pos);
2926
2927 return(ROFF_IGN);
2928 }
2929
2930 static enum rofferr
2931 roff_EN(ROFF_ARGS)
2932 {
2933
2934 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
2935 return(ROFF_IGN);
2936 }
2937
2938 static enum rofferr
2939 roff_TS(ROFF_ARGS)
2940 {
2941 struct tbl_node *tbl;
2942
2943 if (r->tbl) {
2944 mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
2945 ln, ppos, "TS breaks TS");
2946 tbl_end(&r->tbl);
2947 }
2948
2949 tbl = tbl_alloc(ppos, ln, r->parse);
2950
2951 if (r->last_tbl)
2952 r->last_tbl->next = tbl;
2953 else
2954 r->first_tbl = r->last_tbl = tbl;
2955
2956 r->tbl = r->last_tbl = tbl;
2957 return(ROFF_IGN);
2958 }
2959
2960 static enum rofferr
2961 roff_brp(ROFF_ARGS)
2962 {
2963
2964 buf->buf[pos - 1] = '\0';
2965 return(ROFF_CONT);
2966 }
2967
2968 static enum rofferr
2969 roff_cc(ROFF_ARGS)
2970 {
2971 const char *p;
2972
2973 p = buf->buf + pos;
2974
2975 if (*p == '\0' || (r->control = *p++) == '.')
2976 r->control = 0;
2977
2978 if (*p != '\0')
2979 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
2980 ln, p - buf->buf, "cc ... %s", p);
2981
2982 return(ROFF_IGN);
2983 }
2984
2985 static enum rofferr
2986 roff_tr(ROFF_ARGS)
2987 {
2988 const char *p, *first, *second;
2989 size_t fsz, ssz;
2990 enum mandoc_esc esc;
2991
2992 p = buf->buf + pos;
2993
2994 if (*p == '\0') {
2995 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr");
2996 return(ROFF_IGN);
2997 }
2998
2999 while (*p != '\0') {
3000 fsz = ssz = 1;
3001
3002 first = p++;
3003 if (*first == '\\') {
3004 esc = mandoc_escape(&p, NULL, NULL);
3005 if (esc == ESCAPE_ERROR) {
3006 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
3007 ln, (int)(p - buf->buf), first);
3008 return(ROFF_IGN);
3009 }
3010 fsz = (size_t)(p - first);
3011 }
3012
3013 second = p++;
3014 if (*second == '\\') {
3015 esc = mandoc_escape(&p, NULL, NULL);
3016 if (esc == ESCAPE_ERROR) {
3017 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
3018 ln, (int)(p - buf->buf), second);
3019 return(ROFF_IGN);
3020 }
3021 ssz = (size_t)(p - second);
3022 } else if (*second == '\0') {
3023 mandoc_vmsg(MANDOCERR_TR_ODD, r->parse,
3024 ln, first - buf->buf, "tr %s", first);
3025 second = " ";
3026 p--;
3027 }
3028
3029 if (fsz > 1) {
3030 roff_setstrn(&r->xmbtab, first, fsz,
3031 second, ssz, 0);
3032 continue;
3033 }
3034
3035 if (r->xtab == NULL)
3036 r->xtab = mandoc_calloc(128,
3037 sizeof(struct roffstr));
3038
3039 free(r->xtab[(int)*first].p);
3040 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3041 r->xtab[(int)*first].sz = ssz;
3042 }
3043
3044 return(ROFF_IGN);
3045 }
3046
3047 static enum rofferr
3048 roff_so(ROFF_ARGS)
3049 {
3050 char *name, *cp;
3051
3052 name = buf->buf + pos;
3053 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
3054
3055 /*
3056 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3057 * opening anything that's not in our cwd or anything beneath
3058 * it. Thus, explicitly disallow traversing up the file-system
3059 * or using absolute paths.
3060 */
3061
3062 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3063 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
3064 ".so %s", name);
3065 buf->sz = mandoc_asprintf(&cp,
3066 ".sp\nSee the file %s.\n.sp", name) + 1;
3067 free(buf->buf);
3068 buf->buf = cp;
3069 *offs = 0;
3070 return(ROFF_REPARSE);
3071 }
3072
3073 *offs = pos;
3074 return(ROFF_SO);
3075 }
3076
3077 /* --- user defined strings and macros ------------------------------------ */
3078
3079 static enum rofferr
3080 roff_userdef(ROFF_ARGS)
3081 {
3082 const char *arg[9], *ap;
3083 char *cp, *n1, *n2;
3084 int i, ib, ie;
3085 size_t asz, rsz;
3086
3087 /*
3088 * Collect pointers to macro argument strings
3089 * and NUL-terminate them.
3090 */
3091
3092 r->argc = 0;
3093 cp = buf->buf + pos;
3094 for (i = 0; i < 9; i++) {
3095 if (*cp == '\0')
3096 arg[i] = "";
3097 else {
3098 arg[i] = mandoc_getarg(r->parse, &cp, ln, &pos);
3099 r->argc = i + 1;
3100 }
3101 }
3102
3103 /*
3104 * Expand macro arguments.
3105 */
3106
3107 buf->sz = strlen(r->current_string) + 1;
3108 n1 = cp = mandoc_malloc(buf->sz);
3109 memcpy(n1, r->current_string, buf->sz);
3110 while (*cp != '\0') {
3111
3112 /* Scan ahead for the next argument invocation. */
3113
3114 if (*cp++ != '\\')
3115 continue;
3116 if (*cp++ != '$')
3117 continue;
3118 if (*cp == '*') { /* \\$* inserts all arguments */
3119 ib = 0;
3120 ie = r->argc - 1;
3121 } else { /* \\$1 .. \\$9 insert one argument */
3122 ib = ie = *cp - '1';
3123 if (ib < 0 || ib > 8)
3124 continue;
3125 }
3126 cp -= 2;
3127
3128 /*
3129 * Determine the size of the expanded argument,
3130 * taking escaping of quotes into account.
3131 */
3132
3133 asz = ie > ib ? ie - ib : 0; /* for blanks */
3134 for (i = ib; i <= ie; i++) {
3135 for (ap = arg[i]; *ap != '\0'; ap++) {
3136 asz++;
3137 if (*ap == '"')
3138 asz += 3;
3139 }
3140 }
3141 if (asz != 3) {
3142
3143 /*
3144 * Determine the size of the rest of the
3145 * unexpanded macro, including the NUL.
3146 */
3147
3148 rsz = buf->sz - (cp - n1) - 3;
3149
3150 /*
3151 * When shrinking, move before
3152 * releasing the storage.
3153 */
3154
3155 if (asz < 3)
3156 memmove(cp + asz, cp + 3, rsz);
3157
3158 /*
3159 * Resize the storage for the macro
3160 * and readjust the parse pointer.
3161 */
3162
3163 buf->sz += asz - 3;
3164 n2 = mandoc_realloc(n1, buf->sz);
3165 cp = n2 + (cp - n1);
3166 n1 = n2;
3167
3168 /*
3169 * When growing, make room
3170 * for the expanded argument.
3171 */
3172
3173 if (asz > 3)
3174 memmove(cp + asz, cp + 3, rsz);
3175 }
3176
3177 /* Copy the expanded argument, escaping quotes. */
3178
3179 n2 = cp;
3180 for (i = ib; i <= ie; i++) {
3181 for (ap = arg[i]; *ap != '\0'; ap++) {
3182 if (*ap == '"') {
3183 memcpy(n2, "\\(dq", 4);
3184 n2 += 4;
3185 } else
3186 *n2++ = *ap;
3187 }
3188 if (i < ie)
3189 *n2++ = ' ';
3190 }
3191 }
3192
3193 /*
3194 * Replace the macro invocation
3195 * by the expanded macro.
3196 */
3197
3198 free(buf->buf);
3199 buf->buf = n1;
3200 *offs = 0;
3201
3202 return(buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
3203 ROFF_REPARSE : ROFF_APPEND);
3204 }
3205
3206 static size_t
3207 roff_getname(struct roff *r, char **cpp, int ln, int pos)
3208 {
3209 char *name, *cp;
3210 size_t namesz;
3211
3212 name = *cpp;
3213 if ('\0' == *name)
3214 return(0);
3215
3216 /* Read until end of name and terminate it with NUL. */
3217 for (cp = name; 1; cp++) {
3218 if ('\0' == *cp || ' ' == *cp) {
3219 namesz = cp - name;
3220 break;
3221 }
3222 if ('\\' != *cp)
3223 continue;
3224 namesz = cp - name;
3225 if ('{' == cp[1] || '}' == cp[1])
3226 break;
3227 cp++;
3228 if ('\\' == *cp)
3229 continue;
3230 mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
3231 "%.*s", (int)(cp - name + 1), name);
3232 mandoc_escape((const char **)&cp, NULL, NULL);
3233 break;
3234 }
3235
3236 /* Read past spaces. */
3237 while (' ' == *cp)
3238 cp++;
3239
3240 *cpp = cp;
3241 return(namesz);
3242 }
3243
3244 /*
3245 * Store *string into the user-defined string called *name.
3246 * To clear an existing entry, call with (*r, *name, NULL, 0).
3247 * append == 0: replace mode
3248 * append == 1: single-line append mode
3249 * append == 2: multiline append mode, append '\n' after each call
3250 */
3251 static void
3252 roff_setstr(struct roff *r, const char *name, const char *string,
3253 int append)
3254 {
3255
3256 roff_setstrn(&r->strtab, name, strlen(name), string,
3257 string ? strlen(string) : 0, append);
3258 }
3259
3260 static void
3261 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
3262 const char *string, size_t stringsz, int append)
3263 {
3264 struct roffkv *n;
3265 char *c;
3266 int i;
3267 size_t oldch, newch;
3268
3269 /* Search for an existing string with the same name. */
3270 n = *r;
3271
3272 while (n && (namesz != n->key.sz ||
3273 strncmp(n->key.p, name, namesz)))
3274 n = n->next;
3275
3276 if (NULL == n) {
3277 /* Create a new string table entry. */
3278 n = mandoc_malloc(sizeof(struct roffkv));
3279 n->key.p = mandoc_strndup(name, namesz);
3280 n->key.sz = namesz;
3281 n->val.p = NULL;
3282 n->val.sz = 0;
3283 n->next = *r;
3284 *r = n;
3285 } else if (0 == append) {
3286 free(n->val.p);
3287 n->val.p = NULL;
3288 n->val.sz = 0;
3289 }
3290
3291 if (NULL == string)
3292 return;
3293
3294 /*
3295 * One additional byte for the '\n' in multiline mode,
3296 * and one for the terminating '\0'.
3297 */
3298 newch = stringsz + (1 < append ? 2u : 1u);
3299
3300 if (NULL == n->val.p) {
3301 n->val.p = mandoc_malloc(newch);
3302 *n->val.p = '\0';
3303 oldch = 0;
3304 } else {
3305 oldch = n->val.sz;
3306 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
3307 }
3308
3309 /* Skip existing content in the destination buffer. */
3310 c = n->val.p + (int)oldch;
3311
3312 /* Append new content to the destination buffer. */
3313 i = 0;
3314 while (i < (int)stringsz) {
3315 /*
3316 * Rudimentary roff copy mode:
3317 * Handle escaped backslashes.
3318 */
3319 if ('\\' == string[i] && '\\' == string[i + 1])
3320 i++;
3321 *c++ = string[i++];
3322 }
3323
3324 /* Append terminating bytes. */
3325 if (1 < append)
3326 *c++ = '\n';
3327
3328 *c = '\0';
3329 n->val.sz = (int)(c - n->val.p);
3330 }
3331
3332 static const char *
3333 roff_getstrn(const struct roff *r, const char *name, size_t len)
3334 {
3335 const struct roffkv *n;
3336 int i;
3337
3338 for (n = r->strtab; n; n = n->next)
3339 if (0 == strncmp(name, n->key.p, len) &&
3340 '\0' == n->key.p[(int)len])
3341 return(n->val.p);
3342
3343 for (i = 0; i < PREDEFS_MAX; i++)
3344 if (0 == strncmp(name, predefs[i].name, len) &&
3345 '\0' == predefs[i].name[(int)len])
3346 return(predefs[i].str);
3347
3348 return(NULL);
3349 }
3350
3351 static void
3352 roff_freestr(struct roffkv *r)
3353 {
3354 struct roffkv *n, *nn;
3355
3356 for (n = r; n; n = nn) {
3357 free(n->key.p);
3358 free(n->val.p);
3359 nn = n->next;
3360 free(n);
3361 }
3362 }
3363
3364 /* --- accessors and utility functions ------------------------------------ */
3365
3366 const struct tbl_span *
3367 roff_span(const struct roff *r)
3368 {
3369
3370 return(r->tbl ? tbl_span(r->tbl) : NULL);
3371 }
3372
3373 const struct eqn *
3374 roff_eqn(const struct roff *r)
3375 {
3376
3377 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
3378 }
3379
3380 /*
3381 * Duplicate an input string, making the appropriate character
3382 * conversations (as stipulated by `tr') along the way.
3383 * Returns a heap-allocated string with all the replacements made.
3384 */
3385 char *
3386 roff_strdup(const struct roff *r, const char *p)
3387 {
3388 const struct roffkv *cp;
3389 char *res;
3390 const char *pp;
3391 size_t ssz, sz;
3392 enum mandoc_esc esc;
3393
3394 if (NULL == r->xmbtab && NULL == r->xtab)
3395 return(mandoc_strdup(p));
3396 else if ('\0' == *p)
3397 return(mandoc_strdup(""));
3398
3399 /*
3400 * Step through each character looking for term matches
3401 * (remember that a `tr' can be invoked with an escape, which is
3402 * a glyph but the escape is multi-character).
3403 * We only do this if the character hash has been initialised
3404 * and the string is >0 length.
3405 */
3406
3407 res = NULL;
3408 ssz = 0;
3409
3410 while ('\0' != *p) {
3411 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
3412 sz = r->xtab[(int)*p].sz;
3413 res = mandoc_realloc(res, ssz + sz + 1);
3414 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
3415 ssz += sz;
3416 p++;
3417 continue;
3418 } else if ('\\' != *p) {
3419 res = mandoc_realloc(res, ssz + 2);
3420 res[ssz++] = *p++;
3421 continue;
3422 }
3423
3424 /* Search for term matches. */
3425 for (cp = r->xmbtab; cp; cp = cp->next)
3426 if (0 == strncmp(p, cp->key.p, cp->key.sz))
3427 break;
3428
3429 if (NULL != cp) {
3430 /*
3431 * A match has been found.
3432 * Append the match to the array and move
3433 * forward by its keysize.
3434 */
3435 res = mandoc_realloc(res,
3436 ssz + cp->val.sz + 1);
3437 memcpy(res + ssz, cp->val.p, cp->val.sz);
3438 ssz += cp->val.sz;
3439 p += (int)cp->key.sz;
3440 continue;
3441 }
3442
3443 /*
3444 * Handle escapes carefully: we need to copy
3445 * over just the escape itself, or else we might
3446 * do replacements within the escape itself.
3447 * Make sure to pass along the bogus string.
3448 */
3449 pp = p++;
3450 esc = mandoc_escape(&p, NULL, NULL);
3451 if (ESCAPE_ERROR == esc) {
3452 sz = strlen(pp);
3453 res = mandoc_realloc(res, ssz + sz + 1);
3454 memcpy(res + ssz, pp, sz);
3455 break;
3456 }
3457 /*
3458 * We bail out on bad escapes.
3459 * No need to warn: we already did so when
3460 * roff_res() was called.
3461 */
3462 sz = (int)(p - pp);
3463 res = mandoc_realloc(res, ssz + sz + 1);
3464 memcpy(res + ssz, pp, sz);
3465 ssz += sz;
3466 }
3467
3468 res[(int)ssz] = '\0';
3469 return(res);
3470 }
3471
3472 int
3473 roff_getformat(const struct roff *r)
3474 {
3475
3476 return(r->format);
3477 }
3478
3479 /*
3480 * Find out whether a line is a macro line or not.
3481 * If it is, adjust the current position and return one; if it isn't,
3482 * return zero and don't change the current position.
3483 * If the control character has been set with `.cc', then let that grain
3484 * precedence.
3485 * This is slighly contrary to groff, where using the non-breaking
3486 * control character when `cc' has been invoked will cause the
3487 * non-breaking macro contents to be printed verbatim.
3488 */
3489 int
3490 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
3491 {
3492 int pos;
3493
3494 pos = *ppos;
3495
3496 if (0 != r->control && cp[pos] == r->control)
3497 pos++;
3498 else if (0 != r->control)
3499 return(0);
3500 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
3501 pos += 2;
3502 else if ('.' == cp[pos] || '\'' == cp[pos])
3503 pos++;
3504 else
3505 return(0);
3506
3507 while (' ' == cp[pos] || '\t' == cp[pos])
3508 pos++;
3509
3510 *ppos = pos;
3511 return(1);
3512 }