]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
Fix an obvious bug found during the /* FALLTHROUGH */ cleanup:
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.278 2015/10/12 00:08:16 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28
29 #include "mandoc.h"
30 #include "mandoc_aux.h"
31 #include "roff.h"
32 #include "libmandoc.h"
33 #include "roff_int.h"
34 #include "libroff.h"
35
36 /* Maximum number of nested if-else conditionals. */
37 #define RSTACK_MAX 128
38
39 /* Maximum number of string expansions per line, to break infinite loops. */
40 #define EXPAND_LIMIT 1000
41
42 /* --- data types --------------------------------------------------------- */
43
44 enum rofft {
45 ROFF_ab,
46 ROFF_ad,
47 ROFF_af,
48 ROFF_aln,
49 ROFF_als,
50 ROFF_am,
51 ROFF_am1,
52 ROFF_ami,
53 ROFF_ami1,
54 ROFF_as,
55 ROFF_as1,
56 ROFF_asciify,
57 ROFF_backtrace,
58 ROFF_bd,
59 ROFF_bleedat,
60 ROFF_blm,
61 ROFF_box,
62 ROFF_boxa,
63 ROFF_bp,
64 ROFF_BP,
65 /* MAN_br, MDOC_br */
66 ROFF_break,
67 ROFF_breakchar,
68 ROFF_brnl,
69 ROFF_brp,
70 ROFF_brpnl,
71 ROFF_c2,
72 ROFF_cc,
73 ROFF_ce,
74 ROFF_cf,
75 ROFF_cflags,
76 ROFF_ch,
77 ROFF_char,
78 ROFF_chop,
79 ROFF_class,
80 ROFF_close,
81 ROFF_CL,
82 ROFF_color,
83 ROFF_composite,
84 ROFF_continue,
85 ROFF_cp,
86 ROFF_cropat,
87 ROFF_cs,
88 ROFF_cu,
89 ROFF_da,
90 ROFF_dch,
91 ROFF_Dd,
92 ROFF_de,
93 ROFF_de1,
94 ROFF_defcolor,
95 ROFF_dei,
96 ROFF_dei1,
97 ROFF_device,
98 ROFF_devicem,
99 ROFF_di,
100 ROFF_do,
101 ROFF_ds,
102 ROFF_ds1,
103 ROFF_dwh,
104 ROFF_dt,
105 ROFF_ec,
106 ROFF_ecr,
107 ROFF_ecs,
108 ROFF_el,
109 ROFF_em,
110 ROFF_EN,
111 ROFF_eo,
112 ROFF_EP,
113 ROFF_EQ,
114 ROFF_errprint,
115 ROFF_ev,
116 ROFF_evc,
117 ROFF_ex,
118 ROFF_fallback,
119 ROFF_fam,
120 ROFF_fc,
121 ROFF_fchar,
122 ROFF_fcolor,
123 ROFF_fdeferlig,
124 ROFF_feature,
125 /* MAN_fi; ignored in mdoc(7) */
126 ROFF_fkern,
127 ROFF_fl,
128 ROFF_flig,
129 ROFF_fp,
130 ROFF_fps,
131 ROFF_fschar,
132 ROFF_fspacewidth,
133 ROFF_fspecial,
134 /* MAN_ft; ignored in mdoc(7) */
135 ROFF_ftr,
136 ROFF_fzoom,
137 ROFF_gcolor,
138 ROFF_hc,
139 ROFF_hcode,
140 ROFF_hidechar,
141 ROFF_hla,
142 ROFF_hlm,
143 ROFF_hpf,
144 ROFF_hpfa,
145 ROFF_hpfcode,
146 ROFF_hw,
147 ROFF_hy,
148 ROFF_hylang,
149 ROFF_hylen,
150 ROFF_hym,
151 ROFF_hypp,
152 ROFF_hys,
153 ROFF_ie,
154 ROFF_if,
155 ROFF_ig,
156 /* MAN_in; ignored in mdoc(7) */
157 ROFF_index,
158 ROFF_it,
159 ROFF_itc,
160 ROFF_IX,
161 ROFF_kern,
162 ROFF_kernafter,
163 ROFF_kernbefore,
164 ROFF_kernpair,
165 ROFF_lc,
166 ROFF_lc_ctype,
167 ROFF_lds,
168 ROFF_length,
169 ROFF_letadj,
170 ROFF_lf,
171 ROFF_lg,
172 ROFF_lhang,
173 ROFF_linetabs,
174 /* MAN_ll, MDOC_ll */
175 ROFF_lnr,
176 ROFF_lnrf,
177 ROFF_lpfx,
178 ROFF_ls,
179 ROFF_lsm,
180 ROFF_lt,
181 ROFF_mc,
182 ROFF_mediasize,
183 ROFF_minss,
184 ROFF_mk,
185 ROFF_mso,
186 ROFF_na,
187 ROFF_ne,
188 /* MAN_nf; ignored in mdoc(7) */
189 ROFF_nh,
190 ROFF_nhychar,
191 ROFF_nm,
192 ROFF_nn,
193 ROFF_nop,
194 ROFF_nr,
195 ROFF_nrf,
196 ROFF_nroff,
197 ROFF_ns,
198 ROFF_nx,
199 ROFF_open,
200 ROFF_opena,
201 ROFF_os,
202 ROFF_output,
203 ROFF_padj,
204 ROFF_papersize,
205 ROFF_pc,
206 ROFF_pev,
207 ROFF_pi,
208 ROFF_PI,
209 ROFF_pl,
210 ROFF_pm,
211 ROFF_pn,
212 ROFF_pnr,
213 ROFF_po,
214 ROFF_ps,
215 ROFF_psbb,
216 ROFF_pshape,
217 ROFF_pso,
218 ROFF_ptr,
219 ROFF_pvs,
220 ROFF_rchar,
221 ROFF_rd,
222 ROFF_recursionlimit,
223 ROFF_return,
224 ROFF_rfschar,
225 ROFF_rhang,
226 ROFF_rj,
227 ROFF_rm,
228 ROFF_rn,
229 ROFF_rnn,
230 ROFF_rr,
231 ROFF_rs,
232 ROFF_rt,
233 ROFF_schar,
234 ROFF_sentchar,
235 ROFF_shc,
236 ROFF_shift,
237 ROFF_sizes,
238 ROFF_so,
239 /* MAN_sp, MDOC_sp */
240 ROFF_spacewidth,
241 ROFF_special,
242 ROFF_spreadwarn,
243 ROFF_ss,
244 ROFF_sty,
245 ROFF_substring,
246 ROFF_sv,
247 ROFF_sy,
248 ROFF_T_,
249 ROFF_ta,
250 ROFF_tc,
251 ROFF_TE,
252 ROFF_TH,
253 ROFF_ti,
254 ROFF_tkf,
255 ROFF_tl,
256 ROFF_tm,
257 ROFF_tm1,
258 ROFF_tmc,
259 ROFF_tr,
260 ROFF_track,
261 ROFF_transchar,
262 ROFF_trf,
263 ROFF_trimat,
264 ROFF_trin,
265 ROFF_trnt,
266 ROFF_troff,
267 ROFF_TS,
268 ROFF_uf,
269 ROFF_ul,
270 ROFF_unformat,
271 ROFF_unwatch,
272 ROFF_unwatchn,
273 ROFF_vpt,
274 ROFF_vs,
275 ROFF_warn,
276 ROFF_warnscale,
277 ROFF_watch,
278 ROFF_watchlength,
279 ROFF_watchn,
280 ROFF_wh,
281 ROFF_while,
282 ROFF_write,
283 ROFF_writec,
284 ROFF_writem,
285 ROFF_xflag,
286 ROFF_cblock,
287 ROFF_USERDEF,
288 ROFF_MAX
289 };
290
/*
 * Minimal string buffer: a character pointer together with its
 * cached length.
 */
struct roffstr {
	/* nil-terminated buffer */
	char		*p;
	/* saved strlen(p) */
	size_t		 sz;
};
298
299 /*
300 * A key-value roffstr pair as part of a singly-linked list.
301 */
302 struct roffkv {
303 struct roffstr key;
304 struct roffstr val;
305 struct roffkv *next; /* next in list */
306 };
307
308 /*
309 * A single number register as part of a singly-linked list.
310 */
311 struct roffreg {
312 struct roffstr key;
313 int val;
314 struct roffreg *next;
315 };
316
/*
 * State of one roff parser instance.
 */
struct roff {
	/* parse point */
	struct mparse	*parse;
	/* character table */
	const struct mchars *mchars;
	/* leaf of stack */
	struct roffnode	*last;
	/* stack of inverted `ie' values */
	int		*rstack;
	/* number registers */
	struct roffreg	*regtab;
	/* user-defined strings & macros */
	struct roffkv	*strtab;
	/* multi-byte trans table (`tr') */
	struct roffkv	*xmbtab;
	/* single-byte trans table (`tr') */
	struct roffstr	*xtab;
	/* value of last called user macro */
	const char	*current_string;
	/* first table parsed */
	struct tbl_node	*first_tbl;
	/* last table parsed */
	struct tbl_node	*last_tbl;
	/* current table being parsed */
	struct tbl_node	*tbl;
	/* last equation parsed */
	struct eqn_node	*last_eqn;
	/* first equation parsed */
	struct eqn_node	*first_eqn;
	/* current equation being parsed */
	struct eqn_node	*eqn;
	/* current equation is inline */
	int		 eqn_inline;
	/* parse options */
	int		 options;
	/* current size limit of rstack */
	int		 rstacksz;
	/* position in rstack */
	int		 rstackpos;
	/* current file in mdoc or man format */
	int		 format;
	/* number of args of the last macro */
	int		 argc;
	/* control character */
	char		 control;
};
341
342 struct roffnode {
343 enum rofft tok; /* type of node */
344 struct roffnode *parent; /* up one in stack */
345 int line; /* parse line */
346 int col; /* parse col */
347 char *name; /* node name, e.g. macro name */
348 char *end; /* end-rules: custom token */
349 int endspan; /* end-rules: next-line or infty */
350 int rule; /* current evaluation rule */
351 };
352
353 #define ROFF_ARGS struct roff *r, /* parse ctx */ \
354 enum rofft tok, /* tok of macro */ \
355 struct buf *buf, /* input buffer */ \
356 int ln, /* parse line */ \
357 int ppos, /* original pos in buffer */ \
358 int pos, /* current pos in buffer */ \
359 int *offs /* reset offset of buffer data */
360
361 typedef enum rofferr (*roffproc)(ROFF_ARGS);
362
363 struct roffmac {
364 const char *name; /* macro name */
365 roffproc proc; /* process new macro */
366 roffproc text; /* process as child text of macro */
367 roffproc sub; /* process as child of macro */
368 int flags;
369 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
370 struct roffmac *next;
371 };
372
/*
 * A predefined input name and the symbol it is replaced by.
 */
struct predef {
	/* predefined input name */
	const char	*name;
	/* replacement symbol */
	const char	*str;
};
377
378 #define PREDEF(__name, __str) \
379 { (__name), (__str) },
380
381 /* --- function prototypes ------------------------------------------------ */
382
383 static enum rofft roffhash_find(const char *, size_t);
384 static void roffhash_init(void);
385 static void roffnode_cleanscope(struct roff *);
386 static void roffnode_pop(struct roff *);
387 static void roffnode_push(struct roff *, enum rofft,
388 const char *, int, int);
389 static enum rofferr roff_block(ROFF_ARGS);
390 static enum rofferr roff_block_text(ROFF_ARGS);
391 static enum rofferr roff_block_sub(ROFF_ARGS);
392 static enum rofferr roff_brp(ROFF_ARGS);
393 static enum rofferr roff_cblock(ROFF_ARGS);
394 static enum rofferr roff_cc(ROFF_ARGS);
395 static void roff_ccond(struct roff *, int, int);
396 static enum rofferr roff_cond(ROFF_ARGS);
397 static enum rofferr roff_cond_text(ROFF_ARGS);
398 static enum rofferr roff_cond_sub(ROFF_ARGS);
399 static enum rofferr roff_ds(ROFF_ARGS);
400 static enum rofferr roff_eqndelim(struct roff *, struct buf *, int);
401 static int roff_evalcond(struct roff *r, int, char *, int *);
402 static int roff_evalnum(struct roff *, int,
403 const char *, int *, int *, int);
404 static int roff_evalpar(struct roff *, int,
405 const char *, int *, int *, int);
406 static int roff_evalstrcond(const char *, int *);
407 static void roff_free1(struct roff *);
408 static void roff_freereg(struct roffreg *);
409 static void roff_freestr(struct roffkv *);
410 static size_t roff_getname(struct roff *, char **, int, int);
411 static int roff_getnum(const char *, int *, int *, int);
412 static int roff_getop(const char *, int *, char *);
413 static int roff_getregn(const struct roff *,
414 const char *, size_t);
415 static int roff_getregro(const struct roff *,
416 const char *name);
417 static const char *roff_getstrn(const struct roff *,
418 const char *, size_t);
419 static int roff_hasregn(const struct roff *,
420 const char *, size_t);
421 static enum rofferr roff_insec(ROFF_ARGS);
422 static enum rofferr roff_it(ROFF_ARGS);
423 static enum rofferr roff_line_ignore(ROFF_ARGS);
424 static void roff_man_alloc1(struct roff_man *);
425 static void roff_man_free1(struct roff_man *);
426 static enum rofferr roff_nr(ROFF_ARGS);
427 static enum rofft roff_parse(struct roff *, char *, int *,
428 int, int);
429 static enum rofferr roff_parsetext(struct buf *, int, int *);
430 static enum rofferr roff_res(struct roff *, struct buf *, int, int);
431 static enum rofferr roff_rm(ROFF_ARGS);
432 static enum rofferr roff_rr(ROFF_ARGS);
433 static void roff_setstr(struct roff *,
434 const char *, const char *, int);
435 static void roff_setstrn(struct roffkv **, const char *,
436 size_t, const char *, size_t, int);
437 static enum rofferr roff_so(ROFF_ARGS);
438 static enum rofferr roff_tr(ROFF_ARGS);
439 static enum rofferr roff_Dd(ROFF_ARGS);
440 static enum rofferr roff_TH(ROFF_ARGS);
441 static enum rofferr roff_TE(ROFF_ARGS);
442 static enum rofferr roff_TS(ROFF_ARGS);
443 static enum rofferr roff_EQ(ROFF_ARGS);
444 static enum rofferr roff_EN(ROFF_ARGS);
445 static enum rofferr roff_T_(ROFF_ARGS);
446 static enum rofferr roff_unsupp(ROFF_ARGS);
447 static enum rofferr roff_userdef(ROFF_ARGS);
448
449 /* --- constant data ------------------------------------------------------ */
450
451 /* See roffhash_find() */
452
453 #define ASCII_HI 126
454 #define ASCII_LO 33
455 #define HASHWIDTH (ASCII_HI - ASCII_LO + 1)
456
457 #define ROFFNUM_SCALE (1 << 0) /* Honour scaling in roff_getnum(). */
458 #define ROFFNUM_WHITE (1 << 1) /* Skip whitespace in roff_evalnum(). */
459
460 static struct roffmac *hash[HASHWIDTH];
461
462 static struct roffmac roffs[ROFF_MAX] = {
463 { "ab", roff_unsupp, NULL, NULL, 0, NULL },
464 { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
465 { "af", roff_line_ignore, NULL, NULL, 0, NULL },
466 { "aln", roff_unsupp, NULL, NULL, 0, NULL },
467 { "als", roff_unsupp, NULL, NULL, 0, NULL },
468 { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
469 { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
470 { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
471 { "ami1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
472 { "as", roff_ds, NULL, NULL, 0, NULL },
473 { "as1", roff_ds, NULL, NULL, 0, NULL },
474 { "asciify", roff_unsupp, NULL, NULL, 0, NULL },
475 { "backtrace", roff_line_ignore, NULL, NULL, 0, NULL },
476 { "bd", roff_line_ignore, NULL, NULL, 0, NULL },
477 { "bleedat", roff_line_ignore, NULL, NULL, 0, NULL },
478 { "blm", roff_unsupp, NULL, NULL, 0, NULL },
479 { "box", roff_unsupp, NULL, NULL, 0, NULL },
480 { "boxa", roff_unsupp, NULL, NULL, 0, NULL },
481 { "bp", roff_line_ignore, NULL, NULL, 0, NULL },
482 { "BP", roff_unsupp, NULL, NULL, 0, NULL },
483 { "break", roff_unsupp, NULL, NULL, 0, NULL },
484 { "breakchar", roff_line_ignore, NULL, NULL, 0, NULL },
485 { "brnl", roff_line_ignore, NULL, NULL, 0, NULL },
486 { "brp", roff_brp, NULL, NULL, 0, NULL },
487 { "brpnl", roff_line_ignore, NULL, NULL, 0, NULL },
488 { "c2", roff_unsupp, NULL, NULL, 0, NULL },
489 { "cc", roff_cc, NULL, NULL, 0, NULL },
490 { "ce", roff_line_ignore, NULL, NULL, 0, NULL },
491 { "cf", roff_insec, NULL, NULL, 0, NULL },
492 { "cflags", roff_line_ignore, NULL, NULL, 0, NULL },
493 { "ch", roff_line_ignore, NULL, NULL, 0, NULL },
494 { "char", roff_unsupp, NULL, NULL, 0, NULL },
495 { "chop", roff_unsupp, NULL, NULL, 0, NULL },
496 { "class", roff_line_ignore, NULL, NULL, 0, NULL },
497 { "close", roff_insec, NULL, NULL, 0, NULL },
498 { "CL", roff_unsupp, NULL, NULL, 0, NULL },
499 { "color", roff_line_ignore, NULL, NULL, 0, NULL },
500 { "composite", roff_unsupp, NULL, NULL, 0, NULL },
501 { "continue", roff_unsupp, NULL, NULL, 0, NULL },
502 { "cp", roff_line_ignore, NULL, NULL, 0, NULL },
503 { "cropat", roff_line_ignore, NULL, NULL, 0, NULL },
504 { "cs", roff_line_ignore, NULL, NULL, 0, NULL },
505 { "cu", roff_line_ignore, NULL, NULL, 0, NULL },
506 { "da", roff_unsupp, NULL, NULL, 0, NULL },
507 { "dch", roff_unsupp, NULL, NULL, 0, NULL },
508 { "Dd", roff_Dd, NULL, NULL, 0, NULL },
509 { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
510 { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
511 { "defcolor", roff_line_ignore, NULL, NULL, 0, NULL },
512 { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
513 { "dei1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
514 { "device", roff_unsupp, NULL, NULL, 0, NULL },
515 { "devicem", roff_unsupp, NULL, NULL, 0, NULL },
516 { "di", roff_unsupp, NULL, NULL, 0, NULL },
517 { "do", roff_unsupp, NULL, NULL, 0, NULL },
518 { "ds", roff_ds, NULL, NULL, 0, NULL },
519 { "ds1", roff_ds, NULL, NULL, 0, NULL },
520 { "dwh", roff_unsupp, NULL, NULL, 0, NULL },
521 { "dt", roff_unsupp, NULL, NULL, 0, NULL },
522 { "ec", roff_unsupp, NULL, NULL, 0, NULL },
523 { "ecr", roff_unsupp, NULL, NULL, 0, NULL },
524 { "ecs", roff_unsupp, NULL, NULL, 0, NULL },
525 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
526 { "em", roff_unsupp, NULL, NULL, 0, NULL },
527 { "EN", roff_EN, NULL, NULL, 0, NULL },
528 { "eo", roff_unsupp, NULL, NULL, 0, NULL },
529 { "EP", roff_unsupp, NULL, NULL, 0, NULL },
530 { "EQ", roff_EQ, NULL, NULL, 0, NULL },
531 { "errprint", roff_line_ignore, NULL, NULL, 0, NULL },
532 { "ev", roff_unsupp, NULL, NULL, 0, NULL },
533 { "evc", roff_unsupp, NULL, NULL, 0, NULL },
534 { "ex", roff_unsupp, NULL, NULL, 0, NULL },
535 { "fallback", roff_line_ignore, NULL, NULL, 0, NULL },
536 { "fam", roff_line_ignore, NULL, NULL, 0, NULL },
537 { "fc", roff_unsupp, NULL, NULL, 0, NULL },
538 { "fchar", roff_unsupp, NULL, NULL, 0, NULL },
539 { "fcolor", roff_line_ignore, NULL, NULL, 0, NULL },
540 { "fdeferlig", roff_line_ignore, NULL, NULL, 0, NULL },
541 { "feature", roff_line_ignore, NULL, NULL, 0, NULL },
542 { "fkern", roff_line_ignore, NULL, NULL, 0, NULL },
543 { "fl", roff_line_ignore, NULL, NULL, 0, NULL },
544 { "flig", roff_line_ignore, NULL, NULL, 0, NULL },
545 { "fp", roff_line_ignore, NULL, NULL, 0, NULL },
546 { "fps", roff_line_ignore, NULL, NULL, 0, NULL },
547 { "fschar", roff_unsupp, NULL, NULL, 0, NULL },
548 { "fspacewidth", roff_line_ignore, NULL, NULL, 0, NULL },
549 { "fspecial", roff_line_ignore, NULL, NULL, 0, NULL },
550 { "ftr", roff_line_ignore, NULL, NULL, 0, NULL },
551 { "fzoom", roff_line_ignore, NULL, NULL, 0, NULL },
552 { "gcolor", roff_line_ignore, NULL, NULL, 0, NULL },
553 { "hc", roff_line_ignore, NULL, NULL, 0, NULL },
554 { "hcode", roff_line_ignore, NULL, NULL, 0, NULL },
555 { "hidechar", roff_line_ignore, NULL, NULL, 0, NULL },
556 { "hla", roff_line_ignore, NULL, NULL, 0, NULL },
557 { "hlm", roff_line_ignore, NULL, NULL, 0, NULL },
558 { "hpf", roff_line_ignore, NULL, NULL, 0, NULL },
559 { "hpfa", roff_line_ignore, NULL, NULL, 0, NULL },
560 { "hpfcode", roff_line_ignore, NULL, NULL, 0, NULL },
561 { "hw", roff_line_ignore, NULL, NULL, 0, NULL },
562 { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
563 { "hylang", roff_line_ignore, NULL, NULL, 0, NULL },
564 { "hylen", roff_line_ignore, NULL, NULL, 0, NULL },
565 { "hym", roff_line_ignore, NULL, NULL, 0, NULL },
566 { "hypp", roff_line_ignore, NULL, NULL, 0, NULL },
567 { "hys", roff_line_ignore, NULL, NULL, 0, NULL },
568 { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
569 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
570 { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
571 { "index", roff_unsupp, NULL, NULL, 0, NULL },
572 { "it", roff_it, NULL, NULL, 0, NULL },
573 { "itc", roff_unsupp, NULL, NULL, 0, NULL },
574 { "IX", roff_line_ignore, NULL, NULL, 0, NULL },
575 { "kern", roff_line_ignore, NULL, NULL, 0, NULL },
576 { "kernafter", roff_line_ignore, NULL, NULL, 0, NULL },
577 { "kernbefore", roff_line_ignore, NULL, NULL, 0, NULL },
578 { "kernpair", roff_line_ignore, NULL, NULL, 0, NULL },
579 { "lc", roff_unsupp, NULL, NULL, 0, NULL },
580 { "lc_ctype", roff_unsupp, NULL, NULL, 0, NULL },
581 { "lds", roff_unsupp, NULL, NULL, 0, NULL },
582 { "length", roff_unsupp, NULL, NULL, 0, NULL },
583 { "letadj", roff_line_ignore, NULL, NULL, 0, NULL },
584 { "lf", roff_insec, NULL, NULL, 0, NULL },
585 { "lg", roff_line_ignore, NULL, NULL, 0, NULL },
586 { "lhang", roff_line_ignore, NULL, NULL, 0, NULL },
587 { "linetabs", roff_unsupp, NULL, NULL, 0, NULL },
588 { "lnr", roff_unsupp, NULL, NULL, 0, NULL },
589 { "lnrf", roff_unsupp, NULL, NULL, 0, NULL },
590 { "lpfx", roff_unsupp, NULL, NULL, 0, NULL },
591 { "ls", roff_line_ignore, NULL, NULL, 0, NULL },
592 { "lsm", roff_unsupp, NULL, NULL, 0, NULL },
593 { "lt", roff_line_ignore, NULL, NULL, 0, NULL },
594 { "mc", roff_line_ignore, NULL, NULL, 0, NULL },
595 { "mediasize", roff_line_ignore, NULL, NULL, 0, NULL },
596 { "minss", roff_line_ignore, NULL, NULL, 0, NULL },
597 { "mk", roff_line_ignore, NULL, NULL, 0, NULL },
598 { "mso", roff_insec, NULL, NULL, 0, NULL },
599 { "na", roff_line_ignore, NULL, NULL, 0, NULL },
600 { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
601 { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
602 { "nhychar", roff_line_ignore, NULL, NULL, 0, NULL },
603 { "nm", roff_unsupp, NULL, NULL, 0, NULL },
604 { "nn", roff_unsupp, NULL, NULL, 0, NULL },
605 { "nop", roff_unsupp, NULL, NULL, 0, NULL },
606 { "nr", roff_nr, NULL, NULL, 0, NULL },
607 { "nrf", roff_unsupp, NULL, NULL, 0, NULL },
608 { "nroff", roff_line_ignore, NULL, NULL, 0, NULL },
609 { "ns", roff_line_ignore, NULL, NULL, 0, NULL },
610 { "nx", roff_insec, NULL, NULL, 0, NULL },
611 { "open", roff_insec, NULL, NULL, 0, NULL },
612 { "opena", roff_insec, NULL, NULL, 0, NULL },
613 { "os", roff_line_ignore, NULL, NULL, 0, NULL },
614 { "output", roff_unsupp, NULL, NULL, 0, NULL },
615 { "padj", roff_line_ignore, NULL, NULL, 0, NULL },
616 { "papersize", roff_line_ignore, NULL, NULL, 0, NULL },
617 { "pc", roff_line_ignore, NULL, NULL, 0, NULL },
618 { "pev", roff_line_ignore, NULL, NULL, 0, NULL },
619 { "pi", roff_insec, NULL, NULL, 0, NULL },
620 { "PI", roff_unsupp, NULL, NULL, 0, NULL },
621 { "pl", roff_line_ignore, NULL, NULL, 0, NULL },
622 { "pm", roff_line_ignore, NULL, NULL, 0, NULL },
623 { "pn", roff_line_ignore, NULL, NULL, 0, NULL },
624 { "pnr", roff_line_ignore, NULL, NULL, 0, NULL },
625 { "po", roff_line_ignore, NULL, NULL, 0, NULL },
626 { "ps", roff_line_ignore, NULL, NULL, 0, NULL },
627 { "psbb", roff_unsupp, NULL, NULL, 0, NULL },
628 { "pshape", roff_unsupp, NULL, NULL, 0, NULL },
629 { "pso", roff_insec, NULL, NULL, 0, NULL },
630 { "ptr", roff_line_ignore, NULL, NULL, 0, NULL },
631 { "pvs", roff_line_ignore, NULL, NULL, 0, NULL },
632 { "rchar", roff_unsupp, NULL, NULL, 0, NULL },
633 { "rd", roff_line_ignore, NULL, NULL, 0, NULL },
634 { "recursionlimit", roff_line_ignore, NULL, NULL, 0, NULL },
635 { "return", roff_unsupp, NULL, NULL, 0, NULL },
636 { "rfschar", roff_unsupp, NULL, NULL, 0, NULL },
637 { "rhang", roff_line_ignore, NULL, NULL, 0, NULL },
638 { "rj", roff_line_ignore, NULL, NULL, 0, NULL },
639 { "rm", roff_rm, NULL, NULL, 0, NULL },
640 { "rn", roff_unsupp, NULL, NULL, 0, NULL },
641 { "rnn", roff_unsupp, NULL, NULL, 0, NULL },
642 { "rr", roff_rr, NULL, NULL, 0, NULL },
643 { "rs", roff_line_ignore, NULL, NULL, 0, NULL },
644 { "rt", roff_line_ignore, NULL, NULL, 0, NULL },
645 { "schar", roff_unsupp, NULL, NULL, 0, NULL },
646 { "sentchar", roff_line_ignore, NULL, NULL, 0, NULL },
647 { "shc", roff_line_ignore, NULL, NULL, 0, NULL },
648 { "shift", roff_unsupp, NULL, NULL, 0, NULL },
649 { "sizes", roff_line_ignore, NULL, NULL, 0, NULL },
650 { "so", roff_so, NULL, NULL, 0, NULL },
651 { "spacewidth", roff_line_ignore, NULL, NULL, 0, NULL },
652 { "special", roff_line_ignore, NULL, NULL, 0, NULL },
653 { "spreadwarn", roff_line_ignore, NULL, NULL, 0, NULL },
654 { "ss", roff_line_ignore, NULL, NULL, 0, NULL },
655 { "sty", roff_line_ignore, NULL, NULL, 0, NULL },
656 { "substring", roff_unsupp, NULL, NULL, 0, NULL },
657 { "sv", roff_line_ignore, NULL, NULL, 0, NULL },
658 { "sy", roff_insec, NULL, NULL, 0, NULL },
659 { "T&", roff_T_, NULL, NULL, 0, NULL },
660 { "ta", roff_unsupp, NULL, NULL, 0, NULL },
661 { "tc", roff_unsupp, NULL, NULL, 0, NULL },
662 { "TE", roff_TE, NULL, NULL, 0, NULL },
663 { "TH", roff_TH, NULL, NULL, 0, NULL },
664 { "ti", roff_unsupp, NULL, NULL, 0, NULL },
665 { "tkf", roff_line_ignore, NULL, NULL, 0, NULL },
666 { "tl", roff_unsupp, NULL, NULL, 0, NULL },
667 { "tm", roff_line_ignore, NULL, NULL, 0, NULL },
668 { "tm1", roff_line_ignore, NULL, NULL, 0, NULL },
669 { "tmc", roff_line_ignore, NULL, NULL, 0, NULL },
670 { "tr", roff_tr, NULL, NULL, 0, NULL },
671 { "track", roff_line_ignore, NULL, NULL, 0, NULL },
672 { "transchar", roff_line_ignore, NULL, NULL, 0, NULL },
673 { "trf", roff_insec, NULL, NULL, 0, NULL },
674 { "trimat", roff_line_ignore, NULL, NULL, 0, NULL },
675 { "trin", roff_unsupp, NULL, NULL, 0, NULL },
676 { "trnt", roff_unsupp, NULL, NULL, 0, NULL },
677 { "troff", roff_line_ignore, NULL, NULL, 0, NULL },
678 { "TS", roff_TS, NULL, NULL, 0, NULL },
679 { "uf", roff_line_ignore, NULL, NULL, 0, NULL },
680 { "ul", roff_line_ignore, NULL, NULL, 0, NULL },
681 { "unformat", roff_unsupp, NULL, NULL, 0, NULL },
682 { "unwatch", roff_line_ignore, NULL, NULL, 0, NULL },
683 { "unwatchn", roff_line_ignore, NULL, NULL, 0, NULL },
684 { "vpt", roff_line_ignore, NULL, NULL, 0, NULL },
685 { "vs", roff_line_ignore, NULL, NULL, 0, NULL },
686 { "warn", roff_line_ignore, NULL, NULL, 0, NULL },
687 { "warnscale", roff_line_ignore, NULL, NULL, 0, NULL },
688 { "watch", roff_line_ignore, NULL, NULL, 0, NULL },
689 { "watchlength", roff_line_ignore, NULL, NULL, 0, NULL },
690 { "watchn", roff_line_ignore, NULL, NULL, 0, NULL },
691 { "wh", roff_unsupp, NULL, NULL, 0, NULL },
692 { "while", roff_unsupp, NULL, NULL, 0, NULL },
693 { "write", roff_insec, NULL, NULL, 0, NULL },
694 { "writec", roff_insec, NULL, NULL, 0, NULL },
695 { "writem", roff_insec, NULL, NULL, 0, NULL },
696 { "xflag", roff_line_ignore, NULL, NULL, 0, NULL },
697 { ".", roff_cblock, NULL, NULL, 0, NULL },
698 { NULL, roff_userdef, NULL, NULL, 0, NULL },
699 };
700
701 /* not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH */
702 const char *const __mdoc_reserved[] = {
703 "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
704 "Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
705 "Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
706 "Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
707 "Dt", "Dv", "Dx", "D1",
708 "Ec", "Ed", "Ef", "Ek", "El", "Em",
709 "En", "Eo", "Er", "Es", "Ev", "Ex",
710 "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
711 "Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp",
712 "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
713 "Oc", "Oo", "Op", "Os", "Ot", "Ox",
714 "Pa", "Pc", "Pf", "Po", "Pp", "Pq",
715 "Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv",
716 "Sc", "Sh", "Sm", "So", "Sq",
717 "Ss", "St", "Sx", "Sy",
718 "Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
719 "%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
720 "%P", "%Q", "%R", "%T", "%U", "%V",
721 NULL
722 };
723
724 /* not currently implemented: BT DE DS ME MT PT SY TQ YS */
725 const char *const __man_reserved[] = {
726 "AT", "B", "BI", "BR", "DT",
727 "EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR",
728 "LP", "OP", "P", "PD", "PP",
729 "R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS",
730 "TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR",
731 NULL
732 };
733
734 /* Array of injected predefined strings. */
735 #define PREDEFS_MAX 38
736 static const struct predef predefs[PREDEFS_MAX] = {
737 #include "predefs.in"
738 };
739
740 /* See roffhash_find() */
741 #define ROFF_HASH(p) (p[0] - ASCII_LO)
742
743 static int roffit_lines; /* number of lines to delay */
744 static char *roffit_macro; /* nil-terminated macro line */
745
746
747 /* --- request table ------------------------------------------------------ */
748
749 static void
750 roffhash_init(void)
751 {
752 struct roffmac *n;
753 int buc, i;
754
755 for (i = 0; i < (int)ROFF_USERDEF; i++) {
756 assert(roffs[i].name[0] >= ASCII_LO);
757 assert(roffs[i].name[0] <= ASCII_HI);
758
759 buc = ROFF_HASH(roffs[i].name);
760
761 if (NULL != (n = hash[buc])) {
762 for ( ; n->next; n = n->next)
763 /* Do nothing. */ ;
764 n->next = &roffs[i];
765 } else
766 hash[buc] = &roffs[i];
767 }
768 }
769
770 /*
771 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
772 * the nil-terminated string name could be found.
773 */
774 static enum rofft
775 roffhash_find(const char *p, size_t s)
776 {
777 int buc;
778 struct roffmac *n;
779
780 /*
781 * libroff has an extremely simple hashtable, for the time
782 * being, which simply keys on the first character, which must
783 * be printable, then walks a chain. It works well enough until
784 * optimised.
785 */
786
787 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
788 return ROFF_MAX;
789
790 buc = ROFF_HASH(p);
791
792 if (NULL == (n = hash[buc]))
793 return ROFF_MAX;
794 for ( ; n; n = n->next)
795 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
796 return (enum rofft)(n - roffs);
797
798 return ROFF_MAX;
799 }
800
801 /* --- stack of request blocks -------------------------------------------- */
802
803 /*
804 * Pop the current node off of the stack of roff instructions currently
805 * pending.
806 */
807 static void
808 roffnode_pop(struct roff *r)
809 {
810 struct roffnode *p;
811
812 assert(r->last);
813 p = r->last;
814
815 r->last = r->last->parent;
816 free(p->name);
817 free(p->end);
818 free(p);
819 }
820
821 /*
822 * Push a roff node onto the instruction stack. This must later be
823 * removed with roffnode_pop().
824 */
825 static void
826 roffnode_push(struct roff *r, enum rofft tok, const char *name,
827 int line, int col)
828 {
829 struct roffnode *p;
830
831 p = mandoc_calloc(1, sizeof(struct roffnode));
832 p->tok = tok;
833 if (name)
834 p->name = mandoc_strdup(name);
835 p->parent = r->last;
836 p->line = line;
837 p->col = col;
838 p->rule = p->parent ? p->parent->rule : 0;
839
840 r->last = p;
841 }
842
843 /* --- roff parser state data management ---------------------------------- */
844
845 static void
846 roff_free1(struct roff *r)
847 {
848 struct tbl_node *tbl;
849 struct eqn_node *e;
850 int i;
851
852 while (NULL != (tbl = r->first_tbl)) {
853 r->first_tbl = tbl->next;
854 tbl_free(tbl);
855 }
856 r->first_tbl = r->last_tbl = r->tbl = NULL;
857
858 while (NULL != (e = r->first_eqn)) {
859 r->first_eqn = e->next;
860 eqn_free(e);
861 }
862 r->first_eqn = r->last_eqn = r->eqn = NULL;
863
864 while (r->last)
865 roffnode_pop(r);
866
867 free (r->rstack);
868 r->rstack = NULL;
869 r->rstacksz = 0;
870 r->rstackpos = -1;
871
872 roff_freereg(r->regtab);
873 r->regtab = NULL;
874
875 roff_freestr(r->strtab);
876 roff_freestr(r->xmbtab);
877 r->strtab = r->xmbtab = NULL;
878
879 if (r->xtab)
880 for (i = 0; i < 128; i++)
881 free(r->xtab[i].p);
882 free(r->xtab);
883 r->xtab = NULL;
884 }
885
886 void
887 roff_reset(struct roff *r)
888 {
889
890 roff_free1(r);
891 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
892 r->control = 0;
893 }
894
/*
 * Destroy a parser: release all data it owns, then the parser
 * structure itself.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);
	free(r);
}
902
903 struct roff *
904 roff_alloc(struct mparse *parse, const struct mchars *mchars, int options)
905 {
906 struct roff *r;
907
908 r = mandoc_calloc(1, sizeof(struct roff));
909 r->parse = parse;
910 r->mchars = mchars;
911 r->options = options;
912 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
913 r->rstackpos = -1;
914
915 roffhash_init();
916
917 return r;
918 }
919
920 /* --- syntax tree state data management ---------------------------------- */
921
922 static void
923 roff_man_free1(struct roff_man *man)
924 {
925
926 if (man->first != NULL)
927 roff_node_delete(man, man->first);
928 free(man->meta.msec);
929 free(man->meta.vol);
930 free(man->meta.os);
931 free(man->meta.arch);
932 free(man->meta.title);
933 free(man->meta.name);
934 free(man->meta.date);
935 }
936
937 static void
938 roff_man_alloc1(struct roff_man *man)
939 {
940
941 memset(&man->meta, 0, sizeof(man->meta));
942 man->first = mandoc_calloc(1, sizeof(*man->first));
943 man->first->type = ROFFT_ROOT;
944 man->last = man->first;
945 man->last_es = NULL;
946 man->flags = 0;
947 man->macroset = MACROSET_NONE;
948 man->lastsec = man->lastnamed = SEC_NONE;
949 man->next = ROFF_NEXT_CHILD;
950 }
951
/*
 * Discard the current syntax tree and metadata and start over
 * with a freshly initialized state.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);
	roff_man_alloc1(man);
}
959
/*
 * Destroy a roff_man object: release its tree and metadata,
 * then the structure itself.
 */
void
roff_man_free(struct roff_man *man)
{
	roff_man_free1(man);
	free(man);
}
967
968 struct roff_man *
969 roff_man_alloc(struct roff *roff, struct mparse *parse,
970 const char *defos, int quick)
971 {
972 struct roff_man *man;
973
974 man = mandoc_calloc(1, sizeof(*man));
975 man->parse = parse;
976 man->roff = roff;
977 man->defos = defos;
978 man->quick = quick;
979 roff_man_alloc1(man);
980 return man;
981 }
982
983 /* --- syntax tree handling ----------------------------------------------- */
984
985 struct roff_node *
986 roff_node_alloc(struct roff_man *man, int line, int pos,
987 enum roff_type type, int tok)
988 {
989 struct roff_node *n;
990
991 n = mandoc_calloc(1, sizeof(*n));
992 n->line = line;
993 n->pos = pos;
994 n->tok = tok;
995 n->type = type;
996 n->sec = man->lastsec;
997
998 if (man->flags & MDOC_SYNOPSIS)
999 n->flags |= MDOC_SYNPRETTY;
1000 else
1001 n->flags &= ~MDOC_SYNPRETTY;
1002 if (man->flags & MDOC_NEWLINE)
1003 n->flags |= MDOC_LINE;
1004 man->flags &= ~MDOC_NEWLINE;
1005
1006 return n;
1007 }
1008
1009 void
1010 roff_node_append(struct roff_man *man, struct roff_node *n)
1011 {
1012
1013 switch (man->next) {
1014 case ROFF_NEXT_SIBLING:
1015 man->last->next = n;
1016 n->prev = man->last;
1017 n->parent = man->last->parent;
1018 break;
1019 case ROFF_NEXT_CHILD:
1020 man->last->child = n;
1021 n->parent = man->last;
1022 break;
1023 default:
1024 abort();
1025 }
1026 n->parent->nchild++;
1027 n->parent->last = n;
1028
1029 /*
1030 * Copy over the normalised-data pointer of our parent. Not
1031 * everybody has one, but copying a null pointer is fine.
1032 */
1033
1034 switch (n->type) {
1035 case ROFFT_BODY:
1036 if (n->end != ENDBODY_NOT)
1037 break;
1038 /* FALLTHROUGH */
1039 case ROFFT_TAIL:
1040 case ROFFT_HEAD:
1041 n->norm = n->parent->norm;
1042 break;
1043 default:
1044 break;
1045 }
1046
1047 if (man->macroset == MACROSET_MDOC)
1048 mdoc_valid_pre(man, n);
1049
1050 switch (n->type) {
1051 case ROFFT_HEAD:
1052 assert(n->parent->type == ROFFT_BLOCK);
1053 n->parent->head = n;
1054 break;
1055 case ROFFT_BODY:
1056 if (n->end)
1057 break;
1058 assert(n->parent->type == ROFFT_BLOCK);
1059 n->parent->body = n;
1060 break;
1061 case ROFFT_TAIL:
1062 assert(n->parent->type == ROFFT_BLOCK);
1063 n->parent->tail = n;
1064 break;
1065 default:
1066 break;
1067 }
1068 man->last = n;
1069 }
1070
1071 void
1072 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
1073 {
1074 struct roff_node *n;
1075
1076 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
1077 n->string = roff_strdup(man->roff, word);
1078 roff_node_append(man, n);
1079 if (man->macroset == MACROSET_MDOC)
1080 mdoc_valid_post(man);
1081 else
1082 man_valid_post(man);
1083 man->next = ROFF_NEXT_SIBLING;
1084 }
1085
1086 void
1087 roff_word_append(struct roff_man *man, const char *word)
1088 {
1089 struct roff_node *n;
1090 char *addstr, *newstr;
1091
1092 n = man->last;
1093 addstr = roff_strdup(man->roff, word);
1094 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
1095 free(addstr);
1096 free(n->string);
1097 n->string = newstr;
1098 man->next = ROFF_NEXT_SIBLING;
1099 }
1100
1101 void
1102 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
1103 {
1104 struct roff_node *n;
1105
1106 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
1107 roff_node_append(man, n);
1108 man->next = ROFF_NEXT_CHILD;
1109 }
1110
1111 struct roff_node *
1112 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
1113 {
1114 struct roff_node *n;
1115
1116 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1117 roff_node_append(man, n);
1118 man->next = ROFF_NEXT_CHILD;
1119 return n;
1120 }
1121
1122 struct roff_node *
1123 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1124 {
1125 struct roff_node *n;
1126
1127 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1128 roff_node_append(man, n);
1129 man->next = ROFF_NEXT_CHILD;
1130 return n;
1131 }
1132
1133 struct roff_node *
1134 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1135 {
1136 struct roff_node *n;
1137
1138 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1139 roff_node_append(man, n);
1140 man->next = ROFF_NEXT_CHILD;
1141 return n;
1142 }
1143
1144 void
1145 roff_addeqn(struct roff_man *man, const struct eqn *eqn)
1146 {
1147 struct roff_node *n;
1148
1149 n = roff_node_alloc(man, eqn->ln, eqn->pos, ROFFT_EQN, TOKEN_NONE);
1150 n->eqn = eqn;
1151 if (eqn->ln > man->last->line)
1152 n->flags |= MDOC_LINE;
1153 roff_node_append(man, n);
1154 man->next = ROFF_NEXT_SIBLING;
1155 }
1156
1157 void
1158 roff_addtbl(struct roff_man *man, const struct tbl_span *tbl)
1159 {
1160 struct roff_node *n;
1161
1162 if (man->macroset == MACROSET_MAN)
1163 man_breakscope(man, TOKEN_NONE);
1164 n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE);
1165 n->span = tbl;
1166 roff_node_append(man, n);
1167 if (man->macroset == MACROSET_MDOC)
1168 mdoc_valid_post(man);
1169 else
1170 man_valid_post(man);
1171 man->next = ROFF_NEXT_SIBLING;
1172 }
1173
1174 void
1175 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1176 {
1177
1178 /* Adjust siblings. */
1179
1180 if (n->prev)
1181 n->prev->next = n->next;
1182 if (n->next)
1183 n->next->prev = n->prev;
1184
1185 /* Adjust parent. */
1186
1187 if (n->parent != NULL) {
1188 n->parent->nchild--;
1189 if (n->parent->child == n)
1190 n->parent->child = n->next;
1191 if (n->parent->last == n)
1192 n->parent->last = n->prev;
1193 }
1194
1195 /* Adjust parse point. */
1196
1197 if (man == NULL)
1198 return;
1199 if (man->last == n) {
1200 if (n->prev == NULL) {
1201 man->last = n->parent;
1202 man->next = ROFF_NEXT_CHILD;
1203 } else {
1204 man->last = n->prev;
1205 man->next = ROFF_NEXT_SIBLING;
1206 }
1207 }
1208 if (man->first == n)
1209 man->first = NULL;
1210 }
1211
1212 void
1213 roff_node_free(struct roff_node *n)
1214 {
1215
1216 if (n->args != NULL)
1217 mdoc_argv_free(n->args);
1218 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1219 free(n->norm);
1220 free(n->string);
1221 free(n);
1222 }
1223
1224 void
1225 roff_node_delete(struct roff_man *man, struct roff_node *n)
1226 {
1227
1228 while (n->child != NULL)
1229 roff_node_delete(man, n->child);
1230 assert(n->nchild == 0);
1231 roff_node_unlink(man, n);
1232 roff_node_free(n);
1233 }
1234
1235 void
1236 deroff(char **dest, const struct roff_node *n)
1237 {
1238 char *cp;
1239 size_t sz;
1240
1241 if (n->type != ROFFT_TEXT) {
1242 for (n = n->child; n != NULL; n = n->next)
1243 deroff(dest, n);
1244 return;
1245 }
1246
1247 /* Skip leading whitespace and escape sequences. */
1248
1249 cp = n->string;
1250 while (*cp != '\0') {
1251 if ('\\' == *cp) {
1252 cp++;
1253 mandoc_escape((const char **)&cp, NULL, NULL);
1254 } else if (isspace((unsigned char)*cp))
1255 cp++;
1256 else
1257 break;
1258 }
1259
1260 /* Skip trailing whitespace. */
1261
1262 for (sz = strlen(cp); sz; sz--)
1263 if ( ! isspace((unsigned char)cp[sz-1]))
1264 break;
1265
1266 /* Skip empty strings. */
1267
1268 if (sz == 0)
1269 return;
1270
1271 if (*dest == NULL) {
1272 *dest = mandoc_strndup(cp, sz);
1273 return;
1274 }
1275
1276 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1277 free(*dest);
1278 *dest = cp;
1279 }
1280
1281 /* --- main functions of the roff parser ---------------------------------- */
1282
1283 /*
1284 * In the current line, expand escape sequences that tend to get
1285 * used in numerical expressions and conditional requests.
1286 * Also check the syntax of the remaining escape sequences.
1287 */
1288 static enum rofferr
1289 roff_res(struct roff *r, struct buf *buf, int ln, int pos)
1290 {
1291 char ubuf[24]; /* buffer to print the number */
1292 const char *start; /* start of the string to process */
1293 char *stesc; /* start of an escape sequence ('\\') */
1294 const char *stnam; /* start of the name, after "[(*" */
1295 const char *cp; /* end of the name, e.g. before ']' */
1296 const char *res; /* the string to be substituted */
1297 char *nbuf; /* new buffer to copy buf->buf to */
1298 size_t maxl; /* expected length of the escape name */
1299 size_t naml; /* actual length of the escape name */
1300 enum mandoc_esc esc; /* type of the escape sequence */
1301 int inaml; /* length returned from mandoc_escape() */
1302 int expand_count; /* to avoid infinite loops */
1303 int npos; /* position in numeric expression */
1304 int arg_complete; /* argument not interrupted by eol */
1305 char term; /* character terminating the escape */
1306
1307 expand_count = 0;
1308 start = buf->buf + pos;
1309 stesc = strchr(start, '\0') - 1;
1310 while (stesc-- > start) {
1311
1312 /* Search backwards for the next backslash. */
1313
1314 if (*stesc != '\\')
1315 continue;
1316
1317 /* If it is escaped, skip it. */
1318
1319 for (cp = stesc - 1; cp >= start; cp--)
1320 if (*cp != '\\')
1321 break;
1322
1323 if ((stesc - cp) % 2 == 0) {
1324 stesc = (char *)cp;
1325 continue;
1326 }
1327
1328 /* Decide whether to expand or to check only. */
1329
1330 term = '\0';
1331 cp = stesc + 1;
1332 switch (*cp) {
1333 case '*':
1334 res = NULL;
1335 break;
1336 case 'B':
1337 case 'w':
1338 term = cp[1];
1339 /* FALLTHROUGH */
1340 case 'n':
1341 res = ubuf;
1342 break;
1343 default:
1344 esc = mandoc_escape(&cp, &stnam, &inaml);
1345 if (esc == ESCAPE_ERROR ||
1346 (esc == ESCAPE_SPECIAL &&
1347 mchars_spec2cp(r->mchars, stnam, inaml) < 0))
1348 mandoc_vmsg(MANDOCERR_ESC_BAD,
1349 r->parse, ln, (int)(stesc - buf->buf),
1350 "%.*s", (int)(cp - stesc), stesc);
1351 continue;
1352 }
1353
1354 if (EXPAND_LIMIT < ++expand_count) {
1355 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1356 ln, (int)(stesc - buf->buf), NULL);
1357 return ROFF_IGN;
1358 }
1359
1360 /*
1361 * The third character decides the length
1362 * of the name of the string or register.
1363 * Save a pointer to the name.
1364 */
1365
1366 if (term == '\0') {
1367 switch (*++cp) {
1368 case '\0':
1369 maxl = 0;
1370 break;
1371 case '(':
1372 cp++;
1373 maxl = 2;
1374 break;
1375 case '[':
1376 cp++;
1377 term = ']';
1378 maxl = 0;
1379 break;
1380 default:
1381 maxl = 1;
1382 break;
1383 }
1384 } else {
1385 cp += 2;
1386 maxl = 0;
1387 }
1388 stnam = cp;
1389
1390 /* Advance to the end of the name. */
1391
1392 naml = 0;
1393 arg_complete = 1;
1394 while (maxl == 0 || naml < maxl) {
1395 if (*cp == '\0') {
1396 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
1397 ln, (int)(stesc - buf->buf), stesc);
1398 arg_complete = 0;
1399 break;
1400 }
1401 if (maxl == 0 && *cp == term) {
1402 cp++;
1403 break;
1404 }
1405 if (*cp++ != '\\' || stesc[1] != 'w') {
1406 naml++;
1407 continue;
1408 }
1409 switch (mandoc_escape(&cp, NULL, NULL)) {
1410 case ESCAPE_SPECIAL:
1411 case ESCAPE_UNICODE:
1412 case ESCAPE_NUMBERED:
1413 case ESCAPE_OVERSTRIKE:
1414 naml++;
1415 break;
1416 default:
1417 break;
1418 }
1419 }
1420
1421 /*
1422 * Retrieve the replacement string; if it is
1423 * undefined, resume searching for escapes.
1424 */
1425
1426 switch (stesc[1]) {
1427 case '*':
1428 if (arg_complete)
1429 res = roff_getstrn(r, stnam, naml);
1430 break;
1431 case 'B':
1432 npos = 0;
1433 ubuf[0] = arg_complete &&
1434 roff_evalnum(r, ln, stnam, &npos,
1435 NULL, ROFFNUM_SCALE) &&
1436 stnam + npos + 1 == cp ? '1' : '0';
1437 ubuf[1] = '\0';
1438 break;
1439 case 'n':
1440 if (arg_complete)
1441 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1442 roff_getregn(r, stnam, naml));
1443 else
1444 ubuf[0] = '\0';
1445 break;
1446 case 'w':
1447 /* use even incomplete args */
1448 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1449 24 * (int)naml);
1450 break;
1451 }
1452
1453 if (res == NULL) {
1454 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1455 r->parse, ln, (int)(stesc - buf->buf),
1456 "%.*s", (int)naml, stnam);
1457 res = "";
1458 } else if (buf->sz + strlen(res) > SHRT_MAX) {
1459 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1460 ln, (int)(stesc - buf->buf), NULL);
1461 return ROFF_IGN;
1462 }
1463
1464 /* Replace the escape sequence by the string. */
1465
1466 *stesc = '\0';
1467 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
1468 buf->buf, res, cp) + 1;
1469
1470 /* Prepare for the next replacement. */
1471
1472 start = nbuf + pos;
1473 stesc = nbuf + (stesc - buf->buf) + strlen(res);
1474 free(buf->buf);
1475 buf->buf = nbuf;
1476 }
1477 return ROFF_CONT;
1478 }
1479
1480 /*
1481 * Process text streams.
1482 */
1483 static enum rofferr
1484 roff_parsetext(struct buf *buf, int pos, int *offs)
1485 {
1486 size_t sz;
1487 const char *start;
1488 char *p;
1489 int isz;
1490 enum mandoc_esc esc;
1491
1492 /* Spring the input line trap. */
1493
1494 if (roffit_lines == 1) {
1495 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1496 free(buf->buf);
1497 buf->buf = p;
1498 buf->sz = isz + 1;
1499 *offs = 0;
1500 free(roffit_macro);
1501 roffit_lines = 0;
1502 return ROFF_REPARSE;
1503 } else if (roffit_lines > 1)
1504 --roffit_lines;
1505
1506 /* Convert all breakable hyphens into ASCII_HYPH. */
1507
1508 start = p = buf->buf + pos;
1509
1510 while (*p != '\0') {
1511 sz = strcspn(p, "-\\");
1512 p += sz;
1513
1514 if (*p == '\0')
1515 break;
1516
1517 if (*p == '\\') {
1518 /* Skip over escapes. */
1519 p++;
1520 esc = mandoc_escape((const char **)&p, NULL, NULL);
1521 if (esc == ESCAPE_ERROR)
1522 break;
1523 while (*p == '-')
1524 p++;
1525 continue;
1526 } else if (p == start) {
1527 p++;
1528 continue;
1529 }
1530
1531 if (isalpha((unsigned char)p[-1]) &&
1532 isalpha((unsigned char)p[1]))
1533 *p = ASCII_HYPH;
1534 p++;
1535 }
1536 return ROFF_CONT;
1537 }
1538
1539 enum rofferr
1540 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
1541 {
1542 enum rofft t;
1543 enum rofferr e;
1544 int pos; /* parse point */
1545 int spos; /* saved parse point for messages */
1546 int ppos; /* original offset in buf->buf */
1547 int ctl; /* macro line (boolean) */
1548
1549 ppos = pos = *offs;
1550
1551 /* Handle in-line equation delimiters. */
1552
1553 if (r->tbl == NULL &&
1554 r->last_eqn != NULL && r->last_eqn->delim &&
1555 (r->eqn == NULL || r->eqn_inline)) {
1556 e = roff_eqndelim(r, buf, pos);
1557 if (e == ROFF_REPARSE)
1558 return e;
1559 assert(e == ROFF_CONT);
1560 }
1561
1562 /* Expand some escape sequences. */
1563
1564 e = roff_res(r, buf, ln, pos);
1565 if (e == ROFF_IGN)
1566 return e;
1567 assert(e == ROFF_CONT);
1568
1569 ctl = roff_getcontrol(r, buf->buf, &pos);
1570
1571 /*
1572 * First, if a scope is open and we're not a macro, pass the
1573 * text through the macro's filter.
1574 * Equations process all content themselves.
1575 * Tables process almost all content themselves, but we want
1576 * to warn about macros before passing it there.
1577 */
1578
1579 if (r->last != NULL && ! ctl) {
1580 t = r->last->tok;
1581 assert(roffs[t].text);
1582 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1583 assert(e == ROFF_IGN || e == ROFF_CONT);
1584 if (e != ROFF_CONT)
1585 return e;
1586 }
1587 if (r->eqn != NULL)
1588 return eqn_read(&r->eqn, ln, buf->buf, ppos, offs);
1589 if (r->tbl != NULL && ( ! ctl || buf->buf[pos] == '\0'))
1590 return tbl_read(r->tbl, ln, buf->buf, ppos);
1591 if ( ! ctl)
1592 return roff_parsetext(buf, pos, offs);
1593
1594 /* Skip empty request lines. */
1595
1596 if (buf->buf[pos] == '"') {
1597 mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
1598 ln, pos, NULL);
1599 return ROFF_IGN;
1600 } else if (buf->buf[pos] == '\0')
1601 return ROFF_IGN;
1602
1603 /*
1604 * If a scope is open, go to the child handler for that macro,
1605 * as it may want to preprocess before doing anything with it.
1606 * Don't do so if an equation is open.
1607 */
1608
1609 if (r->last) {
1610 t = r->last->tok;
1611 assert(roffs[t].sub);
1612 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1613 }
1614
1615 /* No scope is open. This is a new request or macro. */
1616
1617 spos = pos;
1618 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1619
1620 /* Tables ignore most macros. */
1621
1622 if (r->tbl != NULL && (t == ROFF_MAX || t == ROFF_TS)) {
1623 mandoc_msg(MANDOCERR_TBLMACRO, r->parse,
1624 ln, pos, buf->buf + spos);
1625 if (t == ROFF_TS)
1626 return ROFF_IGN;
1627 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1628 pos++;
1629 while (buf->buf[pos] != '\0' && buf->buf[pos] == ' ')
1630 pos++;
1631 return tbl_read(r->tbl, ln, buf->buf, pos);
1632 }
1633
1634 /*
1635 * This is neither a roff request nor a user-defined macro.
1636 * Let the standard macro set parsers handle it.
1637 */
1638
1639 if (t == ROFF_MAX)
1640 return ROFF_CONT;
1641
1642 /* Execute a roff request or a user defined macro. */
1643
1644 assert(roffs[t].proc);
1645 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
1646 }
1647
1648 void
1649 roff_endparse(struct roff *r)
1650 {
1651
1652 if (r->last)
1653 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1654 r->last->line, r->last->col,
1655 roffs[r->last->tok].name);
1656
1657 if (r->eqn) {
1658 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1659 r->eqn->eqn.ln, r->eqn->eqn.pos, "EQ");
1660 eqn_end(&r->eqn);
1661 }
1662
1663 if (r->tbl) {
1664 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1665 r->tbl->line, r->tbl->pos, "TS");
1666 tbl_end(&r->tbl);
1667 }
1668 }
1669
1670 /*
1671 * Parse a roff node's type from the input buffer. This must be in the
1672 * form of ".foo xxx" in the usual way.
1673 */
1674 static enum rofft
1675 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1676 {
1677 char *cp;
1678 const char *mac;
1679 size_t maclen;
1680 enum rofft t;
1681
1682 cp = buf + *pos;
1683
1684 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1685 return ROFF_MAX;
1686
1687 mac = cp;
1688 maclen = roff_getname(r, &cp, ln, ppos);
1689
1690 t = (r->current_string = roff_getstrn(r, mac, maclen))
1691 ? ROFF_USERDEF : roffhash_find(mac, maclen);
1692
1693 if (ROFF_MAX != t)
1694 *pos = cp - buf;
1695
1696 return t;
1697 }
1698
1699 /* --- handling of request blocks ----------------------------------------- */
1700
1701 static enum rofferr
1702 roff_cblock(ROFF_ARGS)
1703 {
1704
1705 /*
1706 * A block-close `..' should only be invoked as a child of an
1707 * ignore macro, otherwise raise a warning and just ignore it.
1708 */
1709
1710 if (r->last == NULL) {
1711 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1712 ln, ppos, "..");
1713 return ROFF_IGN;
1714 }
1715
1716 switch (r->last->tok) {
1717 case ROFF_am:
1718 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1719 case ROFF_ami:
1720 case ROFF_de:
1721 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1722 case ROFF_dei:
1723 case ROFF_ig:
1724 break;
1725 default:
1726 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1727 ln, ppos, "..");
1728 return ROFF_IGN;
1729 }
1730
1731 if (buf->buf[pos] != '\0')
1732 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1733 ".. %s", buf->buf + pos);
1734
1735 roffnode_pop(r);
1736 roffnode_cleanscope(r);
1737 return ROFF_IGN;
1738
1739 }
1740
1741 static void
1742 roffnode_cleanscope(struct roff *r)
1743 {
1744
1745 while (r->last) {
1746 if (--r->last->endspan != 0)
1747 break;
1748 roffnode_pop(r);
1749 }
1750 }
1751
1752 static void
1753 roff_ccond(struct roff *r, int ln, int ppos)
1754 {
1755
1756 if (NULL == r->last) {
1757 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1758 ln, ppos, "\\}");
1759 return;
1760 }
1761
1762 switch (r->last->tok) {
1763 case ROFF_el:
1764 case ROFF_ie:
1765 case ROFF_if:
1766 break;
1767 default:
1768 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1769 ln, ppos, "\\}");
1770 return;
1771 }
1772
1773 if (r->last->endspan > -1) {
1774 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1775 ln, ppos, "\\}");
1776 return;
1777 }
1778
1779 roffnode_pop(r);
1780 roffnode_cleanscope(r);
1781 return;
1782 }
1783
1784 static enum rofferr
1785 roff_block(ROFF_ARGS)
1786 {
1787 const char *name;
1788 char *iname, *cp;
1789 size_t namesz;
1790
1791 /* Ignore groff compatibility mode for now. */
1792
1793 if (tok == ROFF_de1)
1794 tok = ROFF_de;
1795 else if (tok == ROFF_dei1)
1796 tok = ROFF_dei;
1797 else if (tok == ROFF_am1)
1798 tok = ROFF_am;
1799 else if (tok == ROFF_ami1)
1800 tok = ROFF_ami;
1801
1802 /* Parse the macro name argument. */
1803
1804 cp = buf->buf + pos;
1805 if (tok == ROFF_ig) {
1806 iname = NULL;
1807 namesz = 0;
1808 } else {
1809 iname = cp;
1810 namesz = roff_getname(r, &cp, ln, ppos);
1811 iname[namesz] = '\0';
1812 }
1813
1814 /* Resolve the macro name argument if it is indirect. */
1815
1816 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1817 if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1818 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1819 r->parse, ln, (int)(iname - buf->buf),
1820 "%.*s", (int)namesz, iname);
1821 namesz = 0;
1822 } else
1823 namesz = strlen(name);
1824 } else
1825 name = iname;
1826
1827 if (namesz == 0 && tok != ROFF_ig) {
1828 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
1829 ln, ppos, roffs[tok].name);
1830 return ROFF_IGN;
1831 }
1832
1833 roffnode_push(r, tok, name, ln, ppos);
1834
1835 /*
1836 * At the beginning of a `de' macro, clear the existing string
1837 * with the same name, if there is one. New content will be
1838 * appended from roff_block_text() in multiline mode.
1839 */
1840
1841 if (tok == ROFF_de || tok == ROFF_dei)
1842 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1843
1844 if (*cp == '\0')
1845 return ROFF_IGN;
1846
1847 /* Get the custom end marker. */
1848
1849 iname = cp;
1850 namesz = roff_getname(r, &cp, ln, ppos);
1851
1852 /* Resolve the end marker if it is indirect. */
1853
1854 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1855 if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1856 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1857 r->parse, ln, (int)(iname - buf->buf),
1858 "%.*s", (int)namesz, iname);
1859 namesz = 0;
1860 } else
1861 namesz = strlen(name);
1862 } else
1863 name = iname;
1864
1865 if (namesz)
1866 r->last->end = mandoc_strndup(name, namesz);
1867
1868 if (*cp != '\0')
1869 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
1870 ln, pos, ".%s ... %s", roffs[tok].name, cp);
1871
1872 return ROFF_IGN;
1873 }
1874
1875 static enum rofferr
1876 roff_block_sub(ROFF_ARGS)
1877 {
1878 enum rofft t;
1879 int i, j;
1880
1881 /*
1882 * First check whether a custom macro exists at this level. If
1883 * it does, then check against it. This is some of groff's
1884 * stranger behaviours. If we encountered a custom end-scope
1885 * tag and that tag also happens to be a "real" macro, then we
1886 * need to try interpreting it again as a real macro. If it's
1887 * not, then return ignore. Else continue.
1888 */
1889
1890 if (r->last->end) {
1891 for (i = pos, j = 0; r->last->end[j]; j++, i++)
1892 if (buf->buf[i] != r->last->end[j])
1893 break;
1894
1895 if (r->last->end[j] == '\0' &&
1896 (buf->buf[i] == '\0' ||
1897 buf->buf[i] == ' ' ||
1898 buf->buf[i] == '\t')) {
1899 roffnode_pop(r);
1900 roffnode_cleanscope(r);
1901
1902 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
1903 i++;
1904
1905 pos = i;
1906 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
1907 ROFF_MAX)
1908 return ROFF_RERUN;
1909 return ROFF_IGN;
1910 }
1911 }
1912
1913 /*
1914 * If we have no custom end-query or lookup failed, then try
1915 * pulling it out of the hashtable.
1916 */
1917
1918 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1919
1920 if (t != ROFF_cblock) {
1921 if (tok != ROFF_ig)
1922 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
1923 return ROFF_IGN;
1924 }
1925
1926 assert(roffs[t].proc);
1927 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
1928 }
1929
1930 static enum rofferr
1931 roff_block_text(ROFF_ARGS)
1932 {
1933
1934 if (tok != ROFF_ig)
1935 roff_setstr(r, r->last->name, buf->buf + pos, 2);
1936
1937 return ROFF_IGN;
1938 }
1939
1940 static enum rofferr
1941 roff_cond_sub(ROFF_ARGS)
1942 {
1943 enum rofft t;
1944 char *ep;
1945 int rr;
1946
1947 rr = r->last->rule;
1948 roffnode_cleanscope(r);
1949 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1950
1951 /*
1952 * Fully handle known macros when they are structurally
1953 * required or when the conditional evaluated to true.
1954 */
1955
1956 if ((t != ROFF_MAX) &&
1957 (rr || roffs[t].flags & ROFFMAC_STRUCT)) {
1958 assert(roffs[t].proc);
1959 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
1960 }
1961
1962 /*
1963 * If `\}' occurs on a macro line without a preceding macro,
1964 * drop the line completely.
1965 */
1966
1967 ep = buf->buf + pos;
1968 if (ep[0] == '\\' && ep[1] == '}')
1969 rr = 0;
1970
1971 /* Always check for the closing delimiter `\}'. */
1972
1973 while ((ep = strchr(ep, '\\')) != NULL) {
1974 if (*(++ep) == '}') {
1975 *ep = '&';
1976 roff_ccond(r, ln, ep - buf->buf - 1);
1977 }
1978 if (*ep != '\0')
1979 ++ep;
1980 }
1981 return rr ? ROFF_CONT : ROFF_IGN;
1982 }
1983
1984 static enum rofferr
1985 roff_cond_text(ROFF_ARGS)
1986 {
1987 char *ep;
1988 int rr;
1989
1990 rr = r->last->rule;
1991 roffnode_cleanscope(r);
1992
1993 ep = buf->buf + pos;
1994 while ((ep = strchr(ep, '\\')) != NULL) {
1995 if (*(++ep) == '}') {
1996 *ep = '&';
1997 roff_ccond(r, ln, ep - buf->buf - 1);
1998 }
1999 if (*ep != '\0')
2000 ++ep;
2001 }
2002 return rr ? ROFF_CONT : ROFF_IGN;
2003 }
2004
2005 /* --- handling of numeric and conditional expressions -------------------- */
2006
2007 /*
2008 * Parse a single signed integer number. Stop at the first non-digit.
2009 * If there is at least one digit, return success and advance the
2010 * parse point, else return failure and let the parse point unchanged.
2011 * Ignore overflows, treat them just like the C language.
2012 */
2013 static int
2014 roff_getnum(const char *v, int *pos, int *res, int flags)
2015 {
2016 int myres, scaled, n, p;
2017
2018 if (NULL == res)
2019 res = &myres;
2020
2021 p = *pos;
2022 n = v[p] == '-';
2023 if (n || v[p] == '+')
2024 p++;
2025
2026 if (flags & ROFFNUM_WHITE)
2027 while (isspace((unsigned char)v[p]))
2028 p++;
2029
2030 for (*res = 0; isdigit((unsigned char)v[p]); p++)
2031 *res = 10 * *res + v[p] - '0';
2032 if (p == *pos + n)
2033 return 0;
2034
2035 if (n)
2036 *res = -*res;
2037
2038 /* Each number may be followed by one optional scaling unit. */
2039
2040 switch (v[p]) {
2041 case 'f':
2042 scaled = *res * 65536;
2043 break;
2044 case 'i':
2045 scaled = *res * 240;
2046 break;
2047 case 'c':
2048 scaled = *res * 240 / 2.54;
2049 break;
2050 case 'v':
2051 case 'P':
2052 scaled = *res * 40;
2053 break;
2054 case 'm':
2055 case 'n':
2056 scaled = *res * 24;
2057 break;
2058 case 'p':
2059 scaled = *res * 10 / 3;
2060 break;
2061 case 'u':
2062 scaled = *res;
2063 break;
2064 case 'M':
2065 scaled = *res * 6 / 25;
2066 break;
2067 default:
2068 scaled = *res;
2069 p--;
2070 break;
2071 }
2072 if (flags & ROFFNUM_SCALE)
2073 *res = scaled;
2074
2075 *pos = p + 1;
2076 return 1;
2077 }
2078
/*
 * Evaluate a string comparison condition.
 * The character at v + *pos is the delimiter.
 * Return 1 if the string between its first and second occurrence
 * is identical to the string between its second and third
 * occurrence, or 0 otherwise.
 * Advance *pos after the third occurrence of the delimiter
 * or, lacking that, to the end of the line.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*lhs;	/* cursor in the first string */
	const char	*rhs;	/* cursor in the second string */
	const char	*end;	/* where parsing is to continue */
	char		 delim;
	int		 equal;

	equal = 0;
	delim = v[*pos];
	lhs = v + *pos + 1;
	rhs = strchr(lhs, delim);	/* the middle delimiter */

	if (rhs != NULL) {
		for (rhs++; *rhs != '\0'; rhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: find the final delimiter. */
				rhs = strchr(rhs, delim);
				break;
			}
			if (*rhs == delim) {
				/* Both strings ended simultaneously. */
				equal = 1;
				break;
			}
			lhs++;
		}
	}

	if (rhs == NULL)
		end = strchr(lhs, '\0');
	else if (*rhs != '\0')
		end = rhs + 1;
	else
		end = rhs;

	*pos = end - v;
	return equal;
}
2121
2122 /*
2123 * Evaluate an optionally negated single character, numerical,
2124 * or string condition.
2125 */
2126 static int
2127 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2128 {
2129 char *cp, *name;
2130 size_t sz;
2131 int number, savepos, wanttrue;
2132
2133 if ('!' == v[*pos]) {
2134 wanttrue = 0;
2135 (*pos)++;
2136 } else
2137 wanttrue = 1;
2138
2139 switch (v[*pos]) {
2140 case '\0':
2141 return 0;
2142 case 'n':
2143 case 'o':
2144 (*pos)++;
2145 return wanttrue;
2146 case 'c':
2147 case 'd':
2148 case 'e':
2149 case 't':
2150 case 'v':
2151 (*pos)++;
2152 return !wanttrue;
2153 case 'r':
2154 cp = name = v + ++*pos;
2155 sz = roff_getname(r, &cp, ln, *pos);
2156 *pos = cp - v;
2157 return (sz && roff_hasregn(r, name, sz)) == wanttrue;
2158 default:
2159 break;
2160 }
2161
2162 savepos = *pos;
2163 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2164 return (number > 0) == wanttrue;
2165 else if (*pos == savepos)
2166 return roff_evalstrcond(v, pos) == wanttrue;
2167 else
2168 return 0;
2169 }
2170
2171 static enum rofferr
2172 roff_line_ignore(ROFF_ARGS)
2173 {
2174
2175 return ROFF_IGN;
2176 }
2177
2178 static enum rofferr
2179 roff_insec(ROFF_ARGS)
2180 {
2181
2182 mandoc_msg(MANDOCERR_REQ_INSEC, r->parse,
2183 ln, ppos, roffs[tok].name);
2184 return ROFF_IGN;
2185 }
2186
2187 static enum rofferr
2188 roff_unsupp(ROFF_ARGS)
2189 {
2190
2191 mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse,
2192 ln, ppos, roffs[tok].name);
2193 return ROFF_IGN;
2194 }
2195
2196 static enum rofferr
2197 roff_cond(ROFF_ARGS)
2198 {
2199
2200 roffnode_push(r, tok, NULL, ln, ppos);
2201
2202 /*
2203 * An `.el' has no conditional body: it will consume the value
2204 * of the current rstack entry set in prior `ie' calls or
2205 * defaults to DENY.
2206 *
2207 * If we're not an `el', however, then evaluate the conditional.
2208 */
2209
2210 r->last->rule = tok == ROFF_el ?
2211 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2212 roff_evalcond(r, ln, buf->buf, &pos);
2213
2214 /*
2215 * An if-else will put the NEGATION of the current evaluated
2216 * conditional into the stack of rules.
2217 */
2218
2219 if (tok == ROFF_ie) {
2220 if (r->rstackpos + 1 == r->rstacksz) {
2221 r->rstacksz += 16;
2222 r->rstack = mandoc_reallocarray(r->rstack,
2223 r->rstacksz, sizeof(int));
2224 }
2225 r->rstack[++r->rstackpos] = !r->last->rule;
2226 }
2227
2228 /* If the parent has false as its rule, then so do we. */
2229
2230 if (r->last->parent && !r->last->parent->rule)
2231 r->last->rule = 0;
2232
2233 /*
2234 * Determine scope.
2235 * If there is nothing on the line after the conditional,
2236 * not even whitespace, use next-line scope.
2237 */
2238
2239 if (buf->buf[pos] == '\0') {
2240 r->last->endspan = 2;
2241 goto out;
2242 }
2243
2244 while (buf->buf[pos] == ' ')
2245 pos++;
2246
2247 /* An opening brace requests multiline scope. */
2248
2249 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2250 r->last->endspan = -1;
2251 pos += 2;
2252 while (buf->buf[pos] == ' ')
2253 pos++;
2254 goto out;
2255 }
2256
2257 /*
2258 * Anything else following the conditional causes
2259 * single-line scope. Warn if the scope contains
2260 * nothing but trailing whitespace.
2261 */
2262
2263 if (buf->buf[pos] == '\0')
2264 mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
2265 ln, ppos, roffs[tok].name);
2266
2267 r->last->endspan = 1;
2268
2269 out:
2270 *offs = pos;
2271 return ROFF_RERUN;
2272 }
2273
2274 static enum rofferr
2275 roff_ds(ROFF_ARGS)
2276 {
2277 char *string;
2278 const char *name;
2279 size_t namesz;
2280
2281 /* Ignore groff compatibility mode for now. */
2282
2283 if (tok == ROFF_ds1)
2284 tok = ROFF_ds;
2285 else if (tok == ROFF_as1)
2286 tok = ROFF_as;
2287
2288 /*
2289 * The first word is the name of the string.
2290 * If it is empty or terminated by an escape sequence,
2291 * abort the `ds' request without defining anything.
2292 */
2293
2294 name = string = buf->buf + pos;
2295 if (*name == '\0')
2296 return ROFF_IGN;
2297
2298 namesz = roff_getname(r, &string, ln, pos);
2299 if (name[namesz] == '\\')
2300 return ROFF_IGN;
2301
2302 /* Read past the initial double-quote, if any. */
2303 if (*string == '"')
2304 string++;
2305
2306 /* The rest is the value. */
2307 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2308 ROFF_as == tok);
2309 return ROFF_IGN;
2310 }
2311
/*
 * Parse a single operator, one or two characters long.
 * If the operator is recognized, store its one-character code
 * in *res, advance the parse point, and return the code.
 * Otherwise, return 0 and leave the parse point unchanged;
 * *res then contains the character found at the parse point.
 * Two-character operators are coded as follows:
 * "<=" as 'l', "<>" as '!', "<?" as 'i', ">=" as 'g',
 * ">?" as 'a', and "==" as '='.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	char	 first, second;
	int	 twochar;

	first = v[*pos];
	*res = first;

	switch (first) {
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case ':':
	case '<':
	case '>':
	case '=':
		break;
	default:
		return 0;
	}

	/* The first character is not NUL, so a second byte exists. */

	second = v[*pos + 1];
	twochar = 1;
	if (first == '<' && second == '=')
		*res = 'l';
	else if (first == '<' && second == '>')
		*res = '!';
	else if (first == '<' && second == '?')
		*res = 'i';
	else if (first == '>' && second == '=')
		*res = 'g';
	else if (first == '>' && second == '?')
		*res = 'a';
	else if (first == '=' && second == '=')
		;	/* same code as a single equal sign */
	else
		twochar = 0;

	*pos += 1 + twochar;
	return *res;
}
2375
2376 /*
2377 * Evaluate either a parenthesized numeric expression
2378 * or a single signed integer number.
2379 */
2380 static int
2381 roff_evalpar(struct roff *r, int ln,
2382 const char *v, int *pos, int *res, int flags)
2383 {
2384
2385 if ('(' != v[*pos])
2386 return roff_getnum(v, pos, res, flags);
2387
2388 (*pos)++;
2389 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2390 return 0;
2391
2392 /*
2393 * Omission of the closing parenthesis
2394 * is an error in validation mode,
2395 * but ignored in evaluation mode.
2396 */
2397
2398 if (')' == v[*pos])
2399 (*pos)++;
2400 else if (NULL == res)
2401 return 0;
2402
2403 return 1;
2404 }
2405
2406 /*
2407 * Evaluate a complete numeric expression.
2408 * Proceed left to right, there is no concept of precedence.
2409 */
2410 static int
2411 roff_evalnum(struct roff *r, int ln, const char *v,
2412 int *pos, int *res, int flags)
2413 {
2414 int mypos, operand2;
2415 char operator;
2416
2417 if (NULL == pos) {
2418 mypos = 0;
2419 pos = &mypos;
2420 }
2421
2422 if (flags & ROFFNUM_WHITE)
2423 while (isspace((unsigned char)v[*pos]))
2424 (*pos)++;
2425
2426 if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2427 return 0;
2428
2429 while (1) {
2430 if (flags & ROFFNUM_WHITE)
2431 while (isspace((unsigned char)v[*pos]))
2432 (*pos)++;
2433
2434 if ( ! roff_getop(v, pos, &operator))
2435 break;
2436
2437 if (flags & ROFFNUM_WHITE)
2438 while (isspace((unsigned char)v[*pos]))
2439 (*pos)++;
2440
2441 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2442 return 0;
2443
2444 if (flags & ROFFNUM_WHITE)
2445 while (isspace((unsigned char)v[*pos]))
2446 (*pos)++;
2447
2448 if (NULL == res)
2449 continue;
2450
2451 switch (operator) {
2452 case '+':
2453 *res += operand2;
2454 break;
2455 case '-':
2456 *res -= operand2;
2457 break;
2458 case '*':
2459 *res *= operand2;
2460 break;
2461 case '/':
2462 if (operand2 == 0) {
2463 mandoc_msg(MANDOCERR_DIVZERO,
2464 r->parse, ln, *pos, v);
2465 *res = 0;
2466 break;
2467 }
2468 *res /= operand2;
2469 break;
2470 case '%':
2471 if (operand2 == 0) {
2472 mandoc_msg(MANDOCERR_DIVZERO,
2473 r->parse, ln, *pos, v);
2474 *res = 0;
2475 break;
2476 }
2477 *res %= operand2;
2478 break;
2479 case '<':
2480 *res = *res < operand2;
2481 break;
2482 case '>':
2483 *res = *res > operand2;
2484 break;
2485 case 'l':
2486 *res = *res <= operand2;
2487 break;
2488 case 'g':
2489 *res = *res >= operand2;
2490 break;
2491 case '=':
2492 *res = *res == operand2;
2493 break;
2494 case '!':
2495 *res = *res != operand2;
2496 break;
2497 case '&':
2498 *res = *res && operand2;
2499 break;
2500 case ':':
2501 *res = *res || operand2;
2502 break;
2503 case 'i':
2504 if (operand2 < *res)
2505 *res = operand2;
2506 break;
2507 case 'a':
2508 if (operand2 > *res)
2509 *res = operand2;
2510 break;
2511 default:
2512 abort();
2513 }
2514 }
2515 return 1;
2516 }
2517
2518 /* --- register management ------------------------------------------------ */
2519
2520 void
2521 roff_setreg(struct roff *r, const char *name, int val, char sign)
2522 {
2523 struct roffreg *reg;
2524
2525 /* Search for an existing register with the same name. */
2526 reg = r->regtab;
2527
2528 while (reg && strcmp(name, reg->key.p))
2529 reg = reg->next;
2530
2531 if (NULL == reg) {
2532 /* Create a new register. */
2533 reg = mandoc_malloc(sizeof(struct roffreg));
2534 reg->key.p = mandoc_strdup(name);
2535 reg->key.sz = strlen(name);
2536 reg->val = 0;
2537 reg->next = r->regtab;
2538 r->regtab = reg;
2539 }
2540
2541 if ('+' == sign)
2542 reg->val += val;
2543 else if ('-' == sign)
2544 reg->val -= val;
2545 else
2546 reg->val = val;
2547 }
2548
2549 /*
2550 * Handle some predefined read-only number registers.
2551 * For now, return -1 if the requested register is not predefined;
2552 * in case a predefined read-only register having the value -1
2553 * were to turn up, another special value would have to be chosen.
2554 */
2555 static int
2556 roff_getregro(const struct roff *r, const char *name)
2557 {
2558
2559 switch (*name) {
2560 case '$': /* Number of arguments of the last macro evaluated. */
2561 return r->argc;
2562 case 'A': /* ASCII approximation mode is always off. */
2563 return 0;
2564 case 'g': /* Groff compatibility mode is always on. */
2565 return 1;
2566 case 'H': /* Fixed horizontal resolution. */
2567 return 24;
2568 case 'j': /* Always adjust left margin only. */
2569 return 0;
2570 case 'T': /* Some output device is always defined. */
2571 return 1;
2572 case 'V': /* Fixed vertical resolution. */
2573 return 40;
2574 default:
2575 return -1;
2576 }
2577 }
2578
2579 int
2580 roff_getreg(const struct roff *r, const char *name)
2581 {
2582 struct roffreg *reg;
2583 int val;
2584
2585 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
2586 val = roff_getregro(r, name + 1);
2587 if (-1 != val)
2588 return val;
2589 }
2590
2591 for (reg = r->regtab; reg; reg = reg->next)
2592 if (0 == strcmp(name, reg->key.p))
2593 return reg->val;
2594
2595 return 0;
2596 }
2597
2598 static int
2599 roff_getregn(const struct roff *r, const char *name, size_t len)
2600 {
2601 struct roffreg *reg;
2602 int val;
2603
2604 if ('.' == name[0] && 2 == len) {
2605 val = roff_getregro(r, name + 1);
2606 if (-1 != val)
2607 return val;
2608 }
2609
2610 for (reg = r->regtab; reg; reg = reg->next)
2611 if (len == reg->key.sz &&
2612 0 == strncmp(name, reg->key.p, len))
2613 return reg->val;
2614
2615 return 0;
2616 }
2617
2618 static int
2619 roff_hasregn(const struct roff *r, const char *name, size_t len)
2620 {
2621 struct roffreg *reg;
2622 int val;
2623
2624 if ('.' == name[0] && 2 == len) {
2625 val = roff_getregro(r, name + 1);
2626 if (-1 != val)
2627 return 1;
2628 }
2629
2630 for (reg = r->regtab; reg; reg = reg->next)
2631 if (len == reg->key.sz &&
2632 0 == strncmp(name, reg->key.p, len))
2633 return 1;
2634
2635 return 0;
2636 }
2637
2638 static void
2639 roff_freereg(struct roffreg *reg)
2640 {
2641 struct roffreg *old_reg;
2642
2643 while (NULL != reg) {
2644 free(reg->key.p);
2645 old_reg = reg;
2646 reg = reg->next;
2647 free(old_reg);
2648 }
2649 }
2650
2651 static enum rofferr
2652 roff_nr(ROFF_ARGS)
2653 {
2654 char *key, *val;
2655 size_t keysz;
2656 int iv;
2657 char sign;
2658
2659 key = val = buf->buf + pos;
2660 if (*key == '\0')
2661 return ROFF_IGN;
2662
2663 keysz = roff_getname(r, &val, ln, pos);
2664 if (key[keysz] == '\\')
2665 return ROFF_IGN;
2666 key[keysz] = '\0';
2667
2668 sign = *val;
2669 if (sign == '+' || sign == '-')
2670 val++;
2671
2672 if (roff_evalnum(r, ln, val, NULL, &iv, ROFFNUM_SCALE))
2673 roff_setreg(r, key, iv, sign);
2674
2675 return ROFF_IGN;
2676 }
2677
2678 static enum rofferr
2679 roff_rr(ROFF_ARGS)
2680 {
2681 struct roffreg *reg, **prev;
2682 char *name, *cp;
2683 size_t namesz;
2684
2685 name = cp = buf->buf + pos;
2686 if (*name == '\0')
2687 return ROFF_IGN;
2688 namesz = roff_getname(r, &cp, ln, pos);
2689 name[namesz] = '\0';
2690
2691 prev = &r->regtab;
2692 while (1) {
2693 reg = *prev;
2694 if (reg == NULL || !strcmp(name, reg->key.p))
2695 break;
2696 prev = &reg->next;
2697 }
2698 if (reg != NULL) {
2699 *prev = reg->next;
2700 free(reg->key.p);
2701 free(reg);
2702 }
2703 return ROFF_IGN;
2704 }
2705
2706 /* --- handler functions for roff requests -------------------------------- */
2707
2708 static enum rofferr
2709 roff_rm(ROFF_ARGS)
2710 {
2711 const char *name;
2712 char *cp;
2713 size_t namesz;
2714
2715 cp = buf->buf + pos;
2716 while (*cp != '\0') {
2717 name = cp;
2718 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
2719 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
2720 if (name[namesz] == '\\')
2721 break;
2722 }
2723 return ROFF_IGN;
2724 }
2725
2726 static enum rofferr
2727 roff_it(ROFF_ARGS)
2728 {
2729 int iv;
2730
2731 /* Parse the number of lines. */
2732
2733 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
2734 mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
2735 ln, ppos, buf->buf + 1);
2736 return ROFF_IGN;
2737 }
2738
2739 while (isspace((unsigned char)buf->buf[pos]))
2740 pos++;
2741
2742 /*
2743 * Arm the input line trap.
2744 * Special-casing "an-trap" is an ugly workaround to cope
2745 * with DocBook stupidly fiddling with man(7) internals.
2746 */
2747
2748 roffit_lines = iv;
2749 roffit_macro = mandoc_strdup(iv != 1 ||
2750 strcmp(buf->buf + pos, "an-trap") ?
2751 buf->buf + pos : "br");
2752 return ROFF_IGN;
2753 }
2754
2755 static enum rofferr
2756 roff_Dd(ROFF_ARGS)
2757 {
2758 const char *const *cp;
2759
2760 if ((r->options & (MPARSE_MDOC | MPARSE_QUICK)) == 0)
2761 for (cp = __mdoc_reserved; *cp; cp++)
2762 roff_setstr(r, *cp, NULL, 0);
2763
2764 if (r->format == 0)
2765 r->format = MPARSE_MDOC;
2766
2767 return ROFF_CONT;
2768 }
2769
2770 static enum rofferr
2771 roff_TH(ROFF_ARGS)
2772 {
2773 const char *const *cp;
2774
2775 if ((r->options & MPARSE_QUICK) == 0)
2776 for (cp = __man_reserved; *cp; cp++)
2777 roff_setstr(r, *cp, NULL, 0);
2778
2779 if (r->format == 0)
2780 r->format = MPARSE_MAN;
2781
2782 return ROFF_CONT;
2783 }
2784
2785 static enum rofferr
2786 roff_TE(ROFF_ARGS)
2787 {
2788
2789 if (NULL == r->tbl)
2790 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2791 ln, ppos, "TE");
2792 else if ( ! tbl_end(&r->tbl)) {
2793 free(buf->buf);
2794 buf->buf = mandoc_strdup(".sp");
2795 buf->sz = 4;
2796 return ROFF_REPARSE;
2797 }
2798 return ROFF_IGN;
2799 }
2800
2801 static enum rofferr
2802 roff_T_(ROFF_ARGS)
2803 {
2804
2805 if (NULL == r->tbl)
2806 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2807 ln, ppos, "T&");
2808 else
2809 tbl_restart(ppos, ln, r->tbl);
2810
2811 return ROFF_IGN;
2812 }
2813
2814 /*
2815 * Handle in-line equation delimiters.
2816 */
2817 static enum rofferr
2818 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
2819 {
2820 char *cp1, *cp2;
2821 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
2822
2823 /*
2824 * Outside equations, look for an opening delimiter.
2825 * If we are inside an equation, we already know it is
2826 * in-line, or this function wouldn't have been called;
2827 * so look for a closing delimiter.
2828 */
2829
2830 cp1 = buf->buf + pos;
2831 cp2 = strchr(cp1, r->eqn == NULL ?
2832 r->last_eqn->odelim : r->last_eqn->cdelim);
2833 if (cp2 == NULL)
2834 return ROFF_CONT;
2835
2836 *cp2++ = '\0';
2837 bef_pr = bef_nl = aft_nl = aft_pr = "";
2838
2839 /* Handle preceding text, protecting whitespace. */
2840
2841 if (*buf->buf != '\0') {
2842 if (r->eqn == NULL)
2843 bef_pr = "\\&";
2844 bef_nl = "\n";
2845 }
2846
2847 /*
2848 * Prepare replacing the delimiter with an equation macro
2849 * and drop leading white space from the equation.
2850 */
2851
2852 if (r->eqn == NULL) {
2853 while (*cp2 == ' ')
2854 cp2++;
2855 mac = ".EQ";
2856 } else
2857 mac = ".EN";
2858
2859 /* Handle following text, protecting whitespace. */
2860
2861 if (*cp2 != '\0') {
2862 aft_nl = "\n";
2863 if (r->eqn != NULL)
2864 aft_pr = "\\&";
2865 }
2866
2867 /* Do the actual replacement. */
2868
2869 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
2870 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
2871 free(buf->buf);
2872 buf->buf = cp1;
2873
2874 /* Toggle the in-line state of the eqn subsystem. */
2875
2876 r->eqn_inline = r->eqn == NULL;
2877 return ROFF_REPARSE;
2878 }
2879
2880 static enum rofferr
2881 roff_EQ(ROFF_ARGS)
2882 {
2883 struct eqn_node *e;
2884
2885 assert(r->eqn == NULL);
2886 e = eqn_alloc(ppos, ln, r->parse);
2887
2888 if (r->last_eqn) {
2889 r->last_eqn->next = e;
2890 e->delim = r->last_eqn->delim;
2891 e->odelim = r->last_eqn->odelim;
2892 e->cdelim = r->last_eqn->cdelim;
2893 } else
2894 r->first_eqn = r->last_eqn = e;
2895
2896 r->eqn = r->last_eqn = e;
2897
2898 if (buf->buf[pos] != '\0')
2899 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2900 ".EQ %s", buf->buf + pos);
2901
2902 return ROFF_IGN;
2903 }
2904
2905 static enum rofferr
2906 roff_EN(ROFF_ARGS)
2907 {
2908
2909 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
2910 return ROFF_IGN;
2911 }
2912
2913 static enum rofferr
2914 roff_TS(ROFF_ARGS)
2915 {
2916 struct tbl_node *tbl;
2917
2918 if (r->tbl) {
2919 mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
2920 ln, ppos, "TS breaks TS");
2921 tbl_end(&r->tbl);
2922 }
2923
2924 tbl = tbl_alloc(ppos, ln, r->parse);
2925
2926 if (r->last_tbl)
2927 r->last_tbl->next = tbl;
2928 else
2929 r->first_tbl = r->last_tbl = tbl;
2930
2931 r->tbl = r->last_tbl = tbl;
2932 return ROFF_IGN;
2933 }
2934
2935 static enum rofferr
2936 roff_brp(ROFF_ARGS)
2937 {
2938
2939 buf->buf[pos - 1] = '\0';
2940 return ROFF_CONT;
2941 }
2942
2943 static enum rofferr
2944 roff_cc(ROFF_ARGS)
2945 {
2946 const char *p;
2947
2948 p = buf->buf + pos;
2949
2950 if (*p == '\0' || (r->control = *p++) == '.')
2951 r->control = 0;
2952
2953 if (*p != '\0')
2954 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
2955 ln, p - buf->buf, "cc ... %s", p);
2956
2957 return ROFF_IGN;
2958 }
2959
2960 static enum rofferr
2961 roff_tr(ROFF_ARGS)
2962 {
2963 const char *p, *first, *second;
2964 size_t fsz, ssz;
2965 enum mandoc_esc esc;
2966
2967 p = buf->buf + pos;
2968
2969 if (*p == '\0') {
2970 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr");
2971 return ROFF_IGN;
2972 }
2973
2974 while (*p != '\0') {
2975 fsz = ssz = 1;
2976
2977 first = p++;
2978 if (*first == '\\') {
2979 esc = mandoc_escape(&p, NULL, NULL);
2980 if (esc == ESCAPE_ERROR) {
2981 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2982 ln, (int)(p - buf->buf), first);
2983 return ROFF_IGN;
2984 }
2985 fsz = (size_t)(p - first);
2986 }
2987
2988 second = p++;
2989 if (*second == '\\') {
2990 esc = mandoc_escape(&p, NULL, NULL);
2991 if (esc == ESCAPE_ERROR) {
2992 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2993 ln, (int)(p - buf->buf), second);
2994 return ROFF_IGN;
2995 }
2996 ssz = (size_t)(p - second);
2997 } else if (*second == '\0') {
2998 mandoc_vmsg(MANDOCERR_TR_ODD, r->parse,
2999 ln, first - buf->buf, "tr %s", first);
3000 second = " ";
3001 p--;
3002 }
3003
3004 if (fsz > 1) {
3005 roff_setstrn(&r->xmbtab, first, fsz,
3006 second, ssz, 0);
3007 continue;
3008 }
3009
3010 if (r->xtab == NULL)
3011 r->xtab = mandoc_calloc(128,
3012 sizeof(struct roffstr));
3013
3014 free(r->xtab[(int)*first].p);
3015 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3016 r->xtab[(int)*first].sz = ssz;
3017 }
3018
3019 return ROFF_IGN;
3020 }
3021
3022 static enum rofferr
3023 roff_so(ROFF_ARGS)
3024 {
3025 char *name, *cp;
3026
3027 name = buf->buf + pos;
3028 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
3029
3030 /*
3031 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3032 * opening anything that's not in our cwd or anything beneath
3033 * it. Thus, explicitly disallow traversing up the file-system
3034 * or using absolute paths.
3035 */
3036
3037 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3038 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
3039 ".so %s", name);
3040 buf->sz = mandoc_asprintf(&cp,
3041 ".sp\nSee the file %s.\n.sp", name) + 1;
3042 free(buf->buf);
3043 buf->buf = cp;
3044 *offs = 0;
3045 return ROFF_REPARSE;
3046 }
3047
3048 *offs = pos;
3049 return ROFF_SO;
3050 }
3051
3052 /* --- user defined strings and macros ------------------------------------ */
3053
3054 static enum rofferr
3055 roff_userdef(ROFF_ARGS)
3056 {
3057 const char *arg[9], *ap;
3058 char *cp, *n1, *n2;
3059 int i, ib, ie;
3060 size_t asz, rsz;
3061
3062 /*
3063 * Collect pointers to macro argument strings
3064 * and NUL-terminate them.
3065 */
3066
3067 r->argc = 0;
3068 cp = buf->buf + pos;
3069 for (i = 0; i < 9; i++) {
3070 if (*cp == '\0')
3071 arg[i] = "";
3072 else {
3073 arg[i] = mandoc_getarg(r->parse, &cp, ln, &pos);
3074 r->argc = i + 1;
3075 }
3076 }
3077
3078 /*
3079 * Expand macro arguments.
3080 */
3081
3082 buf->sz = strlen(r->current_string) + 1;
3083 n1 = cp = mandoc_malloc(buf->sz);
3084 memcpy(n1, r->current_string, buf->sz);
3085 while (*cp != '\0') {
3086
3087 /* Scan ahead for the next argument invocation. */
3088
3089 if (*cp++ != '\\')
3090 continue;
3091 if (*cp++ != '$')
3092 continue;
3093 if (*cp == '*') { /* \\$* inserts all arguments */
3094 ib = 0;
3095 ie = r->argc - 1;
3096 } else { /* \\$1 .. \\$9 insert one argument */
3097 ib = ie = *cp - '1';
3098 if (ib < 0 || ib > 8)
3099 continue;
3100 }
3101 cp -= 2;
3102
3103 /*
3104 * Determine the size of the expanded argument,
3105 * taking escaping of quotes into account.
3106 */
3107
3108 asz = ie > ib ? ie - ib : 0; /* for blanks */
3109 for (i = ib; i <= ie; i++) {
3110 for (ap = arg[i]; *ap != '\0'; ap++) {
3111 asz++;
3112 if (*ap == '"')
3113 asz += 3;
3114 }
3115 }
3116 if (asz != 3) {
3117
3118 /*
3119 * Determine the size of the rest of the
3120 * unexpanded macro, including the NUL.
3121 */
3122
3123 rsz = buf->sz - (cp - n1) - 3;
3124
3125 /*
3126 * When shrinking, move before
3127 * releasing the storage.
3128 */
3129
3130 if (asz < 3)
3131 memmove(cp + asz, cp + 3, rsz);
3132
3133 /*
3134 * Resize the storage for the macro
3135 * and readjust the parse pointer.
3136 */
3137
3138 buf->sz += asz - 3;
3139 n2 = mandoc_realloc(n1, buf->sz);
3140 cp = n2 + (cp - n1);
3141 n1 = n2;
3142
3143 /*
3144 * When growing, make room
3145 * for the expanded argument.
3146 */
3147
3148 if (asz > 3)
3149 memmove(cp + asz, cp + 3, rsz);
3150 }
3151
3152 /* Copy the expanded argument, escaping quotes. */
3153
3154 n2 = cp;
3155 for (i = ib; i <= ie; i++) {
3156 for (ap = arg[i]; *ap != '\0'; ap++) {
3157 if (*ap == '"') {
3158 memcpy(n2, "\\(dq", 4);
3159 n2 += 4;
3160 } else
3161 *n2++ = *ap;
3162 }
3163 if (i < ie)
3164 *n2++ = ' ';
3165 }
3166 }
3167
3168 /*
3169 * Replace the macro invocation
3170 * by the expanded macro.
3171 */
3172
3173 free(buf->buf);
3174 buf->buf = n1;
3175 *offs = 0;
3176
3177 return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
3178 ROFF_REPARSE : ROFF_APPEND;
3179 }
3180
3181 static size_t
3182 roff_getname(struct roff *r, char **cpp, int ln, int pos)
3183 {
3184 char *name, *cp;
3185 size_t namesz;
3186
3187 name = *cpp;
3188 if ('\0' == *name)
3189 return 0;
3190
3191 /* Read until end of name and terminate it with NUL. */
3192 for (cp = name; 1; cp++) {
3193 if ('\0' == *cp || ' ' == *cp) {
3194 namesz = cp - name;
3195 break;
3196 }
3197 if ('\\' != *cp)
3198 continue;
3199 namesz = cp - name;
3200 if ('{' == cp[1] || '}' == cp[1])
3201 break;
3202 cp++;
3203 if ('\\' == *cp)
3204 continue;
3205 mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
3206 "%.*s", (int)(cp - name + 1), name);
3207 mandoc_escape((const char **)&cp, NULL, NULL);
3208 break;
3209 }
3210
3211 /* Read past spaces. */
3212 while (' ' == *cp)
3213 cp++;
3214
3215 *cpp = cp;
3216 return namesz;
3217 }
3218
3219 /*
3220 * Store *string into the user-defined string called *name.
3221 * To clear an existing entry, call with (*r, *name, NULL, 0).
3222 * append == 0: replace mode
3223 * append == 1: single-line append mode
3224 * append == 2: multiline append mode, append '\n' after each call
3225 */
3226 static void
3227 roff_setstr(struct roff *r, const char *name, const char *string,
3228 int append)
3229 {
3230
3231 roff_setstrn(&r->strtab, name, strlen(name), string,
3232 string ? strlen(string) : 0, append);
3233 }
3234
3235 static void
3236 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
3237 const char *string, size_t stringsz, int append)
3238 {
3239 struct roffkv *n;
3240 char *c;
3241 int i;
3242 size_t oldch, newch;
3243
3244 /* Search for an existing string with the same name. */
3245 n = *r;
3246
3247 while (n && (namesz != n->key.sz ||
3248 strncmp(n->key.p, name, namesz)))
3249 n = n->next;
3250
3251 if (NULL == n) {
3252 /* Create a new string table entry. */
3253 n = mandoc_malloc(sizeof(struct roffkv));
3254 n->key.p = mandoc_strndup(name, namesz);
3255 n->key.sz = namesz;
3256 n->val.p = NULL;
3257 n->val.sz = 0;
3258 n->next = *r;
3259 *r = n;
3260 } else if (0 == append) {
3261 free(n->val.p);
3262 n->val.p = NULL;
3263 n->val.sz = 0;
3264 }
3265
3266 if (NULL == string)
3267 return;
3268
3269 /*
3270 * One additional byte for the '\n' in multiline mode,
3271 * and one for the terminating '\0'.
3272 */
3273 newch = stringsz + (1 < append ? 2u : 1u);
3274
3275 if (NULL == n->val.p) {
3276 n->val.p = mandoc_malloc(newch);
3277 *n->val.p = '\0';
3278 oldch = 0;
3279 } else {
3280 oldch = n->val.sz;
3281 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
3282 }
3283
3284 /* Skip existing content in the destination buffer. */
3285 c = n->val.p + (int)oldch;
3286
3287 /* Append new content to the destination buffer. */
3288 i = 0;
3289 while (i < (int)stringsz) {
3290 /*
3291 * Rudimentary roff copy mode:
3292 * Handle escaped backslashes.
3293 */
3294 if ('\\' == string[i] && '\\' == string[i + 1])
3295 i++;
3296 *c++ = string[i++];
3297 }
3298
3299 /* Append terminating bytes. */
3300 if (1 < append)
3301 *c++ = '\n';
3302
3303 *c = '\0';
3304 n->val.sz = (int)(c - n->val.p);
3305 }
3306
3307 static const char *
3308 roff_getstrn(const struct roff *r, const char *name, size_t len)
3309 {
3310 const struct roffkv *n;
3311 int i;
3312
3313 for (n = r->strtab; n; n = n->next)
3314 if (0 == strncmp(name, n->key.p, len) &&
3315 '\0' == n->key.p[(int)len])
3316 return n->val.p;
3317
3318 for (i = 0; i < PREDEFS_MAX; i++)
3319 if (0 == strncmp(name, predefs[i].name, len) &&
3320 '\0' == predefs[i].name[(int)len])
3321 return predefs[i].str;
3322
3323 return NULL;
3324 }
3325
3326 static void
3327 roff_freestr(struct roffkv *r)
3328 {
3329 struct roffkv *n, *nn;
3330
3331 for (n = r; n; n = nn) {
3332 free(n->key.p);
3333 free(n->val.p);
3334 nn = n->next;
3335 free(n);
3336 }
3337 }
3338
3339 /* --- accessors and utility functions ------------------------------------ */
3340
3341 const struct tbl_span *
3342 roff_span(const struct roff *r)
3343 {
3344
3345 return r->tbl ? tbl_span(r->tbl) : NULL;
3346 }
3347
3348 const struct eqn *
3349 roff_eqn(const struct roff *r)
3350 {
3351
3352 return r->last_eqn ? &r->last_eqn->eqn : NULL;
3353 }
3354
3355 /*
3356 * Duplicate an input string, making the appropriate character
3357 * conversations (as stipulated by `tr') along the way.
3358 * Returns a heap-allocated string with all the replacements made.
3359 */
3360 char *
3361 roff_strdup(const struct roff *r, const char *p)
3362 {
3363 const struct roffkv *cp;
3364 char *res;
3365 const char *pp;
3366 size_t ssz, sz;
3367 enum mandoc_esc esc;
3368
3369 if (NULL == r->xmbtab && NULL == r->xtab)
3370 return mandoc_strdup(p);
3371 else if ('\0' == *p)
3372 return mandoc_strdup("");
3373
3374 /*
3375 * Step through each character looking for term matches
3376 * (remember that a `tr' can be invoked with an escape, which is
3377 * a glyph but the escape is multi-character).
3378 * We only do this if the character hash has been initialised
3379 * and the string is >0 length.
3380 */
3381
3382 res = NULL;
3383 ssz = 0;
3384
3385 while ('\0' != *p) {
3386 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
3387 sz = r->xtab[(int)*p].sz;
3388 res = mandoc_realloc(res, ssz + sz + 1);
3389 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
3390 ssz += sz;
3391 p++;
3392 continue;
3393 } else if ('\\' != *p) {
3394 res = mandoc_realloc(res, ssz + 2);
3395 res[ssz++] = *p++;
3396 continue;
3397 }
3398
3399 /* Search for term matches. */
3400 for (cp = r->xmbtab; cp; cp = cp->next)
3401 if (0 == strncmp(p, cp->key.p, cp->key.sz))
3402 break;
3403
3404 if (NULL != cp) {
3405 /*
3406 * A match has been found.
3407 * Append the match to the array and move
3408 * forward by its keysize.
3409 */
3410 res = mandoc_realloc(res,
3411 ssz + cp->val.sz + 1);
3412 memcpy(res + ssz, cp->val.p, cp->val.sz);
3413 ssz += cp->val.sz;
3414 p += (int)cp->key.sz;
3415 continue;
3416 }
3417
3418 /*
3419 * Handle escapes carefully: we need to copy
3420 * over just the escape itself, or else we might
3421 * do replacements within the escape itself.
3422 * Make sure to pass along the bogus string.
3423 */
3424 pp = p++;
3425 esc = mandoc_escape(&p, NULL, NULL);
3426 if (ESCAPE_ERROR == esc) {
3427 sz = strlen(pp);
3428 res = mandoc_realloc(res, ssz + sz + 1);
3429 memcpy(res + ssz, pp, sz);
3430 break;
3431 }
3432 /*
3433 * We bail out on bad escapes.
3434 * No need to warn: we already did so when
3435 * roff_res() was called.
3436 */
3437 sz = (int)(p - pp);
3438 res = mandoc_realloc(res, ssz + sz + 1);
3439 memcpy(res + ssz, pp, sz);
3440 ssz += sz;
3441 }
3442
3443 res[(int)ssz] = '\0';
3444 return res;
3445 }
3446
3447 int
3448 roff_getformat(const struct roff *r)
3449 {
3450
3451 return r->format;
3452 }
3453
3454 /*
3455 * Find out whether a line is a macro line or not.
3456 * If it is, adjust the current position and return one; if it isn't,
3457 * return zero and don't change the current position.
3458 * If the control character has been set with `.cc', then let that grain
3459 * precedence.
3460 * This is slighly contrary to groff, where using the non-breaking
3461 * control character when `cc' has been invoked will cause the
3462 * non-breaking macro contents to be printed verbatim.
3463 */
3464 int
3465 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
3466 {
3467 int pos;
3468
3469 pos = *ppos;
3470
3471 if (0 != r->control && cp[pos] == r->control)
3472 pos++;
3473 else if (0 != r->control)
3474 return 0;
3475 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
3476 pos += 2;
3477 else if ('.' == cp[pos] || '\'' == cp[pos])
3478 pos++;
3479 else
3480 return 0;
3481
3482 while (' ' == cp[pos] || '\t' == cp[pos])
3483 pos++;
3484
3485 *ppos = pos;
3486 return 1;
3487 }