]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
delete some TODO entries that were already fixed
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.271 2015/05/31 23:13:22 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28
29 #include "mandoc.h"
30 #include "mandoc_aux.h"
31 #include "roff.h"
32 #include "libmandoc.h"
33 #include "roff_int.h"
34 #include "libroff.h"
35
/* Upper bound on the nesting depth of if-else conditionals. */
#define	RSTACK_MAX	128

/* Cap on string expansions per input line; breaks infinite loops. */
#define	EXPAND_LIMIT	1000
41
42 /* --- data types --------------------------------------------------------- */
43
/*
 * Request tokens.  The enumerators double as indices into roffs[]
 * (see roffhash_find()), so the two must be kept in the same order.
 * ROFF_MAX is the "not found" value.
 */
enum	rofft {
	ROFF_ab,
	ROFF_ad,
	ROFF_af,
	ROFF_aln,
	ROFF_als,
	ROFF_am,
	ROFF_am1,
	ROFF_ami,
	ROFF_ami1,
	ROFF_as,
	ROFF_as1,
	ROFF_asciify,
	ROFF_backtrace,
	ROFF_bd,
	ROFF_bleedat,
	ROFF_blm,
	ROFF_box,
	ROFF_boxa,
	ROFF_bp,
	ROFF_BP,
	/* MAN_br, MDOC_br */
	ROFF_break,
	ROFF_breakchar,
	ROFF_brnl,
	ROFF_brp,
	ROFF_brpnl,
	ROFF_c2,
	ROFF_cc,
	ROFF_ce,
	ROFF_cf,
	ROFF_cflags,
	ROFF_ch,
	ROFF_char,
	ROFF_chop,
	ROFF_class,
	ROFF_close,
	ROFF_CL,
	ROFF_color,
	ROFF_composite,
	ROFF_continue,
	ROFF_cp,
	ROFF_cropat,
	ROFF_cs,
	ROFF_cu,
	ROFF_da,
	ROFF_dch,
	ROFF_Dd,
	ROFF_de,
	ROFF_de1,
	ROFF_defcolor,
	ROFF_dei,
	ROFF_dei1,
	ROFF_device,
	ROFF_devicem,
	ROFF_di,
	ROFF_do,
	ROFF_ds,
	ROFF_ds1,
	ROFF_dwh,
	ROFF_dt,
	ROFF_ec,
	ROFF_ecr,
	ROFF_ecs,
	ROFF_el,
	ROFF_em,
	ROFF_EN,
	ROFF_eo,
	ROFF_EP,
	ROFF_EQ,
	ROFF_errprint,
	ROFF_ev,
	ROFF_evc,
	ROFF_ex,
	ROFF_fallback,
	ROFF_fam,
	ROFF_fc,
	ROFF_fchar,
	ROFF_fcolor,
	ROFF_fdeferlig,
	ROFF_feature,
	/* MAN_fi; ignored in mdoc(7) */
	ROFF_fkern,
	ROFF_fl,
	ROFF_flig,
	ROFF_fp,
	ROFF_fps,
	ROFF_fschar,
	ROFF_fspacewidth,
	ROFF_fspecial,
	/* MAN_ft; ignored in mdoc(7) */
	ROFF_ftr,
	ROFF_fzoom,
	ROFF_gcolor,
	ROFF_hc,
	ROFF_hcode,
	ROFF_hidechar,
	ROFF_hla,
	ROFF_hlm,
	ROFF_hpf,
	ROFF_hpfa,
	ROFF_hpfcode,
	ROFF_hw,
	ROFF_hy,
	ROFF_hylang,
	ROFF_hylen,
	ROFF_hym,
	ROFF_hypp,
	ROFF_hys,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	/* MAN_in; ignored in mdoc(7) */
	ROFF_index,
	ROFF_it,
	ROFF_itc,
	ROFF_IX,
	ROFF_kern,
	ROFF_kernafter,
	ROFF_kernbefore,
	ROFF_kernpair,
	ROFF_lc,
	ROFF_lc_ctype,
	ROFF_lds,
	ROFF_length,
	ROFF_letadj,
	ROFF_lf,
	ROFF_lg,
	ROFF_lhang,
	ROFF_linetabs,
	/* MAN_ll, MDOC_ll */
	ROFF_lnr,
	ROFF_lnrf,
	ROFF_lpfx,
	ROFF_ls,
	ROFF_lsm,
	ROFF_lt,
	ROFF_mc,
	ROFF_mediasize,
	ROFF_minss,
	ROFF_mk,
	ROFF_mso,
	ROFF_na,
	ROFF_ne,
	/* MAN_nf; ignored in mdoc(7) */
	ROFF_nh,
	ROFF_nhychar,
	ROFF_nm,
	ROFF_nn,
	ROFF_nop,
	ROFF_nr,
	ROFF_nrf,
	ROFF_nroff,
	ROFF_ns,
	ROFF_nx,
	ROFF_open,
	ROFF_opena,
	ROFF_os,
	ROFF_output,
	ROFF_padj,
	ROFF_papersize,
	ROFF_pc,
	ROFF_pev,
	ROFF_pi,
	ROFF_PI,
	ROFF_pl,
	ROFF_pm,
	ROFF_pn,
	ROFF_pnr,
	ROFF_po,
	ROFF_ps,
	ROFF_psbb,
	ROFF_pshape,
	ROFF_pso,
	ROFF_ptr,
	ROFF_pvs,
	ROFF_rchar,
	ROFF_rd,
	ROFF_recursionlimit,
	ROFF_return,
	ROFF_rfschar,
	ROFF_rhang,
	ROFF_rj,
	ROFF_rm,
	ROFF_rn,
	ROFF_rnn,
	ROFF_rr,
	ROFF_rs,
	ROFF_rt,
	ROFF_schar,
	ROFF_sentchar,
	ROFF_shc,
	ROFF_shift,
	ROFF_sizes,
	ROFF_so,
	/* MAN_sp, MDOC_sp */
	ROFF_spacewidth,
	ROFF_special,
	ROFF_spreadwarn,
	ROFF_ss,
	ROFF_sty,
	ROFF_substring,
	ROFF_sv,
	ROFF_sy,
	ROFF_T_,
	ROFF_ta,
	ROFF_tc,
	ROFF_TE,
	ROFF_TH,
	ROFF_ti,
	ROFF_tkf,
	ROFF_tl,
	ROFF_tm,
	ROFF_tm1,
	ROFF_tmc,
	ROFF_tr,
	ROFF_track,
	ROFF_transchar,
	ROFF_trf,
	ROFF_trimat,
	ROFF_trin,
	ROFF_trnt,
	ROFF_troff,
	ROFF_TS,
	ROFF_uf,
	ROFF_ul,
	ROFF_unformat,
	ROFF_unwatch,
	ROFF_unwatchn,
	ROFF_vpt,
	ROFF_vs,
	ROFF_warn,
	ROFF_warnscale,
	ROFF_watch,
	ROFF_watchlength,
	ROFF_watchn,
	ROFF_wh,
	ROFF_while,
	ROFF_write,
	ROFF_writec,
	ROFF_writem,
	ROFF_xflag,
	ROFF_cblock,
	ROFF_USERDEF,
	ROFF_MAX
};
290
/*
 * Minimal string buffer: a pointer together with its cached length.
 */
struct	roffstr {
	char		*p;	/* nil-terminated buffer */
	size_t		 sz;	/* saved strlen(p) */
};
298
299 /*
300 * A key-value roffstr pair as part of a singly-linked list.
301 */
302 struct roffkv {
303 struct roffstr key;
304 struct roffstr val;
305 struct roffkv *next; /* next in list */
306 };
307
308 /*
309 * A single number register as part of a singly-linked list.
310 */
311 struct roffreg {
312 struct roffstr key;
313 int val;
314 struct roffreg *next;
315 };
316
/*
 * State of the roff parser.  Allocated by roff_alloc(), cleared by
 * roff_reset() and released by roff_free().
 */
struct	roff {
	struct mparse	*parse;		/* parse point */
	const struct mchars *mchars;	/* character table */
	struct roffnode	*last;		/* leaf of stack */
	int		*rstack;	/* stack of inverted `ie' values */
	struct roffreg	*regtab;	/* number registers */
	struct roffkv	*strtab;	/* user-defined strings & macros */
	struct roffkv	*xmbtab;	/* multi-byte trans table (`tr') */
	struct roffstr	*xtab;		/* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl;	/* first table parsed */
	struct tbl_node	*last_tbl;	/* last table parsed */
	struct tbl_node	*tbl;		/* current table being parsed */
	struct eqn_node	*last_eqn;	/* last equation parsed */
	struct eqn_node	*first_eqn;	/* first equation parsed */
	struct eqn_node	*eqn;		/* current equation being parsed */
	int		 eqn_inline;	/* current equation is inline */
	int		 options;	/* parse options */
	int		 rstacksz;	/* current size limit of rstack */
	int		 rstackpos;	/* position in rstack */
	int		 format;	/* current file in mdoc or man format */
	char		 control;	/* control character */
};
340
341 struct roffnode {
342 enum rofft tok; /* type of node */
343 struct roffnode *parent; /* up one in stack */
344 int line; /* parse line */
345 int col; /* parse col */
346 char *name; /* node name, e.g. macro name */
347 char *end; /* end-rules: custom token */
348 int endspan; /* end-rules: next-line or infty */
349 int rule; /* current evaluation rule */
350 };
351
/*
 * Parameter list shared by all request handlers:
 *   r     parse ctx
 *   tok   tok of macro
 *   buf   input buffer
 *   ln    parse line
 *   ppos  original pos in buffer
 *   pos   current pos in buffer
 *   offs  reset offset of buffer data
 */
#define	ROFF_ARGS	 struct roff *r, \
			 enum rofft tok, \
			 struct buf *buf, \
			 int ln, \
			 int ppos, \
			 int pos, \
			 int *offs

/* Signature of a request handler. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
361
362 struct roffmac {
363 const char *name; /* macro name */
364 roffproc proc; /* process new macro */
365 roffproc text; /* process as child text of macro */
366 roffproc sub; /* process as child of macro */
367 int flags;
368 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
369 struct roffmac *next;
370 };
371
/* A predefined string: the input name and what replaces it. */
struct	predef {
	const char	*name;	/* predefined input name */
	const char	*str;	/* replacement symbol */
};

/* Expands to one struct predef initializer, trailing comma included. */
#define	PREDEF(pd_name, pd_str) \
	{ (pd_name), (pd_str) },
379
380 /* --- function prototypes ------------------------------------------------ */
381
static	enum rofft	 roffhash_find(const char *, size_t);
static	void		 roffhash_init(void);
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum rofft,
				const char *, int, int);
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_brp(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_cc(ROFF_ARGS);
static	void		 roff_ccond(struct roff *, int, int);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum rofferr	 roff_eqndelim(struct roff *, struct buf *, int);
static	int		 roff_evalcond(struct roff *r, int, char *, int *);
static	int		 roff_evalnum(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalpar(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalstrcond(const char *, int *);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	size_t		 roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *, int);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(const struct roff *,
				const char *, size_t);
static	int		 roff_getregro(const char *name);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	int		 roff_hasregn(const struct roff *,
				const char *, size_t);
static	enum rofferr	 roff_insec(ROFF_ARGS);
static	enum rofferr	 roff_it(ROFF_ARGS);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	void		 roff_man_alloc1(struct roff_man *);
static	void		 roff_man_free1(struct roff_man *);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	enum rofft	 roff_parse(struct roff *, char *, int *,
				int, int);
static	enum rofferr	 roff_parsetext(struct buf *, int, int *);
static	enum rofferr	 roff_res(struct roff *, struct buf *, int, int);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	enum rofferr	 roff_rr(ROFF_ARGS);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_tr(ROFF_ARGS);
static	enum rofferr	 roff_Dd(ROFF_ARGS);
static	enum rofferr	 roff_TH(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_unsupp(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
446
447 /* --- constant data ------------------------------------------------------ */
448
/*
 * Range of first characters accepted by the request lookup table and
 * the resulting number of buckets; see roffhash_find().
 */
#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

/* Number parsing flags. */
#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */

/* Request lookup table, filled in by roffhash_init(). */
static	struct roffmac	*hash[HASHWIDTH];
459
460 static struct roffmac roffs[ROFF_MAX] = {
461 { "ab", roff_unsupp, NULL, NULL, 0, NULL },
462 { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
463 { "af", roff_line_ignore, NULL, NULL, 0, NULL },
464 { "aln", roff_unsupp, NULL, NULL, 0, NULL },
465 { "als", roff_unsupp, NULL, NULL, 0, NULL },
466 { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
467 { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
468 { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
469 { "ami1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
470 { "as", roff_ds, NULL, NULL, 0, NULL },
471 { "as1", roff_ds, NULL, NULL, 0, NULL },
472 { "asciify", roff_unsupp, NULL, NULL, 0, NULL },
473 { "backtrace", roff_line_ignore, NULL, NULL, 0, NULL },
474 { "bd", roff_line_ignore, NULL, NULL, 0, NULL },
475 { "bleedat", roff_line_ignore, NULL, NULL, 0, NULL },
476 { "blm", roff_unsupp, NULL, NULL, 0, NULL },
477 { "box", roff_unsupp, NULL, NULL, 0, NULL },
478 { "boxa", roff_unsupp, NULL, NULL, 0, NULL },
479 { "bp", roff_line_ignore, NULL, NULL, 0, NULL },
480 { "BP", roff_unsupp, NULL, NULL, 0, NULL },
481 { "break", roff_unsupp, NULL, NULL, 0, NULL },
482 { "breakchar", roff_line_ignore, NULL, NULL, 0, NULL },
483 { "brnl", roff_line_ignore, NULL, NULL, 0, NULL },
484 { "brp", roff_brp, NULL, NULL, 0, NULL },
485 { "brpnl", roff_line_ignore, NULL, NULL, 0, NULL },
486 { "c2", roff_unsupp, NULL, NULL, 0, NULL },
487 { "cc", roff_cc, NULL, NULL, 0, NULL },
488 { "ce", roff_line_ignore, NULL, NULL, 0, NULL },
489 { "cf", roff_insec, NULL, NULL, 0, NULL },
490 { "cflags", roff_line_ignore, NULL, NULL, 0, NULL },
491 { "ch", roff_line_ignore, NULL, NULL, 0, NULL },
492 { "char", roff_unsupp, NULL, NULL, 0, NULL },
493 { "chop", roff_unsupp, NULL, NULL, 0, NULL },
494 { "class", roff_line_ignore, NULL, NULL, 0, NULL },
495 { "close", roff_insec, NULL, NULL, 0, NULL },
496 { "CL", roff_unsupp, NULL, NULL, 0, NULL },
497 { "color", roff_line_ignore, NULL, NULL, 0, NULL },
498 { "composite", roff_unsupp, NULL, NULL, 0, NULL },
499 { "continue", roff_unsupp, NULL, NULL, 0, NULL },
500 { "cp", roff_line_ignore, NULL, NULL, 0, NULL },
501 { "cropat", roff_line_ignore, NULL, NULL, 0, NULL },
502 { "cs", roff_line_ignore, NULL, NULL, 0, NULL },
503 { "cu", roff_line_ignore, NULL, NULL, 0, NULL },
504 { "da", roff_unsupp, NULL, NULL, 0, NULL },
505 { "dch", roff_unsupp, NULL, NULL, 0, NULL },
506 { "Dd", roff_Dd, NULL, NULL, 0, NULL },
507 { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
508 { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
509 { "defcolor", roff_line_ignore, NULL, NULL, 0, NULL },
510 { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
511 { "dei1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
512 { "device", roff_unsupp, NULL, NULL, 0, NULL },
513 { "devicem", roff_unsupp, NULL, NULL, 0, NULL },
514 { "di", roff_unsupp, NULL, NULL, 0, NULL },
515 { "do", roff_unsupp, NULL, NULL, 0, NULL },
516 { "ds", roff_ds, NULL, NULL, 0, NULL },
517 { "ds1", roff_ds, NULL, NULL, 0, NULL },
518 { "dwh", roff_unsupp, NULL, NULL, 0, NULL },
519 { "dt", roff_unsupp, NULL, NULL, 0, NULL },
520 { "ec", roff_unsupp, NULL, NULL, 0, NULL },
521 { "ecr", roff_unsupp, NULL, NULL, 0, NULL },
522 { "ecs", roff_unsupp, NULL, NULL, 0, NULL },
523 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
524 { "em", roff_unsupp, NULL, NULL, 0, NULL },
525 { "EN", roff_EN, NULL, NULL, 0, NULL },
526 { "eo", roff_unsupp, NULL, NULL, 0, NULL },
527 { "EP", roff_unsupp, NULL, NULL, 0, NULL },
528 { "EQ", roff_EQ, NULL, NULL, 0, NULL },
529 { "errprint", roff_line_ignore, NULL, NULL, 0, NULL },
530 { "ev", roff_unsupp, NULL, NULL, 0, NULL },
531 { "evc", roff_unsupp, NULL, NULL, 0, NULL },
532 { "ex", roff_unsupp, NULL, NULL, 0, NULL },
533 { "fallback", roff_line_ignore, NULL, NULL, 0, NULL },
534 { "fam", roff_line_ignore, NULL, NULL, 0, NULL },
535 { "fc", roff_unsupp, NULL, NULL, 0, NULL },
536 { "fchar", roff_unsupp, NULL, NULL, 0, NULL },
537 { "fcolor", roff_line_ignore, NULL, NULL, 0, NULL },
538 { "fdeferlig", roff_line_ignore, NULL, NULL, 0, NULL },
539 { "feature", roff_line_ignore, NULL, NULL, 0, NULL },
540 { "fkern", roff_line_ignore, NULL, NULL, 0, NULL },
541 { "fl", roff_line_ignore, NULL, NULL, 0, NULL },
542 { "flig", roff_line_ignore, NULL, NULL, 0, NULL },
543 { "fp", roff_line_ignore, NULL, NULL, 0, NULL },
544 { "fps", roff_line_ignore, NULL, NULL, 0, NULL },
545 { "fschar", roff_unsupp, NULL, NULL, 0, NULL },
546 { "fspacewidth", roff_line_ignore, NULL, NULL, 0, NULL },
547 { "fspecial", roff_line_ignore, NULL, NULL, 0, NULL },
548 { "ftr", roff_line_ignore, NULL, NULL, 0, NULL },
549 { "fzoom", roff_line_ignore, NULL, NULL, 0, NULL },
550 { "gcolor", roff_line_ignore, NULL, NULL, 0, NULL },
551 { "hc", roff_line_ignore, NULL, NULL, 0, NULL },
552 { "hcode", roff_line_ignore, NULL, NULL, 0, NULL },
553 { "hidechar", roff_line_ignore, NULL, NULL, 0, NULL },
554 { "hla", roff_line_ignore, NULL, NULL, 0, NULL },
555 { "hlm", roff_line_ignore, NULL, NULL, 0, NULL },
556 { "hpf", roff_line_ignore, NULL, NULL, 0, NULL },
557 { "hpfa", roff_line_ignore, NULL, NULL, 0, NULL },
558 { "hpfcode", roff_line_ignore, NULL, NULL, 0, NULL },
559 { "hw", roff_line_ignore, NULL, NULL, 0, NULL },
560 { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
561 { "hylang", roff_line_ignore, NULL, NULL, 0, NULL },
562 { "hylen", roff_line_ignore, NULL, NULL, 0, NULL },
563 { "hym", roff_line_ignore, NULL, NULL, 0, NULL },
564 { "hypp", roff_line_ignore, NULL, NULL, 0, NULL },
565 { "hys", roff_line_ignore, NULL, NULL, 0, NULL },
566 { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
567 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
568 { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
569 { "index", roff_unsupp, NULL, NULL, 0, NULL },
570 { "it", roff_it, NULL, NULL, 0, NULL },
571 { "itc", roff_unsupp, NULL, NULL, 0, NULL },
572 { "IX", roff_line_ignore, NULL, NULL, 0, NULL },
573 { "kern", roff_line_ignore, NULL, NULL, 0, NULL },
574 { "kernafter", roff_line_ignore, NULL, NULL, 0, NULL },
575 { "kernbefore", roff_line_ignore, NULL, NULL, 0, NULL },
576 { "kernpair", roff_line_ignore, NULL, NULL, 0, NULL },
577 { "lc", roff_unsupp, NULL, NULL, 0, NULL },
578 { "lc_ctype", roff_unsupp, NULL, NULL, 0, NULL },
579 { "lds", roff_unsupp, NULL, NULL, 0, NULL },
580 { "length", roff_unsupp, NULL, NULL, 0, NULL },
581 { "letadj", roff_line_ignore, NULL, NULL, 0, NULL },
582 { "lf", roff_insec, NULL, NULL, 0, NULL },
583 { "lg", roff_line_ignore, NULL, NULL, 0, NULL },
584 { "lhang", roff_line_ignore, NULL, NULL, 0, NULL },
585 { "linetabs", roff_unsupp, NULL, NULL, 0, NULL },
586 { "lnr", roff_unsupp, NULL, NULL, 0, NULL },
587 { "lnrf", roff_unsupp, NULL, NULL, 0, NULL },
588 { "lpfx", roff_unsupp, NULL, NULL, 0, NULL },
589 { "ls", roff_line_ignore, NULL, NULL, 0, NULL },
590 { "lsm", roff_unsupp, NULL, NULL, 0, NULL },
591 { "lt", roff_line_ignore, NULL, NULL, 0, NULL },
592 { "mc", roff_line_ignore, NULL, NULL, 0, NULL },
593 { "mediasize", roff_line_ignore, NULL, NULL, 0, NULL },
594 { "minss", roff_line_ignore, NULL, NULL, 0, NULL },
595 { "mk", roff_line_ignore, NULL, NULL, 0, NULL },
596 { "mso", roff_insec, NULL, NULL, 0, NULL },
597 { "na", roff_line_ignore, NULL, NULL, 0, NULL },
598 { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
599 { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
600 { "nhychar", roff_line_ignore, NULL, NULL, 0, NULL },
601 { "nm", roff_unsupp, NULL, NULL, 0, NULL },
602 { "nn", roff_unsupp, NULL, NULL, 0, NULL },
603 { "nop", roff_unsupp, NULL, NULL, 0, NULL },
604 { "nr", roff_nr, NULL, NULL, 0, NULL },
605 { "nrf", roff_unsupp, NULL, NULL, 0, NULL },
606 { "nroff", roff_line_ignore, NULL, NULL, 0, NULL },
607 { "ns", roff_line_ignore, NULL, NULL, 0, NULL },
608 { "nx", roff_insec, NULL, NULL, 0, NULL },
609 { "open", roff_insec, NULL, NULL, 0, NULL },
610 { "opena", roff_insec, NULL, NULL, 0, NULL },
611 { "os", roff_line_ignore, NULL, NULL, 0, NULL },
612 { "output", roff_unsupp, NULL, NULL, 0, NULL },
613 { "padj", roff_line_ignore, NULL, NULL, 0, NULL },
614 { "papersize", roff_line_ignore, NULL, NULL, 0, NULL },
615 { "pc", roff_line_ignore, NULL, NULL, 0, NULL },
616 { "pev", roff_line_ignore, NULL, NULL, 0, NULL },
617 { "pi", roff_insec, NULL, NULL, 0, NULL },
618 { "PI", roff_unsupp, NULL, NULL, 0, NULL },
619 { "pl", roff_line_ignore, NULL, NULL, 0, NULL },
620 { "pm", roff_line_ignore, NULL, NULL, 0, NULL },
621 { "pn", roff_line_ignore, NULL, NULL, 0, NULL },
622 { "pnr", roff_line_ignore, NULL, NULL, 0, NULL },
623 { "po", roff_line_ignore, NULL, NULL, 0, NULL },
624 { "ps", roff_line_ignore, NULL, NULL, 0, NULL },
625 { "psbb", roff_unsupp, NULL, NULL, 0, NULL },
626 { "pshape", roff_unsupp, NULL, NULL, 0, NULL },
627 { "pso", roff_insec, NULL, NULL, 0, NULL },
628 { "ptr", roff_line_ignore, NULL, NULL, 0, NULL },
629 { "pvs", roff_line_ignore, NULL, NULL, 0, NULL },
630 { "rchar", roff_unsupp, NULL, NULL, 0, NULL },
631 { "rd", roff_line_ignore, NULL, NULL, 0, NULL },
632 { "recursionlimit", roff_line_ignore, NULL, NULL, 0, NULL },
633 { "return", roff_unsupp, NULL, NULL, 0, NULL },
634 { "rfschar", roff_unsupp, NULL, NULL, 0, NULL },
635 { "rhang", roff_line_ignore, NULL, NULL, 0, NULL },
636 { "rj", roff_line_ignore, NULL, NULL, 0, NULL },
637 { "rm", roff_rm, NULL, NULL, 0, NULL },
638 { "rn", roff_unsupp, NULL, NULL, 0, NULL },
639 { "rnn", roff_unsupp, NULL, NULL, 0, NULL },
640 { "rr", roff_rr, NULL, NULL, 0, NULL },
641 { "rs", roff_line_ignore, NULL, NULL, 0, NULL },
642 { "rt", roff_line_ignore, NULL, NULL, 0, NULL },
643 { "schar", roff_unsupp, NULL, NULL, 0, NULL },
644 { "sentchar", roff_line_ignore, NULL, NULL, 0, NULL },
645 { "shc", roff_line_ignore, NULL, NULL, 0, NULL },
646 { "shift", roff_unsupp, NULL, NULL, 0, NULL },
647 { "sizes", roff_line_ignore, NULL, NULL, 0, NULL },
648 { "so", roff_so, NULL, NULL, 0, NULL },
649 { "spacewidth", roff_line_ignore, NULL, NULL, 0, NULL },
650 { "special", roff_line_ignore, NULL, NULL, 0, NULL },
651 { "spreadwarn", roff_line_ignore, NULL, NULL, 0, NULL },
652 { "ss", roff_line_ignore, NULL, NULL, 0, NULL },
653 { "sty", roff_line_ignore, NULL, NULL, 0, NULL },
654 { "substring", roff_unsupp, NULL, NULL, 0, NULL },
655 { "sv", roff_line_ignore, NULL, NULL, 0, NULL },
656 { "sy", roff_insec, NULL, NULL, 0, NULL },
657 { "T&", roff_T_, NULL, NULL, 0, NULL },
658 { "ta", roff_unsupp, NULL, NULL, 0, NULL },
659 { "tc", roff_unsupp, NULL, NULL, 0, NULL },
660 { "TE", roff_TE, NULL, NULL, 0, NULL },
661 { "TH", roff_TH, NULL, NULL, 0, NULL },
662 { "ti", roff_unsupp, NULL, NULL, 0, NULL },
663 { "tkf", roff_line_ignore, NULL, NULL, 0, NULL },
664 { "tl", roff_unsupp, NULL, NULL, 0, NULL },
665 { "tm", roff_line_ignore, NULL, NULL, 0, NULL },
666 { "tm1", roff_line_ignore, NULL, NULL, 0, NULL },
667 { "tmc", roff_line_ignore, NULL, NULL, 0, NULL },
668 { "tr", roff_tr, NULL, NULL, 0, NULL },
669 { "track", roff_line_ignore, NULL, NULL, 0, NULL },
670 { "transchar", roff_line_ignore, NULL, NULL, 0, NULL },
671 { "trf", roff_insec, NULL, NULL, 0, NULL },
672 { "trimat", roff_line_ignore, NULL, NULL, 0, NULL },
673 { "trin", roff_unsupp, NULL, NULL, 0, NULL },
674 { "trnt", roff_unsupp, NULL, NULL, 0, NULL },
675 { "troff", roff_line_ignore, NULL, NULL, 0, NULL },
676 { "TS", roff_TS, NULL, NULL, 0, NULL },
677 { "uf", roff_line_ignore, NULL, NULL, 0, NULL },
678 { "ul", roff_line_ignore, NULL, NULL, 0, NULL },
679 { "unformat", roff_unsupp, NULL, NULL, 0, NULL },
680 { "unwatch", roff_line_ignore, NULL, NULL, 0, NULL },
681 { "unwatchn", roff_line_ignore, NULL, NULL, 0, NULL },
682 { "vpt", roff_line_ignore, NULL, NULL, 0, NULL },
683 { "vs", roff_line_ignore, NULL, NULL, 0, NULL },
684 { "warn", roff_line_ignore, NULL, NULL, 0, NULL },
685 { "warnscale", roff_line_ignore, NULL, NULL, 0, NULL },
686 { "watch", roff_line_ignore, NULL, NULL, 0, NULL },
687 { "watchlength", roff_line_ignore, NULL, NULL, 0, NULL },
688 { "watchn", roff_line_ignore, NULL, NULL, 0, NULL },
689 { "wh", roff_unsupp, NULL, NULL, 0, NULL },
690 { "while", roff_unsupp, NULL, NULL, 0, NULL },
691 { "write", roff_insec, NULL, NULL, 0, NULL },
692 { "writec", roff_insec, NULL, NULL, 0, NULL },
693 { "writem", roff_insec, NULL, NULL, 0, NULL },
694 { "xflag", roff_line_ignore, NULL, NULL, 0, NULL },
695 { ".", roff_cblock, NULL, NULL, 0, NULL },
696 { NULL, roff_userdef, NULL, NULL, 0, NULL },
697 };
698
/*
 * NULL-terminated list of mdoc macro names.
 * not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH
 */
const	char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
	"Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
	"Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em",
	"En", "Eo", "Er", "Es", "Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp",
	"Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "Pq",
	"Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv",
	"Sc", "Sh", "Sm", "So", "Sq",
	"Ss", "St", "Sx", "Sy",
	"Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
	"%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
721
/*
 * NULL-terminated list of man macro names.
 * not currently implemented: BT DE DS ME MT PT SY TQ YS
 */
const	char *const __man_reserved[] = {
	"AT", "B", "BI", "BR", "DT",
	"EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR",
	"LP", "OP", "P", "PD", "PP",
	"R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS",
	"TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR",
	NULL
};
731
732 /* Array of injected predefined strings. */
733 #define PREDEFS_MAX 38
734 static const struct predef predefs[PREDEFS_MAX] = {
735 #include "predefs.in"
736 };
737
/*
 * Bucket index of a request name in hash[]: the offset of its first
 * character from ASCII_LO.  The caller has to make sure that character
 * lies within ASCII_LO..ASCII_HI; see roffhash_find().
 * The argument is parenthesized so that pointer expressions such as
 * `s + 1' index the intended string.
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)
740
/* State of a pending `it' request. */
static	int	 roffit_lines;	/* number of lines to delay */
static	char	*roffit_macro;	/* nil-terminated macro line */
743
744
745 /* --- request table ------------------------------------------------------ */
746
747 static void
748 roffhash_init(void)
749 {
750 struct roffmac *n;
751 int buc, i;
752
753 for (i = 0; i < (int)ROFF_USERDEF; i++) {
754 assert(roffs[i].name[0] >= ASCII_LO);
755 assert(roffs[i].name[0] <= ASCII_HI);
756
757 buc = ROFF_HASH(roffs[i].name);
758
759 if (NULL != (n = hash[buc])) {
760 for ( ; n->next; n = n->next)
761 /* Do nothing. */ ;
762 n->next = &roffs[i];
763 } else
764 hash[buc] = &roffs[i];
765 }
766 }
767
768 /*
769 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
770 * the nil-terminated string name could be found.
771 */
772 static enum rofft
773 roffhash_find(const char *p, size_t s)
774 {
775 int buc;
776 struct roffmac *n;
777
778 /*
779 * libroff has an extremely simple hashtable, for the time
780 * being, which simply keys on the first character, which must
781 * be printable, then walks a chain. It works well enough until
782 * optimised.
783 */
784
785 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
786 return(ROFF_MAX);
787
788 buc = ROFF_HASH(p);
789
790 if (NULL == (n = hash[buc]))
791 return(ROFF_MAX);
792 for ( ; n; n = n->next)
793 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
794 return((enum rofft)(n - roffs));
795
796 return(ROFF_MAX);
797 }
798
799 /* --- stack of request blocks -------------------------------------------- */
800
801 /*
802 * Pop the current node off of the stack of roff instructions currently
803 * pending.
804 */
805 static void
806 roffnode_pop(struct roff *r)
807 {
808 struct roffnode *p;
809
810 assert(r->last);
811 p = r->last;
812
813 r->last = r->last->parent;
814 free(p->name);
815 free(p->end);
816 free(p);
817 }
818
819 /*
820 * Push a roff node onto the instruction stack. This must later be
821 * removed with roffnode_pop().
822 */
823 static void
824 roffnode_push(struct roff *r, enum rofft tok, const char *name,
825 int line, int col)
826 {
827 struct roffnode *p;
828
829 p = mandoc_calloc(1, sizeof(struct roffnode));
830 p->tok = tok;
831 if (name)
832 p->name = mandoc_strdup(name);
833 p->parent = r->last;
834 p->line = line;
835 p->col = col;
836 p->rule = p->parent ? p->parent->rule : 0;
837
838 r->last = p;
839 }
840
841 /* --- roff parser state data management ---------------------------------- */
842
843 static void
844 roff_free1(struct roff *r)
845 {
846 struct tbl_node *tbl;
847 struct eqn_node *e;
848 int i;
849
850 while (NULL != (tbl = r->first_tbl)) {
851 r->first_tbl = tbl->next;
852 tbl_free(tbl);
853 }
854 r->first_tbl = r->last_tbl = r->tbl = NULL;
855
856 while (NULL != (e = r->first_eqn)) {
857 r->first_eqn = e->next;
858 eqn_free(e);
859 }
860 r->first_eqn = r->last_eqn = r->eqn = NULL;
861
862 while (r->last)
863 roffnode_pop(r);
864
865 free (r->rstack);
866 r->rstack = NULL;
867 r->rstacksz = 0;
868 r->rstackpos = -1;
869
870 roff_freereg(r->regtab);
871 r->regtab = NULL;
872
873 roff_freestr(r->strtab);
874 roff_freestr(r->xmbtab);
875 r->strtab = r->xmbtab = NULL;
876
877 if (r->xtab)
878 for (i = 0; i < 128; i++)
879 free(r->xtab[i].p);
880 free(r->xtab);
881 r->xtab = NULL;
882 }
883
884 void
885 roff_reset(struct roff *r)
886 {
887
888 roff_free1(r);
889 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
890 r->control = 0;
891 }
892
/*
 * Release the parser state and then the parser itself.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);
	free(r);
}
900
901 struct roff *
902 roff_alloc(struct mparse *parse, const struct mchars *mchars, int options)
903 {
904 struct roff *r;
905
906 r = mandoc_calloc(1, sizeof(struct roff));
907 r->parse = parse;
908 r->mchars = mchars;
909 r->options = options;
910 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
911 r->rstackpos = -1;
912
913 roffhash_init();
914
915 return(r);
916 }
917
918 /* --- syntax tree state data management ---------------------------------- */
919
920 static void
921 roff_man_free1(struct roff_man *man)
922 {
923
924 if (man->first != NULL)
925 roff_node_delete(man, man->first);
926 free(man->meta.msec);
927 free(man->meta.vol);
928 free(man->meta.os);
929 free(man->meta.arch);
930 free(man->meta.title);
931 free(man->meta.name);
932 free(man->meta.date);
933 }
934
935 static void
936 roff_man_alloc1(struct roff_man *man)
937 {
938
939 memset(&man->meta, 0, sizeof(man->meta));
940 man->first = mandoc_calloc(1, sizeof(*man->first));
941 man->first->type = ROFFT_ROOT;
942 man->last = man->first;
943 man->last_es = NULL;
944 man->flags = 0;
945 man->macroset = MACROSET_NONE;
946 man->lastsec = man->lastnamed = SEC_NONE;
947 man->next = ROFF_NEXT_CHILD;
948 }
949
/*
 * Throw away the current tree and meta data and start over with an
 * empty tree.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);
	roff_man_alloc1(man);
}
957
/*
 * Release the tree and meta data, then the struct roff_man itself.
 */
void
roff_man_free(struct roff_man *man)
{
	roff_man_free1(man);
	free(man);
}
965
966 struct roff_man *
967 roff_man_alloc(struct roff *roff, struct mparse *parse,
968 const char *defos, int quick)
969 {
970 struct roff_man *man;
971
972 man = mandoc_calloc(1, sizeof(*man));
973 man->parse = parse;
974 man->roff = roff;
975 man->defos = defos;
976 man->quick = quick;
977 roff_man_alloc1(man);
978 return(man);
979 }
980
981 /* --- syntax tree handling ----------------------------------------------- */
982
983 struct roff_node *
984 roff_node_alloc(struct roff_man *man, int line, int pos,
985 enum roff_type type, int tok)
986 {
987 struct roff_node *n;
988
989 n = mandoc_calloc(1, sizeof(*n));
990 n->line = line;
991 n->pos = pos;
992 n->tok = tok;
993 n->type = type;
994 n->sec = man->lastsec;
995
996 if (man->flags & MDOC_SYNOPSIS)
997 n->flags |= MDOC_SYNPRETTY;
998 else
999 n->flags &= ~MDOC_SYNPRETTY;
1000 if (man->flags & MDOC_NEWLINE)
1001 n->flags |= MDOC_LINE;
1002 man->flags &= ~MDOC_NEWLINE;
1003
1004 return(n);
1005 }
1006
1007 void
1008 roff_node_append(struct roff_man *man, struct roff_node *n)
1009 {
1010
1011 switch (man->next) {
1012 case ROFF_NEXT_SIBLING:
1013 man->last->next = n;
1014 n->prev = man->last;
1015 n->parent = man->last->parent;
1016 break;
1017 case ROFF_NEXT_CHILD:
1018 man->last->child = n;
1019 n->parent = man->last;
1020 break;
1021 default:
1022 abort();
1023 /* NOTREACHED */
1024 }
1025 n->parent->nchild++;
1026 n->parent->last = n;
1027
1028 /*
1029 * Copy over the normalised-data pointer of our parent. Not
1030 * everybody has one, but copying a null pointer is fine.
1031 */
1032
1033 switch (n->type) {
1034 case ROFFT_BODY:
1035 if (n->end != ENDBODY_NOT)
1036 break;
1037 /* FALLTHROUGH */
1038 case ROFFT_TAIL:
1039 /* FALLTHROUGH */
1040 case ROFFT_HEAD:
1041 n->norm = n->parent->norm;
1042 break;
1043 default:
1044 break;
1045 }
1046
1047 if (man->macroset == MACROSET_MDOC)
1048 mdoc_valid_pre(man, n);
1049
1050 switch (n->type) {
1051 case ROFFT_HEAD:
1052 assert(n->parent->type == ROFFT_BLOCK);
1053 n->parent->head = n;
1054 break;
1055 case ROFFT_BODY:
1056 if (n->end)
1057 break;
1058 assert(n->parent->type == ROFFT_BLOCK);
1059 n->parent->body = n;
1060 break;
1061 case ROFFT_TAIL:
1062 assert(n->parent->type == ROFFT_BLOCK);
1063 n->parent->tail = n;
1064 break;
1065 default:
1066 break;
1067 }
1068 man->last = n;
1069 }
1070
1071 void
1072 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
1073 {
1074 struct roff_node *n;
1075
1076 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
1077 n->string = roff_strdup(man->roff, word);
1078 roff_node_append(man, n);
1079 if (man->macroset == MACROSET_MDOC)
1080 mdoc_valid_post(man);
1081 else
1082 man_valid_post(man);
1083 man->next = ROFF_NEXT_SIBLING;
1084 }
1085
1086 void
1087 roff_word_append(struct roff_man *man, const char *word)
1088 {
1089 struct roff_node *n;
1090 char *addstr, *newstr;
1091
1092 n = man->last;
1093 addstr = roff_strdup(man->roff, word);
1094 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
1095 free(addstr);
1096 free(n->string);
1097 n->string = newstr;
1098 man->next = ROFF_NEXT_SIBLING;
1099 }
1100
1101 void
1102 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
1103 {
1104 struct roff_node *n;
1105
1106 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
1107 roff_node_append(man, n);
1108 man->next = ROFF_NEXT_CHILD;
1109 }
1110
1111 struct roff_node *
1112 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
1113 {
1114 struct roff_node *n;
1115
1116 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1117 roff_node_append(man, n);
1118 man->next = ROFF_NEXT_CHILD;
1119 return(n);
1120 }
1121
1122 struct roff_node *
1123 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1124 {
1125 struct roff_node *n;
1126
1127 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1128 roff_node_append(man, n);
1129 man->next = ROFF_NEXT_CHILD;
1130 return(n);
1131 }
1132
1133 struct roff_node *
1134 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1135 {
1136 struct roff_node *n;
1137
1138 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1139 roff_node_append(man, n);
1140 man->next = ROFF_NEXT_CHILD;
1141 return(n);
1142 }
1143
1144 void
1145 roff_addeqn(struct roff_man *man, const struct eqn *eqn)
1146 {
1147 struct roff_node *n;
1148
1149 n = roff_node_alloc(man, eqn->ln, eqn->pos, ROFFT_EQN, TOKEN_NONE);
1150 n->eqn = eqn;
1151 if (eqn->ln > man->last->line)
1152 n->flags |= MDOC_LINE;
1153 roff_node_append(man, n);
1154 man->next = ROFF_NEXT_SIBLING;
1155 }
1156
1157 void
1158 roff_addtbl(struct roff_man *man, const struct tbl_span *tbl)
1159 {
1160 struct roff_node *n;
1161
1162 if (man->macroset == MACROSET_MAN)
1163 man_breakscope(man, TOKEN_NONE);
1164 n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE);
1165 n->span = tbl;
1166 roff_node_append(man, n);
1167 if (man->macroset == MACROSET_MDOC)
1168 mdoc_valid_post(man);
1169 else
1170 man_valid_post(man);
1171 man->next = ROFF_NEXT_SIBLING;
1172 }
1173
1174 void
1175 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1176 {
1177
1178 /* Adjust siblings. */
1179
1180 if (n->prev)
1181 n->prev->next = n->next;
1182 if (n->next)
1183 n->next->prev = n->prev;
1184
1185 /* Adjust parent. */
1186
1187 if (n->parent != NULL) {
1188 n->parent->nchild--;
1189 if (n->parent->child == n)
1190 n->parent->child = n->next;
1191 if (n->parent->last == n)
1192 n->parent->last = n->prev;
1193 }
1194
1195 /* Adjust parse point. */
1196
1197 if (man == NULL)
1198 return;
1199 if (man->last == n) {
1200 if (n->prev == NULL) {
1201 man->last = n->parent;
1202 man->next = ROFF_NEXT_CHILD;
1203 } else {
1204 man->last = n->prev;
1205 man->next = ROFF_NEXT_SIBLING;
1206 }
1207 }
1208 if (man->first == n)
1209 man->first = NULL;
1210 }
1211
1212 void
1213 roff_node_free(struct roff_node *n)
1214 {
1215
1216 if (n->args != NULL)
1217 mdoc_argv_free(n->args);
1218 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1219 free(n->norm);
1220 free(n->string);
1221 free(n);
1222 }
1223
1224 void
1225 roff_node_delete(struct roff_man *man, struct roff_node *n)
1226 {
1227
1228 while (n->child != NULL)
1229 roff_node_delete(man, n->child);
1230 assert(n->nchild == 0);
1231 roff_node_unlink(man, n);
1232 roff_node_free(n);
1233 }
1234
1235 void
1236 deroff(char **dest, const struct roff_node *n)
1237 {
1238 char *cp;
1239 size_t sz;
1240
1241 if (n->type != ROFFT_TEXT) {
1242 for (n = n->child; n != NULL; n = n->next)
1243 deroff(dest, n);
1244 return;
1245 }
1246
1247 /* Skip leading whitespace and escape sequences. */
1248
1249 cp = n->string;
1250 while (*cp != '\0') {
1251 if ('\\' == *cp) {
1252 cp++;
1253 mandoc_escape((const char **)&cp, NULL, NULL);
1254 } else if (isspace((unsigned char)*cp))
1255 cp++;
1256 else
1257 break;
1258 }
1259
1260 /* Skip trailing whitespace. */
1261
1262 for (sz = strlen(cp); sz; sz--)
1263 if ( ! isspace((unsigned char)cp[sz-1]))
1264 break;
1265
1266 /* Skip empty strings. */
1267
1268 if (sz == 0)
1269 return;
1270
1271 if (*dest == NULL) {
1272 *dest = mandoc_strndup(cp, sz);
1273 return;
1274 }
1275
1276 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1277 free(*dest);
1278 *dest = cp;
1279 }
1280
1281 /* --- main functions of the roff parser ---------------------------------- */
1282
1283 /*
1284 * In the current line, expand escape sequences that tend to get
1285 * used in numerical expressions and conditional requests.
1286 * Also check the syntax of the remaining escape sequences.
1287 */
1288 static enum rofferr
1289 roff_res(struct roff *r, struct buf *buf, int ln, int pos)
1290 {
1291 char ubuf[24]; /* buffer to print the number */
1292 const char *start; /* start of the string to process */
1293 char *stesc; /* start of an escape sequence ('\\') */
1294 const char *stnam; /* start of the name, after "[(*" */
1295 const char *cp; /* end of the name, e.g. before ']' */
1296 const char *res; /* the string to be substituted */
1297 char *nbuf; /* new buffer to copy buf->buf to */
1298 size_t maxl; /* expected length of the escape name */
1299 size_t naml; /* actual length of the escape name */
1300 enum mandoc_esc esc; /* type of the escape sequence */
1301 int inaml; /* length returned from mandoc_escape() */
1302 int expand_count; /* to avoid infinite loops */
1303 int npos; /* position in numeric expression */
1304 int arg_complete; /* argument not interrupted by eol */
1305 char term; /* character terminating the escape */
1306
1307 expand_count = 0;
1308 start = buf->buf + pos;
1309 stesc = strchr(start, '\0') - 1;
1310 while (stesc-- > start) {
1311
1312 /* Search backwards for the next backslash. */
1313
1314 if (*stesc != '\\')
1315 continue;
1316
1317 /* If it is escaped, skip it. */
1318
1319 for (cp = stesc - 1; cp >= start; cp--)
1320 if (*cp != '\\')
1321 break;
1322
1323 if ((stesc - cp) % 2 == 0) {
1324 stesc = (char *)cp;
1325 continue;
1326 }
1327
1328 /* Decide whether to expand or to check only. */
1329
1330 term = '\0';
1331 cp = stesc + 1;
1332 switch (*cp) {
1333 case '*':
1334 res = NULL;
1335 break;
1336 case 'B':
1337 /* FALLTHROUGH */
1338 case 'w':
1339 term = cp[1];
1340 /* FALLTHROUGH */
1341 case 'n':
1342 res = ubuf;
1343 break;
1344 default:
1345 esc = mandoc_escape(&cp, &stnam, &inaml);
1346 if (esc == ESCAPE_ERROR ||
1347 (esc == ESCAPE_SPECIAL &&
1348 mchars_spec2cp(r->mchars, stnam, inaml) < 0))
1349 mandoc_vmsg(MANDOCERR_ESC_BAD,
1350 r->parse, ln, (int)(stesc - buf->buf),
1351 "%.*s", (int)(cp - stesc), stesc);
1352 continue;
1353 }
1354
1355 if (EXPAND_LIMIT < ++expand_count) {
1356 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1357 ln, (int)(stesc - buf->buf), NULL);
1358 return(ROFF_IGN);
1359 }
1360
1361 /*
1362 * The third character decides the length
1363 * of the name of the string or register.
1364 * Save a pointer to the name.
1365 */
1366
1367 if (term == '\0') {
1368 switch (*++cp) {
1369 case '\0':
1370 maxl = 0;
1371 break;
1372 case '(':
1373 cp++;
1374 maxl = 2;
1375 break;
1376 case '[':
1377 cp++;
1378 term = ']';
1379 maxl = 0;
1380 break;
1381 default:
1382 maxl = 1;
1383 break;
1384 }
1385 } else {
1386 cp += 2;
1387 maxl = 0;
1388 }
1389 stnam = cp;
1390
1391 /* Advance to the end of the name. */
1392
1393 naml = 0;
1394 arg_complete = 1;
1395 while (maxl == 0 || naml < maxl) {
1396 if (*cp == '\0') {
1397 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
1398 ln, (int)(stesc - buf->buf), stesc);
1399 arg_complete = 0;
1400 break;
1401 }
1402 if (maxl == 0 && *cp == term) {
1403 cp++;
1404 break;
1405 }
1406 if (*cp++ != '\\' || stesc[1] != 'w') {
1407 naml++;
1408 continue;
1409 }
1410 switch (mandoc_escape(&cp, NULL, NULL)) {
1411 case ESCAPE_SPECIAL:
1412 /* FALLTHROUGH */
1413 case ESCAPE_UNICODE:
1414 /* FALLTHROUGH */
1415 case ESCAPE_NUMBERED:
1416 /* FALLTHROUGH */
1417 case ESCAPE_OVERSTRIKE:
1418 naml++;
1419 break;
1420 default:
1421 break;
1422 }
1423 }
1424
1425 /*
1426 * Retrieve the replacement string; if it is
1427 * undefined, resume searching for escapes.
1428 */
1429
1430 switch (stesc[1]) {
1431 case '*':
1432 if (arg_complete)
1433 res = roff_getstrn(r, stnam, naml);
1434 break;
1435 case 'B':
1436 npos = 0;
1437 ubuf[0] = arg_complete &&
1438 roff_evalnum(r, ln, stnam, &npos,
1439 NULL, ROFFNUM_SCALE) &&
1440 stnam + npos + 1 == cp ? '1' : '0';
1441 ubuf[1] = '\0';
1442 break;
1443 case 'n':
1444 if (arg_complete)
1445 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1446 roff_getregn(r, stnam, naml));
1447 else
1448 ubuf[0] = '\0';
1449 break;
1450 case 'w':
1451 /* use even incomplete args */
1452 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1453 24 * (int)naml);
1454 break;
1455 }
1456
1457 if (res == NULL) {
1458 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1459 r->parse, ln, (int)(stesc - buf->buf),
1460 "%.*s", (int)naml, stnam);
1461 res = "";
1462 } else if (buf->sz + strlen(res) > SHRT_MAX) {
1463 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1464 ln, (int)(stesc - buf->buf), NULL);
1465 return(ROFF_IGN);
1466 }
1467
1468 /* Replace the escape sequence by the string. */
1469
1470 *stesc = '\0';
1471 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
1472 buf->buf, res, cp) + 1;
1473
1474 /* Prepare for the next replacement. */
1475
1476 start = nbuf + pos;
1477 stesc = nbuf + (stesc - buf->buf) + strlen(res);
1478 free(buf->buf);
1479 buf->buf = nbuf;
1480 }
1481 return(ROFF_CONT);
1482 }
1483
1484 /*
1485 * Process text streams:
1486 * Convert all breakable hyphens into ASCII_HYPH.
1487 * Decrement and spring input line trap.
1488 */
1489 static enum rofferr
1490 roff_parsetext(struct buf *buf, int pos, int *offs)
1491 {
1492 size_t sz;
1493 const char *start;
1494 char *p;
1495 int isz;
1496 enum mandoc_esc esc;
1497
1498 start = p = buf->buf + pos;
1499
1500 while (*p != '\0') {
1501 sz = strcspn(p, "-\\");
1502 p += sz;
1503
1504 if (*p == '\0')
1505 break;
1506
1507 if (*p == '\\') {
1508 /* Skip over escapes. */
1509 p++;
1510 esc = mandoc_escape((const char **)&p, NULL, NULL);
1511 if (esc == ESCAPE_ERROR)
1512 break;
1513 while (*p == '-')
1514 p++;
1515 continue;
1516 } else if (p == start) {
1517 p++;
1518 continue;
1519 }
1520
1521 if (isalpha((unsigned char)p[-1]) &&
1522 isalpha((unsigned char)p[1]))
1523 *p = ASCII_HYPH;
1524 p++;
1525 }
1526
1527 /* Spring the input line trap. */
1528 if (roffit_lines == 1) {
1529 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1530 free(buf->buf);
1531 buf->buf = p;
1532 buf->sz = isz + 1;
1533 *offs = 0;
1534 free(roffit_macro);
1535 roffit_lines = 0;
1536 return(ROFF_REPARSE);
1537 } else if (roffit_lines > 1)
1538 --roffit_lines;
1539 return(ROFF_CONT);
1540 }
1541
1542 enum rofferr
1543 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
1544 {
1545 enum rofft t;
1546 enum rofferr e;
1547 int pos; /* parse point */
1548 int spos; /* saved parse point for messages */
1549 int ppos; /* original offset in buf->buf */
1550 int ctl; /* macro line (boolean) */
1551
1552 ppos = pos = *offs;
1553
1554 /* Handle in-line equation delimiters. */
1555
1556 if (r->tbl == NULL &&
1557 r->last_eqn != NULL && r->last_eqn->delim &&
1558 (r->eqn == NULL || r->eqn_inline)) {
1559 e = roff_eqndelim(r, buf, pos);
1560 if (e == ROFF_REPARSE)
1561 return(e);
1562 assert(e == ROFF_CONT);
1563 }
1564
1565 /* Expand some escape sequences. */
1566
1567 e = roff_res(r, buf, ln, pos);
1568 if (e == ROFF_IGN)
1569 return(e);
1570 assert(e == ROFF_CONT);
1571
1572 ctl = roff_getcontrol(r, buf->buf, &pos);
1573
1574 /*
1575 * First, if a scope is open and we're not a macro, pass the
1576 * text through the macro's filter.
1577 * Equations process all content themselves.
1578 * Tables process almost all content themselves, but we want
1579 * to warn about macros before passing it there.
1580 */
1581
1582 if (r->last != NULL && ! ctl) {
1583 t = r->last->tok;
1584 assert(roffs[t].text);
1585 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1586 assert(e == ROFF_IGN || e == ROFF_CONT);
1587 if (e != ROFF_CONT)
1588 return(e);
1589 }
1590 if (r->eqn != NULL)
1591 return(eqn_read(&r->eqn, ln, buf->buf, ppos, offs));
1592 if (r->tbl != NULL && ( ! ctl || buf->buf[pos] == '\0'))
1593 return(tbl_read(r->tbl, ln, buf->buf, ppos));
1594 if ( ! ctl)
1595 return(roff_parsetext(buf, pos, offs));
1596
1597 /* Skip empty request lines. */
1598
1599 if (buf->buf[pos] == '"') {
1600 mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
1601 ln, pos, NULL);
1602 return(ROFF_IGN);
1603 } else if (buf->buf[pos] == '\0')
1604 return(ROFF_IGN);
1605
1606 /*
1607 * If a scope is open, go to the child handler for that macro,
1608 * as it may want to preprocess before doing anything with it.
1609 * Don't do so if an equation is open.
1610 */
1611
1612 if (r->last) {
1613 t = r->last->tok;
1614 assert(roffs[t].sub);
1615 return((*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs));
1616 }
1617
1618 /* No scope is open. This is a new request or macro. */
1619
1620 spos = pos;
1621 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1622
1623 /* Tables ignore most macros. */
1624
1625 if (r->tbl != NULL && (t == ROFF_MAX || t == ROFF_TS)) {
1626 mandoc_msg(MANDOCERR_TBLMACRO, r->parse,
1627 ln, pos, buf->buf + spos);
1628 if (t == ROFF_TS)
1629 return(ROFF_IGN);
1630 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1631 pos++;
1632 while (buf->buf[pos] != '\0' && buf->buf[pos] == ' ')
1633 pos++;
1634 return(tbl_read(r->tbl, ln, buf->buf, pos));
1635 }
1636
1637 /*
1638 * This is neither a roff request nor a user-defined macro.
1639 * Let the standard macro set parsers handle it.
1640 */
1641
1642 if (t == ROFF_MAX)
1643 return(ROFF_CONT);
1644
1645 /* Execute a roff request or a user defined macro. */
1646
1647 assert(roffs[t].proc);
1648 return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs));
1649 }
1650
1651 void
1652 roff_endparse(struct roff *r)
1653 {
1654
1655 if (r->last)
1656 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1657 r->last->line, r->last->col,
1658 roffs[r->last->tok].name);
1659
1660 if (r->eqn) {
1661 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1662 r->eqn->eqn.ln, r->eqn->eqn.pos, "EQ");
1663 eqn_end(&r->eqn);
1664 }
1665
1666 if (r->tbl) {
1667 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1668 r->tbl->line, r->tbl->pos, "TS");
1669 tbl_end(&r->tbl);
1670 }
1671 }
1672
1673 /*
1674 * Parse a roff node's type from the input buffer. This must be in the
1675 * form of ".foo xxx" in the usual way.
1676 */
1677 static enum rofft
1678 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1679 {
1680 char *cp;
1681 const char *mac;
1682 size_t maclen;
1683 enum rofft t;
1684
1685 cp = buf + *pos;
1686
1687 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1688 return(ROFF_MAX);
1689
1690 mac = cp;
1691 maclen = roff_getname(r, &cp, ln, ppos);
1692
1693 t = (r->current_string = roff_getstrn(r, mac, maclen))
1694 ? ROFF_USERDEF : roffhash_find(mac, maclen);
1695
1696 if (ROFF_MAX != t)
1697 *pos = cp - buf;
1698
1699 return(t);
1700 }
1701
1702 /* --- handling of request blocks ----------------------------------------- */
1703
1704 static enum rofferr
1705 roff_cblock(ROFF_ARGS)
1706 {
1707
1708 /*
1709 * A block-close `..' should only be invoked as a child of an
1710 * ignore macro, otherwise raise a warning and just ignore it.
1711 */
1712
1713 if (r->last == NULL) {
1714 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1715 ln, ppos, "..");
1716 return(ROFF_IGN);
1717 }
1718
1719 switch (r->last->tok) {
1720 case ROFF_am:
1721 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1722 /* FALLTHROUGH */
1723 case ROFF_ami:
1724 /* FALLTHROUGH */
1725 case ROFF_de:
1726 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1727 /* FALLTHROUGH */
1728 case ROFF_dei:
1729 /* FALLTHROUGH */
1730 case ROFF_ig:
1731 break;
1732 default:
1733 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1734 ln, ppos, "..");
1735 return(ROFF_IGN);
1736 }
1737
1738 if (buf->buf[pos] != '\0')
1739 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1740 ".. %s", buf->buf + pos);
1741
1742 roffnode_pop(r);
1743 roffnode_cleanscope(r);
1744 return(ROFF_IGN);
1745
1746 }
1747
1748 static void
1749 roffnode_cleanscope(struct roff *r)
1750 {
1751
1752 while (r->last) {
1753 if (--r->last->endspan != 0)
1754 break;
1755 roffnode_pop(r);
1756 }
1757 }
1758
1759 static void
1760 roff_ccond(struct roff *r, int ln, int ppos)
1761 {
1762
1763 if (NULL == r->last) {
1764 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1765 ln, ppos, "\\}");
1766 return;
1767 }
1768
1769 switch (r->last->tok) {
1770 case ROFF_el:
1771 /* FALLTHROUGH */
1772 case ROFF_ie:
1773 /* FALLTHROUGH */
1774 case ROFF_if:
1775 break;
1776 default:
1777 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1778 ln, ppos, "\\}");
1779 return;
1780 }
1781
1782 if (r->last->endspan > -1) {
1783 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1784 ln, ppos, "\\}");
1785 return;
1786 }
1787
1788 roffnode_pop(r);
1789 roffnode_cleanscope(r);
1790 return;
1791 }
1792
1793 static enum rofferr
1794 roff_block(ROFF_ARGS)
1795 {
1796 const char *name;
1797 char *iname, *cp;
1798 size_t namesz;
1799
1800 /* Ignore groff compatibility mode for now. */
1801
1802 if (tok == ROFF_de1)
1803 tok = ROFF_de;
1804 else if (tok == ROFF_dei1)
1805 tok = ROFF_dei;
1806 else if (tok == ROFF_am1)
1807 tok = ROFF_am;
1808 else if (tok == ROFF_ami1)
1809 tok = ROFF_ami;
1810
1811 /* Parse the macro name argument. */
1812
1813 cp = buf->buf + pos;
1814 if (tok == ROFF_ig) {
1815 iname = NULL;
1816 namesz = 0;
1817 } else {
1818 iname = cp;
1819 namesz = roff_getname(r, &cp, ln, ppos);
1820 iname[namesz] = '\0';
1821 }
1822
1823 /* Resolve the macro name argument if it is indirect. */
1824
1825 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1826 if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1827 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1828 r->parse, ln, (int)(iname - buf->buf),
1829 "%.*s", (int)namesz, iname);
1830 namesz = 0;
1831 } else
1832 namesz = strlen(name);
1833 } else
1834 name = iname;
1835
1836 if (namesz == 0 && tok != ROFF_ig) {
1837 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
1838 ln, ppos, roffs[tok].name);
1839 return(ROFF_IGN);
1840 }
1841
1842 roffnode_push(r, tok, name, ln, ppos);
1843
1844 /*
1845 * At the beginning of a `de' macro, clear the existing string
1846 * with the same name, if there is one. New content will be
1847 * appended from roff_block_text() in multiline mode.
1848 */
1849
1850 if (tok == ROFF_de || tok == ROFF_dei)
1851 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1852
1853 if (*cp == '\0')
1854 return(ROFF_IGN);
1855
1856 /* Get the custom end marker. */
1857
1858 iname = cp;
1859 namesz = roff_getname(r, &cp, ln, ppos);
1860
1861 /* Resolve the end marker if it is indirect. */
1862
1863 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1864 if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1865 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1866 r->parse, ln, (int)(iname - buf->buf),
1867 "%.*s", (int)namesz, iname);
1868 namesz = 0;
1869 } else
1870 namesz = strlen(name);
1871 } else
1872 name = iname;
1873
1874 if (namesz)
1875 r->last->end = mandoc_strndup(name, namesz);
1876
1877 if (*cp != '\0')
1878 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
1879 ln, pos, ".%s ... %s", roffs[tok].name, cp);
1880
1881 return(ROFF_IGN);
1882 }
1883
1884 static enum rofferr
1885 roff_block_sub(ROFF_ARGS)
1886 {
1887 enum rofft t;
1888 int i, j;
1889
1890 /*
1891 * First check whether a custom macro exists at this level. If
1892 * it does, then check against it. This is some of groff's
1893 * stranger behaviours. If we encountered a custom end-scope
1894 * tag and that tag also happens to be a "real" macro, then we
1895 * need to try interpreting it again as a real macro. If it's
1896 * not, then return ignore. Else continue.
1897 */
1898
1899 if (r->last->end) {
1900 for (i = pos, j = 0; r->last->end[j]; j++, i++)
1901 if (buf->buf[i] != r->last->end[j])
1902 break;
1903
1904 if (r->last->end[j] == '\0' &&
1905 (buf->buf[i] == '\0' ||
1906 buf->buf[i] == ' ' ||
1907 buf->buf[i] == '\t')) {
1908 roffnode_pop(r);
1909 roffnode_cleanscope(r);
1910
1911 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
1912 i++;
1913
1914 pos = i;
1915 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
1916 ROFF_MAX)
1917 return(ROFF_RERUN);
1918 return(ROFF_IGN);
1919 }
1920 }
1921
1922 /*
1923 * If we have no custom end-query or lookup failed, then try
1924 * pulling it out of the hashtable.
1925 */
1926
1927 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1928
1929 if (t != ROFF_cblock) {
1930 if (tok != ROFF_ig)
1931 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
1932 return(ROFF_IGN);
1933 }
1934
1935 assert(roffs[t].proc);
1936 return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs));
1937 }
1938
1939 static enum rofferr
1940 roff_block_text(ROFF_ARGS)
1941 {
1942
1943 if (tok != ROFF_ig)
1944 roff_setstr(r, r->last->name, buf->buf + pos, 2);
1945
1946 return(ROFF_IGN);
1947 }
1948
1949 static enum rofferr
1950 roff_cond_sub(ROFF_ARGS)
1951 {
1952 enum rofft t;
1953 char *ep;
1954 int rr;
1955
1956 rr = r->last->rule;
1957 roffnode_cleanscope(r);
1958 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1959
1960 /*
1961 * Fully handle known macros when they are structurally
1962 * required or when the conditional evaluated to true.
1963 */
1964
1965 if ((t != ROFF_MAX) &&
1966 (rr || roffs[t].flags & ROFFMAC_STRUCT)) {
1967 assert(roffs[t].proc);
1968 return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs));
1969 }
1970
1971 /*
1972 * If `\}' occurs on a macro line without a preceding macro,
1973 * drop the line completely.
1974 */
1975
1976 ep = buf->buf + pos;
1977 if (ep[0] == '\\' && ep[1] == '}')
1978 rr = 0;
1979
1980 /* Always check for the closing delimiter `\}'. */
1981
1982 while ((ep = strchr(ep, '\\')) != NULL) {
1983 if (*(++ep) == '}') {
1984 *ep = '&';
1985 roff_ccond(r, ln, ep - buf->buf - 1);
1986 }
1987 if (*ep != '\0')
1988 ++ep;
1989 }
1990 return(rr ? ROFF_CONT : ROFF_IGN);
1991 }
1992
1993 static enum rofferr
1994 roff_cond_text(ROFF_ARGS)
1995 {
1996 char *ep;
1997 int rr;
1998
1999 rr = r->last->rule;
2000 roffnode_cleanscope(r);
2001
2002 ep = buf->buf + pos;
2003 while ((ep = strchr(ep, '\\')) != NULL) {
2004 if (*(++ep) == '}') {
2005 *ep = '&';
2006 roff_ccond(r, ln, ep - buf->buf - 1);
2007 }
2008 if (*ep != '\0')
2009 ++ep;
2010 }
2011 return(rr ? ROFF_CONT : ROFF_IGN);
2012 }
2013
2014 /* --- handling of numeric and conditional expressions -------------------- */
2015
2016 /*
2017 * Parse a single signed integer number. Stop at the first non-digit.
2018 * If there is at least one digit, return success and advance the
2019 * parse point, else return failure and let the parse point unchanged.
2020 * Ignore overflows, treat them just like the C language.
2021 */
2022 static int
2023 roff_getnum(const char *v, int *pos, int *res, int flags)
2024 {
2025 int myres, scaled, n, p;
2026
2027 if (NULL == res)
2028 res = &myres;
2029
2030 p = *pos;
2031 n = v[p] == '-';
2032 if (n || v[p] == '+')
2033 p++;
2034
2035 if (flags & ROFFNUM_WHITE)
2036 while (isspace((unsigned char)v[p]))
2037 p++;
2038
2039 for (*res = 0; isdigit((unsigned char)v[p]); p++)
2040 *res = 10 * *res + v[p] - '0';
2041 if (p == *pos + n)
2042 return 0;
2043
2044 if (n)
2045 *res = -*res;
2046
2047 /* Each number may be followed by one optional scaling unit. */
2048
2049 switch (v[p]) {
2050 case 'f':
2051 scaled = *res * 65536;
2052 break;
2053 case 'i':
2054 scaled = *res * 240;
2055 break;
2056 case 'c':
2057 scaled = *res * 240 / 2.54;
2058 break;
2059 case 'v':
2060 /* FALLTROUGH */
2061 case 'P':
2062 scaled = *res * 40;
2063 break;
2064 case 'm':
2065 /* FALLTROUGH */
2066 case 'n':
2067 scaled = *res * 24;
2068 break;
2069 case 'p':
2070 scaled = *res * 10 / 3;
2071 break;
2072 case 'u':
2073 scaled = *res;
2074 break;
2075 case 'M':
2076 scaled = *res * 6 / 25;
2077 break;
2078 default:
2079 scaled = *res;
2080 p--;
2081 break;
2082 }
2083 if (flags & ROFFNUM_SCALE)
2084 *res = scaled;
2085
2086 *pos = p + 1;
2087 return(1);
2088 }
2089
/*
 * Evaluate a string comparison condition.
 * The character at the parse point serves as the delimiter.
 * The condition holds if the string between its first and second
 * occurrence equals the string between its second and third
 * occurrence.  The parse point is advanced past the third
 * occurrence or, if there is none, to the end of the line.
 * Returns 1 if the strings match, 0 otherwise.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim, *left, *right;
	int		 match;

	match = 0;
	delim = v + *pos;		/* initial delimiter */
	left = delim + 1;		/* scans the first string */
	right = strchr(left, *delim);	/* scans the second string */

	/* Without a middle delimiter, there is nothing to compare. */

	if (right != NULL) {
		while (*++right != '\0') {
			if (*right != *left) {
				/* Mismatch: look for the end. */
				right = strchr(right, *delim);
				break;
			}
			if (*right == *delim) {
				/* Both strings ended together. */
				match = 1;
				break;
			}
			left++;
		}
	}

	if (right == NULL)
		right = strchr(left, '\0');
	else if (*right != '\0')
		right++;
	*pos = right - v;
	return(match);
}
2132
2133 /*
2134 * Evaluate an optionally negated single character, numerical,
2135 * or string condition.
2136 */
2137 static int
2138 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2139 {
2140 char *cp, *name;
2141 size_t sz;
2142 int number, savepos, wanttrue;
2143
2144 if ('!' == v[*pos]) {
2145 wanttrue = 0;
2146 (*pos)++;
2147 } else
2148 wanttrue = 1;
2149
2150 switch (v[*pos]) {
2151 case '\0':
2152 return(0);
2153 case 'n':
2154 /* FALLTHROUGH */
2155 case 'o':
2156 (*pos)++;
2157 return(wanttrue);
2158 case 'c':
2159 /* FALLTHROUGH */
2160 case 'd':
2161 /* FALLTHROUGH */
2162 case 'e':
2163 /* FALLTHROUGH */
2164 case 't':
2165 /* FALLTHROUGH */
2166 case 'v':
2167 (*pos)++;
2168 return(!wanttrue);
2169 case 'r':
2170 cp = name = v + ++*pos;
2171 sz = roff_getname(r, &cp, ln, *pos);
2172 *pos = cp - v;
2173 return((sz && roff_hasregn(r, name, sz)) == wanttrue);
2174 default:
2175 break;
2176 }
2177
2178 savepos = *pos;
2179 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2180 return((number > 0) == wanttrue);
2181 else if (*pos == savepos)
2182 return(roff_evalstrcond(v, pos) == wanttrue);
2183 else
2184 return (0);
2185 }
2186
2187 static enum rofferr
2188 roff_line_ignore(ROFF_ARGS)
2189 {
2190
2191 return(ROFF_IGN);
2192 }
2193
2194 static enum rofferr
2195 roff_insec(ROFF_ARGS)
2196 {
2197
2198 mandoc_msg(MANDOCERR_REQ_INSEC, r->parse,
2199 ln, ppos, roffs[tok].name);
2200 return(ROFF_IGN);
2201 }
2202
2203 static enum rofferr
2204 roff_unsupp(ROFF_ARGS)
2205 {
2206
2207 mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse,
2208 ln, ppos, roffs[tok].name);
2209 return(ROFF_IGN);
2210 }
2211
2212 static enum rofferr
2213 roff_cond(ROFF_ARGS)
2214 {
2215
2216 roffnode_push(r, tok, NULL, ln, ppos);
2217
2218 /*
2219 * An `.el' has no conditional body: it will consume the value
2220 * of the current rstack entry set in prior `ie' calls or
2221 * defaults to DENY.
2222 *
2223 * If we're not an `el', however, then evaluate the conditional.
2224 */
2225
2226 r->last->rule = tok == ROFF_el ?
2227 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2228 roff_evalcond(r, ln, buf->buf, &pos);
2229
2230 /*
2231 * An if-else will put the NEGATION of the current evaluated
2232 * conditional into the stack of rules.
2233 */
2234
2235 if (tok == ROFF_ie) {
2236 if (r->rstackpos + 1 == r->rstacksz) {
2237 r->rstacksz += 16;
2238 r->rstack = mandoc_reallocarray(r->rstack,
2239 r->rstacksz, sizeof(int));
2240 }
2241 r->rstack[++r->rstackpos] = !r->last->rule;
2242 }
2243
2244 /* If the parent has false as its rule, then so do we. */
2245
2246 if (r->last->parent && !r->last->parent->rule)
2247 r->last->rule = 0;
2248
2249 /*
2250 * Determine scope.
2251 * If there is nothing on the line after the conditional,
2252 * not even whitespace, use next-line scope.
2253 */
2254
2255 if (buf->buf[pos] == '\0') {
2256 r->last->endspan = 2;
2257 goto out;
2258 }
2259
2260 while (buf->buf[pos] == ' ')
2261 pos++;
2262
2263 /* An opening brace requests multiline scope. */
2264
2265 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2266 r->last->endspan = -1;
2267 pos += 2;
2268 goto out;
2269 }
2270
2271 /*
2272 * Anything else following the conditional causes
2273 * single-line scope. Warn if the scope contains
2274 * nothing but trailing whitespace.
2275 */
2276
2277 if (buf->buf[pos] == '\0')
2278 mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
2279 ln, ppos, roffs[tok].name);
2280
2281 r->last->endspan = 1;
2282
2283 out:
2284 *offs = pos;
2285 return(ROFF_RERUN);
2286 }
2287
2288 static enum rofferr
2289 roff_ds(ROFF_ARGS)
2290 {
2291 char *string;
2292 const char *name;
2293 size_t namesz;
2294
2295 /* Ignore groff compatibility mode for now. */
2296
2297 if (tok == ROFF_ds1)
2298 tok = ROFF_ds;
2299 else if (tok == ROFF_as1)
2300 tok = ROFF_as;
2301
2302 /*
2303 * The first word is the name of the string.
2304 * If it is empty or terminated by an escape sequence,
2305 * abort the `ds' request without defining anything.
2306 */
2307
2308 name = string = buf->buf + pos;
2309 if (*name == '\0')
2310 return(ROFF_IGN);
2311
2312 namesz = roff_getname(r, &string, ln, pos);
2313 if (name[namesz] == '\\')
2314 return(ROFF_IGN);
2315
2316 /* Read past the initial double-quote, if any. */
2317 if (*string == '"')
2318 string++;
2319
2320 /* The rest is the value. */
2321 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2322 ROFF_as == tok);
2323 return(ROFF_IGN);
2324 }
2325
/*
 * Parse a single operator, one or two characters long.
 * Single-character operators are stored in *res as they are;
 * the two-character operators <=, <>, <?, >=, and >? are stored
 * as 'l', '!', 'i', 'g', and 'a', and == is stored as '='.
 * If an operator is recognized, advance the parse point and
 * return the stored character; otherwise, return 0 and leave
 * the parse point unchanged, with *res set to the character
 * found there.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	char	 op;
	int	 len;

	op = v[*pos];
	len = 1;

	switch (op) {
	case '+':
		/* FALLTHROUGH */
	case '-':
		/* FALLTHROUGH */
	case '*':
		/* FALLTHROUGH */
	case '/':
		/* FALLTHROUGH */
	case '%':
		/* FALLTHROUGH */
	case '&':
		/* FALLTHROUGH */
	case ':':
		break;
	case '<':
		switch (v[*pos + 1]) {
		case '=':
			op = 'l';
			len = 2;
			break;
		case '>':
			op = '!';
			len = 2;
			break;
		case '?':
			op = 'i';
			len = 2;
			break;
		default:
			break;
		}
		break;
	case '>':
		switch (v[*pos + 1]) {
		case '=':
			op = 'g';
			len = 2;
			break;
		case '?':
			op = 'a';
			len = 2;
			break;
		default:
			break;
		}
		break;
	case '=':
		if ('=' == v[*pos + 1])
			len = 2;
		break;
	default:
		/* Not an operator. */
		*res = op;
		return(0);
	}

	*res = op;
	*pos += len;
	return(*res);
}
2395
2396 /*
2397 * Evaluate either a parenthesized numeric expression
2398 * or a single signed integer number.
2399 */
2400 static int
2401 roff_evalpar(struct roff *r, int ln,
2402 const char *v, int *pos, int *res, int flags)
2403 {
2404
2405 if ('(' != v[*pos])
2406 return(roff_getnum(v, pos, res, flags));
2407
2408 (*pos)++;
2409 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2410 return(0);
2411
2412 /*
2413 * Omission of the closing parenthesis
2414 * is an error in validation mode,
2415 * but ignored in evaluation mode.
2416 */
2417
2418 if (')' == v[*pos])
2419 (*pos)++;
2420 else if (NULL == res)
2421 return(0);
2422
2423 return(1);
2424 }
2425
2426 /*
2427 * Evaluate a complete numeric expression.
2428 * Proceed left to right, there is no concept of precedence.
2429 */
2430 static int
2431 roff_evalnum(struct roff *r, int ln, const char *v,
2432 int *pos, int *res, int flags)
2433 {
2434 int mypos, operand2;
2435 char operator;
2436
2437 if (NULL == pos) {
2438 mypos = 0;
2439 pos = &mypos;
2440 }
2441
2442 if (flags & ROFFNUM_WHITE)
2443 while (isspace((unsigned char)v[*pos]))
2444 (*pos)++;
2445
2446 if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2447 return(0);
2448
2449 while (1) {
2450 if (flags & ROFFNUM_WHITE)
2451 while (isspace((unsigned char)v[*pos]))
2452 (*pos)++;
2453
2454 if ( ! roff_getop(v, pos, &operator))
2455 break;
2456
2457 if (flags & ROFFNUM_WHITE)
2458 while (isspace((unsigned char)v[*pos]))
2459 (*pos)++;
2460
2461 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2462 return(0);
2463
2464 if (flags & ROFFNUM_WHITE)
2465 while (isspace((unsigned char)v[*pos]))
2466 (*pos)++;
2467
2468 if (NULL == res)
2469 continue;
2470
2471 switch (operator) {
2472 case '+':
2473 *res += operand2;
2474 break;
2475 case '-':
2476 *res -= operand2;
2477 break;
2478 case '*':
2479 *res *= operand2;
2480 break;
2481 case '/':
2482 if (operand2 == 0) {
2483 mandoc_msg(MANDOCERR_DIVZERO,
2484 r->parse, ln, *pos, v);
2485 *res = 0;
2486 break;
2487 }
2488 *res /= operand2;
2489 break;
2490 case '%':
2491 if (operand2 == 0) {
2492 mandoc_msg(MANDOCERR_DIVZERO,
2493 r->parse, ln, *pos, v);
2494 *res = 0;
2495 break;
2496 }
2497 *res %= operand2;
2498 break;
2499 case '<':
2500 *res = *res < operand2;
2501 break;
2502 case '>':
2503 *res = *res > operand2;
2504 break;
2505 case 'l':
2506 *res = *res <= operand2;
2507 break;
2508 case 'g':
2509 *res = *res >= operand2;
2510 break;
2511 case '=':
2512 *res = *res == operand2;
2513 break;
2514 case '!':
2515 *res = *res != operand2;
2516 break;
2517 case '&':
2518 *res = *res && operand2;
2519 break;
2520 case ':':
2521 *res = *res || operand2;
2522 break;
2523 case 'i':
2524 if (operand2 < *res)
2525 *res = operand2;
2526 break;
2527 case 'a':
2528 if (operand2 > *res)
2529 *res = operand2;
2530 break;
2531 default:
2532 abort();
2533 }
2534 }
2535 return(1);
2536 }
2537
2538 /* --- register management ------------------------------------------------ */
2539
2540 void
2541 roff_setreg(struct roff *r, const char *name, int val, char sign)
2542 {
2543 struct roffreg *reg;
2544
2545 /* Search for an existing register with the same name. */
2546 reg = r->regtab;
2547
2548 while (reg && strcmp(name, reg->key.p))
2549 reg = reg->next;
2550
2551 if (NULL == reg) {
2552 /* Create a new register. */
2553 reg = mandoc_malloc(sizeof(struct roffreg));
2554 reg->key.p = mandoc_strdup(name);
2555 reg->key.sz = strlen(name);
2556 reg->val = 0;
2557 reg->next = r->regtab;
2558 r->regtab = reg;
2559 }
2560
2561 if ('+' == sign)
2562 reg->val += val;
2563 else if ('-' == sign)
2564 reg->val -= val;
2565 else
2566 reg->val = val;
2567 }
2568
/*
 * Look up a predefined read-only number register.
 * Only the first character of name is inspected; callers pass
 * the part of the register name following the leading dot.
 * Return the fixed value of the register, or -1 if there is no
 * such predefined register.  Should a predefined register with
 * the value -1 ever be needed, a different marker value would
 * have to be chosen.
 */
static int
roff_getregro(const char *name)
{
	static const struct {
		char	 letter;
		int	 value;
	} ro[] = {
		{ 'A',  0 },	/* ASCII approximation mode is always off. */
		{ 'g',  1 },	/* Groff compatibility mode is always on. */
		{ 'H', 24 },	/* Fixed horizontal resolution. */
		{ 'j',  0 },	/* Always adjust left margin only. */
		{ 'T',  1 },	/* Some output device is always defined. */
		{ 'V', 40 },	/* Fixed vertical resolution. */
	};
	size_t		 i;

	for (i = 0; i < sizeof(ro) / sizeof(ro[0]); i++)
		if (*name == ro[i].letter)
			return(ro[i].value);

	return(-1);
}
2596
2597 int
2598 roff_getreg(const struct roff *r, const char *name)
2599 {
2600 struct roffreg *reg;
2601 int val;
2602
2603 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
2604 val = roff_getregro(name + 1);
2605 if (-1 != val)
2606 return (val);
2607 }
2608
2609 for (reg = r->regtab; reg; reg = reg->next)
2610 if (0 == strcmp(name, reg->key.p))
2611 return(reg->val);
2612
2613 return(0);
2614 }
2615
2616 static int
2617 roff_getregn(const struct roff *r, const char *name, size_t len)
2618 {
2619 struct roffreg *reg;
2620 int val;
2621
2622 if ('.' == name[0] && 2 == len) {
2623 val = roff_getregro(name + 1);
2624 if (-1 != val)
2625 return (val);
2626 }
2627
2628 for (reg = r->regtab; reg; reg = reg->next)
2629 if (len == reg->key.sz &&
2630 0 == strncmp(name, reg->key.p, len))
2631 return(reg->val);
2632
2633 return(0);
2634 }
2635
2636 static int
2637 roff_hasregn(const struct roff *r, const char *name, size_t len)
2638 {
2639 struct roffreg *reg;
2640 int val;
2641
2642 if ('.' == name[0] && 2 == len) {
2643 val = roff_getregro(name + 1);
2644 if (-1 != val)
2645 return(1);
2646 }
2647
2648 for (reg = r->regtab; reg; reg = reg->next)
2649 if (len == reg->key.sz &&
2650 0 == strncmp(name, reg->key.p, len))
2651 return(1);
2652
2653 return(0);
2654 }
2655
2656 static void
2657 roff_freereg(struct roffreg *reg)
2658 {
2659 struct roffreg *old_reg;
2660
2661 while (NULL != reg) {
2662 free(reg->key.p);
2663 old_reg = reg;
2664 reg = reg->next;
2665 free(old_reg);
2666 }
2667 }
2668
2669 static enum rofferr
2670 roff_nr(ROFF_ARGS)
2671 {
2672 char *key, *val;
2673 size_t keysz;
2674 int iv;
2675 char sign;
2676
2677 key = val = buf->buf + pos;
2678 if (*key == '\0')
2679 return(ROFF_IGN);
2680
2681 keysz = roff_getname(r, &val, ln, pos);
2682 if (key[keysz] == '\\')
2683 return(ROFF_IGN);
2684 key[keysz] = '\0';
2685
2686 sign = *val;
2687 if (sign == '+' || sign == '-')
2688 val++;
2689
2690 if (roff_evalnum(r, ln, val, NULL, &iv, ROFFNUM_SCALE))
2691 roff_setreg(r, key, iv, sign);
2692
2693 return(ROFF_IGN);
2694 }
2695
2696 static enum rofferr
2697 roff_rr(ROFF_ARGS)
2698 {
2699 struct roffreg *reg, **prev;
2700 char *name, *cp;
2701 size_t namesz;
2702
2703 name = cp = buf->buf + pos;
2704 if (*name == '\0')
2705 return(ROFF_IGN);
2706 namesz = roff_getname(r, &cp, ln, pos);
2707 name[namesz] = '\0';
2708
2709 prev = &r->regtab;
2710 while (1) {
2711 reg = *prev;
2712 if (reg == NULL || !strcmp(name, reg->key.p))
2713 break;
2714 prev = &reg->next;
2715 }
2716 if (reg != NULL) {
2717 *prev = reg->next;
2718 free(reg->key.p);
2719 free(reg);
2720 }
2721 return(ROFF_IGN);
2722 }
2723
2724 /* --- handler functions for roff requests -------------------------------- */
2725
2726 static enum rofferr
2727 roff_rm(ROFF_ARGS)
2728 {
2729 const char *name;
2730 char *cp;
2731 size_t namesz;
2732
2733 cp = buf->buf + pos;
2734 while (*cp != '\0') {
2735 name = cp;
2736 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
2737 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
2738 if (name[namesz] == '\\')
2739 break;
2740 }
2741 return(ROFF_IGN);
2742 }
2743
2744 static enum rofferr
2745 roff_it(ROFF_ARGS)
2746 {
2747 int iv;
2748
2749 /* Parse the number of lines. */
2750
2751 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
2752 mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
2753 ln, ppos, buf->buf + 1);
2754 return(ROFF_IGN);
2755 }
2756
2757 while (isspace((unsigned char)buf->buf[pos]))
2758 pos++;
2759
2760 /*
2761 * Arm the input line trap.
2762 * Special-casing "an-trap" is an ugly workaround to cope
2763 * with DocBook stupidly fiddling with man(7) internals.
2764 */
2765
2766 roffit_lines = iv;
2767 roffit_macro = mandoc_strdup(iv != 1 ||
2768 strcmp(buf->buf + pos, "an-trap") ?
2769 buf->buf + pos : "br");
2770 return(ROFF_IGN);
2771 }
2772
2773 static enum rofferr
2774 roff_Dd(ROFF_ARGS)
2775 {
2776 const char *const *cp;
2777
2778 if ((r->options & (MPARSE_MDOC | MPARSE_QUICK)) == 0)
2779 for (cp = __mdoc_reserved; *cp; cp++)
2780 roff_setstr(r, *cp, NULL, 0);
2781
2782 if (r->format == 0)
2783 r->format = MPARSE_MDOC;
2784
2785 return(ROFF_CONT);
2786 }
2787
2788 static enum rofferr
2789 roff_TH(ROFF_ARGS)
2790 {
2791 const char *const *cp;
2792
2793 if ((r->options & MPARSE_QUICK) == 0)
2794 for (cp = __man_reserved; *cp; cp++)
2795 roff_setstr(r, *cp, NULL, 0);
2796
2797 if (r->format == 0)
2798 r->format = MPARSE_MAN;
2799
2800 return(ROFF_CONT);
2801 }
2802
2803 static enum rofferr
2804 roff_TE(ROFF_ARGS)
2805 {
2806
2807 if (NULL == r->tbl)
2808 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2809 ln, ppos, "TE");
2810 else if ( ! tbl_end(&r->tbl)) {
2811 free(buf->buf);
2812 buf->buf = mandoc_strdup(".sp");
2813 buf->sz = 4;
2814 return(ROFF_REPARSE);
2815 }
2816 return(ROFF_IGN);
2817 }
2818
2819 static enum rofferr
2820 roff_T_(ROFF_ARGS)
2821 {
2822
2823 if (NULL == r->tbl)
2824 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2825 ln, ppos, "T&");
2826 else
2827 tbl_restart(ppos, ln, r->tbl);
2828
2829 return(ROFF_IGN);
2830 }
2831
2832 /*
2833 * Handle in-line equation delimiters.
2834 */
2835 static enum rofferr
2836 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
2837 {
2838 char *cp1, *cp2;
2839 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
2840
2841 /*
2842 * Outside equations, look for an opening delimiter.
2843 * If we are inside an equation, we already know it is
2844 * in-line, or this function wouldn't have been called;
2845 * so look for a closing delimiter.
2846 */
2847
2848 cp1 = buf->buf + pos;
2849 cp2 = strchr(cp1, r->eqn == NULL ?
2850 r->last_eqn->odelim : r->last_eqn->cdelim);
2851 if (cp2 == NULL)
2852 return(ROFF_CONT);
2853
2854 *cp2++ = '\0';
2855 bef_pr = bef_nl = aft_nl = aft_pr = "";
2856
2857 /* Handle preceding text, protecting whitespace. */
2858
2859 if (*buf->buf != '\0') {
2860 if (r->eqn == NULL)
2861 bef_pr = "\\&";
2862 bef_nl = "\n";
2863 }
2864
2865 /*
2866 * Prepare replacing the delimiter with an equation macro
2867 * and drop leading white space from the equation.
2868 */
2869
2870 if (r->eqn == NULL) {
2871 while (*cp2 == ' ')
2872 cp2++;
2873 mac = ".EQ";
2874 } else
2875 mac = ".EN";
2876
2877 /* Handle following text, protecting whitespace. */
2878
2879 if (*cp2 != '\0') {
2880 aft_nl = "\n";
2881 if (r->eqn != NULL)
2882 aft_pr = "\\&";
2883 }
2884
2885 /* Do the actual replacement. */
2886
2887 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
2888 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
2889 free(buf->buf);
2890 buf->buf = cp1;
2891
2892 /* Toggle the in-line state of the eqn subsystem. */
2893
2894 r->eqn_inline = r->eqn == NULL;
2895 return(ROFF_REPARSE);
2896 }
2897
2898 static enum rofferr
2899 roff_EQ(ROFF_ARGS)
2900 {
2901 struct eqn_node *e;
2902
2903 assert(r->eqn == NULL);
2904 e = eqn_alloc(ppos, ln, r->parse);
2905
2906 if (r->last_eqn) {
2907 r->last_eqn->next = e;
2908 e->delim = r->last_eqn->delim;
2909 e->odelim = r->last_eqn->odelim;
2910 e->cdelim = r->last_eqn->cdelim;
2911 } else
2912 r->first_eqn = r->last_eqn = e;
2913
2914 r->eqn = r->last_eqn = e;
2915
2916 if (buf->buf[pos] != '\0')
2917 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2918 ".EQ %s", buf->buf + pos);
2919
2920 return(ROFF_IGN);
2921 }
2922
2923 static enum rofferr
2924 roff_EN(ROFF_ARGS)
2925 {
2926
2927 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
2928 return(ROFF_IGN);
2929 }
2930
2931 static enum rofferr
2932 roff_TS(ROFF_ARGS)
2933 {
2934 struct tbl_node *tbl;
2935
2936 if (r->tbl) {
2937 mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
2938 ln, ppos, "TS breaks TS");
2939 tbl_end(&r->tbl);
2940 }
2941
2942 tbl = tbl_alloc(ppos, ln, r->parse);
2943
2944 if (r->last_tbl)
2945 r->last_tbl->next = tbl;
2946 else
2947 r->first_tbl = r->last_tbl = tbl;
2948
2949 r->tbl = r->last_tbl = tbl;
2950 return(ROFF_IGN);
2951 }
2952
2953 static enum rofferr
2954 roff_brp(ROFF_ARGS)
2955 {
2956
2957 buf->buf[pos - 1] = '\0';
2958 return(ROFF_CONT);
2959 }
2960
2961 static enum rofferr
2962 roff_cc(ROFF_ARGS)
2963 {
2964 const char *p;
2965
2966 p = buf->buf + pos;
2967
2968 if (*p == '\0' || (r->control = *p++) == '.')
2969 r->control = 0;
2970
2971 if (*p != '\0')
2972 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
2973 ln, p - buf->buf, "cc ... %s", p);
2974
2975 return(ROFF_IGN);
2976 }
2977
2978 static enum rofferr
2979 roff_tr(ROFF_ARGS)
2980 {
2981 const char *p, *first, *second;
2982 size_t fsz, ssz;
2983 enum mandoc_esc esc;
2984
2985 p = buf->buf + pos;
2986
2987 if (*p == '\0') {
2988 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr");
2989 return(ROFF_IGN);
2990 }
2991
2992 while (*p != '\0') {
2993 fsz = ssz = 1;
2994
2995 first = p++;
2996 if (*first == '\\') {
2997 esc = mandoc_escape(&p, NULL, NULL);
2998 if (esc == ESCAPE_ERROR) {
2999 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
3000 ln, (int)(p - buf->buf), first);
3001 return(ROFF_IGN);
3002 }
3003 fsz = (size_t)(p - first);
3004 }
3005
3006 second = p++;
3007 if (*second == '\\') {
3008 esc = mandoc_escape(&p, NULL, NULL);
3009 if (esc == ESCAPE_ERROR) {
3010 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
3011 ln, (int)(p - buf->buf), second);
3012 return(ROFF_IGN);
3013 }
3014 ssz = (size_t)(p - second);
3015 } else if (*second == '\0') {
3016 mandoc_vmsg(MANDOCERR_TR_ODD, r->parse,
3017 ln, first - buf->buf, "tr %s", first);
3018 second = " ";
3019 p--;
3020 }
3021
3022 if (fsz > 1) {
3023 roff_setstrn(&r->xmbtab, first, fsz,
3024 second, ssz, 0);
3025 continue;
3026 }
3027
3028 if (r->xtab == NULL)
3029 r->xtab = mandoc_calloc(128,
3030 sizeof(struct roffstr));
3031
3032 free(r->xtab[(int)*first].p);
3033 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3034 r->xtab[(int)*first].sz = ssz;
3035 }
3036
3037 return(ROFF_IGN);
3038 }
3039
3040 static enum rofferr
3041 roff_so(ROFF_ARGS)
3042 {
3043 char *name, *cp;
3044
3045 name = buf->buf + pos;
3046 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
3047
3048 /*
3049 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3050 * opening anything that's not in our cwd or anything beneath
3051 * it. Thus, explicitly disallow traversing up the file-system
3052 * or using absolute paths.
3053 */
3054
3055 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3056 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
3057 ".so %s", name);
3058 buf->sz = mandoc_asprintf(&cp,
3059 ".sp\nSee the file %s.\n.sp", name) + 1;
3060 free(buf->buf);
3061 buf->buf = cp;
3062 *offs = 0;
3063 return(ROFF_REPARSE);
3064 }
3065
3066 *offs = pos;
3067 return(ROFF_SO);
3068 }
3069
3070 /* --- user defined strings and macros ------------------------------------ */
3071
3072 static enum rofferr
3073 roff_userdef(ROFF_ARGS)
3074 {
3075 const char *arg[9], *ap;
3076 char *cp, *n1, *n2;
3077 int i;
3078 size_t asz, rsz;
3079
3080 /*
3081 * Collect pointers to macro argument strings
3082 * and NUL-terminate them.
3083 */
3084
3085 cp = buf->buf + pos;
3086 for (i = 0; i < 9; i++)
3087 arg[i] = *cp == '\0' ? "" :
3088 mandoc_getarg(r->parse, &cp, ln, &pos);
3089
3090 /*
3091 * Expand macro arguments.
3092 */
3093
3094 buf->sz = strlen(r->current_string) + 1;
3095 n1 = cp = mandoc_malloc(buf->sz);
3096 memcpy(n1, r->current_string, buf->sz);
3097 while (*cp != '\0') {
3098
3099 /* Scan ahead for the next argument invocation. */
3100
3101 if (*cp++ != '\\')
3102 continue;
3103 if (*cp++ != '$')
3104 continue;
3105 i = *cp - '1';
3106 if (0 > i || 8 < i)
3107 continue;
3108 cp -= 2;
3109
3110 /*
3111 * Determine the size of the expanded argument,
3112 * taking escaping of quotes into account.
3113 */
3114
3115 asz = 0;
3116 for (ap = arg[i]; *ap != '\0'; ap++) {
3117 asz++;
3118 if (*ap == '"')
3119 asz += 3;
3120 }
3121 if (asz != 3) {
3122
3123 /*
3124 * Determine the size of the rest of the
3125 * unexpanded macro, including the NUL.
3126 */
3127
3128 rsz = buf->sz - (cp - n1) - 3;
3129
3130 /*
3131 * When shrinking, move before
3132 * releasing the storage.
3133 */
3134
3135 if (asz < 3)
3136 memmove(cp + asz, cp + 3, rsz);
3137
3138 /*
3139 * Resize the storage for the macro
3140 * and readjust the parse pointer.
3141 */
3142
3143 buf->sz += asz - 3;
3144 n2 = mandoc_realloc(n1, buf->sz);
3145 cp = n2 + (cp - n1);
3146 n1 = n2;
3147
3148 /*
3149 * When growing, make room
3150 * for the expanded argument.
3151 */
3152
3153 if (asz > 3)
3154 memmove(cp + asz, cp + 3, rsz);
3155 }
3156
3157 /* Copy the expanded argument, escaping quotes. */
3158
3159 n2 = cp;
3160 for (ap = arg[i]; *ap != '\0'; ap++) {
3161 if (*ap == '"') {
3162 memcpy(n2, "\\(dq", 4);
3163 n2 += 4;
3164 } else
3165 *n2++ = *ap;
3166 }
3167 }
3168
3169 /*
3170 * Replace the macro invocation
3171 * by the expanded macro.
3172 */
3173
3174 free(buf->buf);
3175 buf->buf = n1;
3176 *offs = 0;
3177
3178 return(buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
3179 ROFF_REPARSE : ROFF_APPEND);
3180 }
3181
3182 static size_t
3183 roff_getname(struct roff *r, char **cpp, int ln, int pos)
3184 {
3185 char *name, *cp;
3186 size_t namesz;
3187
3188 name = *cpp;
3189 if ('\0' == *name)
3190 return(0);
3191
3192 /* Read until end of name and terminate it with NUL. */
3193 for (cp = name; 1; cp++) {
3194 if ('\0' == *cp || ' ' == *cp) {
3195 namesz = cp - name;
3196 break;
3197 }
3198 if ('\\' != *cp)
3199 continue;
3200 namesz = cp - name;
3201 if ('{' == cp[1] || '}' == cp[1])
3202 break;
3203 cp++;
3204 if ('\\' == *cp)
3205 continue;
3206 mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
3207 "%.*s", (int)(cp - name + 1), name);
3208 mandoc_escape((const char **)&cp, NULL, NULL);
3209 break;
3210 }
3211
3212 /* Read past spaces. */
3213 while (' ' == *cp)
3214 cp++;
3215
3216 *cpp = cp;
3217 return(namesz);
3218 }
3219
3220 /*
3221 * Store *string into the user-defined string called *name.
3222 * To clear an existing entry, call with (*r, *name, NULL, 0).
3223 * append == 0: replace mode
3224 * append == 1: single-line append mode
3225 * append == 2: multiline append mode, append '\n' after each call
3226 */
3227 static void
3228 roff_setstr(struct roff *r, const char *name, const char *string,
3229 int append)
3230 {
3231
3232 roff_setstrn(&r->strtab, name, strlen(name), string,
3233 string ? strlen(string) : 0, append);
3234 }
3235
3236 static void
3237 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
3238 const char *string, size_t stringsz, int append)
3239 {
3240 struct roffkv *n;
3241 char *c;
3242 int i;
3243 size_t oldch, newch;
3244
3245 /* Search for an existing string with the same name. */
3246 n = *r;
3247
3248 while (n && (namesz != n->key.sz ||
3249 strncmp(n->key.p, name, namesz)))
3250 n = n->next;
3251
3252 if (NULL == n) {
3253 /* Create a new string table entry. */
3254 n = mandoc_malloc(sizeof(struct roffkv));
3255 n->key.p = mandoc_strndup(name, namesz);
3256 n->key.sz = namesz;
3257 n->val.p = NULL;
3258 n->val.sz = 0;
3259 n->next = *r;
3260 *r = n;
3261 } else if (0 == append) {
3262 free(n->val.p);
3263 n->val.p = NULL;
3264 n->val.sz = 0;
3265 }
3266
3267 if (NULL == string)
3268 return;
3269
3270 /*
3271 * One additional byte for the '\n' in multiline mode,
3272 * and one for the terminating '\0'.
3273 */
3274 newch = stringsz + (1 < append ? 2u : 1u);
3275
3276 if (NULL == n->val.p) {
3277 n->val.p = mandoc_malloc(newch);
3278 *n->val.p = '\0';
3279 oldch = 0;
3280 } else {
3281 oldch = n->val.sz;
3282 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
3283 }
3284
3285 /* Skip existing content in the destination buffer. */
3286 c = n->val.p + (int)oldch;
3287
3288 /* Append new content to the destination buffer. */
3289 i = 0;
3290 while (i < (int)stringsz) {
3291 /*
3292 * Rudimentary roff copy mode:
3293 * Handle escaped backslashes.
3294 */
3295 if ('\\' == string[i] && '\\' == string[i + 1])
3296 i++;
3297 *c++ = string[i++];
3298 }
3299
3300 /* Append terminating bytes. */
3301 if (1 < append)
3302 *c++ = '\n';
3303
3304 *c = '\0';
3305 n->val.sz = (int)(c - n->val.p);
3306 }
3307
3308 static const char *
3309 roff_getstrn(const struct roff *r, const char *name, size_t len)
3310 {
3311 const struct roffkv *n;
3312 int i;
3313
3314 for (n = r->strtab; n; n = n->next)
3315 if (0 == strncmp(name, n->key.p, len) &&
3316 '\0' == n->key.p[(int)len])
3317 return(n->val.p);
3318
3319 for (i = 0; i < PREDEFS_MAX; i++)
3320 if (0 == strncmp(name, predefs[i].name, len) &&
3321 '\0' == predefs[i].name[(int)len])
3322 return(predefs[i].str);
3323
3324 return(NULL);
3325 }
3326
3327 static void
3328 roff_freestr(struct roffkv *r)
3329 {
3330 struct roffkv *n, *nn;
3331
3332 for (n = r; n; n = nn) {
3333 free(n->key.p);
3334 free(n->val.p);
3335 nn = n->next;
3336 free(n);
3337 }
3338 }
3339
3340 /* --- accessors and utility functions ------------------------------------ */
3341
3342 const struct tbl_span *
3343 roff_span(const struct roff *r)
3344 {
3345
3346 return(r->tbl ? tbl_span(r->tbl) : NULL);
3347 }
3348
3349 const struct eqn *
3350 roff_eqn(const struct roff *r)
3351 {
3352
3353 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
3354 }
3355
3356 /*
3357 * Duplicate an input string, making the appropriate character
3358 * conversations (as stipulated by `tr') along the way.
3359 * Returns a heap-allocated string with all the replacements made.
3360 */
3361 char *
3362 roff_strdup(const struct roff *r, const char *p)
3363 {
3364 const struct roffkv *cp;
3365 char *res;
3366 const char *pp;
3367 size_t ssz, sz;
3368 enum mandoc_esc esc;
3369
3370 if (NULL == r->xmbtab && NULL == r->xtab)
3371 return(mandoc_strdup(p));
3372 else if ('\0' == *p)
3373 return(mandoc_strdup(""));
3374
3375 /*
3376 * Step through each character looking for term matches
3377 * (remember that a `tr' can be invoked with an escape, which is
3378 * a glyph but the escape is multi-character).
3379 * We only do this if the character hash has been initialised
3380 * and the string is >0 length.
3381 */
3382
3383 res = NULL;
3384 ssz = 0;
3385
3386 while ('\0' != *p) {
3387 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
3388 sz = r->xtab[(int)*p].sz;
3389 res = mandoc_realloc(res, ssz + sz + 1);
3390 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
3391 ssz += sz;
3392 p++;
3393 continue;
3394 } else if ('\\' != *p) {
3395 res = mandoc_realloc(res, ssz + 2);
3396 res[ssz++] = *p++;
3397 continue;
3398 }
3399
3400 /* Search for term matches. */
3401 for (cp = r->xmbtab; cp; cp = cp->next)
3402 if (0 == strncmp(p, cp->key.p, cp->key.sz))
3403 break;
3404
3405 if (NULL != cp) {
3406 /*
3407 * A match has been found.
3408 * Append the match to the array and move
3409 * forward by its keysize.
3410 */
3411 res = mandoc_realloc(res,
3412 ssz + cp->val.sz + 1);
3413 memcpy(res + ssz, cp->val.p, cp->val.sz);
3414 ssz += cp->val.sz;
3415 p += (int)cp->key.sz;
3416 continue;
3417 }
3418
3419 /*
3420 * Handle escapes carefully: we need to copy
3421 * over just the escape itself, or else we might
3422 * do replacements within the escape itself.
3423 * Make sure to pass along the bogus string.
3424 */
3425 pp = p++;
3426 esc = mandoc_escape(&p, NULL, NULL);
3427 if (ESCAPE_ERROR == esc) {
3428 sz = strlen(pp);
3429 res = mandoc_realloc(res, ssz + sz + 1);
3430 memcpy(res + ssz, pp, sz);
3431 break;
3432 }
3433 /*
3434 * We bail out on bad escapes.
3435 * No need to warn: we already did so when
3436 * roff_res() was called.
3437 */
3438 sz = (int)(p - pp);
3439 res = mandoc_realloc(res, ssz + sz + 1);
3440 memcpy(res + ssz, pp, sz);
3441 ssz += sz;
3442 }
3443
3444 res[(int)ssz] = '\0';
3445 return(res);
3446 }
3447
3448 int
3449 roff_getformat(const struct roff *r)
3450 {
3451
3452 return(r->format);
3453 }
3454
3455 /*
3456 * Find out whether a line is a macro line or not.
3457 * If it is, adjust the current position and return one; if it isn't,
3458 * return zero and don't change the current position.
3459 * If the control character has been set with `.cc', then let that grain
3460 * precedence.
3461 * This is slighly contrary to groff, where using the non-breaking
3462 * control character when `cc' has been invoked will cause the
3463 * non-breaking macro contents to be printed verbatim.
3464 */
3465 int
3466 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
3467 {
3468 int pos;
3469
3470 pos = *ppos;
3471
3472 if (0 != r->control && cp[pos] == r->control)
3473 pos++;
3474 else if (0 != r->control)
3475 return(0);
3476 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
3477 pos += 2;
3478 else if ('.' == cp[pos] || '\'' == cp[pos])
3479 pos++;
3480 else
3481 return(0);
3482
3483 while (' ' == cp[pos] || '\t' == cp[pos])
3484 pos++;
3485
3486 *ppos = pos;
3487 return(1);
3488 }