]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
In -Tascii mode, print "<?>" only for Unicode escapes of unknown
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.237 2014/10/28 17:36:19 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include "mandoc.h"
29 #include "mandoc_aux.h"
30 #include "libmandoc.h"
31 #include "libroff.h"
32
/* Maximum number of nested if-else conditionals. */
#define	RSTACK_MAX	128

/*
 * Maximum number of string expansions per line, to break infinite loops.
 * roff_res() counts the expansions it performs on one line and gives
 * up on the line once the count exceeds this limit.
 */
#define	EXPAND_LIMIT	1000
38
/*
 * Tokens for the requests and macros handled in this file.
 * The values are used as indices into the roffs[] table, and
 * roffhash_find() derives a token from the address of a table entry,
 * so the order here has to match the order of roffs[].
 */
enum	rofft {
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_as,
	ROFF_cc,
	ROFF_ce,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_fam,
	ROFF_hw,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_pl,
	ROFF_ps,
	ROFF_rm,
	ROFF_rr,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,
	ROFF_Dd,
	ROFF_TH,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,	/* the "T&" entry of roffs[] */
	ROFF_EQ,
	ROFF_EN,
	ROFF_cblock,	/* the "." entry of roffs[], i.e. a block-closing ".." */
	ROFF_USERDEF,	/* user-defined macro; not entered into the hash table */
	ROFF_MAX	/* number of tokens; also returned for "not found" */
};
81
/*
 * An incredibly-simple string buffer:
 * a pointer to the text together with its cached length.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
89
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * Used by struct roff for the table of user-defined strings and
 * macros (strtab) and for the multi-byte translation table (xmbtab).
 */
struct	roffkv {
	struct roffstr	 key;
	struct roffstr	 val;
	struct roffkv	*next; /* next in list */
};
98
/*
 * A single number register as part of a singly-linked list.
 * The list head is kept in the regtab member of struct roff.
 */
struct	roffreg {
	struct roffstr	 key;	/* register name */
	int		 val;	/* register value */
	struct roffreg	*next;	/* next in list */
};
107
/*
 * The state of one roff parser.
 * Allocated by roff_alloc(), emptied for reuse by roff_reset(),
 * and destroyed by roff_free().
 */
struct	roff {
	struct mparse	*parse; /* parse point */
	const struct mchars *mchars; /* character table */
	struct roffnode	*last; /* leaf of stack */
	int		*rstack; /* stack of inverted `ie' values */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* last equation parsed */
	struct eqn_node	*first_eqn; /* first equation parsed */
	struct eqn_node	*eqn; /* current equation being parsed */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack; -1 after allocation and reset */
	int		 format; /* current file in mdoc or man format */
	char		 control; /* control character */
};
131
/*
 * One entry on the stack of open roff scopes (blocks and
 * conditionals).  The top of the stack is the last member of
 * struct roff; entries are created by roffnode_push() and
 * released by roffnode_pop().
 */
struct	roffnode {
	enum rofft	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* end-rules: custom token */
	int		 endspan; /* end-rules: next-line or infty */
	int		 rule; /* current evaluation rule */
};
142
/*
 * The parameter list shared by all request handlers,
 * and the function pointer type for such a handler.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum rofft tok, /* tok of macro */ \
			 char **bufp, /* input buffer */ \
			 size_t *szp, /* size of input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

typedef	enum rofferr (*roffproc)(ROFF_ARGS);
153
/*
 * Description of one built-in request or macro: its name, the
 * handlers to call in the various parse states, and the link to
 * the next entry in the same hash bucket.
 */
struct	roffmac {
	const char	*name; /* macro name */
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
	struct roffmac	*next; /* next in hash chain, set by roffhash_init() */
};
163
/*
 * A predefined string: a name and the text that replaces it.
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Expands to one struct predef initializer followed by a comma.
 * NOTE(review): presumably the form in which "predefs.in" lists
 * its entries - confirm against that file.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
171
/*
 * Forward declarations of the file-local functions.
 * Those declared with ROFF_ARGS are request handlers of type roffproc.
 */
static	enum rofft	 roffhash_find(const char *, size_t);
static	void		 roffhash_init(void);
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum rofft,
				const char *, int, int);
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_cc(ROFF_ARGS);
static	void		 roff_ccond(struct roff *, int, int);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum rofferr	 roff_eqndelim(struct roff *,
				char **, size_t *, int);
static	int		 roff_evalcond(struct roff *r, int,
				const char *, int *);
static	int		 roff_evalnum(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalpar(struct roff *, int,
				const char *, int *, int *);
static	int		 roff_evalstrcond(const char *, int *);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	size_t		 roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(const struct roff *,
				const char *, size_t);
static	int		 roff_getregro(const char *name);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	enum rofferr	 roff_it(ROFF_ARGS);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	enum rofft	 roff_parse(struct roff *, char *, int *,
				int, int);
static	enum rofferr	 roff_parsetext(char **, size_t *, int, int *);
static	enum rofferr	 roff_res(struct roff *,
				char **, size_t *, int, int);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	enum rofferr	 roff_rr(ROFF_ARGS);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_tr(ROFF_ARGS);
static	enum rofferr	 roff_Dd(ROFF_ARGS);
static	enum rofferr	 roff_TH(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
232
/*
 * See roffhash_find().
 * The hash table of built-in requests has one bucket for each
 * character code from ASCII_LO to ASCII_HI inclusive; a name is
 * filed under its first character.
 */

#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

static	struct roffmac	*hash[HASHWIDTH];
240
/*
 * The table of built-in requests and macros, indexed by enum rofft;
 * the entries must stay in the same order as the enum.
 * Fields: name, proc, text, sub, flags, next (see struct roffmac).
 * Only entries that can open a scope provide text and sub handlers.
 * The final entry stands for user-defined macros and has no name.
 */
static	struct roffmac	 roffs[ROFF_MAX] = {
	{ "ad", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "as", roff_ds, NULL, NULL, 0, NULL },
	{ "cc", roff_cc, NULL, NULL, 0, NULL },
	{ "ce", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ds", roff_ds, NULL, NULL, 0, NULL },
	{ "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "fam", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hw", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hy", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "it", roff_it, NULL, NULL, 0, NULL },
	{ "ne", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nh", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nr", roff_nr, NULL, NULL, 0, NULL },
	{ "ns", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "pl", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ps", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rm", roff_rm, NULL, NULL, 0, NULL },
	{ "rr", roff_rr, NULL, NULL, 0, NULL },
	{ "so", roff_so, NULL, NULL, 0, NULL },
	{ "ta", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tr", roff_tr, NULL, NULL, 0, NULL },
	{ "Dd", roff_Dd, NULL, NULL, 0, NULL },
	{ "TH", roff_TH, NULL, NULL, 0, NULL },
	{ "TS", roff_TS, NULL, NULL, 0, NULL },
	{ "TE", roff_TE, NULL, NULL, 0, NULL },
	{ "T&", roff_T_, NULL, NULL, 0, NULL },
	{ "EQ", roff_EQ, NULL, NULL, 0, NULL },
	{ "EN", roff_EN, NULL, NULL, 0, NULL },
	{ ".", roff_cblock, NULL, NULL, 0, NULL },
	{ NULL, roff_userdef, NULL, NULL, 0, NULL },
};
282
/*
 * NULL-terminated list of mdoc macro names.
 * NOTE(review): no user of this array is visible in this part of
 * the file; presumably it marks names that must not be redefined.
 */
/* not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH */
const	char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
	"Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
	"Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em",
	"En", "Eo", "Er", "Es", "Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp",
	"Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "Pq",
	"Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv",
	"Sc", "Sh", "Sm", "So", "Sq",
	"Ss", "St", "Sx", "Sy",
	"Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
	"%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
305
/*
 * NULL-terminated list of man macro names.
 * NOTE(review): no user of this array is visible in this part of
 * the file; presumably it marks names that must not be redefined.
 */
/* not currently implemented: BT DE DS ME MT PT SY TQ YS */
const	char *const __man_reserved[] = {
	"AT", "B", "BI", "BR", "DT",
	"EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR",
	"LP", "OP", "P", "PD", "PP",
	"R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS",
	"TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR",
	NULL
};
315
/*
 * Array of injected predefined strings.
 * The initializers come from "predefs.in".
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};

/*
 * See roffhash_find().
 * Maps a name to its bucket in hash[]: the first character, offset
 * by ASCII_LO.  Callers check the ASCII_LO to ASCII_HI range first.
 */
#define	ROFF_HASH(p)	(p[0] - ASCII_LO)

/*
 * State of the input line trap, consumed by roff_parsetext():
 * the trap springs on the text line that finds roffit_lines at 1.
 */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
327
328
329 static void
330 roffhash_init(void)
331 {
332 struct roffmac *n;
333 int buc, i;
334
335 for (i = 0; i < (int)ROFF_USERDEF; i++) {
336 assert(roffs[i].name[0] >= ASCII_LO);
337 assert(roffs[i].name[0] <= ASCII_HI);
338
339 buc = ROFF_HASH(roffs[i].name);
340
341 if (NULL != (n = hash[buc])) {
342 for ( ; n->next; n = n->next)
343 /* Do nothing. */ ;
344 n->next = &roffs[i];
345 } else
346 hash[buc] = &roffs[i];
347 }
348 }
349
350 /*
351 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
352 * the nil-terminated string name could be found.
353 */
354 static enum rofft
355 roffhash_find(const char *p, size_t s)
356 {
357 int buc;
358 struct roffmac *n;
359
360 /*
361 * libroff has an extremely simple hashtable, for the time
362 * being, which simply keys on the first character, which must
363 * be printable, then walks a chain. It works well enough until
364 * optimised.
365 */
366
367 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
368 return(ROFF_MAX);
369
370 buc = ROFF_HASH(p);
371
372 if (NULL == (n = hash[buc]))
373 return(ROFF_MAX);
374 for ( ; n; n = n->next)
375 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
376 return((enum rofft)(n - roffs));
377
378 return(ROFF_MAX);
379 }
380
381 /*
382 * Pop the current node off of the stack of roff instructions currently
383 * pending.
384 */
385 static void
386 roffnode_pop(struct roff *r)
387 {
388 struct roffnode *p;
389
390 assert(r->last);
391 p = r->last;
392
393 r->last = r->last->parent;
394 free(p->name);
395 free(p->end);
396 free(p);
397 }
398
399 /*
400 * Push a roff node onto the instruction stack. This must later be
401 * removed with roffnode_pop().
402 */
403 static void
404 roffnode_push(struct roff *r, enum rofft tok, const char *name,
405 int line, int col)
406 {
407 struct roffnode *p;
408
409 p = mandoc_calloc(1, sizeof(struct roffnode));
410 p->tok = tok;
411 if (name)
412 p->name = mandoc_strdup(name);
413 p->parent = r->last;
414 p->line = line;
415 p->col = col;
416 p->rule = p->parent ? p->parent->rule : 0;
417
418 r->last = p;
419 }
420
421 static void
422 roff_free1(struct roff *r)
423 {
424 struct tbl_node *tbl;
425 struct eqn_node *e;
426 int i;
427
428 while (NULL != (tbl = r->first_tbl)) {
429 r->first_tbl = tbl->next;
430 tbl_free(tbl);
431 }
432 r->first_tbl = r->last_tbl = r->tbl = NULL;
433
434 while (NULL != (e = r->first_eqn)) {
435 r->first_eqn = e->next;
436 eqn_free(e);
437 }
438 r->first_eqn = r->last_eqn = r->eqn = NULL;
439
440 while (r->last)
441 roffnode_pop(r);
442
443 free (r->rstack);
444 r->rstack = NULL;
445 r->rstacksz = 0;
446 r->rstackpos = -1;
447
448 roff_freereg(r->regtab);
449 r->regtab = NULL;
450
451 roff_freestr(r->strtab);
452 roff_freestr(r->xmbtab);
453 r->strtab = r->xmbtab = NULL;
454
455 if (r->xtab)
456 for (i = 0; i < 128; i++)
457 free(r->xtab[i].p);
458 free(r->xtab);
459 r->xtab = NULL;
460 }
461
462 void
463 roff_reset(struct roff *r)
464 {
465
466 roff_free1(r);
467 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
468 r->control = 0;
469 }
470
/*
 * Destroy a parser: release all the data it holds,
 * then the parser object itself.
 */
void
roff_free(struct roff *r)
{

	roff_free1(r);	/* contents first ... */
	free(r);	/* ... then the container */
}
478
479 struct roff *
480 roff_alloc(struct mparse *parse, const struct mchars *mchars, int options)
481 {
482 struct roff *r;
483
484 r = mandoc_calloc(1, sizeof(struct roff));
485 r->parse = parse;
486 r->mchars = mchars;
487 r->options = options;
488 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
489 r->rstackpos = -1;
490
491 roffhash_init();
492
493 return(r);
494 }
495
/*
 * In the current line, expand escape sequences that tend to get
 * used in numerical expressions and conditional requests.
 * Also check the syntax of the remaining escape sequences.
 *
 * The part of *bufp starting at offset pos is scanned backwards
 * from its end.  The escapes \* (user-defined string), \n (number
 * register), \B (test for a valid numeric expression) and \w (width)
 * are replaced by their values.  Each replacement builds a new
 * buffer, frees the old one and updates *bufp and *szp.
 * All other escapes are checked with mandoc_escape() and left alone.
 *
 * Returns ROFF_IGN if the line needs more than EXPAND_LIMIT
 * expansions, ROFF_CONT otherwise.
 */
static enum rofferr
roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
{
	char		 ubuf[24]; /* buffer to print the number */
	const char	*start;	/* start of the string to process */
	char		*stesc;	/* start of an escape sequence ('\\') */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy bufp to */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	enum mandoc_esc	 esc;	/* type of the escape sequence */
	int		 inaml;	/* length returned from mandoc_escape() */
	int		 expand_count;	/* to avoid infinite loops */
	int		 npos;	/* position in numeric expression */
	int		 arg_complete; /* argument not interrupted by eol */
	char		 term;	/* character terminating the escape */

	expand_count = 0;
	start = *bufp + pos;

	/*
	 * Begin at the last byte; the loop condition steps back once
	 * before the body runs, so the last byte itself is never
	 * taken as the start of an escape sequence.
	 */

	stesc = strchr(start, '\0') - 1;
	while (stesc-- > start) {

		/* Search backwards for the next backslash. */

		if ('\\' != *stesc)
			continue;

		/*
		 * If it is escaped, skip it.
		 * Count the run of backslashes ending here; an even
		 * count means this one is preceded by its own escape.
		 */

		for (cp = stesc - 1; cp >= start; cp--)
			if ('\\' != *cp)
				break;

		if (0 == (stesc - cp) % 2) {
			stesc = (char *)cp;
			continue;
		}

		/* Decide whether to expand or to check only. */

		term = '\0';
		cp = stesc + 1;
		switch (*cp) {
		case '*':
			/* Looked up below; stays NULL if undefined. */
			res = NULL;
			break;
		case 'B':
			/* FALLTHROUGH */
		case 'w':
			/* The argument is quoted by the next character. */
			term = cp[1];
			/* FALLTHROUGH */
		case 'n':
			res = ubuf;
			break;
		default:
			/* Not expanded here: validate only. */
			esc = mandoc_escape(&cp, &stnam, &inaml);
			if (esc == ESCAPE_ERROR ||
			    (esc == ESCAPE_SPECIAL &&
			     mchars_spec2cp(r->mchars, stnam, inaml) < 0))
				mandoc_vmsg(MANDOCERR_ESC_BAD,
				    r->parse, ln, (int)(stesc - *bufp),
				    "%.*s", (int)(cp - stesc), stesc);
			continue;
		}

		if (EXPAND_LIMIT < ++expand_count) {
			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
			    ln, (int)(stesc - *bufp), NULL);
			return(ROFF_IGN);
		}

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 * A maxl of zero means that the name extends up to
		 * the terminating character rather than having a
		 * fixed length.
		 */

		if ('\0' == term) {
			switch (*++cp) {
			case '\0':
				maxl = 0;
				break;
			case '(':
				cp++;
				maxl = 2;
				break;
			case '[':
				cp++;
				term = ']';
				maxl = 0;
				break;
			default:
				maxl = 1;
				break;
			}
		} else {
			/* Skip the escape letter and the opening quote. */
			cp += 2;
			maxl = 0;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		arg_complete = 1;
		for (naml = 0; 0 == maxl || naml < maxl; naml++, cp++) {
			if ('\0' == *cp) {
				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
				    ln, (int)(stesc - *bufp), stesc);
				arg_complete = 0;
				break;
			}
			if (0 == maxl && *cp == term) {
				/* Step over the terminator, too. */
				cp++;
				break;
			}
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		switch (stesc[1]) {
		case '*':
			if (arg_complete)
				res = roff_getstrn(r, stnam, naml);
			break;
		case 'B':
			/*
			 * "1" if the whole argument parses as a
			 * numeric expression, "0" otherwise.
			 */
			npos = 0;
			ubuf[0] = arg_complete &&
			    roff_evalnum(r, ln, stnam, &npos, NULL, 0) &&
			    stnam + npos + 1 == cp ? '1' : '0';
			ubuf[1] = '\0';
			break;
		case 'n':
			if (arg_complete)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, stnam, naml));
			else
				ubuf[0] = '\0';
			break;
		case 'w':
			/* use even incomplete args */
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
			    24 * (int)naml);
			break;
		}

		/* Only reached with NULL for an unusable \* escape. */

		if (NULL == res) {
			mandoc_vmsg(MANDOCERR_STR_UNDEF,
			    r->parse, ln, (int)(stesc - *bufp),
			    "%.*s", (int)naml, stnam);
			res = "";
		}

		/*
		 * Replace the escape sequence by the string.
		 * Cutting the old buffer at the backslash makes it
		 * serve as the prefix of the new one.
		 */

		*stesc = '\0';
		*szp = mandoc_asprintf(&nbuf, "%s%s%s",
		    *bufp, res, cp) + 1;

		/*
		 * Prepare for the next replacement.
		 * Resume right behind the inserted text, such that
		 * the text just inserted gets scanned as well.
		 */

		start = nbuf + pos;
		stesc = nbuf + (stesc - *bufp) + strlen(res);
		free(*bufp);
		*bufp = nbuf;
	}
	return(ROFF_CONT);
}
673
674 /*
675 * Process text streams:
676 * Convert all breakable hyphens into ASCII_HYPH.
677 * Decrement and spring input line trap.
678 */
679 static enum rofferr
680 roff_parsetext(char **bufp, size_t *szp, int pos, int *offs)
681 {
682 size_t sz;
683 const char *start;
684 char *p;
685 int isz;
686 enum mandoc_esc esc;
687
688 start = p = *bufp + pos;
689
690 while ('\0' != *p) {
691 sz = strcspn(p, "-\\");
692 p += sz;
693
694 if ('\0' == *p)
695 break;
696
697 if ('\\' == *p) {
698 /* Skip over escapes. */
699 p++;
700 esc = mandoc_escape((const char **)&p, NULL, NULL);
701 if (ESCAPE_ERROR == esc)
702 break;
703 continue;
704 } else if (p == start) {
705 p++;
706 continue;
707 }
708
709 if (isalpha((unsigned char)p[-1]) &&
710 isalpha((unsigned char)p[1]))
711 *p = ASCII_HYPH;
712 p++;
713 }
714
715 /* Spring the input line trap. */
716 if (1 == roffit_lines) {
717 isz = mandoc_asprintf(&p, "%s\n.%s", *bufp, roffit_macro);
718 free(*bufp);
719 *bufp = p;
720 *szp = isz + 1;
721 *offs = 0;
722 free(roffit_macro);
723 roffit_lines = 0;
724 return(ROFF_REPARSE);
725 } else if (1 < roffit_lines)
726 --roffit_lines;
727 return(ROFF_CONT);
728 }
729
730 enum rofferr
731 roff_parseln(struct roff *r, int ln, char **bufp,
732 size_t *szp, int pos, int *offs)
733 {
734 enum rofft t;
735 enum rofferr e;
736 int ppos, ctl;
737
738 /* Handle in-line equation delimiters. */
739
740 if (r->tbl == NULL &&
741 r->last_eqn != NULL && r->last_eqn->delim &&
742 (r->eqn == NULL || r->eqn_inline)) {
743 e = roff_eqndelim(r, bufp, szp, pos);
744 if (e == ROFF_REPARSE)
745 return(e);
746 assert(e == ROFF_CONT);
747 }
748
749 /* Expand some escape sequences. */
750
751 e = roff_res(r, bufp, szp, ln, pos);
752 if (ROFF_IGN == e)
753 return(e);
754 assert(ROFF_CONT == e);
755
756 ppos = pos;
757 ctl = roff_getcontrol(r, *bufp, &pos);
758
759 /*
760 * First, if a scope is open and we're not a macro, pass the
761 * text through the macro's filter. If a scope isn't open and
762 * we're not a macro, just let it through.
763 * Finally, if there's an equation scope open, divert it into it
764 * no matter our state.
765 */
766
767 if (r->last && ! ctl) {
768 t = r->last->tok;
769 assert(roffs[t].text);
770 e = (*roffs[t].text)(r, t, bufp, szp, ln, pos, pos, offs);
771 assert(ROFF_IGN == e || ROFF_CONT == e);
772 if (ROFF_CONT != e)
773 return(e);
774 }
775 if (r->eqn)
776 return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
777 if ( ! ctl) {
778 if (r->tbl)
779 return(tbl_read(r->tbl, ln, *bufp, pos));
780 return(roff_parsetext(bufp, szp, pos, offs));
781 }
782
783 /* Skip empty request lines. */
784
785 if ((*bufp)[pos] == '"') {
786 mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
787 ln, pos, NULL);
788 return(ROFF_IGN);
789 } else if ((*bufp)[pos] == '\0')
790 return(ROFF_IGN);
791
792 /*
793 * If a scope is open, go to the child handler for that macro,
794 * as it may want to preprocess before doing anything with it.
795 * Don't do so if an equation is open.
796 */
797
798 if (r->last) {
799 t = r->last->tok;
800 assert(roffs[t].sub);
801 return((*roffs[t].sub)(r, t, bufp, szp,
802 ln, ppos, pos, offs));
803 }
804
805 /*
806 * Lastly, as we've no scope open, try to look up and execute
807 * the new macro. If no macro is found, simply return and let
808 * the compilers handle it.
809 */
810
811 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos, ln, ppos)))
812 return(ROFF_CONT);
813
814 assert(roffs[t].proc);
815 return((*roffs[t].proc)(r, t, bufp, szp, ln, ppos, pos, offs));
816 }
817
818 void
819 roff_endparse(struct roff *r)
820 {
821
822 if (r->last)
823 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
824 r->last->line, r->last->col,
825 roffs[r->last->tok].name);
826
827 if (r->eqn) {
828 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
829 r->eqn->eqn.ln, r->eqn->eqn.pos, "EQ");
830 eqn_end(&r->eqn);
831 }
832
833 if (r->tbl) {
834 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
835 r->tbl->line, r->tbl->pos, "TS");
836 tbl_end(&r->tbl);
837 }
838 }
839
840 /*
841 * Parse a roff node's type from the input buffer. This must be in the
842 * form of ".foo xxx" in the usual way.
843 */
844 static enum rofft
845 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
846 {
847 char *cp;
848 const char *mac;
849 size_t maclen;
850 enum rofft t;
851
852 cp = buf + *pos;
853
854 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
855 return(ROFF_MAX);
856
857 mac = cp;
858 maclen = roff_getname(r, &cp, ln, ppos);
859
860 t = (r->current_string = roff_getstrn(r, mac, maclen))
861 ? ROFF_USERDEF : roffhash_find(mac, maclen);
862
863 if (ROFF_MAX != t)
864 *pos = cp - buf;
865
866 return(t);
867 }
868
869 static enum rofferr
870 roff_cblock(ROFF_ARGS)
871 {
872
873 /*
874 * A block-close `..' should only be invoked as a child of an
875 * ignore macro, otherwise raise a warning and just ignore it.
876 */
877
878 if (NULL == r->last) {
879 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
880 ln, ppos, "..");
881 return(ROFF_IGN);
882 }
883
884 switch (r->last->tok) {
885 case ROFF_am:
886 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
887 /* FALLTHROUGH */
888 case ROFF_ami:
889 /* FALLTHROUGH */
890 case ROFF_de:
891 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
892 /* FALLTHROUGH */
893 case ROFF_dei:
894 /* FALLTHROUGH */
895 case ROFF_ig:
896 break;
897 default:
898 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
899 ln, ppos, "..");
900 return(ROFF_IGN);
901 }
902
903 if ((*bufp)[pos])
904 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
905 ".. %s", *bufp + pos);
906
907 roffnode_pop(r);
908 roffnode_cleanscope(r);
909 return(ROFF_IGN);
910
911 }
912
913 static void
914 roffnode_cleanscope(struct roff *r)
915 {
916
917 while (r->last) {
918 if (--r->last->endspan != 0)
919 break;
920 roffnode_pop(r);
921 }
922 }
923
924 static void
925 roff_ccond(struct roff *r, int ln, int ppos)
926 {
927
928 if (NULL == r->last) {
929 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
930 ln, ppos, "\\}");
931 return;
932 }
933
934 switch (r->last->tok) {
935 case ROFF_el:
936 /* FALLTHROUGH */
937 case ROFF_ie:
938 /* FALLTHROUGH */
939 case ROFF_if:
940 break;
941 default:
942 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
943 ln, ppos, "\\}");
944 return;
945 }
946
947 if (r->last->endspan > -1) {
948 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
949 ln, ppos, "\\}");
950 return;
951 }
952
953 roffnode_pop(r);
954 roffnode_cleanscope(r);
955 return;
956 }
957
958 static enum rofferr
959 roff_block(ROFF_ARGS)
960 {
961 const char *name;
962 char *iname, *cp;
963 size_t namesz;
964
965 /* Ignore groff compatibility mode for now. */
966
967 if (ROFF_de1 == tok)
968 tok = ROFF_de;
969 else if (ROFF_am1 == tok)
970 tok = ROFF_am;
971
972 /* Parse the macro name argument. */
973
974 cp = *bufp + pos;
975 if (ROFF_ig == tok) {
976 iname = NULL;
977 namesz = 0;
978 } else {
979 iname = cp;
980 namesz = roff_getname(r, &cp, ln, ppos);
981 iname[namesz] = '\0';
982 }
983
984 /* Resolve the macro name argument if it is indirect. */
985
986 if (namesz && (ROFF_dei == tok || ROFF_ami == tok)) {
987 if (NULL == (name = roff_getstrn(r, iname, namesz))) {
988 mandoc_vmsg(MANDOCERR_STR_UNDEF,
989 r->parse, ln, (int)(iname - *bufp),
990 "%.*s", (int)namesz, iname);
991 namesz = 0;
992 } else
993 namesz = strlen(name);
994 } else
995 name = iname;
996
997 if (0 == namesz && ROFF_ig != tok) {
998 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
999 ln, ppos, roffs[tok].name);
1000 return(ROFF_IGN);
1001 }
1002
1003 roffnode_push(r, tok, name, ln, ppos);
1004
1005 /*
1006 * At the beginning of a `de' macro, clear the existing string
1007 * with the same name, if there is one. New content will be
1008 * appended from roff_block_text() in multiline mode.
1009 */
1010
1011 if (ROFF_de == tok || ROFF_dei == tok)
1012 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1013
1014 if ('\0' == *cp)
1015 return(ROFF_IGN);
1016
1017 /* Get the custom end marker. */
1018
1019 iname = cp;
1020 namesz = roff_getname(r, &cp, ln, ppos);
1021
1022 /* Resolve the end marker if it is indirect. */
1023
1024 if (namesz && (ROFF_dei == tok || ROFF_ami == tok)) {
1025 if (NULL == (name = roff_getstrn(r, iname, namesz))) {
1026 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1027 r->parse, ln, (int)(iname - *bufp),
1028 "%.*s", (int)namesz, iname);
1029 namesz = 0;
1030 } else
1031 namesz = strlen(name);
1032 } else
1033 name = iname;
1034
1035 if (namesz)
1036 r->last->end = mandoc_strndup(name, namesz);
1037
1038 if ('\0' != *cp)
1039 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
1040 ln, pos, ".%s ... %s", roffs[tok].name, cp);
1041
1042 return(ROFF_IGN);
1043 }
1044
1045 static enum rofferr
1046 roff_block_sub(ROFF_ARGS)
1047 {
1048 enum rofft t;
1049 int i, j;
1050
1051 /*
1052 * First check whether a custom macro exists at this level. If
1053 * it does, then check against it. This is some of groff's
1054 * stranger behaviours. If we encountered a custom end-scope
1055 * tag and that tag also happens to be a "real" macro, then we
1056 * need to try interpreting it again as a real macro. If it's
1057 * not, then return ignore. Else continue.
1058 */
1059
1060 if (r->last->end) {
1061 for (i = pos, j = 0; r->last->end[j]; j++, i++)
1062 if ((*bufp)[i] != r->last->end[j])
1063 break;
1064
1065 if ('\0' == r->last->end[j] &&
1066 ('\0' == (*bufp)[i] ||
1067 ' ' == (*bufp)[i] ||
1068 '\t' == (*bufp)[i])) {
1069 roffnode_pop(r);
1070 roffnode_cleanscope(r);
1071
1072 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
1073 i++;
1074
1075 pos = i;
1076 if (ROFF_MAX != roff_parse(r, *bufp, &pos, ln, ppos))
1077 return(ROFF_RERUN);
1078 return(ROFF_IGN);
1079 }
1080 }
1081
1082 /*
1083 * If we have no custom end-query or lookup failed, then try
1084 * pulling it out of the hashtable.
1085 */
1086
1087 t = roff_parse(r, *bufp, &pos, ln, ppos);
1088
1089 if (ROFF_cblock != t) {
1090 if (ROFF_ig != tok)
1091 roff_setstr(r, r->last->name, *bufp + ppos, 2);
1092 return(ROFF_IGN);
1093 }
1094
1095 assert(roffs[t].proc);
1096 return((*roffs[t].proc)(r, t, bufp, szp, ln, ppos, pos, offs));
1097 }
1098
1099 static enum rofferr
1100 roff_block_text(ROFF_ARGS)
1101 {
1102
1103 if (ROFF_ig != tok)
1104 roff_setstr(r, r->last->name, *bufp + pos, 2);
1105
1106 return(ROFF_IGN);
1107 }
1108
1109 static enum rofferr
1110 roff_cond_sub(ROFF_ARGS)
1111 {
1112 enum rofft t;
1113 char *ep;
1114 int rr;
1115
1116 rr = r->last->rule;
1117 roffnode_cleanscope(r);
1118 t = roff_parse(r, *bufp, &pos, ln, ppos);
1119
1120 /*
1121 * Fully handle known macros when they are structurally
1122 * required or when the conditional evaluated to true.
1123 */
1124
1125 if ((ROFF_MAX != t) &&
1126 (rr || ROFFMAC_STRUCT & roffs[t].flags)) {
1127 assert(roffs[t].proc);
1128 return((*roffs[t].proc)(r, t, bufp, szp,
1129 ln, ppos, pos, offs));
1130 }
1131
1132 /*
1133 * If `\}' occurs on a macro line without a preceding macro,
1134 * drop the line completely.
1135 */
1136
1137 ep = *bufp + pos;
1138 if ('\\' == ep[0] && '}' == ep[1])
1139 rr = 0;
1140
1141 /* Always check for the closing delimiter `\}'. */
1142
1143 while (NULL != (ep = strchr(ep, '\\'))) {
1144 if ('}' == *(++ep)) {
1145 *ep = '&';
1146 roff_ccond(r, ln, ep - *bufp - 1);
1147 }
1148 ++ep;
1149 }
1150 return(rr ? ROFF_CONT : ROFF_IGN);
1151 }
1152
1153 static enum rofferr
1154 roff_cond_text(ROFF_ARGS)
1155 {
1156 char *ep;
1157 int rr;
1158
1159 rr = r->last->rule;
1160 roffnode_cleanscope(r);
1161
1162 ep = *bufp + pos;
1163 while (NULL != (ep = strchr(ep, '\\'))) {
1164 if ('}' == *(++ep)) {
1165 *ep = '&';
1166 roff_ccond(r, ln, ep - *bufp - 1);
1167 }
1168 ++ep;
1169 }
1170 return(rr ? ROFF_CONT : ROFF_IGN);
1171 }
1172
/*
 * Parse a single signed integer number.  Stop at the first non-digit.
 * If there is at least one digit, return success (1), store the
 * number in *res, and advance the parse point *pos; else return
 * failure (0), store zero in *res, and leave the parse point
 * unchanged.  The res argument may be NULL if the number itself
 * is not needed.
 * Overflow is not reported.  The number is accumulated in unsigned
 * arithmetic, which wraps around, because overflowing a signed
 * integer is undefined in the C language.  Converting the wrapped
 * value back to int is implementation-defined; on the usual
 * two's complement platforms it yields the low-order bits.
 */
static int
roff_getnum(const char *v, int *pos, int *res)
{
	unsigned int	 acc;
	int		 myres, n, p;

	if (NULL == res)
		res = &myres;

	/* An optional leading minus sign. */

	p = *pos;
	n = v[p] == '-';
	if (n)
		p++;

	for (acc = 0; isdigit((unsigned char)v[p]); p++)
		acc = 10 * acc + (unsigned int)(v[p] - '0');

	/* No digits at all, possibly after a lone minus sign. */

	if (p == *pos + n) {
		*res = 0;
		return 0;
	}

	if (n)
		acc = 0 - acc;
	*res = (int)acc;

	*pos = p;
	return 1;
}
1203
/*
 * Evaluate a string comparison condition.
 * The first character is the delimiter.
 * Succeed if the string up to its second occurrence
 * matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or lacking that, to the end of the line.
 *
 * v:   the text to parse
 * pos: in/out offset into v; never moved beyond the terminating NUL
 *
 * Returns 1 if both strings are equal, 0 otherwise.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*s1, *s2, *s3;
	int		 match;

	match = 0;
	s1 = v + *pos;		/* initial delimiter */

	/*
	 * At the end of the line there is no delimiter, and s1 + 1
	 * would already point beyond the terminating NUL.
	 */
	if ('\0' == *s1)
		return(0);

	s2 = s1 + 1;		/* for scanning the first string */
	s3 = strchr(s2, *s1);	/* for scanning the second string */

	if (NULL == s3)	/* found no middle delimiter */
		goto out;

	while ('\0' != *++s3) {
		if (*s2 != *s3) {  /* mismatch */
			s3 = strchr(s3, *s1);
			break;
		}
		if (*s3 == *s1) {  /* found the final delimiter */
			match = 1;
			break;
		}
		s2++;
	}

out:
	if (NULL == s3)
		s3 = strchr(s2, '\0');
	else if ('\0' != *s3)
		/*
		 * Step over the final delimiter, but stay on the
		 * terminating NUL when the delimiter is missing.
		 */
		s3++;
	*pos = s3 - v;
	return(match);
}
1246
/*
 * Evaluate one condition, optionally preceded by `!' for negation.
 * The single letters `n' and `o' count as true, the single letters
 * `c', `d', `e', `r' and `t' as false.  Anything else is tried as
 * a numerical expression (true if greater than zero) and, failing
 * that, as a string comparison.
 * The parse point *pos is advanced past whatever was consumed.
 */
static int
roff_evalcond(struct roff *r, int ln, const char *v, int *pos)
{
	int	 expect, value;

	/* A leading `!' inverts the expected outcome. */
	expect = '!' != v[*pos];
	if ( ! expect)
		(*pos)++;

	switch (v[*pos]) {
	case 'n': case 'o':
		(*pos)++;
		return expect;
	case 'c': case 'd': case 'e': case 'r': case 't':
		(*pos)++;
		return ! expect;
	default:
		break;
	}

	if ( ! roff_evalnum(r, ln, v, pos, &value, 0))
		return roff_evalstrcond(v, pos) == expect;

	return (value > 0) == expect;
}
1288
1289 static enum rofferr
1290 roff_line_ignore(ROFF_ARGS)
1291 {
1292
1293 return(ROFF_IGN);
1294 }
1295
1296 static enum rofferr
1297 roff_cond(ROFF_ARGS)
1298 {
1299
1300 roffnode_push(r, tok, NULL, ln, ppos);
1301
1302 /*
1303 * An `.el' has no conditional body: it will consume the value
1304 * of the current rstack entry set in prior `ie' calls or
1305 * defaults to DENY.
1306 *
1307 * If we're not an `el', however, then evaluate the conditional.
1308 */
1309
1310 r->last->rule = ROFF_el == tok ?
1311 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
1312 roff_evalcond(r, ln, *bufp, &pos);
1313
1314 /*
1315 * An if-else will put the NEGATION of the current evaluated
1316 * conditional into the stack of rules.
1317 */
1318
1319 if (ROFF_ie == tok) {
1320 if (r->rstackpos + 1 == r->rstacksz) {
1321 r->rstacksz += 16;
1322 r->rstack = mandoc_reallocarray(r->rstack,
1323 r->rstacksz, sizeof(int));
1324 }
1325 r->rstack[++r->rstackpos] = !r->last->rule;
1326 }
1327
1328 /* If the parent has false as its rule, then so do we. */
1329
1330 if (r->last->parent && !r->last->parent->rule)
1331 r->last->rule = 0;
1332
1333 /*
1334 * Determine scope.
1335 * If there is nothing on the line after the conditional,
1336 * not even whitespace, use next-line scope.
1337 */
1338
1339 if ('\0' == (*bufp)[pos]) {
1340 r->last->endspan = 2;
1341 goto out;
1342 }
1343
1344 while (' ' == (*bufp)[pos])
1345 pos++;
1346
1347 /* An opening brace requests multiline scope. */
1348
1349 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1350 r->last->endspan = -1;
1351 pos += 2;
1352 goto out;
1353 }
1354
1355 /*
1356 * Anything else following the conditional causes
1357 * single-line scope. Warn if the scope contains
1358 * nothing but trailing whitespace.
1359 */
1360
1361 if ('\0' == (*bufp)[pos])
1362 mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
1363 ln, ppos, roffs[tok].name);
1364
1365 r->last->endspan = 1;
1366
1367 out:
1368 *offs = pos;
1369 return(ROFF_RERUN);
1370 }
1371
1372 static enum rofferr
1373 roff_ds(ROFF_ARGS)
1374 {
1375 char *string;
1376 const char *name;
1377 size_t namesz;
1378
1379 /*
1380 * The first word is the name of the string.
1381 * If it is empty or terminated by an escape sequence,
1382 * abort the `ds' request without defining anything.
1383 */
1384
1385 name = string = *bufp + pos;
1386 if ('\0' == *name)
1387 return(ROFF_IGN);
1388
1389 namesz = roff_getname(r, &string, ln, pos);
1390 if ('\\' == name[namesz])
1391 return(ROFF_IGN);
1392
1393 /* Read past the initial double-quote, if any. */
1394 if ('"' == *string)
1395 string++;
1396
1397 /* The rest is the value. */
1398 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
1399 ROFF_as == tok);
1400 return(ROFF_IGN);
1401 }
1402
/*
 * Parse a single operator, one or two characters long.
 * On success, advance the parse point *pos past the operator, store
 * its code in *res and return that code (non-zero).  On failure,
 * return 0 and leave *pos unchanged; *res then holds the character
 * at the parse point.
 *
 * Single-character operators are their own code.  The two-character
 * operators are encoded as: "<=" 'l', "<>" '!', "<?" 'i',
 * ">=" 'g', ">?" 'a', "==" '='.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	int	 width;

	*res = v[*pos];
	width = 1;

	switch (*res) {
	case '+': case '-': case '*': case '/':
	case '%': case '&': case ':':
		break;
	case '<':
		if ('=' == v[*pos + 1]) {
			*res = 'l';
			width = 2;
		} else if ('>' == v[*pos + 1]) {
			*res = '!';
			width = 2;
		} else if ('?' == v[*pos + 1]) {
			*res = 'i';
			width = 2;
		}
		break;
	case '>':
		if ('=' == v[*pos + 1]) {
			*res = 'g';
			width = 2;
		} else if ('?' == v[*pos + 1]) {
			*res = 'a';
			width = 2;
		}
		break;
	case '=':
		if ('=' == v[*pos + 1])
			width = 2;
		break;
	default:
		return 0;
	}

	*pos += width;
	return *res;
}
1472
/*
 * Evaluate one operand: either a parenthesized numeric expression
 * or a single signed integer number.
 * Returns 1 on success and 0 on failure.  A NULL res selects
 * validation mode, in which nothing is stored.
 */
static int
roff_evalpar(struct roff *r, int ln,
	const char *v, int *pos, int *res)
{
	if ('(' == v[*pos]) {
		(*pos)++;
		if ( ! roff_evalnum(r, ln, v, pos, res, 1))
			return 0;

		if (')' == v[*pos]) {
			(*pos)++;
			return 1;
		}

		/*
		 * The closing parenthesis is missing: that is an
		 * error in validation mode, but tolerated in
		 * evaluation mode.
		 */
		return NULL != res;
	}

	return roff_getnum(v, pos, res);
}
1502
1503 /*
1504 * Evaluate a complete numeric expression.
1505 * Proceed left to right, there is no concept of precedence.
1506 */
1507 static int
1508 roff_evalnum(struct roff *r, int ln, const char *v,
1509 int *pos, int *res, int skipwhite)
1510 {
1511 int mypos, operand2;
1512 char operator;
1513
1514 if (NULL == pos) {
1515 mypos = 0;
1516 pos = &mypos;
1517 }
1518
1519 if (skipwhite)
1520 while (isspace((unsigned char)v[*pos]))
1521 (*pos)++;
1522
1523 if ( ! roff_evalpar(r, ln, v, pos, res))
1524 return(0);
1525
1526 while (1) {
1527 if (skipwhite)
1528 while (isspace((unsigned char)v[*pos]))
1529 (*pos)++;
1530
1531 if ( ! roff_getop(v, pos, &operator))
1532 break;
1533
1534 if (skipwhite)
1535 while (isspace((unsigned char)v[*pos]))
1536 (*pos)++;
1537
1538 if ( ! roff_evalpar(r, ln, v, pos, &operand2))
1539 return(0);
1540
1541 if (skipwhite)
1542 while (isspace((unsigned char)v[*pos]))
1543 (*pos)++;
1544
1545 if (NULL == res)
1546 continue;
1547
1548 switch (operator) {
1549 case '+':
1550 *res += operand2;
1551 break;
1552 case '-':
1553 *res -= operand2;
1554 break;
1555 case '*':
1556 *res *= operand2;
1557 break;
1558 case '/':
1559 if (0 == operand2) {
1560 mandoc_msg(MANDOCERR_DIVZERO,
1561 r->parse, ln, *pos, v);
1562 *res = 0;
1563 break;
1564 }
1565 *res /= operand2;
1566 break;
1567 case '%':
1568 *res %= operand2;
1569 break;
1570 case '<':
1571 *res = *res < operand2;
1572 break;
1573 case '>':
1574 *res = *res > operand2;
1575 break;
1576 case 'l':
1577 *res = *res <= operand2;
1578 break;
1579 case 'g':
1580 *res = *res >= operand2;
1581 break;
1582 case '=':
1583 *res = *res == operand2;
1584 break;
1585 case '!':
1586 *res = *res != operand2;
1587 break;
1588 case '&':
1589 *res = *res && operand2;
1590 break;
1591 case ':':
1592 *res = *res || operand2;
1593 break;
1594 case 'i':
1595 if (operand2 < *res)
1596 *res = operand2;
1597 break;
1598 case 'a':
1599 if (operand2 > *res)
1600 *res = operand2;
1601 break;
1602 default:
1603 abort();
1604 }
1605 }
1606 return(1);
1607 }
1608
1609 void
1610 roff_setreg(struct roff *r, const char *name, int val, char sign)
1611 {
1612 struct roffreg *reg;
1613
1614 /* Search for an existing register with the same name. */
1615 reg = r->regtab;
1616
1617 while (reg && strcmp(name, reg->key.p))
1618 reg = reg->next;
1619
1620 if (NULL == reg) {
1621 /* Create a new register. */
1622 reg = mandoc_malloc(sizeof(struct roffreg));
1623 reg->key.p = mandoc_strdup(name);
1624 reg->key.sz = strlen(name);
1625 reg->val = 0;
1626 reg->next = r->regtab;
1627 r->regtab = reg;
1628 }
1629
1630 if ('+' == sign)
1631 reg->val += val;
1632 else if ('-' == sign)
1633 reg->val -= val;
1634 else
1635 reg->val = val;
1636 }
1637
/*
 * Look up a predefined read-only number register by the first
 * character of name.
 * Returns the fixed value of the register, or -1 if the character
 * does not name a predefined register.  Should a predefined register
 * with the value -1 ever be needed, a different marker value
 * would have to be picked.
 */
static int
roff_getregro(const char *name)
{
	switch (*name) {
	case 'A':	/* ASCII approximation mode is always off. */
	case 'j':	/* Always adjust left margin only. */
		return 0;
	case 'g':	/* Groff compatibility mode is always on. */
	case 'T':	/* Some output device is always defined. */
		return 1;
	case 'H':	/* Fixed horizontal resolution. */
		return 24;
	case 'V':	/* Fixed vertical resolution. */
		return 40;
	default:
		break;
	}
	return -1;
}
1665
1666 int
1667 roff_getreg(const struct roff *r, const char *name)
1668 {
1669 struct roffreg *reg;
1670 int val;
1671
1672 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
1673 val = roff_getregro(name + 1);
1674 if (-1 != val)
1675 return (val);
1676 }
1677
1678 for (reg = r->regtab; reg; reg = reg->next)
1679 if (0 == strcmp(name, reg->key.p))
1680 return(reg->val);
1681
1682 return(0);
1683 }
1684
1685 static int
1686 roff_getregn(const struct roff *r, const char *name, size_t len)
1687 {
1688 struct roffreg *reg;
1689 int val;
1690
1691 if ('.' == name[0] && 2 == len) {
1692 val = roff_getregro(name + 1);
1693 if (-1 != val)
1694 return (val);
1695 }
1696
1697 for (reg = r->regtab; reg; reg = reg->next)
1698 if (len == reg->key.sz &&
1699 0 == strncmp(name, reg->key.p, len))
1700 return(reg->val);
1701
1702 return(0);
1703 }
1704
1705 static void
1706 roff_freereg(struct roffreg *reg)
1707 {
1708 struct roffreg *old_reg;
1709
1710 while (NULL != reg) {
1711 free(reg->key.p);
1712 old_reg = reg;
1713 reg = reg->next;
1714 free(old_reg);
1715 }
1716 }
1717
1718 static enum rofferr
1719 roff_nr(ROFF_ARGS)
1720 {
1721 char *key, *val;
1722 size_t keysz;
1723 int iv;
1724 char sign;
1725
1726 key = val = *bufp + pos;
1727 if ('\0' == *key)
1728 return(ROFF_IGN);
1729
1730 keysz = roff_getname(r, &val, ln, pos);
1731 if ('\\' == key[keysz])
1732 return(ROFF_IGN);
1733 key[keysz] = '\0';
1734
1735 sign = *val;
1736 if ('+' == sign || '-' == sign)
1737 val++;
1738
1739 if (roff_evalnum(r, ln, val, NULL, &iv, 0))
1740 roff_setreg(r, key, iv, sign);
1741
1742 return(ROFF_IGN);
1743 }
1744
1745 static enum rofferr
1746 roff_rr(ROFF_ARGS)
1747 {
1748 struct roffreg *reg, **prev;
1749 char *name, *cp;
1750 size_t namesz;
1751
1752 name = cp = *bufp + pos;
1753 if ('\0' == *name)
1754 return(ROFF_IGN);
1755 namesz = roff_getname(r, &cp, ln, pos);
1756 name[namesz] = '\0';
1757
1758 prev = &r->regtab;
1759 while (1) {
1760 reg = *prev;
1761 if (NULL == reg || !strcmp(name, reg->key.p))
1762 break;
1763 prev = &reg->next;
1764 }
1765 if (NULL != reg) {
1766 *prev = reg->next;
1767 free(reg->key.p);
1768 free(reg);
1769 }
1770 return(ROFF_IGN);
1771 }
1772
1773 static enum rofferr
1774 roff_rm(ROFF_ARGS)
1775 {
1776 const char *name;
1777 char *cp;
1778 size_t namesz;
1779
1780 cp = *bufp + pos;
1781 while ('\0' != *cp) {
1782 name = cp;
1783 namesz = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1784 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
1785 if ('\\' == name[namesz])
1786 break;
1787 }
1788 return(ROFF_IGN);
1789 }
1790
1791 static enum rofferr
1792 roff_it(ROFF_ARGS)
1793 {
1794 char *cp;
1795 size_t len;
1796 int iv;
1797
1798 /* Parse the number of lines. */
1799 cp = *bufp + pos;
1800 len = strcspn(cp, " \t");
1801 cp[len] = '\0';
1802 if ((iv = mandoc_strntoi(cp, len, 10)) <= 0) {
1803 mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
1804 ln, ppos, *bufp + 1);
1805 return(ROFF_IGN);
1806 }
1807 cp += len + 1;
1808
1809 /* Arm the input line trap. */
1810 roffit_lines = iv;
1811 roffit_macro = mandoc_strdup(cp);
1812 return(ROFF_IGN);
1813 }
1814
1815 static enum rofferr
1816 roff_Dd(ROFF_ARGS)
1817 {
1818 const char *const *cp;
1819
1820 if ((r->options & (MPARSE_MDOC | MPARSE_QUICK)) == 0)
1821 for (cp = __mdoc_reserved; *cp; cp++)
1822 roff_setstr(r, *cp, NULL, 0);
1823
1824 if (r->format == 0)
1825 r->format = MPARSE_MDOC;
1826
1827 return(ROFF_CONT);
1828 }
1829
1830 static enum rofferr
1831 roff_TH(ROFF_ARGS)
1832 {
1833 const char *const *cp;
1834
1835 if ((r->options & MPARSE_QUICK) == 0)
1836 for (cp = __man_reserved; *cp; cp++)
1837 roff_setstr(r, *cp, NULL, 0);
1838
1839 if (r->format == 0)
1840 r->format = MPARSE_MAN;
1841
1842 return(ROFF_CONT);
1843 }
1844
1845 static enum rofferr
1846 roff_TE(ROFF_ARGS)
1847 {
1848
1849 if (NULL == r->tbl)
1850 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1851 ln, ppos, "TE");
1852 else
1853 tbl_end(&r->tbl);
1854
1855 return(ROFF_IGN);
1856 }
1857
1858 static enum rofferr
1859 roff_T_(ROFF_ARGS)
1860 {
1861
1862 if (NULL == r->tbl)
1863 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1864 ln, ppos, "T&");
1865 else
1866 tbl_restart(ppos, ln, r->tbl);
1867
1868 return(ROFF_IGN);
1869 }
1870
1871 /*
1872 * Handle in-line equation delimiters.
1873 */
1874 static enum rofferr
1875 roff_eqndelim(struct roff *r, char **bufp, size_t *szp, int pos)
1876 {
1877 char *cp1, *cp2;
1878 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
1879
1880 /*
1881 * Outside equations, look for an opening delimiter.
1882 * If we are inside an equation, we already know it is
1883 * in-line, or this function wouldn't have been called;
1884 * so look for a closing delimiter.
1885 */
1886
1887 cp1 = *bufp + pos;
1888 cp2 = strchr(cp1, r->eqn == NULL ?
1889 r->last_eqn->odelim : r->last_eqn->cdelim);
1890 if (cp2 == NULL)
1891 return(ROFF_CONT);
1892
1893 *cp2++ = '\0';
1894 bef_pr = bef_nl = aft_nl = aft_pr = "";
1895
1896 /* Handle preceding text, protecting whitespace. */
1897
1898 if (**bufp != '\0') {
1899 if (r->eqn == NULL)
1900 bef_pr = "\\&";
1901 bef_nl = "\n";
1902 }
1903
1904 /*
1905 * Prepare replacing the delimiter with an equation macro
1906 * and drop leading white space from the equation.
1907 */
1908
1909 if (r->eqn == NULL) {
1910 while (*cp2 == ' ')
1911 cp2++;
1912 mac = ".EQ";
1913 } else
1914 mac = ".EN";
1915
1916 /* Handle following text, protecting whitespace. */
1917
1918 if (*cp2 != '\0') {
1919 aft_nl = "\n";
1920 if (r->eqn != NULL)
1921 aft_pr = "\\&";
1922 }
1923
1924 /* Do the actual replacement. */
1925
1926 *szp = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", *bufp,
1927 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
1928 free(*bufp);
1929 *bufp = cp1;
1930
1931 /* Toggle the in-line state of the eqn subsystem. */
1932
1933 r->eqn_inline = r->eqn == NULL;
1934 return(ROFF_REPARSE);
1935 }
1936
1937 static enum rofferr
1938 roff_EQ(ROFF_ARGS)
1939 {
1940 struct eqn_node *e;
1941
1942 assert(NULL == r->eqn);
1943 e = eqn_alloc(ppos, ln, r->parse);
1944
1945 if (r->last_eqn) {
1946 r->last_eqn->next = e;
1947 e->delim = r->last_eqn->delim;
1948 e->odelim = r->last_eqn->odelim;
1949 e->cdelim = r->last_eqn->cdelim;
1950 } else
1951 r->first_eqn = r->last_eqn = e;
1952
1953 r->eqn = r->last_eqn = e;
1954
1955 if ((*bufp)[pos])
1956 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1957 ".EQ %s", *bufp + pos);
1958
1959 return(ROFF_IGN);
1960 }
1961
1962 static enum rofferr
1963 roff_EN(ROFF_ARGS)
1964 {
1965
1966 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
1967 return(ROFF_IGN);
1968 }
1969
1970 static enum rofferr
1971 roff_TS(ROFF_ARGS)
1972 {
1973 struct tbl_node *tbl;
1974
1975 if (r->tbl) {
1976 mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
1977 ln, ppos, "TS breaks TS");
1978 tbl_end(&r->tbl);
1979 }
1980
1981 tbl = tbl_alloc(ppos, ln, r->parse);
1982
1983 if (r->last_tbl)
1984 r->last_tbl->next = tbl;
1985 else
1986 r->first_tbl = r->last_tbl = tbl;
1987
1988 r->tbl = r->last_tbl = tbl;
1989 return(ROFF_IGN);
1990 }
1991
1992 static enum rofferr
1993 roff_cc(ROFF_ARGS)
1994 {
1995 const char *p;
1996
1997 p = *bufp + pos;
1998
1999 if ('\0' == *p || '.' == (r->control = *p++))
2000 r->control = 0;
2001
2002 if ('\0' != *p)
2003 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
2004
2005 return(ROFF_IGN);
2006 }
2007
2008 static enum rofferr
2009 roff_tr(ROFF_ARGS)
2010 {
2011 const char *p, *first, *second;
2012 size_t fsz, ssz;
2013 enum mandoc_esc esc;
2014
2015 p = *bufp + pos;
2016
2017 if ('\0' == *p) {
2018 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
2019 return(ROFF_IGN);
2020 }
2021
2022 while ('\0' != *p) {
2023 fsz = ssz = 1;
2024
2025 first = p++;
2026 if ('\\' == *first) {
2027 esc = mandoc_escape(&p, NULL, NULL);
2028 if (ESCAPE_ERROR == esc) {
2029 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2030 ln, (int)(p - *bufp), first);
2031 return(ROFF_IGN);
2032 }
2033 fsz = (size_t)(p - first);
2034 }
2035
2036 second = p++;
2037 if ('\\' == *second) {
2038 esc = mandoc_escape(&p, NULL, NULL);
2039 if (ESCAPE_ERROR == esc) {
2040 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2041 ln, (int)(p - *bufp), second);
2042 return(ROFF_IGN);
2043 }
2044 ssz = (size_t)(p - second);
2045 } else if ('\0' == *second) {
2046 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
2047 ln, (int)(p - *bufp), NULL);
2048 second = " ";
2049 p--;
2050 }
2051
2052 if (fsz > 1) {
2053 roff_setstrn(&r->xmbtab, first, fsz,
2054 second, ssz, 0);
2055 continue;
2056 }
2057
2058 if (NULL == r->xtab)
2059 r->xtab = mandoc_calloc(128,
2060 sizeof(struct roffstr));
2061
2062 free(r->xtab[(int)*first].p);
2063 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
2064 r->xtab[(int)*first].sz = ssz;
2065 }
2066
2067 return(ROFF_IGN);
2068 }
2069
2070 static enum rofferr
2071 roff_so(ROFF_ARGS)
2072 {
2073 char *name;
2074
2075 name = *bufp + pos;
2076 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
2077
2078 /*
2079 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
2080 * opening anything that's not in our cwd or anything beneath
2081 * it. Thus, explicitly disallow traversing up the file-system
2082 * or using absolute paths.
2083 */
2084
2085 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
2086 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
2087 ".so %s", name);
2088 return(ROFF_ERR);
2089 }
2090
2091 *offs = pos;
2092 return(ROFF_SO);
2093 }
2094
2095 static enum rofferr
2096 roff_userdef(ROFF_ARGS)
2097 {
2098 const char *arg[9];
2099 char *cp, *n1, *n2;
2100 int i;
2101
2102 /*
2103 * Collect pointers to macro argument strings
2104 * and NUL-terminate them.
2105 */
2106 cp = *bufp + pos;
2107 for (i = 0; i < 9; i++)
2108 arg[i] = '\0' == *cp ? "" :
2109 mandoc_getarg(r->parse, &cp, ln, &pos);
2110
2111 /*
2112 * Expand macro arguments.
2113 */
2114 *szp = 0;
2115 n1 = cp = mandoc_strdup(r->current_string);
2116 while (NULL != (cp = strstr(cp, "\\$"))) {
2117 i = cp[2] - '1';
2118 if (0 > i || 8 < i) {
2119 /* Not an argument invocation. */
2120 cp += 2;
2121 continue;
2122 }
2123 *cp = '\0';
2124 *szp = mandoc_asprintf(&n2, "%s%s%s",
2125 n1, arg[i], cp + 3) + 1;
2126 cp = n2 + (cp - n1);
2127 free(n1);
2128 n1 = n2;
2129 }
2130
2131 /*
2132 * Replace the macro invocation
2133 * by the expanded macro.
2134 */
2135 free(*bufp);
2136 *bufp = n1;
2137 if (0 == *szp)
2138 *szp = strlen(*bufp) + 1;
2139
2140 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
2141 ROFF_REPARSE : ROFF_APPEND);
2142 }
2143
2144 static size_t
2145 roff_getname(struct roff *r, char **cpp, int ln, int pos)
2146 {
2147 char *name, *cp;
2148 size_t namesz;
2149
2150 name = *cpp;
2151 if ('\0' == *name)
2152 return(0);
2153
2154 /* Read until end of name and terminate it with NUL. */
2155 for (cp = name; 1; cp++) {
2156 if ('\0' == *cp || ' ' == *cp) {
2157 namesz = cp - name;
2158 break;
2159 }
2160 if ('\\' != *cp)
2161 continue;
2162 namesz = cp - name;
2163 if ('{' == cp[1] || '}' == cp[1])
2164 break;
2165 cp++;
2166 if ('\\' == *cp)
2167 continue;
2168 mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
2169 "%.*s", (int)(cp - name + 1), name);
2170 mandoc_escape((const char **)&cp, NULL, NULL);
2171 break;
2172 }
2173
2174 /* Read past spaces. */
2175 while (' ' == *cp)
2176 cp++;
2177
2178 *cpp = cp;
2179 return(namesz);
2180 }
2181
2182 /*
2183 * Store *string into the user-defined string called *name.
2184 * To clear an existing entry, call with (*r, *name, NULL, 0).
2185 * append == 0: replace mode
2186 * append == 1: single-line append mode
2187 * append == 2: multiline append mode, append '\n' after each call
2188 */
2189 static void
2190 roff_setstr(struct roff *r, const char *name, const char *string,
2191 int append)
2192 {
2193
2194 roff_setstrn(&r->strtab, name, strlen(name), string,
2195 string ? strlen(string) : 0, append);
2196 }
2197
2198 static void
2199 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
2200 const char *string, size_t stringsz, int append)
2201 {
2202 struct roffkv *n;
2203 char *c;
2204 int i;
2205 size_t oldch, newch;
2206
2207 /* Search for an existing string with the same name. */
2208 n = *r;
2209
2210 while (n && (namesz != n->key.sz ||
2211 strncmp(n->key.p, name, namesz)))
2212 n = n->next;
2213
2214 if (NULL == n) {
2215 /* Create a new string table entry. */
2216 n = mandoc_malloc(sizeof(struct roffkv));
2217 n->key.p = mandoc_strndup(name, namesz);
2218 n->key.sz = namesz;
2219 n->val.p = NULL;
2220 n->val.sz = 0;
2221 n->next = *r;
2222 *r = n;
2223 } else if (0 == append) {
2224 free(n->val.p);
2225 n->val.p = NULL;
2226 n->val.sz = 0;
2227 }
2228
2229 if (NULL == string)
2230 return;
2231
2232 /*
2233 * One additional byte for the '\n' in multiline mode,
2234 * and one for the terminating '\0'.
2235 */
2236 newch = stringsz + (1 < append ? 2u : 1u);
2237
2238 if (NULL == n->val.p) {
2239 n->val.p = mandoc_malloc(newch);
2240 *n->val.p = '\0';
2241 oldch = 0;
2242 } else {
2243 oldch = n->val.sz;
2244 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
2245 }
2246
2247 /* Skip existing content in the destination buffer. */
2248 c = n->val.p + (int)oldch;
2249
2250 /* Append new content to the destination buffer. */
2251 i = 0;
2252 while (i < (int)stringsz) {
2253 /*
2254 * Rudimentary roff copy mode:
2255 * Handle escaped backslashes.
2256 */
2257 if ('\\' == string[i] && '\\' == string[i + 1])
2258 i++;
2259 *c++ = string[i++];
2260 }
2261
2262 /* Append terminating bytes. */
2263 if (1 < append)
2264 *c++ = '\n';
2265
2266 *c = '\0';
2267 n->val.sz = (int)(c - n->val.p);
2268 }
2269
2270 static const char *
2271 roff_getstrn(const struct roff *r, const char *name, size_t len)
2272 {
2273 const struct roffkv *n;
2274 int i;
2275
2276 for (n = r->strtab; n; n = n->next)
2277 if (0 == strncmp(name, n->key.p, len) &&
2278 '\0' == n->key.p[(int)len])
2279 return(n->val.p);
2280
2281 for (i = 0; i < PREDEFS_MAX; i++)
2282 if (0 == strncmp(name, predefs[i].name, len) &&
2283 '\0' == predefs[i].name[(int)len])
2284 return(predefs[i].str);
2285
2286 return(NULL);
2287 }
2288
2289 static void
2290 roff_freestr(struct roffkv *r)
2291 {
2292 struct roffkv *n, *nn;
2293
2294 for (n = r; n; n = nn) {
2295 free(n->key.p);
2296 free(n->val.p);
2297 nn = n->next;
2298 free(n);
2299 }
2300 }
2301
2302 const struct tbl_span *
2303 roff_span(const struct roff *r)
2304 {
2305
2306 return(r->tbl ? tbl_span(r->tbl) : NULL);
2307 }
2308
2309 const struct eqn *
2310 roff_eqn(const struct roff *r)
2311 {
2312
2313 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
2314 }
2315
2316 /*
2317 * Duplicate an input string, making the appropriate character
2318 * conversations (as stipulated by `tr') along the way.
2319 * Returns a heap-allocated string with all the replacements made.
2320 */
2321 char *
2322 roff_strdup(const struct roff *r, const char *p)
2323 {
2324 const struct roffkv *cp;
2325 char *res;
2326 const char *pp;
2327 size_t ssz, sz;
2328 enum mandoc_esc esc;
2329
2330 if (NULL == r->xmbtab && NULL == r->xtab)
2331 return(mandoc_strdup(p));
2332 else if ('\0' == *p)
2333 return(mandoc_strdup(""));
2334
2335 /*
2336 * Step through each character looking for term matches
2337 * (remember that a `tr' can be invoked with an escape, which is
2338 * a glyph but the escape is multi-character).
2339 * We only do this if the character hash has been initialised
2340 * and the string is >0 length.
2341 */
2342
2343 res = NULL;
2344 ssz = 0;
2345
2346 while ('\0' != *p) {
2347 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
2348 sz = r->xtab[(int)*p].sz;
2349 res = mandoc_realloc(res, ssz + sz + 1);
2350 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
2351 ssz += sz;
2352 p++;
2353 continue;
2354 } else if ('\\' != *p) {
2355 res = mandoc_realloc(res, ssz + 2);
2356 res[ssz++] = *p++;
2357 continue;
2358 }
2359
2360 /* Search for term matches. */
2361 for (cp = r->xmbtab; cp; cp = cp->next)
2362 if (0 == strncmp(p, cp->key.p, cp->key.sz))
2363 break;
2364
2365 if (NULL != cp) {
2366 /*
2367 * A match has been found.
2368 * Append the match to the array and move
2369 * forward by its keysize.
2370 */
2371 res = mandoc_realloc(res,
2372 ssz + cp->val.sz + 1);
2373 memcpy(res + ssz, cp->val.p, cp->val.sz);
2374 ssz += cp->val.sz;
2375 p += (int)cp->key.sz;
2376 continue;
2377 }
2378
2379 /*
2380 * Handle escapes carefully: we need to copy
2381 * over just the escape itself, or else we might
2382 * do replacements within the escape itself.
2383 * Make sure to pass along the bogus string.
2384 */
2385 pp = p++;
2386 esc = mandoc_escape(&p, NULL, NULL);
2387 if (ESCAPE_ERROR == esc) {
2388 sz = strlen(pp);
2389 res = mandoc_realloc(res, ssz + sz + 1);
2390 memcpy(res + ssz, pp, sz);
2391 break;
2392 }
2393 /*
2394 * We bail out on bad escapes.
2395 * No need to warn: we already did so when
2396 * roff_res() was called.
2397 */
2398 sz = (int)(p - pp);
2399 res = mandoc_realloc(res, ssz + sz + 1);
2400 memcpy(res + ssz, pp, sz);
2401 ssz += sz;
2402 }
2403
2404 res[(int)ssz] = '\0';
2405 return(res);
2406 }
2407
2408 int
2409 roff_getformat(const struct roff *r)
2410 {
2411
2412 return(r->format);
2413 }
2414
2415 /*
2416 * Find out whether a line is a macro line or not.
2417 * If it is, adjust the current position and return one; if it isn't,
2418 * return zero and don't change the current position.
2419 * If the control character has been set with `.cc', then let that grain
2420 * precedence.
2421 * This is slighly contrary to groff, where using the non-breaking
2422 * control character when `cc' has been invoked will cause the
2423 * non-breaking macro contents to be printed verbatim.
2424 */
2425 int
2426 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
2427 {
2428 int pos;
2429
2430 pos = *ppos;
2431
2432 if (0 != r->control && cp[pos] == r->control)
2433 pos++;
2434 else if (0 != r->control)
2435 return(0);
2436 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
2437 pos += 2;
2438 else if ('.' == cp[pos] || '\'' == cp[pos])
2439 pos++;
2440 else
2441 return(0);
2442
2443 while (' ' == cp[pos] || '\t' == cp[pos])
2444 pos++;
2445
2446 *ppos = pos;
2447 return(1);
2448 }