]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
Protect the roff parser from dividing by zero. ok schwarze@
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.234 2014/10/20 19:04:45 kristaps Exp $ */
2 /*
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include "mandoc.h"
29 #include "mandoc_aux.h"
30 #include "libmandoc.h"
31 #include "libroff.h"
32
/* Upper bound on the nesting depth of if-else conditionals. */
#define	RSTACK_MAX	128

/*
 * Upper bound on the string expansions done for one input line;
 * hitting it is treated as an infinite expansion loop.
 */
#define	EXPAND_LIMIT	1000
38
/*
 * Identifiers of the requests and macros known to this file.
 * The values are used as indices into the roffs[] table, so the
 * order of the enumerators and of the table entries must agree.
 */
enum rofft {
	/* plain roff requests */
	ROFF_ad, ROFF_am, ROFF_ami, ROFF_am1, ROFF_as,
	ROFF_cc, ROFF_ce, ROFF_de, ROFF_dei, ROFF_de1,
	ROFF_ds, ROFF_el, ROFF_fam, ROFF_hw, ROFF_hy,
	ROFF_ie, ROFF_if, ROFF_ig, ROFF_it, ROFF_ne,
	ROFF_nh, ROFF_nr, ROFF_ns, ROFF_pl, ROFF_ps,
	ROFF_rm, ROFF_rr, ROFF_so, ROFF_ta, ROFF_tr,
	/* macros that are intercepted here */
	ROFF_Dd, ROFF_TH,
	/* tbl and eqn delimiters */
	ROFF_TS, ROFF_TE, ROFF_T_, ROFF_EQ, ROFF_EN,
	/* block close `..', user-defined macro, end marker */
	ROFF_cblock,
	ROFF_USERDEF,
	ROFF_MAX
};
81
/*
 * Minimal string buffer: the text together with its cached length.
 */
struct roffstr {
	char	*p;	/* NUL-terminated text */
	size_t	 sz;	/* strlen(p), stored to avoid recounting */
};
89
90 /*
91 * A key-value roffstr pair as part of a singly-linked list.
92 */
93 struct roffkv {
94 struct roffstr key;
95 struct roffstr val;
96 struct roffkv *next; /* next in list */
97 };
98
99 /*
100 * A single number register as part of a singly-linked list.
101 */
102 struct roffreg {
103 struct roffstr key;
104 int val;
105 struct roffreg *next;
106 };
107
/*
 * Complete state of one roff parser instance.
 */
struct roff {
	struct mparse	*parse;		/* owning parse context */
	struct roffnode	*last;		/* innermost open block, or NULL */
	int		*rstack;	/* saved inverted `ie' results */
	struct roffreg	*regtab;	/* list of number registers */
	struct roffkv	*strtab;	/* user-defined strings and macros */
	struct roffkv	*xmbtab;	/* `tr' table, multi-byte sources */
	struct roffstr	*xtab;		/* `tr' table, single-byte sources */
	const char	*current_string; /* body of last user macro found */
	struct tbl_node	*first_tbl;	/* head of the list of tables */
	struct tbl_node	*last_tbl;	/* tail of the list of tables */
	struct tbl_node	*tbl;		/* table being parsed, or NULL */
	struct eqn_node	*last_eqn;	/* tail of the list of equations */
	struct eqn_node	*first_eqn;	/* head of the list of equations */
	struct eqn_node	*eqn;		/* equation being parsed, or NULL */
	int		 eqn_inline;	/* open equation is an inline one */
	int		 options;	/* parse options given at allocation */
	int		 rstacksz;	/* allocated size of rstack */
	int		 rstackpos;	/* top of rstack, -1 when empty */
	int		 format;	/* MPARSE_MDOC / MPARSE_MAN bits */
	char		 control;	/* control character, 0 for default */
};
130
131 struct roffnode {
132 enum rofft tok; /* type of node */
133 struct roffnode *parent; /* up one in stack */
134 int line; /* parse line */
135 int col; /* parse col */
136 char *name; /* node name, e.g. macro name */
137 char *end; /* end-rules: custom token */
138 int endspan; /* end-rules: next-line or infty */
139 int rule; /* current evaluation rule */
140 };
141
/*
 * Parameter list shared by all request handlers:
 *   r     parse context
 *   tok   request or macro being handled
 *   bufp  input buffer (may be replaced by the handler)
 *   szp   size of the input buffer
 *   ln    input line number
 *   ppos  position in the buffer where the line originally started
 *   pos   current position in the buffer
 *   offs  offset to restart from when the buffer is reset
 */
#define	ROFF_ARGS	 struct roff *r, \
			 enum rofft tok, \
			 char **bufp, \
			 size_t *szp, \
			 int ln, \
			 int ppos, \
			 int pos, \
			 int *offs

/* Signature of a request handler. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
152
153 struct roffmac {
154 const char *name; /* macro name */
155 roffproc proc; /* process new macro */
156 roffproc text; /* process as child text of macro */
157 roffproc sub; /* process as child of macro */
158 int flags;
159 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
160 struct roffmac *next;
161 };
162
/*
 * One predefined string: an input name and its replacement.
 */
struct predef {
	const char	*name;	/* name as written in the input */
	const char	*str;	/* text substituted for it */
};

/* Build one struct predef initializer, including the trailing comma. */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
170
171 static enum rofft roffhash_find(const char *, size_t);
172 static void roffhash_init(void);
173 static void roffnode_cleanscope(struct roff *);
174 static void roffnode_pop(struct roff *);
175 static void roffnode_push(struct roff *, enum rofft,
176 const char *, int, int);
177 static enum rofferr roff_block(ROFF_ARGS);
178 static enum rofferr roff_block_text(ROFF_ARGS);
179 static enum rofferr roff_block_sub(ROFF_ARGS);
180 static enum rofferr roff_cblock(ROFF_ARGS);
181 static enum rofferr roff_cc(ROFF_ARGS);
182 static void roff_ccond(struct roff *, int, int);
183 static enum rofferr roff_cond(ROFF_ARGS);
184 static enum rofferr roff_cond_text(ROFF_ARGS);
185 static enum rofferr roff_cond_sub(ROFF_ARGS);
186 static enum rofferr roff_ds(ROFF_ARGS);
187 static enum rofferr roff_eqndelim(struct roff *,
188 char **, size_t *, int);
189 static int roff_evalcond(struct roff *r, int,
190 const char *, int *);
191 static int roff_evalnum(struct roff *, int,
192 const char *, int *, int *, int);
193 static int roff_evalpar(struct roff *, int,
194 const char *, int *, int *);
195 static int roff_evalstrcond(const char *, int *);
196 static void roff_free1(struct roff *);
197 static void roff_freereg(struct roffreg *);
198 static void roff_freestr(struct roffkv *);
199 static size_t roff_getname(struct roff *, char **, int, int);
200 static int roff_getnum(const char *, int *, int *);
201 static int roff_getop(const char *, int *, char *);
202 static int roff_getregn(const struct roff *,
203 const char *, size_t);
204 static int roff_getregro(const char *name);
205 static const char *roff_getstrn(const struct roff *,
206 const char *, size_t);
207 static enum rofferr roff_it(ROFF_ARGS);
208 static enum rofferr roff_line_ignore(ROFF_ARGS);
209 static enum rofferr roff_nr(ROFF_ARGS);
210 static void roff_openeqn(struct roff *, const char *,
211 int, int, const char *);
212 static enum rofft roff_parse(struct roff *, char *, int *,
213 int, int);
214 static enum rofferr roff_parsetext(char **, size_t *, int, int *);
215 static enum rofferr roff_res(struct roff *,
216 char **, size_t *, int, int);
217 static enum rofferr roff_rm(ROFF_ARGS);
218 static enum rofferr roff_rr(ROFF_ARGS);
219 static void roff_setstr(struct roff *,
220 const char *, const char *, int);
221 static void roff_setstrn(struct roffkv **, const char *,
222 size_t, const char *, size_t, int);
223 static enum rofferr roff_so(ROFF_ARGS);
224 static enum rofferr roff_tr(ROFF_ARGS);
225 static enum rofferr roff_Dd(ROFF_ARGS);
226 static enum rofferr roff_TH(ROFF_ARGS);
227 static enum rofferr roff_TE(ROFF_ARGS);
228 static enum rofferr roff_TS(ROFF_ARGS);
229 static enum rofferr roff_EQ(ROFF_ARGS);
230 static enum rofferr roff_EN(ROFF_ARGS);
231 static enum rofferr roff_T_(ROFF_ARGS);
232 static enum rofferr roff_userdef(ROFF_ARGS);
233
234 /* See roffhash_find() */
235
236 #define ASCII_HI 126
237 #define ASCII_LO 33
238 #define HASHWIDTH (ASCII_HI - ASCII_LO + 1)
239
240 static struct roffmac *hash[HASHWIDTH];
241
242 static struct roffmac roffs[ROFF_MAX] = {
243 { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
244 { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
245 { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
246 { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
247 { "as", roff_ds, NULL, NULL, 0, NULL },
248 { "cc", roff_cc, NULL, NULL, 0, NULL },
249 { "ce", roff_line_ignore, NULL, NULL, 0, NULL },
250 { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
251 { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
252 { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
253 { "ds", roff_ds, NULL, NULL, 0, NULL },
254 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
255 { "fam", roff_line_ignore, NULL, NULL, 0, NULL },
256 { "hw", roff_line_ignore, NULL, NULL, 0, NULL },
257 { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
258 { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
259 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
260 { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
261 { "it", roff_it, NULL, NULL, 0, NULL },
262 { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
263 { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
264 { "nr", roff_nr, NULL, NULL, 0, NULL },
265 { "ns", roff_line_ignore, NULL, NULL, 0, NULL },
266 { "pl", roff_line_ignore, NULL, NULL, 0, NULL },
267 { "ps", roff_line_ignore, NULL, NULL, 0, NULL },
268 { "rm", roff_rm, NULL, NULL, 0, NULL },
269 { "rr", roff_rr, NULL, NULL, 0, NULL },
270 { "so", roff_so, NULL, NULL, 0, NULL },
271 { "ta", roff_line_ignore, NULL, NULL, 0, NULL },
272 { "tr", roff_tr, NULL, NULL, 0, NULL },
273 { "Dd", roff_Dd, NULL, NULL, 0, NULL },
274 { "TH", roff_TH, NULL, NULL, 0, NULL },
275 { "TS", roff_TS, NULL, NULL, 0, NULL },
276 { "TE", roff_TE, NULL, NULL, 0, NULL },
277 { "T&", roff_T_, NULL, NULL, 0, NULL },
278 { "EQ", roff_EQ, NULL, NULL, 0, NULL },
279 { "EN", roff_EN, NULL, NULL, 0, NULL },
280 { ".", roff_cblock, NULL, NULL, 0, NULL },
281 { NULL, roff_userdef, NULL, NULL, 0, NULL },
282 };
283
/*
 * NULL-terminated list of reserved mdoc macro names.
 * not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH
 */
const char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq", "Brc", "Bro", "Brq",
	"Bsx", "Bt", "Bx",
	"Cd", "Cm",
	"Db", "Dc", "Dd", "Dl", "Do", "Dq", "Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em", "En", "Eo", "Er", "Es",
	"Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf",
	"Ic", "In", "It",
	"Lb", "Li", "Lk", "Lp",
	"Ms", "Mt",
	"Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "Pq",
	"Qc", "Ql", "Qo", "Qq",
	"Re", "Rs", "Rv",
	"Sc", "Sh", "Sm", "So", "Sq", "Ss", "St", "Sx", "Sy",
	"Ta", "Tn",
	"Ud", "Ux",
	"Va", "Vt",
	"Xc", "Xo", "Xr",
	"%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
306
/*
 * NULL-terminated list of reserved man macro names.
 * not currently implemented: BT DE DS ME MT PT SY TQ YS
 */
const char *const __man_reserved[] = {
	"AT",
	"B", "BI", "BR",
	"DT",
	"EE", "EN", "EQ", "EX",
	"HP",
	"I", "IB", "IP", "IR",
	"LP",
	"OP",
	"P", "PD", "PP",
	"R", "RB", "RE", "RI", "RS",
	"SB", "SH", "SM", "SS",
	"TE", "TH", "TP", "TS", "T&",
	"UC", "UE", "UR",
	NULL
};
316
317 /* Array of injected predefined strings. */
318 #define PREDEFS_MAX 38
319 static const struct predef predefs[PREDEFS_MAX] = {
320 #include "predefs.in"
321 };
322
/* Bucket index for a request name: its first byte, rebased to zero. */
#define	ROFF_HASH(p)	(p[0] - ASCII_LO)

/* State of the input line trap. */
static	int	 roffit_lines;	/* number of lines to delay */
static	char	*roffit_macro;	/* NUL-terminated macro line to inject */
328
329
330 static void
331 roffhash_init(void)
332 {
333 struct roffmac *n;
334 int buc, i;
335
336 for (i = 0; i < (int)ROFF_USERDEF; i++) {
337 assert(roffs[i].name[0] >= ASCII_LO);
338 assert(roffs[i].name[0] <= ASCII_HI);
339
340 buc = ROFF_HASH(roffs[i].name);
341
342 if (NULL != (n = hash[buc])) {
343 for ( ; n->next; n = n->next)
344 /* Do nothing. */ ;
345 n->next = &roffs[i];
346 } else
347 hash[buc] = &roffs[i];
348 }
349 }
350
351 /*
352 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
353 * the nil-terminated string name could be found.
354 */
355 static enum rofft
356 roffhash_find(const char *p, size_t s)
357 {
358 int buc;
359 struct roffmac *n;
360
361 /*
362 * libroff has an extremely simple hashtable, for the time
363 * being, which simply keys on the first character, which must
364 * be printable, then walks a chain. It works well enough until
365 * optimised.
366 */
367
368 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
369 return(ROFF_MAX);
370
371 buc = ROFF_HASH(p);
372
373 if (NULL == (n = hash[buc]))
374 return(ROFF_MAX);
375 for ( ; n; n = n->next)
376 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
377 return((enum rofft)(n - roffs));
378
379 return(ROFF_MAX);
380 }
381
382 /*
383 * Pop the current node off of the stack of roff instructions currently
384 * pending.
385 */
386 static void
387 roffnode_pop(struct roff *r)
388 {
389 struct roffnode *p;
390
391 assert(r->last);
392 p = r->last;
393
394 r->last = r->last->parent;
395 free(p->name);
396 free(p->end);
397 free(p);
398 }
399
400 /*
401 * Push a roff node onto the instruction stack. This must later be
402 * removed with roffnode_pop().
403 */
404 static void
405 roffnode_push(struct roff *r, enum rofft tok, const char *name,
406 int line, int col)
407 {
408 struct roffnode *p;
409
410 p = mandoc_calloc(1, sizeof(struct roffnode));
411 p->tok = tok;
412 if (name)
413 p->name = mandoc_strdup(name);
414 p->parent = r->last;
415 p->line = line;
416 p->col = col;
417 p->rule = p->parent ? p->parent->rule : 0;
418
419 r->last = p;
420 }
421
422 static void
423 roff_free1(struct roff *r)
424 {
425 struct tbl_node *tbl;
426 struct eqn_node *e;
427 int i;
428
429 while (NULL != (tbl = r->first_tbl)) {
430 r->first_tbl = tbl->next;
431 tbl_free(tbl);
432 }
433 r->first_tbl = r->last_tbl = r->tbl = NULL;
434
435 while (NULL != (e = r->first_eqn)) {
436 r->first_eqn = e->next;
437 eqn_free(e);
438 }
439 r->first_eqn = r->last_eqn = r->eqn = NULL;
440
441 while (r->last)
442 roffnode_pop(r);
443
444 free (r->rstack);
445 r->rstack = NULL;
446 r->rstacksz = 0;
447 r->rstackpos = -1;
448
449 roff_freereg(r->regtab);
450 r->regtab = NULL;
451
452 roff_freestr(r->strtab);
453 roff_freestr(r->xmbtab);
454 r->strtab = r->xmbtab = NULL;
455
456 if (r->xtab)
457 for (i = 0; i < 128; i++)
458 free(r->xtab[i].p);
459 free(r->xtab);
460 r->xtab = NULL;
461 }
462
463 void
464 roff_reset(struct roff *r)
465 {
466
467 roff_free1(r);
468 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
469 r->control = 0;
470 }
471
/*
 * Destroy a parser: release its contents, then the parser itself.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);
	free(r);
}
479
480 struct roff *
481 roff_alloc(struct mparse *parse, int options)
482 {
483 struct roff *r;
484
485 r = mandoc_calloc(1, sizeof(struct roff));
486 r->parse = parse;
487 r->options = options;
488 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
489 r->rstackpos = -1;
490
491 roffhash_init();
492
493 return(r);
494 }
495
496 /*
497 * In the current line, expand escape sequences that tend to get
498 * used in numerical expressions and conditional requests.
499 * Also check the syntax of the remaining escape sequences.
500 */
501 static enum rofferr
502 roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
503 {
504 char ubuf[24]; /* buffer to print the number */
505 const char *start; /* start of the string to process */
506 char *stesc; /* start of an escape sequence ('\\') */
507 const char *stnam; /* start of the name, after "[(*" */
508 const char *cp; /* end of the name, e.g. before ']' */
509 const char *res; /* the string to be substituted */
510 char *nbuf; /* new buffer to copy bufp to */
511 size_t maxl; /* expected length of the escape name */
512 size_t naml; /* actual length of the escape name */
513 int expand_count; /* to avoid infinite loops */
514 int npos; /* position in numeric expression */
515 int arg_complete; /* argument not interrupted by eol */
516 char term; /* character terminating the escape */
517
518 expand_count = 0;
519 start = *bufp + pos;
520 stesc = strchr(start, '\0') - 1;
521 while (stesc-- > start) {
522
523 /* Search backwards for the next backslash. */
524
525 if ('\\' != *stesc)
526 continue;
527
528 /* If it is escaped, skip it. */
529
530 for (cp = stesc - 1; cp >= start; cp--)
531 if ('\\' != *cp)
532 break;
533
534 if (0 == (stesc - cp) % 2) {
535 stesc = (char *)cp;
536 continue;
537 }
538
539 /* Decide whether to expand or to check only. */
540
541 term = '\0';
542 cp = stesc + 1;
543 switch (*cp) {
544 case '*':
545 res = NULL;
546 break;
547 case 'B':
548 /* FALLTHROUGH */
549 case 'w':
550 term = cp[1];
551 /* FALLTHROUGH */
552 case 'n':
553 res = ubuf;
554 break;
555 default:
556 if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
557 mandoc_vmsg(MANDOCERR_ESC_BAD,
558 r->parse, ln, (int)(stesc - *bufp),
559 "%.*s", (int)(cp - stesc), stesc);
560 continue;
561 }
562
563 if (EXPAND_LIMIT < ++expand_count) {
564 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
565 ln, (int)(stesc - *bufp), NULL);
566 return(ROFF_IGN);
567 }
568
569 /*
570 * The third character decides the length
571 * of the name of the string or register.
572 * Save a pointer to the name.
573 */
574
575 if ('\0' == term) {
576 switch (*++cp) {
577 case '\0':
578 maxl = 0;
579 break;
580 case '(':
581 cp++;
582 maxl = 2;
583 break;
584 case '[':
585 cp++;
586 term = ']';
587 maxl = 0;
588 break;
589 default:
590 maxl = 1;
591 break;
592 }
593 } else {
594 cp += 2;
595 maxl = 0;
596 }
597 stnam = cp;
598
599 /* Advance to the end of the name. */
600
601 arg_complete = 1;
602 for (naml = 0; 0 == maxl || naml < maxl; naml++, cp++) {
603 if ('\0' == *cp) {
604 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
605 ln, (int)(stesc - *bufp), stesc);
606 arg_complete = 0;
607 break;
608 }
609 if (0 == maxl && *cp == term) {
610 cp++;
611 break;
612 }
613 }
614
615 /*
616 * Retrieve the replacement string; if it is
617 * undefined, resume searching for escapes.
618 */
619
620 switch (stesc[1]) {
621 case '*':
622 if (arg_complete)
623 res = roff_getstrn(r, stnam, naml);
624 break;
625 case 'B':
626 npos = 0;
627 ubuf[0] = arg_complete &&
628 roff_evalnum(r, ln, stnam, &npos, NULL, 0) &&
629 stnam + npos + 1 == cp ? '1' : '0';
630 ubuf[1] = '\0';
631 break;
632 case 'n':
633 if (arg_complete)
634 (void)snprintf(ubuf, sizeof(ubuf), "%d",
635 roff_getregn(r, stnam, naml));
636 else
637 ubuf[0] = '\0';
638 break;
639 case 'w':
640 /* use even incomplete args */
641 (void)snprintf(ubuf, sizeof(ubuf), "%d",
642 24 * (int)naml);
643 break;
644 }
645
646 if (NULL == res) {
647 mandoc_vmsg(MANDOCERR_STR_UNDEF,
648 r->parse, ln, (int)(stesc - *bufp),
649 "%.*s", (int)naml, stnam);
650 res = "";
651 }
652
653 /* Replace the escape sequence by the string. */
654
655 *stesc = '\0';
656 *szp = mandoc_asprintf(&nbuf, "%s%s%s",
657 *bufp, res, cp) + 1;
658
659 /* Prepare for the next replacement. */
660
661 start = nbuf + pos;
662 stesc = nbuf + (stesc - *bufp) + strlen(res);
663 free(*bufp);
664 *bufp = nbuf;
665 }
666 return(ROFF_CONT);
667 }
668
669 /*
670 * Process text streams:
671 * Convert all breakable hyphens into ASCII_HYPH.
672 * Decrement and spring input line trap.
673 */
674 static enum rofferr
675 roff_parsetext(char **bufp, size_t *szp, int pos, int *offs)
676 {
677 size_t sz;
678 const char *start;
679 char *p;
680 int isz;
681 enum mandoc_esc esc;
682
683 start = p = *bufp + pos;
684
685 while ('\0' != *p) {
686 sz = strcspn(p, "-\\");
687 p += sz;
688
689 if ('\0' == *p)
690 break;
691
692 if ('\\' == *p) {
693 /* Skip over escapes. */
694 p++;
695 esc = mandoc_escape((const char **)&p, NULL, NULL);
696 if (ESCAPE_ERROR == esc)
697 break;
698 continue;
699 } else if (p == start) {
700 p++;
701 continue;
702 }
703
704 if (isalpha((unsigned char)p[-1]) &&
705 isalpha((unsigned char)p[1]))
706 *p = ASCII_HYPH;
707 p++;
708 }
709
710 /* Spring the input line trap. */
711 if (1 == roffit_lines) {
712 isz = mandoc_asprintf(&p, "%s\n.%s", *bufp, roffit_macro);
713 free(*bufp);
714 *bufp = p;
715 *szp = isz + 1;
716 *offs = 0;
717 free(roffit_macro);
718 roffit_lines = 0;
719 return(ROFF_REPARSE);
720 } else if (1 < roffit_lines)
721 --roffit_lines;
722 return(ROFF_CONT);
723 }
724
725 enum rofferr
726 roff_parseln(struct roff *r, int ln, char **bufp,
727 size_t *szp, int pos, int *offs)
728 {
729 enum rofft t;
730 enum rofferr e;
731 int ppos, ctl;
732
733 /* Handle in-line equation delimiters. */
734
735 if (r->last_eqn != NULL && r->last_eqn->delim &&
736 (r->eqn == NULL || r->eqn_inline)) {
737 e = roff_eqndelim(r, bufp, szp, pos);
738 if (e == ROFF_REPARSE)
739 return(e);
740 assert(e == ROFF_CONT);
741 }
742
743 /* Expand some escape sequences. */
744
745 e = roff_res(r, bufp, szp, ln, pos);
746 if (ROFF_IGN == e)
747 return(e);
748 assert(ROFF_CONT == e);
749
750 ppos = pos;
751 ctl = roff_getcontrol(r, *bufp, &pos);
752
753 /*
754 * First, if a scope is open and we're not a macro, pass the
755 * text through the macro's filter. If a scope isn't open and
756 * we're not a macro, just let it through.
757 * Finally, if there's an equation scope open, divert it into it
758 * no matter our state.
759 */
760
761 if (r->last && ! ctl) {
762 t = r->last->tok;
763 assert(roffs[t].text);
764 e = (*roffs[t].text)(r, t, bufp, szp, ln, pos, pos, offs);
765 assert(ROFF_IGN == e || ROFF_CONT == e);
766 if (ROFF_CONT != e)
767 return(e);
768 }
769 if (r->eqn)
770 return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
771 if ( ! ctl) {
772 if (r->tbl)
773 return(tbl_read(r->tbl, ln, *bufp, pos));
774 return(roff_parsetext(bufp, szp, pos, offs));
775 }
776
777 /* Skip empty request lines. */
778
779 if ((*bufp)[pos] == '"') {
780 mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
781 ln, pos, NULL);
782 return(ROFF_IGN);
783 } else if ((*bufp)[pos] == '\0')
784 return(ROFF_IGN);
785
786 /*
787 * If a scope is open, go to the child handler for that macro,
788 * as it may want to preprocess before doing anything with it.
789 * Don't do so if an equation is open.
790 */
791
792 if (r->last) {
793 t = r->last->tok;
794 assert(roffs[t].sub);
795 return((*roffs[t].sub)(r, t, bufp, szp,
796 ln, ppos, pos, offs));
797 }
798
799 /*
800 * Lastly, as we've no scope open, try to look up and execute
801 * the new macro. If no macro is found, simply return and let
802 * the compilers handle it.
803 */
804
805 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos, ln, ppos)))
806 return(ROFF_CONT);
807
808 assert(roffs[t].proc);
809 return((*roffs[t].proc)(r, t, bufp, szp, ln, ppos, pos, offs));
810 }
811
812 void
813 roff_endparse(struct roff *r)
814 {
815
816 if (r->last)
817 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
818 r->last->line, r->last->col,
819 roffs[r->last->tok].name);
820
821 if (r->eqn) {
822 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
823 r->eqn->eqn.ln, r->eqn->eqn.pos, "EQ");
824 eqn_end(&r->eqn);
825 }
826
827 if (r->tbl) {
828 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
829 r->tbl->line, r->tbl->pos, "TS");
830 tbl_end(&r->tbl);
831 }
832 }
833
834 /*
835 * Parse a roff node's type from the input buffer. This must be in the
836 * form of ".foo xxx" in the usual way.
837 */
838 static enum rofft
839 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
840 {
841 char *cp;
842 const char *mac;
843 size_t maclen;
844 enum rofft t;
845
846 cp = buf + *pos;
847
848 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
849 return(ROFF_MAX);
850
851 mac = cp;
852 maclen = roff_getname(r, &cp, ln, ppos);
853
854 t = (r->current_string = roff_getstrn(r, mac, maclen))
855 ? ROFF_USERDEF : roffhash_find(mac, maclen);
856
857 if (ROFF_MAX != t)
858 *pos = cp - buf;
859
860 return(t);
861 }
862
863 static enum rofferr
864 roff_cblock(ROFF_ARGS)
865 {
866
867 /*
868 * A block-close `..' should only be invoked as a child of an
869 * ignore macro, otherwise raise a warning and just ignore it.
870 */
871
872 if (NULL == r->last) {
873 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
874 ln, ppos, "..");
875 return(ROFF_IGN);
876 }
877
878 switch (r->last->tok) {
879 case ROFF_am:
880 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
881 /* FALLTHROUGH */
882 case ROFF_ami:
883 /* FALLTHROUGH */
884 case ROFF_de:
885 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
886 /* FALLTHROUGH */
887 case ROFF_dei:
888 /* FALLTHROUGH */
889 case ROFF_ig:
890 break;
891 default:
892 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
893 ln, ppos, "..");
894 return(ROFF_IGN);
895 }
896
897 if ((*bufp)[pos])
898 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
899 ".. %s", *bufp + pos);
900
901 roffnode_pop(r);
902 roffnode_cleanscope(r);
903 return(ROFF_IGN);
904
905 }
906
907 static void
908 roffnode_cleanscope(struct roff *r)
909 {
910
911 while (r->last) {
912 if (--r->last->endspan != 0)
913 break;
914 roffnode_pop(r);
915 }
916 }
917
918 static void
919 roff_ccond(struct roff *r, int ln, int ppos)
920 {
921
922 if (NULL == r->last) {
923 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
924 ln, ppos, "\\}");
925 return;
926 }
927
928 switch (r->last->tok) {
929 case ROFF_el:
930 /* FALLTHROUGH */
931 case ROFF_ie:
932 /* FALLTHROUGH */
933 case ROFF_if:
934 break;
935 default:
936 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
937 ln, ppos, "\\}");
938 return;
939 }
940
941 if (r->last->endspan > -1) {
942 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
943 ln, ppos, "\\}");
944 return;
945 }
946
947 roffnode_pop(r);
948 roffnode_cleanscope(r);
949 return;
950 }
951
952 static enum rofferr
953 roff_block(ROFF_ARGS)
954 {
955 const char *name;
956 char *iname, *cp;
957 size_t namesz;
958
959 /* Ignore groff compatibility mode for now. */
960
961 if (ROFF_de1 == tok)
962 tok = ROFF_de;
963 else if (ROFF_am1 == tok)
964 tok = ROFF_am;
965
966 /* Parse the macro name argument. */
967
968 cp = *bufp + pos;
969 if (ROFF_ig == tok) {
970 iname = NULL;
971 namesz = 0;
972 } else {
973 iname = cp;
974 namesz = roff_getname(r, &cp, ln, ppos);
975 iname[namesz] = '\0';
976 }
977
978 /* Resolve the macro name argument if it is indirect. */
979
980 if (namesz && (ROFF_dei == tok || ROFF_ami == tok)) {
981 if (NULL == (name = roff_getstrn(r, iname, namesz))) {
982 mandoc_vmsg(MANDOCERR_STR_UNDEF,
983 r->parse, ln, (int)(iname - *bufp),
984 "%.*s", (int)namesz, iname);
985 namesz = 0;
986 } else
987 namesz = strlen(name);
988 } else
989 name = iname;
990
991 if (0 == namesz && ROFF_ig != tok) {
992 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
993 ln, ppos, roffs[tok].name);
994 return(ROFF_IGN);
995 }
996
997 roffnode_push(r, tok, name, ln, ppos);
998
999 /*
1000 * At the beginning of a `de' macro, clear the existing string
1001 * with the same name, if there is one. New content will be
1002 * appended from roff_block_text() in multiline mode.
1003 */
1004
1005 if (ROFF_de == tok || ROFF_dei == tok)
1006 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1007
1008 if ('\0' == *cp)
1009 return(ROFF_IGN);
1010
1011 /* Get the custom end marker. */
1012
1013 iname = cp;
1014 namesz = roff_getname(r, &cp, ln, ppos);
1015
1016 /* Resolve the end marker if it is indirect. */
1017
1018 if (namesz && (ROFF_dei == tok || ROFF_ami == tok)) {
1019 if (NULL == (name = roff_getstrn(r, iname, namesz))) {
1020 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1021 r->parse, ln, (int)(iname - *bufp),
1022 "%.*s", (int)namesz, iname);
1023 namesz = 0;
1024 } else
1025 namesz = strlen(name);
1026 } else
1027 name = iname;
1028
1029 if (namesz)
1030 r->last->end = mandoc_strndup(name, namesz);
1031
1032 if ('\0' != *cp)
1033 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
1034 ln, pos, ".%s ... %s", roffs[tok].name, cp);
1035
1036 return(ROFF_IGN);
1037 }
1038
1039 static enum rofferr
1040 roff_block_sub(ROFF_ARGS)
1041 {
1042 enum rofft t;
1043 int i, j;
1044
1045 /*
1046 * First check whether a custom macro exists at this level. If
1047 * it does, then check against it. This is some of groff's
1048 * stranger behaviours. If we encountered a custom end-scope
1049 * tag and that tag also happens to be a "real" macro, then we
1050 * need to try interpreting it again as a real macro. If it's
1051 * not, then return ignore. Else continue.
1052 */
1053
1054 if (r->last->end) {
1055 for (i = pos, j = 0; r->last->end[j]; j++, i++)
1056 if ((*bufp)[i] != r->last->end[j])
1057 break;
1058
1059 if ('\0' == r->last->end[j] &&
1060 ('\0' == (*bufp)[i] ||
1061 ' ' == (*bufp)[i] ||
1062 '\t' == (*bufp)[i])) {
1063 roffnode_pop(r);
1064 roffnode_cleanscope(r);
1065
1066 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
1067 i++;
1068
1069 pos = i;
1070 if (ROFF_MAX != roff_parse(r, *bufp, &pos, ln, ppos))
1071 return(ROFF_RERUN);
1072 return(ROFF_IGN);
1073 }
1074 }
1075
1076 /*
1077 * If we have no custom end-query or lookup failed, then try
1078 * pulling it out of the hashtable.
1079 */
1080
1081 t = roff_parse(r, *bufp, &pos, ln, ppos);
1082
1083 if (ROFF_cblock != t) {
1084 if (ROFF_ig != tok)
1085 roff_setstr(r, r->last->name, *bufp + ppos, 2);
1086 return(ROFF_IGN);
1087 }
1088
1089 assert(roffs[t].proc);
1090 return((*roffs[t].proc)(r, t, bufp, szp, ln, ppos, pos, offs));
1091 }
1092
1093 static enum rofferr
1094 roff_block_text(ROFF_ARGS)
1095 {
1096
1097 if (ROFF_ig != tok)
1098 roff_setstr(r, r->last->name, *bufp + pos, 2);
1099
1100 return(ROFF_IGN);
1101 }
1102
1103 static enum rofferr
1104 roff_cond_sub(ROFF_ARGS)
1105 {
1106 enum rofft t;
1107 char *ep;
1108 int rr;
1109
1110 rr = r->last->rule;
1111 roffnode_cleanscope(r);
1112 t = roff_parse(r, *bufp, &pos, ln, ppos);
1113
1114 /*
1115 * Fully handle known macros when they are structurally
1116 * required or when the conditional evaluated to true.
1117 */
1118
1119 if ((ROFF_MAX != t) &&
1120 (rr || ROFFMAC_STRUCT & roffs[t].flags)) {
1121 assert(roffs[t].proc);
1122 return((*roffs[t].proc)(r, t, bufp, szp,
1123 ln, ppos, pos, offs));
1124 }
1125
1126 /*
1127 * If `\}' occurs on a macro line without a preceding macro,
1128 * drop the line completely.
1129 */
1130
1131 ep = *bufp + pos;
1132 if ('\\' == ep[0] && '}' == ep[1])
1133 rr = 0;
1134
1135 /* Always check for the closing delimiter `\}'. */
1136
1137 while (NULL != (ep = strchr(ep, '\\'))) {
1138 if ('}' == *(++ep)) {
1139 *ep = '&';
1140 roff_ccond(r, ln, ep - *bufp - 1);
1141 }
1142 ++ep;
1143 }
1144 return(rr ? ROFF_CONT : ROFF_IGN);
1145 }
1146
1147 static enum rofferr
1148 roff_cond_text(ROFF_ARGS)
1149 {
1150 char *ep;
1151 int rr;
1152
1153 rr = r->last->rule;
1154 roffnode_cleanscope(r);
1155
1156 ep = *bufp + pos;
1157 while (NULL != (ep = strchr(ep, '\\'))) {
1158 if ('}' == *(++ep)) {
1159 *ep = '&';
1160 roff_ccond(r, ln, ep - *bufp - 1);
1161 }
1162 ++ep;
1163 }
1164 return(rr ? ROFF_CONT : ROFF_IGN);
1165 }
1166
/*
 * Parse a single signed integer number.  Stop at the first non-digit.
 * If there is at least one digit, return 1, store the number in *res
 * and advance the parse point *pos; else return 0 and leave the
 * parse point unchanged (*res is set to 0 in that case).
 * res may be NULL if the value is not needed.
 *
 * Overflows are ignored: the number is accumulated in unsigned
 * arithmetic, which wraps around with defined behaviour, and is
 * converted to int at the end.  Accumulating directly in a signed
 * int would be undefined on overflow - and would overflow in the
 * intermediate sum even for INT_MAX itself.  The conversion of an
 * out-of-range value is implementation-defined; common compilers
 * reduce it modulo 2^N.
 */
static int
roff_getnum(const char *v, int *pos, int *res)
{
	unsigned int	 acc;
	int		 myres, n, p;

	if (NULL == res)
		res = &myres;

	p = *pos;
	n = v[p] == '-';
	if (n)
		p++;

	for (acc = 0; isdigit((unsigned char)v[p]); p++)
		acc = 10 * acc + (unsigned int)(v[p] - '0');

	*res = 0;
	if (p == *pos + n)
		return 0;

	/* Negate before converting, so INT_MIN can be represented. */
	if (n)
		acc = 0 - acc;
	*res = (int)acc;

	*pos = p;
	return 1;
}
1197
/*
 * Evaluate a string comparison condition.
 * The first character is the delimiter.
 * Succeed if the string up to its second occurrence
 * matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or lacking that, to the end of the line.
 * The cursor never moves beyond the terminating NUL;
 * at the end of the line, fail without moving it at all.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*s1, *s2, *s3;
	int		 match;

	match = 0;
	s1 = v + *pos;		/* initial delimiter */

	/* No delimiter at all: nothing may be read after the NUL. */
	if ('\0' == *s1)
		return(0);

	s2 = s1 + 1;		/* for scanning the first string */
	s3 = strchr(s2, *s1);	/* for scanning the second string */

	if (NULL == s3)		/* found no middle delimiter */
		goto out;

	while ('\0' != *++s3) {
		if (*s2 != *s3) {  /* mismatch */
			s3 = strchr(s3, *s1);
			break;
		}
		if (*s3 == *s1) {  /* found the final delimiter */
			match = 1;
			break;
		}
		s2++;
	}

out:
	if (NULL == s3)
		s3 = strchr(s2, '\0');
	else if ('\0' != *s3)	/* step over the final delimiter only */
		s3++;
	*pos = s3 - v;
	return(match);
}
1240
/*
 * Evaluate an optionally negated single character, numerical,
 * or string condition starting at v[*pos], and advance *pos.
 * A leading `!' negates the result.
 * The single letters `n' and `o' count as true conditions,
 * the letters `c', `d', `e', `r', and `t' as false ones.
 * A numerical condition is true if the expression is positive.
 * Anything else is handed to the string comparison.
 * An empty condition is always false, negated or not.
 */
static int
roff_evalcond(struct roff *r, int ln, const char *v, int *pos)
{
	int	 wanttrue, number;

	if ('!' == v[*pos]) {
		wanttrue = 0;
		(*pos)++;
	} else
		wanttrue = 1;

	switch (v[*pos]) {
	case '\0':
		/*
		 * Nothing left to evaluate.  Do not fall through
		 * to the string comparison, which needs at least
		 * a delimiter character to work on.
		 */
		return(0);
	case 'n':
		/* FALLTHROUGH */
	case 'o':
		(*pos)++;
		return(wanttrue);
	case 'c':
		/* FALLTHROUGH */
	case 'd':
		/* FALLTHROUGH */
	case 'e':
		/* FALLTHROUGH */
	case 'r':
		/* FALLTHROUGH */
	case 't':
		(*pos)++;
		return(!wanttrue);
	default:
		break;
	}

	if (roff_evalnum(r, ln, v, pos, &number, 0))
		return((number > 0) == wanttrue);
	else
		return(roff_evalstrcond(v, pos) == wanttrue);
}
1282
1283 static enum rofferr
1284 roff_line_ignore(ROFF_ARGS)
1285 {
1286
1287 return(ROFF_IGN);
1288 }
1289
1290 static enum rofferr
1291 roff_cond(ROFF_ARGS)
1292 {
1293
1294 roffnode_push(r, tok, NULL, ln, ppos);
1295
1296 /*
1297 * An `.el' has no conditional body: it will consume the value
1298 * of the current rstack entry set in prior `ie' calls or
1299 * defaults to DENY.
1300 *
1301 * If we're not an `el', however, then evaluate the conditional.
1302 */
1303
1304 r->last->rule = ROFF_el == tok ?
1305 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
1306 roff_evalcond(r, ln, *bufp, &pos);
1307
1308 /*
1309 * An if-else will put the NEGATION of the current evaluated
1310 * conditional into the stack of rules.
1311 */
1312
1313 if (ROFF_ie == tok) {
1314 if (r->rstackpos + 1 == r->rstacksz) {
1315 r->rstacksz += 16;
1316 r->rstack = mandoc_reallocarray(r->rstack,
1317 r->rstacksz, sizeof(int));
1318 }
1319 r->rstack[++r->rstackpos] = !r->last->rule;
1320 }
1321
1322 /* If the parent has false as its rule, then so do we. */
1323
1324 if (r->last->parent && !r->last->parent->rule)
1325 r->last->rule = 0;
1326
1327 /*
1328 * Determine scope.
1329 * If there is nothing on the line after the conditional,
1330 * not even whitespace, use next-line scope.
1331 */
1332
1333 if ('\0' == (*bufp)[pos]) {
1334 r->last->endspan = 2;
1335 goto out;
1336 }
1337
1338 while (' ' == (*bufp)[pos])
1339 pos++;
1340
1341 /* An opening brace requests multiline scope. */
1342
1343 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1344 r->last->endspan = -1;
1345 pos += 2;
1346 goto out;
1347 }
1348
1349 /*
1350 * Anything else following the conditional causes
1351 * single-line scope. Warn if the scope contains
1352 * nothing but trailing whitespace.
1353 */
1354
1355 if ('\0' == (*bufp)[pos])
1356 mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
1357 ln, ppos, roffs[tok].name);
1358
1359 r->last->endspan = 1;
1360
1361 out:
1362 *offs = pos;
1363 return(ROFF_RERUN);
1364 }
1365
1366 static enum rofferr
1367 roff_ds(ROFF_ARGS)
1368 {
1369 char *string;
1370 const char *name;
1371 size_t namesz;
1372
1373 /*
1374 * The first word is the name of the string.
1375 * If it is empty or terminated by an escape sequence,
1376 * abort the `ds' request without defining anything.
1377 */
1378
1379 name = string = *bufp + pos;
1380 if ('\0' == *name)
1381 return(ROFF_IGN);
1382
1383 namesz = roff_getname(r, &string, ln, pos);
1384 if ('\\' == name[namesz])
1385 return(ROFF_IGN);
1386
1387 /* Read past the initial double-quote, if any. */
1388 if ('"' == *string)
1389 string++;
1390
1391 /* The rest is the value. */
1392 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
1393 ROFF_as == tok);
1394 return(ROFF_IGN);
1395 }
1396
/*
 * Parse a single operator, one or two characters long.
 * On success, store a one-character code in *res, advance the
 * parse point past the operator, and return that code.
 * Two-character operators are encoded as follows:
 * `<=' as 'l', `>=' as 'g', `<>' as '!', `<?' as 'i', `>?' as 'a';
 * `==' is treated like `='.
 * On failure, return 0 and leave the parse point unchanged;
 * *res then holds the character found at the parse point.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	char	 follow;

	*res = v[*pos];

	if (*res == '<' || *res == '>') {
		follow = v[*pos + 1];
		if (follow == '=') {
			*res = (*res == '<') ? 'l' : 'g';
			(*pos)++;
		} else if (follow == '?') {
			*res = (*res == '<') ? 'i' : 'a';
			(*pos)++;
		} else if (follow == '>' && *res == '<') {
			*res = '!';
			(*pos)++;
		}
	} else if (*res == '=') {
		if (v[*pos + 1] == '=')
			(*pos)++;
	} else if (*res == '\0' || strchr("+-*/%&:", *res) == NULL) {
		/* The NUL test is needed because strchr() matches it. */
		return 0;
	}

	(*pos)++;
	return *res;
}
1466
/*
 * Evaluate one operand: either a numeric expression
 * enclosed in parentheses or a single signed integer number.
 * Return 1 on success and 0 on failure.
 */
static int
roff_evalpar(struct roff *r, int ln,
	const char *v, int *pos, int *res)
{
	/* Without an opening parenthesis, expect a plain number. */
	if (v[*pos] != '(')
		return roff_getnum(v, pos, res);

	(*pos)++;
	if (roff_evalnum(r, ln, v, pos, res, 1) == 0)
		return 0;

	if (v[*pos] == ')') {
		(*pos)++;
		return 1;
	}

	/*
	 * The closing parenthesis is missing.
	 * That is an error in validation mode (res == NULL),
	 * but tolerated in evaluation mode.
	 */
	return res != NULL;
}
1496
1497 /*
1498 * Evaluate a complete numeric expression.
1499 * Proceed left to right, there is no concept of precedence.
1500 */
1501 static int
1502 roff_evalnum(struct roff *r, int ln, const char *v,
1503 int *pos, int *res, int skipwhite)
1504 {
1505 int mypos, operand2;
1506 char operator;
1507
1508 if (NULL == pos) {
1509 mypos = 0;
1510 pos = &mypos;
1511 }
1512
1513 if (skipwhite)
1514 while (isspace((unsigned char)v[*pos]))
1515 (*pos)++;
1516
1517 if ( ! roff_evalpar(r, ln, v, pos, res))
1518 return(0);
1519
1520 while (1) {
1521 if (skipwhite)
1522 while (isspace((unsigned char)v[*pos]))
1523 (*pos)++;
1524
1525 if ( ! roff_getop(v, pos, &operator))
1526 break;
1527
1528 if (skipwhite)
1529 while (isspace((unsigned char)v[*pos]))
1530 (*pos)++;
1531
1532 if ( ! roff_evalpar(r, ln, v, pos, &operand2))
1533 return(0);
1534
1535 if (skipwhite)
1536 while (isspace((unsigned char)v[*pos]))
1537 (*pos)++;
1538
1539 if (NULL == res)
1540 continue;
1541
1542 switch (operator) {
1543 case '+':
1544 *res += operand2;
1545 break;
1546 case '-':
1547 *res -= operand2;
1548 break;
1549 case '*':
1550 *res *= operand2;
1551 break;
1552 case '/':
1553 if (0 == operand2) {
1554 mandoc_msg(MANDOCERR_DIVZERO,
1555 r->parse, ln, *pos, v);
1556 *res = 0;
1557 break;
1558 }
1559 *res /= operand2;
1560 break;
1561 case '%':
1562 *res %= operand2;
1563 break;
1564 case '<':
1565 *res = *res < operand2;
1566 break;
1567 case '>':
1568 *res = *res > operand2;
1569 break;
1570 case 'l':
1571 *res = *res <= operand2;
1572 break;
1573 case 'g':
1574 *res = *res >= operand2;
1575 break;
1576 case '=':
1577 *res = *res == operand2;
1578 break;
1579 case '!':
1580 *res = *res != operand2;
1581 break;
1582 case '&':
1583 *res = *res && operand2;
1584 break;
1585 case ':':
1586 *res = *res || operand2;
1587 break;
1588 case 'i':
1589 if (operand2 < *res)
1590 *res = operand2;
1591 break;
1592 case 'a':
1593 if (operand2 > *res)
1594 *res = operand2;
1595 break;
1596 default:
1597 abort();
1598 }
1599 }
1600 return(1);
1601 }
1602
1603 void
1604 roff_setreg(struct roff *r, const char *name, int val, char sign)
1605 {
1606 struct roffreg *reg;
1607
1608 /* Search for an existing register with the same name. */
1609 reg = r->regtab;
1610
1611 while (reg && strcmp(name, reg->key.p))
1612 reg = reg->next;
1613
1614 if (NULL == reg) {
1615 /* Create a new register. */
1616 reg = mandoc_malloc(sizeof(struct roffreg));
1617 reg->key.p = mandoc_strdup(name);
1618 reg->key.sz = strlen(name);
1619 reg->val = 0;
1620 reg->next = r->regtab;
1621 r->regtab = reg;
1622 }
1623
1624 if ('+' == sign)
1625 reg->val += val;
1626 else if ('-' == sign)
1627 reg->val -= val;
1628 else
1629 reg->val = val;
1630 }
1631
/*
 * Look up a predefined read-only number register by the first
 * character of name.
 * Return -1 if there is no such predefined register; should a
 * predefined register with the value -1 ever be needed,
 * a different marker value will have to be picked.
 */
static int
roff_getregro(const char *name)
{
	const char	 id = *name;

	/* Groff compatibility mode; an output device is defined. */
	if (id == 'g' || id == 'T')
		return 1;

	/* No ASCII approximation; adjust the left margin only. */
	if (id == 'A' || id == 'j')
		return 0;

	/* Fixed horizontal resolution. */
	if (id == 'H')
		return 24;

	/* Fixed vertical resolution. */
	if (id == 'V')
		return 40;

	return -1;
}
1659
1660 int
1661 roff_getreg(const struct roff *r, const char *name)
1662 {
1663 struct roffreg *reg;
1664 int val;
1665
1666 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
1667 val = roff_getregro(name + 1);
1668 if (-1 != val)
1669 return (val);
1670 }
1671
1672 for (reg = r->regtab; reg; reg = reg->next)
1673 if (0 == strcmp(name, reg->key.p))
1674 return(reg->val);
1675
1676 return(0);
1677 }
1678
1679 static int
1680 roff_getregn(const struct roff *r, const char *name, size_t len)
1681 {
1682 struct roffreg *reg;
1683 int val;
1684
1685 if ('.' == name[0] && 2 == len) {
1686 val = roff_getregro(name + 1);
1687 if (-1 != val)
1688 return (val);
1689 }
1690
1691 for (reg = r->regtab; reg; reg = reg->next)
1692 if (len == reg->key.sz &&
1693 0 == strncmp(name, reg->key.p, len))
1694 return(reg->val);
1695
1696 return(0);
1697 }
1698
1699 static void
1700 roff_freereg(struct roffreg *reg)
1701 {
1702 struct roffreg *old_reg;
1703
1704 while (NULL != reg) {
1705 free(reg->key.p);
1706 old_reg = reg;
1707 reg = reg->next;
1708 free(old_reg);
1709 }
1710 }
1711
1712 static enum rofferr
1713 roff_nr(ROFF_ARGS)
1714 {
1715 char *key, *val;
1716 size_t keysz;
1717 int iv;
1718 char sign;
1719
1720 key = val = *bufp + pos;
1721 if ('\0' == *key)
1722 return(ROFF_IGN);
1723
1724 keysz = roff_getname(r, &val, ln, pos);
1725 if ('\\' == key[keysz])
1726 return(ROFF_IGN);
1727 key[keysz] = '\0';
1728
1729 sign = *val;
1730 if ('+' == sign || '-' == sign)
1731 val++;
1732
1733 if (roff_evalnum(r, ln, val, NULL, &iv, 0))
1734 roff_setreg(r, key, iv, sign);
1735
1736 return(ROFF_IGN);
1737 }
1738
1739 static enum rofferr
1740 roff_rr(ROFF_ARGS)
1741 {
1742 struct roffreg *reg, **prev;
1743 char *name, *cp;
1744 size_t namesz;
1745
1746 name = cp = *bufp + pos;
1747 if ('\0' == *name)
1748 return(ROFF_IGN);
1749 namesz = roff_getname(r, &cp, ln, pos);
1750 name[namesz] = '\0';
1751
1752 prev = &r->regtab;
1753 while (1) {
1754 reg = *prev;
1755 if (NULL == reg || !strcmp(name, reg->key.p))
1756 break;
1757 prev = &reg->next;
1758 }
1759 if (NULL != reg) {
1760 *prev = reg->next;
1761 free(reg->key.p);
1762 free(reg);
1763 }
1764 return(ROFF_IGN);
1765 }
1766
1767 static enum rofferr
1768 roff_rm(ROFF_ARGS)
1769 {
1770 const char *name;
1771 char *cp;
1772 size_t namesz;
1773
1774 cp = *bufp + pos;
1775 while ('\0' != *cp) {
1776 name = cp;
1777 namesz = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1778 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
1779 if ('\\' == name[namesz])
1780 break;
1781 }
1782 return(ROFF_IGN);
1783 }
1784
1785 static enum rofferr
1786 roff_it(ROFF_ARGS)
1787 {
1788 char *cp;
1789 size_t len;
1790 int iv;
1791
1792 /* Parse the number of lines. */
1793 cp = *bufp + pos;
1794 len = strcspn(cp, " \t");
1795 cp[len] = '\0';
1796 if ((iv = mandoc_strntoi(cp, len, 10)) <= 0) {
1797 mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
1798 ln, ppos, *bufp + 1);
1799 return(ROFF_IGN);
1800 }
1801 cp += len + 1;
1802
1803 /* Arm the input line trap. */
1804 roffit_lines = iv;
1805 roffit_macro = mandoc_strdup(cp);
1806 return(ROFF_IGN);
1807 }
1808
1809 static enum rofferr
1810 roff_Dd(ROFF_ARGS)
1811 {
1812 const char *const *cp;
1813
1814 if ((r->options & (MPARSE_MDOC | MPARSE_QUICK)) == 0)
1815 for (cp = __mdoc_reserved; *cp; cp++)
1816 roff_setstr(r, *cp, NULL, 0);
1817
1818 if (r->format == 0)
1819 r->format = MPARSE_MDOC;
1820
1821 return(ROFF_CONT);
1822 }
1823
1824 static enum rofferr
1825 roff_TH(ROFF_ARGS)
1826 {
1827 const char *const *cp;
1828
1829 if ((r->options & MPARSE_QUICK) == 0)
1830 for (cp = __man_reserved; *cp; cp++)
1831 roff_setstr(r, *cp, NULL, 0);
1832
1833 if (r->format == 0)
1834 r->format = MPARSE_MAN;
1835
1836 return(ROFF_CONT);
1837 }
1838
1839 static enum rofferr
1840 roff_TE(ROFF_ARGS)
1841 {
1842
1843 if (NULL == r->tbl)
1844 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1845 ln, ppos, "TE");
1846 else
1847 tbl_end(&r->tbl);
1848
1849 return(ROFF_IGN);
1850 }
1851
1852 static enum rofferr
1853 roff_T_(ROFF_ARGS)
1854 {
1855
1856 if (NULL == r->tbl)
1857 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1858 ln, ppos, "T&");
1859 else
1860 tbl_restart(ppos, ln, r->tbl);
1861
1862 return(ROFF_IGN);
1863 }
1864
1865 /*
1866 * Handle in-line equation delimiters.
1867 */
1868 static enum rofferr
1869 roff_eqndelim(struct roff *r, char **bufp, size_t *szp, int pos)
1870 {
1871 char *cp1, *cp2;
1872 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
1873
1874 /*
1875 * Outside equations, look for an opening delimiter.
1876 * If we are inside an equation, we already know it is
1877 * in-line, or this function wouldn't have been called;
1878 * so look for a closing delimiter.
1879 */
1880
1881 cp1 = *bufp + pos;
1882 cp2 = strchr(cp1, r->eqn == NULL ?
1883 r->last_eqn->odelim : r->last_eqn->cdelim);
1884 if (cp2 == NULL)
1885 return(ROFF_CONT);
1886
1887 *cp2++ = '\0';
1888 bef_pr = bef_nl = aft_nl = aft_pr = "";
1889
1890 /* Handle preceding text, protecting whitespace. */
1891
1892 if (**bufp != '\0') {
1893 if (r->eqn == NULL)
1894 bef_pr = "\\&";
1895 bef_nl = "\n";
1896 }
1897
1898 /*
1899 * Prepare replacing the delimiter with an equation macro
1900 * and drop leading white space from the equation.
1901 */
1902
1903 if (r->eqn == NULL) {
1904 while (*cp2 == ' ')
1905 cp2++;
1906 mac = ".EQ";
1907 } else
1908 mac = ".EN";
1909
1910 /* Handle following text, protecting whitespace. */
1911
1912 if (*cp2 != '\0') {
1913 aft_nl = "\n";
1914 if (r->eqn != NULL)
1915 aft_pr = "\\&";
1916 }
1917
1918 /* Do the actual replacement. */
1919
1920 *szp = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", *bufp,
1921 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
1922 free(*bufp);
1923 *bufp = cp1;
1924
1925 /* Toggle the in-line state of the eqn subsystem. */
1926
1927 r->eqn_inline = r->eqn == NULL;
1928 return(ROFF_REPARSE);
1929 }
1930
1931 static void
1932 roff_openeqn(struct roff *r, const char *name, int line,
1933 int offs, const char *buf)
1934 {
1935 struct eqn_node *e;
1936 int poff;
1937
1938 assert(NULL == r->eqn);
1939 e = eqn_alloc(name, offs, line, r->parse);
1940
1941 if (r->last_eqn) {
1942 r->last_eqn->next = e;
1943 e->delim = r->last_eqn->delim;
1944 e->odelim = r->last_eqn->odelim;
1945 e->cdelim = r->last_eqn->cdelim;
1946 } else
1947 r->first_eqn = r->last_eqn = e;
1948
1949 r->eqn = r->last_eqn = e;
1950
1951 if (buf) {
1952 poff = 0;
1953 eqn_read(&r->eqn, line, buf, offs, &poff);
1954 }
1955 }
1956
1957 static enum rofferr
1958 roff_EQ(ROFF_ARGS)
1959 {
1960
1961 roff_openeqn(r, *bufp + pos, ln, ppos, NULL);
1962 return(ROFF_IGN);
1963 }
1964
1965 static enum rofferr
1966 roff_EN(ROFF_ARGS)
1967 {
1968
1969 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
1970 return(ROFF_IGN);
1971 }
1972
1973 static enum rofferr
1974 roff_TS(ROFF_ARGS)
1975 {
1976 struct tbl_node *tbl;
1977
1978 if (r->tbl) {
1979 mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
1980 ln, ppos, "TS breaks TS");
1981 tbl_end(&r->tbl);
1982 }
1983
1984 tbl = tbl_alloc(ppos, ln, r->parse);
1985
1986 if (r->last_tbl)
1987 r->last_tbl->next = tbl;
1988 else
1989 r->first_tbl = r->last_tbl = tbl;
1990
1991 r->tbl = r->last_tbl = tbl;
1992 return(ROFF_IGN);
1993 }
1994
1995 static enum rofferr
1996 roff_cc(ROFF_ARGS)
1997 {
1998 const char *p;
1999
2000 p = *bufp + pos;
2001
2002 if ('\0' == *p || '.' == (r->control = *p++))
2003 r->control = 0;
2004
2005 if ('\0' != *p)
2006 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
2007
2008 return(ROFF_IGN);
2009 }
2010
2011 static enum rofferr
2012 roff_tr(ROFF_ARGS)
2013 {
2014 const char *p, *first, *second;
2015 size_t fsz, ssz;
2016 enum mandoc_esc esc;
2017
2018 p = *bufp + pos;
2019
2020 if ('\0' == *p) {
2021 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
2022 return(ROFF_IGN);
2023 }
2024
2025 while ('\0' != *p) {
2026 fsz = ssz = 1;
2027
2028 first = p++;
2029 if ('\\' == *first) {
2030 esc = mandoc_escape(&p, NULL, NULL);
2031 if (ESCAPE_ERROR == esc) {
2032 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2033 ln, (int)(p - *bufp), first);
2034 return(ROFF_IGN);
2035 }
2036 fsz = (size_t)(p - first);
2037 }
2038
2039 second = p++;
2040 if ('\\' == *second) {
2041 esc = mandoc_escape(&p, NULL, NULL);
2042 if (ESCAPE_ERROR == esc) {
2043 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2044 ln, (int)(p - *bufp), second);
2045 return(ROFF_IGN);
2046 }
2047 ssz = (size_t)(p - second);
2048 } else if ('\0' == *second) {
2049 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
2050 ln, (int)(p - *bufp), NULL);
2051 second = " ";
2052 p--;
2053 }
2054
2055 if (fsz > 1) {
2056 roff_setstrn(&r->xmbtab, first, fsz,
2057 second, ssz, 0);
2058 continue;
2059 }
2060
2061 if (NULL == r->xtab)
2062 r->xtab = mandoc_calloc(128,
2063 sizeof(struct roffstr));
2064
2065 free(r->xtab[(int)*first].p);
2066 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
2067 r->xtab[(int)*first].sz = ssz;
2068 }
2069
2070 return(ROFF_IGN);
2071 }
2072
2073 static enum rofferr
2074 roff_so(ROFF_ARGS)
2075 {
2076 char *name;
2077
2078 name = *bufp + pos;
2079 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
2080
2081 /*
2082 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
2083 * opening anything that's not in our cwd or anything beneath
2084 * it. Thus, explicitly disallow traversing up the file-system
2085 * or using absolute paths.
2086 */
2087
2088 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
2089 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
2090 ".so %s", name);
2091 return(ROFF_ERR);
2092 }
2093
2094 *offs = pos;
2095 return(ROFF_SO);
2096 }
2097
2098 static enum rofferr
2099 roff_userdef(ROFF_ARGS)
2100 {
2101 const char *arg[9];
2102 char *cp, *n1, *n2;
2103 int i;
2104
2105 /*
2106 * Collect pointers to macro argument strings
2107 * and NUL-terminate them.
2108 */
2109 cp = *bufp + pos;
2110 for (i = 0; i < 9; i++)
2111 arg[i] = '\0' == *cp ? "" :
2112 mandoc_getarg(r->parse, &cp, ln, &pos);
2113
2114 /*
2115 * Expand macro arguments.
2116 */
2117 *szp = 0;
2118 n1 = cp = mandoc_strdup(r->current_string);
2119 while (NULL != (cp = strstr(cp, "\\$"))) {
2120 i = cp[2] - '1';
2121 if (0 > i || 8 < i) {
2122 /* Not an argument invocation. */
2123 cp += 2;
2124 continue;
2125 }
2126 *cp = '\0';
2127 *szp = mandoc_asprintf(&n2, "%s%s%s",
2128 n1, arg[i], cp + 3) + 1;
2129 cp = n2 + (cp - n1);
2130 free(n1);
2131 n1 = n2;
2132 }
2133
2134 /*
2135 * Replace the macro invocation
2136 * by the expanded macro.
2137 */
2138 free(*bufp);
2139 *bufp = n1;
2140 if (0 == *szp)
2141 *szp = strlen(*bufp) + 1;
2142
2143 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
2144 ROFF_REPARSE : ROFF_APPEND);
2145 }
2146
2147 static size_t
2148 roff_getname(struct roff *r, char **cpp, int ln, int pos)
2149 {
2150 char *name, *cp;
2151 size_t namesz;
2152
2153 name = *cpp;
2154 if ('\0' == *name)
2155 return(0);
2156
2157 /* Read until end of name and terminate it with NUL. */
2158 for (cp = name; 1; cp++) {
2159 if ('\0' == *cp || ' ' == *cp) {
2160 namesz = cp - name;
2161 break;
2162 }
2163 if ('\\' != *cp)
2164 continue;
2165 namesz = cp - name;
2166 if ('{' == cp[1] || '}' == cp[1])
2167 break;
2168 cp++;
2169 if ('\\' == *cp)
2170 continue;
2171 mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
2172 "%.*s", (int)(cp - name + 1), name);
2173 mandoc_escape((const char **)&cp, NULL, NULL);
2174 break;
2175 }
2176
2177 /* Read past spaces. */
2178 while (' ' == *cp)
2179 cp++;
2180
2181 *cpp = cp;
2182 return(namesz);
2183 }
2184
2185 /*
2186 * Store *string into the user-defined string called *name.
2187 * To clear an existing entry, call with (*r, *name, NULL, 0).
2188 * append == 0: replace mode
2189 * append == 1: single-line append mode
2190 * append == 2: multiline append mode, append '\n' after each call
2191 */
2192 static void
2193 roff_setstr(struct roff *r, const char *name, const char *string,
2194 int append)
2195 {
2196
2197 roff_setstrn(&r->strtab, name, strlen(name), string,
2198 string ? strlen(string) : 0, append);
2199 }
2200
2201 static void
2202 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
2203 const char *string, size_t stringsz, int append)
2204 {
2205 struct roffkv *n;
2206 char *c;
2207 int i;
2208 size_t oldch, newch;
2209
2210 /* Search for an existing string with the same name. */
2211 n = *r;
2212
2213 while (n && (namesz != n->key.sz ||
2214 strncmp(n->key.p, name, namesz)))
2215 n = n->next;
2216
2217 if (NULL == n) {
2218 /* Create a new string table entry. */
2219 n = mandoc_malloc(sizeof(struct roffkv));
2220 n->key.p = mandoc_strndup(name, namesz);
2221 n->key.sz = namesz;
2222 n->val.p = NULL;
2223 n->val.sz = 0;
2224 n->next = *r;
2225 *r = n;
2226 } else if (0 == append) {
2227 free(n->val.p);
2228 n->val.p = NULL;
2229 n->val.sz = 0;
2230 }
2231
2232 if (NULL == string)
2233 return;
2234
2235 /*
2236 * One additional byte for the '\n' in multiline mode,
2237 * and one for the terminating '\0'.
2238 */
2239 newch = stringsz + (1 < append ? 2u : 1u);
2240
2241 if (NULL == n->val.p) {
2242 n->val.p = mandoc_malloc(newch);
2243 *n->val.p = '\0';
2244 oldch = 0;
2245 } else {
2246 oldch = n->val.sz;
2247 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
2248 }
2249
2250 /* Skip existing content in the destination buffer. */
2251 c = n->val.p + (int)oldch;
2252
2253 /* Append new content to the destination buffer. */
2254 i = 0;
2255 while (i < (int)stringsz) {
2256 /*
2257 * Rudimentary roff copy mode:
2258 * Handle escaped backslashes.
2259 */
2260 if ('\\' == string[i] && '\\' == string[i + 1])
2261 i++;
2262 *c++ = string[i++];
2263 }
2264
2265 /* Append terminating bytes. */
2266 if (1 < append)
2267 *c++ = '\n';
2268
2269 *c = '\0';
2270 n->val.sz = (int)(c - n->val.p);
2271 }
2272
2273 static const char *
2274 roff_getstrn(const struct roff *r, const char *name, size_t len)
2275 {
2276 const struct roffkv *n;
2277 int i;
2278
2279 for (n = r->strtab; n; n = n->next)
2280 if (0 == strncmp(name, n->key.p, len) &&
2281 '\0' == n->key.p[(int)len])
2282 return(n->val.p);
2283
2284 for (i = 0; i < PREDEFS_MAX; i++)
2285 if (0 == strncmp(name, predefs[i].name, len) &&
2286 '\0' == predefs[i].name[(int)len])
2287 return(predefs[i].str);
2288
2289 return(NULL);
2290 }
2291
2292 static void
2293 roff_freestr(struct roffkv *r)
2294 {
2295 struct roffkv *n, *nn;
2296
2297 for (n = r; n; n = nn) {
2298 free(n->key.p);
2299 free(n->val.p);
2300 nn = n->next;
2301 free(n);
2302 }
2303 }
2304
2305 const struct tbl_span *
2306 roff_span(const struct roff *r)
2307 {
2308
2309 return(r->tbl ? tbl_span(r->tbl) : NULL);
2310 }
2311
2312 const struct eqn *
2313 roff_eqn(const struct roff *r)
2314 {
2315
2316 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
2317 }
2318
2319 /*
2320 * Duplicate an input string, making the appropriate character
2321 * conversations (as stipulated by `tr') along the way.
2322 * Returns a heap-allocated string with all the replacements made.
2323 */
2324 char *
2325 roff_strdup(const struct roff *r, const char *p)
2326 {
2327 const struct roffkv *cp;
2328 char *res;
2329 const char *pp;
2330 size_t ssz, sz;
2331 enum mandoc_esc esc;
2332
2333 if (NULL == r->xmbtab && NULL == r->xtab)
2334 return(mandoc_strdup(p));
2335 else if ('\0' == *p)
2336 return(mandoc_strdup(""));
2337
2338 /*
2339 * Step through each character looking for term matches
2340 * (remember that a `tr' can be invoked with an escape, which is
2341 * a glyph but the escape is multi-character).
2342 * We only do this if the character hash has been initialised
2343 * and the string is >0 length.
2344 */
2345
2346 res = NULL;
2347 ssz = 0;
2348
2349 while ('\0' != *p) {
2350 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
2351 sz = r->xtab[(int)*p].sz;
2352 res = mandoc_realloc(res, ssz + sz + 1);
2353 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
2354 ssz += sz;
2355 p++;
2356 continue;
2357 } else if ('\\' != *p) {
2358 res = mandoc_realloc(res, ssz + 2);
2359 res[ssz++] = *p++;
2360 continue;
2361 }
2362
2363 /* Search for term matches. */
2364 for (cp = r->xmbtab; cp; cp = cp->next)
2365 if (0 == strncmp(p, cp->key.p, cp->key.sz))
2366 break;
2367
2368 if (NULL != cp) {
2369 /*
2370 * A match has been found.
2371 * Append the match to the array and move
2372 * forward by its keysize.
2373 */
2374 res = mandoc_realloc(res,
2375 ssz + cp->val.sz + 1);
2376 memcpy(res + ssz, cp->val.p, cp->val.sz);
2377 ssz += cp->val.sz;
2378 p += (int)cp->key.sz;
2379 continue;
2380 }
2381
2382 /*
2383 * Handle escapes carefully: we need to copy
2384 * over just the escape itself, or else we might
2385 * do replacements within the escape itself.
2386 * Make sure to pass along the bogus string.
2387 */
2388 pp = p++;
2389 esc = mandoc_escape(&p, NULL, NULL);
2390 if (ESCAPE_ERROR == esc) {
2391 sz = strlen(pp);
2392 res = mandoc_realloc(res, ssz + sz + 1);
2393 memcpy(res + ssz, pp, sz);
2394 break;
2395 }
2396 /*
2397 * We bail out on bad escapes.
2398 * No need to warn: we already did so when
2399 * roff_res() was called.
2400 */
2401 sz = (int)(p - pp);
2402 res = mandoc_realloc(res, ssz + sz + 1);
2403 memcpy(res + ssz, pp, sz);
2404 ssz += sz;
2405 }
2406
2407 res[(int)ssz] = '\0';
2408 return(res);
2409 }
2410
2411 int
2412 roff_getformat(const struct roff *r)
2413 {
2414
2415 return(r->format);
2416 }
2417
2418 /*
2419 * Find out whether a line is a macro line or not.
2420 * If it is, adjust the current position and return one; if it isn't,
2421 * return zero and don't change the current position.
2422 * If the control character has been set with `.cc', then let that grain
2423 * precedence.
2424 * This is slighly contrary to groff, where using the non-breaking
2425 * control character when `cc' has been invoked will cause the
2426 * non-breaking macro contents to be printed verbatim.
2427 */
2428 int
2429 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
2430 {
2431 int pos;
2432
2433 pos = *ppos;
2434
2435 if (0 != r->control && cp[pos] == r->control)
2436 pos++;
2437 else if (0 != r->control)
2438 return(0);
2439 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
2440 pos += 2;
2441 else if ('.' == cp[pos] || '\'' == cp[pos])
2442 pos++;
2443 else
2444 return(0);
2445
2446 while (' ' == cp[pos] || '\t' == cp[pos])
2447 pos++;
2448
2449 *ppos = pos;
2450 return(1);
2451 }