]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
Don't let .Ta creep into an already-closed list; same as for .It.
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.244 2014/12/18 17:43:41 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include "mandoc.h"
29 #include "mandoc_aux.h"
30 #include "libmandoc.h"
31 #include "libroff.h"
32
33 /* Maximum number of nested if-else conditionals. */
34 #define RSTACK_MAX 128
35
36 /* Maximum number of string expansions per line, to break infinite loops. */
37 #define EXPAND_LIMIT 1000
38
/*
 * Tokens for the roff requests handled by this parser.
 * The values double as indices into the roffs[] dispatch table,
 * so the order here has to agree with the order of that table.
 */
enum	rofft {
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_as,
	ROFF_cc,
	ROFF_ce,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_fam,
	ROFF_hw,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_pl,
	ROFF_ps,
	ROFF_rm,
	ROFF_rr,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,
	ROFF_Dd,
	ROFF_TH,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,
	ROFF_EQ,
	ROFF_EN,
	ROFF_cblock,	/* the block-closing ".." request */
	ROFF_USERDEF,	/* a user-defined macro */
	ROFF_MAX	/* number of tokens; also "no such request" */
};
81
/*
 * A minimal string buffer: a pointer plus its cached length.
 */
struct	roffstr {
	char		*p;	/* nil-terminated buffer */
	size_t		 sz;	/* saved strlen(p) */
};
89
90 /*
91 * A key-value roffstr pair as part of a singly-linked list.
92 */
93 struct roffkv {
94 struct roffstr key;
95 struct roffstr val;
96 struct roffkv *next; /* next in list */
97 };
98
99 /*
100 * A single number register as part of a singly-linked list.
101 */
102 struct roffreg {
103 struct roffstr key;
104 int val;
105 struct roffreg *next;
106 };
107
/*
 * The complete state of one roff parser instance.
 */
struct	roff {
	struct mparse	*parse;		/* parse point */
	const struct mchars *mchars;	/* character table */
	struct roffnode	*last;		/* leaf of stack */
	int		*rstack;	/* stack of inverted `ie' values */
	struct roffreg	*regtab;	/* number registers */
	struct roffkv	*strtab;	/* user-defined strings & macros */
	struct roffkv	*xmbtab;	/* multi-byte trans table (`tr') */
	struct roffstr	*xtab;		/* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl;	/* first table parsed */
	struct tbl_node	*last_tbl;	/* last table parsed */
	struct tbl_node	*tbl;		/* current table being parsed */
	struct eqn_node	*last_eqn;	/* last equation parsed */
	struct eqn_node	*first_eqn;	/* first equation parsed */
	struct eqn_node	*eqn;		/* current equation being parsed */
	int		 eqn_inline;	/* current equation is inline */
	int		 options;	/* parse options */
	int		 rstacksz;	/* current size limit of rstack */
	int		 rstackpos;	/* position in rstack */
	int		 format;	/* current file in mdoc or man format */
	char		 control;	/* control character */
};
131
132 struct roffnode {
133 enum rofft tok; /* type of node */
134 struct roffnode *parent; /* up one in stack */
135 int line; /* parse line */
136 int col; /* parse col */
137 char *name; /* node name, e.g. macro name */
138 char *end; /* end-rules: custom token */
139 int endspan; /* end-rules: next-line or infty */
140 int rule; /* current evaluation rule */
141 };
142
/*
 * The parameter list shared by all request handlers,
 * and the matching function pointer type.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum rofft tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

typedef	enum rofferr (*roffproc)(ROFF_ARGS);
152
153 struct roffmac {
154 const char *name; /* macro name */
155 roffproc proc; /* process new macro */
156 roffproc text; /* process as child text of macro */
157 roffproc sub; /* process as child of macro */
158 int flags;
159 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
160 struct roffmac *next;
161 };
162
/*
 * A predefined string: the input name and the text replacing it.
 */
struct	predef {
	const char	*name;	/* predefined input name */
	const char	*str;	/* replacement symbol */
};

/* Expands to one initializer of a struct predef array. */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
170
/* Forward declarations of the file-local functions. */

static	enum rofft	 roffhash_find(const char *, size_t);
static	void		 roffhash_init(void);
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum rofft,
				const char *, int, int);
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_cc(ROFF_ARGS);
static	void		 roff_ccond(struct roff *, int, int);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum rofferr	 roff_eqndelim(struct roff *, struct buf *, int);
static	int		 roff_evalcond(struct roff *r, int,
				const char *, int *);
static	int		 roff_evalnum(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalpar(struct roff *, int,
				const char *, int *, int *);
static	int		 roff_evalstrcond(const char *, int *);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	size_t		 roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(const struct roff *,
				const char *, size_t);
static	int		 roff_getregro(const char *name);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	enum rofferr	 roff_it(ROFF_ARGS);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	enum rofft	 roff_parse(struct roff *, char *, int *,
				int, int);
static	enum rofferr	 roff_parsetext(struct buf *, int, int *);
static	enum rofferr	 roff_res(struct roff *, struct buf *, int, int);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	enum rofferr	 roff_rr(ROFF_ARGS);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_tr(ROFF_ARGS);
static	enum rofferr	 roff_Dd(ROFF_ARGS);
static	enum rofferr	 roff_TH(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
229
/*
 * The request hash table has one bucket for each printable,
 * non-blank ASCII character; see roffhash_find().
 */

#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

static	struct roffmac	*hash[HASHWIDTH];
237
238 static struct roffmac roffs[ROFF_MAX] = {
239 { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
240 { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
241 { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
242 { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
243 { "as", roff_ds, NULL, NULL, 0, NULL },
244 { "cc", roff_cc, NULL, NULL, 0, NULL },
245 { "ce", roff_line_ignore, NULL, NULL, 0, NULL },
246 { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
247 { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
248 { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
249 { "ds", roff_ds, NULL, NULL, 0, NULL },
250 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
251 { "fam", roff_line_ignore, NULL, NULL, 0, NULL },
252 { "hw", roff_line_ignore, NULL, NULL, 0, NULL },
253 { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
254 { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
255 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
256 { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
257 { "it", roff_it, NULL, NULL, 0, NULL },
258 { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
259 { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
260 { "nr", roff_nr, NULL, NULL, 0, NULL },
261 { "ns", roff_line_ignore, NULL, NULL, 0, NULL },
262 { "pl", roff_line_ignore, NULL, NULL, 0, NULL },
263 { "ps", roff_line_ignore, NULL, NULL, 0, NULL },
264 { "rm", roff_rm, NULL, NULL, 0, NULL },
265 { "rr", roff_rr, NULL, NULL, 0, NULL },
266 { "so", roff_so, NULL, NULL, 0, NULL },
267 { "ta", roff_line_ignore, NULL, NULL, 0, NULL },
268 { "tr", roff_tr, NULL, NULL, 0, NULL },
269 { "Dd", roff_Dd, NULL, NULL, 0, NULL },
270 { "TH", roff_TH, NULL, NULL, 0, NULL },
271 { "TS", roff_TS, NULL, NULL, 0, NULL },
272 { "TE", roff_TE, NULL, NULL, 0, NULL },
273 { "T&", roff_T_, NULL, NULL, 0, NULL },
274 { "EQ", roff_EQ, NULL, NULL, 0, NULL },
275 { "EN", roff_EN, NULL, NULL, 0, NULL },
276 { ".", roff_cblock, NULL, NULL, 0, NULL },
277 { NULL, roff_userdef, NULL, NULL, 0, NULL },
278 };
279
/*
 * NULL-terminated list of mdoc macro names.
 * Not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH
 */
const	char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
	"Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
	"Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em",
	"En", "Eo", "Er", "Es", "Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp",
	"Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "Pq",
	"Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv",
	"Sc", "Sh", "Sm", "So", "Sq",
	"Ss", "St", "Sx", "Sy",
	"Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
	"%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
302
/*
 * NULL-terminated list of man macro names.
 * Not currently implemented: BT DE DS ME MT PT SY TQ YS
 */
const	char *const __man_reserved[] = {
	"AT", "B", "BI", "BR", "DT",
	"EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR",
	"LP", "OP", "P", "PD", "PP",
	"R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS",
	"TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR",
	NULL
};
312
313 /* Array of injected predefined strings. */
314 #define PREDEFS_MAX 38
315 static const struct predef predefs[PREDEFS_MAX] = {
316 #include "predefs.in"
317 };
318
/*
 * Bucket index of a request name: its first character, rebased so
 * that ASCII_LO maps to bucket 0; see roffhash_find().
 * The argument is parenthesized so that pointer expressions such as
 * ROFF_HASH(s + 1) index the intended character.
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)

/* State of the input line trap set up by the `it' request. */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
324
325
326 static void
327 roffhash_init(void)
328 {
329 struct roffmac *n;
330 int buc, i;
331
332 for (i = 0; i < (int)ROFF_USERDEF; i++) {
333 assert(roffs[i].name[0] >= ASCII_LO);
334 assert(roffs[i].name[0] <= ASCII_HI);
335
336 buc = ROFF_HASH(roffs[i].name);
337
338 if (NULL != (n = hash[buc])) {
339 for ( ; n->next; n = n->next)
340 /* Do nothing. */ ;
341 n->next = &roffs[i];
342 } else
343 hash[buc] = &roffs[i];
344 }
345 }
346
347 /*
348 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
349 * the nil-terminated string name could be found.
350 */
351 static enum rofft
352 roffhash_find(const char *p, size_t s)
353 {
354 int buc;
355 struct roffmac *n;
356
357 /*
358 * libroff has an extremely simple hashtable, for the time
359 * being, which simply keys on the first character, which must
360 * be printable, then walks a chain. It works well enough until
361 * optimised.
362 */
363
364 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
365 return(ROFF_MAX);
366
367 buc = ROFF_HASH(p);
368
369 if (NULL == (n = hash[buc]))
370 return(ROFF_MAX);
371 for ( ; n; n = n->next)
372 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
373 return((enum rofft)(n - roffs));
374
375 return(ROFF_MAX);
376 }
377
378 /*
379 * Pop the current node off of the stack of roff instructions currently
380 * pending.
381 */
382 static void
383 roffnode_pop(struct roff *r)
384 {
385 struct roffnode *p;
386
387 assert(r->last);
388 p = r->last;
389
390 r->last = r->last->parent;
391 free(p->name);
392 free(p->end);
393 free(p);
394 }
395
396 /*
397 * Push a roff node onto the instruction stack. This must later be
398 * removed with roffnode_pop().
399 */
400 static void
401 roffnode_push(struct roff *r, enum rofft tok, const char *name,
402 int line, int col)
403 {
404 struct roffnode *p;
405
406 p = mandoc_calloc(1, sizeof(struct roffnode));
407 p->tok = tok;
408 if (name)
409 p->name = mandoc_strdup(name);
410 p->parent = r->last;
411 p->line = line;
412 p->col = col;
413 p->rule = p->parent ? p->parent->rule : 0;
414
415 r->last = p;
416 }
417
418 static void
419 roff_free1(struct roff *r)
420 {
421 struct tbl_node *tbl;
422 struct eqn_node *e;
423 int i;
424
425 while (NULL != (tbl = r->first_tbl)) {
426 r->first_tbl = tbl->next;
427 tbl_free(tbl);
428 }
429 r->first_tbl = r->last_tbl = r->tbl = NULL;
430
431 while (NULL != (e = r->first_eqn)) {
432 r->first_eqn = e->next;
433 eqn_free(e);
434 }
435 r->first_eqn = r->last_eqn = r->eqn = NULL;
436
437 while (r->last)
438 roffnode_pop(r);
439
440 free (r->rstack);
441 r->rstack = NULL;
442 r->rstacksz = 0;
443 r->rstackpos = -1;
444
445 roff_freereg(r->regtab);
446 r->regtab = NULL;
447
448 roff_freestr(r->strtab);
449 roff_freestr(r->xmbtab);
450 r->strtab = r->xmbtab = NULL;
451
452 if (r->xtab)
453 for (i = 0; i < 128; i++)
454 free(r->xtab[i].p);
455 free(r->xtab);
456 r->xtab = NULL;
457 }
458
459 void
460 roff_reset(struct roff *r)
461 {
462
463 roff_free1(r);
464 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
465 r->control = 0;
466 }
467
/*
 * Destroy a parser: release its state, then the object itself.
 */
void
roff_free(struct roff *r)
{

	roff_free1(r);
	free(r);
}
475
476 struct roff *
477 roff_alloc(struct mparse *parse, const struct mchars *mchars, int options)
478 {
479 struct roff *r;
480
481 r = mandoc_calloc(1, sizeof(struct roff));
482 r->parse = parse;
483 r->mchars = mchars;
484 r->options = options;
485 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
486 r->rstackpos = -1;
487
488 roffhash_init();
489
490 return(r);
491 }
492
493 /*
494 * In the current line, expand escape sequences that tend to get
495 * used in numerical expressions and conditional requests.
496 * Also check the syntax of the remaining escape sequences.
497 */
498 static enum rofferr
499 roff_res(struct roff *r, struct buf *buf, int ln, int pos)
500 {
501 char ubuf[24]; /* buffer to print the number */
502 const char *start; /* start of the string to process */
503 char *stesc; /* start of an escape sequence ('\\') */
504 const char *stnam; /* start of the name, after "[(*" */
505 const char *cp; /* end of the name, e.g. before ']' */
506 const char *res; /* the string to be substituted */
507 char *nbuf; /* new buffer to copy buf->buf to */
508 size_t maxl; /* expected length of the escape name */
509 size_t naml; /* actual length of the escape name */
510 enum mandoc_esc esc; /* type of the escape sequence */
511 int inaml; /* length returned from mandoc_escape() */
512 int expand_count; /* to avoid infinite loops */
513 int npos; /* position in numeric expression */
514 int arg_complete; /* argument not interrupted by eol */
515 char term; /* character terminating the escape */
516
517 expand_count = 0;
518 start = buf->buf + pos;
519 stesc = strchr(start, '\0') - 1;
520 while (stesc-- > start) {
521
522 /* Search backwards for the next backslash. */
523
524 if (*stesc != '\\')
525 continue;
526
527 /* If it is escaped, skip it. */
528
529 for (cp = stesc - 1; cp >= start; cp--)
530 if (*cp != '\\')
531 break;
532
533 if ((stesc - cp) % 2 == 0) {
534 stesc = (char *)cp;
535 continue;
536 }
537
538 /* Decide whether to expand or to check only. */
539
540 term = '\0';
541 cp = stesc + 1;
542 switch (*cp) {
543 case '*':
544 res = NULL;
545 break;
546 case 'B':
547 /* FALLTHROUGH */
548 case 'w':
549 term = cp[1];
550 /* FALLTHROUGH */
551 case 'n':
552 res = ubuf;
553 break;
554 default:
555 esc = mandoc_escape(&cp, &stnam, &inaml);
556 if (esc == ESCAPE_ERROR ||
557 (esc == ESCAPE_SPECIAL &&
558 mchars_spec2cp(r->mchars, stnam, inaml) < 0))
559 mandoc_vmsg(MANDOCERR_ESC_BAD,
560 r->parse, ln, (int)(stesc - buf->buf),
561 "%.*s", (int)(cp - stesc), stesc);
562 continue;
563 }
564
565 if (EXPAND_LIMIT < ++expand_count) {
566 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
567 ln, (int)(stesc - buf->buf), NULL);
568 return(ROFF_IGN);
569 }
570
571 /*
572 * The third character decides the length
573 * of the name of the string or register.
574 * Save a pointer to the name.
575 */
576
577 if (term == '\0') {
578 switch (*++cp) {
579 case '\0':
580 maxl = 0;
581 break;
582 case '(':
583 cp++;
584 maxl = 2;
585 break;
586 case '[':
587 cp++;
588 term = ']';
589 maxl = 0;
590 break;
591 default:
592 maxl = 1;
593 break;
594 }
595 } else {
596 cp += 2;
597 maxl = 0;
598 }
599 stnam = cp;
600
601 /* Advance to the end of the name. */
602
603 arg_complete = 1;
604 for (naml = 0; maxl == 0 || naml < maxl; naml++, cp++) {
605 if (*cp == '\0') {
606 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
607 ln, (int)(stesc - buf->buf), stesc);
608 arg_complete = 0;
609 break;
610 }
611 if (maxl == 0 && *cp == term) {
612 cp++;
613 break;
614 }
615 }
616
617 /*
618 * Retrieve the replacement string; if it is
619 * undefined, resume searching for escapes.
620 */
621
622 switch (stesc[1]) {
623 case '*':
624 if (arg_complete)
625 res = roff_getstrn(r, stnam, naml);
626 break;
627 case 'B':
628 npos = 0;
629 ubuf[0] = arg_complete &&
630 roff_evalnum(r, ln, stnam, &npos, NULL, 0) &&
631 stnam + npos + 1 == cp ? '1' : '0';
632 ubuf[1] = '\0';
633 break;
634 case 'n':
635 if (arg_complete)
636 (void)snprintf(ubuf, sizeof(ubuf), "%d",
637 roff_getregn(r, stnam, naml));
638 else
639 ubuf[0] = '\0';
640 break;
641 case 'w':
642 /* use even incomplete args */
643 (void)snprintf(ubuf, sizeof(ubuf), "%d",
644 24 * (int)naml);
645 break;
646 }
647
648 if (res == NULL) {
649 mandoc_vmsg(MANDOCERR_STR_UNDEF,
650 r->parse, ln, (int)(stesc - buf->buf),
651 "%.*s", (int)naml, stnam);
652 res = "";
653 }
654
655 /* Replace the escape sequence by the string. */
656
657 *stesc = '\0';
658 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
659 buf->buf, res, cp) + 1;
660
661 /* Prepare for the next replacement. */
662
663 start = nbuf + pos;
664 stesc = nbuf + (stesc - buf->buf) + strlen(res);
665 free(buf->buf);
666 buf->buf = nbuf;
667 }
668 return(ROFF_CONT);
669 }
670
671 /*
672 * Process text streams:
673 * Convert all breakable hyphens into ASCII_HYPH.
674 * Decrement and spring input line trap.
675 */
676 static enum rofferr
677 roff_parsetext(struct buf *buf, int pos, int *offs)
678 {
679 size_t sz;
680 const char *start;
681 char *p;
682 int isz;
683 enum mandoc_esc esc;
684
685 start = p = buf->buf + pos;
686
687 while (*p != '\0') {
688 sz = strcspn(p, "-\\");
689 p += sz;
690
691 if (*p == '\0')
692 break;
693
694 if (*p == '\\') {
695 /* Skip over escapes. */
696 p++;
697 esc = mandoc_escape((const char **)&p, NULL, NULL);
698 if (esc == ESCAPE_ERROR)
699 break;
700 continue;
701 } else if (p == start) {
702 p++;
703 continue;
704 }
705
706 if (isalpha((unsigned char)p[-1]) &&
707 isalpha((unsigned char)p[1]))
708 *p = ASCII_HYPH;
709 p++;
710 }
711
712 /* Spring the input line trap. */
713 if (roffit_lines == 1) {
714 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
715 free(buf->buf);
716 buf->buf = p;
717 buf->sz = isz + 1;
718 *offs = 0;
719 free(roffit_macro);
720 roffit_lines = 0;
721 return(ROFF_REPARSE);
722 } else if (roffit_lines > 1)
723 --roffit_lines;
724 return(ROFF_CONT);
725 }
726
727 enum rofferr
728 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
729 {
730 enum rofft t;
731 enum rofferr e;
732 int pos; /* parse point */
733 int spos; /* saved parse point for messages */
734 int ppos; /* original offset in buf->buf */
735 int ctl; /* macro line (boolean) */
736
737 ppos = pos = *offs;
738
739 /* Handle in-line equation delimiters. */
740
741 if (r->tbl == NULL &&
742 r->last_eqn != NULL && r->last_eqn->delim &&
743 (r->eqn == NULL || r->eqn_inline)) {
744 e = roff_eqndelim(r, buf, pos);
745 if (e == ROFF_REPARSE)
746 return(e);
747 assert(e == ROFF_CONT);
748 }
749
750 /* Expand some escape sequences. */
751
752 e = roff_res(r, buf, ln, pos);
753 if (e == ROFF_IGN)
754 return(e);
755 assert(e == ROFF_CONT);
756
757 ctl = roff_getcontrol(r, buf->buf, &pos);
758
759 /*
760 * First, if a scope is open and we're not a macro, pass the
761 * text through the macro's filter. If a scope isn't open and
762 * we're not a macro, just let it through.
763 * Finally, if there's an equation scope open, divert it into it
764 * no matter our state.
765 */
766
767 if (r->last && ! ctl) {
768 t = r->last->tok;
769 assert(roffs[t].text);
770 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
771 assert(e == ROFF_IGN || e == ROFF_CONT);
772 if (e != ROFF_CONT)
773 return(e);
774 }
775 if (r->eqn)
776 return(eqn_read(&r->eqn, ln, buf->buf, ppos, offs));
777 if ( ! ctl) {
778 if (r->tbl)
779 return(tbl_read(r->tbl, ln, buf->buf, pos));
780 return(roff_parsetext(buf, pos, offs));
781 }
782
783 /* Skip empty request lines. */
784
785 if (buf->buf[pos] == '"') {
786 mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
787 ln, pos, NULL);
788 return(ROFF_IGN);
789 } else if (buf->buf[pos] == '\0')
790 return(ROFF_IGN);
791
792 /*
793 * If a scope is open, go to the child handler for that macro,
794 * as it may want to preprocess before doing anything with it.
795 * Don't do so if an equation is open.
796 */
797
798 if (r->last) {
799 t = r->last->tok;
800 assert(roffs[t].sub);
801 return((*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs));
802 }
803
804 /* No scope is open. This is a new request or macro. */
805
806 spos = pos;
807 t = roff_parse(r, buf->buf, &pos, ln, ppos);
808
809 /* Tables ignore most macros. */
810
811 if (r->tbl != NULL && (t == ROFF_MAX || t == ROFF_TS)) {
812 mandoc_msg(MANDOCERR_TBLMACRO, r->parse,
813 ln, pos, buf->buf + spos);
814 return(ROFF_IGN);
815 }
816
817 /*
818 * This is neither a roff request nor a user-defined macro.
819 * Let the standard macro set parsers handle it.
820 */
821
822 if (t == ROFF_MAX)
823 return(ROFF_CONT);
824
825 /* Execute a roff request or a user defined macro. */
826
827 assert(roffs[t].proc);
828 return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs));
829 }
830
831 void
832 roff_endparse(struct roff *r)
833 {
834
835 if (r->last)
836 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
837 r->last->line, r->last->col,
838 roffs[r->last->tok].name);
839
840 if (r->eqn) {
841 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
842 r->eqn->eqn.ln, r->eqn->eqn.pos, "EQ");
843 eqn_end(&r->eqn);
844 }
845
846 if (r->tbl) {
847 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
848 r->tbl->line, r->tbl->pos, "TS");
849 tbl_end(&r->tbl);
850 }
851 }
852
853 /*
854 * Parse a roff node's type from the input buffer. This must be in the
855 * form of ".foo xxx" in the usual way.
856 */
857 static enum rofft
858 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
859 {
860 char *cp;
861 const char *mac;
862 size_t maclen;
863 enum rofft t;
864
865 cp = buf + *pos;
866
867 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
868 return(ROFF_MAX);
869
870 mac = cp;
871 maclen = roff_getname(r, &cp, ln, ppos);
872
873 t = (r->current_string = roff_getstrn(r, mac, maclen))
874 ? ROFF_USERDEF : roffhash_find(mac, maclen);
875
876 if (ROFF_MAX != t)
877 *pos = cp - buf;
878
879 return(t);
880 }
881
882 static enum rofferr
883 roff_cblock(ROFF_ARGS)
884 {
885
886 /*
887 * A block-close `..' should only be invoked as a child of an
888 * ignore macro, otherwise raise a warning and just ignore it.
889 */
890
891 if (r->last == NULL) {
892 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
893 ln, ppos, "..");
894 return(ROFF_IGN);
895 }
896
897 switch (r->last->tok) {
898 case ROFF_am:
899 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
900 /* FALLTHROUGH */
901 case ROFF_ami:
902 /* FALLTHROUGH */
903 case ROFF_de:
904 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
905 /* FALLTHROUGH */
906 case ROFF_dei:
907 /* FALLTHROUGH */
908 case ROFF_ig:
909 break;
910 default:
911 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
912 ln, ppos, "..");
913 return(ROFF_IGN);
914 }
915
916 if (buf->buf[pos] != '\0')
917 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
918 ".. %s", buf->buf + pos);
919
920 roffnode_pop(r);
921 roffnode_cleanscope(r);
922 return(ROFF_IGN);
923
924 }
925
926 static void
927 roffnode_cleanscope(struct roff *r)
928 {
929
930 while (r->last) {
931 if (--r->last->endspan != 0)
932 break;
933 roffnode_pop(r);
934 }
935 }
936
937 static void
938 roff_ccond(struct roff *r, int ln, int ppos)
939 {
940
941 if (NULL == r->last) {
942 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
943 ln, ppos, "\\}");
944 return;
945 }
946
947 switch (r->last->tok) {
948 case ROFF_el:
949 /* FALLTHROUGH */
950 case ROFF_ie:
951 /* FALLTHROUGH */
952 case ROFF_if:
953 break;
954 default:
955 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
956 ln, ppos, "\\}");
957 return;
958 }
959
960 if (r->last->endspan > -1) {
961 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
962 ln, ppos, "\\}");
963 return;
964 }
965
966 roffnode_pop(r);
967 roffnode_cleanscope(r);
968 return;
969 }
970
971 static enum rofferr
972 roff_block(ROFF_ARGS)
973 {
974 const char *name;
975 char *iname, *cp;
976 size_t namesz;
977
978 /* Ignore groff compatibility mode for now. */
979
980 if (tok == ROFF_de1)
981 tok = ROFF_de;
982 else if (tok == ROFF_am1)
983 tok = ROFF_am;
984
985 /* Parse the macro name argument. */
986
987 cp = buf->buf + pos;
988 if (tok == ROFF_ig) {
989 iname = NULL;
990 namesz = 0;
991 } else {
992 iname = cp;
993 namesz = roff_getname(r, &cp, ln, ppos);
994 iname[namesz] = '\0';
995 }
996
997 /* Resolve the macro name argument if it is indirect. */
998
999 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1000 if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1001 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1002 r->parse, ln, (int)(iname - buf->buf),
1003 "%.*s", (int)namesz, iname);
1004 namesz = 0;
1005 } else
1006 namesz = strlen(name);
1007 } else
1008 name = iname;
1009
1010 if (namesz == 0 && tok != ROFF_ig) {
1011 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
1012 ln, ppos, roffs[tok].name);
1013 return(ROFF_IGN);
1014 }
1015
1016 roffnode_push(r, tok, name, ln, ppos);
1017
1018 /*
1019 * At the beginning of a `de' macro, clear the existing string
1020 * with the same name, if there is one. New content will be
1021 * appended from roff_block_text() in multiline mode.
1022 */
1023
1024 if (tok == ROFF_de || tok == ROFF_dei)
1025 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1026
1027 if (*cp == '\0')
1028 return(ROFF_IGN);
1029
1030 /* Get the custom end marker. */
1031
1032 iname = cp;
1033 namesz = roff_getname(r, &cp, ln, ppos);
1034
1035 /* Resolve the end marker if it is indirect. */
1036
1037 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1038 if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1039 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1040 r->parse, ln, (int)(iname - buf->buf),
1041 "%.*s", (int)namesz, iname);
1042 namesz = 0;
1043 } else
1044 namesz = strlen(name);
1045 } else
1046 name = iname;
1047
1048 if (namesz)
1049 r->last->end = mandoc_strndup(name, namesz);
1050
1051 if (*cp != '\0')
1052 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
1053 ln, pos, ".%s ... %s", roffs[tok].name, cp);
1054
1055 return(ROFF_IGN);
1056 }
1057
1058 static enum rofferr
1059 roff_block_sub(ROFF_ARGS)
1060 {
1061 enum rofft t;
1062 int i, j;
1063
1064 /*
1065 * First check whether a custom macro exists at this level. If
1066 * it does, then check against it. This is some of groff's
1067 * stranger behaviours. If we encountered a custom end-scope
1068 * tag and that tag also happens to be a "real" macro, then we
1069 * need to try interpreting it again as a real macro. If it's
1070 * not, then return ignore. Else continue.
1071 */
1072
1073 if (r->last->end) {
1074 for (i = pos, j = 0; r->last->end[j]; j++, i++)
1075 if (buf->buf[i] != r->last->end[j])
1076 break;
1077
1078 if (r->last->end[j] == '\0' &&
1079 (buf->buf[i] == '\0' ||
1080 buf->buf[i] == ' ' ||
1081 buf->buf[i] == '\t')) {
1082 roffnode_pop(r);
1083 roffnode_cleanscope(r);
1084
1085 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
1086 i++;
1087
1088 pos = i;
1089 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
1090 ROFF_MAX)
1091 return(ROFF_RERUN);
1092 return(ROFF_IGN);
1093 }
1094 }
1095
1096 /*
1097 * If we have no custom end-query or lookup failed, then try
1098 * pulling it out of the hashtable.
1099 */
1100
1101 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1102
1103 if (t != ROFF_cblock) {
1104 if (tok != ROFF_ig)
1105 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
1106 return(ROFF_IGN);
1107 }
1108
1109 assert(roffs[t].proc);
1110 return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs));
1111 }
1112
1113 static enum rofferr
1114 roff_block_text(ROFF_ARGS)
1115 {
1116
1117 if (tok != ROFF_ig)
1118 roff_setstr(r, r->last->name, buf->buf + pos, 2);
1119
1120 return(ROFF_IGN);
1121 }
1122
1123 static enum rofferr
1124 roff_cond_sub(ROFF_ARGS)
1125 {
1126 enum rofft t;
1127 char *ep;
1128 int rr;
1129
1130 rr = r->last->rule;
1131 roffnode_cleanscope(r);
1132 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1133
1134 /*
1135 * Fully handle known macros when they are structurally
1136 * required or when the conditional evaluated to true.
1137 */
1138
1139 if ((t != ROFF_MAX) &&
1140 (rr || roffs[t].flags & ROFFMAC_STRUCT)) {
1141 assert(roffs[t].proc);
1142 return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs));
1143 }
1144
1145 /*
1146 * If `\}' occurs on a macro line without a preceding macro,
1147 * drop the line completely.
1148 */
1149
1150 ep = buf->buf + pos;
1151 if (ep[0] == '\\' && ep[1] == '}')
1152 rr = 0;
1153
1154 /* Always check for the closing delimiter `\}'. */
1155
1156 while ((ep = strchr(ep, '\\')) != NULL) {
1157 if (*(++ep) == '}') {
1158 *ep = '&';
1159 roff_ccond(r, ln, ep - buf->buf - 1);
1160 }
1161 ++ep;
1162 }
1163 return(rr ? ROFF_CONT : ROFF_IGN);
1164 }
1165
1166 static enum rofferr
1167 roff_cond_text(ROFF_ARGS)
1168 {
1169 char *ep;
1170 int rr;
1171
1172 rr = r->last->rule;
1173 roffnode_cleanscope(r);
1174
1175 ep = buf->buf + pos;
1176 while ((ep = strchr(ep, '\\')) != NULL) {
1177 if (*(++ep) == '}') {
1178 *ep = '&';
1179 roff_ccond(r, ln, ep - buf->buf - 1);
1180 }
1181 ++ep;
1182 }
1183 return(rr ? ROFF_CONT : ROFF_IGN);
1184 }
1185
/*
 * Parse a single signed decimal integer from v, starting at *pos.
 * An optional leading '-' is accepted; parsing stops at the first
 * non-digit.
 *
 * If there is at least one digit, store the value in *res, advance
 * *pos behind the last digit, and return 1.  Otherwise store 0 in
 * *res, leave *pos unchanged, and return 0.  res may be NULL if the
 * caller is not interested in the value.
 *
 * Overflows are ignored, but the arithmetic is done on an unsigned
 * accumulator, where wrapping around is well-defined; accumulating
 * in a signed int would be undefined behaviour on overflow.
 */
static int
roff_getnum(const char *v, int *pos, int *res)
{
	unsigned int	 acc;
	int		 myres, neg, p;

	if (res == NULL)
		res = &myres;

	p = *pos;
	neg = v[p] == '-';
	if (neg)
		p++;

	acc = 0;
	for ( ; isdigit((unsigned char)v[p]); p++)
		acc = 10u * acc + (unsigned int)(v[p] - '0');

	/* Nothing but, at most, a sign: not a number. */

	if (p == *pos + neg) {
		*res = 0;
		return 0;
	}

	/*
	 * Negate while still unsigned.  Converting a value above
	 * INT_MAX back to int is implementation-defined, not undefined.
	 */

	if (neg)
		acc = 0u - acc;
	*res = (int)acc;

	*pos = p;
	return 1;
}
1216
/*
 * Evaluate a string comparison condition.
 * The character at v[*pos] serves as the delimiter.
 * Return 1 if the text between its first and second occurrence
 * equals the text between its second and third occurrence,
 * and 0 otherwise.
 * The cursor *pos is moved behind the third occurrence or,
 * if there is none, to the end of the line.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim, *lhs, *rhs;
	int		 equal;

	equal = 0;
	delim = v + *pos;		/* the initial delimiter */
	lhs = delim + 1;		/* cursor in the first string */
	rhs = strchr(lhs, *delim);	/* the middle delimiter, if any */

	if (rhs != NULL) {
		for (;;) {
			rhs++;
			if (*rhs == '\0')
				break;
			if (*lhs != *rhs) {
				/* Mismatch: look for the final delimiter. */
				rhs = strchr(rhs, *delim);
				break;
			}
			if (*rhs == *delim) {
				/* Both strings ended at the same time. */
				equal = 1;
				break;
			}
			lhs++;
		}
	}

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return equal;
}
1259
/*
 * Evaluate one condition starting at v[*pos] and advance *pos.
 * A leading `!' negates the result.
 * The single-character conditions `n' and `o' count as true,
 * while `c', `d', `e', `r', `t', and `v' count as false.
 * Anything else is tried as a numerical expression, which is
 * true when positive, and finally as a string comparison.
 * An empty condition, or a numerical expression that fails after
 * consuming input, yields 0 regardless of negation.
 */
static int
roff_evalcond(struct roff *r, int ln, const char *v, int *pos)
{
	int	 expect, start, value;

	expect = 1;
	if (v[*pos] == '!') {
		expect = 0;
		(*pos)++;
	}

	switch (v[*pos]) {
	case '\0':
		return 0;
	case 'n':
	case 'o':
		(*pos)++;
		return expect;
	case 'c':
	case 'd':
	case 'e':
	case 'r':
	case 't':
	case 'v':
		(*pos)++;
		return !expect;
	default:
		break;
	}

	start = *pos;
	if (roff_evalnum(r, ln, v, pos, &value, 0))
		return (value > 0) == expect;

	/* A partially parsed number is neither number nor string. */
	if (*pos != start)
		return 0;

	return roff_evalstrcond(v, pos) == expect;
}
1308
1309 static enum rofferr
1310 roff_line_ignore(ROFF_ARGS)
1311 {
1312
1313 return(ROFF_IGN);
1314 }
1315
1316 static enum rofferr
1317 roff_cond(ROFF_ARGS)
1318 {
1319
1320 roffnode_push(r, tok, NULL, ln, ppos);
1321
1322 /*
1323 * An `.el' has no conditional body: it will consume the value
1324 * of the current rstack entry set in prior `ie' calls or
1325 * defaults to DENY.
1326 *
1327 * If we're not an `el', however, then evaluate the conditional.
1328 */
1329
1330 r->last->rule = tok == ROFF_el ?
1331 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
1332 roff_evalcond(r, ln, buf->buf, &pos);
1333
1334 /*
1335 * An if-else will put the NEGATION of the current evaluated
1336 * conditional into the stack of rules.
1337 */
1338
1339 if (tok == ROFF_ie) {
1340 if (r->rstackpos + 1 == r->rstacksz) {
1341 r->rstacksz += 16;
1342 r->rstack = mandoc_reallocarray(r->rstack,
1343 r->rstacksz, sizeof(int));
1344 }
1345 r->rstack[++r->rstackpos] = !r->last->rule;
1346 }
1347
1348 /* If the parent has false as its rule, then so do we. */
1349
1350 if (r->last->parent && !r->last->parent->rule)
1351 r->last->rule = 0;
1352
1353 /*
1354 * Determine scope.
1355 * If there is nothing on the line after the conditional,
1356 * not even whitespace, use next-line scope.
1357 */
1358
1359 if (buf->buf[pos] == '\0') {
1360 r->last->endspan = 2;
1361 goto out;
1362 }
1363
1364 while (buf->buf[pos] == ' ')
1365 pos++;
1366
1367 /* An opening brace requests multiline scope. */
1368
1369 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
1370 r->last->endspan = -1;
1371 pos += 2;
1372 goto out;
1373 }
1374
1375 /*
1376 * Anything else following the conditional causes
1377 * single-line scope. Warn if the scope contains
1378 * nothing but trailing whitespace.
1379 */
1380
1381 if (buf->buf[pos] == '\0')
1382 mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
1383 ln, ppos, roffs[tok].name);
1384
1385 r->last->endspan = 1;
1386
1387 out:
1388 *offs = pos;
1389 return(ROFF_RERUN);
1390 }
1391
1392 static enum rofferr
1393 roff_ds(ROFF_ARGS)
1394 {
1395 char *string;
1396 const char *name;
1397 size_t namesz;
1398
1399 /*
1400 * The first word is the name of the string.
1401 * If it is empty or terminated by an escape sequence,
1402 * abort the `ds' request without defining anything.
1403 */
1404
1405 name = string = buf->buf + pos;
1406 if (*name == '\0')
1407 return(ROFF_IGN);
1408
1409 namesz = roff_getname(r, &string, ln, pos);
1410 if (name[namesz] == '\\')
1411 return(ROFF_IGN);
1412
1413 /* Read past the initial double-quote, if any. */
1414 if (*string == '"')
1415 string++;
1416
1417 /* The rest is the value. */
1418 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
1419 ROFF_as == tok);
1420 return(ROFF_IGN);
1421 }
1422
/*
 * Parse one operator of one or two characters at v[*pos].
 * A recognized operator is stored in *res, the parse point moves
 * behind it, and the operator code is returned (non-zero).
 * Two-character operators are encoded as single characters:
 * `<=' as 'l', `<>' as '!', `<?' as 'i', `>=' as 'g',
 * `>?' as 'a', and `==' as '='.
 * Otherwise, return 0 and leave the parse point unchanged;
 * *res then holds the unrecognized character.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	int	 width;

	width = 1;
	*res = v[*pos];

	switch (*res) {
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case ':':
		break;
	case '<':
		if (v[*pos + 1] == '=') {
			*res = 'l';
			width = 2;
		} else if (v[*pos + 1] == '>') {
			*res = '!';
			width = 2;
		} else if (v[*pos + 1] == '?') {
			*res = 'i';
			width = 2;
		}
		break;
	case '>':
		if (v[*pos + 1] == '=') {
			*res = 'g';
			width = 2;
		} else if (v[*pos + 1] == '?') {
			*res = 'a';
			width = 2;
		}
		break;
	case '=':
		if (v[*pos + 1] == '=')
			width = 2;
		break;
	default:
		return 0;
	}

	*pos += width;
	return *res;
}
1492
/*
 * Evaluate one operand: either a numeric expression enclosed
 * in parentheses or a single signed integer number.
 * Return 1 on success and 0 on failure.
 * With res == NULL, only validate the syntax.
 */
static int
roff_evalpar(struct roff *r, int ln,
	const char *v, int *pos, int *res)
{

	if (v[*pos] != '(')
		return roff_getnum(v, pos, res);

	(*pos)++;
	if (roff_evalnum(r, ln, v, pos, res, 1) == 0)
		return 0;

	if (v[*pos] == ')') {
		(*pos)++;
		return 1;
	}

	/*
	 * The closing parenthesis is missing.
	 * That is tolerated when evaluating,
	 * but an error when merely validating.
	 */
	return res != NULL;
}
1522
1523 /*
1524 * Evaluate a complete numeric expression.
1525 * Proceed left to right, there is no concept of precedence.
1526 */
1527 static int
1528 roff_evalnum(struct roff *r, int ln, const char *v,
1529 int *pos, int *res, int skipwhite)
1530 {
1531 int mypos, operand2;
1532 char operator;
1533
1534 if (NULL == pos) {
1535 mypos = 0;
1536 pos = &mypos;
1537 }
1538
1539 if (skipwhite)
1540 while (isspace((unsigned char)v[*pos]))
1541 (*pos)++;
1542
1543 if ( ! roff_evalpar(r, ln, v, pos, res))
1544 return(0);
1545
1546 while (1) {
1547 if (skipwhite)
1548 while (isspace((unsigned char)v[*pos]))
1549 (*pos)++;
1550
1551 if ( ! roff_getop(v, pos, &operator))
1552 break;
1553
1554 if (skipwhite)
1555 while (isspace((unsigned char)v[*pos]))
1556 (*pos)++;
1557
1558 if ( ! roff_evalpar(r, ln, v, pos, &operand2))
1559 return(0);
1560
1561 if (skipwhite)
1562 while (isspace((unsigned char)v[*pos]))
1563 (*pos)++;
1564
1565 if (NULL == res)
1566 continue;
1567
1568 switch (operator) {
1569 case '+':
1570 *res += operand2;
1571 break;
1572 case '-':
1573 *res -= operand2;
1574 break;
1575 case '*':
1576 *res *= operand2;
1577 break;
1578 case '/':
1579 if (operand2 == 0) {
1580 mandoc_msg(MANDOCERR_DIVZERO,
1581 r->parse, ln, *pos, v);
1582 *res = 0;
1583 break;
1584 }
1585 *res /= operand2;
1586 break;
1587 case '%':
1588 if (operand2 == 0) {
1589 mandoc_msg(MANDOCERR_DIVZERO,
1590 r->parse, ln, *pos, v);
1591 *res = 0;
1592 break;
1593 }
1594 *res %= operand2;
1595 break;
1596 case '<':
1597 *res = *res < operand2;
1598 break;
1599 case '>':
1600 *res = *res > operand2;
1601 break;
1602 case 'l':
1603 *res = *res <= operand2;
1604 break;
1605 case 'g':
1606 *res = *res >= operand2;
1607 break;
1608 case '=':
1609 *res = *res == operand2;
1610 break;
1611 case '!':
1612 *res = *res != operand2;
1613 break;
1614 case '&':
1615 *res = *res && operand2;
1616 break;
1617 case ':':
1618 *res = *res || operand2;
1619 break;
1620 case 'i':
1621 if (operand2 < *res)
1622 *res = operand2;
1623 break;
1624 case 'a':
1625 if (operand2 > *res)
1626 *res = operand2;
1627 break;
1628 default:
1629 abort();
1630 }
1631 }
1632 return(1);
1633 }
1634
1635 void
1636 roff_setreg(struct roff *r, const char *name, int val, char sign)
1637 {
1638 struct roffreg *reg;
1639
1640 /* Search for an existing register with the same name. */
1641 reg = r->regtab;
1642
1643 while (reg && strcmp(name, reg->key.p))
1644 reg = reg->next;
1645
1646 if (NULL == reg) {
1647 /* Create a new register. */
1648 reg = mandoc_malloc(sizeof(struct roffreg));
1649 reg->key.p = mandoc_strdup(name);
1650 reg->key.sz = strlen(name);
1651 reg->val = 0;
1652 reg->next = r->regtab;
1653 r->regtab = reg;
1654 }
1655
1656 if ('+' == sign)
1657 reg->val += val;
1658 else if ('-' == sign)
1659 reg->val -= val;
1660 else
1661 reg->val = val;
1662 }
1663
/*
 * Look up a predefined read-only number register.
 * Only the first character of name is inspected.
 * Return -1 if the register is not predefined; should a
 * predefined register with the value -1 ever be needed,
 * a different marker value has to be chosen.
 */
static int
roff_getregro(const char *name)
{
	const char	 c = *name;

	/* ASCII approximation is off; only the left margin is adjusted. */
	if (c == 'A' || c == 'j')
		return 0;

	/* Groff compatibility is on; an output device is defined. */
	if (c == 'g' || c == 'T')
		return 1;

	/* Fixed horizontal resolution. */
	if (c == 'H')
		return 24;

	/* Fixed vertical resolution. */
	if (c == 'V')
		return 40;

	return -1;
}
1691
1692 int
1693 roff_getreg(const struct roff *r, const char *name)
1694 {
1695 struct roffreg *reg;
1696 int val;
1697
1698 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
1699 val = roff_getregro(name + 1);
1700 if (-1 != val)
1701 return (val);
1702 }
1703
1704 for (reg = r->regtab; reg; reg = reg->next)
1705 if (0 == strcmp(name, reg->key.p))
1706 return(reg->val);
1707
1708 return(0);
1709 }
1710
1711 static int
1712 roff_getregn(const struct roff *r, const char *name, size_t len)
1713 {
1714 struct roffreg *reg;
1715 int val;
1716
1717 if ('.' == name[0] && 2 == len) {
1718 val = roff_getregro(name + 1);
1719 if (-1 != val)
1720 return (val);
1721 }
1722
1723 for (reg = r->regtab; reg; reg = reg->next)
1724 if (len == reg->key.sz &&
1725 0 == strncmp(name, reg->key.p, len))
1726 return(reg->val);
1727
1728 return(0);
1729 }
1730
1731 static void
1732 roff_freereg(struct roffreg *reg)
1733 {
1734 struct roffreg *old_reg;
1735
1736 while (NULL != reg) {
1737 free(reg->key.p);
1738 old_reg = reg;
1739 reg = reg->next;
1740 free(old_reg);
1741 }
1742 }
1743
1744 static enum rofferr
1745 roff_nr(ROFF_ARGS)
1746 {
1747 char *key, *val;
1748 size_t keysz;
1749 int iv;
1750 char sign;
1751
1752 key = val = buf->buf + pos;
1753 if (*key == '\0')
1754 return(ROFF_IGN);
1755
1756 keysz = roff_getname(r, &val, ln, pos);
1757 if (key[keysz] == '\\')
1758 return(ROFF_IGN);
1759 key[keysz] = '\0';
1760
1761 sign = *val;
1762 if (sign == '+' || sign == '-')
1763 val++;
1764
1765 if (roff_evalnum(r, ln, val, NULL, &iv, 0))
1766 roff_setreg(r, key, iv, sign);
1767
1768 return(ROFF_IGN);
1769 }
1770
1771 static enum rofferr
1772 roff_rr(ROFF_ARGS)
1773 {
1774 struct roffreg *reg, **prev;
1775 char *name, *cp;
1776 size_t namesz;
1777
1778 name = cp = buf->buf + pos;
1779 if (*name == '\0')
1780 return(ROFF_IGN);
1781 namesz = roff_getname(r, &cp, ln, pos);
1782 name[namesz] = '\0';
1783
1784 prev = &r->regtab;
1785 while (1) {
1786 reg = *prev;
1787 if (reg == NULL || !strcmp(name, reg->key.p))
1788 break;
1789 prev = &reg->next;
1790 }
1791 if (reg != NULL) {
1792 *prev = reg->next;
1793 free(reg->key.p);
1794 free(reg);
1795 }
1796 return(ROFF_IGN);
1797 }
1798
1799 static enum rofferr
1800 roff_rm(ROFF_ARGS)
1801 {
1802 const char *name;
1803 char *cp;
1804 size_t namesz;
1805
1806 cp = buf->buf + pos;
1807 while (*cp != '\0') {
1808 name = cp;
1809 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
1810 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
1811 if (name[namesz] == '\\')
1812 break;
1813 }
1814 return(ROFF_IGN);
1815 }
1816
1817 static enum rofferr
1818 roff_it(ROFF_ARGS)
1819 {
1820 char *cp;
1821 size_t len;
1822 int iv;
1823
1824 /* Parse the number of lines. */
1825 cp = buf->buf + pos;
1826 len = strcspn(cp, " \t");
1827 cp[len] = '\0';
1828 if ((iv = mandoc_strntoi(cp, len, 10)) <= 0) {
1829 mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
1830 ln, ppos, buf->buf + 1);
1831 return(ROFF_IGN);
1832 }
1833 cp += len + 1;
1834
1835 /* Arm the input line trap. */
1836 roffit_lines = iv;
1837 roffit_macro = mandoc_strdup(cp);
1838 return(ROFF_IGN);
1839 }
1840
1841 static enum rofferr
1842 roff_Dd(ROFF_ARGS)
1843 {
1844 const char *const *cp;
1845
1846 if ((r->options & (MPARSE_MDOC | MPARSE_QUICK)) == 0)
1847 for (cp = __mdoc_reserved; *cp; cp++)
1848 roff_setstr(r, *cp, NULL, 0);
1849
1850 if (r->format == 0)
1851 r->format = MPARSE_MDOC;
1852
1853 return(ROFF_CONT);
1854 }
1855
1856 static enum rofferr
1857 roff_TH(ROFF_ARGS)
1858 {
1859 const char *const *cp;
1860
1861 if ((r->options & MPARSE_QUICK) == 0)
1862 for (cp = __man_reserved; *cp; cp++)
1863 roff_setstr(r, *cp, NULL, 0);
1864
1865 if (r->format == 0)
1866 r->format = MPARSE_MAN;
1867
1868 return(ROFF_CONT);
1869 }
1870
1871 static enum rofferr
1872 roff_TE(ROFF_ARGS)
1873 {
1874
1875 if (NULL == r->tbl)
1876 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1877 ln, ppos, "TE");
1878 else
1879 tbl_end(&r->tbl);
1880
1881 return(ROFF_IGN);
1882 }
1883
1884 static enum rofferr
1885 roff_T_(ROFF_ARGS)
1886 {
1887
1888 if (NULL == r->tbl)
1889 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1890 ln, ppos, "T&");
1891 else
1892 tbl_restart(ppos, ln, r->tbl);
1893
1894 return(ROFF_IGN);
1895 }
1896
1897 /*
1898 * Handle in-line equation delimiters.
1899 */
1900 static enum rofferr
1901 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
1902 {
1903 char *cp1, *cp2;
1904 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
1905
1906 /*
1907 * Outside equations, look for an opening delimiter.
1908 * If we are inside an equation, we already know it is
1909 * in-line, or this function wouldn't have been called;
1910 * so look for a closing delimiter.
1911 */
1912
1913 cp1 = buf->buf + pos;
1914 cp2 = strchr(cp1, r->eqn == NULL ?
1915 r->last_eqn->odelim : r->last_eqn->cdelim);
1916 if (cp2 == NULL)
1917 return(ROFF_CONT);
1918
1919 *cp2++ = '\0';
1920 bef_pr = bef_nl = aft_nl = aft_pr = "";
1921
1922 /* Handle preceding text, protecting whitespace. */
1923
1924 if (*buf->buf != '\0') {
1925 if (r->eqn == NULL)
1926 bef_pr = "\\&";
1927 bef_nl = "\n";
1928 }
1929
1930 /*
1931 * Prepare replacing the delimiter with an equation macro
1932 * and drop leading white space from the equation.
1933 */
1934
1935 if (r->eqn == NULL) {
1936 while (*cp2 == ' ')
1937 cp2++;
1938 mac = ".EQ";
1939 } else
1940 mac = ".EN";
1941
1942 /* Handle following text, protecting whitespace. */
1943
1944 if (*cp2 != '\0') {
1945 aft_nl = "\n";
1946 if (r->eqn != NULL)
1947 aft_pr = "\\&";
1948 }
1949
1950 /* Do the actual replacement. */
1951
1952 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
1953 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
1954 free(buf->buf);
1955 buf->buf = cp1;
1956
1957 /* Toggle the in-line state of the eqn subsystem. */
1958
1959 r->eqn_inline = r->eqn == NULL;
1960 return(ROFF_REPARSE);
1961 }
1962
1963 static enum rofferr
1964 roff_EQ(ROFF_ARGS)
1965 {
1966 struct eqn_node *e;
1967
1968 assert(r->eqn == NULL);
1969 e = eqn_alloc(ppos, ln, r->parse);
1970
1971 if (r->last_eqn) {
1972 r->last_eqn->next = e;
1973 e->delim = r->last_eqn->delim;
1974 e->odelim = r->last_eqn->odelim;
1975 e->cdelim = r->last_eqn->cdelim;
1976 } else
1977 r->first_eqn = r->last_eqn = e;
1978
1979 r->eqn = r->last_eqn = e;
1980
1981 if (buf->buf[pos] != '\0')
1982 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1983 ".EQ %s", buf->buf + pos);
1984
1985 return(ROFF_IGN);
1986 }
1987
1988 static enum rofferr
1989 roff_EN(ROFF_ARGS)
1990 {
1991
1992 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
1993 return(ROFF_IGN);
1994 }
1995
1996 static enum rofferr
1997 roff_TS(ROFF_ARGS)
1998 {
1999 struct tbl_node *tbl;
2000
2001 if (r->tbl) {
2002 mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
2003 ln, ppos, "TS breaks TS");
2004 tbl_end(&r->tbl);
2005 }
2006
2007 tbl = tbl_alloc(ppos, ln, r->parse);
2008
2009 if (r->last_tbl)
2010 r->last_tbl->next = tbl;
2011 else
2012 r->first_tbl = r->last_tbl = tbl;
2013
2014 r->tbl = r->last_tbl = tbl;
2015 return(ROFF_IGN);
2016 }
2017
2018 static enum rofferr
2019 roff_cc(ROFF_ARGS)
2020 {
2021 const char *p;
2022
2023 p = buf->buf + pos;
2024
2025 if (*p == '\0' || (r->control = *p++) == '.')
2026 r->control = 0;
2027
2028 if (*p != '\0')
2029 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
2030
2031 return(ROFF_IGN);
2032 }
2033
2034 static enum rofferr
2035 roff_tr(ROFF_ARGS)
2036 {
2037 const char *p, *first, *second;
2038 size_t fsz, ssz;
2039 enum mandoc_esc esc;
2040
2041 p = buf->buf + pos;
2042
2043 if (*p == '\0') {
2044 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
2045 return(ROFF_IGN);
2046 }
2047
2048 while (*p != '\0') {
2049 fsz = ssz = 1;
2050
2051 first = p++;
2052 if (*first == '\\') {
2053 esc = mandoc_escape(&p, NULL, NULL);
2054 if (esc == ESCAPE_ERROR) {
2055 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2056 ln, (int)(p - buf->buf), first);
2057 return(ROFF_IGN);
2058 }
2059 fsz = (size_t)(p - first);
2060 }
2061
2062 second = p++;
2063 if (*second == '\\') {
2064 esc = mandoc_escape(&p, NULL, NULL);
2065 if (esc == ESCAPE_ERROR) {
2066 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2067 ln, (int)(p - buf->buf), second);
2068 return(ROFF_IGN);
2069 }
2070 ssz = (size_t)(p - second);
2071 } else if (*second == '\0') {
2072 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
2073 ln, (int)(p - buf->buf), NULL);
2074 second = " ";
2075 p--;
2076 }
2077
2078 if (fsz > 1) {
2079 roff_setstrn(&r->xmbtab, first, fsz,
2080 second, ssz, 0);
2081 continue;
2082 }
2083
2084 if (r->xtab == NULL)
2085 r->xtab = mandoc_calloc(128,
2086 sizeof(struct roffstr));
2087
2088 free(r->xtab[(int)*first].p);
2089 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
2090 r->xtab[(int)*first].sz = ssz;
2091 }
2092
2093 return(ROFF_IGN);
2094 }
2095
2096 static enum rofferr
2097 roff_so(ROFF_ARGS)
2098 {
2099 char *name;
2100
2101 name = buf->buf + pos;
2102 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
2103
2104 /*
2105 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
2106 * opening anything that's not in our cwd or anything beneath
2107 * it. Thus, explicitly disallow traversing up the file-system
2108 * or using absolute paths.
2109 */
2110
2111 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
2112 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
2113 ".so %s", name);
2114 return(ROFF_ERR);
2115 }
2116
2117 *offs = pos;
2118 return(ROFF_SO);
2119 }
2120
2121 static enum rofferr
2122 roff_userdef(ROFF_ARGS)
2123 {
2124 const char *arg[9];
2125 char *cp, *n1, *n2;
2126 int i;
2127
2128 /*
2129 * Collect pointers to macro argument strings
2130 * and NUL-terminate them.
2131 */
2132 cp = buf->buf + pos;
2133 for (i = 0; i < 9; i++)
2134 arg[i] = *cp == '\0' ? "" :
2135 mandoc_getarg(r->parse, &cp, ln, &pos);
2136
2137 /*
2138 * Expand macro arguments.
2139 */
2140 buf->sz = 0;
2141 n1 = cp = mandoc_strdup(r->current_string);
2142 while ((cp = strstr(cp, "\\$")) != NULL) {
2143 i = cp[2] - '1';
2144 if (0 > i || 8 < i) {
2145 /* Not an argument invocation. */
2146 cp += 2;
2147 continue;
2148 }
2149 *cp = '\0';
2150 buf->sz = mandoc_asprintf(&n2, "%s%s%s",
2151 n1, arg[i], cp + 3) + 1;
2152 cp = n2 + (cp - n1);
2153 free(n1);
2154 n1 = n2;
2155 }
2156
2157 /*
2158 * Replace the macro invocation
2159 * by the expanded macro.
2160 */
2161 free(buf->buf);
2162 buf->buf = n1;
2163 if (buf->sz == 0)
2164 buf->sz = strlen(buf->buf) + 1;
2165
2166 return(buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
2167 ROFF_REPARSE : ROFF_APPEND);
2168 }
2169
2170 static size_t
2171 roff_getname(struct roff *r, char **cpp, int ln, int pos)
2172 {
2173 char *name, *cp;
2174 size_t namesz;
2175
2176 name = *cpp;
2177 if ('\0' == *name)
2178 return(0);
2179
2180 /* Read until end of name and terminate it with NUL. */
2181 for (cp = name; 1; cp++) {
2182 if ('\0' == *cp || ' ' == *cp) {
2183 namesz = cp - name;
2184 break;
2185 }
2186 if ('\\' != *cp)
2187 continue;
2188 namesz = cp - name;
2189 if ('{' == cp[1] || '}' == cp[1])
2190 break;
2191 cp++;
2192 if ('\\' == *cp)
2193 continue;
2194 mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
2195 "%.*s", (int)(cp - name + 1), name);
2196 mandoc_escape((const char **)&cp, NULL, NULL);
2197 break;
2198 }
2199
2200 /* Read past spaces. */
2201 while (' ' == *cp)
2202 cp++;
2203
2204 *cpp = cp;
2205 return(namesz);
2206 }
2207
2208 /*
2209 * Store *string into the user-defined string called *name.
2210 * To clear an existing entry, call with (*r, *name, NULL, 0).
2211 * append == 0: replace mode
2212 * append == 1: single-line append mode
2213 * append == 2: multiline append mode, append '\n' after each call
2214 */
2215 static void
2216 roff_setstr(struct roff *r, const char *name, const char *string,
2217 int append)
2218 {
2219
2220 roff_setstrn(&r->strtab, name, strlen(name), string,
2221 string ? strlen(string) : 0, append);
2222 }
2223
2224 static void
2225 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
2226 const char *string, size_t stringsz, int append)
2227 {
2228 struct roffkv *n;
2229 char *c;
2230 int i;
2231 size_t oldch, newch;
2232
2233 /* Search for an existing string with the same name. */
2234 n = *r;
2235
2236 while (n && (namesz != n->key.sz ||
2237 strncmp(n->key.p, name, namesz)))
2238 n = n->next;
2239
2240 if (NULL == n) {
2241 /* Create a new string table entry. */
2242 n = mandoc_malloc(sizeof(struct roffkv));
2243 n->key.p = mandoc_strndup(name, namesz);
2244 n->key.sz = namesz;
2245 n->val.p = NULL;
2246 n->val.sz = 0;
2247 n->next = *r;
2248 *r = n;
2249 } else if (0 == append) {
2250 free(n->val.p);
2251 n->val.p = NULL;
2252 n->val.sz = 0;
2253 }
2254
2255 if (NULL == string)
2256 return;
2257
2258 /*
2259 * One additional byte for the '\n' in multiline mode,
2260 * and one for the terminating '\0'.
2261 */
2262 newch = stringsz + (1 < append ? 2u : 1u);
2263
2264 if (NULL == n->val.p) {
2265 n->val.p = mandoc_malloc(newch);
2266 *n->val.p = '\0';
2267 oldch = 0;
2268 } else {
2269 oldch = n->val.sz;
2270 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
2271 }
2272
2273 /* Skip existing content in the destination buffer. */
2274 c = n->val.p + (int)oldch;
2275
2276 /* Append new content to the destination buffer. */
2277 i = 0;
2278 while (i < (int)stringsz) {
2279 /*
2280 * Rudimentary roff copy mode:
2281 * Handle escaped backslashes.
2282 */
2283 if ('\\' == string[i] && '\\' == string[i + 1])
2284 i++;
2285 *c++ = string[i++];
2286 }
2287
2288 /* Append terminating bytes. */
2289 if (1 < append)
2290 *c++ = '\n';
2291
2292 *c = '\0';
2293 n->val.sz = (int)(c - n->val.p);
2294 }
2295
2296 static const char *
2297 roff_getstrn(const struct roff *r, const char *name, size_t len)
2298 {
2299 const struct roffkv *n;
2300 int i;
2301
2302 for (n = r->strtab; n; n = n->next)
2303 if (0 == strncmp(name, n->key.p, len) &&
2304 '\0' == n->key.p[(int)len])
2305 return(n->val.p);
2306
2307 for (i = 0; i < PREDEFS_MAX; i++)
2308 if (0 == strncmp(name, predefs[i].name, len) &&
2309 '\0' == predefs[i].name[(int)len])
2310 return(predefs[i].str);
2311
2312 return(NULL);
2313 }
2314
2315 static void
2316 roff_freestr(struct roffkv *r)
2317 {
2318 struct roffkv *n, *nn;
2319
2320 for (n = r; n; n = nn) {
2321 free(n->key.p);
2322 free(n->val.p);
2323 nn = n->next;
2324 free(n);
2325 }
2326 }
2327
2328 const struct tbl_span *
2329 roff_span(const struct roff *r)
2330 {
2331
2332 return(r->tbl ? tbl_span(r->tbl) : NULL);
2333 }
2334
2335 const struct eqn *
2336 roff_eqn(const struct roff *r)
2337 {
2338
2339 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
2340 }
2341
2342 /*
2343 * Duplicate an input string, making the appropriate character
2344 * conversations (as stipulated by `tr') along the way.
2345 * Returns a heap-allocated string with all the replacements made.
2346 */
2347 char *
2348 roff_strdup(const struct roff *r, const char *p)
2349 {
2350 const struct roffkv *cp;
2351 char *res;
2352 const char *pp;
2353 size_t ssz, sz;
2354 enum mandoc_esc esc;
2355
2356 if (NULL == r->xmbtab && NULL == r->xtab)
2357 return(mandoc_strdup(p));
2358 else if ('\0' == *p)
2359 return(mandoc_strdup(""));
2360
2361 /*
2362 * Step through each character looking for term matches
2363 * (remember that a `tr' can be invoked with an escape, which is
2364 * a glyph but the escape is multi-character).
2365 * We only do this if the character hash has been initialised
2366 * and the string is >0 length.
2367 */
2368
2369 res = NULL;
2370 ssz = 0;
2371
2372 while ('\0' != *p) {
2373 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
2374 sz = r->xtab[(int)*p].sz;
2375 res = mandoc_realloc(res, ssz + sz + 1);
2376 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
2377 ssz += sz;
2378 p++;
2379 continue;
2380 } else if ('\\' != *p) {
2381 res = mandoc_realloc(res, ssz + 2);
2382 res[ssz++] = *p++;
2383 continue;
2384 }
2385
2386 /* Search for term matches. */
2387 for (cp = r->xmbtab; cp; cp = cp->next)
2388 if (0 == strncmp(p, cp->key.p, cp->key.sz))
2389 break;
2390
2391 if (NULL != cp) {
2392 /*
2393 * A match has been found.
2394 * Append the match to the array and move
2395 * forward by its keysize.
2396 */
2397 res = mandoc_realloc(res,
2398 ssz + cp->val.sz + 1);
2399 memcpy(res + ssz, cp->val.p, cp->val.sz);
2400 ssz += cp->val.sz;
2401 p += (int)cp->key.sz;
2402 continue;
2403 }
2404
2405 /*
2406 * Handle escapes carefully: we need to copy
2407 * over just the escape itself, or else we might
2408 * do replacements within the escape itself.
2409 * Make sure to pass along the bogus string.
2410 */
2411 pp = p++;
2412 esc = mandoc_escape(&p, NULL, NULL);
2413 if (ESCAPE_ERROR == esc) {
2414 sz = strlen(pp);
2415 res = mandoc_realloc(res, ssz + sz + 1);
2416 memcpy(res + ssz, pp, sz);
2417 break;
2418 }
2419 /*
2420 * We bail out on bad escapes.
2421 * No need to warn: we already did so when
2422 * roff_res() was called.
2423 */
2424 sz = (int)(p - pp);
2425 res = mandoc_realloc(res, ssz + sz + 1);
2426 memcpy(res + ssz, pp, sz);
2427 ssz += sz;
2428 }
2429
2430 res[(int)ssz] = '\0';
2431 return(res);
2432 }
2433
2434 int
2435 roff_getformat(const struct roff *r)
2436 {
2437
2438 return(r->format);
2439 }
2440
2441 /*
2442 * Find out whether a line is a macro line or not.
2443 * If it is, adjust the current position and return one; if it isn't,
2444 * return zero and don't change the current position.
2445 * If the control character has been set with `.cc', then let that grain
2446 * precedence.
2447 * This is slighly contrary to groff, where using the non-breaking
2448 * control character when `cc' has been invoked will cause the
2449 * non-breaking macro contents to be printed verbatim.
2450 */
2451 int
2452 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
2453 {
2454 int pos;
2455
2456 pos = *ppos;
2457
2458 if (0 != r->control && cp[pos] == r->control)
2459 pos++;
2460 else if (0 != r->control)
2461 return(0);
2462 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
2463 pos += 2;
2464 else if ('.' == cp[pos] || '\'' == cp[pos])
2465 pos++;
2466 else
2467 return(0);
2468
2469 while (' ' == cp[pos] || '\t' == cp[pos])
2470 pos++;
2471
2472 *ppos = pos;
2473 return(1);
2474 }