]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
show the {MDOC,MAN}_EQN node, it contains interesting information,
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.231 2014/10/16 01:28:38 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include "mandoc.h"
29 #include "mandoc_aux.h"
30 #include "libmandoc.h"
31 #include "libroff.h"
32
/* Upper bound on the nesting depth of if-else conditionals. */
#define	RSTACK_MAX	128

/*
 * Upper bound on the number of string expansions performed on one
 * input line; exceeding it is treated as an infinite expansion loop.
 */
#define	EXPAND_LIMIT	1000
38
/*
 * Tokens for the requests and macros handled at the roff level.
 * The values index the roffs[] table, so the order of both
 * must be kept in sync.
 */
enum	rofft {
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_as,
	ROFF_cc,
	ROFF_ce,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_fam,
	ROFF_hw,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_pl,
	ROFF_ps,
	ROFF_rm,
	ROFF_rr,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,
	ROFF_Dd,
	ROFF_TH,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,
	ROFF_EQ,
	ROFF_EN,
	ROFF_cblock,	/* the block closing request, ".." */
	ROFF_USERDEF,	/* a user-defined string or macro */
	ROFF_MAX	/* table size; also means "no such token" */
};
81
/*
 * A minimal string buffer: the text together with its cached length.
 */
struct	roffstr {
	char		*p;	/* nil-terminated buffer */
	size_t		 sz;	/* saved strlen(p) */
};
89
90 /*
91 * A key-value roffstr pair as part of a singly-linked list.
92 */
93 struct roffkv {
94 struct roffstr key;
95 struct roffstr val;
96 struct roffkv *next; /* next in list */
97 };
98
99 /*
100 * A single number register as part of a singly-linked list.
101 */
102 struct roffreg {
103 struct roffstr key;
104 int val;
105 struct roffreg *next;
106 };
107
/*
 * The complete state of one roff parser.
 */
struct	roff {
	struct mparse	*parse;		/* parse point */
	struct roffnode	*last;		/* leaf of stack */
	int		*rstack;	/* stack of inverted `ie' values */
	struct roffreg	*regtab;	/* number registers */
	struct roffkv	*strtab;	/* user-defined strings & macros */
	struct roffkv	*xmbtab;	/* multi-byte trans table (`tr') */
	struct roffstr	*xtab;		/* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl;	/* first table parsed */
	struct tbl_node	*last_tbl;	/* last table parsed */
	struct tbl_node	*tbl;		/* current table being parsed */
	struct eqn_node	*last_eqn;	/* last equation parsed */
	struct eqn_node	*first_eqn;	/* first equation parsed */
	struct eqn_node	*eqn;		/* current equation being parsed */
	int		 eqn_inline;	/* current equation is inline */
	int		 options;	/* parse options */
	int		 rstacksz;	/* current size limit of rstack */
	int		 rstackpos;	/* position in rstack */
	int		 format;	/* current file in mdoc or man format */
	char		 control;	/* control character */
};
130
131 struct roffnode {
132 enum rofft tok; /* type of node */
133 struct roffnode *parent; /* up one in stack */
134 int line; /* parse line */
135 int col; /* parse col */
136 char *name; /* node name, e.g. macro name */
137 char *end; /* end-rules: custom token */
138 int endspan; /* end-rules: next-line or infty */
139 int rule; /* current evaluation rule */
140 };
141
/*
 * The parameter list shared by all request handlers,
 * and the function pointer type for such handlers.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum rofft tok, /* tok of macro */ \
			 char **bufp, /* input buffer */ \
			 size_t *szp, /* size of input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

typedef	enum rofferr (*roffproc)(ROFF_ARGS);
152
153 struct roffmac {
154 const char *name; /* macro name */
155 roffproc proc; /* process new macro */
156 roffproc text; /* process as child text of macro */
157 roffproc sub; /* process as child of macro */
158 int flags;
159 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
160 struct roffmac *next;
161 };
162
/*
 * A predefined string: its input name and the text replacing it.
 */
struct	predef {
	const char	*name;	/* predefined input name */
	const char	*str;	/* replacement symbol */
};

/* Expands to one initializer of a struct predef array element. */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
170
/* Prototypes of the file-local functions. */

static	enum rofft	 roffhash_find(const char *, size_t);
static	void		 roffhash_init(void);
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum rofft,
				const char *, int, int);
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_cc(ROFF_ARGS);
static	void		 roff_ccond(struct roff *, int, int);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum rofferr	 roff_eqndelim(struct roff *,
				char **, size_t *, int);
static	int		 roff_evalcond(const char *, int *);
static	int		 roff_evalnum(const char *, int *, int *, int);
static	int		 roff_evalpar(const char *, int *, int *);
static	int		 roff_evalstrcond(const char *, int *);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	size_t		 roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(const struct roff *,
				const char *, size_t);
static	int		 roff_getregro(const char *name);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	enum rofferr	 roff_it(ROFF_ARGS);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	void		 roff_openeqn(struct roff *, const char *,
				int, int, const char *);
static	enum rofft	 roff_parse(struct roff *, char *, int *,
				int, int);
static	enum rofferr	 roff_parsetext(char **, size_t *, int, int *);
static	enum rofferr	 roff_res(struct roff *,
				char **, size_t *, int, int);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	enum rofferr	 roff_rr(ROFF_ARGS);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_tr(ROFF_ARGS);
static	enum rofferr	 roff_Dd(ROFF_ARGS);
static	enum rofferr	 roff_TH(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
230
/*
 * The request hash table has one bucket for each character from
 * ASCII_LO to ASCII_HI inclusive; see roffhash_find().
 */

#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

static	struct roffmac	*hash[HASHWIDTH];
238
239 static struct roffmac roffs[ROFF_MAX] = {
240 { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
241 { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
242 { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
243 { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
244 { "as", roff_ds, NULL, NULL, 0, NULL },
245 { "cc", roff_cc, NULL, NULL, 0, NULL },
246 { "ce", roff_line_ignore, NULL, NULL, 0, NULL },
247 { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
248 { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
249 { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
250 { "ds", roff_ds, NULL, NULL, 0, NULL },
251 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
252 { "fam", roff_line_ignore, NULL, NULL, 0, NULL },
253 { "hw", roff_line_ignore, NULL, NULL, 0, NULL },
254 { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
255 { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
256 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
257 { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
258 { "it", roff_it, NULL, NULL, 0, NULL },
259 { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
260 { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
261 { "nr", roff_nr, NULL, NULL, 0, NULL },
262 { "ns", roff_line_ignore, NULL, NULL, 0, NULL },
263 { "pl", roff_line_ignore, NULL, NULL, 0, NULL },
264 { "ps", roff_line_ignore, NULL, NULL, 0, NULL },
265 { "rm", roff_rm, NULL, NULL, 0, NULL },
266 { "rr", roff_rr, NULL, NULL, 0, NULL },
267 { "so", roff_so, NULL, NULL, 0, NULL },
268 { "ta", roff_line_ignore, NULL, NULL, 0, NULL },
269 { "tr", roff_tr, NULL, NULL, 0, NULL },
270 { "Dd", roff_Dd, NULL, NULL, 0, NULL },
271 { "TH", roff_TH, NULL, NULL, 0, NULL },
272 { "TS", roff_TS, NULL, NULL, 0, NULL },
273 { "TE", roff_TE, NULL, NULL, 0, NULL },
274 { "T&", roff_T_, NULL, NULL, 0, NULL },
275 { "EQ", roff_EQ, NULL, NULL, 0, NULL },
276 { "EN", roff_EN, NULL, NULL, 0, NULL },
277 { ".", roff_cblock, NULL, NULL, 0, NULL },
278 { NULL, roff_userdef, NULL, NULL, 0, NULL },
279 };
280
/*
 * NULL-terminated list of mdoc macro names.
 * not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH
 */
const	char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
	"Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
	"Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em",
	"En", "Eo", "Er", "Es", "Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp",
	"Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "Pq",
	"Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv",
	"Sc", "Sh", "Sm", "So", "Sq",
	"Ss", "St", "Sx", "Sy",
	"Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
	"%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
303
/*
 * NULL-terminated list of man macro names.
 * not currently implemented: BT DE DS ME MT PT SY TQ YS
 */
const	char *const __man_reserved[] = {
	"AT", "B", "BI", "BR", "DT",
	"EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR",
	"LP", "OP", "P", "PD", "PP",
	"R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS",
	"TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR",
	NULL
};
313
314 /* Array of injected predefined strings. */
315 #define PREDEFS_MAX 38
316 static const struct predef predefs[PREDEFS_MAX] = {
317 #include "predefs.in"
318 };
319
/*
 * Bucket index for the name p: its first character relative to
 * ASCII_LO.  The caller must make sure that character is in the range
 * ASCII_LO to ASCII_HI; see roffhash_find().
 * The argument is parenthesized such that expressions like "s + 1"
 * expand correctly.
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)
322
/* State of the input line trap; see roff_parsetext(). */
static	int	 roffit_lines;	/* number of lines to delay */
static	char	*roffit_macro;	/* nil-terminated macro line */
325
326
327 static void
328 roffhash_init(void)
329 {
330 struct roffmac *n;
331 int buc, i;
332
333 for (i = 0; i < (int)ROFF_USERDEF; i++) {
334 assert(roffs[i].name[0] >= ASCII_LO);
335 assert(roffs[i].name[0] <= ASCII_HI);
336
337 buc = ROFF_HASH(roffs[i].name);
338
339 if (NULL != (n = hash[buc])) {
340 for ( ; n->next; n = n->next)
341 /* Do nothing. */ ;
342 n->next = &roffs[i];
343 } else
344 hash[buc] = &roffs[i];
345 }
346 }
347
348 /*
349 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
350 * the nil-terminated string name could be found.
351 */
352 static enum rofft
353 roffhash_find(const char *p, size_t s)
354 {
355 int buc;
356 struct roffmac *n;
357
358 /*
359 * libroff has an extremely simple hashtable, for the time
360 * being, which simply keys on the first character, which must
361 * be printable, then walks a chain. It works well enough until
362 * optimised.
363 */
364
365 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
366 return(ROFF_MAX);
367
368 buc = ROFF_HASH(p);
369
370 if (NULL == (n = hash[buc]))
371 return(ROFF_MAX);
372 for ( ; n; n = n->next)
373 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
374 return((enum rofft)(n - roffs));
375
376 return(ROFF_MAX);
377 }
378
379 /*
380 * Pop the current node off of the stack of roff instructions currently
381 * pending.
382 */
383 static void
384 roffnode_pop(struct roff *r)
385 {
386 struct roffnode *p;
387
388 assert(r->last);
389 p = r->last;
390
391 r->last = r->last->parent;
392 free(p->name);
393 free(p->end);
394 free(p);
395 }
396
397 /*
398 * Push a roff node onto the instruction stack. This must later be
399 * removed with roffnode_pop().
400 */
401 static void
402 roffnode_push(struct roff *r, enum rofft tok, const char *name,
403 int line, int col)
404 {
405 struct roffnode *p;
406
407 p = mandoc_calloc(1, sizeof(struct roffnode));
408 p->tok = tok;
409 if (name)
410 p->name = mandoc_strdup(name);
411 p->parent = r->last;
412 p->line = line;
413 p->col = col;
414 p->rule = p->parent ? p->parent->rule : 0;
415
416 r->last = p;
417 }
418
419 static void
420 roff_free1(struct roff *r)
421 {
422 struct tbl_node *tbl;
423 struct eqn_node *e;
424 int i;
425
426 while (NULL != (tbl = r->first_tbl)) {
427 r->first_tbl = tbl->next;
428 tbl_free(tbl);
429 }
430 r->first_tbl = r->last_tbl = r->tbl = NULL;
431
432 while (NULL != (e = r->first_eqn)) {
433 r->first_eqn = e->next;
434 eqn_free(e);
435 }
436 r->first_eqn = r->last_eqn = r->eqn = NULL;
437
438 while (r->last)
439 roffnode_pop(r);
440
441 free (r->rstack);
442 r->rstack = NULL;
443 r->rstacksz = 0;
444 r->rstackpos = -1;
445
446 roff_freereg(r->regtab);
447 r->regtab = NULL;
448
449 roff_freestr(r->strtab);
450 roff_freestr(r->xmbtab);
451 r->strtab = r->xmbtab = NULL;
452
453 if (r->xtab)
454 for (i = 0; i < 128; i++)
455 free(r->xtab[i].p);
456 free(r->xtab);
457 r->xtab = NULL;
458 }
459
460 void
461 roff_reset(struct roff *r)
462 {
463
464 roff_free1(r);
465 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
466 r->control = 0;
467 }
468
/*
 * Destroy a parser: release all data it holds, then the
 * parser structure itself.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);
	free(r);
}
476
477 struct roff *
478 roff_alloc(struct mparse *parse, int options)
479 {
480 struct roff *r;
481
482 r = mandoc_calloc(1, sizeof(struct roff));
483 r->parse = parse;
484 r->options = options;
485 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
486 r->rstackpos = -1;
487
488 roffhash_init();
489
490 return(r);
491 }
492
493 /*
494 * In the current line, expand escape sequences that tend to get
495 * used in numerical expressions and conditional requests.
496 * Also check the syntax of the remaining escape sequences.
497 */
498 static enum rofferr
499 roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
500 {
501 char ubuf[24]; /* buffer to print the number */
502 const char *start; /* start of the string to process */
503 char *stesc; /* start of an escape sequence ('\\') */
504 const char *stnam; /* start of the name, after "[(*" */
505 const char *cp; /* end of the name, e.g. before ']' */
506 const char *res; /* the string to be substituted */
507 char *nbuf; /* new buffer to copy bufp to */
508 size_t maxl; /* expected length of the escape name */
509 size_t naml; /* actual length of the escape name */
510 int expand_count; /* to avoid infinite loops */
511 int npos; /* position in numeric expression */
512 int arg_complete; /* argument not interrupted by eol */
513 char term; /* character terminating the escape */
514
515 expand_count = 0;
516 start = *bufp + pos;
517 stesc = strchr(start, '\0') - 1;
518 while (stesc-- > start) {
519
520 /* Search backwards for the next backslash. */
521
522 if ('\\' != *stesc)
523 continue;
524
525 /* If it is escaped, skip it. */
526
527 for (cp = stesc - 1; cp >= start; cp--)
528 if ('\\' != *cp)
529 break;
530
531 if (0 == (stesc - cp) % 2) {
532 stesc = (char *)cp;
533 continue;
534 }
535
536 /* Decide whether to expand or to check only. */
537
538 term = '\0';
539 cp = stesc + 1;
540 switch (*cp) {
541 case '*':
542 res = NULL;
543 break;
544 case 'B':
545 /* FALLTHROUGH */
546 case 'w':
547 term = cp[1];
548 /* FALLTHROUGH */
549 case 'n':
550 res = ubuf;
551 break;
552 default:
553 if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
554 mandoc_vmsg(MANDOCERR_ESC_BAD,
555 r->parse, ln, (int)(stesc - *bufp),
556 "%.*s", (int)(cp - stesc), stesc);
557 continue;
558 }
559
560 if (EXPAND_LIMIT < ++expand_count) {
561 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
562 ln, (int)(stesc - *bufp), NULL);
563 return(ROFF_IGN);
564 }
565
566 /*
567 * The third character decides the length
568 * of the name of the string or register.
569 * Save a pointer to the name.
570 */
571
572 if ('\0' == term) {
573 switch (*++cp) {
574 case '\0':
575 maxl = 0;
576 break;
577 case '(':
578 cp++;
579 maxl = 2;
580 break;
581 case '[':
582 cp++;
583 term = ']';
584 maxl = 0;
585 break;
586 default:
587 maxl = 1;
588 break;
589 }
590 } else {
591 cp += 2;
592 maxl = 0;
593 }
594 stnam = cp;
595
596 /* Advance to the end of the name. */
597
598 arg_complete = 1;
599 for (naml = 0; 0 == maxl || naml < maxl; naml++, cp++) {
600 if ('\0' == *cp) {
601 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
602 ln, (int)(stesc - *bufp), stesc);
603 arg_complete = 0;
604 break;
605 }
606 if (0 == maxl && *cp == term) {
607 cp++;
608 break;
609 }
610 }
611
612 /*
613 * Retrieve the replacement string; if it is
614 * undefined, resume searching for escapes.
615 */
616
617 switch (stesc[1]) {
618 case '*':
619 if (arg_complete)
620 res = roff_getstrn(r, stnam, naml);
621 break;
622 case 'B':
623 npos = 0;
624 ubuf[0] = arg_complete &&
625 roff_evalnum(stnam, &npos, NULL, 0) &&
626 stnam + npos + 1 == cp ? '1' : '0';
627 ubuf[1] = '\0';
628 break;
629 case 'n':
630 if (arg_complete)
631 (void)snprintf(ubuf, sizeof(ubuf), "%d",
632 roff_getregn(r, stnam, naml));
633 else
634 ubuf[0] = '\0';
635 break;
636 case 'w':
637 /* use even incomplete args */
638 (void)snprintf(ubuf, sizeof(ubuf), "%d",
639 24 * (int)naml);
640 break;
641 }
642
643 if (NULL == res) {
644 mandoc_vmsg(MANDOCERR_STR_UNDEF,
645 r->parse, ln, (int)(stesc - *bufp),
646 "%.*s", (int)naml, stnam);
647 res = "";
648 }
649
650 /* Replace the escape sequence by the string. */
651
652 *stesc = '\0';
653 *szp = mandoc_asprintf(&nbuf, "%s%s%s",
654 *bufp, res, cp) + 1;
655
656 /* Prepare for the next replacement. */
657
658 start = nbuf + pos;
659 stesc = nbuf + (stesc - *bufp) + strlen(res);
660 free(*bufp);
661 *bufp = nbuf;
662 }
663 return(ROFF_CONT);
664 }
665
666 /*
667 * Process text streams:
668 * Convert all breakable hyphens into ASCII_HYPH.
669 * Decrement and spring input line trap.
670 */
671 static enum rofferr
672 roff_parsetext(char **bufp, size_t *szp, int pos, int *offs)
673 {
674 size_t sz;
675 const char *start;
676 char *p;
677 int isz;
678 enum mandoc_esc esc;
679
680 start = p = *bufp + pos;
681
682 while ('\0' != *p) {
683 sz = strcspn(p, "-\\");
684 p += sz;
685
686 if ('\0' == *p)
687 break;
688
689 if ('\\' == *p) {
690 /* Skip over escapes. */
691 p++;
692 esc = mandoc_escape((const char **)&p, NULL, NULL);
693 if (ESCAPE_ERROR == esc)
694 break;
695 continue;
696 } else if (p == start) {
697 p++;
698 continue;
699 }
700
701 if (isalpha((unsigned char)p[-1]) &&
702 isalpha((unsigned char)p[1]))
703 *p = ASCII_HYPH;
704 p++;
705 }
706
707 /* Spring the input line trap. */
708 if (1 == roffit_lines) {
709 isz = mandoc_asprintf(&p, "%s\n.%s", *bufp, roffit_macro);
710 free(*bufp);
711 *bufp = p;
712 *szp = isz + 1;
713 *offs = 0;
714 free(roffit_macro);
715 roffit_lines = 0;
716 return(ROFF_REPARSE);
717 } else if (1 < roffit_lines)
718 --roffit_lines;
719 return(ROFF_CONT);
720 }
721
722 enum rofferr
723 roff_parseln(struct roff *r, int ln, char **bufp,
724 size_t *szp, int pos, int *offs)
725 {
726 enum rofft t;
727 enum rofferr e;
728 int ppos, ctl;
729
730 /* Handle in-line equation delimiters. */
731
732 if (r->last_eqn != NULL && r->last_eqn->delim &&
733 (r->eqn == NULL || r->eqn_inline)) {
734 e = roff_eqndelim(r, bufp, szp, pos);
735 if (e == ROFF_REPARSE)
736 return(e);
737 assert(e == ROFF_CONT);
738 }
739
740 /* Expand some escape sequences. */
741
742 e = roff_res(r, bufp, szp, ln, pos);
743 if (ROFF_IGN == e)
744 return(e);
745 assert(ROFF_CONT == e);
746
747 ppos = pos;
748 ctl = roff_getcontrol(r, *bufp, &pos);
749
750 /*
751 * First, if a scope is open and we're not a macro, pass the
752 * text through the macro's filter. If a scope isn't open and
753 * we're not a macro, just let it through.
754 * Finally, if there's an equation scope open, divert it into it
755 * no matter our state.
756 */
757
758 if (r->last && ! ctl) {
759 t = r->last->tok;
760 assert(roffs[t].text);
761 e = (*roffs[t].text)(r, t, bufp, szp, ln, pos, pos, offs);
762 assert(ROFF_IGN == e || ROFF_CONT == e);
763 if (ROFF_CONT != e)
764 return(e);
765 }
766 if (r->eqn)
767 return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
768 if ( ! ctl) {
769 if (r->tbl)
770 return(tbl_read(r->tbl, ln, *bufp, pos));
771 return(roff_parsetext(bufp, szp, pos, offs));
772 }
773
774 /* Skip empty request lines. */
775
776 if ((*bufp)[pos] == '"') {
777 mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
778 ln, pos, NULL);
779 return(ROFF_IGN);
780 } else if ((*bufp)[pos] == '\0')
781 return(ROFF_IGN);
782
783 /*
784 * If a scope is open, go to the child handler for that macro,
785 * as it may want to preprocess before doing anything with it.
786 * Don't do so if an equation is open.
787 */
788
789 if (r->last) {
790 t = r->last->tok;
791 assert(roffs[t].sub);
792 return((*roffs[t].sub)(r, t, bufp, szp,
793 ln, ppos, pos, offs));
794 }
795
796 /*
797 * Lastly, as we've no scope open, try to look up and execute
798 * the new macro. If no macro is found, simply return and let
799 * the compilers handle it.
800 */
801
802 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos, ln, ppos)))
803 return(ROFF_CONT);
804
805 assert(roffs[t].proc);
806 return((*roffs[t].proc)(r, t, bufp, szp, ln, ppos, pos, offs));
807 }
808
809 void
810 roff_endparse(struct roff *r)
811 {
812
813 if (r->last)
814 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
815 r->last->line, r->last->col,
816 roffs[r->last->tok].name);
817
818 if (r->eqn) {
819 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
820 r->eqn->eqn.ln, r->eqn->eqn.pos, "EQ");
821 eqn_end(&r->eqn);
822 }
823
824 if (r->tbl) {
825 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
826 r->tbl->line, r->tbl->pos, "TS");
827 tbl_end(&r->tbl);
828 }
829 }
830
831 /*
832 * Parse a roff node's type from the input buffer. This must be in the
833 * form of ".foo xxx" in the usual way.
834 */
835 static enum rofft
836 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
837 {
838 char *cp;
839 const char *mac;
840 size_t maclen;
841 enum rofft t;
842
843 cp = buf + *pos;
844
845 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
846 return(ROFF_MAX);
847
848 mac = cp;
849 maclen = roff_getname(r, &cp, ln, ppos);
850
851 t = (r->current_string = roff_getstrn(r, mac, maclen))
852 ? ROFF_USERDEF : roffhash_find(mac, maclen);
853
854 if (ROFF_MAX != t)
855 *pos = cp - buf;
856
857 return(t);
858 }
859
860 static enum rofferr
861 roff_cblock(ROFF_ARGS)
862 {
863
864 /*
865 * A block-close `..' should only be invoked as a child of an
866 * ignore macro, otherwise raise a warning and just ignore it.
867 */
868
869 if (NULL == r->last) {
870 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
871 ln, ppos, "..");
872 return(ROFF_IGN);
873 }
874
875 switch (r->last->tok) {
876 case ROFF_am:
877 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
878 /* FALLTHROUGH */
879 case ROFF_ami:
880 /* FALLTHROUGH */
881 case ROFF_de:
882 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
883 /* FALLTHROUGH */
884 case ROFF_dei:
885 /* FALLTHROUGH */
886 case ROFF_ig:
887 break;
888 default:
889 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
890 ln, ppos, "..");
891 return(ROFF_IGN);
892 }
893
894 if ((*bufp)[pos])
895 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
896 ".. %s", *bufp + pos);
897
898 roffnode_pop(r);
899 roffnode_cleanscope(r);
900 return(ROFF_IGN);
901
902 }
903
904 static void
905 roffnode_cleanscope(struct roff *r)
906 {
907
908 while (r->last) {
909 if (--r->last->endspan != 0)
910 break;
911 roffnode_pop(r);
912 }
913 }
914
915 static void
916 roff_ccond(struct roff *r, int ln, int ppos)
917 {
918
919 if (NULL == r->last) {
920 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
921 ln, ppos, "\\}");
922 return;
923 }
924
925 switch (r->last->tok) {
926 case ROFF_el:
927 /* FALLTHROUGH */
928 case ROFF_ie:
929 /* FALLTHROUGH */
930 case ROFF_if:
931 break;
932 default:
933 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
934 ln, ppos, "\\}");
935 return;
936 }
937
938 if (r->last->endspan > -1) {
939 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
940 ln, ppos, "\\}");
941 return;
942 }
943
944 roffnode_pop(r);
945 roffnode_cleanscope(r);
946 return;
947 }
948
949 static enum rofferr
950 roff_block(ROFF_ARGS)
951 {
952 const char *name;
953 char *iname, *cp;
954 size_t namesz;
955
956 /* Ignore groff compatibility mode for now. */
957
958 if (ROFF_de1 == tok)
959 tok = ROFF_de;
960 else if (ROFF_am1 == tok)
961 tok = ROFF_am;
962
963 /* Parse the macro name argument. */
964
965 cp = *bufp + pos;
966 if (ROFF_ig == tok) {
967 iname = NULL;
968 namesz = 0;
969 } else {
970 iname = cp;
971 namesz = roff_getname(r, &cp, ln, ppos);
972 iname[namesz] = '\0';
973 }
974
975 /* Resolve the macro name argument if it is indirect. */
976
977 if (namesz && (ROFF_dei == tok || ROFF_ami == tok)) {
978 if (NULL == (name = roff_getstrn(r, iname, namesz))) {
979 mandoc_vmsg(MANDOCERR_STR_UNDEF,
980 r->parse, ln, (int)(iname - *bufp),
981 "%.*s", (int)namesz, iname);
982 namesz = 0;
983 } else
984 namesz = strlen(name);
985 } else
986 name = iname;
987
988 if (0 == namesz && ROFF_ig != tok) {
989 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
990 ln, ppos, roffs[tok].name);
991 return(ROFF_IGN);
992 }
993
994 roffnode_push(r, tok, name, ln, ppos);
995
996 /*
997 * At the beginning of a `de' macro, clear the existing string
998 * with the same name, if there is one. New content will be
999 * appended from roff_block_text() in multiline mode.
1000 */
1001
1002 if (ROFF_de == tok || ROFF_dei == tok)
1003 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1004
1005 if ('\0' == *cp)
1006 return(ROFF_IGN);
1007
1008 /* Get the custom end marker. */
1009
1010 iname = cp;
1011 namesz = roff_getname(r, &cp, ln, ppos);
1012
1013 /* Resolve the end marker if it is indirect. */
1014
1015 if (namesz && (ROFF_dei == tok || ROFF_ami == tok)) {
1016 if (NULL == (name = roff_getstrn(r, iname, namesz))) {
1017 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1018 r->parse, ln, (int)(iname - *bufp),
1019 "%.*s", (int)namesz, iname);
1020 namesz = 0;
1021 } else
1022 namesz = strlen(name);
1023 } else
1024 name = iname;
1025
1026 if (namesz)
1027 r->last->end = mandoc_strndup(name, namesz);
1028
1029 if ('\0' != *cp)
1030 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
1031 ln, pos, ".%s ... %s", roffs[tok].name, cp);
1032
1033 return(ROFF_IGN);
1034 }
1035
1036 static enum rofferr
1037 roff_block_sub(ROFF_ARGS)
1038 {
1039 enum rofft t;
1040 int i, j;
1041
1042 /*
1043 * First check whether a custom macro exists at this level. If
1044 * it does, then check against it. This is some of groff's
1045 * stranger behaviours. If we encountered a custom end-scope
1046 * tag and that tag also happens to be a "real" macro, then we
1047 * need to try interpreting it again as a real macro. If it's
1048 * not, then return ignore. Else continue.
1049 */
1050
1051 if (r->last->end) {
1052 for (i = pos, j = 0; r->last->end[j]; j++, i++)
1053 if ((*bufp)[i] != r->last->end[j])
1054 break;
1055
1056 if ('\0' == r->last->end[j] &&
1057 ('\0' == (*bufp)[i] ||
1058 ' ' == (*bufp)[i] ||
1059 '\t' == (*bufp)[i])) {
1060 roffnode_pop(r);
1061 roffnode_cleanscope(r);
1062
1063 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
1064 i++;
1065
1066 pos = i;
1067 if (ROFF_MAX != roff_parse(r, *bufp, &pos, ln, ppos))
1068 return(ROFF_RERUN);
1069 return(ROFF_IGN);
1070 }
1071 }
1072
1073 /*
1074 * If we have no custom end-query or lookup failed, then try
1075 * pulling it out of the hashtable.
1076 */
1077
1078 t = roff_parse(r, *bufp, &pos, ln, ppos);
1079
1080 if (ROFF_cblock != t) {
1081 if (ROFF_ig != tok)
1082 roff_setstr(r, r->last->name, *bufp + ppos, 2);
1083 return(ROFF_IGN);
1084 }
1085
1086 assert(roffs[t].proc);
1087 return((*roffs[t].proc)(r, t, bufp, szp, ln, ppos, pos, offs));
1088 }
1089
1090 static enum rofferr
1091 roff_block_text(ROFF_ARGS)
1092 {
1093
1094 if (ROFF_ig != tok)
1095 roff_setstr(r, r->last->name, *bufp + pos, 2);
1096
1097 return(ROFF_IGN);
1098 }
1099
1100 static enum rofferr
1101 roff_cond_sub(ROFF_ARGS)
1102 {
1103 enum rofft t;
1104 char *ep;
1105 int rr;
1106
1107 rr = r->last->rule;
1108 roffnode_cleanscope(r);
1109 t = roff_parse(r, *bufp, &pos, ln, ppos);
1110
1111 /*
1112 * Fully handle known macros when they are structurally
1113 * required or when the conditional evaluated to true.
1114 */
1115
1116 if ((ROFF_MAX != t) &&
1117 (rr || ROFFMAC_STRUCT & roffs[t].flags)) {
1118 assert(roffs[t].proc);
1119 return((*roffs[t].proc)(r, t, bufp, szp,
1120 ln, ppos, pos, offs));
1121 }
1122
1123 /*
1124 * If `\}' occurs on a macro line without a preceding macro,
1125 * drop the line completely.
1126 */
1127
1128 ep = *bufp + pos;
1129 if ('\\' == ep[0] && '}' == ep[1])
1130 rr = 0;
1131
1132 /* Always check for the closing delimiter `\}'. */
1133
1134 while (NULL != (ep = strchr(ep, '\\'))) {
1135 if ('}' == *(++ep)) {
1136 *ep = '&';
1137 roff_ccond(r, ln, ep - *bufp - 1);
1138 }
1139 ++ep;
1140 }
1141 return(rr ? ROFF_CONT : ROFF_IGN);
1142 }
1143
1144 static enum rofferr
1145 roff_cond_text(ROFF_ARGS)
1146 {
1147 char *ep;
1148 int rr;
1149
1150 rr = r->last->rule;
1151 roffnode_cleanscope(r);
1152
1153 ep = *bufp + pos;
1154 while (NULL != (ep = strchr(ep, '\\'))) {
1155 if ('}' == *(++ep)) {
1156 *ep = '&';
1157 roff_ccond(r, ln, ep - *bufp - 1);
1158 }
1159 ++ep;
1160 }
1161 return(rr ? ROFF_CONT : ROFF_IGN);
1162 }
1163
/*
 * Parse a single signed integer number starting at v[*pos].
 * Stop at the first non-digit.
 * If there is at least one digit, store the number in *res unless
 * res is NULL, advance *pos behind the last digit, and return 1.
 * Otherwise, return 0 and leave *pos unchanged; *res is set to 0.
 * Overflows are ignored: the digits are accumulated in unsigned
 * arithmetic, which wraps around, rather than in signed arithmetic,
 * where overflow is undefined.  Converting the wrapped value back
 * to int is implementation-defined, but not undefined.
 */
static int
roff_getnum(const char *v, int *pos, int *res)
{
	unsigned int	 acc;
	int		 myres, n, p;

	if (NULL == res)
		res = &myres;

	/* n is 1 if there is a minus sign, 0 otherwise. */

	p = *pos;
	n = v[p] == '-';
	if (n)
		p++;

	for (acc = 0; isdigit((unsigned char)v[p]); p++)
		acc = 10 * acc + (unsigned int)(v[p] - '0');

	/* Fail unless at least one digit was found. */

	if (p == *pos + n) {
		*res = 0;
		return 0;
	}

	if (n)
		acc = 0U - acc;
	*res = (int)acc;

	*pos = p;
	return 1;
}
1194
/*
 * Evaluate a string comparison condition.
 * The first character is the delimiter.
 * Succeed if the string up to its second occurrence
 * matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or lacking that, to the end of the line.
 * The cursor is never moved beyond the end of the line; in
 * particular, an empty condition fails without moving it at all.
 * Returns 1 on a match, 0 otherwise.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*s1, *s2, *s3;
	int		 match;

	match = 0;
	s1 = v + *pos;		/* initial delimiter */

	/* At the end of the line, there is nothing to compare. */

	if ('\0' == *s1)
		return(0);

	s2 = s1 + 1;		/* for scanning the first string */
	s3 = strchr(s2, *s1);	/* for scanning the second string */

	if (NULL == s3)		/* found no middle delimiter */
		goto out;

	while ('\0' != *++s3) {
		if (*s2 != *s3) {  /* mismatch */
			s3 = strchr(s3, *s1);
			break;
		}
		if (*s3 == *s1) {  /* found the final delimiter */
			match = 1;
			break;
		}
		s2++;
	}

out:
	if (NULL == s3)
		s3 = strchr(s2, '\0');
	else if ('\0' != *s3)	/* step over the final delimiter only */
		s3++;
	*pos = s3 - v;
	return(match);
}
1237
/*
 * Evaluate one condition at v[*pos], optionally preceded by `!'.
 * Single-letter conditions are tried first, then a numerical
 * expression, and finally a string comparison.
 * The cursor is advanced past whatever was consumed.
 */
static int
roff_evalcond(const char *v, int *pos)
{
	int	 negated, value;

	negated = v[*pos] == '!';
	if (negated)
		(*pos)++;

	switch (v[*pos]) {
	case 'n':
	case 'o':
		/* These two letters count as true. */
		(*pos)++;
		return !negated;
	case 'c':
	case 'd':
	case 'e':
	case 'r':
	case 't':
		/* These letters count as false. */
		(*pos)++;
		return negated;
	default:
		break;
	}

	/* A number is true if and only if it is positive. */
	if (roff_evalnum(v, pos, &value, 0))
		return (value > 0) == !negated;

	return roff_evalstrcond(v, pos) == !negated;
}
1279
1280 static enum rofferr
1281 roff_line_ignore(ROFF_ARGS)
1282 {
1283
1284 return(ROFF_IGN);
1285 }
1286
1287 static enum rofferr
1288 roff_cond(ROFF_ARGS)
1289 {
1290
1291 roffnode_push(r, tok, NULL, ln, ppos);
1292
1293 /*
1294 * An `.el' has no conditional body: it will consume the value
1295 * of the current rstack entry set in prior `ie' calls or
1296 * defaults to DENY.
1297 *
1298 * If we're not an `el', however, then evaluate the conditional.
1299 */
1300
1301 r->last->rule = ROFF_el == tok ?
1302 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
1303 roff_evalcond(*bufp, &pos);
1304
1305 /*
1306 * An if-else will put the NEGATION of the current evaluated
1307 * conditional into the stack of rules.
1308 */
1309
1310 if (ROFF_ie == tok) {
1311 if (r->rstackpos + 1 == r->rstacksz) {
1312 r->rstacksz += 16;
1313 r->rstack = mandoc_reallocarray(r->rstack,
1314 r->rstacksz, sizeof(int));
1315 }
1316 r->rstack[++r->rstackpos] = !r->last->rule;
1317 }
1318
1319 /* If the parent has false as its rule, then so do we. */
1320
1321 if (r->last->parent && !r->last->parent->rule)
1322 r->last->rule = 0;
1323
1324 /*
1325 * Determine scope.
1326 * If there is nothing on the line after the conditional,
1327 * not even whitespace, use next-line scope.
1328 */
1329
1330 if ('\0' == (*bufp)[pos]) {
1331 r->last->endspan = 2;
1332 goto out;
1333 }
1334
1335 while (' ' == (*bufp)[pos])
1336 pos++;
1337
1338 /* An opening brace requests multiline scope. */
1339
1340 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1341 r->last->endspan = -1;
1342 pos += 2;
1343 goto out;
1344 }
1345
1346 /*
1347 * Anything else following the conditional causes
1348 * single-line scope. Warn if the scope contains
1349 * nothing but trailing whitespace.
1350 */
1351
1352 if ('\0' == (*bufp)[pos])
1353 mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
1354 ln, ppos, roffs[tok].name);
1355
1356 r->last->endspan = 1;
1357
1358 out:
1359 *offs = pos;
1360 return(ROFF_RERUN);
1361 }
1362
1363 static enum rofferr
1364 roff_ds(ROFF_ARGS)
1365 {
1366 char *string;
1367 const char *name;
1368 size_t namesz;
1369
1370 /*
1371 * The first word is the name of the string.
1372 * If it is empty or terminated by an escape sequence,
1373 * abort the `ds' request without defining anything.
1374 */
1375
1376 name = string = *bufp + pos;
1377 if ('\0' == *name)
1378 return(ROFF_IGN);
1379
1380 namesz = roff_getname(r, &string, ln, pos);
1381 if ('\\' == name[namesz])
1382 return(ROFF_IGN);
1383
1384 /* Read past the initial double-quote, if any. */
1385 if ('"' == *string)
1386 string++;
1387
1388 /* The rest is the value. */
1389 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
1390 ROFF_as == tok);
1391 return(ROFF_IGN);
1392 }
1393
/*
 * Recognize one operator of one or two characters at v[*pos].
 * On success, store a one-character code for it in *res, move the
 * parse point behind the operator and return that code (non-zero).
 * Two-character operators are encoded as follows:
 * `<=' as 'l', `<>' as '!', `<?' as 'i', `>=' as 'g', `>?' as 'a',
 * and `==' as '='.
 * On failure, return 0 without moving the parse point;
 * *res then holds the unrecognized character.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	int	 len;

	*res = v[*pos];
	len = 1;

	switch (*res) {
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case ':':
		break;
	case '<':
		if (v[*pos + 1] == '=') {
			*res = 'l';
			len = 2;
		} else if (v[*pos + 1] == '>') {
			*res = '!';
			len = 2;
		} else if (v[*pos + 1] == '?') {
			*res = 'i';
			len = 2;
		}
		break;
	case '>':
		if (v[*pos + 1] == '=') {
			*res = 'g';
			len = 2;
		} else if (v[*pos + 1] == '?') {
			*res = 'a';
			len = 2;
		}
		break;
	case '=':
		if (v[*pos + 1] == '=')
			len = 2;
		break;
	default:
		return 0;
	}

	*pos += len;
	return *res;
}
1463
1464 /*
1465 * Evaluate either a parenthesized numeric expression
1466 * or a single signed integer number.
1467 */
1468 static int
1469 roff_evalpar(const char *v, int *pos, int *res)
1470 {
1471
1472 if ('(' != v[*pos])
1473 return(roff_getnum(v, pos, res));
1474
1475 (*pos)++;
1476 if ( ! roff_evalnum(v, pos, res, 1))
1477 return(0);
1478
1479 /*
1480 * Omission of the closing parenthesis
1481 * is an error in validation mode,
1482 * but ignored in evaluation mode.
1483 */
1484
1485 if (')' == v[*pos])
1486 (*pos)++;
1487 else if (NULL == res)
1488 return(0);
1489
1490 return(1);
1491 }
1492
/*
 * Evaluate a complete numeric expression.
 * Proceed left to right, there is no concept of precedence.
 * pos may be NULL to start parsing at the beginning of v.
 * res may be NULL to validate the syntax without computing a value.
 * If skipwhite is non-zero, white space around operands and
 * operators is skipped.
 * Division and modulus by zero yield 0 rather than invoking
 * undefined behavior.
 * Returns 1 on success and 0 if an operand cannot be parsed.
 */
static int
roff_evalnum(const char *v, int *pos, int *res, int skipwhite)
{
	int		 mypos, operand2;
	char		 operator;

	if (NULL == pos) {
		mypos = 0;
		pos = &mypos;
	}

	if (skipwhite)
		while (isspace((unsigned char)v[*pos]))
			(*pos)++;

	if ( ! roff_evalpar(v, pos, res))
		return(0);

	while (1) {
		if (skipwhite)
			while (isspace((unsigned char)v[*pos]))
				(*pos)++;

		/* The expression ends where no operator follows. */
		if ( ! roff_getop(v, pos, &operator))
			break;

		if (skipwhite)
			while (isspace((unsigned char)v[*pos]))
				(*pos)++;

		if ( ! roff_evalpar(v, pos, &operand2))
			return(0);

		if (skipwhite)
			while (isspace((unsigned char)v[*pos]))
				(*pos)++;

		/* Validation mode: parse only, compute nothing. */
		if (NULL == res)
			continue;

		switch (operator) {
		case '+':
			*res += operand2;
			break;
		case '-':
			*res -= operand2;
			break;
		case '*':
			*res *= operand2;
			break;
		case '/':
			/* Input is untrusted; never divide by zero. */
			if (0 == operand2) {
				*res = 0;
				break;
			}
			*res /= operand2;
			break;
		case '%':
			if (0 == operand2) {
				*res = 0;
				break;
			}
			*res %= operand2;
			break;
		case '<':
			*res = *res < operand2;
			break;
		case '>':
			*res = *res > operand2;
			break;
		case 'l':
			*res = *res <= operand2;
			break;
		case 'g':
			*res = *res >= operand2;
			break;
		case '=':
			*res = *res == operand2;
			break;
		case '!':
			*res = *res != operand2;
			break;
		case '&':
			*res = *res && operand2;
			break;
		case ':':
			*res = *res || operand2;
			break;
		case 'i':	/* minimum */
			if (operand2 < *res)
				*res = operand2;
			break;
		case 'a':	/* maximum */
			if (operand2 > *res)
				*res = operand2;
			break;
		default:
			/* roff_getop() returned an unknown code. */
			abort();
		}
	}
	return(1);
}
1591
1592 void
1593 roff_setreg(struct roff *r, const char *name, int val, char sign)
1594 {
1595 struct roffreg *reg;
1596
1597 /* Search for an existing register with the same name. */
1598 reg = r->regtab;
1599
1600 while (reg && strcmp(name, reg->key.p))
1601 reg = reg->next;
1602
1603 if (NULL == reg) {
1604 /* Create a new register. */
1605 reg = mandoc_malloc(sizeof(struct roffreg));
1606 reg->key.p = mandoc_strdup(name);
1607 reg->key.sz = strlen(name);
1608 reg->val = 0;
1609 reg->next = r->regtab;
1610 r->regtab = reg;
1611 }
1612
1613 if ('+' == sign)
1614 reg->val += val;
1615 else if ('-' == sign)
1616 reg->val -= val;
1617 else
1618 reg->val = val;
1619 }
1620
/*
 * Look up a predefined read-only number register by the first
 * character of name.
 * Returns -1 if that character does not name a predefined register;
 * should a predefined register with the value -1 ever be needed,
 * a different marker value will have to be picked.
 */
static int
roff_getregro(const char *name)
{
	int	 val;

	switch (*name) {
	case 'A':
		/* ASCII approximation mode is always off. */
		val = 0;
		break;
	case 'g':
		/* Groff compatibility mode is always on. */
		val = 1;
		break;
	case 'H':
		/* Fixed horizontal resolution. */
		val = 24;
		break;
	case 'j':
		/* Always adjust left margin only. */
		val = 0;
		break;
	case 'T':
		/* Some output device is always defined. */
		val = 1;
		break;
	case 'V':
		/* Fixed vertical resolution. */
		val = 40;
		break;
	default:
		val = -1;
		break;
	}
	return val;
}
1648
1649 int
1650 roff_getreg(const struct roff *r, const char *name)
1651 {
1652 struct roffreg *reg;
1653 int val;
1654
1655 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
1656 val = roff_getregro(name + 1);
1657 if (-1 != val)
1658 return (val);
1659 }
1660
1661 for (reg = r->regtab; reg; reg = reg->next)
1662 if (0 == strcmp(name, reg->key.p))
1663 return(reg->val);
1664
1665 return(0);
1666 }
1667
1668 static int
1669 roff_getregn(const struct roff *r, const char *name, size_t len)
1670 {
1671 struct roffreg *reg;
1672 int val;
1673
1674 if ('.' == name[0] && 2 == len) {
1675 val = roff_getregro(name + 1);
1676 if (-1 != val)
1677 return (val);
1678 }
1679
1680 for (reg = r->regtab; reg; reg = reg->next)
1681 if (len == reg->key.sz &&
1682 0 == strncmp(name, reg->key.p, len))
1683 return(reg->val);
1684
1685 return(0);
1686 }
1687
1688 static void
1689 roff_freereg(struct roffreg *reg)
1690 {
1691 struct roffreg *old_reg;
1692
1693 while (NULL != reg) {
1694 free(reg->key.p);
1695 old_reg = reg;
1696 reg = reg->next;
1697 free(old_reg);
1698 }
1699 }
1700
1701 static enum rofferr
1702 roff_nr(ROFF_ARGS)
1703 {
1704 char *key, *val;
1705 size_t keysz;
1706 int iv;
1707 char sign;
1708
1709 key = val = *bufp + pos;
1710 if ('\0' == *key)
1711 return(ROFF_IGN);
1712
1713 keysz = roff_getname(r, &val, ln, pos);
1714 if ('\\' == key[keysz])
1715 return(ROFF_IGN);
1716 key[keysz] = '\0';
1717
1718 sign = *val;
1719 if ('+' == sign || '-' == sign)
1720 val++;
1721
1722 if (roff_evalnum(val, NULL, &iv, 0))
1723 roff_setreg(r, key, iv, sign);
1724
1725 return(ROFF_IGN);
1726 }
1727
1728 static enum rofferr
1729 roff_rr(ROFF_ARGS)
1730 {
1731 struct roffreg *reg, **prev;
1732 char *name, *cp;
1733 size_t namesz;
1734
1735 name = cp = *bufp + pos;
1736 if ('\0' == *name)
1737 return(ROFF_IGN);
1738 namesz = roff_getname(r, &cp, ln, pos);
1739 name[namesz] = '\0';
1740
1741 prev = &r->regtab;
1742 while (1) {
1743 reg = *prev;
1744 if (NULL == reg || !strcmp(name, reg->key.p))
1745 break;
1746 prev = &reg->next;
1747 }
1748 if (NULL != reg) {
1749 *prev = reg->next;
1750 free(reg->key.p);
1751 free(reg);
1752 }
1753 return(ROFF_IGN);
1754 }
1755
1756 static enum rofferr
1757 roff_rm(ROFF_ARGS)
1758 {
1759 const char *name;
1760 char *cp;
1761 size_t namesz;
1762
1763 cp = *bufp + pos;
1764 while ('\0' != *cp) {
1765 name = cp;
1766 namesz = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1767 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
1768 if ('\\' == name[namesz])
1769 break;
1770 }
1771 return(ROFF_IGN);
1772 }
1773
1774 static enum rofferr
1775 roff_it(ROFF_ARGS)
1776 {
1777 char *cp;
1778 size_t len;
1779 int iv;
1780
1781 /* Parse the number of lines. */
1782 cp = *bufp + pos;
1783 len = strcspn(cp, " \t");
1784 cp[len] = '\0';
1785 if ((iv = mandoc_strntoi(cp, len, 10)) <= 0) {
1786 mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
1787 ln, ppos, *bufp + 1);
1788 return(ROFF_IGN);
1789 }
1790 cp += len + 1;
1791
1792 /* Arm the input line trap. */
1793 roffit_lines = iv;
1794 roffit_macro = mandoc_strdup(cp);
1795 return(ROFF_IGN);
1796 }
1797
1798 static enum rofferr
1799 roff_Dd(ROFF_ARGS)
1800 {
1801 const char *const *cp;
1802
1803 if ((r->options & (MPARSE_MDOC | MPARSE_QUICK)) == 0)
1804 for (cp = __mdoc_reserved; *cp; cp++)
1805 roff_setstr(r, *cp, NULL, 0);
1806
1807 if (r->format == 0)
1808 r->format = MPARSE_MDOC;
1809
1810 return(ROFF_CONT);
1811 }
1812
1813 static enum rofferr
1814 roff_TH(ROFF_ARGS)
1815 {
1816 const char *const *cp;
1817
1818 if ((r->options & MPARSE_QUICK) == 0)
1819 for (cp = __man_reserved; *cp; cp++)
1820 roff_setstr(r, *cp, NULL, 0);
1821
1822 if (r->format == 0)
1823 r->format = MPARSE_MAN;
1824
1825 return(ROFF_CONT);
1826 }
1827
1828 static enum rofferr
1829 roff_TE(ROFF_ARGS)
1830 {
1831
1832 if (NULL == r->tbl)
1833 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1834 ln, ppos, "TE");
1835 else
1836 tbl_end(&r->tbl);
1837
1838 return(ROFF_IGN);
1839 }
1840
1841 static enum rofferr
1842 roff_T_(ROFF_ARGS)
1843 {
1844
1845 if (NULL == r->tbl)
1846 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1847 ln, ppos, "T&");
1848 else
1849 tbl_restart(ppos, ln, r->tbl);
1850
1851 return(ROFF_IGN);
1852 }
1853
1854 /*
1855 * Handle in-line equation delimiters.
1856 */
1857 static enum rofferr
1858 roff_eqndelim(struct roff *r, char **bufp, size_t *szp, int pos)
1859 {
1860 char *cp1, *cp2;
1861
1862 /*
1863 * Outside equations, look for an opening delimiter.
1864 * If we are inside an equation, we already know it is
1865 * in-line, or this function wouldn't have been called;
1866 * so look for a closing delimiter.
1867 */
1868
1869 cp1 = *bufp + pos;
1870 cp2 = strchr(cp1, r->eqn == NULL ?
1871 r->last_eqn->odelim : r->last_eqn->cdelim);
1872 if (cp2 == NULL)
1873 return(ROFF_CONT);
1874
1875 /* Found a delimiter; get rid of surrounding blanks. */
1876
1877 cp1 = cp2++;
1878 while (cp2[0] == ' ')
1879 cp2++;
1880 while (cp1[-1] == ' ')
1881 cp1--;
1882 *cp1 = '\0';
1883
1884 /* Replace the delimiter with an equation macro. */
1885
1886 *szp = mandoc_asprintf(&cp1, "%s\n.E%s%s", *bufp,
1887 r->eqn == NULL ? "Q\n" : "N\n\\&", cp2) + 1;
1888 free(*bufp);
1889 *bufp = cp1;
1890
1891 /* Toggle the in-line state of the eqn subsystem. */
1892
1893 r->eqn_inline = r->eqn == NULL;
1894 return(ROFF_REPARSE);
1895 }
1896
1897 static void
1898 roff_openeqn(struct roff *r, const char *name, int line,
1899 int offs, const char *buf)
1900 {
1901 struct eqn_node *e;
1902 int poff;
1903
1904 assert(NULL == r->eqn);
1905 e = eqn_alloc(name, offs, line, r->parse);
1906
1907 if (r->last_eqn) {
1908 r->last_eqn->next = e;
1909 e->delim = r->last_eqn->delim;
1910 e->odelim = r->last_eqn->odelim;
1911 e->cdelim = r->last_eqn->cdelim;
1912 } else
1913 r->first_eqn = r->last_eqn = e;
1914
1915 r->eqn = r->last_eqn = e;
1916
1917 if (buf) {
1918 poff = 0;
1919 eqn_read(&r->eqn, line, buf, offs, &poff);
1920 }
1921 }
1922
1923 static enum rofferr
1924 roff_EQ(ROFF_ARGS)
1925 {
1926
1927 roff_openeqn(r, *bufp + pos, ln, ppos, NULL);
1928 return(ROFF_IGN);
1929 }
1930
1931 static enum rofferr
1932 roff_EN(ROFF_ARGS)
1933 {
1934
1935 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
1936 return(ROFF_IGN);
1937 }
1938
1939 static enum rofferr
1940 roff_TS(ROFF_ARGS)
1941 {
1942 struct tbl_node *tbl;
1943
1944 if (r->tbl) {
1945 mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
1946 ln, ppos, "TS breaks TS");
1947 tbl_end(&r->tbl);
1948 }
1949
1950 tbl = tbl_alloc(ppos, ln, r->parse);
1951
1952 if (r->last_tbl)
1953 r->last_tbl->next = tbl;
1954 else
1955 r->first_tbl = r->last_tbl = tbl;
1956
1957 r->tbl = r->last_tbl = tbl;
1958 return(ROFF_IGN);
1959 }
1960
1961 static enum rofferr
1962 roff_cc(ROFF_ARGS)
1963 {
1964 const char *p;
1965
1966 p = *bufp + pos;
1967
1968 if ('\0' == *p || '.' == (r->control = *p++))
1969 r->control = 0;
1970
1971 if ('\0' != *p)
1972 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1973
1974 return(ROFF_IGN);
1975 }
1976
1977 static enum rofferr
1978 roff_tr(ROFF_ARGS)
1979 {
1980 const char *p, *first, *second;
1981 size_t fsz, ssz;
1982 enum mandoc_esc esc;
1983
1984 p = *bufp + pos;
1985
1986 if ('\0' == *p) {
1987 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1988 return(ROFF_IGN);
1989 }
1990
1991 while ('\0' != *p) {
1992 fsz = ssz = 1;
1993
1994 first = p++;
1995 if ('\\' == *first) {
1996 esc = mandoc_escape(&p, NULL, NULL);
1997 if (ESCAPE_ERROR == esc) {
1998 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
1999 ln, (int)(p - *bufp), first);
2000 return(ROFF_IGN);
2001 }
2002 fsz = (size_t)(p - first);
2003 }
2004
2005 second = p++;
2006 if ('\\' == *second) {
2007 esc = mandoc_escape(&p, NULL, NULL);
2008 if (ESCAPE_ERROR == esc) {
2009 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2010 ln, (int)(p - *bufp), second);
2011 return(ROFF_IGN);
2012 }
2013 ssz = (size_t)(p - second);
2014 } else if ('\0' == *second) {
2015 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
2016 ln, (int)(p - *bufp), NULL);
2017 second = " ";
2018 p--;
2019 }
2020
2021 if (fsz > 1) {
2022 roff_setstrn(&r->xmbtab, first, fsz,
2023 second, ssz, 0);
2024 continue;
2025 }
2026
2027 if (NULL == r->xtab)
2028 r->xtab = mandoc_calloc(128,
2029 sizeof(struct roffstr));
2030
2031 free(r->xtab[(int)*first].p);
2032 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
2033 r->xtab[(int)*first].sz = ssz;
2034 }
2035
2036 return(ROFF_IGN);
2037 }
2038
2039 static enum rofferr
2040 roff_so(ROFF_ARGS)
2041 {
2042 char *name;
2043
2044 name = *bufp + pos;
2045 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
2046
2047 /*
2048 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
2049 * opening anything that's not in our cwd or anything beneath
2050 * it. Thus, explicitly disallow traversing up the file-system
2051 * or using absolute paths.
2052 */
2053
2054 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
2055 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
2056 ".so %s", name);
2057 return(ROFF_ERR);
2058 }
2059
2060 *offs = pos;
2061 return(ROFF_SO);
2062 }
2063
2064 static enum rofferr
2065 roff_userdef(ROFF_ARGS)
2066 {
2067 const char *arg[9];
2068 char *cp, *n1, *n2;
2069 int i;
2070
2071 /*
2072 * Collect pointers to macro argument strings
2073 * and NUL-terminate them.
2074 */
2075 cp = *bufp + pos;
2076 for (i = 0; i < 9; i++)
2077 arg[i] = '\0' == *cp ? "" :
2078 mandoc_getarg(r->parse, &cp, ln, &pos);
2079
2080 /*
2081 * Expand macro arguments.
2082 */
2083 *szp = 0;
2084 n1 = cp = mandoc_strdup(r->current_string);
2085 while (NULL != (cp = strstr(cp, "\\$"))) {
2086 i = cp[2] - '1';
2087 if (0 > i || 8 < i) {
2088 /* Not an argument invocation. */
2089 cp += 2;
2090 continue;
2091 }
2092 *cp = '\0';
2093 *szp = mandoc_asprintf(&n2, "%s%s%s",
2094 n1, arg[i], cp + 3) + 1;
2095 cp = n2 + (cp - n1);
2096 free(n1);
2097 n1 = n2;
2098 }
2099
2100 /*
2101 * Replace the macro invocation
2102 * by the expanded macro.
2103 */
2104 free(*bufp);
2105 *bufp = n1;
2106 if (0 == *szp)
2107 *szp = strlen(*bufp) + 1;
2108
2109 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
2110 ROFF_REPARSE : ROFF_APPEND);
2111 }
2112
2113 static size_t
2114 roff_getname(struct roff *r, char **cpp, int ln, int pos)
2115 {
2116 char *name, *cp;
2117 size_t namesz;
2118
2119 name = *cpp;
2120 if ('\0' == *name)
2121 return(0);
2122
2123 /* Read until end of name and terminate it with NUL. */
2124 for (cp = name; 1; cp++) {
2125 if ('\0' == *cp || ' ' == *cp) {
2126 namesz = cp - name;
2127 break;
2128 }
2129 if ('\\' != *cp)
2130 continue;
2131 namesz = cp - name;
2132 if ('{' == cp[1] || '}' == cp[1])
2133 break;
2134 cp++;
2135 if ('\\' == *cp)
2136 continue;
2137 mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
2138 "%.*s", (int)(cp - name + 1), name);
2139 mandoc_escape((const char **)&cp, NULL, NULL);
2140 break;
2141 }
2142
2143 /* Read past spaces. */
2144 while (' ' == *cp)
2145 cp++;
2146
2147 *cpp = cp;
2148 return(namesz);
2149 }
2150
2151 /*
2152 * Store *string into the user-defined string called *name.
2153 * To clear an existing entry, call with (*r, *name, NULL, 0).
2154 * append == 0: replace mode
2155 * append == 1: single-line append mode
2156 * append == 2: multiline append mode, append '\n' after each call
2157 */
2158 static void
2159 roff_setstr(struct roff *r, const char *name, const char *string,
2160 int append)
2161 {
2162
2163 roff_setstrn(&r->strtab, name, strlen(name), string,
2164 string ? strlen(string) : 0, append);
2165 }
2166
2167 static void
2168 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
2169 const char *string, size_t stringsz, int append)
2170 {
2171 struct roffkv *n;
2172 char *c;
2173 int i;
2174 size_t oldch, newch;
2175
2176 /* Search for an existing string with the same name. */
2177 n = *r;
2178
2179 while (n && (namesz != n->key.sz ||
2180 strncmp(n->key.p, name, namesz)))
2181 n = n->next;
2182
2183 if (NULL == n) {
2184 /* Create a new string table entry. */
2185 n = mandoc_malloc(sizeof(struct roffkv));
2186 n->key.p = mandoc_strndup(name, namesz);
2187 n->key.sz = namesz;
2188 n->val.p = NULL;
2189 n->val.sz = 0;
2190 n->next = *r;
2191 *r = n;
2192 } else if (0 == append) {
2193 free(n->val.p);
2194 n->val.p = NULL;
2195 n->val.sz = 0;
2196 }
2197
2198 if (NULL == string)
2199 return;
2200
2201 /*
2202 * One additional byte for the '\n' in multiline mode,
2203 * and one for the terminating '\0'.
2204 */
2205 newch = stringsz + (1 < append ? 2u : 1u);
2206
2207 if (NULL == n->val.p) {
2208 n->val.p = mandoc_malloc(newch);
2209 *n->val.p = '\0';
2210 oldch = 0;
2211 } else {
2212 oldch = n->val.sz;
2213 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
2214 }
2215
2216 /* Skip existing content in the destination buffer. */
2217 c = n->val.p + (int)oldch;
2218
2219 /* Append new content to the destination buffer. */
2220 i = 0;
2221 while (i < (int)stringsz) {
2222 /*
2223 * Rudimentary roff copy mode:
2224 * Handle escaped backslashes.
2225 */
2226 if ('\\' == string[i] && '\\' == string[i + 1])
2227 i++;
2228 *c++ = string[i++];
2229 }
2230
2231 /* Append terminating bytes. */
2232 if (1 < append)
2233 *c++ = '\n';
2234
2235 *c = '\0';
2236 n->val.sz = (int)(c - n->val.p);
2237 }
2238
2239 static const char *
2240 roff_getstrn(const struct roff *r, const char *name, size_t len)
2241 {
2242 const struct roffkv *n;
2243 int i;
2244
2245 for (n = r->strtab; n; n = n->next)
2246 if (0 == strncmp(name, n->key.p, len) &&
2247 '\0' == n->key.p[(int)len])
2248 return(n->val.p);
2249
2250 for (i = 0; i < PREDEFS_MAX; i++)
2251 if (0 == strncmp(name, predefs[i].name, len) &&
2252 '\0' == predefs[i].name[(int)len])
2253 return(predefs[i].str);
2254
2255 return(NULL);
2256 }
2257
2258 static void
2259 roff_freestr(struct roffkv *r)
2260 {
2261 struct roffkv *n, *nn;
2262
2263 for (n = r; n; n = nn) {
2264 free(n->key.p);
2265 free(n->val.p);
2266 nn = n->next;
2267 free(n);
2268 }
2269 }
2270
2271 const struct tbl_span *
2272 roff_span(const struct roff *r)
2273 {
2274
2275 return(r->tbl ? tbl_span(r->tbl) : NULL);
2276 }
2277
2278 const struct eqn *
2279 roff_eqn(const struct roff *r)
2280 {
2281
2282 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
2283 }
2284
2285 /*
2286 * Duplicate an input string, making the appropriate character
2287 * conversations (as stipulated by `tr') along the way.
2288 * Returns a heap-allocated string with all the replacements made.
2289 */
2290 char *
2291 roff_strdup(const struct roff *r, const char *p)
2292 {
2293 const struct roffkv *cp;
2294 char *res;
2295 const char *pp;
2296 size_t ssz, sz;
2297 enum mandoc_esc esc;
2298
2299 if (NULL == r->xmbtab && NULL == r->xtab)
2300 return(mandoc_strdup(p));
2301 else if ('\0' == *p)
2302 return(mandoc_strdup(""));
2303
2304 /*
2305 * Step through each character looking for term matches
2306 * (remember that a `tr' can be invoked with an escape, which is
2307 * a glyph but the escape is multi-character).
2308 * We only do this if the character hash has been initialised
2309 * and the string is >0 length.
2310 */
2311
2312 res = NULL;
2313 ssz = 0;
2314
2315 while ('\0' != *p) {
2316 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
2317 sz = r->xtab[(int)*p].sz;
2318 res = mandoc_realloc(res, ssz + sz + 1);
2319 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
2320 ssz += sz;
2321 p++;
2322 continue;
2323 } else if ('\\' != *p) {
2324 res = mandoc_realloc(res, ssz + 2);
2325 res[ssz++] = *p++;
2326 continue;
2327 }
2328
2329 /* Search for term matches. */
2330 for (cp = r->xmbtab; cp; cp = cp->next)
2331 if (0 == strncmp(p, cp->key.p, cp->key.sz))
2332 break;
2333
2334 if (NULL != cp) {
2335 /*
2336 * A match has been found.
2337 * Append the match to the array and move
2338 * forward by its keysize.
2339 */
2340 res = mandoc_realloc(res,
2341 ssz + cp->val.sz + 1);
2342 memcpy(res + ssz, cp->val.p, cp->val.sz);
2343 ssz += cp->val.sz;
2344 p += (int)cp->key.sz;
2345 continue;
2346 }
2347
2348 /*
2349 * Handle escapes carefully: we need to copy
2350 * over just the escape itself, or else we might
2351 * do replacements within the escape itself.
2352 * Make sure to pass along the bogus string.
2353 */
2354 pp = p++;
2355 esc = mandoc_escape(&p, NULL, NULL);
2356 if (ESCAPE_ERROR == esc) {
2357 sz = strlen(pp);
2358 res = mandoc_realloc(res, ssz + sz + 1);
2359 memcpy(res + ssz, pp, sz);
2360 break;
2361 }
2362 /*
2363 * We bail out on bad escapes.
2364 * No need to warn: we already did so when
2365 * roff_res() was called.
2366 */
2367 sz = (int)(p - pp);
2368 res = mandoc_realloc(res, ssz + sz + 1);
2369 memcpy(res + ssz, pp, sz);
2370 ssz += sz;
2371 }
2372
2373 res[(int)ssz] = '\0';
2374 return(res);
2375 }
2376
2377 int
2378 roff_getformat(const struct roff *r)
2379 {
2380
2381 return(r->format);
2382 }
2383
2384 /*
2385 * Find out whether a line is a macro line or not.
2386 * If it is, adjust the current position and return one; if it isn't,
2387 * return zero and don't change the current position.
2388 * If the control character has been set with `.cc', then let that grain
2389 * precedence.
2390 * This is slighly contrary to groff, where using the non-breaking
2391 * control character when `cc' has been invoked will cause the
2392 * non-breaking macro contents to be printed verbatim.
2393 */
2394 int
2395 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
2396 {
2397 int pos;
2398
2399 pos = *ppos;
2400
2401 if (0 != r->control && cp[pos] == r->control)
2402 pos++;
2403 else if (0 != r->control)
2404 return(0);
2405 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
2406 pos += 2;
2407 else if ('.' == cp[pos] || '\'' == cp[pos])
2408 pos++;
2409 else
2410 return(0);
2411
2412 while (' ' == cp[pos] || '\t' == cp[pos])
2413 pos++;
2414
2415 *ppos = pos;
2416 return(1);
2417 }