]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
When a file is given on the command line, actually exists, and its name
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.246 2014/12/28 14:16:26 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28
29 #include "mandoc.h"
30 #include "mandoc_aux.h"
31 #include "libmandoc.h"
32 #include "libroff.h"
33
34 /* Maximum number of nested if-else conditionals. */
35 #define RSTACK_MAX 128
36
37 /* Maximum number of string expansions per line, to break infinite loops. */
38 #define EXPAND_LIMIT 1000
39
/*
 * Identifiers for the requests and macros handled in this file.
 * The values up to ROFF_cblock are used directly as indices into
 * the roffs[] table, so the order here must match the order of the
 * initializers there.  ROFF_USERDEF stands for a user-defined macro
 * and ROFF_MAX doubles as the "not found" result of lookups.
 */
enum	rofft {
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_as,
	ROFF_cc,
	ROFF_ce,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_fam,
	ROFF_hw,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_pl,
	ROFF_ps,
	ROFF_rm,
	ROFF_rr,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,
	ROFF_Dd,
	ROFF_TH,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,
	ROFF_EQ,
	ROFF_EN,
	ROFF_cblock,
	ROFF_USERDEF,
	ROFF_MAX
};
82
/*
 * An incredibly-simple string buffer: a pointer to the bytes
 * together with their cached length.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
90
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * Lists of these back the strtab and xmbtab members of struct roff.
 */
struct	roffkv {
	struct roffstr	 key;
	struct roffstr	 val;
	struct roffkv	*next; /* next in list */
};
99
/*
 * A single number register as part of a singly-linked list.
 * The list head is the regtab member of struct roff.
 */
struct	roffreg {
	struct roffstr	 key;
	int		 val;
	struct roffreg	*next;
};
108
/*
 * State of one roff parser.  Instances are created zero-filled by
 * roff_alloc(), emptied for reuse by roff_reset() and destroyed by
 * roff_free().  rstackpos is -1 while the `ie' stack is empty.
 */
struct	roff {
	struct mparse	*parse; /* parse point */
	const struct mchars *mchars; /* character table */
	struct roffnode	*last; /* leaf of stack */
	int		*rstack; /* stack of inverted `ie' values */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* last equation parsed */
	struct eqn_node	*first_eqn; /* first equation parsed */
	struct eqn_node	*eqn; /* current equation being parsed */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack */
	int		 format; /* current file in mdoc or man format */
	char		 control; /* control character */
};
132
/*
 * One open block on the stack rooted at roff.last.  Nodes are created
 * by roffnode_push(), which inherits `rule' from the parent, and are
 * destroyed by roffnode_pop(), which frees `name' and `end'.
 * roffnode_cleanscope() decrements `endspan' and pops the node once
 * the counter reaches zero.
 */
struct	roffnode {
	enum rofft	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* end-rules: custom token */
	int		 endspan; /* end-rules: next-line or infty */
	int		 rule; /* current evaluation rule */
};
143
/*
 * Common parameter list of every request handler.  Handlers are
 * declared as `name(ROFF_ARGS)', so the parameter names listed here
 * (r, tok, buf, ln, ppos, pos, offs) are visible in their bodies.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum rofft tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Pointer to a request handler taking the ROFF_ARGS parameters. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
153
/*
 * Description of one request: its name, the handlers to run in the
 * three possible contexts, and the link used to chain entries that
 * share a hash bucket (see roffhash_init()).
 */
struct	roffmac {
	const char	*name; /* macro name */
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
	struct roffmac	*next;
};
163
/* One predefined string: the name to look for and its replacement. */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/* Expands to one struct predef initializer, trailing comma included. */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
171
172 static enum rofft roffhash_find(const char *, size_t);
173 static void roffhash_init(void);
174 static void roffnode_cleanscope(struct roff *);
175 static void roffnode_pop(struct roff *);
176 static void roffnode_push(struct roff *, enum rofft,
177 const char *, int, int);
178 static enum rofferr roff_block(ROFF_ARGS);
179 static enum rofferr roff_block_text(ROFF_ARGS);
180 static enum rofferr roff_block_sub(ROFF_ARGS);
181 static enum rofferr roff_cblock(ROFF_ARGS);
182 static enum rofferr roff_cc(ROFF_ARGS);
183 static void roff_ccond(struct roff *, int, int);
184 static enum rofferr roff_cond(ROFF_ARGS);
185 static enum rofferr roff_cond_text(ROFF_ARGS);
186 static enum rofferr roff_cond_sub(ROFF_ARGS);
187 static enum rofferr roff_ds(ROFF_ARGS);
188 static enum rofferr roff_eqndelim(struct roff *, struct buf *, int);
189 static int roff_evalcond(struct roff *r, int,
190 const char *, int *);
191 static int roff_evalnum(struct roff *, int,
192 const char *, int *, int *, int);
193 static int roff_evalpar(struct roff *, int,
194 const char *, int *, int *);
195 static int roff_evalstrcond(const char *, int *);
196 static void roff_free1(struct roff *);
197 static void roff_freereg(struct roffreg *);
198 static void roff_freestr(struct roffkv *);
199 static size_t roff_getname(struct roff *, char **, int, int);
200 static int roff_getnum(const char *, int *, int *);
201 static int roff_getop(const char *, int *, char *);
202 static int roff_getregn(const struct roff *,
203 const char *, size_t);
204 static int roff_getregro(const char *name);
205 static const char *roff_getstrn(const struct roff *,
206 const char *, size_t);
207 static enum rofferr roff_it(ROFF_ARGS);
208 static enum rofferr roff_line_ignore(ROFF_ARGS);
209 static enum rofferr roff_nr(ROFF_ARGS);
210 static enum rofft roff_parse(struct roff *, char *, int *,
211 int, int);
212 static enum rofferr roff_parsetext(struct buf *, int, int *);
213 static enum rofferr roff_res(struct roff *, struct buf *, int, int);
214 static enum rofferr roff_rm(ROFF_ARGS);
215 static enum rofferr roff_rr(ROFF_ARGS);
216 static void roff_setstr(struct roff *,
217 const char *, const char *, int);
218 static void roff_setstrn(struct roffkv **, const char *,
219 size_t, const char *, size_t, int);
220 static enum rofferr roff_so(ROFF_ARGS);
221 static enum rofferr roff_tr(ROFF_ARGS);
222 static enum rofferr roff_Dd(ROFF_ARGS);
223 static enum rofferr roff_TH(ROFF_ARGS);
224 static enum rofferr roff_TE(ROFF_ARGS);
225 static enum rofferr roff_TS(ROFF_ARGS);
226 static enum rofferr roff_EQ(ROFF_ARGS);
227 static enum rofferr roff_EN(ROFF_ARGS);
228 static enum rofferr roff_T_(ROFF_ARGS);
229 static enum rofferr roff_userdef(ROFF_ARGS);
230
/*
 * See roffhash_find().  The request table is hashed on the first
 * byte of the name only, so one bucket is needed for every byte
 * value from ASCII_LO to ASCII_HI inclusive.
 */

#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

/* Bucket heads; colliding entries are chained via roffmac.next. */
static	struct roffmac	*hash[HASHWIDTH];
238
/*
 * Table of all requests, indexed by enum rofft: entry i describes
 * the request whose enumerator has the value i, so the two lists
 * must be kept in the same order.  Columns are name, proc, text,
 * sub, flags and the hash chain link, which roffhash_init() fills in.
 * The last entry has no name and handles user-defined macros.
 */
static	struct roffmac	 roffs[ROFF_MAX] = {
	{ "ad", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "as", roff_ds, NULL, NULL, 0, NULL },
	{ "cc", roff_cc, NULL, NULL, 0, NULL },
	{ "ce", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ds", roff_ds, NULL, NULL, 0, NULL },
	{ "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "fam", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hw", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hy", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "it", roff_it, NULL, NULL, 0, NULL },
	{ "ne", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nh", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nr", roff_nr, NULL, NULL, 0, NULL },
	{ "ns", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "pl", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ps", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rm", roff_rm, NULL, NULL, 0, NULL },
	{ "rr", roff_rr, NULL, NULL, 0, NULL },
	{ "so", roff_so, NULL, NULL, 0, NULL },
	{ "ta", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tr", roff_tr, NULL, NULL, 0, NULL },
	{ "Dd", roff_Dd, NULL, NULL, 0, NULL },
	{ "TH", roff_TH, NULL, NULL, 0, NULL },
	{ "TS", roff_TS, NULL, NULL, 0, NULL },
	{ "TE", roff_TE, NULL, NULL, 0, NULL },
	{ "T&", roff_T_, NULL, NULL, 0, NULL },
	{ "EQ", roff_EQ, NULL, NULL, 0, NULL },
	{ "EN", roff_EN, NULL, NULL, 0, NULL },
	{ ".", roff_cblock, NULL, NULL, 0, NULL },
	{ NULL, roff_userdef, NULL, NULL, 0, NULL },
};
280
/*
 * NULL-terminated list of mdoc macro names.
 * not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH
 *
 * NOTE(review): the array has external linkage, so it is presumably
 * referenced from other files; identifiers beginning with `__' are
 * reserved for the C implementation, but renaming would have to be
 * coordinated with those users.
 */
const char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
	"Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
	"Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em",
	"En", "Eo", "Er", "Es", "Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp",
	"Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "Pq",
	"Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv",
	"Sc", "Sh", "Sm", "So", "Sq",
	"Ss", "St", "Sx", "Sy",
	"Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
	"%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
303
/*
 * NULL-terminated list of man macro names.
 * not currently implemented: BT DE DS ME MT PT SY TQ YS
 *
 * NOTE(review): external linkage and a reserved `__' prefix, as for
 * the mdoc list; rename only together with all users.
 */
const char *const __man_reserved[] = {
	"AT", "B", "BI", "BR", "DT",
	"EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR",
	"LP", "OP", "P", "PD", "PP",
	"R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS",
	"TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR",
	NULL
};
313
/*
 * Array of injected predefined strings.
 * The initializers are pulled in from predefs.in; presumably that
 * file consists of PREDEF() invocations and PREDEFS_MAX has to be
 * kept in sync with their number -- TODO confirm against predefs.in.
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
319
/*
 * See roffhash_find().  Bucket index of a request name: its first
 * byte, offset so that ASCII_LO maps to bucket zero.  The argument
 * is parenthesized so that pointer expressions such as `p + 1'
 * expand correctly.
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)
322
/*
 * State of the input line trap.  These are file-scope variables,
 * not members of struct roff.  roff_parsetext() counts roffit_lines
 * down once per text line and, when it finds the value 1, appends
 * roffit_macro to the line as a request and releases the string.
 */
static	int	 roffit_lines; /* number of lines to delay */
static	char	*roffit_macro; /* nil-terminated macro line */
325
326
327 static void
328 roffhash_init(void)
329 {
330 struct roffmac *n;
331 int buc, i;
332
333 for (i = 0; i < (int)ROFF_USERDEF; i++) {
334 assert(roffs[i].name[0] >= ASCII_LO);
335 assert(roffs[i].name[0] <= ASCII_HI);
336
337 buc = ROFF_HASH(roffs[i].name);
338
339 if (NULL != (n = hash[buc])) {
340 for ( ; n->next; n = n->next)
341 /* Do nothing. */ ;
342 n->next = &roffs[i];
343 } else
344 hash[buc] = &roffs[i];
345 }
346 }
347
348 /*
349 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
350 * the nil-terminated string name could be found.
351 */
352 static enum rofft
353 roffhash_find(const char *p, size_t s)
354 {
355 int buc;
356 struct roffmac *n;
357
358 /*
359 * libroff has an extremely simple hashtable, for the time
360 * being, which simply keys on the first character, which must
361 * be printable, then walks a chain. It works well enough until
362 * optimised.
363 */
364
365 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
366 return(ROFF_MAX);
367
368 buc = ROFF_HASH(p);
369
370 if (NULL == (n = hash[buc]))
371 return(ROFF_MAX);
372 for ( ; n; n = n->next)
373 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
374 return((enum rofft)(n - roffs));
375
376 return(ROFF_MAX);
377 }
378
379 /*
380 * Pop the current node off of the stack of roff instructions currently
381 * pending.
382 */
383 static void
384 roffnode_pop(struct roff *r)
385 {
386 struct roffnode *p;
387
388 assert(r->last);
389 p = r->last;
390
391 r->last = r->last->parent;
392 free(p->name);
393 free(p->end);
394 free(p);
395 }
396
397 /*
398 * Push a roff node onto the instruction stack. This must later be
399 * removed with roffnode_pop().
400 */
401 static void
402 roffnode_push(struct roff *r, enum rofft tok, const char *name,
403 int line, int col)
404 {
405 struct roffnode *p;
406
407 p = mandoc_calloc(1, sizeof(struct roffnode));
408 p->tok = tok;
409 if (name)
410 p->name = mandoc_strdup(name);
411 p->parent = r->last;
412 p->line = line;
413 p->col = col;
414 p->rule = p->parent ? p->parent->rule : 0;
415
416 r->last = p;
417 }
418
419 static void
420 roff_free1(struct roff *r)
421 {
422 struct tbl_node *tbl;
423 struct eqn_node *e;
424 int i;
425
426 while (NULL != (tbl = r->first_tbl)) {
427 r->first_tbl = tbl->next;
428 tbl_free(tbl);
429 }
430 r->first_tbl = r->last_tbl = r->tbl = NULL;
431
432 while (NULL != (e = r->first_eqn)) {
433 r->first_eqn = e->next;
434 eqn_free(e);
435 }
436 r->first_eqn = r->last_eqn = r->eqn = NULL;
437
438 while (r->last)
439 roffnode_pop(r);
440
441 free (r->rstack);
442 r->rstack = NULL;
443 r->rstacksz = 0;
444 r->rstackpos = -1;
445
446 roff_freereg(r->regtab);
447 r->regtab = NULL;
448
449 roff_freestr(r->strtab);
450 roff_freestr(r->xmbtab);
451 r->strtab = r->xmbtab = NULL;
452
453 if (r->xtab)
454 for (i = 0; i < 128; i++)
455 free(r->xtab[i].p);
456 free(r->xtab);
457 r->xtab = NULL;
458 }
459
460 void
461 roff_reset(struct roff *r)
462 {
463
464 roff_free1(r);
465 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
466 r->control = 0;
467 }
468
/*
 * Destroy a parser: release everything it owns, then the parser
 * object itself.  The pointer must not be used afterwards.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);	/* contents first ... */
	free(r);	/* ... then the container */
}
476
477 struct roff *
478 roff_alloc(struct mparse *parse, const struct mchars *mchars, int options)
479 {
480 struct roff *r;
481
482 r = mandoc_calloc(1, sizeof(struct roff));
483 r->parse = parse;
484 r->mchars = mchars;
485 r->options = options;
486 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
487 r->rstackpos = -1;
488
489 roffhash_init();
490
491 return(r);
492 }
493
/*
 * In the current line, expand escape sequences that tend to get
 * used in numerical expressions and conditional requests.
 * Also check the syntax of the remaining escape sequences.
 *
 * The line is scanned backwards from its end down to offset pos.
 * \* is replaced by the string roff_getstrn() returns, \n by the
 * register value from roff_getregn(), \B by "1" or "0" depending on
 * whether roff_evalnum() consumes the complete argument, and \w by
 * 24 times the length of its argument.  All other escapes are only
 * validated with mandoc_escape().  Every replacement allocates a new
 * line, frees the old buf->buf and updates buf->sz.
 *
 * Returns ROFF_CONT, or ROFF_IGN after reporting MANDOCERR_ROFFLOOP
 * when more than EXPAND_LIMIT expansions were needed or when the
 * line plus the replacement would exceed SHRT_MAX bytes.
 */
static enum rofferr
roff_res(struct roff *r, struct buf *buf, int ln, int pos)
{
	char		 ubuf[24]; /* buffer to print the number */
	const char	*start;	/* start of the string to process */
	char		*stesc;	/* start of an escape sequence ('\\') */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy buf->buf to */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	enum mandoc_esc	 esc;	/* type of the escape sequence */
	int		 inaml;	/* length returned from mandoc_escape() */
	int		 expand_count;	/* to avoid infinite loops */
	int		 npos;	/* position in numeric expression */
	int		 arg_complete; /* argument not interrupted by eol */
	char		 term;	/* character terminating the escape */

	expand_count = 0;
	start = buf->buf + pos;
	/*
	 * Begin at the last byte; the loop condition steps back once
	 * before the first test, so the final byte itself is never
	 * taken for the start of an escape sequence.
	 */
	stesc = strchr(start, '\0') - 1;
	while (stesc-- > start) {

		/* Search backwards for the next backslash. */

		if (*stesc != '\\')
			continue;

		/*
		 * If it is escaped, skip it.  Walk back over the run
		 * of backslashes ending here: an even distance means
		 * this backslash is itself escaped by the one before.
		 */

		for (cp = stesc - 1; cp >= start; cp--)
			if (*cp != '\\')
				break;

		if ((stesc - cp) % 2 == 0) {
			stesc = (char *)cp;
			continue;
		}

		/* Decide whether to expand or to check only. */

		term = '\0';
		cp = stesc + 1;
		switch (*cp) {
		case '*':
			/* Looked up below; NULL means undefined. */
			res = NULL;
			break;
		case 'B':
			/* FALLTHROUGH */
		case 'w':
			/* The byte after the letter delimits the arg. */
			term = cp[1];
			/* FALLTHROUGH */
		case 'n':
			/* The replacement is formatted into ubuf. */
			res = ubuf;
			break;
		default:
			/* Not expanded here: validate and move on. */
			esc = mandoc_escape(&cp, &stnam, &inaml);
			if (esc == ESCAPE_ERROR ||
			    (esc == ESCAPE_SPECIAL &&
			     mchars_spec2cp(r->mchars, stnam, inaml) < 0))
				mandoc_vmsg(MANDOCERR_ESC_BAD,
				    r->parse, ln, (int)(stesc - buf->buf),
				    "%.*s", (int)(cp - stesc), stesc);
			continue;
		}

		if (EXPAND_LIMIT < ++expand_count) {
			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
			    ln, (int)(stesc - buf->buf), NULL);
			return(ROFF_IGN);
		}

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 * A maxl of 0 means "until term or end of line".
		 */

		if (term == '\0') {
			switch (*++cp) {
			case '\0':
				maxl = 0;
				break;
			case '(':
				cp++;
				maxl = 2;
				break;
			case '[':
				cp++;
				term = ']';
				maxl = 0;
				break;
			default:
				maxl = 1;
				break;
			}
		} else {
			/* Skip the escape letter and the delimiter. */
			cp += 2;
			maxl = 0;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		arg_complete = 1;
		for (naml = 0; maxl == 0 || naml < maxl; naml++, cp++) {
			if (*cp == '\0') {
				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
				    ln, (int)(stesc - buf->buf), stesc);
				arg_complete = 0;
				break;
			}
			if (maxl == 0 && *cp == term) {
				/* Step over the terminator as well. */
				cp++;
				break;
			}
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		switch (stesc[1]) {
		case '*':
			if (arg_complete)
				res = roff_getstrn(r, stnam, naml);
			break;
		case 'B':
			/*
			 * True only if the argument is complete, is a
			 * valid numeric expression and the expression
			 * ends right before the closing delimiter.
			 */
			npos = 0;
			ubuf[0] = arg_complete &&
			    roff_evalnum(r, ln, stnam, &npos, NULL, 0) &&
			    stnam + npos + 1 == cp ? '1' : '0';
			ubuf[1] = '\0';
			break;
		case 'n':
			if (arg_complete)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, stnam, naml));
			else
				ubuf[0] = '\0';
			break;
		case 'w':
			/* use even incomplete args */
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
			    24 * (int)naml);
			break;
		}

		if (res == NULL) {
			mandoc_vmsg(MANDOCERR_STR_UNDEF,
			    r->parse, ln, (int)(stesc - buf->buf),
			    "%.*s", (int)naml, stnam);
			res = "";
		} else if (buf->sz + strlen(res) > SHRT_MAX) {
			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
			    ln, (int)(stesc - buf->buf), NULL);
			return(ROFF_IGN);
		}

		/*
		 * Replace the escape sequence by the string.
		 * Cutting the old line at the backslash makes
		 * buf->buf the text in front of the escape.
		 */

		*stesc = '\0';
		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
		    buf->buf, res, cp) + 1;

		/*
		 * Prepare for the next replacement.  Resume at the
		 * end of the inserted text, so that escapes inside
		 * the replacement are processed as well.
		 */

		start = nbuf + pos;
		stesc = nbuf + (stesc - buf->buf) + strlen(res);
		free(buf->buf);
		buf->buf = nbuf;
	}
	return(ROFF_CONT);
}
675
676 /*
677 * Process text streams:
678 * Convert all breakable hyphens into ASCII_HYPH.
679 * Decrement and spring input line trap.
680 */
681 static enum rofferr
682 roff_parsetext(struct buf *buf, int pos, int *offs)
683 {
684 size_t sz;
685 const char *start;
686 char *p;
687 int isz;
688 enum mandoc_esc esc;
689
690 start = p = buf->buf + pos;
691
692 while (*p != '\0') {
693 sz = strcspn(p, "-\\");
694 p += sz;
695
696 if (*p == '\0')
697 break;
698
699 if (*p == '\\') {
700 /* Skip over escapes. */
701 p++;
702 esc = mandoc_escape((const char **)&p, NULL, NULL);
703 if (esc == ESCAPE_ERROR)
704 break;
705 continue;
706 } else if (p == start) {
707 p++;
708 continue;
709 }
710
711 if (isalpha((unsigned char)p[-1]) &&
712 isalpha((unsigned char)p[1]))
713 *p = ASCII_HYPH;
714 p++;
715 }
716
717 /* Spring the input line trap. */
718 if (roffit_lines == 1) {
719 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
720 free(buf->buf);
721 buf->buf = p;
722 buf->sz = isz + 1;
723 *offs = 0;
724 free(roffit_macro);
725 roffit_lines = 0;
726 return(ROFF_REPARSE);
727 } else if (roffit_lines > 1)
728 --roffit_lines;
729 return(ROFF_CONT);
730 }
731
732 enum rofferr
733 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
734 {
735 enum rofft t;
736 enum rofferr e;
737 int pos; /* parse point */
738 int spos; /* saved parse point for messages */
739 int ppos; /* original offset in buf->buf */
740 int ctl; /* macro line (boolean) */
741
742 ppos = pos = *offs;
743
744 /* Handle in-line equation delimiters. */
745
746 if (r->tbl == NULL &&
747 r->last_eqn != NULL && r->last_eqn->delim &&
748 (r->eqn == NULL || r->eqn_inline)) {
749 e = roff_eqndelim(r, buf, pos);
750 if (e == ROFF_REPARSE)
751 return(e);
752 assert(e == ROFF_CONT);
753 }
754
755 /* Expand some escape sequences. */
756
757 e = roff_res(r, buf, ln, pos);
758 if (e == ROFF_IGN)
759 return(e);
760 assert(e == ROFF_CONT);
761
762 ctl = roff_getcontrol(r, buf->buf, &pos);
763
764 /*
765 * First, if a scope is open and we're not a macro, pass the
766 * text through the macro's filter. If a scope isn't open and
767 * we're not a macro, just let it through.
768 * Finally, if there's an equation scope open, divert it into it
769 * no matter our state.
770 */
771
772 if (r->last && ! ctl) {
773 t = r->last->tok;
774 assert(roffs[t].text);
775 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
776 assert(e == ROFF_IGN || e == ROFF_CONT);
777 if (e != ROFF_CONT)
778 return(e);
779 }
780 if (r->eqn)
781 return(eqn_read(&r->eqn, ln, buf->buf, ppos, offs));
782 if ( ! ctl) {
783 if (r->tbl)
784 return(tbl_read(r->tbl, ln, buf->buf, pos));
785 return(roff_parsetext(buf, pos, offs));
786 }
787
788 /* Skip empty request lines. */
789
790 if (buf->buf[pos] == '"') {
791 mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
792 ln, pos, NULL);
793 return(ROFF_IGN);
794 } else if (buf->buf[pos] == '\0')
795 return(ROFF_IGN);
796
797 /*
798 * If a scope is open, go to the child handler for that macro,
799 * as it may want to preprocess before doing anything with it.
800 * Don't do so if an equation is open.
801 */
802
803 if (r->last) {
804 t = r->last->tok;
805 assert(roffs[t].sub);
806 return((*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs));
807 }
808
809 /* No scope is open. This is a new request or macro. */
810
811 spos = pos;
812 t = roff_parse(r, buf->buf, &pos, ln, ppos);
813
814 /* Tables ignore most macros. */
815
816 if (r->tbl != NULL && (t == ROFF_MAX || t == ROFF_TS)) {
817 mandoc_msg(MANDOCERR_TBLMACRO, r->parse,
818 ln, pos, buf->buf + spos);
819 return(ROFF_IGN);
820 }
821
822 /*
823 * This is neither a roff request nor a user-defined macro.
824 * Let the standard macro set parsers handle it.
825 */
826
827 if (t == ROFF_MAX)
828 return(ROFF_CONT);
829
830 /* Execute a roff request or a user defined macro. */
831
832 assert(roffs[t].proc);
833 return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs));
834 }
835
836 void
837 roff_endparse(struct roff *r)
838 {
839
840 if (r->last)
841 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
842 r->last->line, r->last->col,
843 roffs[r->last->tok].name);
844
845 if (r->eqn) {
846 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
847 r->eqn->eqn.ln, r->eqn->eqn.pos, "EQ");
848 eqn_end(&r->eqn);
849 }
850
851 if (r->tbl) {
852 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
853 r->tbl->line, r->tbl->pos, "TS");
854 tbl_end(&r->tbl);
855 }
856 }
857
858 /*
859 * Parse a roff node's type from the input buffer. This must be in the
860 * form of ".foo xxx" in the usual way.
861 */
862 static enum rofft
863 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
864 {
865 char *cp;
866 const char *mac;
867 size_t maclen;
868 enum rofft t;
869
870 cp = buf + *pos;
871
872 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
873 return(ROFF_MAX);
874
875 mac = cp;
876 maclen = roff_getname(r, &cp, ln, ppos);
877
878 t = (r->current_string = roff_getstrn(r, mac, maclen))
879 ? ROFF_USERDEF : roffhash_find(mac, maclen);
880
881 if (ROFF_MAX != t)
882 *pos = cp - buf;
883
884 return(t);
885 }
886
887 static enum rofferr
888 roff_cblock(ROFF_ARGS)
889 {
890
891 /*
892 * A block-close `..' should only be invoked as a child of an
893 * ignore macro, otherwise raise a warning and just ignore it.
894 */
895
896 if (r->last == NULL) {
897 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
898 ln, ppos, "..");
899 return(ROFF_IGN);
900 }
901
902 switch (r->last->tok) {
903 case ROFF_am:
904 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
905 /* FALLTHROUGH */
906 case ROFF_ami:
907 /* FALLTHROUGH */
908 case ROFF_de:
909 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
910 /* FALLTHROUGH */
911 case ROFF_dei:
912 /* FALLTHROUGH */
913 case ROFF_ig:
914 break;
915 default:
916 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
917 ln, ppos, "..");
918 return(ROFF_IGN);
919 }
920
921 if (buf->buf[pos] != '\0')
922 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
923 ".. %s", buf->buf + pos);
924
925 roffnode_pop(r);
926 roffnode_cleanscope(r);
927 return(ROFF_IGN);
928
929 }
930
931 static void
932 roffnode_cleanscope(struct roff *r)
933 {
934
935 while (r->last) {
936 if (--r->last->endspan != 0)
937 break;
938 roffnode_pop(r);
939 }
940 }
941
942 static void
943 roff_ccond(struct roff *r, int ln, int ppos)
944 {
945
946 if (NULL == r->last) {
947 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
948 ln, ppos, "\\}");
949 return;
950 }
951
952 switch (r->last->tok) {
953 case ROFF_el:
954 /* FALLTHROUGH */
955 case ROFF_ie:
956 /* FALLTHROUGH */
957 case ROFF_if:
958 break;
959 default:
960 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
961 ln, ppos, "\\}");
962 return;
963 }
964
965 if (r->last->endspan > -1) {
966 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
967 ln, ppos, "\\}");
968 return;
969 }
970
971 roffnode_pop(r);
972 roffnode_cleanscope(r);
973 return;
974 }
975
976 static enum rofferr
977 roff_block(ROFF_ARGS)
978 {
979 const char *name;
980 char *iname, *cp;
981 size_t namesz;
982
983 /* Ignore groff compatibility mode for now. */
984
985 if (tok == ROFF_de1)
986 tok = ROFF_de;
987 else if (tok == ROFF_am1)
988 tok = ROFF_am;
989
990 /* Parse the macro name argument. */
991
992 cp = buf->buf + pos;
993 if (tok == ROFF_ig) {
994 iname = NULL;
995 namesz = 0;
996 } else {
997 iname = cp;
998 namesz = roff_getname(r, &cp, ln, ppos);
999 iname[namesz] = '\0';
1000 }
1001
1002 /* Resolve the macro name argument if it is indirect. */
1003
1004 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1005 if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1006 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1007 r->parse, ln, (int)(iname - buf->buf),
1008 "%.*s", (int)namesz, iname);
1009 namesz = 0;
1010 } else
1011 namesz = strlen(name);
1012 } else
1013 name = iname;
1014
1015 if (namesz == 0 && tok != ROFF_ig) {
1016 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
1017 ln, ppos, roffs[tok].name);
1018 return(ROFF_IGN);
1019 }
1020
1021 roffnode_push(r, tok, name, ln, ppos);
1022
1023 /*
1024 * At the beginning of a `de' macro, clear the existing string
1025 * with the same name, if there is one. New content will be
1026 * appended from roff_block_text() in multiline mode.
1027 */
1028
1029 if (tok == ROFF_de || tok == ROFF_dei)
1030 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1031
1032 if (*cp == '\0')
1033 return(ROFF_IGN);
1034
1035 /* Get the custom end marker. */
1036
1037 iname = cp;
1038 namesz = roff_getname(r, &cp, ln, ppos);
1039
1040 /* Resolve the end marker if it is indirect. */
1041
1042 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1043 if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1044 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1045 r->parse, ln, (int)(iname - buf->buf),
1046 "%.*s", (int)namesz, iname);
1047 namesz = 0;
1048 } else
1049 namesz = strlen(name);
1050 } else
1051 name = iname;
1052
1053 if (namesz)
1054 r->last->end = mandoc_strndup(name, namesz);
1055
1056 if (*cp != '\0')
1057 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
1058 ln, pos, ".%s ... %s", roffs[tok].name, cp);
1059
1060 return(ROFF_IGN);
1061 }
1062
1063 static enum rofferr
1064 roff_block_sub(ROFF_ARGS)
1065 {
1066 enum rofft t;
1067 int i, j;
1068
1069 /*
1070 * First check whether a custom macro exists at this level. If
1071 * it does, then check against it. This is some of groff's
1072 * stranger behaviours. If we encountered a custom end-scope
1073 * tag and that tag also happens to be a "real" macro, then we
1074 * need to try interpreting it again as a real macro. If it's
1075 * not, then return ignore. Else continue.
1076 */
1077
1078 if (r->last->end) {
1079 for (i = pos, j = 0; r->last->end[j]; j++, i++)
1080 if (buf->buf[i] != r->last->end[j])
1081 break;
1082
1083 if (r->last->end[j] == '\0' &&
1084 (buf->buf[i] == '\0' ||
1085 buf->buf[i] == ' ' ||
1086 buf->buf[i] == '\t')) {
1087 roffnode_pop(r);
1088 roffnode_cleanscope(r);
1089
1090 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
1091 i++;
1092
1093 pos = i;
1094 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
1095 ROFF_MAX)
1096 return(ROFF_RERUN);
1097 return(ROFF_IGN);
1098 }
1099 }
1100
1101 /*
1102 * If we have no custom end-query or lookup failed, then try
1103 * pulling it out of the hashtable.
1104 */
1105
1106 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1107
1108 if (t != ROFF_cblock) {
1109 if (tok != ROFF_ig)
1110 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
1111 return(ROFF_IGN);
1112 }
1113
1114 assert(roffs[t].proc);
1115 return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs));
1116 }
1117
1118 static enum rofferr
1119 roff_block_text(ROFF_ARGS)
1120 {
1121
1122 if (tok != ROFF_ig)
1123 roff_setstr(r, r->last->name, buf->buf + pos, 2);
1124
1125 return(ROFF_IGN);
1126 }
1127
1128 static enum rofferr
1129 roff_cond_sub(ROFF_ARGS)
1130 {
1131 enum rofft t;
1132 char *ep;
1133 int rr;
1134
1135 rr = r->last->rule;
1136 roffnode_cleanscope(r);
1137 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1138
1139 /*
1140 * Fully handle known macros when they are structurally
1141 * required or when the conditional evaluated to true.
1142 */
1143
1144 if ((t != ROFF_MAX) &&
1145 (rr || roffs[t].flags & ROFFMAC_STRUCT)) {
1146 assert(roffs[t].proc);
1147 return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs));
1148 }
1149
1150 /*
1151 * If `\}' occurs on a macro line without a preceding macro,
1152 * drop the line completely.
1153 */
1154
1155 ep = buf->buf + pos;
1156 if (ep[0] == '\\' && ep[1] == '}')
1157 rr = 0;
1158
1159 /* Always check for the closing delimiter `\}'. */
1160
1161 while ((ep = strchr(ep, '\\')) != NULL) {
1162 if (*(++ep) == '}') {
1163 *ep = '&';
1164 roff_ccond(r, ln, ep - buf->buf - 1);
1165 }
1166 ++ep;
1167 }
1168 return(rr ? ROFF_CONT : ROFF_IGN);
1169 }
1170
1171 static enum rofferr
1172 roff_cond_text(ROFF_ARGS)
1173 {
1174 char *ep;
1175 int rr;
1176
1177 rr = r->last->rule;
1178 roffnode_cleanscope(r);
1179
1180 ep = buf->buf + pos;
1181 while ((ep = strchr(ep, '\\')) != NULL) {
1182 if (*(++ep) == '}') {
1183 *ep = '&';
1184 roff_ccond(r, ln, ep - buf->buf - 1);
1185 }
1186 ++ep;
1187 }
1188 return(rr ? ROFF_CONT : ROFF_IGN);
1189 }
1190
/*
 * Parse a single signed integer number.  Stop at the first non-digit.
 * If there is at least one digit, return 1, store the value in *res
 * and advance the parse point *pos; else return 0, store 0 in *res
 * and leave the parse point unchanged.  res may be NULL if the value
 * is not needed.
 *
 * Numbers too large for an int wrap around instead of being
 * diagnosed.  The digits are accumulated in unsigned arithmetic,
 * where wrapping is well defined; signed overflow would be
 * undefined behaviour.
 */
static int
roff_getnum(const char *v, int *pos, int *res)
{
	unsigned int	 acc;
	int		 myres, n, p;

	if (NULL == res)
		res = &myres;

	p = *pos;
	n = v[p] == '-';
	if (n)
		p++;

	for (acc = 0; isdigit((unsigned char)v[p]); p++)
		acc = 10 * acc + (unsigned int)(v[p] - '0');
	if (p == *pos + n) {
		/* No digits at all, possibly just a lone minus sign. */
		*res = 0;
		return 0;
	}

	if (acc <= (unsigned int)INT_MAX)
		*res = n ? -(int)acc : (int)acc;
	else
		/* Out of range: keep the wrapped bit pattern. */
		*res = (int)(n ? 0U - acc : acc);

	*pos = p;
	return 1;
}
1221
/*
 * Evaluate a string comparison condition.
 * The character at *pos is the delimiter.
 * Return 1 if the string up to the second occurrence of the
 * delimiter equals the string up to its third occurrence, else 0.
 * Advance *pos behind the third occurrence or, lacking that,
 * to the end of the line.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim, *lhs, *rhs;
	int		 match;

	match = 0;
	delim = v + *pos;		/* initial delimiter */
	lhs = delim + 1;		/* cursor in the first string */
	rhs = strchr(lhs, *delim);	/* middle delimiter, if any */

	if (rhs != NULL) {
		/* Walk both strings in lockstep. */
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: look for the final delimiter. */
				rhs = strchr(rhs, *delim);
				break;
			}
			if (*rhs == *delim) {
				/* Both strings ended simultaneously. */
				match = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return match;
}
1264
/*
 * Evaluate an optionally negated single character, numerical,
 * or string condition starting at v[*pos].
 * Advance *pos behind the part of the condition that was consumed
 * and return non-zero if the condition holds.
 */
static int
roff_evalcond(struct roff *r, int ln, const char *v, int *pos)
{
	int	 number, startpos, wanttrue;

	/* A leading exclamation mark inverts the result. */
	wanttrue = v[*pos] != '!';
	if (!wanttrue)
		(*pos)++;

	switch (v[*pos]) {
	case '\0':
		return 0;
	case 'n':
	case 'o':
		/* Single-letter conditions that are always true. */
		(*pos)++;
		return wanttrue;
	case 'c':
	case 'd':
	case 'e':
	case 'r':
	case 't':
	case 'v':
		/* Single-letter conditions that are always false. */
		(*pos)++;
		return !wanttrue;
	default:
		break;
	}

	/*
	 * Try a numerical expression first.  Fall back to a string
	 * comparison only if the numerical parser consumed nothing.
	 */
	startpos = *pos;
	if (roff_evalnum(r, ln, v, pos, &number, 0))
		return (number > 0) == wanttrue;
	if (*pos == startpos)
		return roff_evalstrcond(v, pos) == wanttrue;
	return 0;
}
1313
1314 static enum rofferr
1315 roff_line_ignore(ROFF_ARGS)
1316 {
1317
1318 return(ROFF_IGN);
1319 }
1320
1321 static enum rofferr
1322 roff_cond(ROFF_ARGS)
1323 {
1324
1325 roffnode_push(r, tok, NULL, ln, ppos);
1326
1327 /*
1328 * An `.el' has no conditional body: it will consume the value
1329 * of the current rstack entry set in prior `ie' calls or
1330 * defaults to DENY.
1331 *
1332 * If we're not an `el', however, then evaluate the conditional.
1333 */
1334
1335 r->last->rule = tok == ROFF_el ?
1336 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
1337 roff_evalcond(r, ln, buf->buf, &pos);
1338
1339 /*
1340 * An if-else will put the NEGATION of the current evaluated
1341 * conditional into the stack of rules.
1342 */
1343
1344 if (tok == ROFF_ie) {
1345 if (r->rstackpos + 1 == r->rstacksz) {
1346 r->rstacksz += 16;
1347 r->rstack = mandoc_reallocarray(r->rstack,
1348 r->rstacksz, sizeof(int));
1349 }
1350 r->rstack[++r->rstackpos] = !r->last->rule;
1351 }
1352
1353 /* If the parent has false as its rule, then so do we. */
1354
1355 if (r->last->parent && !r->last->parent->rule)
1356 r->last->rule = 0;
1357
1358 /*
1359 * Determine scope.
1360 * If there is nothing on the line after the conditional,
1361 * not even whitespace, use next-line scope.
1362 */
1363
1364 if (buf->buf[pos] == '\0') {
1365 r->last->endspan = 2;
1366 goto out;
1367 }
1368
1369 while (buf->buf[pos] == ' ')
1370 pos++;
1371
1372 /* An opening brace requests multiline scope. */
1373
1374 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
1375 r->last->endspan = -1;
1376 pos += 2;
1377 goto out;
1378 }
1379
1380 /*
1381 * Anything else following the conditional causes
1382 * single-line scope. Warn if the scope contains
1383 * nothing but trailing whitespace.
1384 */
1385
1386 if (buf->buf[pos] == '\0')
1387 mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
1388 ln, ppos, roffs[tok].name);
1389
1390 r->last->endspan = 1;
1391
1392 out:
1393 *offs = pos;
1394 return(ROFF_RERUN);
1395 }
1396
1397 static enum rofferr
1398 roff_ds(ROFF_ARGS)
1399 {
1400 char *string;
1401 const char *name;
1402 size_t namesz;
1403
1404 /*
1405 * The first word is the name of the string.
1406 * If it is empty or terminated by an escape sequence,
1407 * abort the `ds' request without defining anything.
1408 */
1409
1410 name = string = buf->buf + pos;
1411 if (*name == '\0')
1412 return(ROFF_IGN);
1413
1414 namesz = roff_getname(r, &string, ln, pos);
1415 if (name[namesz] == '\\')
1416 return(ROFF_IGN);
1417
1418 /* Read past the initial double-quote, if any. */
1419 if (*string == '"')
1420 string++;
1421
1422 /* The rest is the value. */
1423 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
1424 ROFF_as == tok);
1425 return(ROFF_IGN);
1426 }
1427
/*
 * Parse a single operator, one or two characters long.
 * If the operator is recognized, store its one-character code in
 * *res, advance *pos behind it and return the (non-zero) code.
 * Otherwise return 0 and leave *pos unchanged; *res then holds
 * the unrecognized character.
 * Two-character operators are coded as follows:
 * `<=' is 'l', `<>' is '!', `<?' is 'i', `>=' is 'g', `>?' is 'a',
 * and `==' is '='.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	char	 first, second;
	int	 len;

	first = v[*pos];
	*res = first;
	len = 1;

	switch (first) {
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case ':':
		break;
	case '<':
		second = v[*pos + 1];
		if (second == '=')
			*res = 'l';
		else if (second == '>')
			*res = '!';
		else if (second == '?')
			*res = 'i';
		if (*res != first)
			len = 2;
		break;
	case '>':
		second = v[*pos + 1];
		if (second == '=')
			*res = 'g';
		else if (second == '?')
			*res = 'a';
		if (*res != first)
			len = 2;
		break;
	case '=':
		if (v[*pos + 1] == '=')
			len = 2;
		break;
	default:
		return 0;
	}

	*pos += len;
	return *res;
}
1497
/*
 * Evaluate either a parenthesized numeric expression
 * or a single signed integer number starting at v[*pos].
 * Return 1 on success and 0 on failure.
 * A NULL res pointer selects validation mode.
 */
static int
roff_evalpar(struct roff *r, int ln,
    const char *v, int *pos, int *res)
{

	/* Without an opening parenthesis, expect a plain number. */
	if (v[*pos] != '(')
		return roff_getnum(v, pos, res);

	(*pos)++;
	if (!roff_evalnum(r, ln, v, pos, res, 1))
		return 0;

	if (v[*pos] == ')') {
		(*pos)++;
		return 1;
	}

	/*
	 * Omission of the closing parenthesis
	 * is an error in validation mode,
	 * but ignored in evaluation mode.
	 */
	return res != NULL;
}
1527
1528 /*
1529 * Evaluate a complete numeric expression.
1530 * Proceed left to right, there is no concept of precedence.
1531 */
1532 static int
1533 roff_evalnum(struct roff *r, int ln, const char *v,
1534 int *pos, int *res, int skipwhite)
1535 {
1536 int mypos, operand2;
1537 char operator;
1538
1539 if (NULL == pos) {
1540 mypos = 0;
1541 pos = &mypos;
1542 }
1543
1544 if (skipwhite)
1545 while (isspace((unsigned char)v[*pos]))
1546 (*pos)++;
1547
1548 if ( ! roff_evalpar(r, ln, v, pos, res))
1549 return(0);
1550
1551 while (1) {
1552 if (skipwhite)
1553 while (isspace((unsigned char)v[*pos]))
1554 (*pos)++;
1555
1556 if ( ! roff_getop(v, pos, &operator))
1557 break;
1558
1559 if (skipwhite)
1560 while (isspace((unsigned char)v[*pos]))
1561 (*pos)++;
1562
1563 if ( ! roff_evalpar(r, ln, v, pos, &operand2))
1564 return(0);
1565
1566 if (skipwhite)
1567 while (isspace((unsigned char)v[*pos]))
1568 (*pos)++;
1569
1570 if (NULL == res)
1571 continue;
1572
1573 switch (operator) {
1574 case '+':
1575 *res += operand2;
1576 break;
1577 case '-':
1578 *res -= operand2;
1579 break;
1580 case '*':
1581 *res *= operand2;
1582 break;
1583 case '/':
1584 if (operand2 == 0) {
1585 mandoc_msg(MANDOCERR_DIVZERO,
1586 r->parse, ln, *pos, v);
1587 *res = 0;
1588 break;
1589 }
1590 *res /= operand2;
1591 break;
1592 case '%':
1593 if (operand2 == 0) {
1594 mandoc_msg(MANDOCERR_DIVZERO,
1595 r->parse, ln, *pos, v);
1596 *res = 0;
1597 break;
1598 }
1599 *res %= operand2;
1600 break;
1601 case '<':
1602 *res = *res < operand2;
1603 break;
1604 case '>':
1605 *res = *res > operand2;
1606 break;
1607 case 'l':
1608 *res = *res <= operand2;
1609 break;
1610 case 'g':
1611 *res = *res >= operand2;
1612 break;
1613 case '=':
1614 *res = *res == operand2;
1615 break;
1616 case '!':
1617 *res = *res != operand2;
1618 break;
1619 case '&':
1620 *res = *res && operand2;
1621 break;
1622 case ':':
1623 *res = *res || operand2;
1624 break;
1625 case 'i':
1626 if (operand2 < *res)
1627 *res = operand2;
1628 break;
1629 case 'a':
1630 if (operand2 > *res)
1631 *res = operand2;
1632 break;
1633 default:
1634 abort();
1635 }
1636 }
1637 return(1);
1638 }
1639
1640 void
1641 roff_setreg(struct roff *r, const char *name, int val, char sign)
1642 {
1643 struct roffreg *reg;
1644
1645 /* Search for an existing register with the same name. */
1646 reg = r->regtab;
1647
1648 while (reg && strcmp(name, reg->key.p))
1649 reg = reg->next;
1650
1651 if (NULL == reg) {
1652 /* Create a new register. */
1653 reg = mandoc_malloc(sizeof(struct roffreg));
1654 reg->key.p = mandoc_strdup(name);
1655 reg->key.sz = strlen(name);
1656 reg->val = 0;
1657 reg->next = r->regtab;
1658 r->regtab = reg;
1659 }
1660
1661 if ('+' == sign)
1662 reg->val += val;
1663 else if ('-' == sign)
1664 reg->val -= val;
1665 else
1666 reg->val = val;
1667 }
1668
/*
 * Handle some predefined read-only number registers.
 * Only the first character of name is inspected.
 * For now, return -1 if the requested register is not predefined;
 * in case a predefined read-only register having the value -1
 * were to turn up, another special value would have to be chosen.
 */
static int
roff_getregro(const char *name)
{
	static const struct {
		char	 name;
		int	 val;
	} regs[] = {
		{ 'A', 0 },	/* ASCII approximation mode is always off. */
		{ 'g', 1 },	/* Groff compatibility mode is always on. */
		{ 'H', 24 },	/* Fixed horizontal resolution. */
		{ 'j', 0 },	/* Always adjust left margin only. */
		{ 'T', 1 },	/* Some output device is always defined. */
		{ 'V', 40 },	/* Fixed vertical resolution. */
	};
	size_t	 i;

	for (i = 0; i < sizeof(regs) / sizeof(regs[0]); i++)
		if (regs[i].name == *name)
			return regs[i].val;
	return -1;
}
1696
1697 int
1698 roff_getreg(const struct roff *r, const char *name)
1699 {
1700 struct roffreg *reg;
1701 int val;
1702
1703 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
1704 val = roff_getregro(name + 1);
1705 if (-1 != val)
1706 return (val);
1707 }
1708
1709 for (reg = r->regtab; reg; reg = reg->next)
1710 if (0 == strcmp(name, reg->key.p))
1711 return(reg->val);
1712
1713 return(0);
1714 }
1715
1716 static int
1717 roff_getregn(const struct roff *r, const char *name, size_t len)
1718 {
1719 struct roffreg *reg;
1720 int val;
1721
1722 if ('.' == name[0] && 2 == len) {
1723 val = roff_getregro(name + 1);
1724 if (-1 != val)
1725 return (val);
1726 }
1727
1728 for (reg = r->regtab; reg; reg = reg->next)
1729 if (len == reg->key.sz &&
1730 0 == strncmp(name, reg->key.p, len))
1731 return(reg->val);
1732
1733 return(0);
1734 }
1735
1736 static void
1737 roff_freereg(struct roffreg *reg)
1738 {
1739 struct roffreg *old_reg;
1740
1741 while (NULL != reg) {
1742 free(reg->key.p);
1743 old_reg = reg;
1744 reg = reg->next;
1745 free(old_reg);
1746 }
1747 }
1748
1749 static enum rofferr
1750 roff_nr(ROFF_ARGS)
1751 {
1752 char *key, *val;
1753 size_t keysz;
1754 int iv;
1755 char sign;
1756
1757 key = val = buf->buf + pos;
1758 if (*key == '\0')
1759 return(ROFF_IGN);
1760
1761 keysz = roff_getname(r, &val, ln, pos);
1762 if (key[keysz] == '\\')
1763 return(ROFF_IGN);
1764 key[keysz] = '\0';
1765
1766 sign = *val;
1767 if (sign == '+' || sign == '-')
1768 val++;
1769
1770 if (roff_evalnum(r, ln, val, NULL, &iv, 0))
1771 roff_setreg(r, key, iv, sign);
1772
1773 return(ROFF_IGN);
1774 }
1775
1776 static enum rofferr
1777 roff_rr(ROFF_ARGS)
1778 {
1779 struct roffreg *reg, **prev;
1780 char *name, *cp;
1781 size_t namesz;
1782
1783 name = cp = buf->buf + pos;
1784 if (*name == '\0')
1785 return(ROFF_IGN);
1786 namesz = roff_getname(r, &cp, ln, pos);
1787 name[namesz] = '\0';
1788
1789 prev = &r->regtab;
1790 while (1) {
1791 reg = *prev;
1792 if (reg == NULL || !strcmp(name, reg->key.p))
1793 break;
1794 prev = &reg->next;
1795 }
1796 if (reg != NULL) {
1797 *prev = reg->next;
1798 free(reg->key.p);
1799 free(reg);
1800 }
1801 return(ROFF_IGN);
1802 }
1803
1804 static enum rofferr
1805 roff_rm(ROFF_ARGS)
1806 {
1807 const char *name;
1808 char *cp;
1809 size_t namesz;
1810
1811 cp = buf->buf + pos;
1812 while (*cp != '\0') {
1813 name = cp;
1814 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
1815 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
1816 if (name[namesz] == '\\')
1817 break;
1818 }
1819 return(ROFF_IGN);
1820 }
1821
1822 static enum rofferr
1823 roff_it(ROFF_ARGS)
1824 {
1825 char *cp;
1826 size_t len;
1827 int iv;
1828
1829 /* Parse the number of lines. */
1830 cp = buf->buf + pos;
1831 len = strcspn(cp, " \t");
1832 cp[len] = '\0';
1833 if ((iv = mandoc_strntoi(cp, len, 10)) <= 0) {
1834 mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
1835 ln, ppos, buf->buf + 1);
1836 return(ROFF_IGN);
1837 }
1838 cp += len + 1;
1839
1840 /* Arm the input line trap. */
1841 roffit_lines = iv;
1842 roffit_macro = mandoc_strdup(cp);
1843 return(ROFF_IGN);
1844 }
1845
1846 static enum rofferr
1847 roff_Dd(ROFF_ARGS)
1848 {
1849 const char *const *cp;
1850
1851 if ((r->options & (MPARSE_MDOC | MPARSE_QUICK)) == 0)
1852 for (cp = __mdoc_reserved; *cp; cp++)
1853 roff_setstr(r, *cp, NULL, 0);
1854
1855 if (r->format == 0)
1856 r->format = MPARSE_MDOC;
1857
1858 return(ROFF_CONT);
1859 }
1860
1861 static enum rofferr
1862 roff_TH(ROFF_ARGS)
1863 {
1864 const char *const *cp;
1865
1866 if ((r->options & MPARSE_QUICK) == 0)
1867 for (cp = __man_reserved; *cp; cp++)
1868 roff_setstr(r, *cp, NULL, 0);
1869
1870 if (r->format == 0)
1871 r->format = MPARSE_MAN;
1872
1873 return(ROFF_CONT);
1874 }
1875
1876 static enum rofferr
1877 roff_TE(ROFF_ARGS)
1878 {
1879
1880 if (NULL == r->tbl)
1881 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1882 ln, ppos, "TE");
1883 else
1884 tbl_end(&r->tbl);
1885
1886 return(ROFF_IGN);
1887 }
1888
1889 static enum rofferr
1890 roff_T_(ROFF_ARGS)
1891 {
1892
1893 if (NULL == r->tbl)
1894 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1895 ln, ppos, "T&");
1896 else
1897 tbl_restart(ppos, ln, r->tbl);
1898
1899 return(ROFF_IGN);
1900 }
1901
1902 /*
1903 * Handle in-line equation delimiters.
1904 */
1905 static enum rofferr
1906 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
1907 {
1908 char *cp1, *cp2;
1909 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
1910
1911 /*
1912 * Outside equations, look for an opening delimiter.
1913 * If we are inside an equation, we already know it is
1914 * in-line, or this function wouldn't have been called;
1915 * so look for a closing delimiter.
1916 */
1917
1918 cp1 = buf->buf + pos;
1919 cp2 = strchr(cp1, r->eqn == NULL ?
1920 r->last_eqn->odelim : r->last_eqn->cdelim);
1921 if (cp2 == NULL)
1922 return(ROFF_CONT);
1923
1924 *cp2++ = '\0';
1925 bef_pr = bef_nl = aft_nl = aft_pr = "";
1926
1927 /* Handle preceding text, protecting whitespace. */
1928
1929 if (*buf->buf != '\0') {
1930 if (r->eqn == NULL)
1931 bef_pr = "\\&";
1932 bef_nl = "\n";
1933 }
1934
1935 /*
1936 * Prepare replacing the delimiter with an equation macro
1937 * and drop leading white space from the equation.
1938 */
1939
1940 if (r->eqn == NULL) {
1941 while (*cp2 == ' ')
1942 cp2++;
1943 mac = ".EQ";
1944 } else
1945 mac = ".EN";
1946
1947 /* Handle following text, protecting whitespace. */
1948
1949 if (*cp2 != '\0') {
1950 aft_nl = "\n";
1951 if (r->eqn != NULL)
1952 aft_pr = "\\&";
1953 }
1954
1955 /* Do the actual replacement. */
1956
1957 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
1958 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
1959 free(buf->buf);
1960 buf->buf = cp1;
1961
1962 /* Toggle the in-line state of the eqn subsystem. */
1963
1964 r->eqn_inline = r->eqn == NULL;
1965 return(ROFF_REPARSE);
1966 }
1967
1968 static enum rofferr
1969 roff_EQ(ROFF_ARGS)
1970 {
1971 struct eqn_node *e;
1972
1973 assert(r->eqn == NULL);
1974 e = eqn_alloc(ppos, ln, r->parse);
1975
1976 if (r->last_eqn) {
1977 r->last_eqn->next = e;
1978 e->delim = r->last_eqn->delim;
1979 e->odelim = r->last_eqn->odelim;
1980 e->cdelim = r->last_eqn->cdelim;
1981 } else
1982 r->first_eqn = r->last_eqn = e;
1983
1984 r->eqn = r->last_eqn = e;
1985
1986 if (buf->buf[pos] != '\0')
1987 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1988 ".EQ %s", buf->buf + pos);
1989
1990 return(ROFF_IGN);
1991 }
1992
1993 static enum rofferr
1994 roff_EN(ROFF_ARGS)
1995 {
1996
1997 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
1998 return(ROFF_IGN);
1999 }
2000
2001 static enum rofferr
2002 roff_TS(ROFF_ARGS)
2003 {
2004 struct tbl_node *tbl;
2005
2006 if (r->tbl) {
2007 mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
2008 ln, ppos, "TS breaks TS");
2009 tbl_end(&r->tbl);
2010 }
2011
2012 tbl = tbl_alloc(ppos, ln, r->parse);
2013
2014 if (r->last_tbl)
2015 r->last_tbl->next = tbl;
2016 else
2017 r->first_tbl = r->last_tbl = tbl;
2018
2019 r->tbl = r->last_tbl = tbl;
2020 return(ROFF_IGN);
2021 }
2022
2023 static enum rofferr
2024 roff_cc(ROFF_ARGS)
2025 {
2026 const char *p;
2027
2028 p = buf->buf + pos;
2029
2030 if (*p == '\0' || (r->control = *p++) == '.')
2031 r->control = 0;
2032
2033 if (*p != '\0')
2034 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
2035
2036 return(ROFF_IGN);
2037 }
2038
2039 static enum rofferr
2040 roff_tr(ROFF_ARGS)
2041 {
2042 const char *p, *first, *second;
2043 size_t fsz, ssz;
2044 enum mandoc_esc esc;
2045
2046 p = buf->buf + pos;
2047
2048 if (*p == '\0') {
2049 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
2050 return(ROFF_IGN);
2051 }
2052
2053 while (*p != '\0') {
2054 fsz = ssz = 1;
2055
2056 first = p++;
2057 if (*first == '\\') {
2058 esc = mandoc_escape(&p, NULL, NULL);
2059 if (esc == ESCAPE_ERROR) {
2060 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2061 ln, (int)(p - buf->buf), first);
2062 return(ROFF_IGN);
2063 }
2064 fsz = (size_t)(p - first);
2065 }
2066
2067 second = p++;
2068 if (*second == '\\') {
2069 esc = mandoc_escape(&p, NULL, NULL);
2070 if (esc == ESCAPE_ERROR) {
2071 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2072 ln, (int)(p - buf->buf), second);
2073 return(ROFF_IGN);
2074 }
2075 ssz = (size_t)(p - second);
2076 } else if (*second == '\0') {
2077 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
2078 ln, (int)(p - buf->buf), NULL);
2079 second = " ";
2080 p--;
2081 }
2082
2083 if (fsz > 1) {
2084 roff_setstrn(&r->xmbtab, first, fsz,
2085 second, ssz, 0);
2086 continue;
2087 }
2088
2089 if (r->xtab == NULL)
2090 r->xtab = mandoc_calloc(128,
2091 sizeof(struct roffstr));
2092
2093 free(r->xtab[(int)*first].p);
2094 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
2095 r->xtab[(int)*first].sz = ssz;
2096 }
2097
2098 return(ROFF_IGN);
2099 }
2100
2101 static enum rofferr
2102 roff_so(ROFF_ARGS)
2103 {
2104 char *name;
2105
2106 name = buf->buf + pos;
2107 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
2108
2109 /*
2110 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
2111 * opening anything that's not in our cwd or anything beneath
2112 * it. Thus, explicitly disallow traversing up the file-system
2113 * or using absolute paths.
2114 */
2115
2116 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
2117 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
2118 ".so %s", name);
2119 return(ROFF_ERR);
2120 }
2121
2122 *offs = pos;
2123 return(ROFF_SO);
2124 }
2125
2126 static enum rofferr
2127 roff_userdef(ROFF_ARGS)
2128 {
2129 const char *arg[9];
2130 char *cp, *n1, *n2;
2131 int i;
2132
2133 /*
2134 * Collect pointers to macro argument strings
2135 * and NUL-terminate them.
2136 */
2137 cp = buf->buf + pos;
2138 for (i = 0; i < 9; i++)
2139 arg[i] = *cp == '\0' ? "" :
2140 mandoc_getarg(r->parse, &cp, ln, &pos);
2141
2142 /*
2143 * Expand macro arguments.
2144 */
2145 buf->sz = 0;
2146 n1 = cp = mandoc_strdup(r->current_string);
2147 while ((cp = strstr(cp, "\\$")) != NULL) {
2148 i = cp[2] - '1';
2149 if (0 > i || 8 < i) {
2150 /* Not an argument invocation. */
2151 cp += 2;
2152 continue;
2153 }
2154 *cp = '\0';
2155 buf->sz = mandoc_asprintf(&n2, "%s%s%s",
2156 n1, arg[i], cp + 3) + 1;
2157 cp = n2 + (cp - n1);
2158 free(n1);
2159 n1 = n2;
2160 }
2161
2162 /*
2163 * Replace the macro invocation
2164 * by the expanded macro.
2165 */
2166 free(buf->buf);
2167 buf->buf = n1;
2168 if (buf->sz == 0)
2169 buf->sz = strlen(buf->buf) + 1;
2170
2171 return(buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
2172 ROFF_REPARSE : ROFF_APPEND);
2173 }
2174
2175 static size_t
2176 roff_getname(struct roff *r, char **cpp, int ln, int pos)
2177 {
2178 char *name, *cp;
2179 size_t namesz;
2180
2181 name = *cpp;
2182 if ('\0' == *name)
2183 return(0);
2184
2185 /* Read until end of name and terminate it with NUL. */
2186 for (cp = name; 1; cp++) {
2187 if ('\0' == *cp || ' ' == *cp) {
2188 namesz = cp - name;
2189 break;
2190 }
2191 if ('\\' != *cp)
2192 continue;
2193 namesz = cp - name;
2194 if ('{' == cp[1] || '}' == cp[1])
2195 break;
2196 cp++;
2197 if ('\\' == *cp)
2198 continue;
2199 mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
2200 "%.*s", (int)(cp - name + 1), name);
2201 mandoc_escape((const char **)&cp, NULL, NULL);
2202 break;
2203 }
2204
2205 /* Read past spaces. */
2206 while (' ' == *cp)
2207 cp++;
2208
2209 *cpp = cp;
2210 return(namesz);
2211 }
2212
2213 /*
2214 * Store *string into the user-defined string called *name.
2215 * To clear an existing entry, call with (*r, *name, NULL, 0).
2216 * append == 0: replace mode
2217 * append == 1: single-line append mode
2218 * append == 2: multiline append mode, append '\n' after each call
2219 */
2220 static void
2221 roff_setstr(struct roff *r, const char *name, const char *string,
2222 int append)
2223 {
2224
2225 roff_setstrn(&r->strtab, name, strlen(name), string,
2226 string ? strlen(string) : 0, append);
2227 }
2228
2229 static void
2230 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
2231 const char *string, size_t stringsz, int append)
2232 {
2233 struct roffkv *n;
2234 char *c;
2235 int i;
2236 size_t oldch, newch;
2237
2238 /* Search for an existing string with the same name. */
2239 n = *r;
2240
2241 while (n && (namesz != n->key.sz ||
2242 strncmp(n->key.p, name, namesz)))
2243 n = n->next;
2244
2245 if (NULL == n) {
2246 /* Create a new string table entry. */
2247 n = mandoc_malloc(sizeof(struct roffkv));
2248 n->key.p = mandoc_strndup(name, namesz);
2249 n->key.sz = namesz;
2250 n->val.p = NULL;
2251 n->val.sz = 0;
2252 n->next = *r;
2253 *r = n;
2254 } else if (0 == append) {
2255 free(n->val.p);
2256 n->val.p = NULL;
2257 n->val.sz = 0;
2258 }
2259
2260 if (NULL == string)
2261 return;
2262
2263 /*
2264 * One additional byte for the '\n' in multiline mode,
2265 * and one for the terminating '\0'.
2266 */
2267 newch = stringsz + (1 < append ? 2u : 1u);
2268
2269 if (NULL == n->val.p) {
2270 n->val.p = mandoc_malloc(newch);
2271 *n->val.p = '\0';
2272 oldch = 0;
2273 } else {
2274 oldch = n->val.sz;
2275 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
2276 }
2277
2278 /* Skip existing content in the destination buffer. */
2279 c = n->val.p + (int)oldch;
2280
2281 /* Append new content to the destination buffer. */
2282 i = 0;
2283 while (i < (int)stringsz) {
2284 /*
2285 * Rudimentary roff copy mode:
2286 * Handle escaped backslashes.
2287 */
2288 if ('\\' == string[i] && '\\' == string[i + 1])
2289 i++;
2290 *c++ = string[i++];
2291 }
2292
2293 /* Append terminating bytes. */
2294 if (1 < append)
2295 *c++ = '\n';
2296
2297 *c = '\0';
2298 n->val.sz = (int)(c - n->val.p);
2299 }
2300
2301 static const char *
2302 roff_getstrn(const struct roff *r, const char *name, size_t len)
2303 {
2304 const struct roffkv *n;
2305 int i;
2306
2307 for (n = r->strtab; n; n = n->next)
2308 if (0 == strncmp(name, n->key.p, len) &&
2309 '\0' == n->key.p[(int)len])
2310 return(n->val.p);
2311
2312 for (i = 0; i < PREDEFS_MAX; i++)
2313 if (0 == strncmp(name, predefs[i].name, len) &&
2314 '\0' == predefs[i].name[(int)len])
2315 return(predefs[i].str);
2316
2317 return(NULL);
2318 }
2319
2320 static void
2321 roff_freestr(struct roffkv *r)
2322 {
2323 struct roffkv *n, *nn;
2324
2325 for (n = r; n; n = nn) {
2326 free(n->key.p);
2327 free(n->val.p);
2328 nn = n->next;
2329 free(n);
2330 }
2331 }
2332
2333 const struct tbl_span *
2334 roff_span(const struct roff *r)
2335 {
2336
2337 return(r->tbl ? tbl_span(r->tbl) : NULL);
2338 }
2339
2340 const struct eqn *
2341 roff_eqn(const struct roff *r)
2342 {
2343
2344 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
2345 }
2346
2347 /*
2348 * Duplicate an input string, making the appropriate character
2349 * conversations (as stipulated by `tr') along the way.
2350 * Returns a heap-allocated string with all the replacements made.
2351 */
2352 char *
2353 roff_strdup(const struct roff *r, const char *p)
2354 {
2355 const struct roffkv *cp;
2356 char *res;
2357 const char *pp;
2358 size_t ssz, sz;
2359 enum mandoc_esc esc;
2360
2361 if (NULL == r->xmbtab && NULL == r->xtab)
2362 return(mandoc_strdup(p));
2363 else if ('\0' == *p)
2364 return(mandoc_strdup(""));
2365
2366 /*
2367 * Step through each character looking for term matches
2368 * (remember that a `tr' can be invoked with an escape, which is
2369 * a glyph but the escape is multi-character).
2370 * We only do this if the character hash has been initialised
2371 * and the string is >0 length.
2372 */
2373
2374 res = NULL;
2375 ssz = 0;
2376
2377 while ('\0' != *p) {
2378 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
2379 sz = r->xtab[(int)*p].sz;
2380 res = mandoc_realloc(res, ssz + sz + 1);
2381 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
2382 ssz += sz;
2383 p++;
2384 continue;
2385 } else if ('\\' != *p) {
2386 res = mandoc_realloc(res, ssz + 2);
2387 res[ssz++] = *p++;
2388 continue;
2389 }
2390
2391 /* Search for term matches. */
2392 for (cp = r->xmbtab; cp; cp = cp->next)
2393 if (0 == strncmp(p, cp->key.p, cp->key.sz))
2394 break;
2395
2396 if (NULL != cp) {
2397 /*
2398 * A match has been found.
2399 * Append the match to the array and move
2400 * forward by its keysize.
2401 */
2402 res = mandoc_realloc(res,
2403 ssz + cp->val.sz + 1);
2404 memcpy(res + ssz, cp->val.p, cp->val.sz);
2405 ssz += cp->val.sz;
2406 p += (int)cp->key.sz;
2407 continue;
2408 }
2409
2410 /*
2411 * Handle escapes carefully: we need to copy
2412 * over just the escape itself, or else we might
2413 * do replacements within the escape itself.
2414 * Make sure to pass along the bogus string.
2415 */
2416 pp = p++;
2417 esc = mandoc_escape(&p, NULL, NULL);
2418 if (ESCAPE_ERROR == esc) {
2419 sz = strlen(pp);
2420 res = mandoc_realloc(res, ssz + sz + 1);
2421 memcpy(res + ssz, pp, sz);
2422 break;
2423 }
2424 /*
2425 * We bail out on bad escapes.
2426 * No need to warn: we already did so when
2427 * roff_res() was called.
2428 */
2429 sz = (int)(p - pp);
2430 res = mandoc_realloc(res, ssz + sz + 1);
2431 memcpy(res + ssz, pp, sz);
2432 ssz += sz;
2433 }
2434
2435 res[(int)ssz] = '\0';
2436 return(res);
2437 }
2438
2439 int
2440 roff_getformat(const struct roff *r)
2441 {
2442
2443 return(r->format);
2444 }
2445
2446 /*
2447 * Find out whether a line is a macro line or not.
2448 * If it is, adjust the current position and return one; if it isn't,
2449 * return zero and don't change the current position.
2450 * If the control character has been set with `.cc', then let that grain
2451 * precedence.
2452 * This is slighly contrary to groff, where using the non-breaking
2453 * control character when `cc' has been invoked will cause the
2454 * non-breaking macro contents to be printed verbatim.
2455 */
2456 int
2457 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
2458 {
2459 int pos;
2460
2461 pos = *ppos;
2462
2463 if (0 != r->control && cp[pos] == r->control)
2464 pos++;
2465 else if (0 != r->control)
2466 return(0);
2467 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
2468 pos += 2;
2469 else if ('.' == cp[pos] || '\'' == cp[pos])
2470 pos++;
2471 else
2472 return(0);
2473
2474 while (' ' == cp[pos] || '\t' == cp[pos])
2475 pos++;
2476
2477 *ppos = pos;
2478 return(1);
2479 }