]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
Handle output encoding for unicode, numbered and named escape sequences
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.236 2014/10/25 15:23:56 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include "mandoc.h"
29 #include "mandoc_aux.h"
30 #include "libmandoc.h"
31 #include "libroff.h"
32
/* Maximum number of nested if-else conditionals. */
#define	RSTACK_MAX	128

/*
 * Maximum number of string expansions per line, to break infinite loops.
 * roff_res() counts its expansions against this limit; once the limit
 * is exceeded, it reports MANDOCERR_ROFFLOOP and returns ROFF_IGN.
 */
#define	EXPAND_LIMIT	1000
38
/*
 * Tokens for the roff requests handled in this file.
 * The values are used as indices into the roffs[] table, so the
 * enumerators have to stay in the same order as the entries there.
 * ROFF_USERDEF marks a user-defined macro (see roff_parse()), and
 * ROFF_MAX doubles as the "no such request" result of the lookup
 * functions.
 */
enum	rofft {
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_as,
	ROFF_cc,
	ROFF_ce,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_fam,
	ROFF_hw,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_pl,
	ROFF_ps,
	ROFF_rm,
	ROFF_rr,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,
	ROFF_Dd,
	ROFF_TH,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,
	ROFF_EQ,
	ROFF_EN,
	ROFF_cblock,
	ROFF_USERDEF,
	ROFF_MAX
};
81
/*
 * An incredibly-simple string buffer:
 * a string pointer together with the cached length of that string.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
89
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * Used for the tables of user-defined strings and macros (strtab)
 * and of multi-byte character translations (xmbtab) in struct roff.
 */
struct	roffkv {
	struct roffstr	 key;
	struct roffstr	 val;
	struct roffkv	*next; /* next in list */
};
98
/*
 * A single number register as part of a singly-linked list.
 * The list head is kept in the regtab member of struct roff.
 */
struct	roffreg {
	struct roffstr	 key; /* name of the register */
	int		 val; /* current value of the register */
	struct roffreg	*next; /* next in list */
};
107
/*
 * Complete state of one roff parser instance.
 * Objects are created by roff_alloc(), returned to their initial
 * state by roff_reset(), and destroyed by roff_free().
 */
struct	roff {
	struct mparse	*parse; /* parse point */
	struct roffnode	*last; /* leaf of stack */
	int		*rstack; /* stack of inverted `ie' values */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
				/* (roff_free1() releases 128 entries) */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* last equation parsed */
	struct eqn_node	*first_eqn; /* first equation parsed */
	struct eqn_node	*eqn; /* current equation being parsed */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack */
				/* (-1 after roff_alloc() and roff_free1()) */
	int		 format; /* current file in mdoc or man format */
	char		 control; /* control character */
};
130
/*
 * One entry on the stack of currently open blocks and conditionals.
 * The top of the stack is the last member of struct roff; entries
 * are created by roffnode_push() and destroyed by roffnode_pop(),
 * which also frees the name and end strings.
 */
struct	roffnode {
	enum rofft	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* end-rules: custom token */
	int		 endspan; /* end-rules: next-line or infty */
				/* (roffnode_cleanscope() decrements it */
				/* and pops the node when it reaches 0) */
	int		 rule; /* current evaluation rule */
				/* (inherited from the parent on push) */
};
141
/*
 * Common parameter list shared by all request handlers (see roffproc),
 * so that they can be called through the roffs[] dispatch table.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum rofft tok, /* tok of macro */ \
			 char **bufp, /* input buffer */ \
			 size_t *szp, /* size of input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */
150
/* Pointer to a request handler taking the ROFF_ARGS parameter list. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
152
/*
 * Description of one roff request: its name and its handlers.
 * The entries live in the roffs[] table and are additionally chained
 * into hash buckets through the next pointer by roffhash_init().
 */
struct	roffmac {
	const char	*name; /* macro name */
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
	struct roffmac	*next; /* next request in the same hash bucket */
};
162
/*
 * One predefined string: a name and the text it stands for.
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Expands to one initializer of the predefs[] array,
 * including the trailing comma.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
170
/* Forward declarations of file-local functions. */
static	enum rofft	 roffhash_find(const char *, size_t);
static	void		 roffhash_init(void);
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum rofft,
				const char *, int, int);
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_cc(ROFF_ARGS);
static	void		 roff_ccond(struct roff *, int, int);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum rofferr	 roff_eqndelim(struct roff *,
				char **, size_t *, int);
static	int		 roff_evalcond(struct roff *r, int,
				const char *, int *);
static	int		 roff_evalnum(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalpar(struct roff *, int,
				const char *, int *, int *);
static	int		 roff_evalstrcond(const char *, int *);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	size_t		 roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(const struct roff *,
				const char *, size_t);
static	int		 roff_getregro(const char *name);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	enum rofferr	 roff_it(ROFF_ARGS);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	enum rofft	 roff_parse(struct roff *, char *, int *,
				int, int);
static	enum rofferr	 roff_parsetext(char **, size_t *, int, int *);
static	enum rofferr	 roff_res(struct roff *,
				char **, size_t *, int, int);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	enum rofferr	 roff_rr(ROFF_ARGS);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_tr(ROFF_ARGS);
static	enum rofferr	 roff_Dd(ROFF_ARGS);
static	enum rofferr	 roff_TH(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
231
/*
 * See roffhash_find().
 * Request names are hashed on their first character, which has to be
 * in the range ASCII_LO to ASCII_HI; there is one bucket for each
 * character in that range.
 */

#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

/* Heads of the bucket chains, filled in by roffhash_init(). */
static	struct roffmac	*hash[HASHWIDTH];
239
/*
 * Dispatch table of all requests, indexed by enum rofft; the entries
 * have to appear in the same order as the enumerators.
 * Columns: name, proc, text, sub, flags, next.
 * The final entry belongs to ROFF_USERDEF; it has no name and is
 * not entered into the hash table by roffhash_init().
 */
static	struct roffmac	 roffs[ROFF_MAX] = {
	{ "ad", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "as", roff_ds, NULL, NULL, 0, NULL },
	{ "cc", roff_cc, NULL, NULL, 0, NULL },
	{ "ce", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ds", roff_ds, NULL, NULL, 0, NULL },
	{ "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "fam", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hw", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hy", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "it", roff_it, NULL, NULL, 0, NULL },
	{ "ne", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nh", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nr", roff_nr, NULL, NULL, 0, NULL },
	{ "ns", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "pl", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ps", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rm", roff_rm, NULL, NULL, 0, NULL },
	{ "rr", roff_rr, NULL, NULL, 0, NULL },
	{ "so", roff_so, NULL, NULL, 0, NULL },
	{ "ta", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tr", roff_tr, NULL, NULL, 0, NULL },
	{ "Dd", roff_Dd, NULL, NULL, 0, NULL },
	{ "TH", roff_TH, NULL, NULL, 0, NULL },
	{ "TS", roff_TS, NULL, NULL, 0, NULL },
	{ "TE", roff_TE, NULL, NULL, 0, NULL },
	{ "T&", roff_T_, NULL, NULL, 0, NULL },
	{ "EQ", roff_EQ, NULL, NULL, 0, NULL },
	{ "EN", roff_EN, NULL, NULL, 0, NULL },
	{ ".", roff_cblock, NULL, NULL, 0, NULL },
	{ NULL, roff_userdef, NULL, NULL, 0, NULL },
};
281
/*
 * NULL-terminated list of mdoc macro names.
 * not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH
 * NOTE(review): identifiers containing a double underscore are
 * reserved for the implementation in C; the name is kept because
 * the object has external linkage and may be referenced elsewhere.
 */
const char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
	"Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
	"Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em",
	"En", "Eo", "Er", "Es", "Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp",
	"Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "Pq",
	"Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv",
	"Sc", "Sh", "Sm", "So", "Sq",
	"Ss", "St", "Sx", "Sy",
	"Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
	"%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
304
/*
 * NULL-terminated list of man macro names.
 * not currently implemented: BT DE DS ME MT PT SY TQ YS
 * NOTE(review): identifiers containing a double underscore are
 * reserved for the implementation in C; the name is kept because
 * the object has external linkage and may be referenced elsewhere.
 */
const char *const __man_reserved[] = {
	"AT", "B", "BI", "BR", "DT",
	"EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR",
	"LP", "OP", "P", "PD", "PP",
	"R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS",
	"TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR",
	NULL
};
314
/*
 * Array of injected predefined strings.
 * The initializers are generated by the PREDEF() invocations in
 * the included file.
 * NOTE(review): PREDEFS_MAX presumably has to equal the number of
 * PREDEF() lines in predefs.in - confirm when editing that file.
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
320
/*
 * See roffhash_find().
 * Maps a name to its bucket in hash[], using the first character only.
 * The caller has to make sure that character is within ASCII_LO and
 * ASCII_HI.
 */
#define	ROFF_HASH(p)	(p[0] - ASCII_LO)

/*
 * State of the input line trap.  It is file-global rather than
 * part of struct roff; roff_parsetext() counts it down and springs it.
 */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
326
327
328 static void
329 roffhash_init(void)
330 {
331 struct roffmac *n;
332 int buc, i;
333
334 for (i = 0; i < (int)ROFF_USERDEF; i++) {
335 assert(roffs[i].name[0] >= ASCII_LO);
336 assert(roffs[i].name[0] <= ASCII_HI);
337
338 buc = ROFF_HASH(roffs[i].name);
339
340 if (NULL != (n = hash[buc])) {
341 for ( ; n->next; n = n->next)
342 /* Do nothing. */ ;
343 n->next = &roffs[i];
344 } else
345 hash[buc] = &roffs[i];
346 }
347 }
348
349 /*
350 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
351 * the nil-terminated string name could be found.
352 */
353 static enum rofft
354 roffhash_find(const char *p, size_t s)
355 {
356 int buc;
357 struct roffmac *n;
358
359 /*
360 * libroff has an extremely simple hashtable, for the time
361 * being, which simply keys on the first character, which must
362 * be printable, then walks a chain. It works well enough until
363 * optimised.
364 */
365
366 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
367 return(ROFF_MAX);
368
369 buc = ROFF_HASH(p);
370
371 if (NULL == (n = hash[buc]))
372 return(ROFF_MAX);
373 for ( ; n; n = n->next)
374 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
375 return((enum rofft)(n - roffs));
376
377 return(ROFF_MAX);
378 }
379
380 /*
381 * Pop the current node off of the stack of roff instructions currently
382 * pending.
383 */
384 static void
385 roffnode_pop(struct roff *r)
386 {
387 struct roffnode *p;
388
389 assert(r->last);
390 p = r->last;
391
392 r->last = r->last->parent;
393 free(p->name);
394 free(p->end);
395 free(p);
396 }
397
398 /*
399 * Push a roff node onto the instruction stack. This must later be
400 * removed with roffnode_pop().
401 */
402 static void
403 roffnode_push(struct roff *r, enum rofft tok, const char *name,
404 int line, int col)
405 {
406 struct roffnode *p;
407
408 p = mandoc_calloc(1, sizeof(struct roffnode));
409 p->tok = tok;
410 if (name)
411 p->name = mandoc_strdup(name);
412 p->parent = r->last;
413 p->line = line;
414 p->col = col;
415 p->rule = p->parent ? p->parent->rule : 0;
416
417 r->last = p;
418 }
419
420 static void
421 roff_free1(struct roff *r)
422 {
423 struct tbl_node *tbl;
424 struct eqn_node *e;
425 int i;
426
427 while (NULL != (tbl = r->first_tbl)) {
428 r->first_tbl = tbl->next;
429 tbl_free(tbl);
430 }
431 r->first_tbl = r->last_tbl = r->tbl = NULL;
432
433 while (NULL != (e = r->first_eqn)) {
434 r->first_eqn = e->next;
435 eqn_free(e);
436 }
437 r->first_eqn = r->last_eqn = r->eqn = NULL;
438
439 while (r->last)
440 roffnode_pop(r);
441
442 free (r->rstack);
443 r->rstack = NULL;
444 r->rstacksz = 0;
445 r->rstackpos = -1;
446
447 roff_freereg(r->regtab);
448 r->regtab = NULL;
449
450 roff_freestr(r->strtab);
451 roff_freestr(r->xmbtab);
452 r->strtab = r->xmbtab = NULL;
453
454 if (r->xtab)
455 for (i = 0; i < 128; i++)
456 free(r->xtab[i].p);
457 free(r->xtab);
458 r->xtab = NULL;
459 }
460
461 void
462 roff_reset(struct roff *r)
463 {
464
465 roff_free1(r);
466 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
467 r->control = 0;
468 }
469
/*
 * Destroy a parser: release all data it owns, then the object itself.
 * The pointer must not be used afterwards.
 */
void
roff_free(struct roff *r)
{

	roff_free1(r);	/* contents first ... */
	free(r);	/* ... then the container */
}
477
478 struct roff *
479 roff_alloc(struct mparse *parse, int options)
480 {
481 struct roff *r;
482
483 r = mandoc_calloc(1, sizeof(struct roff));
484 r->parse = parse;
485 r->options = options;
486 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
487 r->rstackpos = -1;
488
489 roffhash_init();
490
491 return(r);
492 }
493
/*
 * In the current line, expand escape sequences that tend to get
 * used in numerical expressions and conditional requests.
 * Also check the syntax of the remaining escape sequences.
 *
 * The following escapes are replaced in the buffer:
 *   \*  by the value of a user-defined string (roff_getstrn()),
 *   \n  by the decimal value of a number register (roff_getregn()),
 *   \B  by "1" if the delimited argument is completely consumed
 *       by roff_evalnum(), or by "0" otherwise,
 *   \w  by 24 times the length in bytes of the delimited argument.
 * All other escapes are merely passed to mandoc_escape() for checking.
 *
 * For each replacement, a new buffer is allocated, and *bufp and *szp
 * are updated accordingly.  The line is scanned from its end towards
 * pos, and after a replacement, scanning resumes at the end of the
 * inserted text, so escapes contained in the replacement are
 * processed as well.
 *
 * Returns ROFF_IGN if more than EXPAND_LIMIT expansions were needed,
 * ROFF_CONT otherwise.
 */
static enum rofferr
roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
{
	char		 ubuf[24]; /* buffer to print the number */
	const char	*start;	/* start of the string to process */
	char		*stesc;	/* start of an escape sequence ('\\') */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy bufp to */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	int		 expand_count;	/* to avoid infinite loops */
	int		 npos;	/* position in numeric expression */
	int		 arg_complete; /* argument not interrupted by eol */
	char		 term;	/* character terminating the escape */

	expand_count = 0;
	start = *bufp + pos;
	/* Start at the last character of the line and walk backwards. */
	stesc = strchr(start, '\0') - 1;
	while (stesc-- > start) {

		/* Search backwards for the next backslash. */

		if ('\\' != *stesc)
			continue;

		/* If it is escaped, skip it. */

		/* Find the first character of the run of backslashes. */
		for (cp = stesc - 1; cp >= start; cp--)
			if ('\\' != *cp)
				break;

		/*
		 * An even number of backslashes means that the last
		 * one is escaped by the one before it; skip the run.
		 */
		if (0 == (stesc - cp) % 2) {
			stesc = (char *)cp;
			continue;
		}

		/* Decide whether to expand or to check only. */

		term = '\0';
		cp = stesc + 1;
		switch (*cp) {
		case '*':
			/* Looked up below; NULL means undefined. */
			res = NULL;
			break;
		case 'B':
			/* FALLTHROUGH */
		case 'w':
			/* The argument is delimited by the next character. */
			term = cp[1];
			/* FALLTHROUGH */
		case 'n':
			res = ubuf;
			break;
		default:
			/* Not expanded here; only validate the syntax. */
			if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
				mandoc_vmsg(MANDOCERR_ESC_BAD,
				    r->parse, ln, (int)(stesc - *bufp),
				    "%.*s", (int)(cp - stesc), stesc);
			continue;
		}

		if (EXPAND_LIMIT < ++expand_count) {
			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
			    ln, (int)(stesc - *bufp), NULL);
			return(ROFF_IGN);
		}

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 */

		/* A maxl of zero means: read up to the terminator. */
		if ('\0' == term) {
			switch (*++cp) {
			case '\0':
				maxl = 0;
				break;
			case '(':
				cp++;
				maxl = 2;
				break;
			case '[':
				cp++;
				term = ']';
				maxl = 0;
				break;
			default:
				maxl = 1;
				break;
			}
		} else {
			/* Skip the escape letter and the delimiter. */
			cp += 2;
			maxl = 0;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		arg_complete = 1;
		for (naml = 0; 0 == maxl || naml < maxl; naml++, cp++) {
			if ('\0' == *cp) {
				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
				    ln, (int)(stesc - *bufp), stesc);
				arg_complete = 0;
				break;
			}
			if (0 == maxl && *cp == term) {
				/* Step over the terminator as well. */
				cp++;
				break;
			}
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		switch (stesc[1]) {
		case '*':
			if (arg_complete)
				res = roff_getstrn(r, stnam, naml);
			break;
		case 'B':
			/*
			 * The expression is only valid if it extends
			 * right up to the closing delimiter.
			 */
			npos = 0;
			ubuf[0] = arg_complete &&
			    roff_evalnum(r, ln, stnam, &npos, NULL, 0) &&
			    stnam + npos + 1 == cp ? '1' : '0';
			ubuf[1] = '\0';
			break;
		case 'n':
			if (arg_complete)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, stnam, naml));
			else
				ubuf[0] = '\0';
			break;
		case 'w':
			/* use even incomplete args */
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
			    24 * (int)naml);
			break;
		}

		/* Undefined strings expand to the empty string. */
		if (NULL == res) {
			mandoc_vmsg(MANDOCERR_STR_UNDEF,
			    r->parse, ln, (int)(stesc - *bufp),
			    "%.*s", (int)naml, stnam);
			res = "";
		}

		/* Replace the escape sequence by the string. */

		/*
		 * Cutting the old buffer at the backslash lets "%s"
		 * copy exactly the text in front of the escape.
		 */
		*stesc = '\0';
		*szp = mandoc_asprintf(&nbuf, "%s%s%s",
		    *bufp, res, cp) + 1;

		/* Prepare for the next replacement. */

		/*
		 * Continue right behind the inserted text, such that
		 * the backward scan covers the replacement, too.
		 */
		start = nbuf + pos;
		stesc = nbuf + (stesc - *bufp) + strlen(res);
		free(*bufp);
		*bufp = nbuf;
	}
	return(ROFF_CONT);
}
666
667 /*
668 * Process text streams:
669 * Convert all breakable hyphens into ASCII_HYPH.
670 * Decrement and spring input line trap.
671 */
672 static enum rofferr
673 roff_parsetext(char **bufp, size_t *szp, int pos, int *offs)
674 {
675 size_t sz;
676 const char *start;
677 char *p;
678 int isz;
679 enum mandoc_esc esc;
680
681 start = p = *bufp + pos;
682
683 while ('\0' != *p) {
684 sz = strcspn(p, "-\\");
685 p += sz;
686
687 if ('\0' == *p)
688 break;
689
690 if ('\\' == *p) {
691 /* Skip over escapes. */
692 p++;
693 esc = mandoc_escape((const char **)&p, NULL, NULL);
694 if (ESCAPE_ERROR == esc)
695 break;
696 continue;
697 } else if (p == start) {
698 p++;
699 continue;
700 }
701
702 if (isalpha((unsigned char)p[-1]) &&
703 isalpha((unsigned char)p[1]))
704 *p = ASCII_HYPH;
705 p++;
706 }
707
708 /* Spring the input line trap. */
709 if (1 == roffit_lines) {
710 isz = mandoc_asprintf(&p, "%s\n.%s", *bufp, roffit_macro);
711 free(*bufp);
712 *bufp = p;
713 *szp = isz + 1;
714 *offs = 0;
715 free(roffit_macro);
716 roffit_lines = 0;
717 return(ROFF_REPARSE);
718 } else if (1 < roffit_lines)
719 --roffit_lines;
720 return(ROFF_CONT);
721 }
722
723 enum rofferr
724 roff_parseln(struct roff *r, int ln, char **bufp,
725 size_t *szp, int pos, int *offs)
726 {
727 enum rofft t;
728 enum rofferr e;
729 int ppos, ctl;
730
731 /* Handle in-line equation delimiters. */
732
733 if (r->tbl == NULL &&
734 r->last_eqn != NULL && r->last_eqn->delim &&
735 (r->eqn == NULL || r->eqn_inline)) {
736 e = roff_eqndelim(r, bufp, szp, pos);
737 if (e == ROFF_REPARSE)
738 return(e);
739 assert(e == ROFF_CONT);
740 }
741
742 /* Expand some escape sequences. */
743
744 e = roff_res(r, bufp, szp, ln, pos);
745 if (ROFF_IGN == e)
746 return(e);
747 assert(ROFF_CONT == e);
748
749 ppos = pos;
750 ctl = roff_getcontrol(r, *bufp, &pos);
751
752 /*
753 * First, if a scope is open and we're not a macro, pass the
754 * text through the macro's filter. If a scope isn't open and
755 * we're not a macro, just let it through.
756 * Finally, if there's an equation scope open, divert it into it
757 * no matter our state.
758 */
759
760 if (r->last && ! ctl) {
761 t = r->last->tok;
762 assert(roffs[t].text);
763 e = (*roffs[t].text)(r, t, bufp, szp, ln, pos, pos, offs);
764 assert(ROFF_IGN == e || ROFF_CONT == e);
765 if (ROFF_CONT != e)
766 return(e);
767 }
768 if (r->eqn)
769 return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
770 if ( ! ctl) {
771 if (r->tbl)
772 return(tbl_read(r->tbl, ln, *bufp, pos));
773 return(roff_parsetext(bufp, szp, pos, offs));
774 }
775
776 /* Skip empty request lines. */
777
778 if ((*bufp)[pos] == '"') {
779 mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
780 ln, pos, NULL);
781 return(ROFF_IGN);
782 } else if ((*bufp)[pos] == '\0')
783 return(ROFF_IGN);
784
785 /*
786 * If a scope is open, go to the child handler for that macro,
787 * as it may want to preprocess before doing anything with it.
788 * Don't do so if an equation is open.
789 */
790
791 if (r->last) {
792 t = r->last->tok;
793 assert(roffs[t].sub);
794 return((*roffs[t].sub)(r, t, bufp, szp,
795 ln, ppos, pos, offs));
796 }
797
798 /*
799 * Lastly, as we've no scope open, try to look up and execute
800 * the new macro. If no macro is found, simply return and let
801 * the compilers handle it.
802 */
803
804 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos, ln, ppos)))
805 return(ROFF_CONT);
806
807 assert(roffs[t].proc);
808 return((*roffs[t].proc)(r, t, bufp, szp, ln, ppos, pos, offs));
809 }
810
811 void
812 roff_endparse(struct roff *r)
813 {
814
815 if (r->last)
816 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
817 r->last->line, r->last->col,
818 roffs[r->last->tok].name);
819
820 if (r->eqn) {
821 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
822 r->eqn->eqn.ln, r->eqn->eqn.pos, "EQ");
823 eqn_end(&r->eqn);
824 }
825
826 if (r->tbl) {
827 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
828 r->tbl->line, r->tbl->pos, "TS");
829 tbl_end(&r->tbl);
830 }
831 }
832
833 /*
834 * Parse a roff node's type from the input buffer. This must be in the
835 * form of ".foo xxx" in the usual way.
836 */
837 static enum rofft
838 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
839 {
840 char *cp;
841 const char *mac;
842 size_t maclen;
843 enum rofft t;
844
845 cp = buf + *pos;
846
847 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
848 return(ROFF_MAX);
849
850 mac = cp;
851 maclen = roff_getname(r, &cp, ln, ppos);
852
853 t = (r->current_string = roff_getstrn(r, mac, maclen))
854 ? ROFF_USERDEF : roffhash_find(mac, maclen);
855
856 if (ROFF_MAX != t)
857 *pos = cp - buf;
858
859 return(t);
860 }
861
862 static enum rofferr
863 roff_cblock(ROFF_ARGS)
864 {
865
866 /*
867 * A block-close `..' should only be invoked as a child of an
868 * ignore macro, otherwise raise a warning and just ignore it.
869 */
870
871 if (NULL == r->last) {
872 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
873 ln, ppos, "..");
874 return(ROFF_IGN);
875 }
876
877 switch (r->last->tok) {
878 case ROFF_am:
879 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
880 /* FALLTHROUGH */
881 case ROFF_ami:
882 /* FALLTHROUGH */
883 case ROFF_de:
884 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
885 /* FALLTHROUGH */
886 case ROFF_dei:
887 /* FALLTHROUGH */
888 case ROFF_ig:
889 break;
890 default:
891 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
892 ln, ppos, "..");
893 return(ROFF_IGN);
894 }
895
896 if ((*bufp)[pos])
897 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
898 ".. %s", *bufp + pos);
899
900 roffnode_pop(r);
901 roffnode_cleanscope(r);
902 return(ROFF_IGN);
903
904 }
905
906 static void
907 roffnode_cleanscope(struct roff *r)
908 {
909
910 while (r->last) {
911 if (--r->last->endspan != 0)
912 break;
913 roffnode_pop(r);
914 }
915 }
916
917 static void
918 roff_ccond(struct roff *r, int ln, int ppos)
919 {
920
921 if (NULL == r->last) {
922 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
923 ln, ppos, "\\}");
924 return;
925 }
926
927 switch (r->last->tok) {
928 case ROFF_el:
929 /* FALLTHROUGH */
930 case ROFF_ie:
931 /* FALLTHROUGH */
932 case ROFF_if:
933 break;
934 default:
935 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
936 ln, ppos, "\\}");
937 return;
938 }
939
940 if (r->last->endspan > -1) {
941 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
942 ln, ppos, "\\}");
943 return;
944 }
945
946 roffnode_pop(r);
947 roffnode_cleanscope(r);
948 return;
949 }
950
951 static enum rofferr
952 roff_block(ROFF_ARGS)
953 {
954 const char *name;
955 char *iname, *cp;
956 size_t namesz;
957
958 /* Ignore groff compatibility mode for now. */
959
960 if (ROFF_de1 == tok)
961 tok = ROFF_de;
962 else if (ROFF_am1 == tok)
963 tok = ROFF_am;
964
965 /* Parse the macro name argument. */
966
967 cp = *bufp + pos;
968 if (ROFF_ig == tok) {
969 iname = NULL;
970 namesz = 0;
971 } else {
972 iname = cp;
973 namesz = roff_getname(r, &cp, ln, ppos);
974 iname[namesz] = '\0';
975 }
976
977 /* Resolve the macro name argument if it is indirect. */
978
979 if (namesz && (ROFF_dei == tok || ROFF_ami == tok)) {
980 if (NULL == (name = roff_getstrn(r, iname, namesz))) {
981 mandoc_vmsg(MANDOCERR_STR_UNDEF,
982 r->parse, ln, (int)(iname - *bufp),
983 "%.*s", (int)namesz, iname);
984 namesz = 0;
985 } else
986 namesz = strlen(name);
987 } else
988 name = iname;
989
990 if (0 == namesz && ROFF_ig != tok) {
991 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
992 ln, ppos, roffs[tok].name);
993 return(ROFF_IGN);
994 }
995
996 roffnode_push(r, tok, name, ln, ppos);
997
998 /*
999 * At the beginning of a `de' macro, clear the existing string
1000 * with the same name, if there is one. New content will be
1001 * appended from roff_block_text() in multiline mode.
1002 */
1003
1004 if (ROFF_de == tok || ROFF_dei == tok)
1005 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1006
1007 if ('\0' == *cp)
1008 return(ROFF_IGN);
1009
1010 /* Get the custom end marker. */
1011
1012 iname = cp;
1013 namesz = roff_getname(r, &cp, ln, ppos);
1014
1015 /* Resolve the end marker if it is indirect. */
1016
1017 if (namesz && (ROFF_dei == tok || ROFF_ami == tok)) {
1018 if (NULL == (name = roff_getstrn(r, iname, namesz))) {
1019 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1020 r->parse, ln, (int)(iname - *bufp),
1021 "%.*s", (int)namesz, iname);
1022 namesz = 0;
1023 } else
1024 namesz = strlen(name);
1025 } else
1026 name = iname;
1027
1028 if (namesz)
1029 r->last->end = mandoc_strndup(name, namesz);
1030
1031 if ('\0' != *cp)
1032 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
1033 ln, pos, ".%s ... %s", roffs[tok].name, cp);
1034
1035 return(ROFF_IGN);
1036 }
1037
1038 static enum rofferr
1039 roff_block_sub(ROFF_ARGS)
1040 {
1041 enum rofft t;
1042 int i, j;
1043
1044 /*
1045 * First check whether a custom macro exists at this level. If
1046 * it does, then check against it. This is some of groff's
1047 * stranger behaviours. If we encountered a custom end-scope
1048 * tag and that tag also happens to be a "real" macro, then we
1049 * need to try interpreting it again as a real macro. If it's
1050 * not, then return ignore. Else continue.
1051 */
1052
1053 if (r->last->end) {
1054 for (i = pos, j = 0; r->last->end[j]; j++, i++)
1055 if ((*bufp)[i] != r->last->end[j])
1056 break;
1057
1058 if ('\0' == r->last->end[j] &&
1059 ('\0' == (*bufp)[i] ||
1060 ' ' == (*bufp)[i] ||
1061 '\t' == (*bufp)[i])) {
1062 roffnode_pop(r);
1063 roffnode_cleanscope(r);
1064
1065 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
1066 i++;
1067
1068 pos = i;
1069 if (ROFF_MAX != roff_parse(r, *bufp, &pos, ln, ppos))
1070 return(ROFF_RERUN);
1071 return(ROFF_IGN);
1072 }
1073 }
1074
1075 /*
1076 * If we have no custom end-query or lookup failed, then try
1077 * pulling it out of the hashtable.
1078 */
1079
1080 t = roff_parse(r, *bufp, &pos, ln, ppos);
1081
1082 if (ROFF_cblock != t) {
1083 if (ROFF_ig != tok)
1084 roff_setstr(r, r->last->name, *bufp + ppos, 2);
1085 return(ROFF_IGN);
1086 }
1087
1088 assert(roffs[t].proc);
1089 return((*roffs[t].proc)(r, t, bufp, szp, ln, ppos, pos, offs));
1090 }
1091
1092 static enum rofferr
1093 roff_block_text(ROFF_ARGS)
1094 {
1095
1096 if (ROFF_ig != tok)
1097 roff_setstr(r, r->last->name, *bufp + pos, 2);
1098
1099 return(ROFF_IGN);
1100 }
1101
1102 static enum rofferr
1103 roff_cond_sub(ROFF_ARGS)
1104 {
1105 enum rofft t;
1106 char *ep;
1107 int rr;
1108
1109 rr = r->last->rule;
1110 roffnode_cleanscope(r);
1111 t = roff_parse(r, *bufp, &pos, ln, ppos);
1112
1113 /*
1114 * Fully handle known macros when they are structurally
1115 * required or when the conditional evaluated to true.
1116 */
1117
1118 if ((ROFF_MAX != t) &&
1119 (rr || ROFFMAC_STRUCT & roffs[t].flags)) {
1120 assert(roffs[t].proc);
1121 return((*roffs[t].proc)(r, t, bufp, szp,
1122 ln, ppos, pos, offs));
1123 }
1124
1125 /*
1126 * If `\}' occurs on a macro line without a preceding macro,
1127 * drop the line completely.
1128 */
1129
1130 ep = *bufp + pos;
1131 if ('\\' == ep[0] && '}' == ep[1])
1132 rr = 0;
1133
1134 /* Always check for the closing delimiter `\}'. */
1135
1136 while (NULL != (ep = strchr(ep, '\\'))) {
1137 if ('}' == *(++ep)) {
1138 *ep = '&';
1139 roff_ccond(r, ln, ep - *bufp - 1);
1140 }
1141 ++ep;
1142 }
1143 return(rr ? ROFF_CONT : ROFF_IGN);
1144 }
1145
1146 static enum rofferr
1147 roff_cond_text(ROFF_ARGS)
1148 {
1149 char *ep;
1150 int rr;
1151
1152 rr = r->last->rule;
1153 roffnode_cleanscope(r);
1154
1155 ep = *bufp + pos;
1156 while (NULL != (ep = strchr(ep, '\\'))) {
1157 if ('}' == *(++ep)) {
1158 *ep = '&';
1159 roff_ccond(r, ln, ep - *bufp - 1);
1160 }
1161 ++ep;
1162 }
1163 return(rr ? ROFF_CONT : ROFF_IGN);
1164 }
1165
/*
 * Parse a single signed integer number.  Stop at the first non-digit.
 * If there is at least one digit, store the number in *res, advance
 * the parse point *pos behind the last digit, and return 1.
 * Otherwise, store 0 in *res, leave the parse point unchanged, and
 * return 0.  The res argument may be NULL if the value is not needed.
 *
 * Numbers too large for an int are not an error, they wrap around.
 * The digits are accumulated in unsigned arithmetic because that
 * wraps around in a well-defined way, whereas overflowing or
 * negating a signed int would be undefined behaviour.
 */
static int
roff_getnum(const char *v, int *pos, int *res)
{
	unsigned int	 acc;
	int		 myres, n, p;

	if (NULL == res)
		res = &myres;

	/* An optional leading minus sign. */

	p = *pos;
	n = v[p] == '-';
	if (n)
		p++;

	for (acc = 0; isdigit((unsigned char)v[p]); p++)
		acc = 10 * acc + (unsigned int)(v[p] - '0');

	/* No digit was found, maybe apart from a sign. */

	if (p == *pos + n) {
		*res = 0;
		return 0;
	}

	if (n)
		acc = 0 - acc;

	*res = (int)acc;
	*pos = p;
	return 1;
}
1196
/*
 * Evaluate a string comparison condition.
 * The first character is the delimiter.
 * Succeed if the string up to its second occurrence
 * matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or lacking that, to the end of the line.
 * If there is nothing at all at the cursor, fail
 * without moving the cursor.
 * Returns 1 if the strings match, 0 otherwise.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*s1, *s2, *s3;
	int		 match;

	match = 0;
	s1 = v + *pos;		/* initial delimiter */

	/*
	 * At the end of the line, there is not even a delimiter,
	 * and looking at s1 + 1 would access memory after the string.
	 */
	if ('\0' == *s1)
		return(0);

	s2 = s1 + 1;		/* for scanning the first string */
	s3 = strchr(s2, *s1);	/* for scanning the second string */

	if (NULL == s3)	/* found no middle delimiter */
		goto out;

	while ('\0' != *++s3) {
		if (*s2 != *s3) {  /* mismatch */
			s3 = strchr(s3, *s1);
			break;
		}
		if (*s3 == *s1) {  /* found the final delimiter */
			match = 1;
			break;
		}
		s2++;
	}

out:
	/*
	 * Here, s3 is NULL if a delimiter is missing after a mismatch
	 * or right from the start, it points to the terminating nil if
	 * the line ended during the comparison, and it points to the
	 * final delimiter otherwise.  Only a delimiter is skipped,
	 * such that the cursor never moves beyond the end of the line.
	 */
	if (NULL == s3)
		s3 = strchr(s2, '\0');
	else if ('\0' != *s3)
		s3++;
	*pos = (int)(s3 - v);
	return(match);
}
1239
/*
 * Evaluate an optionally negated single character, numerical,
 * or string condition found in v at index *pos, and advance *pos
 * behind whatever was consumed.
 * A leading `!' inverts the result.  A numerical condition is true
 * if the expression is greater than zero.  If the text is not a
 * numeric expression, it is handed to roff_evalstrcond().
 * A missing condition (end of line) is always false.
 */
static int
roff_evalcond(struct roff *r, int ln, const char *v, int *pos)
{
	int	 wanttrue, number;

	if ('!' == v[*pos]) {
		wanttrue = 0;
		(*pos)++;
	} else
		wanttrue = 1;

	switch (v[*pos]) {
	case '\0':
		/*
		 * Nothing left to evaluate.  Do not fall through
		 * to the string comparison, which needs at least
		 * a delimiter character to work on.
		 */
		return(0);
	case 'n':
		/* FALLTHROUGH */
	case 'o':
		/* These one-letter conditions always hold here. */
		(*pos)++;
		return(wanttrue);
	case 'c':
		/* FALLTHROUGH */
	case 'd':
		/* FALLTHROUGH */
	case 'e':
		/* FALLTHROUGH */
	case 'r':
		/* FALLTHROUGH */
	case 't':
		/* These one-letter conditions never hold here. */
		(*pos)++;
		return(!wanttrue);
	default:
		break;
	}

	if (roff_evalnum(r, ln, v, pos, &number, 0))
		return((number > 0) == wanttrue);
	else
		return(roff_evalstrcond(v, pos) == wanttrue);
}
1281
1282 static enum rofferr
1283 roff_line_ignore(ROFF_ARGS)
1284 {
1285
1286 return(ROFF_IGN);
1287 }
1288
1289 static enum rofferr
1290 roff_cond(ROFF_ARGS)
1291 {
1292
1293 roffnode_push(r, tok, NULL, ln, ppos);
1294
1295 /*
1296 * An `.el' has no conditional body: it will consume the value
1297 * of the current rstack entry set in prior `ie' calls or
1298 * defaults to DENY.
1299 *
1300 * If we're not an `el', however, then evaluate the conditional.
1301 */
1302
1303 r->last->rule = ROFF_el == tok ?
1304 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
1305 roff_evalcond(r, ln, *bufp, &pos);
1306
1307 /*
1308 * An if-else will put the NEGATION of the current evaluated
1309 * conditional into the stack of rules.
1310 */
1311
1312 if (ROFF_ie == tok) {
1313 if (r->rstackpos + 1 == r->rstacksz) {
1314 r->rstacksz += 16;
1315 r->rstack = mandoc_reallocarray(r->rstack,
1316 r->rstacksz, sizeof(int));
1317 }
1318 r->rstack[++r->rstackpos] = !r->last->rule;
1319 }
1320
1321 /* If the parent has false as its rule, then so do we. */
1322
1323 if (r->last->parent && !r->last->parent->rule)
1324 r->last->rule = 0;
1325
1326 /*
1327 * Determine scope.
1328 * If there is nothing on the line after the conditional,
1329 * not even whitespace, use next-line scope.
1330 */
1331
1332 if ('\0' == (*bufp)[pos]) {
1333 r->last->endspan = 2;
1334 goto out;
1335 }
1336
1337 while (' ' == (*bufp)[pos])
1338 pos++;
1339
1340 /* An opening brace requests multiline scope. */
1341
1342 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1343 r->last->endspan = -1;
1344 pos += 2;
1345 goto out;
1346 }
1347
1348 /*
1349 * Anything else following the conditional causes
1350 * single-line scope. Warn if the scope contains
1351 * nothing but trailing whitespace.
1352 */
1353
1354 if ('\0' == (*bufp)[pos])
1355 mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
1356 ln, ppos, roffs[tok].name);
1357
1358 r->last->endspan = 1;
1359
1360 out:
1361 *offs = pos;
1362 return(ROFF_RERUN);
1363 }
1364
1365 static enum rofferr
1366 roff_ds(ROFF_ARGS)
1367 {
1368 char *string;
1369 const char *name;
1370 size_t namesz;
1371
1372 /*
1373 * The first word is the name of the string.
1374 * If it is empty or terminated by an escape sequence,
1375 * abort the `ds' request without defining anything.
1376 */
1377
1378 name = string = *bufp + pos;
1379 if ('\0' == *name)
1380 return(ROFF_IGN);
1381
1382 namesz = roff_getname(r, &string, ln, pos);
1383 if ('\\' == name[namesz])
1384 return(ROFF_IGN);
1385
1386 /* Read past the initial double-quote, if any. */
1387 if ('"' == *string)
1388 string++;
1389
1390 /* The rest is the value. */
1391 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
1392 ROFF_as == tok);
1393 return(ROFF_IGN);
1394 }
1395
/*
 * Recognize one operator of one or two characters at v[*pos].
 * On success, store a one-character code in *res, move *pos
 * behind the operator and return that code (which is non-zero).
 * Two-character operators are coded as follows:
 *   <= 'l'   >= 'g'   <> '!'   <? 'i'   >? 'a'   == '='
 * On failure, return 0 and keep *pos; *res then holds the
 * unrecognized character.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	char	 next;
	int	 width;

	*res = v[*pos];
	width = 1;

	if (*res == '<') {
		next = v[*pos + 1];
		if (next == '=') {
			*res = 'l';
			width = 2;
		} else if (next == '>') {
			*res = '!';
			width = 2;
		} else if (next == '?') {
			*res = 'i';
			width = 2;
		}
	} else if (*res == '>') {
		next = v[*pos + 1];
		if (next == '=') {
			*res = 'g';
			width = 2;
		} else if (next == '?') {
			*res = 'a';
			width = 2;
		}
	} else if (*res == '=') {
		/* `==' is merely a longer spelling of `='. */
		if (v[*pos + 1] == '=')
			width = 2;
	} else if (*res != '+' && *res != '-' && *res != '*' &&
	    *res != '/' && *res != '%' && *res != '&' && *res != ':') {
		return 0;
	}

	*pos += width;
	return *res;
}
1465
/*
 * Evaluate one operand: either a numeric expression enclosed in
 * parentheses or a single signed integer.  Returns 1 on success
 * with *pos advanced, 0 on failure.
 * With res == NULL (validation only), a missing closing
 * parenthesis counts as a failure; when a result is requested,
 * it is tolerated.
 */
static int
roff_evalpar(struct roff *r, int ln,
	const char *v, int *pos, int *res)
{
	/* No parenthesis: it has to be a plain number. */
	if (v[*pos] != '(')
		return roff_getnum(v, pos, res);

	++*pos;
	if (roff_evalnum(r, ln, v, pos, res, 1) == 0)
		return 0;

	if (v[*pos] == ')') {
		++*pos;
		return 1;
	}
	return res != NULL;
}
1495
1496 /*
1497 * Evaluate a complete numeric expression.
1498 * Proceed left to right, there is no concept of precedence.
1499 */
1500 static int
1501 roff_evalnum(struct roff *r, int ln, const char *v,
1502 int *pos, int *res, int skipwhite)
1503 {
1504 int mypos, operand2;
1505 char operator;
1506
1507 if (NULL == pos) {
1508 mypos = 0;
1509 pos = &mypos;
1510 }
1511
1512 if (skipwhite)
1513 while (isspace((unsigned char)v[*pos]))
1514 (*pos)++;
1515
1516 if ( ! roff_evalpar(r, ln, v, pos, res))
1517 return(0);
1518
1519 while (1) {
1520 if (skipwhite)
1521 while (isspace((unsigned char)v[*pos]))
1522 (*pos)++;
1523
1524 if ( ! roff_getop(v, pos, &operator))
1525 break;
1526
1527 if (skipwhite)
1528 while (isspace((unsigned char)v[*pos]))
1529 (*pos)++;
1530
1531 if ( ! roff_evalpar(r, ln, v, pos, &operand2))
1532 return(0);
1533
1534 if (skipwhite)
1535 while (isspace((unsigned char)v[*pos]))
1536 (*pos)++;
1537
1538 if (NULL == res)
1539 continue;
1540
1541 switch (operator) {
1542 case '+':
1543 *res += operand2;
1544 break;
1545 case '-':
1546 *res -= operand2;
1547 break;
1548 case '*':
1549 *res *= operand2;
1550 break;
1551 case '/':
1552 if (0 == operand2) {
1553 mandoc_msg(MANDOCERR_DIVZERO,
1554 r->parse, ln, *pos, v);
1555 *res = 0;
1556 break;
1557 }
1558 *res /= operand2;
1559 break;
1560 case '%':
1561 *res %= operand2;
1562 break;
1563 case '<':
1564 *res = *res < operand2;
1565 break;
1566 case '>':
1567 *res = *res > operand2;
1568 break;
1569 case 'l':
1570 *res = *res <= operand2;
1571 break;
1572 case 'g':
1573 *res = *res >= operand2;
1574 break;
1575 case '=':
1576 *res = *res == operand2;
1577 break;
1578 case '!':
1579 *res = *res != operand2;
1580 break;
1581 case '&':
1582 *res = *res && operand2;
1583 break;
1584 case ':':
1585 *res = *res || operand2;
1586 break;
1587 case 'i':
1588 if (operand2 < *res)
1589 *res = operand2;
1590 break;
1591 case 'a':
1592 if (operand2 > *res)
1593 *res = operand2;
1594 break;
1595 default:
1596 abort();
1597 }
1598 }
1599 return(1);
1600 }
1601
1602 void
1603 roff_setreg(struct roff *r, const char *name, int val, char sign)
1604 {
1605 struct roffreg *reg;
1606
1607 /* Search for an existing register with the same name. */
1608 reg = r->regtab;
1609
1610 while (reg && strcmp(name, reg->key.p))
1611 reg = reg->next;
1612
1613 if (NULL == reg) {
1614 /* Create a new register. */
1615 reg = mandoc_malloc(sizeof(struct roffreg));
1616 reg->key.p = mandoc_strdup(name);
1617 reg->key.sz = strlen(name);
1618 reg->val = 0;
1619 reg->next = r->regtab;
1620 r->regtab = reg;
1621 }
1622
1623 if ('+' == sign)
1624 reg->val += val;
1625 else if ('-' == sign)
1626 reg->val -= val;
1627 else
1628 reg->val = val;
1629 }
1630
/*
 * Look up a predefined read-only number register by the first
 * character of name and return its fixed value.
 * Return -1 if there is no such register; this only works as
 * long as no predefined register has the value -1 itself.
 */
static int
roff_getregro(const char *name)
{
	static const struct {
		char	 key;
		int	 val;
	} predefined[] = {
		{ 'A', 0 },	/* ASCII approximation mode is always off. */
		{ 'g', 1 },	/* Groff compatibility mode is always on. */
		{ 'H', 24 },	/* Fixed horizontal resolution. */
		{ 'j', 0 },	/* Always adjust left margin only. */
		{ 'T', 1 },	/* Some output device is always defined. */
		{ 'V', 40 }	/* Fixed vertical resolution. */
	};
	unsigned int	 i;

	for (i = 0; i < sizeof(predefined) / sizeof(predefined[0]); i++)
		if (predefined[i].key == *name)
			return predefined[i].val;

	return -1;
}
1658
1659 int
1660 roff_getreg(const struct roff *r, const char *name)
1661 {
1662 struct roffreg *reg;
1663 int val;
1664
1665 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
1666 val = roff_getregro(name + 1);
1667 if (-1 != val)
1668 return (val);
1669 }
1670
1671 for (reg = r->regtab; reg; reg = reg->next)
1672 if (0 == strcmp(name, reg->key.p))
1673 return(reg->val);
1674
1675 return(0);
1676 }
1677
1678 static int
1679 roff_getregn(const struct roff *r, const char *name, size_t len)
1680 {
1681 struct roffreg *reg;
1682 int val;
1683
1684 if ('.' == name[0] && 2 == len) {
1685 val = roff_getregro(name + 1);
1686 if (-1 != val)
1687 return (val);
1688 }
1689
1690 for (reg = r->regtab; reg; reg = reg->next)
1691 if (len == reg->key.sz &&
1692 0 == strncmp(name, reg->key.p, len))
1693 return(reg->val);
1694
1695 return(0);
1696 }
1697
1698 static void
1699 roff_freereg(struct roffreg *reg)
1700 {
1701 struct roffreg *old_reg;
1702
1703 while (NULL != reg) {
1704 free(reg->key.p);
1705 old_reg = reg;
1706 reg = reg->next;
1707 free(old_reg);
1708 }
1709 }
1710
1711 static enum rofferr
1712 roff_nr(ROFF_ARGS)
1713 {
1714 char *key, *val;
1715 size_t keysz;
1716 int iv;
1717 char sign;
1718
1719 key = val = *bufp + pos;
1720 if ('\0' == *key)
1721 return(ROFF_IGN);
1722
1723 keysz = roff_getname(r, &val, ln, pos);
1724 if ('\\' == key[keysz])
1725 return(ROFF_IGN);
1726 key[keysz] = '\0';
1727
1728 sign = *val;
1729 if ('+' == sign || '-' == sign)
1730 val++;
1731
1732 if (roff_evalnum(r, ln, val, NULL, &iv, 0))
1733 roff_setreg(r, key, iv, sign);
1734
1735 return(ROFF_IGN);
1736 }
1737
1738 static enum rofferr
1739 roff_rr(ROFF_ARGS)
1740 {
1741 struct roffreg *reg, **prev;
1742 char *name, *cp;
1743 size_t namesz;
1744
1745 name = cp = *bufp + pos;
1746 if ('\0' == *name)
1747 return(ROFF_IGN);
1748 namesz = roff_getname(r, &cp, ln, pos);
1749 name[namesz] = '\0';
1750
1751 prev = &r->regtab;
1752 while (1) {
1753 reg = *prev;
1754 if (NULL == reg || !strcmp(name, reg->key.p))
1755 break;
1756 prev = &reg->next;
1757 }
1758 if (NULL != reg) {
1759 *prev = reg->next;
1760 free(reg->key.p);
1761 free(reg);
1762 }
1763 return(ROFF_IGN);
1764 }
1765
1766 static enum rofferr
1767 roff_rm(ROFF_ARGS)
1768 {
1769 const char *name;
1770 char *cp;
1771 size_t namesz;
1772
1773 cp = *bufp + pos;
1774 while ('\0' != *cp) {
1775 name = cp;
1776 namesz = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1777 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
1778 if ('\\' == name[namesz])
1779 break;
1780 }
1781 return(ROFF_IGN);
1782 }
1783
1784 static enum rofferr
1785 roff_it(ROFF_ARGS)
1786 {
1787 char *cp;
1788 size_t len;
1789 int iv;
1790
1791 /* Parse the number of lines. */
1792 cp = *bufp + pos;
1793 len = strcspn(cp, " \t");
1794 cp[len] = '\0';
1795 if ((iv = mandoc_strntoi(cp, len, 10)) <= 0) {
1796 mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
1797 ln, ppos, *bufp + 1);
1798 return(ROFF_IGN);
1799 }
1800 cp += len + 1;
1801
1802 /* Arm the input line trap. */
1803 roffit_lines = iv;
1804 roffit_macro = mandoc_strdup(cp);
1805 return(ROFF_IGN);
1806 }
1807
1808 static enum rofferr
1809 roff_Dd(ROFF_ARGS)
1810 {
1811 const char *const *cp;
1812
1813 if ((r->options & (MPARSE_MDOC | MPARSE_QUICK)) == 0)
1814 for (cp = __mdoc_reserved; *cp; cp++)
1815 roff_setstr(r, *cp, NULL, 0);
1816
1817 if (r->format == 0)
1818 r->format = MPARSE_MDOC;
1819
1820 return(ROFF_CONT);
1821 }
1822
1823 static enum rofferr
1824 roff_TH(ROFF_ARGS)
1825 {
1826 const char *const *cp;
1827
1828 if ((r->options & MPARSE_QUICK) == 0)
1829 for (cp = __man_reserved; *cp; cp++)
1830 roff_setstr(r, *cp, NULL, 0);
1831
1832 if (r->format == 0)
1833 r->format = MPARSE_MAN;
1834
1835 return(ROFF_CONT);
1836 }
1837
1838 static enum rofferr
1839 roff_TE(ROFF_ARGS)
1840 {
1841
1842 if (NULL == r->tbl)
1843 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1844 ln, ppos, "TE");
1845 else
1846 tbl_end(&r->tbl);
1847
1848 return(ROFF_IGN);
1849 }
1850
1851 static enum rofferr
1852 roff_T_(ROFF_ARGS)
1853 {
1854
1855 if (NULL == r->tbl)
1856 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1857 ln, ppos, "T&");
1858 else
1859 tbl_restart(ppos, ln, r->tbl);
1860
1861 return(ROFF_IGN);
1862 }
1863
1864 /*
1865 * Handle in-line equation delimiters.
1866 */
1867 static enum rofferr
1868 roff_eqndelim(struct roff *r, char **bufp, size_t *szp, int pos)
1869 {
1870 char *cp1, *cp2;
1871 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
1872
1873 /*
1874 * Outside equations, look for an opening delimiter.
1875 * If we are inside an equation, we already know it is
1876 * in-line, or this function wouldn't have been called;
1877 * so look for a closing delimiter.
1878 */
1879
1880 cp1 = *bufp + pos;
1881 cp2 = strchr(cp1, r->eqn == NULL ?
1882 r->last_eqn->odelim : r->last_eqn->cdelim);
1883 if (cp2 == NULL)
1884 return(ROFF_CONT);
1885
1886 *cp2++ = '\0';
1887 bef_pr = bef_nl = aft_nl = aft_pr = "";
1888
1889 /* Handle preceding text, protecting whitespace. */
1890
1891 if (**bufp != '\0') {
1892 if (r->eqn == NULL)
1893 bef_pr = "\\&";
1894 bef_nl = "\n";
1895 }
1896
1897 /*
1898 * Prepare replacing the delimiter with an equation macro
1899 * and drop leading white space from the equation.
1900 */
1901
1902 if (r->eqn == NULL) {
1903 while (*cp2 == ' ')
1904 cp2++;
1905 mac = ".EQ";
1906 } else
1907 mac = ".EN";
1908
1909 /* Handle following text, protecting whitespace. */
1910
1911 if (*cp2 != '\0') {
1912 aft_nl = "\n";
1913 if (r->eqn != NULL)
1914 aft_pr = "\\&";
1915 }
1916
1917 /* Do the actual replacement. */
1918
1919 *szp = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", *bufp,
1920 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
1921 free(*bufp);
1922 *bufp = cp1;
1923
1924 /* Toggle the in-line state of the eqn subsystem. */
1925
1926 r->eqn_inline = r->eqn == NULL;
1927 return(ROFF_REPARSE);
1928 }
1929
1930 static enum rofferr
1931 roff_EQ(ROFF_ARGS)
1932 {
1933 struct eqn_node *e;
1934
1935 assert(NULL == r->eqn);
1936 e = eqn_alloc(ppos, ln, r->parse);
1937
1938 if (r->last_eqn) {
1939 r->last_eqn->next = e;
1940 e->delim = r->last_eqn->delim;
1941 e->odelim = r->last_eqn->odelim;
1942 e->cdelim = r->last_eqn->cdelim;
1943 } else
1944 r->first_eqn = r->last_eqn = e;
1945
1946 r->eqn = r->last_eqn = e;
1947
1948 if ((*bufp)[pos])
1949 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1950 ".EQ %s", *bufp + pos);
1951
1952 return(ROFF_IGN);
1953 }
1954
1955 static enum rofferr
1956 roff_EN(ROFF_ARGS)
1957 {
1958
1959 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
1960 return(ROFF_IGN);
1961 }
1962
1963 static enum rofferr
1964 roff_TS(ROFF_ARGS)
1965 {
1966 struct tbl_node *tbl;
1967
1968 if (r->tbl) {
1969 mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
1970 ln, ppos, "TS breaks TS");
1971 tbl_end(&r->tbl);
1972 }
1973
1974 tbl = tbl_alloc(ppos, ln, r->parse);
1975
1976 if (r->last_tbl)
1977 r->last_tbl->next = tbl;
1978 else
1979 r->first_tbl = r->last_tbl = tbl;
1980
1981 r->tbl = r->last_tbl = tbl;
1982 return(ROFF_IGN);
1983 }
1984
1985 static enum rofferr
1986 roff_cc(ROFF_ARGS)
1987 {
1988 const char *p;
1989
1990 p = *bufp + pos;
1991
1992 if ('\0' == *p || '.' == (r->control = *p++))
1993 r->control = 0;
1994
1995 if ('\0' != *p)
1996 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1997
1998 return(ROFF_IGN);
1999 }
2000
2001 static enum rofferr
2002 roff_tr(ROFF_ARGS)
2003 {
2004 const char *p, *first, *second;
2005 size_t fsz, ssz;
2006 enum mandoc_esc esc;
2007
2008 p = *bufp + pos;
2009
2010 if ('\0' == *p) {
2011 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
2012 return(ROFF_IGN);
2013 }
2014
2015 while ('\0' != *p) {
2016 fsz = ssz = 1;
2017
2018 first = p++;
2019 if ('\\' == *first) {
2020 esc = mandoc_escape(&p, NULL, NULL);
2021 if (ESCAPE_ERROR == esc) {
2022 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2023 ln, (int)(p - *bufp), first);
2024 return(ROFF_IGN);
2025 }
2026 fsz = (size_t)(p - first);
2027 }
2028
2029 second = p++;
2030 if ('\\' == *second) {
2031 esc = mandoc_escape(&p, NULL, NULL);
2032 if (ESCAPE_ERROR == esc) {
2033 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2034 ln, (int)(p - *bufp), second);
2035 return(ROFF_IGN);
2036 }
2037 ssz = (size_t)(p - second);
2038 } else if ('\0' == *second) {
2039 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
2040 ln, (int)(p - *bufp), NULL);
2041 second = " ";
2042 p--;
2043 }
2044
2045 if (fsz > 1) {
2046 roff_setstrn(&r->xmbtab, first, fsz,
2047 second, ssz, 0);
2048 continue;
2049 }
2050
2051 if (NULL == r->xtab)
2052 r->xtab = mandoc_calloc(128,
2053 sizeof(struct roffstr));
2054
2055 free(r->xtab[(int)*first].p);
2056 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
2057 r->xtab[(int)*first].sz = ssz;
2058 }
2059
2060 return(ROFF_IGN);
2061 }
2062
2063 static enum rofferr
2064 roff_so(ROFF_ARGS)
2065 {
2066 char *name;
2067
2068 name = *bufp + pos;
2069 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
2070
2071 /*
2072 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
2073 * opening anything that's not in our cwd or anything beneath
2074 * it. Thus, explicitly disallow traversing up the file-system
2075 * or using absolute paths.
2076 */
2077
2078 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
2079 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
2080 ".so %s", name);
2081 return(ROFF_ERR);
2082 }
2083
2084 *offs = pos;
2085 return(ROFF_SO);
2086 }
2087
2088 static enum rofferr
2089 roff_userdef(ROFF_ARGS)
2090 {
2091 const char *arg[9];
2092 char *cp, *n1, *n2;
2093 int i;
2094
2095 /*
2096 * Collect pointers to macro argument strings
2097 * and NUL-terminate them.
2098 */
2099 cp = *bufp + pos;
2100 for (i = 0; i < 9; i++)
2101 arg[i] = '\0' == *cp ? "" :
2102 mandoc_getarg(r->parse, &cp, ln, &pos);
2103
2104 /*
2105 * Expand macro arguments.
2106 */
2107 *szp = 0;
2108 n1 = cp = mandoc_strdup(r->current_string);
2109 while (NULL != (cp = strstr(cp, "\\$"))) {
2110 i = cp[2] - '1';
2111 if (0 > i || 8 < i) {
2112 /* Not an argument invocation. */
2113 cp += 2;
2114 continue;
2115 }
2116 *cp = '\0';
2117 *szp = mandoc_asprintf(&n2, "%s%s%s",
2118 n1, arg[i], cp + 3) + 1;
2119 cp = n2 + (cp - n1);
2120 free(n1);
2121 n1 = n2;
2122 }
2123
2124 /*
2125 * Replace the macro invocation
2126 * by the expanded macro.
2127 */
2128 free(*bufp);
2129 *bufp = n1;
2130 if (0 == *szp)
2131 *szp = strlen(*bufp) + 1;
2132
2133 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
2134 ROFF_REPARSE : ROFF_APPEND);
2135 }
2136
2137 static size_t
2138 roff_getname(struct roff *r, char **cpp, int ln, int pos)
2139 {
2140 char *name, *cp;
2141 size_t namesz;
2142
2143 name = *cpp;
2144 if ('\0' == *name)
2145 return(0);
2146
2147 /* Read until end of name and terminate it with NUL. */
2148 for (cp = name; 1; cp++) {
2149 if ('\0' == *cp || ' ' == *cp) {
2150 namesz = cp - name;
2151 break;
2152 }
2153 if ('\\' != *cp)
2154 continue;
2155 namesz = cp - name;
2156 if ('{' == cp[1] || '}' == cp[1])
2157 break;
2158 cp++;
2159 if ('\\' == *cp)
2160 continue;
2161 mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
2162 "%.*s", (int)(cp - name + 1), name);
2163 mandoc_escape((const char **)&cp, NULL, NULL);
2164 break;
2165 }
2166
2167 /* Read past spaces. */
2168 while (' ' == *cp)
2169 cp++;
2170
2171 *cpp = cp;
2172 return(namesz);
2173 }
2174
2175 /*
2176 * Store *string into the user-defined string called *name.
2177 * To clear an existing entry, call with (*r, *name, NULL, 0).
2178 * append == 0: replace mode
2179 * append == 1: single-line append mode
2180 * append == 2: multiline append mode, append '\n' after each call
2181 */
2182 static void
2183 roff_setstr(struct roff *r, const char *name, const char *string,
2184 int append)
2185 {
2186
2187 roff_setstrn(&r->strtab, name, strlen(name), string,
2188 string ? strlen(string) : 0, append);
2189 }
2190
2191 static void
2192 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
2193 const char *string, size_t stringsz, int append)
2194 {
2195 struct roffkv *n;
2196 char *c;
2197 int i;
2198 size_t oldch, newch;
2199
2200 /* Search for an existing string with the same name. */
2201 n = *r;
2202
2203 while (n && (namesz != n->key.sz ||
2204 strncmp(n->key.p, name, namesz)))
2205 n = n->next;
2206
2207 if (NULL == n) {
2208 /* Create a new string table entry. */
2209 n = mandoc_malloc(sizeof(struct roffkv));
2210 n->key.p = mandoc_strndup(name, namesz);
2211 n->key.sz = namesz;
2212 n->val.p = NULL;
2213 n->val.sz = 0;
2214 n->next = *r;
2215 *r = n;
2216 } else if (0 == append) {
2217 free(n->val.p);
2218 n->val.p = NULL;
2219 n->val.sz = 0;
2220 }
2221
2222 if (NULL == string)
2223 return;
2224
2225 /*
2226 * One additional byte for the '\n' in multiline mode,
2227 * and one for the terminating '\0'.
2228 */
2229 newch = stringsz + (1 < append ? 2u : 1u);
2230
2231 if (NULL == n->val.p) {
2232 n->val.p = mandoc_malloc(newch);
2233 *n->val.p = '\0';
2234 oldch = 0;
2235 } else {
2236 oldch = n->val.sz;
2237 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
2238 }
2239
2240 /* Skip existing content in the destination buffer. */
2241 c = n->val.p + (int)oldch;
2242
2243 /* Append new content to the destination buffer. */
2244 i = 0;
2245 while (i < (int)stringsz) {
2246 /*
2247 * Rudimentary roff copy mode:
2248 * Handle escaped backslashes.
2249 */
2250 if ('\\' == string[i] && '\\' == string[i + 1])
2251 i++;
2252 *c++ = string[i++];
2253 }
2254
2255 /* Append terminating bytes. */
2256 if (1 < append)
2257 *c++ = '\n';
2258
2259 *c = '\0';
2260 n->val.sz = (int)(c - n->val.p);
2261 }
2262
2263 static const char *
2264 roff_getstrn(const struct roff *r, const char *name, size_t len)
2265 {
2266 const struct roffkv *n;
2267 int i;
2268
2269 for (n = r->strtab; n; n = n->next)
2270 if (0 == strncmp(name, n->key.p, len) &&
2271 '\0' == n->key.p[(int)len])
2272 return(n->val.p);
2273
2274 for (i = 0; i < PREDEFS_MAX; i++)
2275 if (0 == strncmp(name, predefs[i].name, len) &&
2276 '\0' == predefs[i].name[(int)len])
2277 return(predefs[i].str);
2278
2279 return(NULL);
2280 }
2281
2282 static void
2283 roff_freestr(struct roffkv *r)
2284 {
2285 struct roffkv *n, *nn;
2286
2287 for (n = r; n; n = nn) {
2288 free(n->key.p);
2289 free(n->val.p);
2290 nn = n->next;
2291 free(n);
2292 }
2293 }
2294
2295 const struct tbl_span *
2296 roff_span(const struct roff *r)
2297 {
2298
2299 return(r->tbl ? tbl_span(r->tbl) : NULL);
2300 }
2301
2302 const struct eqn *
2303 roff_eqn(const struct roff *r)
2304 {
2305
2306 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
2307 }
2308
2309 /*
2310 * Duplicate an input string, making the appropriate character
2311 * conversations (as stipulated by `tr') along the way.
2312 * Returns a heap-allocated string with all the replacements made.
2313 */
2314 char *
2315 roff_strdup(const struct roff *r, const char *p)
2316 {
2317 const struct roffkv *cp;
2318 char *res;
2319 const char *pp;
2320 size_t ssz, sz;
2321 enum mandoc_esc esc;
2322
2323 if (NULL == r->xmbtab && NULL == r->xtab)
2324 return(mandoc_strdup(p));
2325 else if ('\0' == *p)
2326 return(mandoc_strdup(""));
2327
2328 /*
2329 * Step through each character looking for term matches
2330 * (remember that a `tr' can be invoked with an escape, which is
2331 * a glyph but the escape is multi-character).
2332 * We only do this if the character hash has been initialised
2333 * and the string is >0 length.
2334 */
2335
2336 res = NULL;
2337 ssz = 0;
2338
2339 while ('\0' != *p) {
2340 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
2341 sz = r->xtab[(int)*p].sz;
2342 res = mandoc_realloc(res, ssz + sz + 1);
2343 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
2344 ssz += sz;
2345 p++;
2346 continue;
2347 } else if ('\\' != *p) {
2348 res = mandoc_realloc(res, ssz + 2);
2349 res[ssz++] = *p++;
2350 continue;
2351 }
2352
2353 /* Search for term matches. */
2354 for (cp = r->xmbtab; cp; cp = cp->next)
2355 if (0 == strncmp(p, cp->key.p, cp->key.sz))
2356 break;
2357
2358 if (NULL != cp) {
2359 /*
2360 * A match has been found.
2361 * Append the match to the array and move
2362 * forward by its keysize.
2363 */
2364 res = mandoc_realloc(res,
2365 ssz + cp->val.sz + 1);
2366 memcpy(res + ssz, cp->val.p, cp->val.sz);
2367 ssz += cp->val.sz;
2368 p += (int)cp->key.sz;
2369 continue;
2370 }
2371
2372 /*
2373 * Handle escapes carefully: we need to copy
2374 * over just the escape itself, or else we might
2375 * do replacements within the escape itself.
2376 * Make sure to pass along the bogus string.
2377 */
2378 pp = p++;
2379 esc = mandoc_escape(&p, NULL, NULL);
2380 if (ESCAPE_ERROR == esc) {
2381 sz = strlen(pp);
2382 res = mandoc_realloc(res, ssz + sz + 1);
2383 memcpy(res + ssz, pp, sz);
2384 break;
2385 }
2386 /*
2387 * We bail out on bad escapes.
2388 * No need to warn: we already did so when
2389 * roff_res() was called.
2390 */
2391 sz = (int)(p - pp);
2392 res = mandoc_realloc(res, ssz + sz + 1);
2393 memcpy(res + ssz, pp, sz);
2394 ssz += sz;
2395 }
2396
2397 res[(int)ssz] = '\0';
2398 return(res);
2399 }
2400
2401 int
2402 roff_getformat(const struct roff *r)
2403 {
2404
2405 return(r->format);
2406 }
2407
2408 /*
2409 * Find out whether a line is a macro line or not.
2410 * If it is, adjust the current position and return one; if it isn't,
2411 * return zero and don't change the current position.
2412 * If the control character has been set with `.cc', then let that grain
2413 * precedence.
2414 * This is slighly contrary to groff, where using the non-breaking
2415 * control character when `cc' has been invoked will cause the
2416 * non-breaking macro contents to be printed verbatim.
2417 */
2418 int
2419 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
2420 {
2421 int pos;
2422
2423 pos = *ppos;
2424
2425 if (0 != r->control && cp[pos] == r->control)
2426 pos++;
2427 else if (0 != r->control)
2428 return(0);
2429 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
2430 pos += 2;
2431 else if ('.' == cp[pos] || '\'' == cp[pos])
2432 pos++;
2433 else
2434 return(0);
2435
2436 while (' ' == cp[pos] || '\t' == cp[pos])
2437 pos++;
2438
2439 *ppos = pos;
2440 return(1);
2441 }