]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
We already supported (outer) user-defined strings containing references
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.205 2014/04/07 21:00:08 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include "mandoc.h"
29 #include "mandoc_aux.h"
30 #include "libroff.h"
31 #include "libmandoc.h"
32
33 /* Maximum number of nested if-else conditionals. */
34 #define RSTACK_MAX 128
35
36 /* Maximum number of string expansions per line, to break infinite loops. */
37 #define EXPAND_LIMIT 1000
38
/*
 * Tokens for every request and macro this file knows about.
 * The enumerators are used as indices into the roffs[] table, so the
 * order here and the order of the table rows must be kept in step.
 */
enum	rofft {
	/* roff requests */
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_as,
	ROFF_cc,
	ROFF_ce,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_fam,
	ROFF_hw,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_ps,
	ROFF_rm,
	ROFF_rr,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,

	/* upper-case macros that get special treatment here */
	ROFF_Dd,
	ROFF_TH,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,	/* spelled "T&" in the input */
	ROFF_EQ,
	ROFF_EN,

	/* pseudo tokens */
	ROFF_cblock,	/* the block-closing request, spelled ".." */
	ROFF_USERDEF,	/* a user-defined string called as a macro */
	ROFF_MAX	/* number of tokens; also means "not found" */
};
80
/*
 * Minimal string buffer: a pointer together with its cached length.
 */
struct	roffstr {
	char		*p;	/* NUL-terminated buffer */
	size_t		 sz;	/* cached strlen(p) */
};
88
89 /*
90 * A key-value roffstr pair as part of a singly-linked list.
91 */
92 struct roffkv {
93 struct roffstr key;
94 struct roffstr val;
95 struct roffkv *next; /* next in list */
96 };
97
98 /*
99 * A single number register as part of a singly-linked list.
100 */
101 struct roffreg {
102 struct roffstr key;
103 int val;
104 struct roffreg *next;
105 };
106
107 struct roff {
108 struct mparse *parse; /* parse point */
109 int options; /* parse options */
110 struct roffnode *last; /* leaf of stack */
111 int rstack[RSTACK_MAX]; /* stack of !`ie' rules */
112 char control; /* control character */
113 int rstackpos; /* position in rstack */
114 struct roffreg *regtab; /* number registers */
115 struct roffkv *strtab; /* user-defined strings & macros */
116 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */
117 struct roffstr *xtab; /* single-byte trans table (`tr') */
118 const char *current_string; /* value of last called user macro */
119 struct tbl_node *first_tbl; /* first table parsed */
120 struct tbl_node *last_tbl; /* last table parsed */
121 struct tbl_node *tbl; /* current table being parsed */
122 struct eqn_node *last_eqn; /* last equation parsed */
123 struct eqn_node *first_eqn; /* first equation parsed */
124 struct eqn_node *eqn; /* current equation being parsed */
125 };
126
127 struct roffnode {
128 enum rofft tok; /* type of node */
129 struct roffnode *parent; /* up one in stack */
130 int line; /* parse line */
131 int col; /* parse col */
132 char *name; /* node name, e.g. macro name */
133 char *end; /* end-rules: custom token */
134 int endspan; /* end-rules: next-line or infty */
135 int rule; /* current evaluation rule */
136 };
137
/*
 * Parameter list shared by all request handlers, and the matching
 * function pointer type stored in the roffs[] table.
 */
#define	ROFF_ARGS \
	struct roff *r,		/* parse ctx */ \
	enum rofft tok,		/* tok of macro */ \
	char **bufp,		/* input buffer */ \
	size_t *szp,		/* size of input buffer */ \
	int ln,			/* parse line */ \
	int ppos,		/* original pos in buffer */ \
	int pos,		/* current pos in buffer */ \
	int *offs		/* reset offset of buffer data */

typedef	enum rofferr (*roffproc)(ROFF_ARGS);
148
149 struct roffmac {
150 const char *name; /* macro name */
151 roffproc proc; /* process new macro */
152 roffproc text; /* process as child text of macro */
153 roffproc sub; /* process as child of macro */
154 int flags;
155 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
156 struct roffmac *next;
157 };
158
/*
 * A predefined string: the name used in the input
 * and the text it is replaced with.
 */
struct	predef {
	const char	*name;	/* predefined input name */
	const char	*str;	/* replacement symbol */
};
163
/* Expands to one initializer (with trailing comma) of a struct predef. */
#define	PREDEF(pd_name, pd_str) \
	{ (pd_name), (pd_str) },
166
/* Forward declarations of all file-local functions. */
static enum rofft roffhash_find(const char *, size_t);
static void roffhash_init(void);
static void roffnode_cleanscope(struct roff *);
static void roffnode_pop(struct roff *);
static void roffnode_push(struct roff *, enum rofft, const char *,
		int, int);
static enum rofferr roff_block(ROFF_ARGS);
static enum rofferr roff_block_text(ROFF_ARGS);
static enum rofferr roff_block_sub(ROFF_ARGS);
static enum rofferr roff_cblock(ROFF_ARGS);
static enum rofferr roff_cc(ROFF_ARGS);
static void roff_ccond(struct roff *, int, int);
static enum rofferr roff_cond(ROFF_ARGS);
static enum rofferr roff_cond_text(ROFF_ARGS);
static enum rofferr roff_cond_sub(ROFF_ARGS);
static enum rofferr roff_ds(ROFF_ARGS);
static int roff_evalcond(const char *, int *);
static int roff_evalnum(const char *, int *, int *, int);
static int roff_evalpar(const char *, int *, int *);
static int roff_evalstrcond(const char *, int *);
static void roff_free1(struct roff *);
static void roff_freereg(struct roffreg *);
static void roff_freestr(struct roffkv *);
static char *roff_getname(struct roff *, char **, int, int);
static int roff_getnum(const char *, int *, int *);
static int roff_getop(const char *, int *, char *);
static int roff_getregn(const struct roff *, const char *, size_t);
static int roff_getregro(const char *name);
static const char *roff_getstrn(const struct roff *, const char *, size_t);
static enum rofferr roff_it(ROFF_ARGS);
static enum rofferr roff_line_ignore(ROFF_ARGS);
static enum rofferr roff_nr(ROFF_ARGS);
static void roff_openeqn(struct roff *, const char *, int, int,
		const char *);
static enum rofft roff_parse(struct roff *, const char *, int *);
static enum rofferr roff_parsetext(char **, size_t *, int, int *);
static enum rofferr roff_res(struct roff *, char **, size_t *, int, int);
static enum rofferr roff_rm(ROFF_ARGS);
static enum rofferr roff_rr(ROFF_ARGS);
static void roff_setstr(struct roff *, const char *, const char *, int);
static void roff_setstrn(struct roffkv **, const char *, size_t,
		const char *, size_t, int);
static enum rofferr roff_so(ROFF_ARGS);
static enum rofferr roff_tr(ROFF_ARGS);
static enum rofferr roff_Dd(ROFF_ARGS);
static enum rofferr roff_TH(ROFF_ARGS);
static enum rofferr roff_TE(ROFF_ARGS);
static enum rofferr roff_TS(ROFF_ARGS);
static enum rofferr roff_EQ(ROFF_ARGS);
static enum rofferr roff_EN(ROFF_ARGS);
static enum rofferr roff_T_(ROFF_ARGS);
static enum rofferr roff_userdef(ROFF_ARGS);
223
/*
 * Request names are hashed on their first character only, which has
 * to be a printable ASCII character in the range ASCII_LO..ASCII_HI;
 * see roffhash_find().  There is one bucket per possible character.
 */
#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

static	struct roffmac	*hash[HASHWIDTH];
231
232 static struct roffmac roffs[ROFF_MAX] = {
233 { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
234 { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
235 { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
236 { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
237 { "as", roff_ds, NULL, NULL, 0, NULL },
238 { "cc", roff_cc, NULL, NULL, 0, NULL },
239 { "ce", roff_line_ignore, NULL, NULL, 0, NULL },
240 { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
241 { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
242 { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
243 { "ds", roff_ds, NULL, NULL, 0, NULL },
244 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
245 { "fam", roff_line_ignore, NULL, NULL, 0, NULL },
246 { "hw", roff_line_ignore, NULL, NULL, 0, NULL },
247 { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
248 { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
249 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
250 { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
251 { "it", roff_it, NULL, NULL, 0, NULL },
252 { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
253 { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
254 { "nr", roff_nr, NULL, NULL, 0, NULL },
255 { "ns", roff_line_ignore, NULL, NULL, 0, NULL },
256 { "ps", roff_line_ignore, NULL, NULL, 0, NULL },
257 { "rm", roff_rm, NULL, NULL, 0, NULL },
258 { "rr", roff_rr, NULL, NULL, 0, NULL },
259 { "so", roff_so, NULL, NULL, 0, NULL },
260 { "ta", roff_line_ignore, NULL, NULL, 0, NULL },
261 { "tr", roff_tr, NULL, NULL, 0, NULL },
262 { "Dd", roff_Dd, NULL, NULL, 0, NULL },
263 { "TH", roff_TH, NULL, NULL, 0, NULL },
264 { "TS", roff_TS, NULL, NULL, 0, NULL },
265 { "TE", roff_TE, NULL, NULL, 0, NULL },
266 { "T&", roff_T_, NULL, NULL, 0, NULL },
267 { "EQ", roff_EQ, NULL, NULL, 0, NULL },
268 { "EN", roff_EN, NULL, NULL, 0, NULL },
269 { ".", roff_cblock, NULL, NULL, 0, NULL },
270 { NULL, roff_userdef, NULL, NULL, 0, NULL },
271 };
272
/*
 * NULL-terminated list of mdoc macro names, one line per initial letter.
 * not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH
 */
const char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
	"Cd", "Cm",
	"Db", "Dc", "Dd", "Dl", "Do", "Dq", "Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em",
	"En", "Eo", "Er", "Es", "Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf",
	"Ic", "In", "It",
	"Lb", "Li", "Lk", "Lp",
	"Ms", "Mt",
	"Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "Pq",
	"Qc", "Ql", "Qo", "Qq",
	"Re", "Rs", "Rv",
	"Sc", "Sh", "Sm", "So", "Sq", "Ss", "St", "Sx", "Sy",
	"Ta", "Tn",
	"Ud", "Ux",
	"Va", "Vt",
	"Xc", "Xo", "Xr",
	"%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
295
/*
 * NULL-terminated list of man macro names.
 * not currently implemented: BT DE DS ME MT PT SY TQ YS
 */
const char *const __man_reserved[] = {
	"AT",
	"B", "BI", "BR",
	"DT",
	"EE", "EN", "EQ", "EX",
	"HP",
	"I", "IB", "IP", "IR",
	"LP",
	"OP",
	"P", "PD", "PP",
	"R", "RB", "RE", "RI", "RS",
	"SB", "SH", "SM", "SS",
	"TE", "TH", "TP", "TS", "T&",
	"UC", "UE", "UR",
	NULL
};
305
/*
 * Array of injected predefined strings.
 * The initializers are pulled in from predefs.in; each PREDEF() line
 * there expands to one struct predef element (see the PREDEF macro).
 * NOTE(review): PREDEFS_MAX is the declared array size and presumably
 * has to match the number of entries in predefs.in - confirm when
 * adding or removing entries there.
 */
#define PREDEFS_MAX 38
static const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
311
/*
 * Bucket index for a request name: the offset of its first character
 * from ASCII_LO.  The caller has to make sure that the first character
 * lies within ASCII_LO..ASCII_HI; see roffhash_find().
 * The argument is parenthesised so that expressions such as
 * ROFF_HASH(buf + pos) index the intended string.
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)
314
/*
 * State of the pending input line trap; consumed by roff_parsetext().
 * Both start out as zero/NULL, meaning that no trap is armed.
 */
static	int	 roffit_lines = 0;	/* number of lines to delay */
static	char	*roffit_macro = NULL;	/* nil-terminated macro line */
317
318 static void
319 roffhash_init(void)
320 {
321 struct roffmac *n;
322 int buc, i;
323
324 for (i = 0; i < (int)ROFF_USERDEF; i++) {
325 assert(roffs[i].name[0] >= ASCII_LO);
326 assert(roffs[i].name[0] <= ASCII_HI);
327
328 buc = ROFF_HASH(roffs[i].name);
329
330 if (NULL != (n = hash[buc])) {
331 for ( ; n->next; n = n->next)
332 /* Do nothing. */ ;
333 n->next = &roffs[i];
334 } else
335 hash[buc] = &roffs[i];
336 }
337 }
338
339 /*
340 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
341 * the nil-terminated string name could be found.
342 */
343 static enum rofft
344 roffhash_find(const char *p, size_t s)
345 {
346 int buc;
347 struct roffmac *n;
348
349 /*
350 * libroff has an extremely simple hashtable, for the time
351 * being, which simply keys on the first character, which must
352 * be printable, then walks a chain. It works well enough until
353 * optimised.
354 */
355
356 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
357 return(ROFF_MAX);
358
359 buc = ROFF_HASH(p);
360
361 if (NULL == (n = hash[buc]))
362 return(ROFF_MAX);
363 for ( ; n; n = n->next)
364 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
365 return((enum rofft)(n - roffs));
366
367 return(ROFF_MAX);
368 }
369
370
371 /*
372 * Pop the current node off of the stack of roff instructions currently
373 * pending.
374 */
375 static void
376 roffnode_pop(struct roff *r)
377 {
378 struct roffnode *p;
379
380 assert(r->last);
381 p = r->last;
382
383 r->last = r->last->parent;
384 free(p->name);
385 free(p->end);
386 free(p);
387 }
388
389
390 /*
391 * Push a roff node onto the instruction stack. This must later be
392 * removed with roffnode_pop().
393 */
394 static void
395 roffnode_push(struct roff *r, enum rofft tok, const char *name,
396 int line, int col)
397 {
398 struct roffnode *p;
399
400 p = mandoc_calloc(1, sizeof(struct roffnode));
401 p->tok = tok;
402 if (name)
403 p->name = mandoc_strdup(name);
404 p->parent = r->last;
405 p->line = line;
406 p->col = col;
407 p->rule = p->parent ? p->parent->rule : 0;
408
409 r->last = p;
410 }
411
412
413 static void
414 roff_free1(struct roff *r)
415 {
416 struct tbl_node *tbl;
417 struct eqn_node *e;
418 int i;
419
420 while (NULL != (tbl = r->first_tbl)) {
421 r->first_tbl = tbl->next;
422 tbl_free(tbl);
423 }
424
425 r->first_tbl = r->last_tbl = r->tbl = NULL;
426
427 while (NULL != (e = r->first_eqn)) {
428 r->first_eqn = e->next;
429 eqn_free(e);
430 }
431
432 r->first_eqn = r->last_eqn = r->eqn = NULL;
433
434 while (r->last)
435 roffnode_pop(r);
436
437 roff_freestr(r->strtab);
438 roff_freestr(r->xmbtab);
439
440 r->strtab = r->xmbtab = NULL;
441
442 roff_freereg(r->regtab);
443
444 r->regtab = NULL;
445
446 if (r->xtab)
447 for (i = 0; i < 128; i++)
448 free(r->xtab[i].p);
449
450 free(r->xtab);
451 r->xtab = NULL;
452 }
453
454 void
455 roff_reset(struct roff *r)
456 {
457
458 roff_free1(r);
459 r->control = 0;
460 }
461
462
/*
 * Destroy a parser: release all data it holds, then the parser
 * object itself.  The pointer must not be used afterwards.
 */
void
roff_free(struct roff *r)
{
	/* Contents first, container last. */
	roff_free1(r);
	free(r);
}
470
471
472 struct roff *
473 roff_alloc(struct mparse *parse, int options)
474 {
475 struct roff *r;
476
477 r = mandoc_calloc(1, sizeof(struct roff));
478 r->parse = parse;
479 r->options = options;
480 r->rstackpos = -1;
481
482 roffhash_init();
483
484 return(r);
485 }
486
487 /*
488 * In the current line, expand user-defined strings ("\*")
489 * and references to number registers ("\n").
490 * Also check the syntax of other escape sequences.
491 */
492 static enum rofferr
493 roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
494 {
495 char ubuf[12]; /* buffer to print the number */
496 const char *start; /* start of the string to process */
497 const char *stesc; /* start of an escape sequence ('\\') */
498 const char *stnam; /* start of the name, after "[(*" */
499 const char *cp; /* end of the name, e.g. before ']' */
500 const char *res; /* the string to be substituted */
501 char *nbuf; /* new buffer to copy bufp to */
502 size_t maxl; /* expected length of the escape name */
503 size_t naml; /* actual length of the escape name */
504 size_t ressz; /* size of the replacement string */
505 int expand_count; /* to avoid infinite loops */
506
507 expand_count = 0;
508 start = *bufp + pos;
509 stesc = strchr(start, '\0') - 1;
510 while (stesc-- > start) {
511
512 /* Search backwards for the next backslash. */
513
514 if ('\\' != *stesc)
515 continue;
516
517 /* If it is escaped, skip it. */
518
519 for (cp = stesc - 1; cp >= start; cp--)
520 if ('\\' != *cp)
521 break;
522
523 if (0 == (stesc - cp) % 2) {
524 stesc = cp;
525 continue;
526 }
527
528 /*
529 * Everything except user-defined strings and number
530 * registers is only checked, not expanded.
531 */
532
533 cp = stesc + 1;
534 switch (*cp) {
535 case ('*'):
536 res = NULL;
537 break;
538 case ('n'):
539 res = ubuf;
540 break;
541 default:
542 if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
543 mandoc_msg(MANDOCERR_BADESCAPE, r->parse,
544 ln, (int)(stesc - *bufp), NULL);
545 continue;
546 }
547
548 if (EXPAND_LIMIT < ++expand_count) {
549 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
550 ln, (int)(stesc - *bufp), NULL);
551 return(ROFF_IGN);
552 }
553
554 /*
555 * The third character decides the length
556 * of the name of the string or register.
557 * Save a pointer to the name.
558 */
559
560 switch (*++cp) {
561 case ('\0'):
562 continue;
563 case ('('):
564 cp++;
565 maxl = 2;
566 break;
567 case ('['):
568 cp++;
569 maxl = 0;
570 break;
571 default:
572 maxl = 1;
573 break;
574 }
575 stnam = cp;
576
577 /* Advance to the end of the name. */
578
579 for (naml = 0; 0 == maxl || naml < maxl; naml++, cp++) {
580 if ('\0' == *cp) {
581 mandoc_msg
582 (MANDOCERR_BADESCAPE,
583 r->parse, ln,
584 (int)(stesc - *bufp), NULL);
585 continue;
586 }
587 if (0 == maxl && ']' == *cp)
588 break;
589 }
590
591 /*
592 * Retrieve the replacement string; if it is
593 * undefined, resume searching for escapes.
594 */
595
596 if (NULL == res)
597 res = roff_getstrn(r, stnam, naml);
598 else
599 snprintf(ubuf, sizeof(ubuf), "%d",
600 roff_getregn(r, stnam, naml));
601
602 if (NULL == res) {
603 mandoc_msg
604 (MANDOCERR_BADESCAPE, r->parse,
605 ln, (int)(stesc - *bufp), NULL);
606 res = "";
607 }
608 ressz = strlen(res);
609
610 /* Replace the escape sequence by the string. */
611
612 *szp += ressz + 1;
613 nbuf = mandoc_malloc(*szp);
614
615 strlcpy(nbuf, *bufp, (size_t)(stesc - *bufp + 1));
616 strlcat(nbuf, res, *szp);
617 strlcat(nbuf, cp + (maxl ? 0 : 1), *szp);
618
619 /* Prepare for the next replacement. */
620
621 start = nbuf + pos;
622 stesc = nbuf + (stesc - *bufp) + ressz;
623 free(*bufp);
624 *bufp = nbuf;
625 }
626 return(ROFF_CONT);
627 }
628
629 /*
630 * Process text streams:
631 * Convert all breakable hyphens into ASCII_HYPH.
632 * Decrement and spring input line trap.
633 */
634 static enum rofferr
635 roff_parsetext(char **bufp, size_t *szp, int pos, int *offs)
636 {
637 size_t sz;
638 const char *start;
639 char *p;
640 int isz;
641 enum mandoc_esc esc;
642
643 start = p = *bufp + pos;
644
645 while ('\0' != *p) {
646 sz = strcspn(p, "-\\");
647 p += sz;
648
649 if ('\0' == *p)
650 break;
651
652 if ('\\' == *p) {
653 /* Skip over escapes. */
654 p++;
655 esc = mandoc_escape((const char **)&p, NULL, NULL);
656 if (ESCAPE_ERROR == esc)
657 break;
658 continue;
659 } else if (p == start) {
660 p++;
661 continue;
662 }
663
664 if (isalpha((unsigned char)p[-1]) &&
665 isalpha((unsigned char)p[1]))
666 *p = ASCII_HYPH;
667 p++;
668 }
669
670 /* Spring the input line trap. */
671 if (1 == roffit_lines) {
672 isz = mandoc_asprintf(&p, "%s\n.%s", *bufp, roffit_macro);
673 free(*bufp);
674 *bufp = p;
675 *szp = isz + 1;
676 *offs = 0;
677 free(roffit_macro);
678 roffit_lines = 0;
679 return(ROFF_REPARSE);
680 } else if (1 < roffit_lines)
681 --roffit_lines;
682 return(ROFF_CONT);
683 }
684
685 enum rofferr
686 roff_parseln(struct roff *r, int ln, char **bufp,
687 size_t *szp, int pos, int *offs)
688 {
689 enum rofft t;
690 enum rofferr e;
691 int ppos, ctl;
692
693 /*
694 * Run the reserved-word filter only if we have some reserved
695 * words to fill in.
696 */
697
698 e = roff_res(r, bufp, szp, ln, pos);
699 if (ROFF_IGN == e)
700 return(e);
701 assert(ROFF_CONT == e);
702
703 ppos = pos;
704 ctl = roff_getcontrol(r, *bufp, &pos);
705
706 /*
707 * First, if a scope is open and we're not a macro, pass the
708 * text through the macro's filter. If a scope isn't open and
709 * we're not a macro, just let it through.
710 * Finally, if there's an equation scope open, divert it into it
711 * no matter our state.
712 */
713
714 if (r->last && ! ctl) {
715 t = r->last->tok;
716 assert(roffs[t].text);
717 e = (*roffs[t].text)
718 (r, t, bufp, szp, ln, pos, pos, offs);
719 assert(ROFF_IGN == e || ROFF_CONT == e);
720 if (ROFF_CONT != e)
721 return(e);
722 }
723 if (r->eqn)
724 return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
725 if ( ! ctl) {
726 if (r->tbl)
727 return(tbl_read(r->tbl, ln, *bufp, pos));
728 return(roff_parsetext(bufp, szp, pos, offs));
729 }
730
731 /*
732 * If a scope is open, go to the child handler for that macro,
733 * as it may want to preprocess before doing anything with it.
734 * Don't do so if an equation is open.
735 */
736
737 if (r->last) {
738 t = r->last->tok;
739 assert(roffs[t].sub);
740 return((*roffs[t].sub)
741 (r, t, bufp, szp,
742 ln, ppos, pos, offs));
743 }
744
745 /*
746 * Lastly, as we've no scope open, try to look up and execute
747 * the new macro. If no macro is found, simply return and let
748 * the compilers handle it.
749 */
750
751 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
752 return(ROFF_CONT);
753
754 assert(roffs[t].proc);
755 return((*roffs[t].proc)
756 (r, t, bufp, szp,
757 ln, ppos, pos, offs));
758 }
759
760
761 void
762 roff_endparse(struct roff *r)
763 {
764
765 if (r->last)
766 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
767 r->last->line, r->last->col, NULL);
768
769 if (r->eqn) {
770 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
771 r->eqn->eqn.ln, r->eqn->eqn.pos, NULL);
772 eqn_end(&r->eqn);
773 }
774
775 if (r->tbl) {
776 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
777 r->tbl->line, r->tbl->pos, NULL);
778 tbl_end(&r->tbl);
779 }
780 }
781
782 /*
783 * Parse a roff node's type from the input buffer. This must be in the
784 * form of ".foo xxx" in the usual way.
785 */
786 static enum rofft
787 roff_parse(struct roff *r, const char *buf, int *pos)
788 {
789 const char *mac;
790 size_t maclen;
791 enum rofft t;
792
793 if ('\0' == buf[*pos] || '"' == buf[*pos] ||
794 '\t' == buf[*pos] || ' ' == buf[*pos])
795 return(ROFF_MAX);
796
797 /* We stop the macro parse at an escape, tab, space, or nil. */
798
799 mac = buf + *pos;
800 maclen = strcspn(mac, " \\\t\0");
801
802 t = (r->current_string = roff_getstrn(r, mac, maclen))
803 ? ROFF_USERDEF : roffhash_find(mac, maclen);
804
805 *pos += (int)maclen;
806
807 while (buf[*pos] && ' ' == buf[*pos])
808 (*pos)++;
809
810 return(t);
811 }
812
813 /* ARGSUSED */
814 static enum rofferr
815 roff_cblock(ROFF_ARGS)
816 {
817
818 /*
819 * A block-close `..' should only be invoked as a child of an
820 * ignore macro, otherwise raise a warning and just ignore it.
821 */
822
823 if (NULL == r->last) {
824 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
825 return(ROFF_IGN);
826 }
827
828 switch (r->last->tok) {
829 case (ROFF_am):
830 /* FALLTHROUGH */
831 case (ROFF_ami):
832 /* FALLTHROUGH */
833 case (ROFF_am1):
834 /* FALLTHROUGH */
835 case (ROFF_de):
836 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
837 /* FALLTHROUGH */
838 case (ROFF_dei):
839 /* FALLTHROUGH */
840 case (ROFF_ig):
841 break;
842 default:
843 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
844 return(ROFF_IGN);
845 }
846
847 if ((*bufp)[pos])
848 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
849
850 roffnode_pop(r);
851 roffnode_cleanscope(r);
852 return(ROFF_IGN);
853
854 }
855
856
857 static void
858 roffnode_cleanscope(struct roff *r)
859 {
860
861 while (r->last) {
862 if (--r->last->endspan != 0)
863 break;
864 roffnode_pop(r);
865 }
866 }
867
868
869 static void
870 roff_ccond(struct roff *r, int ln, int ppos)
871 {
872
873 if (NULL == r->last) {
874 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
875 return;
876 }
877
878 switch (r->last->tok) {
879 case (ROFF_el):
880 /* FALLTHROUGH */
881 case (ROFF_ie):
882 /* FALLTHROUGH */
883 case (ROFF_if):
884 break;
885 default:
886 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
887 return;
888 }
889
890 if (r->last->endspan > -1) {
891 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
892 return;
893 }
894
895 roffnode_pop(r);
896 roffnode_cleanscope(r);
897 return;
898 }
899
900
901 /* ARGSUSED */
902 static enum rofferr
903 roff_block(ROFF_ARGS)
904 {
905 int sv;
906 size_t sz;
907 char *name;
908
909 name = NULL;
910
911 if (ROFF_ig != tok) {
912 if ('\0' == (*bufp)[pos]) {
913 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
914 return(ROFF_IGN);
915 }
916
917 /*
918 * Re-write `de1', since we don't really care about
919 * groff's strange compatibility mode, into `de'.
920 */
921
922 if (ROFF_de1 == tok)
923 tok = ROFF_de;
924 if (ROFF_de == tok)
925 name = *bufp + pos;
926 else
927 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
928 roffs[tok].name);
929
930 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
931 pos++;
932
933 while (isspace((unsigned char)(*bufp)[pos]))
934 (*bufp)[pos++] = '\0';
935 }
936
937 roffnode_push(r, tok, name, ln, ppos);
938
939 /*
940 * At the beginning of a `de' macro, clear the existing string
941 * with the same name, if there is one. New content will be
942 * appended from roff_block_text() in multiline mode.
943 */
944
945 if (ROFF_de == tok)
946 roff_setstr(r, name, "", 0);
947
948 if ('\0' == (*bufp)[pos])
949 return(ROFF_IGN);
950
951 /* If present, process the custom end-of-line marker. */
952
953 sv = pos;
954 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
955 pos++;
956
957 /*
958 * Note: groff does NOT like escape characters in the input.
959 * Instead of detecting this, we're just going to let it fly and
960 * to hell with it.
961 */
962
963 assert(pos > sv);
964 sz = (size_t)(pos - sv);
965
966 if (1 == sz && '.' == (*bufp)[sv])
967 return(ROFF_IGN);
968
969 r->last->end = mandoc_malloc(sz + 1);
970
971 memcpy(r->last->end, *bufp + sv, sz);
972 r->last->end[(int)sz] = '\0';
973
974 if ((*bufp)[pos])
975 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
976
977 return(ROFF_IGN);
978 }
979
980
981 /* ARGSUSED */
982 static enum rofferr
983 roff_block_sub(ROFF_ARGS)
984 {
985 enum rofft t;
986 int i, j;
987
988 /*
989 * First check whether a custom macro exists at this level. If
990 * it does, then check against it. This is some of groff's
991 * stranger behaviours. If we encountered a custom end-scope
992 * tag and that tag also happens to be a "real" macro, then we
993 * need to try interpreting it again as a real macro. If it's
994 * not, then return ignore. Else continue.
995 */
996
997 if (r->last->end) {
998 for (i = pos, j = 0; r->last->end[j]; j++, i++)
999 if ((*bufp)[i] != r->last->end[j])
1000 break;
1001
1002 if ('\0' == r->last->end[j] &&
1003 ('\0' == (*bufp)[i] ||
1004 ' ' == (*bufp)[i] ||
1005 '\t' == (*bufp)[i])) {
1006 roffnode_pop(r);
1007 roffnode_cleanscope(r);
1008
1009 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
1010 i++;
1011
1012 pos = i;
1013 if (ROFF_MAX != roff_parse(r, *bufp, &pos))
1014 return(ROFF_RERUN);
1015 return(ROFF_IGN);
1016 }
1017 }
1018
1019 /*
1020 * If we have no custom end-query or lookup failed, then try
1021 * pulling it out of the hashtable.
1022 */
1023
1024 t = roff_parse(r, *bufp, &pos);
1025
1026 /*
1027 * Macros other than block-end are only significant
1028 * in `de' blocks; elsewhere, simply throw them away.
1029 */
1030 if (ROFF_cblock != t) {
1031 if (ROFF_de == tok)
1032 roff_setstr(r, r->last->name, *bufp + ppos, 2);
1033 return(ROFF_IGN);
1034 }
1035
1036 assert(roffs[t].proc);
1037 return((*roffs[t].proc)(r, t, bufp, szp,
1038 ln, ppos, pos, offs));
1039 }
1040
1041
1042 /* ARGSUSED */
1043 static enum rofferr
1044 roff_block_text(ROFF_ARGS)
1045 {
1046
1047 if (ROFF_de == tok)
1048 roff_setstr(r, r->last->name, *bufp + pos, 2);
1049
1050 return(ROFF_IGN);
1051 }
1052
1053
1054 /* ARGSUSED */
1055 static enum rofferr
1056 roff_cond_sub(ROFF_ARGS)
1057 {
1058 enum rofft t;
1059 char *ep;
1060 int rr;
1061
1062 rr = r->last->rule;
1063 roffnode_cleanscope(r);
1064 t = roff_parse(r, *bufp, &pos);
1065
1066 /*
1067 * Fully handle known macros when they are structurally
1068 * required or when the conditional evaluated to true.
1069 */
1070
1071 if ((ROFF_MAX != t) &&
1072 (rr || ROFFMAC_STRUCT & roffs[t].flags)) {
1073 assert(roffs[t].proc);
1074 return((*roffs[t].proc)(r, t, bufp, szp,
1075 ln, ppos, pos, offs));
1076 }
1077
1078 /*
1079 * If `\}' occurs on a macro line without a preceding macro,
1080 * drop the line completely.
1081 */
1082
1083 ep = *bufp + pos;
1084 if ('\\' == ep[0] && '}' == ep[1])
1085 rr = 0;
1086
1087 /* Always check for the closing delimiter `\}'. */
1088
1089 while (NULL != (ep = strchr(ep, '\\'))) {
1090 if ('}' == *(++ep)) {
1091 *ep = '&';
1092 roff_ccond(r, ln, ep - *bufp - 1);
1093 }
1094 ++ep;
1095 }
1096 return(rr ? ROFF_CONT : ROFF_IGN);
1097 }
1098
1099 /* ARGSUSED */
1100 static enum rofferr
1101 roff_cond_text(ROFF_ARGS)
1102 {
1103 char *ep;
1104 int rr;
1105
1106 rr = r->last->rule;
1107 roffnode_cleanscope(r);
1108
1109 ep = *bufp + pos;
1110 while (NULL != (ep = strchr(ep, '\\'))) {
1111 if ('}' == *(++ep)) {
1112 *ep = '&';
1113 roff_ccond(r, ln, ep - *bufp - 1);
1114 }
1115 ++ep;
1116 }
1117 return(rr ? ROFF_CONT : ROFF_IGN);
1118 }
1119
/*
 * Parse a single signed integer number starting at v[*pos].
 * Stop at the first non-digit.
 * If there is at least one digit, store the number in *res, advance
 * *pos to the first byte after it, and return 1.
 * Otherwise return 0 and leave *pos unchanged; *res is set to 0.
 *
 * Numbers that do not fit into an int wrap around.  The digits are
 * accumulated in unsigned arithmetic, where wrap-around is well
 * defined, rather than in a signed int, where overflow is undefined
 * behaviour.  The final conversion to int is implementation-defined
 * for out-of-range values and yields the two's complement result
 * with the usual compilers.
 */
static int
roff_getnum(const char *v, int *pos, int *res)
{
	unsigned int	 acc;
	int		 p, n;

	p = *pos;
	n = v[p] == '-';
	if (n)
		p++;

	*res = 0;
	acc = 0;
	for ( ; isdigit((unsigned char)v[p]); p++)
		acc = 10 * acc + (unsigned int)(v[p] - '0');

	/* Not a single digit found, possibly after a minus sign. */

	if (p == *pos + n)
		return 0;

	if (n)
		acc = 0U - acc;
	*res = (int)acc;

	*pos = p;
	return 1;
}
1147
/*
 * Evaluate a string comparison condition.
 * The first character is the delimiter.
 * Succeed if the string up to its second occurrence
 * matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or lacking that, to the end of the line.
 * The cursor never moves beyond the terminating NUL;
 * an empty condition fails without moving the cursor at all.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*s1, *s2, *s3;
	int		 match;

	match = 0;
	s1 = v + *pos;		/* initial delimiter */

	/* Without a delimiter, there is nothing to scan. */

	if ('\0' == *s1)
		return(0);

	s2 = s1 + 1;		/* for scanning the first string */
	s3 = strchr(s2, *s1);	/* for scanning the second string */

	if (NULL == s3)		/* found no middle delimiter */
		goto out;

	while ('\0' != *++s3) {
		if (*s2 != *s3) {  /* mismatch */
			s3 = strchr(s3, *s1);
			break;
		}
		if (*s3 == *s1) {  /* found the final delimiter */
			match = 1;
			break;
		}
		s2++;
	}

out:
	if (NULL == s3)
		s3 = strchr(s2, '\0');
	else if ('\0' != *s3)	/* step over the final delimiter only */
		s3++;
	*pos = (int)(s3 - v);
	return(match);
}
1190
/*
 * Evaluate an optionally negated single character, numerical,
 * or string condition starting at v[*pos], and advance *pos past it.
 * The single-letter conditions `n' and `o' count as true and
 * `c', `d', `e', `r', `t' as false.  A numerical condition is true
 * if the number is positive.  Returns 1 if the condition, after
 * applying a leading `!', holds, and 0 otherwise.
 */
static int
roff_evalcond(const char *v, int *pos)
{
	int	 negated, number;

	negated = '!' == v[*pos];
	if (negated)
		(*pos)++;

	switch (v[*pos]) {
	case ('n'):
		/* FALLTHROUGH */
	case ('o'):
		/* Conditions that are always true. */
		(*pos)++;
		return( ! negated);
	case ('c'):
		/* FALLTHROUGH */
	case ('d'):
		/* FALLTHROUGH */
	case ('e'):
		/* FALLTHROUGH */
	case ('r'):
		/* FALLTHROUGH */
	case ('t'):
		/* Conditions that are always false. */
		(*pos)++;
		return(negated);
	default:
		break;
	}

	/* Try a numerical condition first, then a string comparison. */

	if (roff_evalnum(v, pos, &number, 0))
		return((number > 0) != negated);

	return((0 != roff_evalstrcond(v, pos)) != negated);
}
1232
/*
 * Handler for requests that are recognized but not acted upon:
 * none of the arguments is looked at, and ROFF_IGN is returned
 * unconditionally.
 */
/* ARGSUSED */
static enum rofferr
roff_line_ignore(ROFF_ARGS)
{

	return(ROFF_IGN);
}
1240
1241 /* ARGSUSED */
1242 static enum rofferr
1243 roff_cond(ROFF_ARGS)
1244 {
1245
1246 roffnode_push(r, tok, NULL, ln, ppos);
1247
1248 /*
1249 * An `.el' has no conditional body: it will consume the value
1250 * of the current rstack entry set in prior `ie' calls or
1251 * defaults to DENY.
1252 *
1253 * If we're not an `el', however, then evaluate the conditional.
1254 */
1255
1256 r->last->rule = ROFF_el == tok ?
1257 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
1258 roff_evalcond(*bufp, &pos);
1259
1260 /*
1261 * An if-else will put the NEGATION of the current evaluated
1262 * conditional into the stack of rules.
1263 */
1264
1265 if (ROFF_ie == tok) {
1266 if (r->rstackpos == RSTACK_MAX - 1) {
1267 mandoc_msg(MANDOCERR_MEM,
1268 r->parse, ln, ppos, NULL);
1269 return(ROFF_ERR);
1270 }
1271 r->rstack[++r->rstackpos] = !r->last->rule;
1272 }
1273
1274 /* If the parent has false as its rule, then so do we. */
1275
1276 if (r->last->parent && !r->last->parent->rule)
1277 r->last->rule = 0;
1278
1279 /*
1280 * Determine scope.
1281 * If there is nothing on the line after the conditional,
1282 * not even whitespace, use next-line scope.
1283 */
1284
1285 if ('\0' == (*bufp)[pos]) {
1286 r->last->endspan = 2;
1287 goto out;
1288 }
1289
1290 while (' ' == (*bufp)[pos])
1291 pos++;
1292
1293 /* An opening brace requests multiline scope. */
1294
1295 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1296 r->last->endspan = -1;
1297 pos += 2;
1298 goto out;
1299 }
1300
1301 /*
1302 * Anything else following the conditional causes
1303 * single-line scope. Warn if the scope contains
1304 * nothing but trailing whitespace.
1305 */
1306
1307 if ('\0' == (*bufp)[pos])
1308 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
1309
1310 r->last->endspan = 1;
1311
1312 out:
1313 *offs = pos;
1314 return(ROFF_RERUN);
1315 }
1316
1317
1318 /* ARGSUSED */
1319 static enum rofferr
1320 roff_ds(ROFF_ARGS)
1321 {
1322 char *name, *string;
1323
1324 /*
1325 * A symbol is named by the first word following the macro
1326 * invocation up to a space. Its value is anything after the
1327 * name's trailing whitespace and optional double-quote. Thus,
1328 *
1329 * [.ds foo "bar " ]
1330 *
1331 * will have `bar " ' as its value.
1332 */
1333
1334 string = *bufp + pos;
1335 name = roff_getname(r, &string, ln, pos);
1336 if ('\0' == *name)
1337 return(ROFF_IGN);
1338
1339 /* Read past initial double-quote. */
1340 if ('"' == *string)
1341 string++;
1342
1343 /* The rest is the value. */
1344 roff_setstr(r, name, string, ROFF_as == tok);
1345 return(ROFF_IGN);
1346 }
1347
/*
 * Parse a single operator, one or two characters long.
 * On success, store a one-character code for the operator in *res,
 * advance the parse point past it and return that code.
 * On failure, return 0 and leave the parse point unchanged;
 * *res then holds the offending character.
 *
 * Two-character operators are encoded as follows:
 * "<=" is 'l', ">=" is 'g', "<>" is '!', "<?" is 'i', ">?" is 'a',
 * and "==" is the same as a single '='.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*cp;
	int		 len;

	cp = v + *pos;
	*res = *cp;
	len = 1;

	if ('<' == *cp) {
		if ('=' == cp[1]) {
			*res = 'l';
			len = 2;
		} else if ('>' == cp[1]) {
			*res = '!';
			len = 2;
		} else if ('?' == cp[1]) {
			*res = 'i';
			len = 2;
		}
	} else if ('>' == *cp) {
		if ('=' == cp[1]) {
			*res = 'g';
			len = 2;
		} else if ('?' == cp[1]) {
			*res = 'a';
			len = 2;
		}
	} else if ('=' == *cp) {
		if ('=' == cp[1])
			len = 2;
	} else if ('\0' == *cp || NULL == strchr("+-*/%&:", *cp)) {
		/* Not an operator; strchr() alone would accept the NUL. */
		return(0);
	}

	*pos += len;
	return(*res);
}
1417
/*
 * Evaluate one operand: a parenthesized numeric expression
 * if the parse point is at an opening parenthesis,
 * or else whatever roff_getnum() accepts there.
 * Returns non-zero on success and zero on failure.
 */
static int
roff_evalpar(const char *v, int *pos, int *res)
{

	if ('(' == v[*pos]) {
		(*pos)++;
		if (0 == roff_evalnum(v, pos, res, 1))
			return(0);

		/* A missing closing parenthesis is silently tolerated. */
		if (')' == v[*pos])
			(*pos)++;

		return(1);
	}

	return(roff_getnum(v, pos, res));
}
1439
/*
 * Evaluate a complete numeric expression.
 * Proceed left to right, there is no concept of precedence.
 *
 * v is the buffer holding the expression and *pos the offset where
 * it starts; *pos is advanced as input is consumed.  If pos is NULL,
 * evaluation starts at the beginning of v and the end position
 * is not reported.  If skipwhite is non-zero, whitespace around
 * operands and operators is skipped.
 *
 * On success, store the value in *res and return 1.
 * Return 0 if an operand cannot be parsed.
 * Division and modulus by zero yield 0 rather than crashing.
 */
static int
roff_evalnum(const char *v, int *pos, int *res, int skipwhite)
{
	int		 mypos, operand2;
	char		 operator;

	if (NULL == pos) {
		mypos = 0;
		pos = &mypos;
	}

	if (skipwhite)
		while (isspace((unsigned char)v[*pos]))
			(*pos)++;

	/* The first operand. */
	if ( ! roff_evalpar(v, pos, res))
		return(0);

	/* Any number of (operator, operand) pairs may follow. */
	while (1) {
		if (skipwhite)
			while (isspace((unsigned char)v[*pos]))
				(*pos)++;

		/* The expression ends where no operator follows. */
		if ( ! roff_getop(v, pos, &operator))
			break;

		if (skipwhite)
			while (isspace((unsigned char)v[*pos]))
				(*pos)++;

		if ( ! roff_evalpar(v, pos, &operand2))
			return(0);

		if (skipwhite)
			while (isspace((unsigned char)v[*pos]))
				(*pos)++;

		switch (operator) {
		case ('+'):
			*res += operand2;
			break;
		case ('-'):
			*res -= operand2;
			break;
		case ('*'):
			*res *= operand2;
			break;
		case ('/'):
			/* Dividing by zero is undefined in C. */
			if (0 == operand2) {
				*res = 0;
				break;
			}
			*res /= operand2;
			break;
		case ('%'):
			/* So is taking the remainder. */
			if (0 == operand2) {
				*res = 0;
				break;
			}
			*res %= operand2;
			break;
		case ('<'):
			*res = *res < operand2;
			break;
		case ('>'):
			*res = *res > operand2;
			break;
		case ('l'):	/* <= */
			*res = *res <= operand2;
			break;
		case ('g'):	/* >= */
			*res = *res >= operand2;
			break;
		case ('='):
			*res = *res == operand2;
			break;
		case ('!'):	/* <> */
			*res = *res != operand2;
			break;
		case ('&'):
			*res = *res && operand2;
			break;
		case (':'):
			*res = *res || operand2;
			break;
		case ('i'):	/* <? (minimum) */
			if (operand2 < *res)
				*res = operand2;
			break;
		case ('a'):	/* >? (maximum) */
			if (operand2 > *res)
				*res = operand2;
			break;
		default:
			/* roff_getop() returns none but the codes above. */
			abort();
		}
	}
	return(1);
}
1535
1536 void
1537 roff_setreg(struct roff *r, const char *name, int val, char sign)
1538 {
1539 struct roffreg *reg;
1540
1541 /* Search for an existing register with the same name. */
1542 reg = r->regtab;
1543
1544 while (reg && strcmp(name, reg->key.p))
1545 reg = reg->next;
1546
1547 if (NULL == reg) {
1548 /* Create a new register. */
1549 reg = mandoc_malloc(sizeof(struct roffreg));
1550 reg->key.p = mandoc_strdup(name);
1551 reg->key.sz = strlen(name);
1552 reg->val = 0;
1553 reg->next = r->regtab;
1554 r->regtab = reg;
1555 }
1556
1557 if ('+' == sign)
1558 reg->val += val;
1559 else if ('-' == sign)
1560 reg->val -= val;
1561 else
1562 reg->val = val;
1563 }
1564
/*
 * Look up a predefined read-only number register.
 * Only the first character of name is inspected.
 * Returns the value of the register, or -1 if there is no such
 * register.  Consequently, -1 cannot be the value of a predefined
 * register; should one with that value ever be needed,
 * a different "not found" indicator has to be chosen.
 */
static int
roff_getregro(const char *name)
{
	static const struct {
		char	 key;
		int	 val;
	} ro[] = {
		{ 'A', 0 },	/* ASCII approximation mode is always off. */
		{ 'g', 1 },	/* Groff compatibility mode is always on. */
		{ 'H', 24 },	/* Fixed horizontal resolution. */
		{ 'j', 0 },	/* Always adjust left margin only. */
		{ 'T', 1 },	/* Some output device is always defined. */
		{ 'V', 40 }	/* Fixed vertical resolution. */
	};
	size_t		 i;

	for (i = 0; i < sizeof(ro) / sizeof(ro[0]); i++)
		if (ro[i].key == *name)
			return(ro[i].val);

	return(-1);
}
1592
1593 int
1594 roff_getreg(const struct roff *r, const char *name)
1595 {
1596 struct roffreg *reg;
1597 int val;
1598
1599 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
1600 val = roff_getregro(name + 1);
1601 if (-1 != val)
1602 return (val);
1603 }
1604
1605 for (reg = r->regtab; reg; reg = reg->next)
1606 if (0 == strcmp(name, reg->key.p))
1607 return(reg->val);
1608
1609 return(0);
1610 }
1611
1612 static int
1613 roff_getregn(const struct roff *r, const char *name, size_t len)
1614 {
1615 struct roffreg *reg;
1616 int val;
1617
1618 if ('.' == name[0] && 2 == len) {
1619 val = roff_getregro(name + 1);
1620 if (-1 != val)
1621 return (val);
1622 }
1623
1624 for (reg = r->regtab; reg; reg = reg->next)
1625 if (len == reg->key.sz &&
1626 0 == strncmp(name, reg->key.p, len))
1627 return(reg->val);
1628
1629 return(0);
1630 }
1631
1632 static void
1633 roff_freereg(struct roffreg *reg)
1634 {
1635 struct roffreg *old_reg;
1636
1637 while (NULL != reg) {
1638 free(reg->key.p);
1639 old_reg = reg;
1640 reg = reg->next;
1641 free(old_reg);
1642 }
1643 }
1644
1645 static enum rofferr
1646 roff_nr(ROFF_ARGS)
1647 {
1648 const char *key;
1649 char *val;
1650 int iv;
1651 char sign;
1652
1653 val = *bufp + pos;
1654 key = roff_getname(r, &val, ln, pos);
1655
1656 sign = *val;
1657 if ('+' == sign || '-' == sign)
1658 val++;
1659
1660 if (roff_evalnum(val, NULL, &iv, 0))
1661 roff_setreg(r, key, iv, sign);
1662
1663 return(ROFF_IGN);
1664 }
1665
1666 static enum rofferr
1667 roff_rr(ROFF_ARGS)
1668 {
1669 struct roffreg *reg, **prev;
1670 const char *name;
1671 char *cp;
1672
1673 cp = *bufp + pos;
1674 name = roff_getname(r, &cp, ln, pos);
1675
1676 prev = &r->regtab;
1677 while (1) {
1678 reg = *prev;
1679 if (NULL == reg || !strcmp(name, reg->key.p))
1680 break;
1681 prev = &reg->next;
1682 }
1683 if (NULL != reg) {
1684 *prev = reg->next;
1685 free(reg->key.p);
1686 free(reg);
1687 }
1688 return(ROFF_IGN);
1689 }
1690
1691 /* ARGSUSED */
1692 static enum rofferr
1693 roff_rm(ROFF_ARGS)
1694 {
1695 const char *name;
1696 char *cp;
1697
1698 cp = *bufp + pos;
1699 while ('\0' != *cp) {
1700 name = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1701 if ('\0' != *name)
1702 roff_setstr(r, name, NULL, 0);
1703 }
1704 return(ROFF_IGN);
1705 }
1706
1707 /* ARGSUSED */
1708 static enum rofferr
1709 roff_it(ROFF_ARGS)
1710 {
1711 char *cp;
1712 size_t len;
1713 int iv;
1714
1715 /* Parse the number of lines. */
1716 cp = *bufp + pos;
1717 len = strcspn(cp, " \t");
1718 cp[len] = '\0';
1719 if ((iv = mandoc_strntoi(cp, len, 10)) <= 0) {
1720 mandoc_msg(MANDOCERR_NUMERIC, r->parse,
1721 ln, ppos, *bufp + 1);
1722 return(ROFF_IGN);
1723 }
1724 cp += len + 1;
1725
1726 /* Arm the input line trap. */
1727 roffit_lines = iv;
1728 roffit_macro = mandoc_strdup(cp);
1729 return(ROFF_IGN);
1730 }
1731
1732 /* ARGSUSED */
1733 static enum rofferr
1734 roff_Dd(ROFF_ARGS)
1735 {
1736 const char *const *cp;
1737
1738 if (0 == ((MPARSE_MDOC | MPARSE_QUICK) & r->options))
1739 for (cp = __mdoc_reserved; *cp; cp++)
1740 roff_setstr(r, *cp, NULL, 0);
1741
1742 return(ROFF_CONT);
1743 }
1744
1745 /* ARGSUSED */
1746 static enum rofferr
1747 roff_TH(ROFF_ARGS)
1748 {
1749 const char *const *cp;
1750
1751 if (0 == (MPARSE_QUICK & r->options))
1752 for (cp = __man_reserved; *cp; cp++)
1753 roff_setstr(r, *cp, NULL, 0);
1754
1755 return(ROFF_CONT);
1756 }
1757
1758 /* ARGSUSED */
1759 static enum rofferr
1760 roff_TE(ROFF_ARGS)
1761 {
1762
1763 if (NULL == r->tbl)
1764 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1765 else
1766 tbl_end(&r->tbl);
1767
1768 return(ROFF_IGN);
1769 }
1770
1771 /* ARGSUSED */
1772 static enum rofferr
1773 roff_T_(ROFF_ARGS)
1774 {
1775
1776 if (NULL == r->tbl)
1777 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1778 else
1779 tbl_restart(ppos, ln, r->tbl);
1780
1781 return(ROFF_IGN);
1782 }
1783
#if 0
/*
 * Currently compiled out.
 * End the open equation, if any; return 1 if there was one
 * and eqn_end() reported ROFF_EQN, or 0 otherwise.
 */
static int
roff_closeeqn(struct roff *r)
{

	if (NULL == r->eqn)
		return(0);

	return(ROFF_EQN == eqn_end(&r->eqn));
}
#endif
1792
1793 static void
1794 roff_openeqn(struct roff *r, const char *name, int line,
1795 int offs, const char *buf)
1796 {
1797 struct eqn_node *e;
1798 int poff;
1799
1800 assert(NULL == r->eqn);
1801 e = eqn_alloc(name, offs, line, r->parse);
1802
1803 if (r->last_eqn)
1804 r->last_eqn->next = e;
1805 else
1806 r->first_eqn = r->last_eqn = e;
1807
1808 r->eqn = r->last_eqn = e;
1809
1810 if (buf) {
1811 poff = 0;
1812 eqn_read(&r->eqn, line, buf, offs, &poff);
1813 }
1814 }
1815
1816 /* ARGSUSED */
1817 static enum rofferr
1818 roff_EQ(ROFF_ARGS)
1819 {
1820
1821 roff_openeqn(r, *bufp + pos, ln, ppos, NULL);
1822 return(ROFF_IGN);
1823 }
1824
1825 /* ARGSUSED */
1826 static enum rofferr
1827 roff_EN(ROFF_ARGS)
1828 {
1829
1830 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1831 return(ROFF_IGN);
1832 }
1833
1834 /* ARGSUSED */
1835 static enum rofferr
1836 roff_TS(ROFF_ARGS)
1837 {
1838 struct tbl_node *tbl;
1839
1840 if (r->tbl) {
1841 mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
1842 tbl_end(&r->tbl);
1843 }
1844
1845 tbl = tbl_alloc(ppos, ln, r->parse);
1846
1847 if (r->last_tbl)
1848 r->last_tbl->next = tbl;
1849 else
1850 r->first_tbl = r->last_tbl = tbl;
1851
1852 r->tbl = r->last_tbl = tbl;
1853 return(ROFF_IGN);
1854 }
1855
1856 /* ARGSUSED */
1857 static enum rofferr
1858 roff_cc(ROFF_ARGS)
1859 {
1860 const char *p;
1861
1862 p = *bufp + pos;
1863
1864 if ('\0' == *p || '.' == (r->control = *p++))
1865 r->control = 0;
1866
1867 if ('\0' != *p)
1868 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1869
1870 return(ROFF_IGN);
1871 }
1872
1873 /* ARGSUSED */
1874 static enum rofferr
1875 roff_tr(ROFF_ARGS)
1876 {
1877 const char *p, *first, *second;
1878 size_t fsz, ssz;
1879 enum mandoc_esc esc;
1880
1881 p = *bufp + pos;
1882
1883 if ('\0' == *p) {
1884 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1885 return(ROFF_IGN);
1886 }
1887
1888 while ('\0' != *p) {
1889 fsz = ssz = 1;
1890
1891 first = p++;
1892 if ('\\' == *first) {
1893 esc = mandoc_escape(&p, NULL, NULL);
1894 if (ESCAPE_ERROR == esc) {
1895 mandoc_msg
1896 (MANDOCERR_BADESCAPE, r->parse,
1897 ln, (int)(p - *bufp), NULL);
1898 return(ROFF_IGN);
1899 }
1900 fsz = (size_t)(p - first);
1901 }
1902
1903 second = p++;
1904 if ('\\' == *second) {
1905 esc = mandoc_escape(&p, NULL, NULL);
1906 if (ESCAPE_ERROR == esc) {
1907 mandoc_msg
1908 (MANDOCERR_BADESCAPE, r->parse,
1909 ln, (int)(p - *bufp), NULL);
1910 return(ROFF_IGN);
1911 }
1912 ssz = (size_t)(p - second);
1913 } else if ('\0' == *second) {
1914 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
1915 ln, (int)(p - *bufp), NULL);
1916 second = " ";
1917 p--;
1918 }
1919
1920 if (fsz > 1) {
1921 roff_setstrn(&r->xmbtab, first,
1922 fsz, second, ssz, 0);
1923 continue;
1924 }
1925
1926 if (NULL == r->xtab)
1927 r->xtab = mandoc_calloc
1928 (128, sizeof(struct roffstr));
1929
1930 free(r->xtab[(int)*first].p);
1931 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
1932 r->xtab[(int)*first].sz = ssz;
1933 }
1934
1935 return(ROFF_IGN);
1936 }
1937
1938 /* ARGSUSED */
1939 static enum rofferr
1940 roff_so(ROFF_ARGS)
1941 {
1942 char *name;
1943
1944 mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL);
1945
1946 /*
1947 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1948 * opening anything that's not in our cwd or anything beneath
1949 * it. Thus, explicitly disallow traversing up the file-system
1950 * or using absolute paths.
1951 */
1952
1953 name = *bufp + pos;
1954 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1955 mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL);
1956 return(ROFF_ERR);
1957 }
1958
1959 *offs = pos;
1960 return(ROFF_SO);
1961 }
1962
1963 /* ARGSUSED */
1964 static enum rofferr
1965 roff_userdef(ROFF_ARGS)
1966 {
1967 const char *arg[9];
1968 char *cp, *n1, *n2;
1969 int i;
1970
1971 /*
1972 * Collect pointers to macro argument strings
1973 * and NUL-terminate them.
1974 */
1975 cp = *bufp + pos;
1976 for (i = 0; i < 9; i++)
1977 arg[i] = '\0' == *cp ? "" :
1978 mandoc_getarg(r->parse, &cp, ln, &pos);
1979
1980 /*
1981 * Expand macro arguments.
1982 */
1983 *szp = 0;
1984 n1 = cp = mandoc_strdup(r->current_string);
1985 while (NULL != (cp = strstr(cp, "\\$"))) {
1986 i = cp[2] - '1';
1987 if (0 > i || 8 < i) {
1988 /* Not an argument invocation. */
1989 cp += 2;
1990 continue;
1991 }
1992
1993 *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1994 n2 = mandoc_malloc(*szp);
1995
1996 strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1997 strlcat(n2, arg[i], *szp);
1998 strlcat(n2, cp + 3, *szp);
1999
2000 cp = n2 + (cp - n1);
2001 free(n1);
2002 n1 = n2;
2003 }
2004
2005 /*
2006 * Replace the macro invocation
2007 * by the expanded macro.
2008 */
2009 free(*bufp);
2010 *bufp = n1;
2011 if (0 == *szp)
2012 *szp = strlen(*bufp) + 1;
2013
2014 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
2015 ROFF_REPARSE : ROFF_APPEND);
2016 }
2017
2018 static char *
2019 roff_getname(struct roff *r, char **cpp, int ln, int pos)
2020 {
2021 char *name, *cp;
2022
2023 name = *cpp;
2024 if ('\0' == *name)
2025 return(name);
2026
2027 /* Read until end of name. */
2028 for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
2029 if ('\\' != *cp)
2030 continue;
2031 cp++;
2032 if ('\\' == *cp)
2033 continue;
2034 mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
2035 *cp = '\0';
2036 name = cp;
2037 }
2038
2039 /* Nil-terminate name. */
2040 if ('\0' != *cp)
2041 *(cp++) = '\0';
2042
2043 /* Read past spaces. */
2044 while (' ' == *cp)
2045 cp++;
2046
2047 *cpp = cp;
2048 return(name);
2049 }
2050
2051 /*
2052 * Store *string into the user-defined string called *name.
2053 * To clear an existing entry, call with (*r, *name, NULL, 0).
2054 * append == 0: replace mode
2055 * append == 1: single-line append mode
2056 * append == 2: multiline append mode, append '\n' after each call
2057 */
2058 static void
2059 roff_setstr(struct roff *r, const char *name, const char *string,
2060 int append)
2061 {
2062
2063 roff_setstrn(&r->strtab, name, strlen(name), string,
2064 string ? strlen(string) : 0, append);
2065 }
2066
2067 static void
2068 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
2069 const char *string, size_t stringsz, int append)
2070 {
2071 struct roffkv *n;
2072 char *c;
2073 int i;
2074 size_t oldch, newch;
2075
2076 /* Search for an existing string with the same name. */
2077 n = *r;
2078
2079 while (n && strcmp(name, n->key.p))
2080 n = n->next;
2081
2082 if (NULL == n) {
2083 /* Create a new string table entry. */
2084 n = mandoc_malloc(sizeof(struct roffkv));
2085 n->key.p = mandoc_strndup(name, namesz);
2086 n->key.sz = namesz;
2087 n->val.p = NULL;
2088 n->val.sz = 0;
2089 n->next = *r;
2090 *r = n;
2091 } else if (0 == append) {
2092 free(n->val.p);
2093 n->val.p = NULL;
2094 n->val.sz = 0;
2095 }
2096
2097 if (NULL == string)
2098 return;
2099
2100 /*
2101 * One additional byte for the '\n' in multiline mode,
2102 * and one for the terminating '\0'.
2103 */
2104 newch = stringsz + (1 < append ? 2u : 1u);
2105
2106 if (NULL == n->val.p) {
2107 n->val.p = mandoc_malloc(newch);
2108 *n->val.p = '\0';
2109 oldch = 0;
2110 } else {
2111 oldch = n->val.sz;
2112 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
2113 }
2114
2115 /* Skip existing content in the destination buffer. */
2116 c = n->val.p + (int)oldch;
2117
2118 /* Append new content to the destination buffer. */
2119 i = 0;
2120 while (i < (int)stringsz) {
2121 /*
2122 * Rudimentary roff copy mode:
2123 * Handle escaped backslashes.
2124 */
2125 if ('\\' == string[i] && '\\' == string[i + 1])
2126 i++;
2127 *c++ = string[i++];
2128 }
2129
2130 /* Append terminating bytes. */
2131 if (1 < append)
2132 *c++ = '\n';
2133
2134 *c = '\0';
2135 n->val.sz = (int)(c - n->val.p);
2136 }
2137
2138 static const char *
2139 roff_getstrn(const struct roff *r, const char *name, size_t len)
2140 {
2141 const struct roffkv *n;
2142 int i;
2143
2144 for (n = r->strtab; n; n = n->next)
2145 if (0 == strncmp(name, n->key.p, len) &&
2146 '\0' == n->key.p[(int)len])
2147 return(n->val.p);
2148
2149 for (i = 0; i < PREDEFS_MAX; i++)
2150 if (0 == strncmp(name, predefs[i].name, len) &&
2151 '\0' == predefs[i].name[(int)len])
2152 return(predefs[i].str);
2153
2154 return(NULL);
2155 }
2156
2157 static void
2158 roff_freestr(struct roffkv *r)
2159 {
2160 struct roffkv *n, *nn;
2161
2162 for (n = r; n; n = nn) {
2163 free(n->key.p);
2164 free(n->val.p);
2165 nn = n->next;
2166 free(n);
2167 }
2168 }
2169
2170 const struct tbl_span *
2171 roff_span(const struct roff *r)
2172 {
2173
2174 return(r->tbl ? tbl_span(r->tbl) : NULL);
2175 }
2176
2177 const struct eqn *
2178 roff_eqn(const struct roff *r)
2179 {
2180
2181 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
2182 }
2183
2184 /*
2185 * Duplicate an input string, making the appropriate character
2186 * conversations (as stipulated by `tr') along the way.
2187 * Returns a heap-allocated string with all the replacements made.
2188 */
2189 char *
2190 roff_strdup(const struct roff *r, const char *p)
2191 {
2192 const struct roffkv *cp;
2193 char *res;
2194 const char *pp;
2195 size_t ssz, sz;
2196 enum mandoc_esc esc;
2197
2198 if (NULL == r->xmbtab && NULL == r->xtab)
2199 return(mandoc_strdup(p));
2200 else if ('\0' == *p)
2201 return(mandoc_strdup(""));
2202
2203 /*
2204 * Step through each character looking for term matches
2205 * (remember that a `tr' can be invoked with an escape, which is
2206 * a glyph but the escape is multi-character).
2207 * We only do this if the character hash has been initialised
2208 * and the string is >0 length.
2209 */
2210
2211 res = NULL;
2212 ssz = 0;
2213
2214 while ('\0' != *p) {
2215 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
2216 sz = r->xtab[(int)*p].sz;
2217 res = mandoc_realloc(res, ssz + sz + 1);
2218 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
2219 ssz += sz;
2220 p++;
2221 continue;
2222 } else if ('\\' != *p) {
2223 res = mandoc_realloc(res, ssz + 2);
2224 res[ssz++] = *p++;
2225 continue;
2226 }
2227
2228 /* Search for term matches. */
2229 for (cp = r->xmbtab; cp; cp = cp->next)
2230 if (0 == strncmp(p, cp->key.p, cp->key.sz))
2231 break;
2232
2233 if (NULL != cp) {
2234 /*
2235 * A match has been found.
2236 * Append the match to the array and move
2237 * forward by its keysize.
2238 */
2239 res = mandoc_realloc
2240 (res, ssz + cp->val.sz + 1);
2241 memcpy(res + ssz, cp->val.p, cp->val.sz);
2242 ssz += cp->val.sz;
2243 p += (int)cp->key.sz;
2244 continue;
2245 }
2246
2247 /*
2248 * Handle escapes carefully: we need to copy
2249 * over just the escape itself, or else we might
2250 * do replacements within the escape itself.
2251 * Make sure to pass along the bogus string.
2252 */
2253 pp = p++;
2254 esc = mandoc_escape(&p, NULL, NULL);
2255 if (ESCAPE_ERROR == esc) {
2256 sz = strlen(pp);
2257 res = mandoc_realloc(res, ssz + sz + 1);
2258 memcpy(res + ssz, pp, sz);
2259 break;
2260 }
2261 /*
2262 * We bail out on bad escapes.
2263 * No need to warn: we already did so when
2264 * roff_res() was called.
2265 */
2266 sz = (int)(p - pp);
2267 res = mandoc_realloc(res, ssz + sz + 1);
2268 memcpy(res + ssz, pp, sz);
2269 ssz += sz;
2270 }
2271
2272 res[(int)ssz] = '\0';
2273 return(res);
2274 }
2275
2276 /*
2277 * Find out whether a line is a macro line or not.
2278 * If it is, adjust the current position and return one; if it isn't,
2279 * return zero and don't change the current position.
2280 * If the control character has been set with `.cc', then let that grain
2281 * precedence.
2282 * This is slighly contrary to groff, where using the non-breaking
2283 * control character when `cc' has been invoked will cause the
2284 * non-breaking macro contents to be printed verbatim.
2285 */
2286 int
2287 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
2288 {
2289 int pos;
2290
2291 pos = *ppos;
2292
2293 if (0 != r->control && cp[pos] == r->control)
2294 pos++;
2295 else if (0 != r->control)
2296 return(0);
2297 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
2298 pos += 2;
2299 else if ('.' == cp[pos] || '\'' == cp[pos])
2300 pos++;
2301 else
2302 return(0);
2303
2304 while (' ' == cp[pos] || '\t' == cp[pos])
2305 pos++;
2306
2307 *ppos = pos;
2308 return(1);
2309 }