]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
When the -n or -t flag is given to makewhatis(8),
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.202 2014/03/23 12:11:18 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include "mandoc.h"
29 #include "mandoc_aux.h"
30 #include "libroff.h"
31 #include "libmandoc.h"
32
/* Upper bound on the nesting depth of if-else conditionals. */
#define	RSTACK_MAX	128

/* Cap on string expansions per input line; breaks infinite loops. */
#define	EXPAND_LIMIT	1000
38
/*
 * Tokens for the requests and macros handled in this file.
 * The order must match the roffs[] table, which is indexed by token.
 */
enum	rofft {
	/* Low-level roff requests. */
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_as,
	ROFF_cc,
	ROFF_ce,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_fam,
	ROFF_hw,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_ps,
	ROFF_rm,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,
	/* Macros that are intercepted by name. */
	ROFF_Dd,
	ROFF_TH,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,
	ROFF_EQ,
	ROFF_EN,
	/* Pseudo-tokens. */
	ROFF_cblock,	/* block closure, registered under the name "." */
	ROFF_USERDEF,	/* user-defined macro; has no name in roffs[] */
	ROFF_MAX	/* number of tokens; also used for "not found" */
};
79
/*
 * Minimal string buffer: a pointer together with its cached length.
 */
struct	roffstr {
	char		*p;	/* nil-terminated buffer */
	size_t		 sz;	/* saved strlen(p) */
};
87
88 /*
89 * A key-value roffstr pair as part of a singly-linked list.
90 */
91 struct roffkv {
92 struct roffstr key;
93 struct roffstr val;
94 struct roffkv *next; /* next in list */
95 };
96
97 /*
98 * A single number register as part of a singly-linked list.
99 */
100 struct roffreg {
101 struct roffstr key;
102 int val;
103 struct roffreg *next;
104 };
105
106 struct roff {
107 struct mparse *parse; /* parse point */
108 int options; /* parse options */
109 struct roffnode *last; /* leaf of stack */
110 int rstack[RSTACK_MAX]; /* stack of !`ie' rules */
111 char control; /* control character */
112 int rstackpos; /* position in rstack */
113 struct roffreg *regtab; /* number registers */
114 struct roffkv *strtab; /* user-defined strings & macros */
115 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */
116 struct roffstr *xtab; /* single-byte trans table (`tr') */
117 const char *current_string; /* value of last called user macro */
118 struct tbl_node *first_tbl; /* first table parsed */
119 struct tbl_node *last_tbl; /* last table parsed */
120 struct tbl_node *tbl; /* current table being parsed */
121 struct eqn_node *last_eqn; /* last equation parsed */
122 struct eqn_node *first_eqn; /* first equation parsed */
123 struct eqn_node *eqn; /* current equation being parsed */
124 };
125
126 struct roffnode {
127 enum rofft tok; /* type of node */
128 struct roffnode *parent; /* up one in stack */
129 int line; /* parse line */
130 int col; /* parse col */
131 char *name; /* node name, e.g. macro name */
132 char *end; /* end-rules: custom token */
133 int endspan; /* end-rules: next-line or infty */
134 int rule; /* current evaluation rule */
135 };
136
/* Common parameter list shared by all request and macro handlers. */
#define	ROFF_ARGS	 struct roff *r,  /* parse ctx */ \
			 enum rofft tok,  /* tok of macro */ \
			 char **bufp,     /* input buffer */ \
			 size_t *szp,     /* size of input buffer */ \
			 int ln,          /* parse line */ \
			 int ppos,        /* original pos in buffer */ \
			 int pos,         /* current pos in buffer */ \
			 int *offs        /* reset offset of buffer data */

/* Handler function taking the common parameter list. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
147
148 struct roffmac {
149 const char *name; /* macro name */
150 roffproc proc; /* process new macro */
151 roffproc text; /* process as child text of macro */
152 roffproc sub; /* process as child of macro */
153 int flags;
154 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
155 struct roffmac *next;
156 };
157
/* A predefined string: the input name and the text replacing it. */
struct	predef {
	const char	*name;	/* predefined input name */
	const char	*str;	/* replacement symbol */
};
162
/* Expands to one initializer of the predefs[] array, comma included. */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
165
/* Token lookup and node stack maintenance. */
static	enum rofft	 roffhash_find(const char *, size_t);
static	void		 roffhash_init(void);
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum rofft,
				const char *, int, int);

/* Helpers with individual signatures. */
static	void		 roff_ccond(struct roff *, int, int);
static	int		 roff_evalcond(const char *, int *);
static	int		 roff_evalstrcond(const char *, int *);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	char		*roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(const struct roff *,
				const char *, size_t);
static	int		 roff_getregro(const char *name);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	void		 roff_openeqn(struct roff *, const char *,
				int, int, const char *);
static	enum rofft	 roff_parse(struct roff *, const char *, int *);
static	enum rofferr	 roff_parsetext(char **, size_t *, int, int *);
static	enum rofferr	 roff_res(struct roff *,
				char **, size_t *, int, int);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);

/* Handlers sharing the ROFF_ARGS parameter list. */
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_cc(ROFF_ARGS);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum rofferr	 roff_it(ROFF_ARGS);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_tr(ROFF_ARGS);
static	enum rofferr	 roff_Dd(ROFF_ARGS);
static	enum rofferr	 roff_TH(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
219
/*
 * Bounds of the characters that may start a macro name, and the
 * resulting number of hash buckets; see roffhash_find().
 */
#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

/* One chain of roffs[] entries for each possible first character. */
static	struct roffmac	*hash[HASHWIDTH];
227
228 static struct roffmac roffs[ROFF_MAX] = {
229 { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
230 { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
231 { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
232 { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
233 { "as", roff_ds, NULL, NULL, 0, NULL },
234 { "cc", roff_cc, NULL, NULL, 0, NULL },
235 { "ce", roff_line_ignore, NULL, NULL, 0, NULL },
236 { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
237 { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
238 { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
239 { "ds", roff_ds, NULL, NULL, 0, NULL },
240 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
241 { "fam", roff_line_ignore, NULL, NULL, 0, NULL },
242 { "hw", roff_line_ignore, NULL, NULL, 0, NULL },
243 { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
244 { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
245 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
246 { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
247 { "it", roff_it, NULL, NULL, 0, NULL },
248 { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
249 { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
250 { "nr", roff_nr, NULL, NULL, 0, NULL },
251 { "ns", roff_line_ignore, NULL, NULL, 0, NULL },
252 { "ps", roff_line_ignore, NULL, NULL, 0, NULL },
253 { "rm", roff_rm, NULL, NULL, 0, NULL },
254 { "so", roff_so, NULL, NULL, 0, NULL },
255 { "ta", roff_line_ignore, NULL, NULL, 0, NULL },
256 { "tr", roff_tr, NULL, NULL, 0, NULL },
257 { "Dd", roff_Dd, NULL, NULL, 0, NULL },
258 { "TH", roff_TH, NULL, NULL, 0, NULL },
259 { "TS", roff_TS, NULL, NULL, 0, NULL },
260 { "TE", roff_TE, NULL, NULL, 0, NULL },
261 { "T&", roff_T_, NULL, NULL, 0, NULL },
262 { "EQ", roff_EQ, NULL, NULL, 0, NULL },
263 { "EN", roff_EN, NULL, NULL, 0, NULL },
264 { ".", roff_cblock, NULL, NULL, 0, NULL },
265 { NULL, roff_userdef, NULL, NULL, 0, NULL },
266 };
267
/*
 * NULL-terminated list of reserved mdoc macro names.
 * not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH
 */
const	char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq", "Brc", "Bro", "Brq",
	"Bsx", "Bt", "Bx",
	"Cd", "Cm",
	"Db", "Dc", "Dd", "Dl", "Do", "Dq", "Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em", "En", "Eo", "Er", "Es",
	"Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf", "Ic", "In", "It",
	"Lb", "Li", "Lk", "Lp",
	"Ms", "Mt",
	"Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "Pq",
	"Qc", "Ql", "Qo", "Qq",
	"Re", "Rs", "Rv",
	"Sc", "Sh", "Sm", "So", "Sq", "Ss", "St", "Sx", "Sy",
	"Ta", "Tn",
	"Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
	"%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
290
/*
 * NULL-terminated list of reserved man macro names.
 * not currently implemented: BT DE DS ME MT PT SY TQ YS
 */
const	char *const __man_reserved[] = {
	"AT",
	"B", "BI", "BR",
	"DT",
	"EE", "EN", "EQ", "EX",
	"HP",
	"I", "IB", "IP", "IR",
	"LP",
	"OP",
	"P", "PD", "PP",
	"R", "RB", "RE", "RI", "RS",
	"SB", "SH", "SM", "SS",
	"TE", "TH", "TP", "TS", "T&",
	"UC", "UE", "UR",
	NULL
};
300
301 /* Array of injected predefined strings. */
302 #define PREDEFS_MAX 38
303 static const struct predef predefs[PREDEFS_MAX] = {
304 #include "predefs.in"
305 };
306
/* Hash bucket of a macro name: its first byte, offset by ASCII_LO. */
#define	ROFF_HASH(p)	(p[0] - ASCII_LO)
309
/* State of the pending input line trap. */
static	int	 roffit_lines;	/* number of lines to delay */
static	char	*roffit_macro;	/* nil-terminated macro line */
312
313 static void
314 roffhash_init(void)
315 {
316 struct roffmac *n;
317 int buc, i;
318
319 for (i = 0; i < (int)ROFF_USERDEF; i++) {
320 assert(roffs[i].name[0] >= ASCII_LO);
321 assert(roffs[i].name[0] <= ASCII_HI);
322
323 buc = ROFF_HASH(roffs[i].name);
324
325 if (NULL != (n = hash[buc])) {
326 for ( ; n->next; n = n->next)
327 /* Do nothing. */ ;
328 n->next = &roffs[i];
329 } else
330 hash[buc] = &roffs[i];
331 }
332 }
333
334 /*
335 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
336 * the nil-terminated string name could be found.
337 */
338 static enum rofft
339 roffhash_find(const char *p, size_t s)
340 {
341 int buc;
342 struct roffmac *n;
343
344 /*
345 * libroff has an extremely simple hashtable, for the time
346 * being, which simply keys on the first character, which must
347 * be printable, then walks a chain. It works well enough until
348 * optimised.
349 */
350
351 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
352 return(ROFF_MAX);
353
354 buc = ROFF_HASH(p);
355
356 if (NULL == (n = hash[buc]))
357 return(ROFF_MAX);
358 for ( ; n; n = n->next)
359 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
360 return((enum rofft)(n - roffs));
361
362 return(ROFF_MAX);
363 }
364
365
366 /*
367 * Pop the current node off of the stack of roff instructions currently
368 * pending.
369 */
370 static void
371 roffnode_pop(struct roff *r)
372 {
373 struct roffnode *p;
374
375 assert(r->last);
376 p = r->last;
377
378 r->last = r->last->parent;
379 free(p->name);
380 free(p->end);
381 free(p);
382 }
383
384
385 /*
386 * Push a roff node onto the instruction stack. This must later be
387 * removed with roffnode_pop().
388 */
389 static void
390 roffnode_push(struct roff *r, enum rofft tok, const char *name,
391 int line, int col)
392 {
393 struct roffnode *p;
394
395 p = mandoc_calloc(1, sizeof(struct roffnode));
396 p->tok = tok;
397 if (name)
398 p->name = mandoc_strdup(name);
399 p->parent = r->last;
400 p->line = line;
401 p->col = col;
402 p->rule = p->parent ? p->parent->rule : 0;
403
404 r->last = p;
405 }
406
407
408 static void
409 roff_free1(struct roff *r)
410 {
411 struct tbl_node *tbl;
412 struct eqn_node *e;
413 int i;
414
415 while (NULL != (tbl = r->first_tbl)) {
416 r->first_tbl = tbl->next;
417 tbl_free(tbl);
418 }
419
420 r->first_tbl = r->last_tbl = r->tbl = NULL;
421
422 while (NULL != (e = r->first_eqn)) {
423 r->first_eqn = e->next;
424 eqn_free(e);
425 }
426
427 r->first_eqn = r->last_eqn = r->eqn = NULL;
428
429 while (r->last)
430 roffnode_pop(r);
431
432 roff_freestr(r->strtab);
433 roff_freestr(r->xmbtab);
434
435 r->strtab = r->xmbtab = NULL;
436
437 roff_freereg(r->regtab);
438
439 r->regtab = NULL;
440
441 if (r->xtab)
442 for (i = 0; i < 128; i++)
443 free(r->xtab[i].p);
444
445 free(r->xtab);
446 r->xtab = NULL;
447 }
448
449 void
450 roff_reset(struct roff *r)
451 {
452
453 roff_free1(r);
454 r->control = 0;
455 }
456
457
/*
 * Destroy a parser: release all data it holds, then the parser
 * structure itself.
 */
void
roff_free(struct roff *r)
{

	roff_free1(r);
	free(r);
}
465
466
467 struct roff *
468 roff_alloc(struct mparse *parse, int options)
469 {
470 struct roff *r;
471
472 r = mandoc_calloc(1, sizeof(struct roff));
473 r->parse = parse;
474 r->options = options;
475 r->rstackpos = -1;
476
477 roffhash_init();
478
479 return(r);
480 }
481
482 /*
483 * In the current line, expand user-defined strings ("\*")
484 * and references to number registers ("\n").
485 * Also check the syntax of other escape sequences.
486 */
487 static enum rofferr
488 roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
489 {
490 char ubuf[12]; /* buffer to print the number */
491 const char *stesc; /* start of an escape sequence ('\\') */
492 const char *stnam; /* start of the name, after "[(*" */
493 const char *cp; /* end of the name, e.g. before ']' */
494 const char *res; /* the string to be substituted */
495 char *nbuf; /* new buffer to copy bufp to */
496 size_t nsz; /* size of the new buffer */
497 size_t maxl; /* expected length of the escape name */
498 size_t naml; /* actual length of the escape name */
499 int expand_count; /* to avoid infinite loops */
500
501 expand_count = 0;
502
503 again:
504 cp = *bufp + pos;
505 while (NULL != (cp = strchr(cp, '\\'))) {
506 stesc = cp++;
507
508 /*
509 * The second character must be an asterisk or an n.
510 * If it isn't, skip it anyway: It is escaped,
511 * so it can't start another escape sequence.
512 */
513
514 if ('\0' == *cp)
515 return(ROFF_CONT);
516
517 switch (*cp) {
518 case ('*'):
519 res = NULL;
520 break;
521 case ('n'):
522 res = ubuf;
523 break;
524 default:
525 if (ESCAPE_ERROR != mandoc_escape(&cp, NULL, NULL))
526 continue;
527 mandoc_msg
528 (MANDOCERR_BADESCAPE, r->parse,
529 ln, (int)(stesc - *bufp), NULL);
530 return(ROFF_CONT);
531 }
532
533 cp++;
534
535 /*
536 * The third character decides the length
537 * of the name of the string or register.
538 * Save a pointer to the name.
539 */
540
541 switch (*cp) {
542 case ('\0'):
543 return(ROFF_CONT);
544 case ('('):
545 cp++;
546 maxl = 2;
547 break;
548 case ('['):
549 cp++;
550 maxl = 0;
551 break;
552 default:
553 maxl = 1;
554 break;
555 }
556 stnam = cp;
557
558 /* Advance to the end of the name. */
559
560 for (naml = 0; 0 == maxl || naml < maxl; naml++, cp++) {
561 if ('\0' == *cp) {
562 mandoc_msg
563 (MANDOCERR_BADESCAPE,
564 r->parse, ln,
565 (int)(stesc - *bufp), NULL);
566 return(ROFF_CONT);
567 }
568 if (0 == maxl && ']' == *cp)
569 break;
570 }
571
572 /*
573 * Retrieve the replacement string; if it is
574 * undefined, resume searching for escapes.
575 */
576
577 if (NULL == res)
578 res = roff_getstrn(r, stnam, naml);
579 else
580 snprintf(ubuf, sizeof(ubuf), "%d",
581 roff_getregn(r, stnam, naml));
582
583 if (NULL == res) {
584 mandoc_msg
585 (MANDOCERR_BADESCAPE, r->parse,
586 ln, (int)(stesc - *bufp), NULL);
587 res = "";
588 }
589
590 /* Replace the escape sequence by the string. */
591
592 pos = stesc - *bufp;
593
594 nsz = *szp + strlen(res) + 1;
595 nbuf = mandoc_malloc(nsz);
596
597 strlcpy(nbuf, *bufp, (size_t)(stesc - *bufp + 1));
598 strlcat(nbuf, res, nsz);
599 strlcat(nbuf, cp + (maxl ? 0 : 1), nsz);
600
601 free(*bufp);
602
603 *bufp = nbuf;
604 *szp = nsz;
605
606 if (EXPAND_LIMIT >= ++expand_count)
607 goto again;
608
609 /* Just leave the string unexpanded. */
610 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL);
611 return(ROFF_IGN);
612 }
613 return(ROFF_CONT);
614 }
615
616 /*
617 * Process text streams:
618 * Convert all breakable hyphens into ASCII_HYPH.
619 * Decrement and spring input line trap.
620 */
621 static enum rofferr
622 roff_parsetext(char **bufp, size_t *szp, int pos, int *offs)
623 {
624 size_t sz;
625 const char *start;
626 char *p;
627 int isz;
628 enum mandoc_esc esc;
629
630 start = p = *bufp + pos;
631
632 while ('\0' != *p) {
633 sz = strcspn(p, "-\\");
634 p += sz;
635
636 if ('\0' == *p)
637 break;
638
639 if ('\\' == *p) {
640 /* Skip over escapes. */
641 p++;
642 esc = mandoc_escape((const char **)&p, NULL, NULL);
643 if (ESCAPE_ERROR == esc)
644 break;
645 continue;
646 } else if (p == start) {
647 p++;
648 continue;
649 }
650
651 if (isalpha((unsigned char)p[-1]) &&
652 isalpha((unsigned char)p[1]))
653 *p = ASCII_HYPH;
654 p++;
655 }
656
657 /* Spring the input line trap. */
658 if (1 == roffit_lines) {
659 isz = mandoc_asprintf(&p, "%s\n.%s", *bufp, roffit_macro);
660 free(*bufp);
661 *bufp = p;
662 *szp = isz + 1;
663 *offs = 0;
664 free(roffit_macro);
665 roffit_lines = 0;
666 return(ROFF_REPARSE);
667 } else if (1 < roffit_lines)
668 --roffit_lines;
669 return(ROFF_CONT);
670 }
671
672 enum rofferr
673 roff_parseln(struct roff *r, int ln, char **bufp,
674 size_t *szp, int pos, int *offs)
675 {
676 enum rofft t;
677 enum rofferr e;
678 int ppos, ctl;
679
680 /*
681 * Run the reserved-word filter only if we have some reserved
682 * words to fill in.
683 */
684
685 e = roff_res(r, bufp, szp, ln, pos);
686 if (ROFF_IGN == e)
687 return(e);
688 assert(ROFF_CONT == e);
689
690 ppos = pos;
691 ctl = roff_getcontrol(r, *bufp, &pos);
692
693 /*
694 * First, if a scope is open and we're not a macro, pass the
695 * text through the macro's filter. If a scope isn't open and
696 * we're not a macro, just let it through.
697 * Finally, if there's an equation scope open, divert it into it
698 * no matter our state.
699 */
700
701 if (r->last && ! ctl) {
702 t = r->last->tok;
703 assert(roffs[t].text);
704 e = (*roffs[t].text)
705 (r, t, bufp, szp, ln, pos, pos, offs);
706 assert(ROFF_IGN == e || ROFF_CONT == e);
707 if (ROFF_CONT != e)
708 return(e);
709 }
710 if (r->eqn)
711 return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
712 if ( ! ctl) {
713 if (r->tbl)
714 return(tbl_read(r->tbl, ln, *bufp, pos));
715 return(roff_parsetext(bufp, szp, pos, offs));
716 }
717
718 /*
719 * If a scope is open, go to the child handler for that macro,
720 * as it may want to preprocess before doing anything with it.
721 * Don't do so if an equation is open.
722 */
723
724 if (r->last) {
725 t = r->last->tok;
726 assert(roffs[t].sub);
727 return((*roffs[t].sub)
728 (r, t, bufp, szp,
729 ln, ppos, pos, offs));
730 }
731
732 /*
733 * Lastly, as we've no scope open, try to look up and execute
734 * the new macro. If no macro is found, simply return and let
735 * the compilers handle it.
736 */
737
738 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
739 return(ROFF_CONT);
740
741 assert(roffs[t].proc);
742 return((*roffs[t].proc)
743 (r, t, bufp, szp,
744 ln, ppos, pos, offs));
745 }
746
747
748 void
749 roff_endparse(struct roff *r)
750 {
751
752 if (r->last)
753 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
754 r->last->line, r->last->col, NULL);
755
756 if (r->eqn) {
757 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
758 r->eqn->eqn.ln, r->eqn->eqn.pos, NULL);
759 eqn_end(&r->eqn);
760 }
761
762 if (r->tbl) {
763 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
764 r->tbl->line, r->tbl->pos, NULL);
765 tbl_end(&r->tbl);
766 }
767 }
768
769 /*
770 * Parse a roff node's type from the input buffer. This must be in the
771 * form of ".foo xxx" in the usual way.
772 */
773 static enum rofft
774 roff_parse(struct roff *r, const char *buf, int *pos)
775 {
776 const char *mac;
777 size_t maclen;
778 enum rofft t;
779
780 if ('\0' == buf[*pos] || '"' == buf[*pos] ||
781 '\t' == buf[*pos] || ' ' == buf[*pos])
782 return(ROFF_MAX);
783
784 /* We stop the macro parse at an escape, tab, space, or nil. */
785
786 mac = buf + *pos;
787 maclen = strcspn(mac, " \\\t\0");
788
789 t = (r->current_string = roff_getstrn(r, mac, maclen))
790 ? ROFF_USERDEF : roffhash_find(mac, maclen);
791
792 *pos += (int)maclen;
793
794 while (buf[*pos] && ' ' == buf[*pos])
795 (*pos)++;
796
797 return(t);
798 }
799
800 /* ARGSUSED */
801 static enum rofferr
802 roff_cblock(ROFF_ARGS)
803 {
804
805 /*
806 * A block-close `..' should only be invoked as a child of an
807 * ignore macro, otherwise raise a warning and just ignore it.
808 */
809
810 if (NULL == r->last) {
811 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
812 return(ROFF_IGN);
813 }
814
815 switch (r->last->tok) {
816 case (ROFF_am):
817 /* FALLTHROUGH */
818 case (ROFF_ami):
819 /* FALLTHROUGH */
820 case (ROFF_am1):
821 /* FALLTHROUGH */
822 case (ROFF_de):
823 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
824 /* FALLTHROUGH */
825 case (ROFF_dei):
826 /* FALLTHROUGH */
827 case (ROFF_ig):
828 break;
829 default:
830 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
831 return(ROFF_IGN);
832 }
833
834 if ((*bufp)[pos])
835 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
836
837 roffnode_pop(r);
838 roffnode_cleanscope(r);
839 return(ROFF_IGN);
840
841 }
842
843
844 static void
845 roffnode_cleanscope(struct roff *r)
846 {
847
848 while (r->last) {
849 if (--r->last->endspan != 0)
850 break;
851 roffnode_pop(r);
852 }
853 }
854
855
856 static void
857 roff_ccond(struct roff *r, int ln, int ppos)
858 {
859
860 if (NULL == r->last) {
861 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
862 return;
863 }
864
865 switch (r->last->tok) {
866 case (ROFF_el):
867 /* FALLTHROUGH */
868 case (ROFF_ie):
869 /* FALLTHROUGH */
870 case (ROFF_if):
871 break;
872 default:
873 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
874 return;
875 }
876
877 if (r->last->endspan > -1) {
878 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
879 return;
880 }
881
882 roffnode_pop(r);
883 roffnode_cleanscope(r);
884 return;
885 }
886
887
888 /* ARGSUSED */
889 static enum rofferr
890 roff_block(ROFF_ARGS)
891 {
892 int sv;
893 size_t sz;
894 char *name;
895
896 name = NULL;
897
898 if (ROFF_ig != tok) {
899 if ('\0' == (*bufp)[pos]) {
900 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
901 return(ROFF_IGN);
902 }
903
904 /*
905 * Re-write `de1', since we don't really care about
906 * groff's strange compatibility mode, into `de'.
907 */
908
909 if (ROFF_de1 == tok)
910 tok = ROFF_de;
911 if (ROFF_de == tok)
912 name = *bufp + pos;
913 else
914 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
915 roffs[tok].name);
916
917 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
918 pos++;
919
920 while (isspace((unsigned char)(*bufp)[pos]))
921 (*bufp)[pos++] = '\0';
922 }
923
924 roffnode_push(r, tok, name, ln, ppos);
925
926 /*
927 * At the beginning of a `de' macro, clear the existing string
928 * with the same name, if there is one. New content will be
929 * appended from roff_block_text() in multiline mode.
930 */
931
932 if (ROFF_de == tok)
933 roff_setstr(r, name, "", 0);
934
935 if ('\0' == (*bufp)[pos])
936 return(ROFF_IGN);
937
938 /* If present, process the custom end-of-line marker. */
939
940 sv = pos;
941 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
942 pos++;
943
944 /*
945 * Note: groff does NOT like escape characters in the input.
946 * Instead of detecting this, we're just going to let it fly and
947 * to hell with it.
948 */
949
950 assert(pos > sv);
951 sz = (size_t)(pos - sv);
952
953 if (1 == sz && '.' == (*bufp)[sv])
954 return(ROFF_IGN);
955
956 r->last->end = mandoc_malloc(sz + 1);
957
958 memcpy(r->last->end, *bufp + sv, sz);
959 r->last->end[(int)sz] = '\0';
960
961 if ((*bufp)[pos])
962 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
963
964 return(ROFF_IGN);
965 }
966
967
968 /* ARGSUSED */
969 static enum rofferr
970 roff_block_sub(ROFF_ARGS)
971 {
972 enum rofft t;
973 int i, j;
974
975 /*
976 * First check whether a custom macro exists at this level. If
977 * it does, then check against it. This is some of groff's
978 * stranger behaviours. If we encountered a custom end-scope
979 * tag and that tag also happens to be a "real" macro, then we
980 * need to try interpreting it again as a real macro. If it's
981 * not, then return ignore. Else continue.
982 */
983
984 if (r->last->end) {
985 for (i = pos, j = 0; r->last->end[j]; j++, i++)
986 if ((*bufp)[i] != r->last->end[j])
987 break;
988
989 if ('\0' == r->last->end[j] &&
990 ('\0' == (*bufp)[i] ||
991 ' ' == (*bufp)[i] ||
992 '\t' == (*bufp)[i])) {
993 roffnode_pop(r);
994 roffnode_cleanscope(r);
995
996 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
997 i++;
998
999 pos = i;
1000 if (ROFF_MAX != roff_parse(r, *bufp, &pos))
1001 return(ROFF_RERUN);
1002 return(ROFF_IGN);
1003 }
1004 }
1005
1006 /*
1007 * If we have no custom end-query or lookup failed, then try
1008 * pulling it out of the hashtable.
1009 */
1010
1011 t = roff_parse(r, *bufp, &pos);
1012
1013 /*
1014 * Macros other than block-end are only significant
1015 * in `de' blocks; elsewhere, simply throw them away.
1016 */
1017 if (ROFF_cblock != t) {
1018 if (ROFF_de == tok)
1019 roff_setstr(r, r->last->name, *bufp + ppos, 2);
1020 return(ROFF_IGN);
1021 }
1022
1023 assert(roffs[t].proc);
1024 return((*roffs[t].proc)(r, t, bufp, szp,
1025 ln, ppos, pos, offs));
1026 }
1027
1028
1029 /* ARGSUSED */
1030 static enum rofferr
1031 roff_block_text(ROFF_ARGS)
1032 {
1033
1034 if (ROFF_de == tok)
1035 roff_setstr(r, r->last->name, *bufp + pos, 2);
1036
1037 return(ROFF_IGN);
1038 }
1039
1040
1041 /* ARGSUSED */
1042 static enum rofferr
1043 roff_cond_sub(ROFF_ARGS)
1044 {
1045 enum rofft t;
1046 char *ep;
1047 int rr;
1048
1049 rr = r->last->rule;
1050 roffnode_cleanscope(r);
1051 t = roff_parse(r, *bufp, &pos);
1052
1053 /*
1054 * Fully handle known macros when they are structurally
1055 * required or when the conditional evaluated to true.
1056 */
1057
1058 if ((ROFF_MAX != t) &&
1059 (rr || ROFFMAC_STRUCT & roffs[t].flags)) {
1060 assert(roffs[t].proc);
1061 return((*roffs[t].proc)(r, t, bufp, szp,
1062 ln, ppos, pos, offs));
1063 }
1064
1065 /*
1066 * If `\}' occurs on a macro line without a preceding macro,
1067 * drop the line completely.
1068 */
1069
1070 ep = *bufp + pos;
1071 if ('\\' == ep[0] && '}' == ep[1])
1072 rr = 0;
1073
1074 /* Always check for the closing delimiter `\}'. */
1075
1076 while (NULL != (ep = strchr(ep, '\\'))) {
1077 if ('}' == *(++ep)) {
1078 *ep = '&';
1079 roff_ccond(r, ln, ep - *bufp - 1);
1080 }
1081 ++ep;
1082 }
1083 return(rr ? ROFF_CONT : ROFF_IGN);
1084 }
1085
1086 /* ARGSUSED */
1087 static enum rofferr
1088 roff_cond_text(ROFF_ARGS)
1089 {
1090 char *ep;
1091 int rr;
1092
1093 rr = r->last->rule;
1094 roffnode_cleanscope(r);
1095
1096 ep = *bufp + pos;
1097 while (NULL != (ep = strchr(ep, '\\'))) {
1098 if ('}' == *(++ep)) {
1099 *ep = '&';
1100 roff_ccond(r, ln, ep - *bufp - 1);
1101 }
1102 ++ep;
1103 }
1104 return(rr ? ROFF_CONT : ROFF_IGN);
1105 }
1106
/*
 * Parse an optionally negative decimal integer at v[*pos].
 * On success, store the value in *res, advance *pos past the last
 * digit, and return 1.  If there is no digit, return 0 and leave
 * *pos alone; *res is set to 0 in that case.
 * No protection against integer overflow is provided.
 */
static int
roff_getnum(const char *v, int *pos, int *res)
{
	int p, n;

	p = *pos;
	n = v[p] == '-';
	if (n)
		p++;

	/* Shift the digits collected so far and add the new one. */
	for (*res = 0; isdigit((unsigned char)v[p]); p++)
		*res = 10 * *res + v[p] - '0';

	/* Nothing but possibly a minus sign: not a number. */
	if (p == *pos + n)
		return 0;

	if (n)
		*res = -*res;

	*pos = p;
	return 1;
}
1128
/*
 * Parse a comparison operator at v[*pos].
 * Recognized are "=", "==", "<", ">", "<=", and ">=".  The result
 * is stored in *res and returned: '=', '<', '>', 'l' for "<=", or
 * 'g' for ">=", and *pos is advanced past the operator.
 * If there is no operator, 0 is returned, *pos is left alone,
 * and *res holds the offending character.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	int e;

	*res = v[*pos];

	switch (*res) {
	case '=':
		/* FALLTHROUGH */
	case '>':
		/* FALLTHROUGH */
	case '<':
		break;
	default:
		return(0);
	}

	/*
	 * Look at the following byte only now that the current
	 * one is known not to be the terminating nil.
	 */
	e = v[*pos + 1] == '=';

	if (e) {
		if ('>' == *res)
			*res = 'g';
		else if ('<' == *res)
			*res = 'l';
	}

	*pos += 1 + e;

	return(*res);
}
1156
/*
 * Evaluate a string comparison condition.
 * The first character is the delimiter.
 * Succeed if the string up to its second occurrence
 * matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or lacking that, to the end of the line.
 * Returns 1 on a match and 0 otherwise.  On empty input,
 * return 0 without moving the cursor.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*s1, *s2, *s3;
	int		 match;

	match = 0;
	s1 = v + *pos;		/* initial delimiter */

	/* Without a delimiter, there is nothing to scan. */
	if ('\0' == *s1)
		return(0);

	s2 = s1 + 1;		/* for scanning the first string */
	s3 = strchr(s2, *s1);	/* for scanning the second string */

	if (NULL == s3)	/* found no middle delimiter */
		goto out;

	while ('\0' != *++s3) {
		if (*s2 != *s3) {  /* mismatch */
			s3 = strchr(s3, *s1);
			break;
		}
		if (*s3 == *s1) {  /* found the final delimiter */
			match = 1;
			break;
		}
		s2++;
	}

out:
	/*
	 * Three cases: no further delimiter (go to the end of the
	 * line), stopped on the final delimiter (step over it), or
	 * ran into the end of the line (stay there, do not step
	 * over the terminating nil).
	 */
	if (NULL == s3)
		s3 = strchr(s2, '\0');
	else if ('\0' != *s3)
		s3++;
	*pos = s3 - v;
	return(match);
}
1199
/*
 * Evaluate the condition of an `if' or `ie' request at v[*pos],
 * advancing *pos past the part that was consumed.
 * A leading `!' negates the condition.  The conditions `n' and `o'
 * are taken to be true; `c', `d', `e', `r', and `t' to be false.
 * A number alone is true if positive, two numbers are compared by
 * the operator between them, and anything not starting with a
 * number is evaluated as a string comparison.
 * Returns non-zero if the condition holds.
 */
static int
roff_evalcond(const char *v, int *pos)
{
	int	 lhs, rhs, want, truth;
	char	 op;

	want = 1;
	if ('!' == v[*pos]) {
		want = 0;
		(*pos)++;
	}

	switch (v[*pos]) {
	case ('n'):
	case ('o'):
		(*pos)++;
		return(want);
	case ('c'):
	case ('d'):
	case ('e'):
	case ('r'):
	case ('t'):
		(*pos)++;
		return(!want);
	default:
		break;
	}

	if ( ! roff_getnum(v, pos, &lhs))
		return(roff_evalstrcond(v, pos) == want);
	if ( ! roff_getop(v, pos, &op))
		return((lhs > 0) == want);

	/* An operator without a right operand is always false. */
	if ( ! roff_getnum(v, pos, &rhs))
		return(0);

	switch (op) {
	case 'g':
		truth = lhs >= rhs;
		break;
	case 'l':
		truth = lhs <= rhs;
		break;
	case '=':
		truth = lhs == rhs;
		break;
	case '>':
		truth = lhs > rhs;
		break;
	case '<':
		truth = lhs < rhs;
		break;
	default:
		return(0);
	}
	return(truth == want);
}
1255
1256 /* ARGSUSED */
1257 static enum rofferr
1258 roff_line_ignore(ROFF_ARGS)
1259 {
1260
1261 return(ROFF_IGN);
1262 }
1263
1264 /* ARGSUSED */
1265 static enum rofferr
1266 roff_cond(ROFF_ARGS)
1267 {
1268
1269 roffnode_push(r, tok, NULL, ln, ppos);
1270
1271 /*
1272 * An `.el' has no conditional body: it will consume the value
1273 * of the current rstack entry set in prior `ie' calls or
1274 * defaults to DENY.
1275 *
1276 * If we're not an `el', however, then evaluate the conditional.
1277 */
1278
1279 r->last->rule = ROFF_el == tok ?
1280 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
1281 roff_evalcond(*bufp, &pos);
1282
1283 /*
1284 * An if-else will put the NEGATION of the current evaluated
1285 * conditional into the stack of rules.
1286 */
1287
1288 if (ROFF_ie == tok) {
1289 if (r->rstackpos == RSTACK_MAX - 1) {
1290 mandoc_msg(MANDOCERR_MEM,
1291 r->parse, ln, ppos, NULL);
1292 return(ROFF_ERR);
1293 }
1294 r->rstack[++r->rstackpos] = !r->last->rule;
1295 }
1296
1297 /* If the parent has false as its rule, then so do we. */
1298
1299 if (r->last->parent && !r->last->parent->rule)
1300 r->last->rule = 0;
1301
1302 /*
1303 * Determine scope.
1304 * If there is nothing on the line after the conditional,
1305 * not even whitespace, use next-line scope.
1306 */
1307
1308 if ('\0' == (*bufp)[pos]) {
1309 r->last->endspan = 2;
1310 goto out;
1311 }
1312
1313 while (' ' == (*bufp)[pos])
1314 pos++;
1315
1316 /* An opening brace requests multiline scope. */
1317
1318 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1319 r->last->endspan = -1;
1320 pos += 2;
1321 goto out;
1322 }
1323
1324 /*
1325 * Anything else following the conditional causes
1326 * single-line scope. Warn if the scope contains
1327 * nothing but trailing whitespace.
1328 */
1329
1330 if ('\0' == (*bufp)[pos])
1331 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
1332
1333 r->last->endspan = 1;
1334
1335 out:
1336 *offs = pos;
1337 return(ROFF_RERUN);
1338 }
1339
1340
1341 /* ARGSUSED */
1342 static enum rofferr
1343 roff_ds(ROFF_ARGS)
1344 {
1345 char *name, *string;
1346
1347 /*
1348 * A symbol is named by the first word following the macro
1349 * invocation up to a space. Its value is anything after the
1350 * name's trailing whitespace and optional double-quote. Thus,
1351 *
1352 * [.ds foo "bar " ]
1353 *
1354 * will have `bar " ' as its value.
1355 */
1356
1357 string = *bufp + pos;
1358 name = roff_getname(r, &string, ln, pos);
1359 if ('\0' == *name)
1360 return(ROFF_IGN);
1361
1362 /* Read past initial double-quote. */
1363 if ('"' == *string)
1364 string++;
1365
1366 /* The rest is the value. */
1367 roff_setstr(r, name, string, ROFF_as == tok);
1368 return(ROFF_IGN);
1369 }
1370
1371 void
1372 roff_setreg(struct roff *r, const char *name, int val, char sign)
1373 {
1374 struct roffreg *reg;
1375
1376 /* Search for an existing register with the same name. */
1377 reg = r->regtab;
1378
1379 while (reg && strcmp(name, reg->key.p))
1380 reg = reg->next;
1381
1382 if (NULL == reg) {
1383 /* Create a new register. */
1384 reg = mandoc_malloc(sizeof(struct roffreg));
1385 reg->key.p = mandoc_strdup(name);
1386 reg->key.sz = strlen(name);
1387 reg->val = 0;
1388 reg->next = r->regtab;
1389 r->regtab = reg;
1390 }
1391
1392 if ('+' == sign)
1393 reg->val += val;
1394 else if ('-' == sign)
1395 reg->val -= val;
1396 else
1397 reg->val = val;
1398 }
1399
/*
 * Look up a predefined read-only number register by the first
 * character of name.
 * Returns the value of the register, or -1 if name does not
 * designate a predefined register.  Should a predefined register
 * ever need the value -1, a different marker will be required.
 */
static int
roff_getregro(const char *name)
{
	int	 value;

	switch (name[0]) {
	case 'A':  /* ASCII approximation mode is always off. */
		value = 0;
		break;
	case 'g':  /* Groff compatibility mode is always on. */
		value = 1;
		break;
	case 'H':  /* Fixed horizontal resolution. */
		value = 24;
		break;
	case 'j':  /* Always adjust left margin only. */
		value = 0;
		break;
	case 'T':  /* Some output device is always defined. */
		value = 1;
		break;
	case 'V':  /* Fixed vertical resolution. */
		value = 40;
		break;
	default:
		value = -1;
		break;
	}

	return value;
}
1427
1428 int
1429 roff_getreg(const struct roff *r, const char *name)
1430 {
1431 struct roffreg *reg;
1432 int val;
1433
1434 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
1435 val = roff_getregro(name + 1);
1436 if (-1 != val)
1437 return (val);
1438 }
1439
1440 for (reg = r->regtab; reg; reg = reg->next)
1441 if (0 == strcmp(name, reg->key.p))
1442 return(reg->val);
1443
1444 return(0);
1445 }
1446
1447 static int
1448 roff_getregn(const struct roff *r, const char *name, size_t len)
1449 {
1450 struct roffreg *reg;
1451 int val;
1452
1453 if ('.' == name[0] && 2 == len) {
1454 val = roff_getregro(name + 1);
1455 if (-1 != val)
1456 return (val);
1457 }
1458
1459 for (reg = r->regtab; reg; reg = reg->next)
1460 if (len == reg->key.sz &&
1461 0 == strncmp(name, reg->key.p, len))
1462 return(reg->val);
1463
1464 return(0);
1465 }
1466
1467 static void
1468 roff_freereg(struct roffreg *reg)
1469 {
1470 struct roffreg *old_reg;
1471
1472 while (NULL != reg) {
1473 free(reg->key.p);
1474 old_reg = reg;
1475 reg = reg->next;
1476 free(old_reg);
1477 }
1478 }
1479
1480 /* ARGSUSED */
1481 static enum rofferr
1482 roff_nr(ROFF_ARGS)
1483 {
1484 const char *key;
1485 char *val;
1486 size_t sz;
1487 int iv;
1488 char sign;
1489
1490 val = *bufp + pos;
1491 key = roff_getname(r, &val, ln, pos);
1492
1493 sign = *val;
1494 if ('+' == sign || '-' == sign)
1495 val++;
1496
1497 sz = strspn(val, "0123456789");
1498 iv = sz ? mandoc_strntoi(val, sz, 10) : 0;
1499
1500 roff_setreg(r, key, iv, sign);
1501
1502 return(ROFF_IGN);
1503 }
1504
1505 /* ARGSUSED */
1506 static enum rofferr
1507 roff_rm(ROFF_ARGS)
1508 {
1509 const char *name;
1510 char *cp;
1511
1512 cp = *bufp + pos;
1513 while ('\0' != *cp) {
1514 name = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1515 if ('\0' != *name)
1516 roff_setstr(r, name, NULL, 0);
1517 }
1518 return(ROFF_IGN);
1519 }
1520
1521 /* ARGSUSED */
1522 static enum rofferr
1523 roff_it(ROFF_ARGS)
1524 {
1525 char *cp;
1526 size_t len;
1527 int iv;
1528
1529 /* Parse the number of lines. */
1530 cp = *bufp + pos;
1531 len = strcspn(cp, " \t");
1532 cp[len] = '\0';
1533 if ((iv = mandoc_strntoi(cp, len, 10)) <= 0) {
1534 mandoc_msg(MANDOCERR_NUMERIC, r->parse,
1535 ln, ppos, *bufp + 1);
1536 return(ROFF_IGN);
1537 }
1538 cp += len + 1;
1539
1540 /* Arm the input line trap. */
1541 roffit_lines = iv;
1542 roffit_macro = mandoc_strdup(cp);
1543 return(ROFF_IGN);
1544 }
1545
1546 /* ARGSUSED */
1547 static enum rofferr
1548 roff_Dd(ROFF_ARGS)
1549 {
1550 const char *const *cp;
1551
1552 if (0 == ((MPARSE_MDOC | MPARSE_QUICK) & r->options))
1553 for (cp = __mdoc_reserved; *cp; cp++)
1554 roff_setstr(r, *cp, NULL, 0);
1555
1556 return(ROFF_CONT);
1557 }
1558
1559 /* ARGSUSED */
1560 static enum rofferr
1561 roff_TH(ROFF_ARGS)
1562 {
1563 const char *const *cp;
1564
1565 if (0 == (MPARSE_QUICK & r->options))
1566 for (cp = __man_reserved; *cp; cp++)
1567 roff_setstr(r, *cp, NULL, 0);
1568
1569 return(ROFF_CONT);
1570 }
1571
1572 /* ARGSUSED */
1573 static enum rofferr
1574 roff_TE(ROFF_ARGS)
1575 {
1576
1577 if (NULL == r->tbl)
1578 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1579 else
1580 tbl_end(&r->tbl);
1581
1582 return(ROFF_IGN);
1583 }
1584
1585 /* ARGSUSED */
1586 static enum rofferr
1587 roff_T_(ROFF_ARGS)
1588 {
1589
1590 if (NULL == r->tbl)
1591 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1592 else
1593 tbl_restart(ppos, ln, r->tbl);
1594
1595 return(ROFF_IGN);
1596 }
1597
#if 0
/*
 * Currently compiled out.
 * End the open equation, if any.  Returns 1 if there was one
 * and eqn_end() returned ROFF_EQN, 0 otherwise.
 */
static int
roff_closeeqn(struct roff *r)
{

	if (r->eqn == NULL)
		return 0;

	return eqn_end(&r->eqn) == ROFF_EQN;
}
#endif
1606
1607 static void
1608 roff_openeqn(struct roff *r, const char *name, int line,
1609 int offs, const char *buf)
1610 {
1611 struct eqn_node *e;
1612 int poff;
1613
1614 assert(NULL == r->eqn);
1615 e = eqn_alloc(name, offs, line, r->parse);
1616
1617 if (r->last_eqn)
1618 r->last_eqn->next = e;
1619 else
1620 r->first_eqn = r->last_eqn = e;
1621
1622 r->eqn = r->last_eqn = e;
1623
1624 if (buf) {
1625 poff = 0;
1626 eqn_read(&r->eqn, line, buf, offs, &poff);
1627 }
1628 }
1629
1630 /* ARGSUSED */
1631 static enum rofferr
1632 roff_EQ(ROFF_ARGS)
1633 {
1634
1635 roff_openeqn(r, *bufp + pos, ln, ppos, NULL);
1636 return(ROFF_IGN);
1637 }
1638
1639 /* ARGSUSED */
1640 static enum rofferr
1641 roff_EN(ROFF_ARGS)
1642 {
1643
1644 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1645 return(ROFF_IGN);
1646 }
1647
1648 /* ARGSUSED */
1649 static enum rofferr
1650 roff_TS(ROFF_ARGS)
1651 {
1652 struct tbl_node *tbl;
1653
1654 if (r->tbl) {
1655 mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
1656 tbl_end(&r->tbl);
1657 }
1658
1659 tbl = tbl_alloc(ppos, ln, r->parse);
1660
1661 if (r->last_tbl)
1662 r->last_tbl->next = tbl;
1663 else
1664 r->first_tbl = r->last_tbl = tbl;
1665
1666 r->tbl = r->last_tbl = tbl;
1667 return(ROFF_IGN);
1668 }
1669
1670 /* ARGSUSED */
1671 static enum rofferr
1672 roff_cc(ROFF_ARGS)
1673 {
1674 const char *p;
1675
1676 p = *bufp + pos;
1677
1678 if ('\0' == *p || '.' == (r->control = *p++))
1679 r->control = 0;
1680
1681 if ('\0' != *p)
1682 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1683
1684 return(ROFF_IGN);
1685 }
1686
1687 /* ARGSUSED */
1688 static enum rofferr
1689 roff_tr(ROFF_ARGS)
1690 {
1691 const char *p, *first, *second;
1692 size_t fsz, ssz;
1693 enum mandoc_esc esc;
1694
1695 p = *bufp + pos;
1696
1697 if ('\0' == *p) {
1698 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1699 return(ROFF_IGN);
1700 }
1701
1702 while ('\0' != *p) {
1703 fsz = ssz = 1;
1704
1705 first = p++;
1706 if ('\\' == *first) {
1707 esc = mandoc_escape(&p, NULL, NULL);
1708 if (ESCAPE_ERROR == esc) {
1709 mandoc_msg
1710 (MANDOCERR_BADESCAPE, r->parse,
1711 ln, (int)(p - *bufp), NULL);
1712 return(ROFF_IGN);
1713 }
1714 fsz = (size_t)(p - first);
1715 }
1716
1717 second = p++;
1718 if ('\\' == *second) {
1719 esc = mandoc_escape(&p, NULL, NULL);
1720 if (ESCAPE_ERROR == esc) {
1721 mandoc_msg
1722 (MANDOCERR_BADESCAPE, r->parse,
1723 ln, (int)(p - *bufp), NULL);
1724 return(ROFF_IGN);
1725 }
1726 ssz = (size_t)(p - second);
1727 } else if ('\0' == *second) {
1728 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
1729 ln, (int)(p - *bufp), NULL);
1730 second = " ";
1731 p--;
1732 }
1733
1734 if (fsz > 1) {
1735 roff_setstrn(&r->xmbtab, first,
1736 fsz, second, ssz, 0);
1737 continue;
1738 }
1739
1740 if (NULL == r->xtab)
1741 r->xtab = mandoc_calloc
1742 (128, sizeof(struct roffstr));
1743
1744 free(r->xtab[(int)*first].p);
1745 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
1746 r->xtab[(int)*first].sz = ssz;
1747 }
1748
1749 return(ROFF_IGN);
1750 }
1751
1752 /* ARGSUSED */
1753 static enum rofferr
1754 roff_so(ROFF_ARGS)
1755 {
1756 char *name;
1757
1758 mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL);
1759
1760 /*
1761 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1762 * opening anything that's not in our cwd or anything beneath
1763 * it. Thus, explicitly disallow traversing up the file-system
1764 * or using absolute paths.
1765 */
1766
1767 name = *bufp + pos;
1768 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1769 mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL);
1770 return(ROFF_ERR);
1771 }
1772
1773 *offs = pos;
1774 return(ROFF_SO);
1775 }
1776
1777 /* ARGSUSED */
1778 static enum rofferr
1779 roff_userdef(ROFF_ARGS)
1780 {
1781 const char *arg[9];
1782 char *cp, *n1, *n2;
1783 int i;
1784
1785 /*
1786 * Collect pointers to macro argument strings
1787 * and NUL-terminate them.
1788 */
1789 cp = *bufp + pos;
1790 for (i = 0; i < 9; i++)
1791 arg[i] = '\0' == *cp ? "" :
1792 mandoc_getarg(r->parse, &cp, ln, &pos);
1793
1794 /*
1795 * Expand macro arguments.
1796 */
1797 *szp = 0;
1798 n1 = cp = mandoc_strdup(r->current_string);
1799 while (NULL != (cp = strstr(cp, "\\$"))) {
1800 i = cp[2] - '1';
1801 if (0 > i || 8 < i) {
1802 /* Not an argument invocation. */
1803 cp += 2;
1804 continue;
1805 }
1806
1807 *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1808 n2 = mandoc_malloc(*szp);
1809
1810 strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1811 strlcat(n2, arg[i], *szp);
1812 strlcat(n2, cp + 3, *szp);
1813
1814 cp = n2 + (cp - n1);
1815 free(n1);
1816 n1 = n2;
1817 }
1818
1819 /*
1820 * Replace the macro invocation
1821 * by the expanded macro.
1822 */
1823 free(*bufp);
1824 *bufp = n1;
1825 if (0 == *szp)
1826 *szp = strlen(*bufp) + 1;
1827
1828 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1829 ROFF_REPARSE : ROFF_APPEND);
1830 }
1831
1832 static char *
1833 roff_getname(struct roff *r, char **cpp, int ln, int pos)
1834 {
1835 char *name, *cp;
1836
1837 name = *cpp;
1838 if ('\0' == *name)
1839 return(name);
1840
1841 /* Read until end of name. */
1842 for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
1843 if ('\\' != *cp)
1844 continue;
1845 cp++;
1846 if ('\\' == *cp)
1847 continue;
1848 mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
1849 *cp = '\0';
1850 name = cp;
1851 }
1852
1853 /* Nil-terminate name. */
1854 if ('\0' != *cp)
1855 *(cp++) = '\0';
1856
1857 /* Read past spaces. */
1858 while (' ' == *cp)
1859 cp++;
1860
1861 *cpp = cp;
1862 return(name);
1863 }
1864
1865 /*
1866 * Store *string into the user-defined string called *name.
1867 * To clear an existing entry, call with (*r, *name, NULL, 0).
1868 * append == 0: replace mode
1869 * append == 1: single-line append mode
1870 * append == 2: multiline append mode, append '\n' after each call
1871 */
1872 static void
1873 roff_setstr(struct roff *r, const char *name, const char *string,
1874 int append)
1875 {
1876
1877 roff_setstrn(&r->strtab, name, strlen(name), string,
1878 string ? strlen(string) : 0, append);
1879 }
1880
1881 static void
1882 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
1883 const char *string, size_t stringsz, int append)
1884 {
1885 struct roffkv *n;
1886 char *c;
1887 int i;
1888 size_t oldch, newch;
1889
1890 /* Search for an existing string with the same name. */
1891 n = *r;
1892
1893 while (n && strcmp(name, n->key.p))
1894 n = n->next;
1895
1896 if (NULL == n) {
1897 /* Create a new string table entry. */
1898 n = mandoc_malloc(sizeof(struct roffkv));
1899 n->key.p = mandoc_strndup(name, namesz);
1900 n->key.sz = namesz;
1901 n->val.p = NULL;
1902 n->val.sz = 0;
1903 n->next = *r;
1904 *r = n;
1905 } else if (0 == append) {
1906 free(n->val.p);
1907 n->val.p = NULL;
1908 n->val.sz = 0;
1909 }
1910
1911 if (NULL == string)
1912 return;
1913
1914 /*
1915 * One additional byte for the '\n' in multiline mode,
1916 * and one for the terminating '\0'.
1917 */
1918 newch = stringsz + (1 < append ? 2u : 1u);
1919
1920 if (NULL == n->val.p) {
1921 n->val.p = mandoc_malloc(newch);
1922 *n->val.p = '\0';
1923 oldch = 0;
1924 } else {
1925 oldch = n->val.sz;
1926 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
1927 }
1928
1929 /* Skip existing content in the destination buffer. */
1930 c = n->val.p + (int)oldch;
1931
1932 /* Append new content to the destination buffer. */
1933 i = 0;
1934 while (i < (int)stringsz) {
1935 /*
1936 * Rudimentary roff copy mode:
1937 * Handle escaped backslashes.
1938 */
1939 if ('\\' == string[i] && '\\' == string[i + 1])
1940 i++;
1941 *c++ = string[i++];
1942 }
1943
1944 /* Append terminating bytes. */
1945 if (1 < append)
1946 *c++ = '\n';
1947
1948 *c = '\0';
1949 n->val.sz = (int)(c - n->val.p);
1950 }
1951
1952 static const char *
1953 roff_getstrn(const struct roff *r, const char *name, size_t len)
1954 {
1955 const struct roffkv *n;
1956 int i;
1957
1958 for (n = r->strtab; n; n = n->next)
1959 if (0 == strncmp(name, n->key.p, len) &&
1960 '\0' == n->key.p[(int)len])
1961 return(n->val.p);
1962
1963 for (i = 0; i < PREDEFS_MAX; i++)
1964 if (0 == strncmp(name, predefs[i].name, len) &&
1965 '\0' == predefs[i].name[(int)len])
1966 return(predefs[i].str);
1967
1968 return(NULL);
1969 }
1970
1971 static void
1972 roff_freestr(struct roffkv *r)
1973 {
1974 struct roffkv *n, *nn;
1975
1976 for (n = r; n; n = nn) {
1977 free(n->key.p);
1978 free(n->val.p);
1979 nn = n->next;
1980 free(n);
1981 }
1982 }
1983
1984 const struct tbl_span *
1985 roff_span(const struct roff *r)
1986 {
1987
1988 return(r->tbl ? tbl_span(r->tbl) : NULL);
1989 }
1990
1991 const struct eqn *
1992 roff_eqn(const struct roff *r)
1993 {
1994
1995 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
1996 }
1997
1998 /*
1999 * Duplicate an input string, making the appropriate character
2000 * conversations (as stipulated by `tr') along the way.
2001 * Returns a heap-allocated string with all the replacements made.
2002 */
2003 char *
2004 roff_strdup(const struct roff *r, const char *p)
2005 {
2006 const struct roffkv *cp;
2007 char *res;
2008 const char *pp;
2009 size_t ssz, sz;
2010 enum mandoc_esc esc;
2011
2012 if (NULL == r->xmbtab && NULL == r->xtab)
2013 return(mandoc_strdup(p));
2014 else if ('\0' == *p)
2015 return(mandoc_strdup(""));
2016
2017 /*
2018 * Step through each character looking for term matches
2019 * (remember that a `tr' can be invoked with an escape, which is
2020 * a glyph but the escape is multi-character).
2021 * We only do this if the character hash has been initialised
2022 * and the string is >0 length.
2023 */
2024
2025 res = NULL;
2026 ssz = 0;
2027
2028 while ('\0' != *p) {
2029 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
2030 sz = r->xtab[(int)*p].sz;
2031 res = mandoc_realloc(res, ssz + sz + 1);
2032 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
2033 ssz += sz;
2034 p++;
2035 continue;
2036 } else if ('\\' != *p) {
2037 res = mandoc_realloc(res, ssz + 2);
2038 res[ssz++] = *p++;
2039 continue;
2040 }
2041
2042 /* Search for term matches. */
2043 for (cp = r->xmbtab; cp; cp = cp->next)
2044 if (0 == strncmp(p, cp->key.p, cp->key.sz))
2045 break;
2046
2047 if (NULL != cp) {
2048 /*
2049 * A match has been found.
2050 * Append the match to the array and move
2051 * forward by its keysize.
2052 */
2053 res = mandoc_realloc
2054 (res, ssz + cp->val.sz + 1);
2055 memcpy(res + ssz, cp->val.p, cp->val.sz);
2056 ssz += cp->val.sz;
2057 p += (int)cp->key.sz;
2058 continue;
2059 }
2060
2061 /*
2062 * Handle escapes carefully: we need to copy
2063 * over just the escape itself, or else we might
2064 * do replacements within the escape itself.
2065 * Make sure to pass along the bogus string.
2066 */
2067 pp = p++;
2068 esc = mandoc_escape(&p, NULL, NULL);
2069 if (ESCAPE_ERROR == esc) {
2070 sz = strlen(pp);
2071 res = mandoc_realloc(res, ssz + sz + 1);
2072 memcpy(res + ssz, pp, sz);
2073 break;
2074 }
2075 /*
2076 * We bail out on bad escapes.
2077 * No need to warn: we already did so when
2078 * roff_res() was called.
2079 */
2080 sz = (int)(p - pp);
2081 res = mandoc_realloc(res, ssz + sz + 1);
2082 memcpy(res + ssz, pp, sz);
2083 ssz += sz;
2084 }
2085
2086 res[(int)ssz] = '\0';
2087 return(res);
2088 }
2089
2090 /*
2091 * Find out whether a line is a macro line or not.
2092 * If it is, adjust the current position and return one; if it isn't,
2093 * return zero and don't change the current position.
2094 * If the control character has been set with `.cc', then let that grain
2095 * precedence.
2096 * This is slighly contrary to groff, where using the non-breaking
2097 * control character when `cc' has been invoked will cause the
2098 * non-breaking macro contents to be printed verbatim.
2099 */
2100 int
2101 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
2102 {
2103 int pos;
2104
2105 pos = *ppos;
2106
2107 if (0 != r->control && cp[pos] == r->control)
2108 pos++;
2109 else if (0 != r->control)
2110 return(0);
2111 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
2112 pos += 2;
2113 else if ('.' == cp[pos] || '\'' == cp[pos])
2114 pos++;
2115 else
2116 return(0);
2117
2118 while (' ' == cp[pos] || '\t' == cp[pos])
2119 pos++;
2120
2121 *ppos = pos;
2122 return(1);
2123 }