]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
The files mandoc.c and mandoc.h contained both specialised low-level
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.201 2014/03/23 11:25:26 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include "mandoc.h"
29 #include "mandoc_aux.h"
30 #include "libroff.h"
31 #include "libmandoc.h"
32
/* Upper bound on the nesting depth of if-else conditionals. */
#define	RSTACK_MAX	128

/* Cap on string expansions within one line; stops endless recursion. */
#define	EXPAND_LIMIT	1000
38
/*
 * Tokens for the requests and macros handled in this file.
 * The values double as indices into the roffs[] table, so the
 * order here has to stay in step with the order of that table.
 */
enum	rofft {
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_as,
	ROFF_cc,
	ROFF_ce,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_fam,
	ROFF_hw,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_ps,
	ROFF_rm,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,
	ROFF_Dd,
	ROFF_TH,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,
	ROFF_EQ,
	ROFF_EN,
	ROFF_cblock,	/* the block-closing ".." line */
	ROFF_USERDEF,	/* a user-defined macro */
	ROFF_MAX	/* number of tokens; also means "not found" */
};
79
/*
 * A minimal string buffer: the text plus its cached length.
 */
struct	roffstr {
	char		*p;	/* NUL-terminated text */
	size_t		 sz;	/* cached strlen(p) */
};
87
88 /*
89 * A key-value roffstr pair as part of a singly-linked list.
90 */
91 struct roffkv {
92 struct roffstr key;
93 struct roffstr val;
94 struct roffkv *next; /* next in list */
95 };
96
97 /*
98 * A single number register as part of a singly-linked list.
99 */
100 struct roffreg {
101 struct roffstr key;
102 int val;
103 struct roffreg *next;
104 };
105
106 struct roff {
107 struct mparse *parse; /* parse point */
108 int options; /* parse options */
109 struct roffnode *last; /* leaf of stack */
110 int rstack[RSTACK_MAX]; /* stack of !`ie' rules */
111 char control; /* control character */
112 int rstackpos; /* position in rstack */
113 struct roffreg *regtab; /* number registers */
114 struct roffkv *strtab; /* user-defined strings & macros */
115 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */
116 struct roffstr *xtab; /* single-byte trans table (`tr') */
117 const char *current_string; /* value of last called user macro */
118 struct tbl_node *first_tbl; /* first table parsed */
119 struct tbl_node *last_tbl; /* last table parsed */
120 struct tbl_node *tbl; /* current table being parsed */
121 struct eqn_node *last_eqn; /* last equation parsed */
122 struct eqn_node *first_eqn; /* first equation parsed */
123 struct eqn_node *eqn; /* current equation being parsed */
124 };
125
126 struct roffnode {
127 enum rofft tok; /* type of node */
128 struct roffnode *parent; /* up one in stack */
129 int line; /* parse line */
130 int col; /* parse col */
131 char *name; /* node name, e.g. macro name */
132 char *end; /* end-rules: custom token */
133 int endspan; /* end-rules: next-line or infty */
134 int rule; /* current evaluation rule */
135 };
136
/*
 * Parameter list shared by every request handler:
 *   r     parse context
 *   tok   token of the request being handled
 *   bufp  input buffer
 *   szp   size of the input buffer
 *   ln    input line number
 *   ppos  original position in the buffer
 *   pos   current position in the buffer
 *   offs  offset to restart from when the buffer is to be re-read
 */
#define	ROFF_ARGS	 struct roff *r, \
			 enum rofft tok, \
			 char **bufp, \
			 size_t *szp, \
			 int ln, \
			 int ppos, \
			 int pos, \
			 int *offs

/* Signature of a request handler. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
147
148 struct roffmac {
149 const char *name; /* macro name */
150 roffproc proc; /* process new macro */
151 roffproc text; /* process as child text of macro */
152 roffproc sub; /* process as child of macro */
153 int flags;
154 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
155 struct roffmac *next;
156 };
157
/*
 * One predefined string: the input name and what it expands to.
 */
struct	predef {
	const char	*name;	/* predefined input name */
	const char	*str;	/* replacement symbol */
};

/* Builds one struct predef initializer, trailing comma included. */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
165
/* Forward declarations of the file-local functions. */
static	enum rofft	 roffhash_find(const char *, size_t);
static	void		 roffhash_init(void);
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum rofft,
				const char *, int, int);
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_cc(ROFF_ARGS);
static	void		 roff_ccond(struct roff *, int, int);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	int		 roff_evalcond(const char *, int *);
static	int		 roff_evalstrcond(const char *, int *);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	char		*roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(const struct roff *,
				const char *, size_t);
static	int		 roff_getregro(const char *name);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	enum rofferr	 roff_it(ROFF_ARGS);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	void		 roff_openeqn(struct roff *, const char *,
				int, int, const char *);
static	enum rofft	 roff_parse(struct roff *, const char *, int *);
static	enum rofferr	 roff_parsetext(char **, size_t *, int, int *);
static	enum rofferr	 roff_res(struct roff *,
				char **, size_t *, int, int);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_tr(ROFF_ARGS);
static	enum rofferr	 roff_Dd(ROFF_ARGS);
static	enum rofferr	 roff_TH(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
219
/*
 * Bounds of the printable ASCII range used as hash keys and the
 * resulting number of buckets; see roffhash_find().
 */
#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

/* One chain of requests per possible first character. */
static	struct roffmac	*hash[HASHWIDTH];
227
228 static struct roffmac roffs[ROFF_MAX] = {
229 { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
230 { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
231 { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
232 { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
233 { "as", roff_ds, NULL, NULL, 0, NULL },
234 { "cc", roff_cc, NULL, NULL, 0, NULL },
235 { "ce", roff_line_ignore, NULL, NULL, 0, NULL },
236 { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
237 { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
238 { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
239 { "ds", roff_ds, NULL, NULL, 0, NULL },
240 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
241 { "fam", roff_line_ignore, NULL, NULL, 0, NULL },
242 { "hw", roff_line_ignore, NULL, NULL, 0, NULL },
243 { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
244 { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
245 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
246 { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
247 { "it", roff_it, NULL, NULL, 0, NULL },
248 { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
249 { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
250 { "nr", roff_nr, NULL, NULL, 0, NULL },
251 { "ns", roff_line_ignore, NULL, NULL, 0, NULL },
252 { "ps", roff_line_ignore, NULL, NULL, 0, NULL },
253 { "rm", roff_rm, NULL, NULL, 0, NULL },
254 { "so", roff_so, NULL, NULL, 0, NULL },
255 { "ta", roff_line_ignore, NULL, NULL, 0, NULL },
256 { "tr", roff_tr, NULL, NULL, 0, NULL },
257 { "Dd", roff_Dd, NULL, NULL, 0, NULL },
258 { "TH", roff_TH, NULL, NULL, 0, NULL },
259 { "TS", roff_TS, NULL, NULL, 0, NULL },
260 { "TE", roff_TE, NULL, NULL, 0, NULL },
261 { "T&", roff_T_, NULL, NULL, 0, NULL },
262 { "EQ", roff_EQ, NULL, NULL, 0, NULL },
263 { "EN", roff_EN, NULL, NULL, 0, NULL },
264 { ".", roff_cblock, NULL, NULL, 0, NULL },
265 { NULL, roff_userdef, NULL, NULL, 0, NULL },
266 };
267
/*
 * NULL-terminated list of reserved mdoc macro names.
 * Not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH
 */
const	char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
	"Cd", "Cm",
	"Db", "Dc", "Dd", "Dl", "Do", "Dq", "Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em",
	"En", "Eo", "Er", "Es", "Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf",
	"Ic", "In", "It",
	"Lb", "Li", "Lk", "Lp",
	"Ms", "Mt",
	"Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "Pq",
	"Qc", "Ql", "Qo", "Qq",
	"Re", "Rs", "Rv",
	"Sc", "Sh", "Sm", "So", "Sq", "Ss", "St", "Sx", "Sy",
	"Ta", "Tn",
	"Ud", "Ux",
	"Va", "Vt",
	"Xc", "Xo", "Xr",
	"%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
290
/*
 * NULL-terminated list of reserved man macro names.
 * Not currently implemented: BT DE DS ME MT PT SY TQ YS
 */
const	char *const __man_reserved[] = {
	"AT",
	"B", "BI", "BR",
	"DT",
	"EE", "EN", "EQ", "EX",
	"HP",
	"I", "IB", "IP", "IR",
	"LP",
	"OP",
	"P", "PD", "PP",
	"R", "RB", "RE", "RI", "RS",
	"SB", "SH", "SM", "SS",
	"TE", "TH", "TP", "TS", "T&",
	"UC", "UE", "UR",
	NULL
};
300
301 /* Array of injected predefined strings. */
302 #define PREDEFS_MAX 38
303 static const struct predef predefs[PREDEFS_MAX] = {
304 #include "predefs.in"
305 };
306
/*
 * Hash bucket for the name p: its first character, rebased so
 * that ASCII_LO maps to bucket 0.  The caller has to make sure
 * that the character lies within ASCII_LO..ASCII_HI; see
 * roffhash_find().  The argument is parenthesized so that an
 * expression such as "s + 1" can be passed safely.
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)
309
/* State of the input line trap. */
static	int	 roffit_lines;	/* input lines left until the trap fires */
static	char	*roffit_macro;	/* NUL-terminated macro line to inject */
312
313 static void
314 roffhash_init(void)
315 {
316 struct roffmac *n;
317 int buc, i;
318
319 for (i = 0; i < (int)ROFF_USERDEF; i++) {
320 assert(roffs[i].name[0] >= ASCII_LO);
321 assert(roffs[i].name[0] <= ASCII_HI);
322
323 buc = ROFF_HASH(roffs[i].name);
324
325 if (NULL != (n = hash[buc])) {
326 for ( ; n->next; n = n->next)
327 /* Do nothing. */ ;
328 n->next = &roffs[i];
329 } else
330 hash[buc] = &roffs[i];
331 }
332 }
333
334 /*
335 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
336 * the nil-terminated string name could be found.
337 */
338 static enum rofft
339 roffhash_find(const char *p, size_t s)
340 {
341 int buc;
342 struct roffmac *n;
343
344 /*
345 * libroff has an extremely simple hashtable, for the time
346 * being, which simply keys on the first character, which must
347 * be printable, then walks a chain. It works well enough until
348 * optimised.
349 */
350
351 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
352 return(ROFF_MAX);
353
354 buc = ROFF_HASH(p);
355
356 if (NULL == (n = hash[buc]))
357 return(ROFF_MAX);
358 for ( ; n; n = n->next)
359 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
360 return((enum rofft)(n - roffs));
361
362 return(ROFF_MAX);
363 }
364
365
366 /*
367 * Pop the current node off of the stack of roff instructions currently
368 * pending.
369 */
370 static void
371 roffnode_pop(struct roff *r)
372 {
373 struct roffnode *p;
374
375 assert(r->last);
376 p = r->last;
377
378 r->last = r->last->parent;
379 free(p->name);
380 free(p->end);
381 free(p);
382 }
383
384
385 /*
386 * Push a roff node onto the instruction stack. This must later be
387 * removed with roffnode_pop().
388 */
389 static void
390 roffnode_push(struct roff *r, enum rofft tok, const char *name,
391 int line, int col)
392 {
393 struct roffnode *p;
394
395 p = mandoc_calloc(1, sizeof(struct roffnode));
396 p->tok = tok;
397 if (name)
398 p->name = mandoc_strdup(name);
399 p->parent = r->last;
400 p->line = line;
401 p->col = col;
402 p->rule = p->parent ? p->parent->rule : 0;
403
404 r->last = p;
405 }
406
407
408 static void
409 roff_free1(struct roff *r)
410 {
411 struct tbl_node *tbl;
412 struct eqn_node *e;
413 int i;
414
415 while (NULL != (tbl = r->first_tbl)) {
416 r->first_tbl = tbl->next;
417 tbl_free(tbl);
418 }
419
420 r->first_tbl = r->last_tbl = r->tbl = NULL;
421
422 while (NULL != (e = r->first_eqn)) {
423 r->first_eqn = e->next;
424 eqn_free(e);
425 }
426
427 r->first_eqn = r->last_eqn = r->eqn = NULL;
428
429 while (r->last)
430 roffnode_pop(r);
431
432 roff_freestr(r->strtab);
433 roff_freestr(r->xmbtab);
434
435 r->strtab = r->xmbtab = NULL;
436
437 roff_freereg(r->regtab);
438
439 r->regtab = NULL;
440
441 if (r->xtab)
442 for (i = 0; i < 128; i++)
443 free(r->xtab[i].p);
444
445 free(r->xtab);
446 r->xtab = NULL;
447 }
448
449 void
450 roff_reset(struct roff *r)
451 {
452
453 roff_free1(r);
454 r->control = 0;
455 }
456
457
/*
 * Destroy a parser: release everything it refers to, then the
 * parser structure itself.
 */
void
roff_free(struct roff *r)
{

	roff_free1(r);
	free(r);
}
465
466
467 struct roff *
468 roff_alloc(struct mparse *parse, int options)
469 {
470 struct roff *r;
471
472 r = mandoc_calloc(1, sizeof(struct roff));
473 r->parse = parse;
474 r->options = options;
475 r->rstackpos = -1;
476
477 roffhash_init();
478
479 return(r);
480 }
481
482 /*
483 * In the current line, expand user-defined strings ("\*")
484 * and references to number registers ("\n").
485 * Also check the syntax of other escape sequences.
486 */
487 static enum rofferr
488 roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
489 {
490 char ubuf[12]; /* buffer to print the number */
491 const char *stesc; /* start of an escape sequence ('\\') */
492 const char *stnam; /* start of the name, after "[(*" */
493 const char *cp; /* end of the name, e.g. before ']' */
494 const char *res; /* the string to be substituted */
495 char *nbuf; /* new buffer to copy bufp to */
496 size_t nsz; /* size of the new buffer */
497 size_t maxl; /* expected length of the escape name */
498 size_t naml; /* actual length of the escape name */
499 int expand_count; /* to avoid infinite loops */
500
501 expand_count = 0;
502
503 again:
504 cp = *bufp + pos;
505 while (NULL != (cp = strchr(cp, '\\'))) {
506 stesc = cp++;
507
508 /*
509 * The second character must be an asterisk or an n.
510 * If it isn't, skip it anyway: It is escaped,
511 * so it can't start another escape sequence.
512 */
513
514 if ('\0' == *cp)
515 return(ROFF_CONT);
516
517 switch (*cp) {
518 case ('*'):
519 res = NULL;
520 break;
521 case ('n'):
522 res = ubuf;
523 break;
524 default:
525 if (ESCAPE_ERROR != mandoc_escape(&cp, NULL, NULL))
526 continue;
527 mandoc_msg
528 (MANDOCERR_BADESCAPE, r->parse,
529 ln, (int)(stesc - *bufp), NULL);
530 return(ROFF_CONT);
531 }
532
533 cp++;
534
535 /*
536 * The third character decides the length
537 * of the name of the string or register.
538 * Save a pointer to the name.
539 */
540
541 switch (*cp) {
542 case ('\0'):
543 return(ROFF_CONT);
544 case ('('):
545 cp++;
546 maxl = 2;
547 break;
548 case ('['):
549 cp++;
550 maxl = 0;
551 break;
552 default:
553 maxl = 1;
554 break;
555 }
556 stnam = cp;
557
558 /* Advance to the end of the name. */
559
560 for (naml = 0; 0 == maxl || naml < maxl; naml++, cp++) {
561 if ('\0' == *cp) {
562 mandoc_msg
563 (MANDOCERR_BADESCAPE,
564 r->parse, ln,
565 (int)(stesc - *bufp), NULL);
566 return(ROFF_CONT);
567 }
568 if (0 == maxl && ']' == *cp)
569 break;
570 }
571
572 /*
573 * Retrieve the replacement string; if it is
574 * undefined, resume searching for escapes.
575 */
576
577 if (NULL == res)
578 res = roff_getstrn(r, stnam, naml);
579 else
580 snprintf(ubuf, sizeof(ubuf), "%d",
581 roff_getregn(r, stnam, naml));
582
583 if (NULL == res) {
584 mandoc_msg
585 (MANDOCERR_BADESCAPE, r->parse,
586 ln, (int)(stesc - *bufp), NULL);
587 res = "";
588 }
589
590 /* Replace the escape sequence by the string. */
591
592 pos = stesc - *bufp;
593
594 nsz = *szp + strlen(res) + 1;
595 nbuf = mandoc_malloc(nsz);
596
597 strlcpy(nbuf, *bufp, (size_t)(stesc - *bufp + 1));
598 strlcat(nbuf, res, nsz);
599 strlcat(nbuf, cp + (maxl ? 0 : 1), nsz);
600
601 free(*bufp);
602
603 *bufp = nbuf;
604 *szp = nsz;
605
606 if (EXPAND_LIMIT >= ++expand_count)
607 goto again;
608
609 /* Just leave the string unexpanded. */
610 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL);
611 return(ROFF_IGN);
612 }
613 return(ROFF_CONT);
614 }
615
616 /*
617 * Process text streams:
618 * Convert all breakable hyphens into ASCII_HYPH.
619 * Decrement and spring input line trap.
620 */
621 static enum rofferr
622 roff_parsetext(char **bufp, size_t *szp, int pos, int *offs)
623 {
624 size_t sz;
625 const char *start;
626 char *p;
627 int isz;
628 enum mandoc_esc esc;
629
630 start = p = *bufp + pos;
631
632 while ('\0' != *p) {
633 sz = strcspn(p, "-\\");
634 p += sz;
635
636 if ('\0' == *p)
637 break;
638
639 if ('\\' == *p) {
640 /* Skip over escapes. */
641 p++;
642 esc = mandoc_escape((const char **)&p, NULL, NULL);
643 if (ESCAPE_ERROR == esc)
644 break;
645 continue;
646 } else if (p == start) {
647 p++;
648 continue;
649 }
650
651 if (isalpha((unsigned char)p[-1]) &&
652 isalpha((unsigned char)p[1]))
653 *p = ASCII_HYPH;
654 p++;
655 }
656
657 /* Spring the input line trap. */
658 if (1 == roffit_lines) {
659 isz = asprintf(&p, "%s\n.%s", *bufp, roffit_macro);
660 if (-1 == isz) {
661 perror(NULL);
662 exit((int)MANDOCLEVEL_SYSERR);
663 }
664 free(*bufp);
665 *bufp = p;
666 *szp = isz + 1;
667 *offs = 0;
668 free(roffit_macro);
669 roffit_lines = 0;
670 return(ROFF_REPARSE);
671 } else if (1 < roffit_lines)
672 --roffit_lines;
673 return(ROFF_CONT);
674 }
675
676 enum rofferr
677 roff_parseln(struct roff *r, int ln, char **bufp,
678 size_t *szp, int pos, int *offs)
679 {
680 enum rofft t;
681 enum rofferr e;
682 int ppos, ctl;
683
684 /*
685 * Run the reserved-word filter only if we have some reserved
686 * words to fill in.
687 */
688
689 e = roff_res(r, bufp, szp, ln, pos);
690 if (ROFF_IGN == e)
691 return(e);
692 assert(ROFF_CONT == e);
693
694 ppos = pos;
695 ctl = roff_getcontrol(r, *bufp, &pos);
696
697 /*
698 * First, if a scope is open and we're not a macro, pass the
699 * text through the macro's filter. If a scope isn't open and
700 * we're not a macro, just let it through.
701 * Finally, if there's an equation scope open, divert it into it
702 * no matter our state.
703 */
704
705 if (r->last && ! ctl) {
706 t = r->last->tok;
707 assert(roffs[t].text);
708 e = (*roffs[t].text)
709 (r, t, bufp, szp, ln, pos, pos, offs);
710 assert(ROFF_IGN == e || ROFF_CONT == e);
711 if (ROFF_CONT != e)
712 return(e);
713 }
714 if (r->eqn)
715 return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
716 if ( ! ctl) {
717 if (r->tbl)
718 return(tbl_read(r->tbl, ln, *bufp, pos));
719 return(roff_parsetext(bufp, szp, pos, offs));
720 }
721
722 /*
723 * If a scope is open, go to the child handler for that macro,
724 * as it may want to preprocess before doing anything with it.
725 * Don't do so if an equation is open.
726 */
727
728 if (r->last) {
729 t = r->last->tok;
730 assert(roffs[t].sub);
731 return((*roffs[t].sub)
732 (r, t, bufp, szp,
733 ln, ppos, pos, offs));
734 }
735
736 /*
737 * Lastly, as we've no scope open, try to look up and execute
738 * the new macro. If no macro is found, simply return and let
739 * the compilers handle it.
740 */
741
742 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
743 return(ROFF_CONT);
744
745 assert(roffs[t].proc);
746 return((*roffs[t].proc)
747 (r, t, bufp, szp,
748 ln, ppos, pos, offs));
749 }
750
751
752 void
753 roff_endparse(struct roff *r)
754 {
755
756 if (r->last)
757 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
758 r->last->line, r->last->col, NULL);
759
760 if (r->eqn) {
761 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
762 r->eqn->eqn.ln, r->eqn->eqn.pos, NULL);
763 eqn_end(&r->eqn);
764 }
765
766 if (r->tbl) {
767 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
768 r->tbl->line, r->tbl->pos, NULL);
769 tbl_end(&r->tbl);
770 }
771 }
772
773 /*
774 * Parse a roff node's type from the input buffer. This must be in the
775 * form of ".foo xxx" in the usual way.
776 */
777 static enum rofft
778 roff_parse(struct roff *r, const char *buf, int *pos)
779 {
780 const char *mac;
781 size_t maclen;
782 enum rofft t;
783
784 if ('\0' == buf[*pos] || '"' == buf[*pos] ||
785 '\t' == buf[*pos] || ' ' == buf[*pos])
786 return(ROFF_MAX);
787
788 /* We stop the macro parse at an escape, tab, space, or nil. */
789
790 mac = buf + *pos;
791 maclen = strcspn(mac, " \\\t\0");
792
793 t = (r->current_string = roff_getstrn(r, mac, maclen))
794 ? ROFF_USERDEF : roffhash_find(mac, maclen);
795
796 *pos += (int)maclen;
797
798 while (buf[*pos] && ' ' == buf[*pos])
799 (*pos)++;
800
801 return(t);
802 }
803
804 /* ARGSUSED */
805 static enum rofferr
806 roff_cblock(ROFF_ARGS)
807 {
808
809 /*
810 * A block-close `..' should only be invoked as a child of an
811 * ignore macro, otherwise raise a warning and just ignore it.
812 */
813
814 if (NULL == r->last) {
815 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
816 return(ROFF_IGN);
817 }
818
819 switch (r->last->tok) {
820 case (ROFF_am):
821 /* FALLTHROUGH */
822 case (ROFF_ami):
823 /* FALLTHROUGH */
824 case (ROFF_am1):
825 /* FALLTHROUGH */
826 case (ROFF_de):
827 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
828 /* FALLTHROUGH */
829 case (ROFF_dei):
830 /* FALLTHROUGH */
831 case (ROFF_ig):
832 break;
833 default:
834 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
835 return(ROFF_IGN);
836 }
837
838 if ((*bufp)[pos])
839 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
840
841 roffnode_pop(r);
842 roffnode_cleanscope(r);
843 return(ROFF_IGN);
844
845 }
846
847
848 static void
849 roffnode_cleanscope(struct roff *r)
850 {
851
852 while (r->last) {
853 if (--r->last->endspan != 0)
854 break;
855 roffnode_pop(r);
856 }
857 }
858
859
860 static void
861 roff_ccond(struct roff *r, int ln, int ppos)
862 {
863
864 if (NULL == r->last) {
865 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
866 return;
867 }
868
869 switch (r->last->tok) {
870 case (ROFF_el):
871 /* FALLTHROUGH */
872 case (ROFF_ie):
873 /* FALLTHROUGH */
874 case (ROFF_if):
875 break;
876 default:
877 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
878 return;
879 }
880
881 if (r->last->endspan > -1) {
882 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
883 return;
884 }
885
886 roffnode_pop(r);
887 roffnode_cleanscope(r);
888 return;
889 }
890
891
892 /* ARGSUSED */
893 static enum rofferr
894 roff_block(ROFF_ARGS)
895 {
896 int sv;
897 size_t sz;
898 char *name;
899
900 name = NULL;
901
902 if (ROFF_ig != tok) {
903 if ('\0' == (*bufp)[pos]) {
904 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
905 return(ROFF_IGN);
906 }
907
908 /*
909 * Re-write `de1', since we don't really care about
910 * groff's strange compatibility mode, into `de'.
911 */
912
913 if (ROFF_de1 == tok)
914 tok = ROFF_de;
915 if (ROFF_de == tok)
916 name = *bufp + pos;
917 else
918 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
919 roffs[tok].name);
920
921 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
922 pos++;
923
924 while (isspace((unsigned char)(*bufp)[pos]))
925 (*bufp)[pos++] = '\0';
926 }
927
928 roffnode_push(r, tok, name, ln, ppos);
929
930 /*
931 * At the beginning of a `de' macro, clear the existing string
932 * with the same name, if there is one. New content will be
933 * appended from roff_block_text() in multiline mode.
934 */
935
936 if (ROFF_de == tok)
937 roff_setstr(r, name, "", 0);
938
939 if ('\0' == (*bufp)[pos])
940 return(ROFF_IGN);
941
942 /* If present, process the custom end-of-line marker. */
943
944 sv = pos;
945 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
946 pos++;
947
948 /*
949 * Note: groff does NOT like escape characters in the input.
950 * Instead of detecting this, we're just going to let it fly and
951 * to hell with it.
952 */
953
954 assert(pos > sv);
955 sz = (size_t)(pos - sv);
956
957 if (1 == sz && '.' == (*bufp)[sv])
958 return(ROFF_IGN);
959
960 r->last->end = mandoc_malloc(sz + 1);
961
962 memcpy(r->last->end, *bufp + sv, sz);
963 r->last->end[(int)sz] = '\0';
964
965 if ((*bufp)[pos])
966 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
967
968 return(ROFF_IGN);
969 }
970
971
972 /* ARGSUSED */
973 static enum rofferr
974 roff_block_sub(ROFF_ARGS)
975 {
976 enum rofft t;
977 int i, j;
978
979 /*
980 * First check whether a custom macro exists at this level. If
981 * it does, then check against it. This is some of groff's
982 * stranger behaviours. If we encountered a custom end-scope
983 * tag and that tag also happens to be a "real" macro, then we
984 * need to try interpreting it again as a real macro. If it's
985 * not, then return ignore. Else continue.
986 */
987
988 if (r->last->end) {
989 for (i = pos, j = 0; r->last->end[j]; j++, i++)
990 if ((*bufp)[i] != r->last->end[j])
991 break;
992
993 if ('\0' == r->last->end[j] &&
994 ('\0' == (*bufp)[i] ||
995 ' ' == (*bufp)[i] ||
996 '\t' == (*bufp)[i])) {
997 roffnode_pop(r);
998 roffnode_cleanscope(r);
999
1000 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
1001 i++;
1002
1003 pos = i;
1004 if (ROFF_MAX != roff_parse(r, *bufp, &pos))
1005 return(ROFF_RERUN);
1006 return(ROFF_IGN);
1007 }
1008 }
1009
1010 /*
1011 * If we have no custom end-query or lookup failed, then try
1012 * pulling it out of the hashtable.
1013 */
1014
1015 t = roff_parse(r, *bufp, &pos);
1016
1017 /*
1018 * Macros other than block-end are only significant
1019 * in `de' blocks; elsewhere, simply throw them away.
1020 */
1021 if (ROFF_cblock != t) {
1022 if (ROFF_de == tok)
1023 roff_setstr(r, r->last->name, *bufp + ppos, 2);
1024 return(ROFF_IGN);
1025 }
1026
1027 assert(roffs[t].proc);
1028 return((*roffs[t].proc)(r, t, bufp, szp,
1029 ln, ppos, pos, offs));
1030 }
1031
1032
1033 /* ARGSUSED */
1034 static enum rofferr
1035 roff_block_text(ROFF_ARGS)
1036 {
1037
1038 if (ROFF_de == tok)
1039 roff_setstr(r, r->last->name, *bufp + pos, 2);
1040
1041 return(ROFF_IGN);
1042 }
1043
1044
1045 /* ARGSUSED */
1046 static enum rofferr
1047 roff_cond_sub(ROFF_ARGS)
1048 {
1049 enum rofft t;
1050 char *ep;
1051 int rr;
1052
1053 rr = r->last->rule;
1054 roffnode_cleanscope(r);
1055 t = roff_parse(r, *bufp, &pos);
1056
1057 /*
1058 * Fully handle known macros when they are structurally
1059 * required or when the conditional evaluated to true.
1060 */
1061
1062 if ((ROFF_MAX != t) &&
1063 (rr || ROFFMAC_STRUCT & roffs[t].flags)) {
1064 assert(roffs[t].proc);
1065 return((*roffs[t].proc)(r, t, bufp, szp,
1066 ln, ppos, pos, offs));
1067 }
1068
1069 /*
1070 * If `\}' occurs on a macro line without a preceding macro,
1071 * drop the line completely.
1072 */
1073
1074 ep = *bufp + pos;
1075 if ('\\' == ep[0] && '}' == ep[1])
1076 rr = 0;
1077
1078 /* Always check for the closing delimiter `\}'. */
1079
1080 while (NULL != (ep = strchr(ep, '\\'))) {
1081 if ('}' == *(++ep)) {
1082 *ep = '&';
1083 roff_ccond(r, ln, ep - *bufp - 1);
1084 }
1085 ++ep;
1086 }
1087 return(rr ? ROFF_CONT : ROFF_IGN);
1088 }
1089
1090 /* ARGSUSED */
1091 static enum rofferr
1092 roff_cond_text(ROFF_ARGS)
1093 {
1094 char *ep;
1095 int rr;
1096
1097 rr = r->last->rule;
1098 roffnode_cleanscope(r);
1099
1100 ep = *bufp + pos;
1101 while (NULL != (ep = strchr(ep, '\\'))) {
1102 if ('}' == *(++ep)) {
1103 *ep = '&';
1104 roff_ccond(r, ln, ep - *bufp - 1);
1105 }
1106 ++ep;
1107 }
1108 return(rr ? ROFF_CONT : ROFF_IGN);
1109 }
1110
/*
 * Parse an optionally negative decimal integer at v[*pos].
 * On success, store the value in *res, move *pos past the last
 * digit and return 1.  If there is no digit, return 0 and leave
 * *pos alone; *res is set to 0 in that case.
 * Overflow of very long digit strings is not detected.
 */
static int
roff_getnum(const char *v, int *pos, int *res)
{
	int	 p, n;

	p = *pos;
	n = v[p] == '-';
	if (n)
		p++;

	/* Plain assignment: "+=" would accumulate in base 11. */
	for (*res = 0; isdigit((unsigned char)v[p]); p++)
		*res = 10 * *res + v[p] - '0';
	if (p == *pos + n)
		return 0;

	if (n)
		*res = -*res;

	*pos = p;
	return 1;
}
1132
/*
 * Parse a comparison operator at v[*pos].
 * Recognized are "=", "==", "<", ">", "<=" and ">="; the latter
 * two are reported as 'l' and 'g'.  On success, store the
 * operator in *res, move *pos past it and return it.  Otherwise
 * return 0 without moving *pos; *res then holds the offending
 * character.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	int	 e;

	*res = v[*pos];

	/*
	 * Reject anything else before looking at the next byte:
	 * at the end of the string, there is no next byte.
	 */

	switch (*res) {
	case '=':
		/* FALLTHROUGH */
	case '>':
		/* FALLTHROUGH */
	case '<':
		break;
	default:
		return(0);
	}

	e = v[*pos + 1] == '=';

	if (e && '>' == *res)
		*res = 'g';
	else if (e && '<' == *res)
		*res = 'l';

	*pos += 1 + e;

	return(*res);
}
1160
/*
 * Evaluate a string comparison condition.
 * The first character is the delimiter.
 * Succeed if the string up to its second occurrence
 * matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or lacking that, to the end of the line.
 * The cursor never moves beyond the terminating NUL, and
 * an empty condition fails without moving the cursor at all.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*s1, *s2, *s3;
	int		 match;

	match = 0;
	s1 = v + *pos;		/* initial delimiter */

	/* Without a delimiter, there is nothing behind it to scan. */
	if ('\0' == *s1)
		return(0);

	s2 = s1 + 1;		/* for scanning the first string */
	s3 = strchr(s2, *s1);	/* for scanning the second string */

	if (NULL == s3)	/* found no middle delimiter */
		goto out;

	while ('\0' != *++s3) {
		if (*s2 != *s3) {  /* mismatch */
			s3 = strchr(s3, *s1);
			break;
		}
		if (*s3 == *s1) {  /* found the final delimiter */
			match = 1;
			break;
		}
		s2++;
	}

out:
	if (NULL == s3)
		s3 = strchr(s2, '\0');
	else if ('\0' != *s3)
		s3++;	/* step over the final delimiter only */
	*pos = s3 - v;
	return(match);
}
1203
/*
 * Evaluate the condition at v[*pos] and move *pos past it.
 * A leading `!' negates.  The one-letter conditions `n' and `o'
 * count as true, `c', `d', `e', `r' and `t' as false.  Anything
 * else is tried as a number, optionally compared to a second
 * number, and if it does not start with a number, as a string
 * comparison.  Returns non-zero if the condition holds.
 */
static int
roff_evalcond(const char *v, int *pos)
{
	int	 wanttrue, lh, rh, holds;
	char	 op;

	wanttrue = ('!' != v[*pos]);
	if ( ! wanttrue)
		(*pos)++;

	switch (v[*pos]) {
	case ('n'):
	case ('o'):
		(*pos)++;
		return(wanttrue);
	case ('c'):
	case ('d'):
	case ('e'):
	case ('r'):
	case ('t'):
		(*pos)++;
		return(!wanttrue);
	default:
		break;
	}

	/* Not a number at all: string comparison. */
	if ( ! roff_getnum(v, pos, &lh))
		return(roff_evalstrcond(v, pos) == wanttrue);

	/* A lone number is true if it is positive. */
	if ( ! roff_getop(v, pos, &op))
		return((lh > 0) == wanttrue);

	/* An operator without a right-hand side is false. */
	if ( ! roff_getnum(v, pos, &rh))
		return(0);

	if ('g' == op)
		holds = (lh >= rh);
	else if ('l' == op)
		holds = (lh <= rh);
	else if ('=' == op)
		holds = (lh == rh);
	else if ('>' == op)
		holds = (lh > rh);
	else if ('<' == op)
		holds = (lh < rh);
	else
		return(0);

	return(holds == wanttrue);
}
1259
1260 /* ARGSUSED */
1261 static enum rofferr
1262 roff_line_ignore(ROFF_ARGS)
1263 {
1264
1265 return(ROFF_IGN);
1266 }
1267
1268 /* ARGSUSED */
1269 static enum rofferr
1270 roff_cond(ROFF_ARGS)
1271 {
1272
1273 roffnode_push(r, tok, NULL, ln, ppos);
1274
1275 /*
1276 * An `.el' has no conditional body: it will consume the value
1277 * of the current rstack entry set in prior `ie' calls or
1278 * defaults to DENY.
1279 *
1280 * If we're not an `el', however, then evaluate the conditional.
1281 */
1282
1283 r->last->rule = ROFF_el == tok ?
1284 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
1285 roff_evalcond(*bufp, &pos);
1286
1287 /*
1288 * An if-else will put the NEGATION of the current evaluated
1289 * conditional into the stack of rules.
1290 */
1291
1292 if (ROFF_ie == tok) {
1293 if (r->rstackpos == RSTACK_MAX - 1) {
1294 mandoc_msg(MANDOCERR_MEM,
1295 r->parse, ln, ppos, NULL);
1296 return(ROFF_ERR);
1297 }
1298 r->rstack[++r->rstackpos] = !r->last->rule;
1299 }
1300
1301 /* If the parent has false as its rule, then so do we. */
1302
1303 if (r->last->parent && !r->last->parent->rule)
1304 r->last->rule = 0;
1305
1306 /*
1307 * Determine scope.
1308 * If there is nothing on the line after the conditional,
1309 * not even whitespace, use next-line scope.
1310 */
1311
1312 if ('\0' == (*bufp)[pos]) {
1313 r->last->endspan = 2;
1314 goto out;
1315 }
1316
1317 while (' ' == (*bufp)[pos])
1318 pos++;
1319
1320 /* An opening brace requests multiline scope. */
1321
1322 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1323 r->last->endspan = -1;
1324 pos += 2;
1325 goto out;
1326 }
1327
1328 /*
1329 * Anything else following the conditional causes
1330 * single-line scope. Warn if the scope contains
1331 * nothing but trailing whitespace.
1332 */
1333
1334 if ('\0' == (*bufp)[pos])
1335 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
1336
1337 r->last->endspan = 1;
1338
1339 out:
1340 *offs = pos;
1341 return(ROFF_RERUN);
1342 }
1343
1344
1345 /* ARGSUSED */
1346 static enum rofferr
1347 roff_ds(ROFF_ARGS)
1348 {
1349 char *name, *string;
1350
1351 /*
1352 * A symbol is named by the first word following the macro
1353 * invocation up to a space. Its value is anything after the
1354 * name's trailing whitespace and optional double-quote. Thus,
1355 *
1356 * [.ds foo "bar " ]
1357 *
1358 * will have `bar " ' as its value.
1359 */
1360
1361 string = *bufp + pos;
1362 name = roff_getname(r, &string, ln, pos);
1363 if ('\0' == *name)
1364 return(ROFF_IGN);
1365
1366 /* Read past initial double-quote. */
1367 if ('"' == *string)
1368 string++;
1369
1370 /* The rest is the value. */
1371 roff_setstr(r, name, string, ROFF_as == tok);
1372 return(ROFF_IGN);
1373 }
1374
1375 void
1376 roff_setreg(struct roff *r, const char *name, int val, char sign)
1377 {
1378 struct roffreg *reg;
1379
1380 /* Search for an existing register with the same name. */
1381 reg = r->regtab;
1382
1383 while (reg && strcmp(name, reg->key.p))
1384 reg = reg->next;
1385
1386 if (NULL == reg) {
1387 /* Create a new register. */
1388 reg = mandoc_malloc(sizeof(struct roffreg));
1389 reg->key.p = mandoc_strdup(name);
1390 reg->key.sz = strlen(name);
1391 reg->val = 0;
1392 reg->next = r->regtab;
1393 r->regtab = reg;
1394 }
1395
1396 if ('+' == sign)
1397 reg->val += val;
1398 else if ('-' == sign)
1399 reg->val -= val;
1400 else
1401 reg->val = val;
1402 }
1403
/*
 * Look up a predefined read-only number register by the first
 * character of *name (the part after the leading dot).
 * Returns the fixed value of the register, or -1 if there is no
 * predefined register of that name.  Should a predefined register
 * with the value -1 ever be needed, a different "not found" marker
 * will have to be picked.
 */
static int
roff_getregro(const char *name)
{
	const char	 id = *name;

	/* 'A': ASCII approximation mode is always off. */
	/* 'j': Always adjust left margin only. */
	if (id == 'A' || id == 'j')
		return 0;

	/* 'g': Groff compatibility mode is always on. */
	/* 'T': Some output device is always defined. */
	if (id == 'g' || id == 'T')
		return 1;

	/* 'H': Fixed horizontal resolution. */
	if (id == 'H')
		return 24;

	/* 'V': Fixed vertical resolution. */
	if (id == 'V')
		return 40;

	return -1;
}
1431
1432 int
1433 roff_getreg(const struct roff *r, const char *name)
1434 {
1435 struct roffreg *reg;
1436 int val;
1437
1438 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
1439 val = roff_getregro(name + 1);
1440 if (-1 != val)
1441 return (val);
1442 }
1443
1444 for (reg = r->regtab; reg; reg = reg->next)
1445 if (0 == strcmp(name, reg->key.p))
1446 return(reg->val);
1447
1448 return(0);
1449 }
1450
1451 static int
1452 roff_getregn(const struct roff *r, const char *name, size_t len)
1453 {
1454 struct roffreg *reg;
1455 int val;
1456
1457 if ('.' == name[0] && 2 == len) {
1458 val = roff_getregro(name + 1);
1459 if (-1 != val)
1460 return (val);
1461 }
1462
1463 for (reg = r->regtab; reg; reg = reg->next)
1464 if (len == reg->key.sz &&
1465 0 == strncmp(name, reg->key.p, len))
1466 return(reg->val);
1467
1468 return(0);
1469 }
1470
1471 static void
1472 roff_freereg(struct roffreg *reg)
1473 {
1474 struct roffreg *old_reg;
1475
1476 while (NULL != reg) {
1477 free(reg->key.p);
1478 old_reg = reg;
1479 reg = reg->next;
1480 free(old_reg);
1481 }
1482 }
1483
1484 /* ARGSUSED */
1485 static enum rofferr
1486 roff_nr(ROFF_ARGS)
1487 {
1488 const char *key;
1489 char *val;
1490 size_t sz;
1491 int iv;
1492 char sign;
1493
1494 val = *bufp + pos;
1495 key = roff_getname(r, &val, ln, pos);
1496
1497 sign = *val;
1498 if ('+' == sign || '-' == sign)
1499 val++;
1500
1501 sz = strspn(val, "0123456789");
1502 iv = sz ? mandoc_strntoi(val, sz, 10) : 0;
1503
1504 roff_setreg(r, key, iv, sign);
1505
1506 return(ROFF_IGN);
1507 }
1508
1509 /* ARGSUSED */
1510 static enum rofferr
1511 roff_rm(ROFF_ARGS)
1512 {
1513 const char *name;
1514 char *cp;
1515
1516 cp = *bufp + pos;
1517 while ('\0' != *cp) {
1518 name = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1519 if ('\0' != *name)
1520 roff_setstr(r, name, NULL, 0);
1521 }
1522 return(ROFF_IGN);
1523 }
1524
1525 /* ARGSUSED */
1526 static enum rofferr
1527 roff_it(ROFF_ARGS)
1528 {
1529 char *cp;
1530 size_t len;
1531 int iv;
1532
1533 /* Parse the number of lines. */
1534 cp = *bufp + pos;
1535 len = strcspn(cp, " \t");
1536 cp[len] = '\0';
1537 if ((iv = mandoc_strntoi(cp, len, 10)) <= 0) {
1538 mandoc_msg(MANDOCERR_NUMERIC, r->parse,
1539 ln, ppos, *bufp + 1);
1540 return(ROFF_IGN);
1541 }
1542 cp += len + 1;
1543
1544 /* Arm the input line trap. */
1545 roffit_lines = iv;
1546 roffit_macro = mandoc_strdup(cp);
1547 return(ROFF_IGN);
1548 }
1549
1550 /* ARGSUSED */
1551 static enum rofferr
1552 roff_Dd(ROFF_ARGS)
1553 {
1554 const char *const *cp;
1555
1556 if (0 == ((MPARSE_MDOC | MPARSE_QUICK) & r->options))
1557 for (cp = __mdoc_reserved; *cp; cp++)
1558 roff_setstr(r, *cp, NULL, 0);
1559
1560 return(ROFF_CONT);
1561 }
1562
1563 /* ARGSUSED */
1564 static enum rofferr
1565 roff_TH(ROFF_ARGS)
1566 {
1567 const char *const *cp;
1568
1569 if (0 == (MPARSE_QUICK & r->options))
1570 for (cp = __man_reserved; *cp; cp++)
1571 roff_setstr(r, *cp, NULL, 0);
1572
1573 return(ROFF_CONT);
1574 }
1575
1576 /* ARGSUSED */
1577 static enum rofferr
1578 roff_TE(ROFF_ARGS)
1579 {
1580
1581 if (NULL == r->tbl)
1582 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1583 else
1584 tbl_end(&r->tbl);
1585
1586 return(ROFF_IGN);
1587 }
1588
1589 /* ARGSUSED */
1590 static enum rofferr
1591 roff_T_(ROFF_ARGS)
1592 {
1593
1594 if (NULL == r->tbl)
1595 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1596 else
1597 tbl_restart(ppos, ln, r->tbl);
1598
1599 return(ROFF_IGN);
1600 }
1601
#if 0
/*
 * Currently compiled out.
 * End the open equation, if any.  Returns 1 if there was one and
 * eqn_end() answered ROFF_EQN, 0 otherwise.
 */
static int
roff_closeeqn(struct roff *r)
{
	if (r->eqn == NULL)
		return 0;

	return eqn_end(&r->eqn) == ROFF_EQN ? 1 : 0;
}
#endif
1610
1611 static void
1612 roff_openeqn(struct roff *r, const char *name, int line,
1613 int offs, const char *buf)
1614 {
1615 struct eqn_node *e;
1616 int poff;
1617
1618 assert(NULL == r->eqn);
1619 e = eqn_alloc(name, offs, line, r->parse);
1620
1621 if (r->last_eqn)
1622 r->last_eqn->next = e;
1623 else
1624 r->first_eqn = r->last_eqn = e;
1625
1626 r->eqn = r->last_eqn = e;
1627
1628 if (buf) {
1629 poff = 0;
1630 eqn_read(&r->eqn, line, buf, offs, &poff);
1631 }
1632 }
1633
1634 /* ARGSUSED */
1635 static enum rofferr
1636 roff_EQ(ROFF_ARGS)
1637 {
1638
1639 roff_openeqn(r, *bufp + pos, ln, ppos, NULL);
1640 return(ROFF_IGN);
1641 }
1642
1643 /* ARGSUSED */
1644 static enum rofferr
1645 roff_EN(ROFF_ARGS)
1646 {
1647
1648 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1649 return(ROFF_IGN);
1650 }
1651
1652 /* ARGSUSED */
1653 static enum rofferr
1654 roff_TS(ROFF_ARGS)
1655 {
1656 struct tbl_node *tbl;
1657
1658 if (r->tbl) {
1659 mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
1660 tbl_end(&r->tbl);
1661 }
1662
1663 tbl = tbl_alloc(ppos, ln, r->parse);
1664
1665 if (r->last_tbl)
1666 r->last_tbl->next = tbl;
1667 else
1668 r->first_tbl = r->last_tbl = tbl;
1669
1670 r->tbl = r->last_tbl = tbl;
1671 return(ROFF_IGN);
1672 }
1673
1674 /* ARGSUSED */
1675 static enum rofferr
1676 roff_cc(ROFF_ARGS)
1677 {
1678 const char *p;
1679
1680 p = *bufp + pos;
1681
1682 if ('\0' == *p || '.' == (r->control = *p++))
1683 r->control = 0;
1684
1685 if ('\0' != *p)
1686 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1687
1688 return(ROFF_IGN);
1689 }
1690
1691 /* ARGSUSED */
1692 static enum rofferr
1693 roff_tr(ROFF_ARGS)
1694 {
1695 const char *p, *first, *second;
1696 size_t fsz, ssz;
1697 enum mandoc_esc esc;
1698
1699 p = *bufp + pos;
1700
1701 if ('\0' == *p) {
1702 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1703 return(ROFF_IGN);
1704 }
1705
1706 while ('\0' != *p) {
1707 fsz = ssz = 1;
1708
1709 first = p++;
1710 if ('\\' == *first) {
1711 esc = mandoc_escape(&p, NULL, NULL);
1712 if (ESCAPE_ERROR == esc) {
1713 mandoc_msg
1714 (MANDOCERR_BADESCAPE, r->parse,
1715 ln, (int)(p - *bufp), NULL);
1716 return(ROFF_IGN);
1717 }
1718 fsz = (size_t)(p - first);
1719 }
1720
1721 second = p++;
1722 if ('\\' == *second) {
1723 esc = mandoc_escape(&p, NULL, NULL);
1724 if (ESCAPE_ERROR == esc) {
1725 mandoc_msg
1726 (MANDOCERR_BADESCAPE, r->parse,
1727 ln, (int)(p - *bufp), NULL);
1728 return(ROFF_IGN);
1729 }
1730 ssz = (size_t)(p - second);
1731 } else if ('\0' == *second) {
1732 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
1733 ln, (int)(p - *bufp), NULL);
1734 second = " ";
1735 p--;
1736 }
1737
1738 if (fsz > 1) {
1739 roff_setstrn(&r->xmbtab, first,
1740 fsz, second, ssz, 0);
1741 continue;
1742 }
1743
1744 if (NULL == r->xtab)
1745 r->xtab = mandoc_calloc
1746 (128, sizeof(struct roffstr));
1747
1748 free(r->xtab[(int)*first].p);
1749 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
1750 r->xtab[(int)*first].sz = ssz;
1751 }
1752
1753 return(ROFF_IGN);
1754 }
1755
1756 /* ARGSUSED */
1757 static enum rofferr
1758 roff_so(ROFF_ARGS)
1759 {
1760 char *name;
1761
1762 mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL);
1763
1764 /*
1765 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1766 * opening anything that's not in our cwd or anything beneath
1767 * it. Thus, explicitly disallow traversing up the file-system
1768 * or using absolute paths.
1769 */
1770
1771 name = *bufp + pos;
1772 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1773 mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL);
1774 return(ROFF_ERR);
1775 }
1776
1777 *offs = pos;
1778 return(ROFF_SO);
1779 }
1780
1781 /* ARGSUSED */
1782 static enum rofferr
1783 roff_userdef(ROFF_ARGS)
1784 {
1785 const char *arg[9];
1786 char *cp, *n1, *n2;
1787 int i;
1788
1789 /*
1790 * Collect pointers to macro argument strings
1791 * and NUL-terminate them.
1792 */
1793 cp = *bufp + pos;
1794 for (i = 0; i < 9; i++)
1795 arg[i] = '\0' == *cp ? "" :
1796 mandoc_getarg(r->parse, &cp, ln, &pos);
1797
1798 /*
1799 * Expand macro arguments.
1800 */
1801 *szp = 0;
1802 n1 = cp = mandoc_strdup(r->current_string);
1803 while (NULL != (cp = strstr(cp, "\\$"))) {
1804 i = cp[2] - '1';
1805 if (0 > i || 8 < i) {
1806 /* Not an argument invocation. */
1807 cp += 2;
1808 continue;
1809 }
1810
1811 *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1812 n2 = mandoc_malloc(*szp);
1813
1814 strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1815 strlcat(n2, arg[i], *szp);
1816 strlcat(n2, cp + 3, *szp);
1817
1818 cp = n2 + (cp - n1);
1819 free(n1);
1820 n1 = n2;
1821 }
1822
1823 /*
1824 * Replace the macro invocation
1825 * by the expanded macro.
1826 */
1827 free(*bufp);
1828 *bufp = n1;
1829 if (0 == *szp)
1830 *szp = strlen(*bufp) + 1;
1831
1832 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1833 ROFF_REPARSE : ROFF_APPEND);
1834 }
1835
1836 static char *
1837 roff_getname(struct roff *r, char **cpp, int ln, int pos)
1838 {
1839 char *name, *cp;
1840
1841 name = *cpp;
1842 if ('\0' == *name)
1843 return(name);
1844
1845 /* Read until end of name. */
1846 for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
1847 if ('\\' != *cp)
1848 continue;
1849 cp++;
1850 if ('\\' == *cp)
1851 continue;
1852 mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
1853 *cp = '\0';
1854 name = cp;
1855 }
1856
1857 /* Nil-terminate name. */
1858 if ('\0' != *cp)
1859 *(cp++) = '\0';
1860
1861 /* Read past spaces. */
1862 while (' ' == *cp)
1863 cp++;
1864
1865 *cpp = cp;
1866 return(name);
1867 }
1868
1869 /*
1870 * Store *string into the user-defined string called *name.
1871 * To clear an existing entry, call with (*r, *name, NULL, 0).
1872 * append == 0: replace mode
1873 * append == 1: single-line append mode
1874 * append == 2: multiline append mode, append '\n' after each call
1875 */
1876 static void
1877 roff_setstr(struct roff *r, const char *name, const char *string,
1878 int append)
1879 {
1880
1881 roff_setstrn(&r->strtab, name, strlen(name), string,
1882 string ? strlen(string) : 0, append);
1883 }
1884
1885 static void
1886 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
1887 const char *string, size_t stringsz, int append)
1888 {
1889 struct roffkv *n;
1890 char *c;
1891 int i;
1892 size_t oldch, newch;
1893
1894 /* Search for an existing string with the same name. */
1895 n = *r;
1896
1897 while (n && strcmp(name, n->key.p))
1898 n = n->next;
1899
1900 if (NULL == n) {
1901 /* Create a new string table entry. */
1902 n = mandoc_malloc(sizeof(struct roffkv));
1903 n->key.p = mandoc_strndup(name, namesz);
1904 n->key.sz = namesz;
1905 n->val.p = NULL;
1906 n->val.sz = 0;
1907 n->next = *r;
1908 *r = n;
1909 } else if (0 == append) {
1910 free(n->val.p);
1911 n->val.p = NULL;
1912 n->val.sz = 0;
1913 }
1914
1915 if (NULL == string)
1916 return;
1917
1918 /*
1919 * One additional byte for the '\n' in multiline mode,
1920 * and one for the terminating '\0'.
1921 */
1922 newch = stringsz + (1 < append ? 2u : 1u);
1923
1924 if (NULL == n->val.p) {
1925 n->val.p = mandoc_malloc(newch);
1926 *n->val.p = '\0';
1927 oldch = 0;
1928 } else {
1929 oldch = n->val.sz;
1930 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
1931 }
1932
1933 /* Skip existing content in the destination buffer. */
1934 c = n->val.p + (int)oldch;
1935
1936 /* Append new content to the destination buffer. */
1937 i = 0;
1938 while (i < (int)stringsz) {
1939 /*
1940 * Rudimentary roff copy mode:
1941 * Handle escaped backslashes.
1942 */
1943 if ('\\' == string[i] && '\\' == string[i + 1])
1944 i++;
1945 *c++ = string[i++];
1946 }
1947
1948 /* Append terminating bytes. */
1949 if (1 < append)
1950 *c++ = '\n';
1951
1952 *c = '\0';
1953 n->val.sz = (int)(c - n->val.p);
1954 }
1955
1956 static const char *
1957 roff_getstrn(const struct roff *r, const char *name, size_t len)
1958 {
1959 const struct roffkv *n;
1960 int i;
1961
1962 for (n = r->strtab; n; n = n->next)
1963 if (0 == strncmp(name, n->key.p, len) &&
1964 '\0' == n->key.p[(int)len])
1965 return(n->val.p);
1966
1967 for (i = 0; i < PREDEFS_MAX; i++)
1968 if (0 == strncmp(name, predefs[i].name, len) &&
1969 '\0' == predefs[i].name[(int)len])
1970 return(predefs[i].str);
1971
1972 return(NULL);
1973 }
1974
1975 static void
1976 roff_freestr(struct roffkv *r)
1977 {
1978 struct roffkv *n, *nn;
1979
1980 for (n = r; n; n = nn) {
1981 free(n->key.p);
1982 free(n->val.p);
1983 nn = n->next;
1984 free(n);
1985 }
1986 }
1987
1988 const struct tbl_span *
1989 roff_span(const struct roff *r)
1990 {
1991
1992 return(r->tbl ? tbl_span(r->tbl) : NULL);
1993 }
1994
1995 const struct eqn *
1996 roff_eqn(const struct roff *r)
1997 {
1998
1999 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
2000 }
2001
2002 /*
2003 * Duplicate an input string, making the appropriate character
2004 * conversations (as stipulated by `tr') along the way.
2005 * Returns a heap-allocated string with all the replacements made.
2006 */
2007 char *
2008 roff_strdup(const struct roff *r, const char *p)
2009 {
2010 const struct roffkv *cp;
2011 char *res;
2012 const char *pp;
2013 size_t ssz, sz;
2014 enum mandoc_esc esc;
2015
2016 if (NULL == r->xmbtab && NULL == r->xtab)
2017 return(mandoc_strdup(p));
2018 else if ('\0' == *p)
2019 return(mandoc_strdup(""));
2020
2021 /*
2022 * Step through each character looking for term matches
2023 * (remember that a `tr' can be invoked with an escape, which is
2024 * a glyph but the escape is multi-character).
2025 * We only do this if the character hash has been initialised
2026 * and the string is >0 length.
2027 */
2028
2029 res = NULL;
2030 ssz = 0;
2031
2032 while ('\0' != *p) {
2033 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
2034 sz = r->xtab[(int)*p].sz;
2035 res = mandoc_realloc(res, ssz + sz + 1);
2036 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
2037 ssz += sz;
2038 p++;
2039 continue;
2040 } else if ('\\' != *p) {
2041 res = mandoc_realloc(res, ssz + 2);
2042 res[ssz++] = *p++;
2043 continue;
2044 }
2045
2046 /* Search for term matches. */
2047 for (cp = r->xmbtab; cp; cp = cp->next)
2048 if (0 == strncmp(p, cp->key.p, cp->key.sz))
2049 break;
2050
2051 if (NULL != cp) {
2052 /*
2053 * A match has been found.
2054 * Append the match to the array and move
2055 * forward by its keysize.
2056 */
2057 res = mandoc_realloc
2058 (res, ssz + cp->val.sz + 1);
2059 memcpy(res + ssz, cp->val.p, cp->val.sz);
2060 ssz += cp->val.sz;
2061 p += (int)cp->key.sz;
2062 continue;
2063 }
2064
2065 /*
2066 * Handle escapes carefully: we need to copy
2067 * over just the escape itself, or else we might
2068 * do replacements within the escape itself.
2069 * Make sure to pass along the bogus string.
2070 */
2071 pp = p++;
2072 esc = mandoc_escape(&p, NULL, NULL);
2073 if (ESCAPE_ERROR == esc) {
2074 sz = strlen(pp);
2075 res = mandoc_realloc(res, ssz + sz + 1);
2076 memcpy(res + ssz, pp, sz);
2077 break;
2078 }
2079 /*
2080 * We bail out on bad escapes.
2081 * No need to warn: we already did so when
2082 * roff_res() was called.
2083 */
2084 sz = (int)(p - pp);
2085 res = mandoc_realloc(res, ssz + sz + 1);
2086 memcpy(res + ssz, pp, sz);
2087 ssz += sz;
2088 }
2089
2090 res[(int)ssz] = '\0';
2091 return(res);
2092 }
2093
2094 /*
2095 * Find out whether a line is a macro line or not.
2096 * If it is, adjust the current position and return one; if it isn't,
2097 * return zero and don't change the current position.
2098 * If the control character has been set with `.cc', then let that grain
2099 * precedence.
2100 * This is slighly contrary to groff, where using the non-breaking
2101 * control character when `cc' has been invoked will cause the
2102 * non-breaking macro contents to be printed verbatim.
2103 */
2104 int
2105 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
2106 {
2107 int pos;
2108
2109 pos = *ppos;
2110
2111 if (0 != r->control && cp[pos] == r->control)
2112 pos++;
2113 else if (0 != r->control)
2114 return(0);
2115 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
2116 pos += 2;
2117 else if ('.' == cp[pos] || '\'' == cp[pos])
2118 pos++;
2119 else
2120 return(0);
2121
2122 while (' ' == cp[pos] || '\t' == cp[pos])
2123 pos++;
2124
2125 *ppos = pos;
2126 return(1);
2127 }