]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
Keep words after .Ic together in a single argument.
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.191 2014/01/06 23:46:07 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include "mandoc.h"
29 #include "libroff.h"
30 #include "libmandoc.h"
31
/*
 * Maximum number of nested if-else conditionals.
 * This is the capacity of the rstack[] array in struct roff.
 */
#define	RSTACK_MAX	128

/*
 * Maximum number of string expansions per line, to break infinite loops.
 * Once roff_res() has performed more than this many substitutions on
 * one line, it reports MANDOCERR_ROFFLOOP and the line is ignored.
 */
#define	EXPAND_LIMIT	1000
37
/*
 * Identifiers of the roff requests and macros handled in this file.
 * The values are used as indices into the roffs[] table, so the order
 * of the enumerators has to match the order of the initializers there.
 */
enum	rofft {
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_cc,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_fam,
	ROFF_hw,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_ps,
	ROFF_rm,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,
	ROFF_Dd,
	ROFF_TH,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,
	ROFF_EQ,
	ROFF_EN,
	ROFF_cblock,	/* block close; named "." in roffs[] */
	ROFF_ccond,	/* conditional close; named "\\}" in roffs[] */
	ROFF_USERDEF,	/* user-defined macro; has no name in roffs[] */
	ROFF_MAX	/* table size; also returned for "not found" */
};
77
/*
 * Result of evaluating a conditional request.
 * The conditional handlers return ROFF_IGN for lines in a
 * ROFFRULE_DENY scope and ROFF_CONT for lines in a ROFFRULE_ALLOW scope.
 */
enum	roffrule {
	ROFFRULE_DENY,	/* condition is false */
	ROFFRULE_ALLOW	/* condition is true */
};
82
/*
 * An incredibly-simple string buffer:
 * a pointer to the text together with its cached length.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
90
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * Used for the strtab and xmbtab lists in struct roff.
 */
struct	roffkv {
	struct roffstr	 key; /* name to look up */
	struct roffstr	 val; /* text stored under that name */
	struct roffkv	*next; /* next in list */
};
99
/*
 * A single number register as part of a singly-linked list.
 * Used for the regtab list in struct roff.
 */
struct	roffreg {
	struct roffstr	 key; /* register name */
	int		 val; /* current register value */
	struct roffreg	*next; /* next in list */
};
108
/*
 * The roff parser context.
 * Allocated by roff_alloc(), cleared for reuse by roff_reset()
 * and destroyed by roff_free().
 */
struct	roff {
	enum mparset	 parsetype; /* requested parse type */
	struct mparse	*parse; /* parse point */
	int		 quick; /* skip standard macro deletion */
	struct roffnode	*last; /* leaf of stack */
	/*
	 * Negated results of `ie' conditionals, pushed by `ie'
	 * and consumed by the matching `el'.
	 */
	enum roffrule	 rstack[RSTACK_MAX]; /* stack of !`ie' rules */
	char		 control; /* control character */
	int		 rstackpos; /* position in rstack; -1 when empty */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* last equation parsed */
	struct eqn_node	*first_eqn; /* first equation parsed */
	struct eqn_node	*eqn; /* current equation being parsed */
};
129
/*
 * One open scope (block or conditional) on the stack of pending
 * roff instructions.  Created by roffnode_push() and released by
 * roffnode_pop(), which frees name and end.
 */
struct	roffnode {
	enum rofft	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* end-rules: custom token */
	/*
	 * roff_cond() sets this to 2 for next-line scope, to 1 for
	 * single-line scope and to -1 for multiline scope.
	 * roffnode_cleanscope() decrements it and pops the node
	 * once it reaches zero.
	 */
	int		 endspan; /* end-rules: next-line or infty */
	enum roffrule	 rule; /* current evaluation rule */
};
140
/*
 * The common parameter list shared by all request handlers,
 * i.e. by every function stored in a struct roffmac.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum rofft tok, /* tok of macro */ \
			 char **bufp, /* input buffer */ \
			 size_t *szp, /* size of input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Pointer to a request handler taking the ROFF_ARGS parameters. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
151
/*
 * Description of one request or macro: its name, its handlers,
 * and the link used to chain entries within one bucket of hash[].
 */
struct	roffmac {
	const char	*name; /* macro name */
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
	struct roffmac	*next; /* next entry in the same hash bucket */
};
161
/* One entry of the predefs[] table. */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Builds one struct predef initializer, including the trailing comma.
 * NOTE(review): presumably this is the macro that "predefs.in"
 * invokes when it is included into predefs[] - confirm.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
169
/*
 * Forward declarations of the file-local functions.
 * Those declared with ROFF_ARGS are request handlers
 * that can be stored in a struct roffmac.
 */
static	enum rofft	 roffhash_find(const char *, size_t);
static	void		 roffhash_init(void);
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum rofft,
				const char *, int, int);
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_cc(ROFF_ARGS);
static	enum rofferr	 roff_ccond(ROFF_ARGS);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum roffrule	 roff_evalcond(const char *, int *);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	char		*roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(const struct roff *,
				const char *, size_t);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	enum rofferr	 roff_it(ROFF_ARGS);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	void		 roff_openeqn(struct roff *, const char *,
				int, int, const char *);
static	enum rofft	 roff_parse(struct roff *, const char *, int *);
static	enum rofferr	 roff_parsetext(char **, size_t *, int, int *);
static	enum rofferr	 roff_res(struct roff *,
				char **, size_t *, int, int);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_tr(ROFF_ARGS);
static	enum rofferr	 roff_Dd(ROFF_ARGS);
static	enum rofferr	 roff_TH(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
221
/*
 * See roffhash_find().
 * Macro names are hashed on their first character, which has to lie
 * in the range ASCII_LO to ASCII_HI; there is one bucket for each
 * character in that range.
 */

#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

/* Bucket heads; entries are chained through roffmac.next. */
static	struct roffmac	*hash[HASHWIDTH];
229
/*
 * Table of all known requests, indexed by enum rofft; the rows must
 * stay in the same order as the enumerators.
 * Columns: name, proc, text, sub, flags, next.
 * The text and sub handlers are needed for requests that open a scope:
 * roff_parseln() asserts that they exist for the token of the
 * innermost open node.
 */
static	struct roffmac	 roffs[ROFF_MAX] = {
	{ "ad", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "cc", roff_cc, NULL, NULL, 0, NULL },
	{ "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ds", roff_ds, NULL, NULL, 0, NULL },
	{ "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "fam", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hw", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hy", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "it", roff_it, NULL, NULL, 0, NULL },
	{ "ne", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nh", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nr", roff_nr, NULL, NULL, 0, NULL },
	{ "ns", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ps", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rm", roff_rm, NULL, NULL, 0, NULL },
	{ "so", roff_so, NULL, NULL, 0, NULL },
	{ "ta", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tr", roff_tr, NULL, NULL, 0, NULL },
	{ "Dd", roff_Dd, NULL, NULL, 0, NULL },
	{ "TH", roff_TH, NULL, NULL, 0, NULL },
	{ "TS", roff_TS, NULL, NULL, 0, NULL },
	{ "TE", roff_TE, NULL, NULL, 0, NULL },
	{ "T&", roff_T_, NULL, NULL, 0, NULL },
	{ "EQ", roff_EQ, NULL, NULL, 0, NULL },
	{ "EN", roff_EN, NULL, NULL, 0, NULL },
	{ ".", roff_cblock, NULL, NULL, 0, NULL },
	{ "\\}", roff_ccond, NULL, NULL, 0, NULL },
	/* ROFF_USERDEF: nameless, hence never entered into hash[]. */
	{ NULL, roff_userdef, NULL, NULL, 0, NULL },
};
268
/*
 * NULL-terminated list of macro names.
 * NOTE(review): judging by the name, these are the names reserved
 * for the mdoc language; the list is not used in the part of the
 * file reviewed here - confirm against its users.
 * NOTE(review): identifiers containing a double underscore are
 * reserved for the C implementation; renaming would change the
 * external interface, so it is only flagged here.
 */
const	char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
	"Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
	"Ds", "Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em", "em",
	"En", "Eo", "Eq", "Er", "Es", "Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp", "LP",
	"Me", "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "PP", "pp", "Pq",
	"Qc", "Ql", "Qo", "Qq", "Or", "Rd", "Re", "Rs", "Rv",
	"Sc", "Sf", "Sh", "SH", "Sm", "So", "Sq",
	"Ss", "St", "Sx", "Sy",
	"Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
	"%A", "%B", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
290
/*
 * NULL-terminated list of macro names.
 * NOTE(review): judging by the name, these are the names reserved
 * for the man language; the list is not used in the part of the
 * file reviewed here - confirm against its users.
 */
const	char *const __man_reserved[] = {
	"AT", "B", "BI", "BR", "BT", "DE", "DS", "DT",
	"EE", "EN", "EQ", "EX", "HF", "HP", "I", "IB", "IP", "IR",
	"LP", "ME", "MT", "OP", "P", "PD", "PP", "PT",
	"R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS", "SY",
	"TE", "TH", "TP", "TQ", "TS", "T&", "UC", "UE", "UR", "YS",
	NULL
};
299
/*
 * Array of injected predefined strings.
 * The initializers come from "predefs.in".
 * NOTE(review): PREDEFS_MAX presumably has to equal the number of
 * entries in that file - confirm whenever the file changes.
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
305
/*
 * See roffhash_find().
 * Maps a name to its bucket index in hash[] using the first character
 * only; the caller must make sure that character lies in the range
 * ASCII_LO to ASCII_HI.
 */
#define	ROFF_HASH(p)	(p[0] - ASCII_LO)
308
/*
 * State of the input line trap, see roff_parsetext():
 * the trap springs on the text line processed while roffit_lines is 1,
 * larger values are counted down, and 0 means that no trap is pending.
 */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
311
312 static void
313 roffhash_init(void)
314 {
315 struct roffmac *n;
316 int buc, i;
317
318 for (i = 0; i < (int)ROFF_USERDEF; i++) {
319 assert(roffs[i].name[0] >= ASCII_LO);
320 assert(roffs[i].name[0] <= ASCII_HI);
321
322 buc = ROFF_HASH(roffs[i].name);
323
324 if (NULL != (n = hash[buc])) {
325 for ( ; n->next; n = n->next)
326 /* Do nothing. */ ;
327 n->next = &roffs[i];
328 } else
329 hash[buc] = &roffs[i];
330 }
331 }
332
333 /*
334 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
335 * the nil-terminated string name could be found.
336 */
337 static enum rofft
338 roffhash_find(const char *p, size_t s)
339 {
340 int buc;
341 struct roffmac *n;
342
343 /*
344 * libroff has an extremely simple hashtable, for the time
345 * being, which simply keys on the first character, which must
346 * be printable, then walks a chain. It works well enough until
347 * optimised.
348 */
349
350 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
351 return(ROFF_MAX);
352
353 buc = ROFF_HASH(p);
354
355 if (NULL == (n = hash[buc]))
356 return(ROFF_MAX);
357 for ( ; n; n = n->next)
358 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
359 return((enum rofft)(n - roffs));
360
361 return(ROFF_MAX);
362 }
363
364
365 /*
366 * Pop the current node off of the stack of roff instructions currently
367 * pending.
368 */
369 static void
370 roffnode_pop(struct roff *r)
371 {
372 struct roffnode *p;
373
374 assert(r->last);
375 p = r->last;
376
377 r->last = r->last->parent;
378 free(p->name);
379 free(p->end);
380 free(p);
381 }
382
383
384 /*
385 * Push a roff node onto the instruction stack. This must later be
386 * removed with roffnode_pop().
387 */
388 static void
389 roffnode_push(struct roff *r, enum rofft tok, const char *name,
390 int line, int col)
391 {
392 struct roffnode *p;
393
394 p = mandoc_calloc(1, sizeof(struct roffnode));
395 p->tok = tok;
396 if (name)
397 p->name = mandoc_strdup(name);
398 p->parent = r->last;
399 p->line = line;
400 p->col = col;
401 p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
402
403 r->last = p;
404 }
405
406
407 static void
408 roff_free1(struct roff *r)
409 {
410 struct tbl_node *tbl;
411 struct eqn_node *e;
412 int i;
413
414 while (NULL != (tbl = r->first_tbl)) {
415 r->first_tbl = tbl->next;
416 tbl_free(tbl);
417 }
418
419 r->first_tbl = r->last_tbl = r->tbl = NULL;
420
421 while (NULL != (e = r->first_eqn)) {
422 r->first_eqn = e->next;
423 eqn_free(e);
424 }
425
426 r->first_eqn = r->last_eqn = r->eqn = NULL;
427
428 while (r->last)
429 roffnode_pop(r);
430
431 roff_freestr(r->strtab);
432 roff_freestr(r->xmbtab);
433
434 r->strtab = r->xmbtab = NULL;
435
436 roff_freereg(r->regtab);
437
438 r->regtab = NULL;
439
440 if (r->xtab)
441 for (i = 0; i < 128; i++)
442 free(r->xtab[i].p);
443
444 free(r->xtab);
445 r->xtab = NULL;
446 }
447
448 void
449 roff_reset(struct roff *r)
450 {
451
452 roff_free1(r);
453 r->control = 0;
454 }
455
456
/*
 * Destroy a parser context: release everything it owns,
 * then the context structure itself.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);
	free(r);
}
464
465
466 struct roff *
467 roff_alloc(enum mparset type, struct mparse *parse, int quick)
468 {
469 struct roff *r;
470
471 r = mandoc_calloc(1, sizeof(struct roff));
472 r->parsetype = type;
473 r->parse = parse;
474 r->quick = quick;
475 r->rstackpos = -1;
476
477 roffhash_init();
478
479 return(r);
480 }
481
482 /*
483 * In the current line, expand user-defined strings ("\*")
484 * and references to number registers ("\n").
485 * Also check the syntax of other escape sequences.
486 */
487 static enum rofferr
488 roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
489 {
490 char ubuf[12]; /* buffer to print the number */
491 const char *stesc; /* start of an escape sequence ('\\') */
492 const char *stnam; /* start of the name, after "[(*" */
493 const char *cp; /* end of the name, e.g. before ']' */
494 const char *res; /* the string to be substituted */
495 char *nbuf; /* new buffer to copy bufp to */
496 size_t nsz; /* size of the new buffer */
497 size_t maxl; /* expected length of the escape name */
498 size_t naml; /* actual length of the escape name */
499 int expand_count; /* to avoid infinite loops */
500
501 expand_count = 0;
502
503 again:
504 cp = *bufp + pos;
505 while (NULL != (cp = strchr(cp, '\\'))) {
506 stesc = cp++;
507
508 /*
509 * The second character must be an asterisk or an n.
510 * If it isn't, skip it anyway: It is escaped,
511 * so it can't start another escape sequence.
512 */
513
514 if ('\0' == *cp)
515 return(ROFF_CONT);
516
517 switch (*cp) {
518 case ('*'):
519 res = NULL;
520 break;
521 case ('n'):
522 res = ubuf;
523 break;
524 default:
525 if (ESCAPE_ERROR != mandoc_escape(&cp, NULL, NULL))
526 continue;
527 mandoc_msg
528 (MANDOCERR_BADESCAPE, r->parse,
529 ln, (int)(stesc - *bufp), NULL);
530 return(ROFF_CONT);
531 }
532
533 cp++;
534
535 /*
536 * The third character decides the length
537 * of the name of the string or register.
538 * Save a pointer to the name.
539 */
540
541 switch (*cp) {
542 case ('\0'):
543 return(ROFF_CONT);
544 case ('('):
545 cp++;
546 maxl = 2;
547 break;
548 case ('['):
549 cp++;
550 maxl = 0;
551 break;
552 default:
553 maxl = 1;
554 break;
555 }
556 stnam = cp;
557
558 /* Advance to the end of the name. */
559
560 for (naml = 0; 0 == maxl || naml < maxl; naml++, cp++) {
561 if ('\0' == *cp) {
562 mandoc_msg
563 (MANDOCERR_BADESCAPE,
564 r->parse, ln,
565 (int)(stesc - *bufp), NULL);
566 return(ROFF_CONT);
567 }
568 if (0 == maxl && ']' == *cp)
569 break;
570 }
571
572 /*
573 * Retrieve the replacement string; if it is
574 * undefined, resume searching for escapes.
575 */
576
577 if (NULL == res)
578 res = roff_getstrn(r, stnam, naml);
579 else
580 snprintf(ubuf, sizeof(ubuf), "%d",
581 roff_getregn(r, stnam, naml));
582
583 if (NULL == res) {
584 mandoc_msg
585 (MANDOCERR_BADESCAPE, r->parse,
586 ln, (int)(stesc - *bufp), NULL);
587 res = "";
588 }
589
590 /* Replace the escape sequence by the string. */
591
592 pos = stesc - *bufp;
593
594 nsz = *szp + strlen(res) + 1;
595 nbuf = mandoc_malloc(nsz);
596
597 strlcpy(nbuf, *bufp, (size_t)(stesc - *bufp + 1));
598 strlcat(nbuf, res, nsz);
599 strlcat(nbuf, cp + (maxl ? 0 : 1), nsz);
600
601 free(*bufp);
602
603 *bufp = nbuf;
604 *szp = nsz;
605
606 if (EXPAND_LIMIT >= ++expand_count)
607 goto again;
608
609 /* Just leave the string unexpanded. */
610 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL);
611 return(ROFF_IGN);
612 }
613 return(ROFF_CONT);
614 }
615
616 /*
617 * Process text streams:
618 * Convert all breakable hyphens into ASCII_HYPH.
619 * Decrement and spring input line trap.
620 */
621 static enum rofferr
622 roff_parsetext(char **bufp, size_t *szp, int pos, int *offs)
623 {
624 size_t sz;
625 const char *start;
626 char *p;
627 int isz;
628 enum mandoc_esc esc;
629
630 start = p = *bufp + pos;
631
632 while ('\0' != *p) {
633 sz = strcspn(p, "-\\");
634 p += sz;
635
636 if ('\0' == *p)
637 break;
638
639 if ('\\' == *p) {
640 /* Skip over escapes. */
641 p++;
642 esc = mandoc_escape((const char **)&p, NULL, NULL);
643 if (ESCAPE_ERROR == esc)
644 break;
645 continue;
646 } else if (p == start) {
647 p++;
648 continue;
649 }
650
651 if (isalpha((unsigned char)p[-1]) &&
652 isalpha((unsigned char)p[1]))
653 *p = ASCII_HYPH;
654 p++;
655 }
656
657 /* Spring the input line trap. */
658 if (1 == roffit_lines) {
659 isz = asprintf(&p, "%s\n.%s", *bufp, roffit_macro);
660 if (-1 == isz) {
661 perror(NULL);
662 exit((int)MANDOCLEVEL_SYSERR);
663 }
664 free(*bufp);
665 *bufp = p;
666 *szp = isz + 1;
667 *offs = 0;
668 free(roffit_macro);
669 roffit_lines = 0;
670 return(ROFF_REPARSE);
671 } else if (1 < roffit_lines)
672 --roffit_lines;
673 return(ROFF_CONT);
674 }
675
676 enum rofferr
677 roff_parseln(struct roff *r, int ln, char **bufp,
678 size_t *szp, int pos, int *offs)
679 {
680 enum rofft t;
681 enum rofferr e;
682 int ppos, ctl;
683
684 /*
685 * Run the reserved-word filter only if we have some reserved
686 * words to fill in.
687 */
688
689 e = roff_res(r, bufp, szp, ln, pos);
690 if (ROFF_IGN == e)
691 return(e);
692 assert(ROFF_CONT == e);
693
694 ppos = pos;
695 ctl = roff_getcontrol(r, *bufp, &pos);
696
697 /*
698 * First, if a scope is open and we're not a macro, pass the
699 * text through the macro's filter. If a scope isn't open and
700 * we're not a macro, just let it through.
701 * Finally, if there's an equation scope open, divert it into it
702 * no matter our state.
703 */
704
705 if (r->last && ! ctl) {
706 t = r->last->tok;
707 assert(roffs[t].text);
708 e = (*roffs[t].text)
709 (r, t, bufp, szp, ln, pos, pos, offs);
710 assert(ROFF_IGN == e || ROFF_CONT == e);
711 if (ROFF_CONT != e)
712 return(e);
713 }
714 if (r->eqn)
715 return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
716 if ( ! ctl) {
717 if (r->tbl)
718 return(tbl_read(r->tbl, ln, *bufp, pos));
719 return(roff_parsetext(bufp, szp, pos, offs));
720 }
721
722 /*
723 * If a scope is open, go to the child handler for that macro,
724 * as it may want to preprocess before doing anything with it.
725 * Don't do so if an equation is open.
726 */
727
728 if (r->last) {
729 t = r->last->tok;
730 assert(roffs[t].sub);
731 return((*roffs[t].sub)
732 (r, t, bufp, szp,
733 ln, ppos, pos, offs));
734 }
735
736 /*
737 * Lastly, as we've no scope open, try to look up and execute
738 * the new macro. If no macro is found, simply return and let
739 * the compilers handle it.
740 */
741
742 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
743 return(ROFF_CONT);
744
745 assert(roffs[t].proc);
746 return((*roffs[t].proc)
747 (r, t, bufp, szp,
748 ln, ppos, pos, offs));
749 }
750
751
752 void
753 roff_endparse(struct roff *r)
754 {
755
756 if (r->last)
757 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
758 r->last->line, r->last->col, NULL);
759
760 if (r->eqn) {
761 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
762 r->eqn->eqn.ln, r->eqn->eqn.pos, NULL);
763 eqn_end(&r->eqn);
764 }
765
766 if (r->tbl) {
767 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
768 r->tbl->line, r->tbl->pos, NULL);
769 tbl_end(&r->tbl);
770 }
771 }
772
773 /*
774 * Parse a roff node's type from the input buffer. This must be in the
775 * form of ".foo xxx" in the usual way.
776 */
777 static enum rofft
778 roff_parse(struct roff *r, const char *buf, int *pos)
779 {
780 const char *mac;
781 size_t maclen;
782 enum rofft t;
783
784 if ('\0' == buf[*pos] || '"' == buf[*pos] ||
785 '\t' == buf[*pos] || ' ' == buf[*pos])
786 return(ROFF_MAX);
787
788 /*
789 * We stop the macro parse at an escape, tab, space, or nil.
790 * However, `\}' is also a valid macro, so make sure we don't
791 * clobber it by seeing the `\' as the end of token.
792 */
793
794 mac = buf + *pos;
795 maclen = strcspn(mac + 1, " \\\t\0") + 1;
796
797 t = (r->current_string = roff_getstrn(r, mac, maclen))
798 ? ROFF_USERDEF : roffhash_find(mac, maclen);
799
800 *pos += (int)maclen;
801
802 while (buf[*pos] && ' ' == buf[*pos])
803 (*pos)++;
804
805 return(t);
806 }
807
808 /* ARGSUSED */
809 static enum rofferr
810 roff_cblock(ROFF_ARGS)
811 {
812
813 /*
814 * A block-close `..' should only be invoked as a child of an
815 * ignore macro, otherwise raise a warning and just ignore it.
816 */
817
818 if (NULL == r->last) {
819 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
820 return(ROFF_IGN);
821 }
822
823 switch (r->last->tok) {
824 case (ROFF_am):
825 /* FALLTHROUGH */
826 case (ROFF_ami):
827 /* FALLTHROUGH */
828 case (ROFF_am1):
829 /* FALLTHROUGH */
830 case (ROFF_de):
831 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
832 /* FALLTHROUGH */
833 case (ROFF_dei):
834 /* FALLTHROUGH */
835 case (ROFF_ig):
836 break;
837 default:
838 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
839 return(ROFF_IGN);
840 }
841
842 if ((*bufp)[pos])
843 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
844
845 roffnode_pop(r);
846 roffnode_cleanscope(r);
847 return(ROFF_IGN);
848
849 }
850
851
852 static void
853 roffnode_cleanscope(struct roff *r)
854 {
855
856 while (r->last) {
857 if (--r->last->endspan != 0)
858 break;
859 roffnode_pop(r);
860 }
861 }
862
863
864 /* ARGSUSED */
865 static enum rofferr
866 roff_ccond(ROFF_ARGS)
867 {
868
869 if (NULL == r->last) {
870 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
871 return(ROFF_IGN);
872 }
873
874 switch (r->last->tok) {
875 case (ROFF_el):
876 /* FALLTHROUGH */
877 case (ROFF_ie):
878 /* FALLTHROUGH */
879 case (ROFF_if):
880 break;
881 default:
882 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
883 return(ROFF_IGN);
884 }
885
886 if (r->last->endspan > -1) {
887 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
888 return(ROFF_IGN);
889 }
890
891 if ((*bufp)[pos])
892 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
893
894 roffnode_pop(r);
895 roffnode_cleanscope(r);
896 return(ROFF_IGN);
897 }
898
899
900 /* ARGSUSED */
901 static enum rofferr
902 roff_block(ROFF_ARGS)
903 {
904 int sv;
905 size_t sz;
906 char *name;
907
908 name = NULL;
909
910 if (ROFF_ig != tok) {
911 if ('\0' == (*bufp)[pos]) {
912 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
913 return(ROFF_IGN);
914 }
915
916 /*
917 * Re-write `de1', since we don't really care about
918 * groff's strange compatibility mode, into `de'.
919 */
920
921 if (ROFF_de1 == tok)
922 tok = ROFF_de;
923 if (ROFF_de == tok)
924 name = *bufp + pos;
925 else
926 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
927 roffs[tok].name);
928
929 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
930 pos++;
931
932 while (isspace((unsigned char)(*bufp)[pos]))
933 (*bufp)[pos++] = '\0';
934 }
935
936 roffnode_push(r, tok, name, ln, ppos);
937
938 /*
939 * At the beginning of a `de' macro, clear the existing string
940 * with the same name, if there is one. New content will be
941 * added from roff_block_text() in multiline mode.
942 */
943
944 if (ROFF_de == tok)
945 roff_setstr(r, name, "", 0);
946
947 if ('\0' == (*bufp)[pos])
948 return(ROFF_IGN);
949
950 /* If present, process the custom end-of-line marker. */
951
952 sv = pos;
953 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
954 pos++;
955
956 /*
957 * Note: groff does NOT like escape characters in the input.
958 * Instead of detecting this, we're just going to let it fly and
959 * to hell with it.
960 */
961
962 assert(pos > sv);
963 sz = (size_t)(pos - sv);
964
965 if (1 == sz && '.' == (*bufp)[sv])
966 return(ROFF_IGN);
967
968 r->last->end = mandoc_malloc(sz + 1);
969
970 memcpy(r->last->end, *bufp + sv, sz);
971 r->last->end[(int)sz] = '\0';
972
973 if ((*bufp)[pos])
974 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
975
976 return(ROFF_IGN);
977 }
978
979
980 /* ARGSUSED */
981 static enum rofferr
982 roff_block_sub(ROFF_ARGS)
983 {
984 enum rofft t;
985 int i, j;
986
987 /*
988 * First check whether a custom macro exists at this level. If
989 * it does, then check against it. This is some of groff's
990 * stranger behaviours. If we encountered a custom end-scope
991 * tag and that tag also happens to be a "real" macro, then we
992 * need to try interpreting it again as a real macro. If it's
993 * not, then return ignore. Else continue.
994 */
995
996 if (r->last->end) {
997 for (i = pos, j = 0; r->last->end[j]; j++, i++)
998 if ((*bufp)[i] != r->last->end[j])
999 break;
1000
1001 if ('\0' == r->last->end[j] &&
1002 ('\0' == (*bufp)[i] ||
1003 ' ' == (*bufp)[i] ||
1004 '\t' == (*bufp)[i])) {
1005 roffnode_pop(r);
1006 roffnode_cleanscope(r);
1007
1008 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
1009 i++;
1010
1011 pos = i;
1012 if (ROFF_MAX != roff_parse(r, *bufp, &pos))
1013 return(ROFF_RERUN);
1014 return(ROFF_IGN);
1015 }
1016 }
1017
1018 /*
1019 * If we have no custom end-query or lookup failed, then try
1020 * pulling it out of the hashtable.
1021 */
1022
1023 t = roff_parse(r, *bufp, &pos);
1024
1025 /*
1026 * Macros other than block-end are only significant
1027 * in `de' blocks; elsewhere, simply throw them away.
1028 */
1029 if (ROFF_cblock != t) {
1030 if (ROFF_de == tok)
1031 roff_setstr(r, r->last->name, *bufp + ppos, 1);
1032 return(ROFF_IGN);
1033 }
1034
1035 assert(roffs[t].proc);
1036 return((*roffs[t].proc)(r, t, bufp, szp,
1037 ln, ppos, pos, offs));
1038 }
1039
1040
1041 /* ARGSUSED */
1042 static enum rofferr
1043 roff_block_text(ROFF_ARGS)
1044 {
1045
1046 if (ROFF_de == tok)
1047 roff_setstr(r, r->last->name, *bufp + pos, 1);
1048
1049 return(ROFF_IGN);
1050 }
1051
1052
1053 /* ARGSUSED */
1054 static enum rofferr
1055 roff_cond_sub(ROFF_ARGS)
1056 {
1057 enum rofft t;
1058 enum roffrule rr;
1059 char *ep;
1060
1061 rr = r->last->rule;
1062 roffnode_cleanscope(r);
1063 t = roff_parse(r, *bufp, &pos);
1064
1065 /*
1066 * Fully handle known macros when they are structurally
1067 * required or when the conditional evaluated to true.
1068 */
1069
1070 if ((ROFF_MAX != t) &&
1071 (ROFF_ccond == t || ROFFRULE_ALLOW == rr ||
1072 ROFFMAC_STRUCT & roffs[t].flags)) {
1073 assert(roffs[t].proc);
1074 return((*roffs[t].proc)(r, t, bufp, szp,
1075 ln, ppos, pos, offs));
1076 }
1077
1078 /* Always check for the closing delimiter `\}'. */
1079
1080 ep = &(*bufp)[pos];
1081 while (NULL != (ep = strchr(ep, '\\'))) {
1082 if ('}' != *(++ep))
1083 continue;
1084
1085 /*
1086 * If we're at the end of line, then just chop
1087 * off the \} and resize the buffer.
1088 * If we aren't, then convert it to spaces.
1089 */
1090
1091 if ('\0' == *(ep + 1)) {
1092 *--ep = '\0';
1093 *szp -= 2;
1094 } else
1095 *(ep - 1) = *ep = ' ';
1096
1097 roff_ccond(r, ROFF_ccond, bufp, szp,
1098 ln, pos, pos + 2, offs);
1099 break;
1100 }
1101 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1102 }
1103
1104 /* ARGSUSED */
1105 static enum rofferr
1106 roff_cond_text(ROFF_ARGS)
1107 {
1108 char *ep;
1109 enum roffrule rr;
1110
1111 rr = r->last->rule;
1112 roffnode_cleanscope(r);
1113
1114 ep = &(*bufp)[pos];
1115 for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {
1116 ep++;
1117 if ('}' != *ep)
1118 continue;
1119 *ep = '&';
1120 roff_ccond(r, ROFF_ccond, bufp, szp,
1121 ln, pos, pos + 2, offs);
1122 }
1123 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1124 }
1125
/*
 * Parse an optionally negative decimal integer at v[*pos].
 * On success, store the number in *res, advance *pos to the first
 * character after the number, and return 1.
 * If no digit is found, return 0 and leave *pos alone;
 * *res is set to 0 in that case.
 */
static int
roff_getnum(const char *v, int *pos, int *res)
{
	int p, n;

	p = *pos;
	n = v[p] == '-';
	if (n)
		p++;

	/* Shift the digits seen so far by one decimal place. */
	for (*res = 0; isdigit((unsigned char)v[p]); p++)
		*res = 10 * *res + v[p] - '0';

	/* Nothing but, at most, a minus sign was consumed. */
	if (p == *pos + n)
		return 0;

	if (n)
		*res = -*res;

	*pos = p;
	return 1;
}
1147
/*
 * Parse a comparison operator at v[*pos].
 * Recognized are `=', `==', `<', `<=', `>' and `>='.
 * On success, store the operator in *res, advance *pos past it, and
 * return the same, non-zero value: '=', '<' or '>' for the basic
 * operators, 'l' for `<=', 'g' for `>='.
 * Otherwise, return 0 and leave *pos alone; *res then holds the
 * character found at v[*pos].
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	int e;

	*res = v[*pos];

	switch (*res) {
	case '=':
		/* FALLTHROUGH */
	case '>':
		/* FALLTHROUGH */
	case '<':
		break;
	default:
		return(0);
	}

	/*
	 * Look at the following character only now: if v[*pos] is
	 * the terminating nil, there is nothing valid to read
	 * behind it.
	 */

	e = v[*pos + 1] == '=';
	if (e) {
		if ('>' == *res)
			*res = 'g';
		else if ('<' == *res)
			*res = 'l';
	}

	*pos += 1 + e;

	return(*res);
}
1175
1176 static enum roffrule
1177 roff_evalcond(const char *v, int *pos)
1178 {
1179 int not, lh, rh;
1180 char op;
1181
1182 switch (v[*pos]) {
1183 case ('n'):
1184 (*pos)++;
1185 return(ROFFRULE_ALLOW);
1186 case ('e'):
1187 /* FALLTHROUGH */
1188 case ('o'):
1189 /* FALLTHROUGH */
1190 case ('t'):
1191 (*pos)++;
1192 return(ROFFRULE_DENY);
1193 case ('!'):
1194 (*pos)++;
1195 not = 1;
1196 break;
1197 default:
1198 not = 0;
1199 break;
1200 }
1201
1202 if (!roff_getnum(v, pos, &lh))
1203 return ROFFRULE_DENY;
1204 if (!roff_getop(v, pos, &op)) {
1205 if (lh < 0)
1206 lh = 0;
1207 goto out;
1208 }
1209 if (!roff_getnum(v, pos, &rh))
1210 return ROFFRULE_DENY;
1211 switch (op) {
1212 case 'g':
1213 lh = lh >= rh;
1214 break;
1215 case 'l':
1216 lh = lh <= rh;
1217 break;
1218 case '=':
1219 lh = lh == rh;
1220 break;
1221 case '>':
1222 lh = lh > rh;
1223 break;
1224 case '<':
1225 lh = lh < rh;
1226 break;
1227 default:
1228 return ROFFRULE_DENY;
1229 }
1230 out:
1231 if (not)
1232 lh = !lh;
1233 return lh ? ROFFRULE_ALLOW : ROFFRULE_DENY;
1234 }
1235
1236 /* ARGSUSED */
1237 static enum rofferr
1238 roff_line_ignore(ROFF_ARGS)
1239 {
1240
1241 return(ROFF_IGN);
1242 }
1243
1244 /* ARGSUSED */
1245 static enum rofferr
1246 roff_cond(ROFF_ARGS)
1247 {
1248
1249 roffnode_push(r, tok, NULL, ln, ppos);
1250
1251 /*
1252 * An `.el' has no conditional body: it will consume the value
1253 * of the current rstack entry set in prior `ie' calls or
1254 * defaults to DENY.
1255 *
1256 * If we're not an `el', however, then evaluate the conditional.
1257 */
1258
1259 r->last->rule = ROFF_el == tok ?
1260 (r->rstackpos < 0 ?
1261 ROFFRULE_DENY : r->rstack[r->rstackpos--]) :
1262 roff_evalcond(*bufp, &pos);
1263
1264 /*
1265 * An if-else will put the NEGATION of the current evaluated
1266 * conditional into the stack of rules.
1267 */
1268
1269 if (ROFF_ie == tok) {
1270 if (r->rstackpos == RSTACK_MAX - 1) {
1271 mandoc_msg(MANDOCERR_MEM,
1272 r->parse, ln, ppos, NULL);
1273 return(ROFF_ERR);
1274 }
1275 r->rstack[++r->rstackpos] =
1276 ROFFRULE_DENY == r->last->rule ?
1277 ROFFRULE_ALLOW : ROFFRULE_DENY;
1278 }
1279
1280 /* If the parent has false as its rule, then so do we. */
1281
1282 if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
1283 r->last->rule = ROFFRULE_DENY;
1284
1285 /*
1286 * Determine scope.
1287 * If there is nothing on the line after the conditional,
1288 * not even whitespace, use next-line scope.
1289 */
1290
1291 if ('\0' == (*bufp)[pos]) {
1292 r->last->endspan = 2;
1293 goto out;
1294 }
1295
1296 while (' ' == (*bufp)[pos])
1297 pos++;
1298
1299 /* An opening brace requests multiline scope. */
1300
1301 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1302 r->last->endspan = -1;
1303 pos += 2;
1304 goto out;
1305 }
1306
1307 /*
1308 * Anything else following the conditional causes
1309 * single-line scope. Warn if the scope contains
1310 * nothing but trailing whitespace.
1311 */
1312
1313 if ('\0' == (*bufp)[pos])
1314 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
1315
1316 r->last->endspan = 1;
1317
1318 out:
1319 *offs = pos;
1320 return(ROFF_RERUN);
1321 }
1322
1323
1324 /* ARGSUSED */
1325 static enum rofferr
1326 roff_ds(ROFF_ARGS)
1327 {
1328 char *name, *string;
1329
1330 /*
1331 * A symbol is named by the first word following the macro
1332 * invocation up to a space. Its value is anything after the
1333 * name's trailing whitespace and optional double-quote. Thus,
1334 *
1335 * [.ds foo "bar " ]
1336 *
1337 * will have `bar " ' as its value.
1338 */
1339
1340 string = *bufp + pos;
1341 name = roff_getname(r, &string, ln, pos);
1342 if ('\0' == *name)
1343 return(ROFF_IGN);
1344
1345 /* Read past initial double-quote. */
1346 if ('"' == *string)
1347 string++;
1348
1349 /* The rest is the value. */
1350 roff_setstr(r, name, string, 0);
1351 return(ROFF_IGN);
1352 }
1353
1354 void
1355 roff_setreg(struct roff *r, const char *name, int val, char sign)
1356 {
1357 struct roffreg *reg;
1358
1359 /* Search for an existing register with the same name. */
1360 reg = r->regtab;
1361
1362 while (reg && strcmp(name, reg->key.p))
1363 reg = reg->next;
1364
1365 if (NULL == reg) {
1366 /* Create a new register. */
1367 reg = mandoc_malloc(sizeof(struct roffreg));
1368 reg->key.p = mandoc_strdup(name);
1369 reg->key.sz = strlen(name);
1370 reg->val = 0;
1371 reg->next = r->regtab;
1372 r->regtab = reg;
1373 }
1374
1375 if ('+' == sign)
1376 reg->val += val;
1377 else if ('-' == sign)
1378 reg->val -= val;
1379 else
1380 reg->val = val;
1381 }
1382
1383 int
1384 roff_getreg(const struct roff *r, const char *name)
1385 {
1386 struct roffreg *reg;
1387
1388 for (reg = r->regtab; reg; reg = reg->next)
1389 if (0 == strcmp(name, reg->key.p))
1390 return(reg->val);
1391
1392 return(0);
1393 }
1394
1395 static int
1396 roff_getregn(const struct roff *r, const char *name, size_t len)
1397 {
1398 struct roffreg *reg;
1399
1400 for (reg = r->regtab; reg; reg = reg->next)
1401 if (len == reg->key.sz &&
1402 0 == strncmp(name, reg->key.p, len))
1403 return(reg->val);
1404
1405 return(0);
1406 }
1407
1408 static void
1409 roff_freereg(struct roffreg *reg)
1410 {
1411 struct roffreg *old_reg;
1412
1413 while (NULL != reg) {
1414 free(reg->key.p);
1415 old_reg = reg;
1416 reg = reg->next;
1417 free(old_reg);
1418 }
1419 }
1420
1421 /* ARGSUSED */
1422 static enum rofferr
1423 roff_nr(ROFF_ARGS)
1424 {
1425 const char *key;
1426 char *val;
1427 size_t sz;
1428 int iv;
1429 char sign;
1430
1431 val = *bufp + pos;
1432 key = roff_getname(r, &val, ln, pos);
1433
1434 sign = *val;
1435 if ('+' == sign || '-' == sign)
1436 val++;
1437
1438 sz = strspn(val, "0123456789");
1439 iv = sz ? mandoc_strntoi(val, sz, 10) : 0;
1440
1441 roff_setreg(r, key, iv, sign);
1442
1443 return(ROFF_IGN);
1444 }
1445
1446 /* ARGSUSED */
1447 static enum rofferr
1448 roff_rm(ROFF_ARGS)
1449 {
1450 const char *name;
1451 char *cp;
1452
1453 cp = *bufp + pos;
1454 while ('\0' != *cp) {
1455 name = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1456 if ('\0' != *name)
1457 roff_setstr(r, name, NULL, 0);
1458 }
1459 return(ROFF_IGN);
1460 }
1461
1462 /* ARGSUSED */
1463 static enum rofferr
1464 roff_it(ROFF_ARGS)
1465 {
1466 char *cp;
1467 size_t len;
1468 int iv;
1469
1470 /* Parse the number of lines. */
1471 cp = *bufp + pos;
1472 len = strcspn(cp, " \t");
1473 cp[len] = '\0';
1474 if ((iv = mandoc_strntoi(cp, len, 10)) <= 0) {
1475 mandoc_msg(MANDOCERR_NUMERIC, r->parse,
1476 ln, ppos, *bufp + 1);
1477 return(ROFF_IGN);
1478 }
1479 cp += len + 1;
1480
1481 /* Arm the input line trap. */
1482 roffit_lines = iv;
1483 roffit_macro = mandoc_strdup(cp);
1484 return(ROFF_IGN);
1485 }
1486
1487 /* ARGSUSED */
1488 static enum rofferr
1489 roff_Dd(ROFF_ARGS)
1490 {
1491 const char *const *cp;
1492
1493 if (0 == r->quick && MPARSE_MDOC != r->parsetype)
1494 for (cp = __mdoc_reserved; *cp; cp++)
1495 roff_setstr(r, *cp, NULL, 0);
1496
1497 return(ROFF_CONT);
1498 }
1499
1500 /* ARGSUSED */
1501 static enum rofferr
1502 roff_TH(ROFF_ARGS)
1503 {
1504 const char *const *cp;
1505
1506 if (0 == r->quick && MPARSE_MDOC != r->parsetype)
1507 for (cp = __man_reserved; *cp; cp++)
1508 roff_setstr(r, *cp, NULL, 0);
1509
1510 return(ROFF_CONT);
1511 }
1512
1513 /* ARGSUSED */
1514 static enum rofferr
1515 roff_TE(ROFF_ARGS)
1516 {
1517
1518 if (NULL == r->tbl)
1519 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1520 else
1521 tbl_end(&r->tbl);
1522
1523 return(ROFF_IGN);
1524 }
1525
1526 /* ARGSUSED */
1527 static enum rofferr
1528 roff_T_(ROFF_ARGS)
1529 {
1530
1531 if (NULL == r->tbl)
1532 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1533 else
1534 tbl_restart(ppos, ln, r->tbl);
1535
1536 return(ROFF_IGN);
1537 }
1538
#if 0
/*
 * Currently compiled out.
 * End the open equation, if any; returns 1 if there was one and
 * eqn_end() reported ROFF_EQN, 0 otherwise.
 */
static int
roff_closeeqn(struct roff *r)
{

	if (NULL == r->eqn)
		return(0);

	return(ROFF_EQN == eqn_end(&r->eqn));
}
#endif
1547
1548 static void
1549 roff_openeqn(struct roff *r, const char *name, int line,
1550 int offs, const char *buf)
1551 {
1552 struct eqn_node *e;
1553 int poff;
1554
1555 assert(NULL == r->eqn);
1556 e = eqn_alloc(name, offs, line, r->parse);
1557
1558 if (r->last_eqn)
1559 r->last_eqn->next = e;
1560 else
1561 r->first_eqn = r->last_eqn = e;
1562
1563 r->eqn = r->last_eqn = e;
1564
1565 if (buf) {
1566 poff = 0;
1567 eqn_read(&r->eqn, line, buf, offs, &poff);
1568 }
1569 }
1570
1571 /* ARGSUSED */
1572 static enum rofferr
1573 roff_EQ(ROFF_ARGS)
1574 {
1575
1576 roff_openeqn(r, *bufp + pos, ln, ppos, NULL);
1577 return(ROFF_IGN);
1578 }
1579
1580 /* ARGSUSED */
1581 static enum rofferr
1582 roff_EN(ROFF_ARGS)
1583 {
1584
1585 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1586 return(ROFF_IGN);
1587 }
1588
1589 /* ARGSUSED */
1590 static enum rofferr
1591 roff_TS(ROFF_ARGS)
1592 {
1593 struct tbl_node *tbl;
1594
1595 if (r->tbl) {
1596 mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
1597 tbl_end(&r->tbl);
1598 }
1599
1600 tbl = tbl_alloc(ppos, ln, r->parse);
1601
1602 if (r->last_tbl)
1603 r->last_tbl->next = tbl;
1604 else
1605 r->first_tbl = r->last_tbl = tbl;
1606
1607 r->tbl = r->last_tbl = tbl;
1608 return(ROFF_IGN);
1609 }
1610
1611 /* ARGSUSED */
1612 static enum rofferr
1613 roff_cc(ROFF_ARGS)
1614 {
1615 const char *p;
1616
1617 p = *bufp + pos;
1618
1619 if ('\0' == *p || '.' == (r->control = *p++))
1620 r->control = 0;
1621
1622 if ('\0' != *p)
1623 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1624
1625 return(ROFF_IGN);
1626 }
1627
1628 /* ARGSUSED */
1629 static enum rofferr
1630 roff_tr(ROFF_ARGS)
1631 {
1632 const char *p, *first, *second;
1633 size_t fsz, ssz;
1634 enum mandoc_esc esc;
1635
1636 p = *bufp + pos;
1637
1638 if ('\0' == *p) {
1639 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1640 return(ROFF_IGN);
1641 }
1642
1643 while ('\0' != *p) {
1644 fsz = ssz = 1;
1645
1646 first = p++;
1647 if ('\\' == *first) {
1648 esc = mandoc_escape(&p, NULL, NULL);
1649 if (ESCAPE_ERROR == esc) {
1650 mandoc_msg
1651 (MANDOCERR_BADESCAPE, r->parse,
1652 ln, (int)(p - *bufp), NULL);
1653 return(ROFF_IGN);
1654 }
1655 fsz = (size_t)(p - first);
1656 }
1657
1658 second = p++;
1659 if ('\\' == *second) {
1660 esc = mandoc_escape(&p, NULL, NULL);
1661 if (ESCAPE_ERROR == esc) {
1662 mandoc_msg
1663 (MANDOCERR_BADESCAPE, r->parse,
1664 ln, (int)(p - *bufp), NULL);
1665 return(ROFF_IGN);
1666 }
1667 ssz = (size_t)(p - second);
1668 } else if ('\0' == *second) {
1669 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
1670 ln, (int)(p - *bufp), NULL);
1671 second = " ";
1672 p--;
1673 }
1674
1675 if (fsz > 1) {
1676 roff_setstrn(&r->xmbtab, first,
1677 fsz, second, ssz, 0);
1678 continue;
1679 }
1680
1681 if (NULL == r->xtab)
1682 r->xtab = mandoc_calloc
1683 (128, sizeof(struct roffstr));
1684
1685 free(r->xtab[(int)*first].p);
1686 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
1687 r->xtab[(int)*first].sz = ssz;
1688 }
1689
1690 return(ROFF_IGN);
1691 }
1692
1693 /* ARGSUSED */
1694 static enum rofferr
1695 roff_so(ROFF_ARGS)
1696 {
1697 char *name;
1698
1699 mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL);
1700
1701 /*
1702 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1703 * opening anything that's not in our cwd or anything beneath
1704 * it. Thus, explicitly disallow traversing up the file-system
1705 * or using absolute paths.
1706 */
1707
1708 name = *bufp + pos;
1709 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1710 mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL);
1711 return(ROFF_ERR);
1712 }
1713
1714 *offs = pos;
1715 return(ROFF_SO);
1716 }
1717
1718 /* ARGSUSED */
1719 static enum rofferr
1720 roff_userdef(ROFF_ARGS)
1721 {
1722 const char *arg[9];
1723 char *cp, *n1, *n2;
1724 int i;
1725
1726 /*
1727 * Collect pointers to macro argument strings
1728 * and NUL-terminate them.
1729 */
1730 cp = *bufp + pos;
1731 for (i = 0; i < 9; i++)
1732 arg[i] = '\0' == *cp ? "" :
1733 mandoc_getarg(r->parse, &cp, ln, &pos);
1734
1735 /*
1736 * Expand macro arguments.
1737 */
1738 *szp = 0;
1739 n1 = cp = mandoc_strdup(r->current_string);
1740 while (NULL != (cp = strstr(cp, "\\$"))) {
1741 i = cp[2] - '1';
1742 if (0 > i || 8 < i) {
1743 /* Not an argument invocation. */
1744 cp += 2;
1745 continue;
1746 }
1747
1748 *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1749 n2 = mandoc_malloc(*szp);
1750
1751 strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1752 strlcat(n2, arg[i], *szp);
1753 strlcat(n2, cp + 3, *szp);
1754
1755 cp = n2 + (cp - n1);
1756 free(n1);
1757 n1 = n2;
1758 }
1759
1760 /*
1761 * Replace the macro invocation
1762 * by the expanded macro.
1763 */
1764 free(*bufp);
1765 *bufp = n1;
1766 if (0 == *szp)
1767 *szp = strlen(*bufp) + 1;
1768
1769 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1770 ROFF_REPARSE : ROFF_APPEND);
1771 }
1772
1773 static char *
1774 roff_getname(struct roff *r, char **cpp, int ln, int pos)
1775 {
1776 char *name, *cp;
1777
1778 name = *cpp;
1779 if ('\0' == *name)
1780 return(name);
1781
1782 /* Read until end of name. */
1783 for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
1784 if ('\\' != *cp)
1785 continue;
1786 cp++;
1787 if ('\\' == *cp)
1788 continue;
1789 mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
1790 *cp = '\0';
1791 name = cp;
1792 }
1793
1794 /* Nil-terminate name. */
1795 if ('\0' != *cp)
1796 *(cp++) = '\0';
1797
1798 /* Read past spaces. */
1799 while (' ' == *cp)
1800 cp++;
1801
1802 *cpp = cp;
1803 return(name);
1804 }
1805
1806 /*
1807 * Store *string into the user-defined string called *name.
1808 * In multiline mode, append to an existing entry and append '\n';
1809 * else replace the existing entry, if there is one.
1810 * To clear an existing entry, call with (*r, *name, NULL, 0).
1811 */
1812 static void
1813 roff_setstr(struct roff *r, const char *name, const char *string,
1814 int multiline)
1815 {
1816
1817 roff_setstrn(&r->strtab, name, strlen(name), string,
1818 string ? strlen(string) : 0, multiline);
1819 }
1820
1821 static void
1822 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
1823 const char *string, size_t stringsz, int multiline)
1824 {
1825 struct roffkv *n;
1826 char *c;
1827 int i;
1828 size_t oldch, newch;
1829
1830 /* Search for an existing string with the same name. */
1831 n = *r;
1832
1833 while (n && strcmp(name, n->key.p))
1834 n = n->next;
1835
1836 if (NULL == n) {
1837 /* Create a new string table entry. */
1838 n = mandoc_malloc(sizeof(struct roffkv));
1839 n->key.p = mandoc_strndup(name, namesz);
1840 n->key.sz = namesz;
1841 n->val.p = NULL;
1842 n->val.sz = 0;
1843 n->next = *r;
1844 *r = n;
1845 } else if (0 == multiline) {
1846 /* In multiline mode, append; else replace. */
1847 free(n->val.p);
1848 n->val.p = NULL;
1849 n->val.sz = 0;
1850 }
1851
1852 if (NULL == string)
1853 return;
1854
1855 /*
1856 * One additional byte for the '\n' in multiline mode,
1857 * and one for the terminating '\0'.
1858 */
1859 newch = stringsz + (multiline ? 2u : 1u);
1860
1861 if (NULL == n->val.p) {
1862 n->val.p = mandoc_malloc(newch);
1863 *n->val.p = '\0';
1864 oldch = 0;
1865 } else {
1866 oldch = n->val.sz;
1867 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
1868 }
1869
1870 /* Skip existing content in the destination buffer. */
1871 c = n->val.p + (int)oldch;
1872
1873 /* Append new content to the destination buffer. */
1874 i = 0;
1875 while (i < (int)stringsz) {
1876 /*
1877 * Rudimentary roff copy mode:
1878 * Handle escaped backslashes.
1879 */
1880 if ('\\' == string[i] && '\\' == string[i + 1])
1881 i++;
1882 *c++ = string[i++];
1883 }
1884
1885 /* Append terminating bytes. */
1886 if (multiline)
1887 *c++ = '\n';
1888
1889 *c = '\0';
1890 n->val.sz = (int)(c - n->val.p);
1891 }
1892
1893 static const char *
1894 roff_getstrn(const struct roff *r, const char *name, size_t len)
1895 {
1896 const struct roffkv *n;
1897 int i;
1898
1899 for (n = r->strtab; n; n = n->next)
1900 if (0 == strncmp(name, n->key.p, len) &&
1901 '\0' == n->key.p[(int)len])
1902 return(n->val.p);
1903
1904 for (i = 0; i < PREDEFS_MAX; i++)
1905 if (0 == strncmp(name, predefs[i].name, len) &&
1906 '\0' == predefs[i].name[(int)len])
1907 return(predefs[i].str);
1908
1909 return(NULL);
1910 }
1911
1912 static void
1913 roff_freestr(struct roffkv *r)
1914 {
1915 struct roffkv *n, *nn;
1916
1917 for (n = r; n; n = nn) {
1918 free(n->key.p);
1919 free(n->val.p);
1920 nn = n->next;
1921 free(n);
1922 }
1923 }
1924
1925 const struct tbl_span *
1926 roff_span(const struct roff *r)
1927 {
1928
1929 return(r->tbl ? tbl_span(r->tbl) : NULL);
1930 }
1931
1932 const struct eqn *
1933 roff_eqn(const struct roff *r)
1934 {
1935
1936 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
1937 }
1938
1939 /*
1940 * Duplicate an input string, making the appropriate character
1941 * conversations (as stipulated by `tr') along the way.
1942 * Returns a heap-allocated string with all the replacements made.
1943 */
1944 char *
1945 roff_strdup(const struct roff *r, const char *p)
1946 {
1947 const struct roffkv *cp;
1948 char *res;
1949 const char *pp;
1950 size_t ssz, sz;
1951 enum mandoc_esc esc;
1952
1953 if (NULL == r->xmbtab && NULL == r->xtab)
1954 return(mandoc_strdup(p));
1955 else if ('\0' == *p)
1956 return(mandoc_strdup(""));
1957
1958 /*
1959 * Step through each character looking for term matches
1960 * (remember that a `tr' can be invoked with an escape, which is
1961 * a glyph but the escape is multi-character).
1962 * We only do this if the character hash has been initialised
1963 * and the string is >0 length.
1964 */
1965
1966 res = NULL;
1967 ssz = 0;
1968
1969 while ('\0' != *p) {
1970 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
1971 sz = r->xtab[(int)*p].sz;
1972 res = mandoc_realloc(res, ssz + sz + 1);
1973 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
1974 ssz += sz;
1975 p++;
1976 continue;
1977 } else if ('\\' != *p) {
1978 res = mandoc_realloc(res, ssz + 2);
1979 res[ssz++] = *p++;
1980 continue;
1981 }
1982
1983 /* Search for term matches. */
1984 for (cp = r->xmbtab; cp; cp = cp->next)
1985 if (0 == strncmp(p, cp->key.p, cp->key.sz))
1986 break;
1987
1988 if (NULL != cp) {
1989 /*
1990 * A match has been found.
1991 * Append the match to the array and move
1992 * forward by its keysize.
1993 */
1994 res = mandoc_realloc
1995 (res, ssz + cp->val.sz + 1);
1996 memcpy(res + ssz, cp->val.p, cp->val.sz);
1997 ssz += cp->val.sz;
1998 p += (int)cp->key.sz;
1999 continue;
2000 }
2001
2002 /*
2003 * Handle escapes carefully: we need to copy
2004 * over just the escape itself, or else we might
2005 * do replacements within the escape itself.
2006 * Make sure to pass along the bogus string.
2007 */
2008 pp = p++;
2009 esc = mandoc_escape(&p, NULL, NULL);
2010 if (ESCAPE_ERROR == esc) {
2011 sz = strlen(pp);
2012 res = mandoc_realloc(res, ssz + sz + 1);
2013 memcpy(res + ssz, pp, sz);
2014 break;
2015 }
2016 /*
2017 * We bail out on bad escapes.
2018 * No need to warn: we already did so when
2019 * roff_res() was called.
2020 */
2021 sz = (int)(p - pp);
2022 res = mandoc_realloc(res, ssz + sz + 1);
2023 memcpy(res + ssz, pp, sz);
2024 ssz += sz;
2025 }
2026
2027 res[(int)ssz] = '\0';
2028 return(res);
2029 }
2030
2031 /*
2032 * Find out whether a line is a macro line or not.
2033 * If it is, adjust the current position and return one; if it isn't,
2034 * return zero and don't change the current position.
2035 * If the control character has been set with `.cc', then let that grain
2036 * precedence.
2037 * This is slighly contrary to groff, where using the non-breaking
2038 * control character when `cc' has been invoked will cause the
2039 * non-breaking macro contents to be printed verbatim.
2040 */
2041 int
2042 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
2043 {
2044 int pos;
2045
2046 pos = *ppos;
2047
2048 if (0 != r->control && cp[pos] == r->control)
2049 pos++;
2050 else if (0 != r->control)
2051 return(0);
2052 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
2053 pos += 2;
2054 else if ('.' == cp[pos] || '\'' == cp[pos])
2055 pos++;
2056 else
2057 return(0);
2058
2059 while (' ' == cp[pos] || '\t' == cp[pos])
2060 pos++;
2061
2062 *ppos = pos;
2063 return(1);
2064 }