]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
1316bfa9d2569997100c8d17cdb66dc029060f35
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.203 2014/04/05 20:34:57 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include "mandoc.h"
29 #include "mandoc_aux.h"
30 #include "libroff.h"
31 #include "libmandoc.h"
32
33 /* Maximum number of nested if-else conditionals. */
34 #define RSTACK_MAX 128
35
36 /* Maximum number of string expansions per line, to break infinite loops. */
37 #define EXPAND_LIMIT 1000
38
/*
 * Tokens for the requests and macros handled in this file.
 * The values are used as indices into the roffs[] table, so the
 * enumerators and the table entries must stay in the same order.
 */
enum	rofft {
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_as,
	ROFF_cc,
	ROFF_ce,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_fam,
	ROFF_hw,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_ps,
	ROFF_rm,
	ROFF_rr,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,
	ROFF_Dd,
	ROFF_TH,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,
	ROFF_EQ,
	ROFF_EN,
	ROFF_cblock,	/* block close; named "." in roffs[] */
	ROFF_USERDEF,	/* user-defined macro; has no name in roffs[] */
	ROFF_MAX	/* table size; also returned for "no such macro" */
};
80
/*
 * An incredibly-simple string buffer: a pointer together with the
 * cached length of the string it points to.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
88
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * Used for the string/macro table and the multi-byte translation
 * table of struct roff.
 */
struct	roffkv {
	struct roffstr	 key;
	struct roffstr	 val;
	struct roffkv	*next; /* next in list */
};
97
/*
 * A single number register as part of a singly-linked list:
 * the register name and its current integer value.
 */
struct	roffreg {
	struct roffstr	 key;
	int		 val;
	struct roffreg	*next; /* next in list */
};
106
/*
 * State of the roff parser.
 * Allocated by roff_alloc(), emptied by roff_reset(),
 * released by roff_free().
 */
struct	roff {
	struct mparse	*parse; /* parse point */
	int		 options; /* parse options */
	struct roffnode	*last; /* leaf of stack */
	int		 rstack[RSTACK_MAX]; /* negated `ie' results, popped by `el' */
	char		 control; /* control character */
	int		 rstackpos; /* position in rstack; -1 when empty */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* last equation parsed */
	struct eqn_node	*first_eqn; /* first equation parsed */
	struct eqn_node	*eqn; /* current equation being parsed */
};
126
/*
 * One open block or conditional on the stack rooted at roff.last.
 * Pushed by roffnode_push() and released by roffnode_pop().
 */
struct	roffnode {
	enum rofft	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* end-rules: custom token */
	/*
	 * Decremented by roffnode_cleanscope(); the node is popped
	 * when it reaches zero.  roff_cond() uses 1, 2 and -1.
	 */
	int		 endspan; /* end-rules: next-line or infty */
	int		 rule; /* current evaluation rule */
};
137
/*
 * Parameter list shared by every request handler, so that all of
 * them can be stored in and called through a roffproc pointer.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum rofft tok, /* tok of macro */ \
			 char **bufp, /* input buffer */ \
			 size_t *szp, /* size of input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Signature of a request handler. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
148
/*
 * One entry of the roffs[] request table.
 * roff_parseln() calls text for non-control lines and sub for control
 * lines while a node of this type is on top of the stack; proc is
 * called when the request itself is encountered.
 */
struct	roffmac {
	const char	*name; /* macro name */
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
	struct roffmac	*next; /* hash chain, set up by roffhash_init() */
};
158
/*
 * A predefined string: its name and the text it stands for.
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Expands to one struct predef initializer followed by a comma;
 * the predefs[] array is filled by including "predefs.in",
 * which presumably consists of PREDEF() invocations.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
166
167 static enum rofft roffhash_find(const char *, size_t);
168 static void roffhash_init(void);
169 static void roffnode_cleanscope(struct roff *);
170 static void roffnode_pop(struct roff *);
171 static void roffnode_push(struct roff *, enum rofft,
172 const char *, int, int);
173 static enum rofferr roff_block(ROFF_ARGS);
174 static enum rofferr roff_block_text(ROFF_ARGS);
175 static enum rofferr roff_block_sub(ROFF_ARGS);
176 static enum rofferr roff_cblock(ROFF_ARGS);
177 static enum rofferr roff_cc(ROFF_ARGS);
178 static void roff_ccond(struct roff *, int, int);
179 static enum rofferr roff_cond(ROFF_ARGS);
180 static enum rofferr roff_cond_text(ROFF_ARGS);
181 static enum rofferr roff_cond_sub(ROFF_ARGS);
182 static enum rofferr roff_ds(ROFF_ARGS);
183 static int roff_evalcond(const char *, int *);
184 static int roff_evalstrcond(const char *, int *);
185 static void roff_free1(struct roff *);
186 static void roff_freereg(struct roffreg *);
187 static void roff_freestr(struct roffkv *);
188 static char *roff_getname(struct roff *, char **, int, int);
189 static int roff_getnum(const char *, int *, int *);
190 static int roff_getop(const char *, int *, char *);
191 static int roff_getregn(const struct roff *,
192 const char *, size_t);
193 static int roff_getregro(const char *name);
194 static const char *roff_getstrn(const struct roff *,
195 const char *, size_t);
196 static enum rofferr roff_it(ROFF_ARGS);
197 static enum rofferr roff_line_ignore(ROFF_ARGS);
198 static enum rofferr roff_nr(ROFF_ARGS);
199 static void roff_openeqn(struct roff *, const char *,
200 int, int, const char *);
201 static enum rofft roff_parse(struct roff *, const char *, int *);
202 static enum rofferr roff_parsetext(char **, size_t *, int, int *);
203 static enum rofferr roff_res(struct roff *,
204 char **, size_t *, int, int);
205 static enum rofferr roff_rm(ROFF_ARGS);
206 static enum rofferr roff_rr(ROFF_ARGS);
207 static void roff_setstr(struct roff *,
208 const char *, const char *, int);
209 static void roff_setstrn(struct roffkv **, const char *,
210 size_t, const char *, size_t, int);
211 static enum rofferr roff_so(ROFF_ARGS);
212 static enum rofferr roff_tr(ROFF_ARGS);
213 static enum rofferr roff_Dd(ROFF_ARGS);
214 static enum rofferr roff_TH(ROFF_ARGS);
215 static enum rofferr roff_TE(ROFF_ARGS);
216 static enum rofferr roff_TS(ROFF_ARGS);
217 static enum rofferr roff_EQ(ROFF_ARGS);
218 static enum rofferr roff_EN(ROFF_ARGS);
219 static enum rofferr roff_T_(ROFF_ARGS);
220 static enum rofferr roff_userdef(ROFF_ARGS);
221
/* See roffhash_find() */

/*
 * The hash table has one bucket per possible first character of a
 * macro name, covering the printable ASCII range 33 to 126.
 */
#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

/* Bucket heads; chains run through roffmac.next. */
static	struct roffmac	*hash[HASHWIDTH];
229
/*
 * The request table, indexed by enum rofft.
 * Columns: name, handler for the request itself, handler for text
 * lines inside its block, handler for control lines inside its block,
 * flags, and the hash chain link (filled in by roffhash_init()).
 * The last entry stands for user-defined macros and has no name.
 */
static	struct roffmac	 roffs[ROFF_MAX] = {
	{ "ad", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "as", roff_ds, NULL, NULL, 0, NULL },
	{ "cc", roff_cc, NULL, NULL, 0, NULL },
	{ "ce", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ds", roff_ds, NULL, NULL, 0, NULL },
	{ "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "fam", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hw", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hy", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "it", roff_it, NULL, NULL, 0, NULL },
	{ "ne", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nh", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nr", roff_nr, NULL, NULL, 0, NULL },
	{ "ns", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ps", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rm", roff_rm, NULL, NULL, 0, NULL },
	{ "rr", roff_rr, NULL, NULL, 0, NULL },
	{ "so", roff_so, NULL, NULL, 0, NULL },
	{ "ta", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tr", roff_tr, NULL, NULL, 0, NULL },
	{ "Dd", roff_Dd, NULL, NULL, 0, NULL },
	{ "TH", roff_TH, NULL, NULL, 0, NULL },
	{ "TS", roff_TS, NULL, NULL, 0, NULL },
	{ "TE", roff_TE, NULL, NULL, 0, NULL },
	{ "T&", roff_T_, NULL, NULL, 0, NULL },
	{ "EQ", roff_EQ, NULL, NULL, 0, NULL },
	{ "EN", roff_EN, NULL, NULL, 0, NULL },
	{ ".", roff_cblock, NULL, NULL, 0, NULL },
	{ NULL, roff_userdef, NULL, NULL, 0, NULL },
};
270
/*
 * NULL-terminated list of reserved macro names; judging by the
 * identifier these are the mdoc ones.  Not referenced in the part of
 * the file visible here.
 */
/* not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH */
const char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
	"Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
	"Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em",
	"En", "Eo", "Er", "Es", "Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp",
	"Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "Pq",
	"Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv",
	"Sc", "Sh", "Sm", "So", "Sq",
	"Ss", "St", "Sx", "Sy",
	"Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
	"%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
293
/*
 * NULL-terminated list of reserved macro names; judging by the
 * identifier these are the man ones.  Not referenced in the part of
 * the file visible here.
 */
/* not currently implemented: BT DE DS ME MT PT SY TQ YS */
const char *const __man_reserved[] = {
	"AT", "B", "BI", "BR", "DT",
	"EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR",
	"LP", "OP", "P", "PD", "PP",
	"R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS",
	"TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR",
	NULL
};
303
/*
 * Array of injected predefined strings.
 * The initializers come from "predefs.in"; PREDEFS_MAX is the
 * declared array size (NOTE(review): presumably it has to match the
 * number of entries in that file -- confirm when editing either).
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
309
/* See roffhash_find() */
/* Bucket index of a name: its first character relative to ASCII_LO. */
#define	ROFF_HASH(p)	(p[0] - ASCII_LO)

/*
 * State of the pending input line trap; these are file-scope
 * variables, not part of struct roff.  roff_parsetext() counts
 * roffit_lines down on each text line and, when it is 1, appends
 * roffit_macro to the line as a request and frees it.
 */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
315
316 static void
317 roffhash_init(void)
318 {
319 struct roffmac *n;
320 int buc, i;
321
322 for (i = 0; i < (int)ROFF_USERDEF; i++) {
323 assert(roffs[i].name[0] >= ASCII_LO);
324 assert(roffs[i].name[0] <= ASCII_HI);
325
326 buc = ROFF_HASH(roffs[i].name);
327
328 if (NULL != (n = hash[buc])) {
329 for ( ; n->next; n = n->next)
330 /* Do nothing. */ ;
331 n->next = &roffs[i];
332 } else
333 hash[buc] = &roffs[i];
334 }
335 }
336
337 /*
338 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
339 * the nil-terminated string name could be found.
340 */
341 static enum rofft
342 roffhash_find(const char *p, size_t s)
343 {
344 int buc;
345 struct roffmac *n;
346
347 /*
348 * libroff has an extremely simple hashtable, for the time
349 * being, which simply keys on the first character, which must
350 * be printable, then walks a chain. It works well enough until
351 * optimised.
352 */
353
354 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
355 return(ROFF_MAX);
356
357 buc = ROFF_HASH(p);
358
359 if (NULL == (n = hash[buc]))
360 return(ROFF_MAX);
361 for ( ; n; n = n->next)
362 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
363 return((enum rofft)(n - roffs));
364
365 return(ROFF_MAX);
366 }
367
368
369 /*
370 * Pop the current node off of the stack of roff instructions currently
371 * pending.
372 */
373 static void
374 roffnode_pop(struct roff *r)
375 {
376 struct roffnode *p;
377
378 assert(r->last);
379 p = r->last;
380
381 r->last = r->last->parent;
382 free(p->name);
383 free(p->end);
384 free(p);
385 }
386
387
388 /*
389 * Push a roff node onto the instruction stack. This must later be
390 * removed with roffnode_pop().
391 */
392 static void
393 roffnode_push(struct roff *r, enum rofft tok, const char *name,
394 int line, int col)
395 {
396 struct roffnode *p;
397
398 p = mandoc_calloc(1, sizeof(struct roffnode));
399 p->tok = tok;
400 if (name)
401 p->name = mandoc_strdup(name);
402 p->parent = r->last;
403 p->line = line;
404 p->col = col;
405 p->rule = p->parent ? p->parent->rule : 0;
406
407 r->last = p;
408 }
409
410
411 static void
412 roff_free1(struct roff *r)
413 {
414 struct tbl_node *tbl;
415 struct eqn_node *e;
416 int i;
417
418 while (NULL != (tbl = r->first_tbl)) {
419 r->first_tbl = tbl->next;
420 tbl_free(tbl);
421 }
422
423 r->first_tbl = r->last_tbl = r->tbl = NULL;
424
425 while (NULL != (e = r->first_eqn)) {
426 r->first_eqn = e->next;
427 eqn_free(e);
428 }
429
430 r->first_eqn = r->last_eqn = r->eqn = NULL;
431
432 while (r->last)
433 roffnode_pop(r);
434
435 roff_freestr(r->strtab);
436 roff_freestr(r->xmbtab);
437
438 r->strtab = r->xmbtab = NULL;
439
440 roff_freereg(r->regtab);
441
442 r->regtab = NULL;
443
444 if (r->xtab)
445 for (i = 0; i < 128; i++)
446 free(r->xtab[i].p);
447
448 free(r->xtab);
449 r->xtab = NULL;
450 }
451
452 void
453 roff_reset(struct roff *r)
454 {
455
456 roff_free1(r);
457 r->control = 0;
458 }
459
460
/*
 * Destroy a parser: release all data it holds, then the parser
 * state itself.  r must not be used afterwards.
 */
void
roff_free(struct roff *r)
{

	roff_free1(r);
	free(r);
}
468
469
470 struct roff *
471 roff_alloc(struct mparse *parse, int options)
472 {
473 struct roff *r;
474
475 r = mandoc_calloc(1, sizeof(struct roff));
476 r->parse = parse;
477 r->options = options;
478 r->rstackpos = -1;
479
480 roffhash_init();
481
482 return(r);
483 }
484
/*
 * In the current line, expand user-defined strings ("\*")
 * and references to number registers ("\n").
 * Also check the syntax of other escape sequences.
 *
 * Each expansion replaces *bufp by a newly allocated buffer and
 * updates *szp; scanning then resumes at the place of the replaced
 * escape, so the inserted text is itself subject to expansion.
 * Returns ROFF_IGN if more than EXPAND_LIMIT expansions were needed,
 * ROFF_CONT in all other cases, including syntax errors.
 */
static enum rofferr
roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
{
	/* NOTE(review): 12 bytes fit a 32-bit int with sign and nil. */
	char		 ubuf[12]; /* buffer to print the number */
	const char	*stesc;	/* start of an escape sequence ('\\') */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy bufp to */
	size_t		 nsz;	/* size of the new buffer */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	int		 expand_count;	/* to avoid infinite loops */

	expand_count = 0;

again:
	cp = *bufp + pos;
	while (NULL != (cp = strchr(cp, '\\'))) {
		stesc = cp++;

		/*
		 * The second character must be an asterisk or an n.
		 * If it isn't, skip it anyway:  It is escaped,
		 * so it can't start another escape sequence.
		 */

		/* A backslash at the very end of the line. */
		if ('\0' == *cp)
			return(ROFF_CONT);

		/*
		 * From here on, res doubles as a type flag:
		 * NULL for a string, ubuf for a number register.
		 */
		switch (*cp) {
		case ('*'):
			res = NULL;
			break;
		case ('n'):
			res = ubuf;
			break;
		default:
			/* Any other escape is only validated and skipped. */
			if (ESCAPE_ERROR != mandoc_escape(&cp, NULL, NULL))
				continue;
			mandoc_msg
				(MANDOCERR_BADESCAPE, r->parse,
				 ln, (int)(stesc - *bufp), NULL);
			return(ROFF_CONT);
		}

		cp++;

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 */

		switch (*cp) {
		case ('\0'):
			return(ROFF_CONT);
		case ('('):
			cp++;
			maxl = 2;
			break;
		case ('['):
			/* maxl == 0 means: the name runs up to ']'. */
			cp++;
			maxl = 0;
			break;
		default:
			maxl = 1;
			break;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		for (naml = 0; 0 == maxl || naml < maxl; naml++, cp++) {
			/* The line ends in the middle of the name. */
			if ('\0' == *cp) {
				mandoc_msg
					(MANDOCERR_BADESCAPE,
					 r->parse, ln,
					 (int)(stesc - *bufp), NULL);
				return(ROFF_CONT);
			}
			if (0 == maxl && ']' == *cp)
				break;
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		if (NULL == res)
			res = roff_getstrn(r, stnam, naml);
		else
			snprintf(ubuf, sizeof(ubuf), "%d",
			    roff_getregn(r, stnam, naml));

		/* An undefined string is reported and expands to nothing. */
		if (NULL == res) {
			mandoc_msg
				(MANDOCERR_BADESCAPE, r->parse,
				 ln, (int)(stesc - *bufp), NULL);
			res = "";
		}

		/* Replace the escape sequence by the string. */

		/* Rescan from the start of the replaced escape. */
		pos = stesc - *bufp;

		nsz = *szp + strlen(res) + 1;
		nbuf = mandoc_malloc(nsz);

		/*
		 * Copy the text before the escape, then the replacement,
		 * then the rest of the line; for a bracketed name, the
		 * closing ']' that cp points at is skipped.
		 */
		strlcpy(nbuf, *bufp, (size_t)(stesc - *bufp + 1));
		strlcat(nbuf, res, nsz);
		strlcat(nbuf, cp + (maxl ? 0 : 1), nsz);

		free(*bufp);

		*bufp = nbuf;
		*szp = nsz;

		if (EXPAND_LIMIT >= ++expand_count)
			goto again;

		/* Just leave the string unexpanded. */
		mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL);
		return(ROFF_IGN);
	}
	return(ROFF_CONT);
}
618
/*
 * Process text streams:
 * Convert all breakable hyphens into ASCII_HYPH.
 * Decrement and spring input line trap.
 *
 * A hyphen counts as breakable when it has a letter on both sides.
 * Returns ROFF_REPARSE after appending the trap macro to the line
 * (replacing *bufp and updating *szp), ROFF_CONT otherwise.
 */
static enum rofferr
roff_parsetext(char **bufp, size_t *szp, int pos, int *offs)
{
	size_t		 sz;
	const char	*start;
	char		*p;
	int		 isz;
	enum mandoc_esc	 esc;

	start = p = *bufp + pos;

	while ('\0' != *p) {
		/* Jump to the next hyphen or backslash. */
		sz = strcspn(p, "-\\");
		p += sz;

		if ('\0' == *p)
			break;

		if ('\\' == *p) {
			/* Skip over escapes. */
			p++;
			esc = mandoc_escape((const char **)&p, NULL, NULL);
			if (ESCAPE_ERROR == esc)
				break;
			continue;
		} else if (p == start) {
			/* No character before a leading hyphen. */
			p++;
			continue;
		}

		if (isalpha((unsigned char)p[-1]) &&
		    isalpha((unsigned char)p[1]))
			*p = ASCII_HYPH;
		p++;
	}

	/* Spring the input line trap. */
	if (1 == roffit_lines) {
		/* Append the trap macro as a request on a line of its own. */
		isz = mandoc_asprintf(&p, "%s\n.%s", *bufp, roffit_macro);
		free(*bufp);
		*bufp = p;
		*szp = isz + 1;
		*offs = 0;
		/* roffit_macro is left dangling; roffit_lines == 0 disarms it. */
		free(roffit_macro);
		roffit_lines = 0;
		return(ROFF_REPARSE);
	} else if (1 < roffit_lines)
		--roffit_lines;
	return(ROFF_CONT);
}
674
/*
 * Process one input line.
 * The line is at *bufp + pos; the buffer may be replaced and *szp
 * updated along the way.  The return value tells the caller what to
 * do with the line; ROFF_CONT is returned when no request in this
 * file claimed it.
 */
enum rofferr
roff_parseln(struct roff *r, int ln, char **bufp,
		size_t *szp, int pos, int *offs)
{
	enum rofft	 t;
	enum rofferr	 e;
	int		 ppos, ctl;

	/*
	 * Expand strings and number registers first.
	 * roff_res() returns ROFF_IGN only when it gave up on the
	 * line because of too many expansions.
	 */

	e = roff_res(r, bufp, szp, ln, pos);
	if (ROFF_IGN == e)
		return(e);
	assert(ROFF_CONT == e);

	/*
	 * NOTE(review): roff_getcontrol() is defined elsewhere;
	 * presumably ctl is non-zero for a control line and pos is
	 * advanced past the control character -- confirm.
	 */
	ppos = pos;
	ctl = roff_getcontrol(r, *bufp, &pos);

	/*
	 * First, if a scope is open and we're not a macro, pass the
	 * text through the macro's filter.  If a scope isn't open and
	 * we're not a macro, just let it through.
	 * Finally, if there's an equation scope open, divert it into it
	 * no matter our state.
	 */

	if (r->last && ! ctl) {
		t = r->last->tok;
		assert(roffs[t].text);
		/* pos is passed for both the ppos and pos arguments. */
		e = (*roffs[t].text)
			(r, t, bufp, szp, ln, pos, pos, offs);
		assert(ROFF_IGN == e || ROFF_CONT == e);
		if (ROFF_CONT != e)
			return(e);
	}
	if (r->eqn)
		return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
	if ( ! ctl) {
		if (r->tbl)
			return(tbl_read(r->tbl, ln, *bufp, pos));
		return(roff_parsetext(bufp, szp, pos, offs));
	}

	/*
	 * If a scope is open, go to the child handler for that macro,
	 * as it may want to preprocess before doing anything with it.
	 * Don't do so if an equation is open.
	 */

	if (r->last) {
		t = r->last->tok;
		assert(roffs[t].sub);
		return((*roffs[t].sub)
				(r, t, bufp, szp,
				 ln, ppos, pos, offs));
	}

	/*
	 * Lastly, as we've no scope open, try to look up and execute
	 * the new macro.  If no macro is found, simply return and let
	 * the compilers handle it.
	 */

	if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
		return(ROFF_CONT);

	assert(roffs[t].proc);
	return((*roffs[t].proc)
			(r, t, bufp, szp,
			 ln, ppos, pos, offs));
}
749
750
751 void
752 roff_endparse(struct roff *r)
753 {
754
755 if (r->last)
756 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
757 r->last->line, r->last->col, NULL);
758
759 if (r->eqn) {
760 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
761 r->eqn->eqn.ln, r->eqn->eqn.pos, NULL);
762 eqn_end(&r->eqn);
763 }
764
765 if (r->tbl) {
766 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
767 r->tbl->line, r->tbl->pos, NULL);
768 tbl_end(&r->tbl);
769 }
770 }
771
772 /*
773 * Parse a roff node's type from the input buffer. This must be in the
774 * form of ".foo xxx" in the usual way.
775 */
776 static enum rofft
777 roff_parse(struct roff *r, const char *buf, int *pos)
778 {
779 const char *mac;
780 size_t maclen;
781 enum rofft t;
782
783 if ('\0' == buf[*pos] || '"' == buf[*pos] ||
784 '\t' == buf[*pos] || ' ' == buf[*pos])
785 return(ROFF_MAX);
786
787 /* We stop the macro parse at an escape, tab, space, or nil. */
788
789 mac = buf + *pos;
790 maclen = strcspn(mac, " \\\t\0");
791
792 t = (r->current_string = roff_getstrn(r, mac, maclen))
793 ? ROFF_USERDEF : roffhash_find(mac, maclen);
794
795 *pos += (int)maclen;
796
797 while (buf[*pos] && ' ' == buf[*pos])
798 (*pos)++;
799
800 return(t);
801 }
802
803 /* ARGSUSED */
804 static enum rofferr
805 roff_cblock(ROFF_ARGS)
806 {
807
808 /*
809 * A block-close `..' should only be invoked as a child of an
810 * ignore macro, otherwise raise a warning and just ignore it.
811 */
812
813 if (NULL == r->last) {
814 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
815 return(ROFF_IGN);
816 }
817
818 switch (r->last->tok) {
819 case (ROFF_am):
820 /* FALLTHROUGH */
821 case (ROFF_ami):
822 /* FALLTHROUGH */
823 case (ROFF_am1):
824 /* FALLTHROUGH */
825 case (ROFF_de):
826 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
827 /* FALLTHROUGH */
828 case (ROFF_dei):
829 /* FALLTHROUGH */
830 case (ROFF_ig):
831 break;
832 default:
833 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
834 return(ROFF_IGN);
835 }
836
837 if ((*bufp)[pos])
838 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
839
840 roffnode_pop(r);
841 roffnode_cleanscope(r);
842 return(ROFF_IGN);
843
844 }
845
846
847 static void
848 roffnode_cleanscope(struct roff *r)
849 {
850
851 while (r->last) {
852 if (--r->last->endspan != 0)
853 break;
854 roffnode_pop(r);
855 }
856 }
857
858
859 static void
860 roff_ccond(struct roff *r, int ln, int ppos)
861 {
862
863 if (NULL == r->last) {
864 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
865 return;
866 }
867
868 switch (r->last->tok) {
869 case (ROFF_el):
870 /* FALLTHROUGH */
871 case (ROFF_ie):
872 /* FALLTHROUGH */
873 case (ROFF_if):
874 break;
875 default:
876 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
877 return;
878 }
879
880 if (r->last->endspan > -1) {
881 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
882 return;
883 }
884
885 roffnode_pop(r);
886 roffnode_cleanscope(r);
887 return;
888 }
889
890
/*
 * Open a block: am, ami, am1, de, dei, de1 or ig.
 * All of them except ig require a name argument.  Only de (and de1,
 * which is treated as de) actually records a macro; the others are
 * reported as unsupported requests and their content is dropped by
 * the block handlers.
 * An optional further argument gives a custom end marker.
 * Always returns ROFF_IGN.
 */
/* ARGSUSED */
static enum rofferr
roff_block(ROFF_ARGS)
{
	int		sv;
	size_t		sz;
	char		*name;

	name = NULL;

	if (ROFF_ig != tok) {
		if ('\0' == (*bufp)[pos]) {
			mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
			return(ROFF_IGN);
		}

		/*
		 * Re-write `de1', since we don't really care about
		 * groff's strange compatibility mode, into `de'.
		 */

		if (ROFF_de1 == tok)
			tok = ROFF_de;
		if (ROFF_de == tok)
			name = *bufp + pos;
		else
			mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
			    roffs[tok].name);

		/* Skip the name. */
		while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
			pos++;

		/*
		 * Overwrite the whitespace after the name with nils;
		 * this terminates name, which points into the buffer.
		 */
		while (isspace((unsigned char)(*bufp)[pos]))
			(*bufp)[pos++] = '\0';
	}

	/* The node gets its own copy of the name. */
	roffnode_push(r, tok, name, ln, ppos);

	/*
	 * At the beginning of a `de' macro, clear the existing string
	 * with the same name, if there is one.  New content will be
	 * appended from roff_block_text() in multiline mode.
	 */

	if (ROFF_de == tok)
		roff_setstr(r, name, "", 0);

	/* No end marker given: the block is closed by `..'. */
	if ('\0' == (*bufp)[pos])
		return(ROFF_IGN);

	/* If present, process the custom end-of-line marker. */

	sv = pos;
	while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
		pos++;

	/*
	 * Note: groff does NOT like escape characters in the input.
	 * Instead of detecting this, we're just going to let it fly and
	 * to hell with it.
	 */

	assert(pos > sv);
	sz = (size_t)(pos - sv);

	/* An explicit "." is the same as the default end marker. */
	if (1 == sz && '.' == (*bufp)[sv])
		return(ROFF_IGN);

	r->last->end = mandoc_malloc(sz + 1);

	memcpy(r->last->end, *bufp + sv, sz);
	r->last->end[(int)sz] = '\0';

	if ((*bufp)[pos])
		mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);

	return(ROFF_IGN);
}
969
970
/*
 * Handle a control line inside an open block.
 * The line either closes the block, by the custom end marker or by
 * `..', or it is part of the block content: appended to the macro
 * being defined for `de', dropped for all other block types.
 * Returns ROFF_RERUN if a custom end marker is followed by something
 * that is itself a known macro, the result of roff_cblock() for
 * `..', and ROFF_IGN otherwise.
 */
/* ARGSUSED */
static enum rofferr
roff_block_sub(ROFF_ARGS)
{
	enum rofft	t;
	int		i, j;

	/*
	 * First check whether a custom macro exists at this level.  If
	 * it does, then check against it.  This is some of groff's
	 * stranger behaviours.  If we encountered a custom end-scope
	 * tag and that tag also happens to be a "real" macro, then we
	 * need to try interpreting it again as a real macro.  If it's
	 * not, then return ignore.  Else continue.
	 */

	if (r->last->end) {
		/* Compare the line against the end marker. */
		for (i = pos, j = 0; r->last->end[j]; j++, i++)
			if ((*bufp)[i] != r->last->end[j])
				break;

		/* The whole marker matched and ends a word. */
		if ('\0' == r->last->end[j] &&
		    ('\0' == (*bufp)[i] ||
		     ' ' == (*bufp)[i] ||
		     '\t' == (*bufp)[i])) {
			roffnode_pop(r);
			roffnode_cleanscope(r);

			while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
				i++;

			pos = i;
			if (ROFF_MAX != roff_parse(r, *bufp, &pos))
				return(ROFF_RERUN);
			return(ROFF_IGN);
		}
	}

	/*
	 * If we have no custom end-query or lookup failed, then try
	 * pulling it out of the hashtable.
	 */

	t = roff_parse(r, *bufp, &pos);

	/*
	 * Macros other than block-end are only significant
	 * in `de' blocks; elsewhere, simply throw them away.
	 */
	if (ROFF_cblock != t) {
		/*
		 * The line is stored from ppos, i.e. including what
		 * precedes the macro name.  NOTE(review): the last
		 * argument 2 presumably selects the multiline append
		 * mode of roff_setstr() -- confirm.
		 */
		if (ROFF_de == tok)
			roff_setstr(r, r->last->name, *bufp + ppos, 2);
		return(ROFF_IGN);
	}

	assert(roffs[t].proc);
	return((*roffs[t].proc)(r, t, bufp, szp,
				ln, ppos, pos, offs));
}
1030
1031
1032 /* ARGSUSED */
1033 static enum rofferr
1034 roff_block_text(ROFF_ARGS)
1035 {
1036
1037 if (ROFF_de == tok)
1038 roff_setstr(r, r->last->name, *bufp + pos, 2);
1039
1040 return(ROFF_IGN);
1041 }
1042
1043
1044 /* ARGSUSED */
1045 static enum rofferr
1046 roff_cond_sub(ROFF_ARGS)
1047 {
1048 enum rofft t;
1049 char *ep;
1050 int rr;
1051
1052 rr = r->last->rule;
1053 roffnode_cleanscope(r);
1054 t = roff_parse(r, *bufp, &pos);
1055
1056 /*
1057 * Fully handle known macros when they are structurally
1058 * required or when the conditional evaluated to true.
1059 */
1060
1061 if ((ROFF_MAX != t) &&
1062 (rr || ROFFMAC_STRUCT & roffs[t].flags)) {
1063 assert(roffs[t].proc);
1064 return((*roffs[t].proc)(r, t, bufp, szp,
1065 ln, ppos, pos, offs));
1066 }
1067
1068 /*
1069 * If `\}' occurs on a macro line without a preceding macro,
1070 * drop the line completely.
1071 */
1072
1073 ep = *bufp + pos;
1074 if ('\\' == ep[0] && '}' == ep[1])
1075 rr = 0;
1076
1077 /* Always check for the closing delimiter `\}'. */
1078
1079 while (NULL != (ep = strchr(ep, '\\'))) {
1080 if ('}' == *(++ep)) {
1081 *ep = '&';
1082 roff_ccond(r, ln, ep - *bufp - 1);
1083 }
1084 ++ep;
1085 }
1086 return(rr ? ROFF_CONT : ROFF_IGN);
1087 }
1088
1089 /* ARGSUSED */
1090 static enum rofferr
1091 roff_cond_text(ROFF_ARGS)
1092 {
1093 char *ep;
1094 int rr;
1095
1096 rr = r->last->rule;
1097 roffnode_cleanscope(r);
1098
1099 ep = *bufp + pos;
1100 while (NULL != (ep = strchr(ep, '\\'))) {
1101 if ('}' == *(++ep)) {
1102 *ep = '&';
1103 roff_ccond(r, ln, ep - *bufp - 1);
1104 }
1105 ++ep;
1106 }
1107 return(rr ? ROFF_CONT : ROFF_IGN);
1108 }
1109
/*
 * Parse an optionally negative decimal integer at v + *pos.
 * On success, store the value in *res, advance *pos to the first
 * character after the number, and return 1.
 * If there is no digit at the expected place, return 0 and leave
 * *pos alone; *res is set to 0 in that case.
 * No overflow checking is done.
 */
static int
roff_getnum(const char *v, int *pos, int *res)
{
	int	 p, n;

	p = *pos;
	n = v[p] == '-';
	if (n)
		p++;

	/* Shift the value one decimal place, then add the new digit. */
	for (*res = 0; isdigit((unsigned char)v[p]); p++)
		*res = 10 * *res + v[p] - '0';

	/* Nothing consumed except possibly the sign. */
	if (p == *pos + n)
		return 0;

	if (n)
		*res = -*res;

	*pos = p;
	return 1;
}
1131
/*
 * Parse a comparison operator at v + *pos.
 * Recognized are "=", "==", "<", "<=", ">" and ">=".
 * The operator is stored in *res, where 'l' stands for "<=" and 'g'
 * for ">=" and "==" is the same as "="; it is also the return value.
 * *pos is advanced past the operator.
 * If there is no operator, return 0 and leave *pos alone; *res then
 * holds the character found.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	int	 e;

	*res = v[*pos];

	switch (*res) {
	case '=':
		/* FALLTHROUGH */
	case '>':
		/* FALLTHROUGH */
	case '<':
		break;
	default:
		return(0);
	}

	/*
	 * Only look at the second character now that the first one
	 * is known not to be the terminator.
	 */
	e = v[*pos + 1] == '=';

	if (e) {
		if ('>' == *res)
			*res = 'g';
		else if ('<' == *res)
			*res = 'l';
	}

	*pos += 1 + e;

	return(*res);
}
1159
/*
 * Evaluate a string comparison condition.
 * The first character is the delimiter.
 * Succeed if the string up to its second occurrence
 * matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or lacking that, to the end of the line.
 * The cursor is never moved beyond the terminating nil;
 * an empty condition fails without moving the cursor at all.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*s1, *s2, *s3;
	int		 match;

	match = 0;
	s1 = v + *pos;		/* initial delimiter */

	if ('\0' == *s1)	/* nothing to compare at all */
		return(0);

	s2 = s1 + 1;		/* for scanning the first string */
	s3 = strchr(s2, *s1);	/* for scanning the second string */

	if (NULL == s3)		/* found no middle delimiter */
		goto out;

	while ('\0' != *++s3) {
		if (*s2 != *s3) {  /* mismatch */
			s3 = strchr(s3, *s1);
			break;
		}
		if (*s3 == *s1) {  /* found the final delimiter */
			match = 1;
			break;
		}
		s2++;
	}

out:
	if (NULL == s3)
		s3 = strchr(s2, '\0');
	else if ('\0' != *s3)	/* step over the final delimiter only */
		s3++;
	*pos = s3 - v;
	return(match);
}
1202
/*
 * Evaluate the condition of an `if' or `ie' request at v + *pos
 * and advance *pos past it.  Return 1 for true and 0 for false.
 * A leading `!' negates the result.
 * The one-letter conditions `n' and `o' are always true and
 * `c', `d', `e', `r' and `t' are always false.
 * Otherwise, the condition is a number, true if positive, a
 * comparison of two numbers, or a string comparison.
 * An empty condition and a comparison lacking its right hand side
 * are false even if negated.
 */
static int
roff_evalcond(const char *v, int *pos)
{
	int	 wanttrue, lh, rh;
	char	 op;

	if ('!' == v[*pos]) {
		wanttrue = 0;
		(*pos)++;
	} else
		wanttrue = 1;

	switch (v[*pos]) {
	case ('\0'):
		/* Do not let the parsers below run off the end. */
		return(0);
	case ('n'):
		/* FALLTHROUGH */
	case ('o'):
		(*pos)++;
		return(wanttrue);
	case ('c'):
		/* FALLTHROUGH */
	case ('d'):
		/* FALLTHROUGH */
	case ('e'):
		/* FALLTHROUGH */
	case ('r'):
		/* FALLTHROUGH */
	case ('t'):
		(*pos)++;
		return(!wanttrue);
	default:
		break;
	}

	/* Not a number: treat it as a string comparison. */
	if (!roff_getnum(v, pos, &lh))
		return(roff_evalstrcond(v, pos) == wanttrue);
	/* A number on its own is true if it is positive. */
	if (!roff_getop(v, pos, &op))
		return((lh > 0) == wanttrue);
	if (!roff_getnum(v, pos, &rh))
		return(0);

	switch (op) {
	case 'g':
		return((lh >= rh) == wanttrue);
	case 'l':
		return((lh <= rh) == wanttrue);
	case '=':
		return((lh == rh) == wanttrue);
	case '>':
		return((lh > rh) == wanttrue);
	case '<':
		return((lh < rh) == wanttrue);
	default:
		return(0);
	}
}
1258
1259 /* ARGSUSED */
1260 static enum rofferr
1261 roff_line_ignore(ROFF_ARGS)
1262 {
1263
1264 return(ROFF_IGN);
1265 }
1266
/*
 * Open a conditional: `if', `ie' or `el'.
 * A node is pushed with the result of the condition as its rule and
 * with an endspan describing how far the conditional extends:
 * 2 if nothing follows on the line, -1 after an opening `\{',
 * and 1 if anything else follows on the same line.
 * Returns ROFF_RERUN with *offs pointing behind the condition, or
 * ROFF_ERR if the stack of `ie' results is full.
 */
/* ARGSUSED */
static enum rofferr
roff_cond(ROFF_ARGS)
{

	roffnode_push(r, tok, NULL, ln, ppos);

	/* 
	 * An `.el' has no conditional body: it will consume the value
	 * of the current rstack entry set in prior `ie' calls or
	 * defaults to DENY.  
	 *
	 * If we're not an `el', however, then evaluate the conditional.
	 */

	r->last->rule = ROFF_el == tok ?
		(r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
		roff_evalcond(*bufp, &pos);

	/*
	 * An if-else will put the NEGATION of the current evaluated
	 * conditional into the stack of rules.
	 */

	if (ROFF_ie == tok) {
		/* The node pushed above stays on the stack in this case. */
		if (r->rstackpos == RSTACK_MAX - 1) {
			mandoc_msg(MANDOCERR_MEM,
				r->parse, ln, ppos, NULL);
			return(ROFF_ERR);
		}
		r->rstack[++r->rstackpos] = !r->last->rule;
	}

	/* If the parent has false as its rule, then so do we. */

	if (r->last->parent && !r->last->parent->rule)
		r->last->rule = 0;

	/*
	 * Determine scope.
	 * If there is nothing on the line after the conditional,
	 * not even whitespace, use next-line scope.
	 */

	if ('\0' == (*bufp)[pos]) {
		r->last->endspan = 2;
		goto out;
	}

	while (' ' == (*bufp)[pos])
		pos++;

	/* An opening brace requests multiline scope. */

	if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
		/* Closed by `\}' in roff_ccond(), not by counting down. */
		r->last->endspan = -1;
		pos += 2;
		goto out;
	}

	/*
	 * Anything else following the conditional causes
	 * single-line scope.  Warn if the scope contains
	 * nothing but trailing whitespace.
	 */

	if ('\0' == (*bufp)[pos])
		mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);

	r->last->endspan = 1;

out:
	/* Have the rest of the line parsed again from pos. */
	*offs = pos;
	return(ROFF_RERUN);
}
1342
1343
1344 /* ARGSUSED */
1345 static enum rofferr
1346 roff_ds(ROFF_ARGS)
1347 {
1348 char *name, *string;
1349
1350 /*
1351 * A symbol is named by the first word following the macro
1352 * invocation up to a space. Its value is anything after the
1353 * name's trailing whitespace and optional double-quote. Thus,
1354 *
1355 * [.ds foo "bar " ]
1356 *
1357 * will have `bar " ' as its value.
1358 */
1359
1360 string = *bufp + pos;
1361 name = roff_getname(r, &string, ln, pos);
1362 if ('\0' == *name)
1363 return(ROFF_IGN);
1364
1365 /* Read past initial double-quote. */
1366 if ('"' == *string)
1367 string++;
1368
1369 /* The rest is the value. */
1370 roff_setstr(r, name, string, ROFF_as == tok);
1371 return(ROFF_IGN);
1372 }
1373
1374 void
1375 roff_setreg(struct roff *r, const char *name, int val, char sign)
1376 {
1377 struct roffreg *reg;
1378
1379 /* Search for an existing register with the same name. */
1380 reg = r->regtab;
1381
1382 while (reg && strcmp(name, reg->key.p))
1383 reg = reg->next;
1384
1385 if (NULL == reg) {
1386 /* Create a new register. */
1387 reg = mandoc_malloc(sizeof(struct roffreg));
1388 reg->key.p = mandoc_strdup(name);
1389 reg->key.sz = strlen(name);
1390 reg->val = 0;
1391 reg->next = r->regtab;
1392 r->regtab = reg;
1393 }
1394
1395 if ('+' == sign)
1396 reg->val += val;
1397 else if ('-' == sign)
1398 reg->val -= val;
1399 else
1400 reg->val = val;
1401 }
1402
/*
 * Look up a predefined read-only number register.
 * Only the first byte of name is inspected.
 * The value -1 currently means "not predefined"; should a
 * predefined read-only register with the value -1 ever be
 * needed, a different marker value has to be picked.
 */
static int
roff_getregro(const char *name)
{
	int	 val;

	if (*name == 'A')	/* ASCII approximation mode is always off. */
		val = 0;
	else if (*name == 'g')	/* Groff compatibility mode is always on. */
		val = 1;
	else if (*name == 'H')	/* Fixed horizontal resolution. */
		val = 24;
	else if (*name == 'j')	/* Always adjust left margin only. */
		val = 0;
	else if (*name == 'T')	/* Some output device is always defined. */
		val = 1;
	else if (*name == 'V')	/* Fixed vertical resolution. */
		val = 40;
	else
		val = -1;

	return(val);
}
1430
1431 int
1432 roff_getreg(const struct roff *r, const char *name)
1433 {
1434 struct roffreg *reg;
1435 int val;
1436
1437 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
1438 val = roff_getregro(name + 1);
1439 if (-1 != val)
1440 return (val);
1441 }
1442
1443 for (reg = r->regtab; reg; reg = reg->next)
1444 if (0 == strcmp(name, reg->key.p))
1445 return(reg->val);
1446
1447 return(0);
1448 }
1449
1450 static int
1451 roff_getregn(const struct roff *r, const char *name, size_t len)
1452 {
1453 struct roffreg *reg;
1454 int val;
1455
1456 if ('.' == name[0] && 2 == len) {
1457 val = roff_getregro(name + 1);
1458 if (-1 != val)
1459 return (val);
1460 }
1461
1462 for (reg = r->regtab; reg; reg = reg->next)
1463 if (len == reg->key.sz &&
1464 0 == strncmp(name, reg->key.p, len))
1465 return(reg->val);
1466
1467 return(0);
1468 }
1469
1470 static void
1471 roff_freereg(struct roffreg *reg)
1472 {
1473 struct roffreg *old_reg;
1474
1475 while (NULL != reg) {
1476 free(reg->key.p);
1477 old_reg = reg;
1478 reg = reg->next;
1479 free(old_reg);
1480 }
1481 }
1482
1483 /* ARGSUSED */
1484 static enum rofferr
1485 roff_nr(ROFF_ARGS)
1486 {
1487 const char *key;
1488 char *val;
1489 size_t sz;
1490 int iv;
1491 char sign;
1492
1493 val = *bufp + pos;
1494 key = roff_getname(r, &val, ln, pos);
1495
1496 sign = *val;
1497 if ('+' == sign || '-' == sign)
1498 val++;
1499
1500 sz = strspn(val, "0123456789");
1501 iv = sz ? mandoc_strntoi(val, sz, 10) : 0;
1502
1503 roff_setreg(r, key, iv, sign);
1504
1505 return(ROFF_IGN);
1506 }
1507
1508 static enum rofferr
1509 roff_rr(ROFF_ARGS)
1510 {
1511 struct roffreg *reg, **prev;
1512 const char *name;
1513 char *cp;
1514
1515 cp = *bufp + pos;
1516 name = roff_getname(r, &cp, ln, pos);
1517
1518 prev = &r->regtab;
1519 while (1) {
1520 reg = *prev;
1521 if (NULL == reg || !strcmp(name, reg->key.p))
1522 break;
1523 prev = &reg->next;
1524 }
1525 if (NULL != reg) {
1526 *prev = reg->next;
1527 free(reg->key.p);
1528 free(reg);
1529 }
1530 return(ROFF_IGN);
1531 }
1532
1533 /* ARGSUSED */
1534 static enum rofferr
1535 roff_rm(ROFF_ARGS)
1536 {
1537 const char *name;
1538 char *cp;
1539
1540 cp = *bufp + pos;
1541 while ('\0' != *cp) {
1542 name = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1543 if ('\0' != *name)
1544 roff_setstr(r, name, NULL, 0);
1545 }
1546 return(ROFF_IGN);
1547 }
1548
1549 /* ARGSUSED */
1550 static enum rofferr
1551 roff_it(ROFF_ARGS)
1552 {
1553 char *cp;
1554 size_t len;
1555 int iv;
1556
1557 /* Parse the number of lines. */
1558 cp = *bufp + pos;
1559 len = strcspn(cp, " \t");
1560 cp[len] = '\0';
1561 if ((iv = mandoc_strntoi(cp, len, 10)) <= 0) {
1562 mandoc_msg(MANDOCERR_NUMERIC, r->parse,
1563 ln, ppos, *bufp + 1);
1564 return(ROFF_IGN);
1565 }
1566 cp += len + 1;
1567
1568 /* Arm the input line trap. */
1569 roffit_lines = iv;
1570 roffit_macro = mandoc_strdup(cp);
1571 return(ROFF_IGN);
1572 }
1573
1574 /* ARGSUSED */
1575 static enum rofferr
1576 roff_Dd(ROFF_ARGS)
1577 {
1578 const char *const *cp;
1579
1580 if (0 == ((MPARSE_MDOC | MPARSE_QUICK) & r->options))
1581 for (cp = __mdoc_reserved; *cp; cp++)
1582 roff_setstr(r, *cp, NULL, 0);
1583
1584 return(ROFF_CONT);
1585 }
1586
1587 /* ARGSUSED */
1588 static enum rofferr
1589 roff_TH(ROFF_ARGS)
1590 {
1591 const char *const *cp;
1592
1593 if (0 == (MPARSE_QUICK & r->options))
1594 for (cp = __man_reserved; *cp; cp++)
1595 roff_setstr(r, *cp, NULL, 0);
1596
1597 return(ROFF_CONT);
1598 }
1599
1600 /* ARGSUSED */
1601 static enum rofferr
1602 roff_TE(ROFF_ARGS)
1603 {
1604
1605 if (NULL == r->tbl)
1606 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1607 else
1608 tbl_end(&r->tbl);
1609
1610 return(ROFF_IGN);
1611 }
1612
1613 /* ARGSUSED */
1614 static enum rofferr
1615 roff_T_(ROFF_ARGS)
1616 {
1617
1618 if (NULL == r->tbl)
1619 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1620 else
1621 tbl_restart(ppos, ln, r->tbl);
1622
1623 return(ROFF_IGN);
1624 }
1625
#if 0
/*
 * Currently compiled out.
 * End the open equation, if any.  Returns 1 if there was
 * one and eqn_end() returned ROFF_EQN, or 0 otherwise.
 */
static int
roff_closeeqn(struct roff *r)
{

	if (r->eqn == NULL)
		return(0);

	return(eqn_end(&r->eqn) == ROFF_EQN);
}
#endif
1634
1635 static void
1636 roff_openeqn(struct roff *r, const char *name, int line,
1637 int offs, const char *buf)
1638 {
1639 struct eqn_node *e;
1640 int poff;
1641
1642 assert(NULL == r->eqn);
1643 e = eqn_alloc(name, offs, line, r->parse);
1644
1645 if (r->last_eqn)
1646 r->last_eqn->next = e;
1647 else
1648 r->first_eqn = r->last_eqn = e;
1649
1650 r->eqn = r->last_eqn = e;
1651
1652 if (buf) {
1653 poff = 0;
1654 eqn_read(&r->eqn, line, buf, offs, &poff);
1655 }
1656 }
1657
1658 /* ARGSUSED */
1659 static enum rofferr
1660 roff_EQ(ROFF_ARGS)
1661 {
1662
1663 roff_openeqn(r, *bufp + pos, ln, ppos, NULL);
1664 return(ROFF_IGN);
1665 }
1666
1667 /* ARGSUSED */
1668 static enum rofferr
1669 roff_EN(ROFF_ARGS)
1670 {
1671
1672 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1673 return(ROFF_IGN);
1674 }
1675
1676 /* ARGSUSED */
1677 static enum rofferr
1678 roff_TS(ROFF_ARGS)
1679 {
1680 struct tbl_node *tbl;
1681
1682 if (r->tbl) {
1683 mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
1684 tbl_end(&r->tbl);
1685 }
1686
1687 tbl = tbl_alloc(ppos, ln, r->parse);
1688
1689 if (r->last_tbl)
1690 r->last_tbl->next = tbl;
1691 else
1692 r->first_tbl = r->last_tbl = tbl;
1693
1694 r->tbl = r->last_tbl = tbl;
1695 return(ROFF_IGN);
1696 }
1697
1698 /* ARGSUSED */
1699 static enum rofferr
1700 roff_cc(ROFF_ARGS)
1701 {
1702 const char *p;
1703
1704 p = *bufp + pos;
1705
1706 if ('\0' == *p || '.' == (r->control = *p++))
1707 r->control = 0;
1708
1709 if ('\0' != *p)
1710 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1711
1712 return(ROFF_IGN);
1713 }
1714
1715 /* ARGSUSED */
1716 static enum rofferr
1717 roff_tr(ROFF_ARGS)
1718 {
1719 const char *p, *first, *second;
1720 size_t fsz, ssz;
1721 enum mandoc_esc esc;
1722
1723 p = *bufp + pos;
1724
1725 if ('\0' == *p) {
1726 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1727 return(ROFF_IGN);
1728 }
1729
1730 while ('\0' != *p) {
1731 fsz = ssz = 1;
1732
1733 first = p++;
1734 if ('\\' == *first) {
1735 esc = mandoc_escape(&p, NULL, NULL);
1736 if (ESCAPE_ERROR == esc) {
1737 mandoc_msg
1738 (MANDOCERR_BADESCAPE, r->parse,
1739 ln, (int)(p - *bufp), NULL);
1740 return(ROFF_IGN);
1741 }
1742 fsz = (size_t)(p - first);
1743 }
1744
1745 second = p++;
1746 if ('\\' == *second) {
1747 esc = mandoc_escape(&p, NULL, NULL);
1748 if (ESCAPE_ERROR == esc) {
1749 mandoc_msg
1750 (MANDOCERR_BADESCAPE, r->parse,
1751 ln, (int)(p - *bufp), NULL);
1752 return(ROFF_IGN);
1753 }
1754 ssz = (size_t)(p - second);
1755 } else if ('\0' == *second) {
1756 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
1757 ln, (int)(p - *bufp), NULL);
1758 second = " ";
1759 p--;
1760 }
1761
1762 if (fsz > 1) {
1763 roff_setstrn(&r->xmbtab, first,
1764 fsz, second, ssz, 0);
1765 continue;
1766 }
1767
1768 if (NULL == r->xtab)
1769 r->xtab = mandoc_calloc
1770 (128, sizeof(struct roffstr));
1771
1772 free(r->xtab[(int)*first].p);
1773 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
1774 r->xtab[(int)*first].sz = ssz;
1775 }
1776
1777 return(ROFF_IGN);
1778 }
1779
1780 /* ARGSUSED */
1781 static enum rofferr
1782 roff_so(ROFF_ARGS)
1783 {
1784 char *name;
1785
1786 mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL);
1787
1788 /*
1789 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1790 * opening anything that's not in our cwd or anything beneath
1791 * it. Thus, explicitly disallow traversing up the file-system
1792 * or using absolute paths.
1793 */
1794
1795 name = *bufp + pos;
1796 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1797 mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL);
1798 return(ROFF_ERR);
1799 }
1800
1801 *offs = pos;
1802 return(ROFF_SO);
1803 }
1804
1805 /* ARGSUSED */
1806 static enum rofferr
1807 roff_userdef(ROFF_ARGS)
1808 {
1809 const char *arg[9];
1810 char *cp, *n1, *n2;
1811 int i;
1812
1813 /*
1814 * Collect pointers to macro argument strings
1815 * and NUL-terminate them.
1816 */
1817 cp = *bufp + pos;
1818 for (i = 0; i < 9; i++)
1819 arg[i] = '\0' == *cp ? "" :
1820 mandoc_getarg(r->parse, &cp, ln, &pos);
1821
1822 /*
1823 * Expand macro arguments.
1824 */
1825 *szp = 0;
1826 n1 = cp = mandoc_strdup(r->current_string);
1827 while (NULL != (cp = strstr(cp, "\\$"))) {
1828 i = cp[2] - '1';
1829 if (0 > i || 8 < i) {
1830 /* Not an argument invocation. */
1831 cp += 2;
1832 continue;
1833 }
1834
1835 *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1836 n2 = mandoc_malloc(*szp);
1837
1838 strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1839 strlcat(n2, arg[i], *szp);
1840 strlcat(n2, cp + 3, *szp);
1841
1842 cp = n2 + (cp - n1);
1843 free(n1);
1844 n1 = n2;
1845 }
1846
1847 /*
1848 * Replace the macro invocation
1849 * by the expanded macro.
1850 */
1851 free(*bufp);
1852 *bufp = n1;
1853 if (0 == *szp)
1854 *szp = strlen(*bufp) + 1;
1855
1856 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1857 ROFF_REPARSE : ROFF_APPEND);
1858 }
1859
1860 static char *
1861 roff_getname(struct roff *r, char **cpp, int ln, int pos)
1862 {
1863 char *name, *cp;
1864
1865 name = *cpp;
1866 if ('\0' == *name)
1867 return(name);
1868
1869 /* Read until end of name. */
1870 for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
1871 if ('\\' != *cp)
1872 continue;
1873 cp++;
1874 if ('\\' == *cp)
1875 continue;
1876 mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
1877 *cp = '\0';
1878 name = cp;
1879 }
1880
1881 /* Nil-terminate name. */
1882 if ('\0' != *cp)
1883 *(cp++) = '\0';
1884
1885 /* Read past spaces. */
1886 while (' ' == *cp)
1887 cp++;
1888
1889 *cpp = cp;
1890 return(name);
1891 }
1892
1893 /*
1894 * Store *string into the user-defined string called *name.
1895 * To clear an existing entry, call with (*r, *name, NULL, 0).
1896 * append == 0: replace mode
1897 * append == 1: single-line append mode
1898 * append == 2: multiline append mode, append '\n' after each call
1899 */
1900 static void
1901 roff_setstr(struct roff *r, const char *name, const char *string,
1902 int append)
1903 {
1904
1905 roff_setstrn(&r->strtab, name, strlen(name), string,
1906 string ? strlen(string) : 0, append);
1907 }
1908
1909 static void
1910 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
1911 const char *string, size_t stringsz, int append)
1912 {
1913 struct roffkv *n;
1914 char *c;
1915 int i;
1916 size_t oldch, newch;
1917
1918 /* Search for an existing string with the same name. */
1919 n = *r;
1920
1921 while (n && strcmp(name, n->key.p))
1922 n = n->next;
1923
1924 if (NULL == n) {
1925 /* Create a new string table entry. */
1926 n = mandoc_malloc(sizeof(struct roffkv));
1927 n->key.p = mandoc_strndup(name, namesz);
1928 n->key.sz = namesz;
1929 n->val.p = NULL;
1930 n->val.sz = 0;
1931 n->next = *r;
1932 *r = n;
1933 } else if (0 == append) {
1934 free(n->val.p);
1935 n->val.p = NULL;
1936 n->val.sz = 0;
1937 }
1938
1939 if (NULL == string)
1940 return;
1941
1942 /*
1943 * One additional byte for the '\n' in multiline mode,
1944 * and one for the terminating '\0'.
1945 */
1946 newch = stringsz + (1 < append ? 2u : 1u);
1947
1948 if (NULL == n->val.p) {
1949 n->val.p = mandoc_malloc(newch);
1950 *n->val.p = '\0';
1951 oldch = 0;
1952 } else {
1953 oldch = n->val.sz;
1954 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
1955 }
1956
1957 /* Skip existing content in the destination buffer. */
1958 c = n->val.p + (int)oldch;
1959
1960 /* Append new content to the destination buffer. */
1961 i = 0;
1962 while (i < (int)stringsz) {
1963 /*
1964 * Rudimentary roff copy mode:
1965 * Handle escaped backslashes.
1966 */
1967 if ('\\' == string[i] && '\\' == string[i + 1])
1968 i++;
1969 *c++ = string[i++];
1970 }
1971
1972 /* Append terminating bytes. */
1973 if (1 < append)
1974 *c++ = '\n';
1975
1976 *c = '\0';
1977 n->val.sz = (int)(c - n->val.p);
1978 }
1979
1980 static const char *
1981 roff_getstrn(const struct roff *r, const char *name, size_t len)
1982 {
1983 const struct roffkv *n;
1984 int i;
1985
1986 for (n = r->strtab; n; n = n->next)
1987 if (0 == strncmp(name, n->key.p, len) &&
1988 '\0' == n->key.p[(int)len])
1989 return(n->val.p);
1990
1991 for (i = 0; i < PREDEFS_MAX; i++)
1992 if (0 == strncmp(name, predefs[i].name, len) &&
1993 '\0' == predefs[i].name[(int)len])
1994 return(predefs[i].str);
1995
1996 return(NULL);
1997 }
1998
1999 static void
2000 roff_freestr(struct roffkv *r)
2001 {
2002 struct roffkv *n, *nn;
2003
2004 for (n = r; n; n = nn) {
2005 free(n->key.p);
2006 free(n->val.p);
2007 nn = n->next;
2008 free(n);
2009 }
2010 }
2011
2012 const struct tbl_span *
2013 roff_span(const struct roff *r)
2014 {
2015
2016 return(r->tbl ? tbl_span(r->tbl) : NULL);
2017 }
2018
2019 const struct eqn *
2020 roff_eqn(const struct roff *r)
2021 {
2022
2023 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
2024 }
2025
2026 /*
2027 * Duplicate an input string, making the appropriate character
2028 * conversations (as stipulated by `tr') along the way.
2029 * Returns a heap-allocated string with all the replacements made.
2030 */
2031 char *
2032 roff_strdup(const struct roff *r, const char *p)
2033 {
2034 const struct roffkv *cp;
2035 char *res;
2036 const char *pp;
2037 size_t ssz, sz;
2038 enum mandoc_esc esc;
2039
2040 if (NULL == r->xmbtab && NULL == r->xtab)
2041 return(mandoc_strdup(p));
2042 else if ('\0' == *p)
2043 return(mandoc_strdup(""));
2044
2045 /*
2046 * Step through each character looking for term matches
2047 * (remember that a `tr' can be invoked with an escape, which is
2048 * a glyph but the escape is multi-character).
2049 * We only do this if the character hash has been initialised
2050 * and the string is >0 length.
2051 */
2052
2053 res = NULL;
2054 ssz = 0;
2055
2056 while ('\0' != *p) {
2057 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
2058 sz = r->xtab[(int)*p].sz;
2059 res = mandoc_realloc(res, ssz + sz + 1);
2060 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
2061 ssz += sz;
2062 p++;
2063 continue;
2064 } else if ('\\' != *p) {
2065 res = mandoc_realloc(res, ssz + 2);
2066 res[ssz++] = *p++;
2067 continue;
2068 }
2069
2070 /* Search for term matches. */
2071 for (cp = r->xmbtab; cp; cp = cp->next)
2072 if (0 == strncmp(p, cp->key.p, cp->key.sz))
2073 break;
2074
2075 if (NULL != cp) {
2076 /*
2077 * A match has been found.
2078 * Append the match to the array and move
2079 * forward by its keysize.
2080 */
2081 res = mandoc_realloc
2082 (res, ssz + cp->val.sz + 1);
2083 memcpy(res + ssz, cp->val.p, cp->val.sz);
2084 ssz += cp->val.sz;
2085 p += (int)cp->key.sz;
2086 continue;
2087 }
2088
2089 /*
2090 * Handle escapes carefully: we need to copy
2091 * over just the escape itself, or else we might
2092 * do replacements within the escape itself.
2093 * Make sure to pass along the bogus string.
2094 */
2095 pp = p++;
2096 esc = mandoc_escape(&p, NULL, NULL);
2097 if (ESCAPE_ERROR == esc) {
2098 sz = strlen(pp);
2099 res = mandoc_realloc(res, ssz + sz + 1);
2100 memcpy(res + ssz, pp, sz);
2101 break;
2102 }
2103 /*
2104 * We bail out on bad escapes.
2105 * No need to warn: we already did so when
2106 * roff_res() was called.
2107 */
2108 sz = (int)(p - pp);
2109 res = mandoc_realloc(res, ssz + sz + 1);
2110 memcpy(res + ssz, pp, sz);
2111 ssz += sz;
2112 }
2113
2114 res[(int)ssz] = '\0';
2115 return(res);
2116 }
2117
2118 /*
2119 * Find out whether a line is a macro line or not.
2120 * If it is, adjust the current position and return one; if it isn't,
2121 * return zero and don't change the current position.
2122 * If the control character has been set with `.cc', then let that grain
2123 * precedence.
2124 * This is slighly contrary to groff, where using the non-breaking
2125 * control character when `cc' has been invoked will cause the
2126 * non-breaking macro contents to be printed verbatim.
2127 */
2128 int
2129 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
2130 {
2131 int pos;
2132
2133 pos = *ppos;
2134
2135 if (0 != r->control && cp[pos] == r->control)
2136 pos++;
2137 else if (0 != r->control)
2138 return(0);
2139 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
2140 pos += 2;
2141 else if ('.' == cp[pos] || '\'' == cp[pos])
2142 pos++;
2143 else
2144 return(0);
2145
2146 while (' ' == cp[pos] || '\t' == cp[pos])
2147 pos++;
2148
2149 *ppos = pos;
2150 return(1);
2151 }