]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
implement .dei and .ami
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.220 2014/07/07 11:35:06 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include "mandoc.h"
29 #include "mandoc_aux.h"
30 #include "libroff.h"
31 #include "libmandoc.h"
32
/*
 * Maximum number of nested if-else conditionals.
 * This is the size of the rstack array in struct roff.
 */
#define	RSTACK_MAX	128

/*
 * Maximum number of string expansions per line, to break infinite loops.
 * roff_res() gives up on the line once this count is exceeded.
 */
#define	EXPAND_LIMIT	1000
38
/*
 * Tokens for the requests and macros handled by this file.
 * The values double as indices into the roffs[] table, so the
 * order here has to match the order of the entries there.
 */
enum	rofft {
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_as,
	ROFF_cc,
	ROFF_ce,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_fam,
	ROFF_hw,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_ps,
	ROFF_rm,
	ROFF_rr,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,
	ROFF_Dd,
	ROFF_TH,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,
	ROFF_EQ,
	ROFF_EN,
	ROFF_cblock,	/* the block closing request, named "." in roffs[] */
	ROFF_USERDEF,	/* a user-defined string called as a macro */
	ROFF_MAX	/* table size; also returned for "no such token" */
};
80
/*
 * An incredibly-simple string buffer:
 * a character pointer together with the cached length of the string.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
88
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * Used by struct roff for the string table (strtab) and for the
 * multi-byte translation table (xmbtab).
 */
struct	roffkv {
	struct roffstr	 key;
	struct roffstr	 val;
	struct roffkv	*next; /* next in list */
};
97
/*
 * A single number register as part of a singly-linked list.
 * The list head is the regtab member of struct roff.
 */
struct	roffreg {
	struct roffstr	 key; /* register name */
	int		 val; /* current register value */
	struct roffreg	*next; /* next in list */
};
106
/*
 * The roff parser context.
 * Allocated by roff_alloc(), emptied by roff_reset(),
 * and released by roff_free().
 */
struct	roff {
	struct mparse	*parse; /* parse point */
	int		 options; /* parse options */
	struct roffnode	*last; /* leaf of stack */
	int		 rstack[RSTACK_MAX]; /* stack of !`ie' rules */
	char		 control; /* control character */
	int		 rstackpos; /* position in rstack; -1 when empty */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* last equation parsed */
	struct eqn_node	*first_eqn; /* first equation parsed */
	struct eqn_node	*eqn; /* current equation being parsed */
};
126
/*
 * One entry on the stack of currently open blocks and conditionals.
 * Entries are created by roffnode_push() and destroyed by
 * roffnode_pop(), which also frees name and end.
 */
struct	roffnode {
	enum rofft	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* end-rules: custom token */
	int		 endspan; /* end-rules: next-line or infty */
	int		 rule; /* current evaluation rule */
};
137
/*
 * The common parameter list of all request handlers.
 * Every function of type roffproc is declared with exactly these
 * arguments, whether it uses all of them or not.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum rofft tok, /* tok of macro */ \
			 char **bufp, /* input buffer */ \
			 size_t *szp, /* size of input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Signature of a request handler stored in the roffs[] table. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
148
/*
 * Description of one request: its name and its handlers.
 * The entries of roffs[] are additionally chained through the
 * next pointer into the buckets of the lookup table hash[].
 */
struct	roffmac {
	const char	*name; /* macro name */
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
	struct roffmac	*next; /* next entry in the same hash bucket */
};
158
/*
 * A predefined string: a name and the text it stands for.
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Expands to one initializer of the predefs[] array, including the
 * trailing comma.  Presumably "predefs.in" consists of PREDEF()
 * invocations only -- that file is not visible from here.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
166
/*
 * Forward declarations of all file-local functions.
 * Functions declared with ROFF_ARGS are request handlers
 * matching the roffproc type.
 */
static	enum rofft	 roffhash_find(const char *, size_t);
static	void		 roffhash_init(void);
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum rofft,
				const char *, int, int);
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_cc(ROFF_ARGS);
static	void		 roff_ccond(struct roff *, int, int);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	int		 roff_evalcond(const char *, int *);
static	int		 roff_evalnum(const char *, int *, int *, int);
static	int		 roff_evalpar(const char *, int *, int *);
static	int		 roff_evalstrcond(const char *, int *);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	size_t		 roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(const struct roff *,
				const char *, size_t);
static	int		 roff_getregro(const char *name);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	enum rofferr	 roff_it(ROFF_ARGS);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	void		 roff_openeqn(struct roff *, const char *,
				int, int, const char *);
static	enum rofft	 roff_parse(struct roff *, char *, int *,
				int, int);
static	enum rofferr	 roff_parsetext(char **, size_t *, int, int *);
static	enum rofferr	 roff_res(struct roff *,
				char **, size_t *, int, int);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	enum rofferr	 roff_rr(ROFF_ARGS);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_tr(ROFF_ARGS);
static	enum rofferr	 roff_Dd(ROFF_ARGS);
static	enum rofferr	 roff_TH(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
224
/*
 * See roffhash_find().
 * The lookup table has one bucket per ASCII character in the
 * range ASCII_LO to ASCII_HI inclusive; a request is filed under
 * the first character of its name.
 */

#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

/* Bucket heads; the chains run through roffmac.next. */
static	struct roffmac	*hash[HASHWIDTH];
232
/*
 * The table of supported requests, indexed by enum rofft.
 * Columns: name, handler for the request itself (proc), handler for
 * text lines inside its scope (text), handler for request lines
 * inside its scope (sub), flags, and the hash chain pointer, which
 * is filled in by roffhash_init().
 * The last entry, for ROFF_USERDEF, has no name and is never hashed.
 */
static	struct roffmac	 roffs[ROFF_MAX] = {
	{ "ad", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "as", roff_ds, NULL, NULL, 0, NULL },
	{ "cc", roff_cc, NULL, NULL, 0, NULL },
	{ "ce", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ds", roff_ds, NULL, NULL, 0, NULL },
	{ "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "fam", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hw", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hy", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "it", roff_it, NULL, NULL, 0, NULL },
	{ "ne", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nh", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nr", roff_nr, NULL, NULL, 0, NULL },
	{ "ns", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ps", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rm", roff_rm, NULL, NULL, 0, NULL },
	{ "rr", roff_rr, NULL, NULL, 0, NULL },
	{ "so", roff_so, NULL, NULL, 0, NULL },
	{ "ta", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tr", roff_tr, NULL, NULL, 0, NULL },
	{ "Dd", roff_Dd, NULL, NULL, 0, NULL },
	{ "TH", roff_TH, NULL, NULL, 0, NULL },
	{ "TS", roff_TS, NULL, NULL, 0, NULL },
	{ "TE", roff_TE, NULL, NULL, 0, NULL },
	{ "T&", roff_T_, NULL, NULL, 0, NULL },
	{ "EQ", roff_EQ, NULL, NULL, 0, NULL },
	{ "EN", roff_EN, NULL, NULL, 0, NULL },
	{ ".", roff_cblock, NULL, NULL, 0, NULL },
	{ NULL, roff_userdef, NULL, NULL, 0, NULL },
};
273
/*
 * NULL-terminated list of mdoc macro names.
 * NOTE(review): the consumer of this array is not visible in this
 * part of the file; presumably it is used to recognize names that
 * must not be redefined -- confirm against the callers.
 */
/* not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH */
const	char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
	"Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
	"Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em",
	"En", "Eo", "Er", "Es", "Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp",
	"Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "Pq",
	"Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv",
	"Sc", "Sh", "Sm", "So", "Sq",
	"Ss", "St", "Sx", "Sy",
	"Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
	"%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
296
/*
 * NULL-terminated list of man macro names.
 * NOTE(review): the consumer of this array is not visible in this
 * part of the file -- confirm its purpose against the callers.
 */
/* not currently implemented: BT DE DS ME MT PT SY TQ YS */
const	char *const __man_reserved[] = {
	"AT", "B", "BI", "BR", "DT",
	"EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR",
	"LP", "OP", "P", "PD", "PP",
	"R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS",
	"TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR",
	NULL
};
306
/*
 * Array of injected predefined strings.
 * The initializers are pulled in from "predefs.in"; PREDEFS_MAX is
 * the declared array size and presumably has to be kept in sync
 * with the number of entries in that file by hand -- TODO confirm.
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
312
/*
 * See roffhash_find().
 * Map a request name to its bucket index in hash[]: the first
 * character of the name, counted from ASCII_LO.  The argument is
 * parenthesized so that pointer expressions such as (s + 1) expand
 * correctly.
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)
315
/*
 * State of the input line trap.
 * roff_parsetext() counts roffit_lines down on each text line and,
 * when it reaches 1, appends roffit_macro to the line and frees it.
 * NOTE(review): these are presumably set up by roff_it(), which is
 * not visible in this part of the file.
 */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
318
319
320 static void
321 roffhash_init(void)
322 {
323 struct roffmac *n;
324 int buc, i;
325
326 for (i = 0; i < (int)ROFF_USERDEF; i++) {
327 assert(roffs[i].name[0] >= ASCII_LO);
328 assert(roffs[i].name[0] <= ASCII_HI);
329
330 buc = ROFF_HASH(roffs[i].name);
331
332 if (NULL != (n = hash[buc])) {
333 for ( ; n->next; n = n->next)
334 /* Do nothing. */ ;
335 n->next = &roffs[i];
336 } else
337 hash[buc] = &roffs[i];
338 }
339 }
340
341 /*
342 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
343 * the nil-terminated string name could be found.
344 */
345 static enum rofft
346 roffhash_find(const char *p, size_t s)
347 {
348 int buc;
349 struct roffmac *n;
350
351 /*
352 * libroff has an extremely simple hashtable, for the time
353 * being, which simply keys on the first character, which must
354 * be printable, then walks a chain. It works well enough until
355 * optimised.
356 */
357
358 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
359 return(ROFF_MAX);
360
361 buc = ROFF_HASH(p);
362
363 if (NULL == (n = hash[buc]))
364 return(ROFF_MAX);
365 for ( ; n; n = n->next)
366 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
367 return((enum rofft)(n - roffs));
368
369 return(ROFF_MAX);
370 }
371
372 /*
373 * Pop the current node off of the stack of roff instructions currently
374 * pending.
375 */
376 static void
377 roffnode_pop(struct roff *r)
378 {
379 struct roffnode *p;
380
381 assert(r->last);
382 p = r->last;
383
384 r->last = r->last->parent;
385 free(p->name);
386 free(p->end);
387 free(p);
388 }
389
390 /*
391 * Push a roff node onto the instruction stack. This must later be
392 * removed with roffnode_pop().
393 */
394 static void
395 roffnode_push(struct roff *r, enum rofft tok, const char *name,
396 int line, int col)
397 {
398 struct roffnode *p;
399
400 p = mandoc_calloc(1, sizeof(struct roffnode));
401 p->tok = tok;
402 if (name)
403 p->name = mandoc_strdup(name);
404 p->parent = r->last;
405 p->line = line;
406 p->col = col;
407 p->rule = p->parent ? p->parent->rule : 0;
408
409 r->last = p;
410 }
411
412 static void
413 roff_free1(struct roff *r)
414 {
415 struct tbl_node *tbl;
416 struct eqn_node *e;
417 int i;
418
419 while (NULL != (tbl = r->first_tbl)) {
420 r->first_tbl = tbl->next;
421 tbl_free(tbl);
422 }
423
424 r->first_tbl = r->last_tbl = r->tbl = NULL;
425
426 while (NULL != (e = r->first_eqn)) {
427 r->first_eqn = e->next;
428 eqn_free(e);
429 }
430
431 r->first_eqn = r->last_eqn = r->eqn = NULL;
432
433 while (r->last)
434 roffnode_pop(r);
435
436 roff_freestr(r->strtab);
437 roff_freestr(r->xmbtab);
438
439 r->strtab = r->xmbtab = NULL;
440
441 roff_freereg(r->regtab);
442
443 r->regtab = NULL;
444
445 if (r->xtab)
446 for (i = 0; i < 128; i++)
447 free(r->xtab[i].p);
448
449 free(r->xtab);
450 r->xtab = NULL;
451 }
452
453 void
454 roff_reset(struct roff *r)
455 {
456
457 roff_free1(r);
458 r->control = 0;
459 }
460
/*
 * Destroy a parser context obtained from roff_alloc():
 * release all data it owns, then the context structure itself.
 */
void
roff_free(struct roff *r)
{

	roff_free1(r);	/* owned data first */
	free(r);
}
468
469 struct roff *
470 roff_alloc(struct mparse *parse, int options)
471 {
472 struct roff *r;
473
474 r = mandoc_calloc(1, sizeof(struct roff));
475 r->parse = parse;
476 r->options = options;
477 r->rstackpos = -1;
478
479 roffhash_init();
480
481 return(r);
482 }
483
/*
 * In the current line, expand escape sequences that tend to get
 * used in numerical expressions and conditional requests.
 * Also check the syntax of the remaining escape sequences.
 *
 * The line is scanned backwards from its end down to position pos.
 * The sequences \* (string), \n (number register), \B (numeric
 * expression test) and \w (width) are replaced in a newly allocated
 * copy of the buffer, which then takes the place of *bufp while
 * *szp is updated to its size.  Scanning resumes at the end of the
 * inserted text, so replacement text is itself subject to expansion.
 *
 * Returns ROFF_CONT on success or ROFF_IGN when more than
 * EXPAND_LIMIT expansions were needed on this line.
 */
static enum rofferr
roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
{
	char		 ubuf[24]; /* buffer to print the number */
	const char	*start;	/* start of the string to process */
	char		*stesc;	/* start of an escape sequence ('\\') */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy bufp to */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	int		 expand_count;	/* to avoid infinite loops */
	int		 npos;	/* position in numeric expression */
	int		 arg_complete; /* argument not interrupted by eol */
	char		 term;	/* character terminating the escape */

	expand_count = 0;
	start = *bufp + pos;
	/*
	 * Start at the last character; the post-decrement in the loop
	 * condition means that the last character itself is never
	 * inspected as the start of an escape sequence.
	 */
	stesc = strchr(start, '\0') - 1;
	while (stesc-- > start) {

		/* Search backwards for the next backslash. */

		if ('\\' != *stesc)
			continue;

		/*
		 * If it is escaped, skip it: count the run of
		 * backslashes ending here; with an even count, this
		 * one is quoted by its predecessor.
		 */

		for (cp = stesc - 1; cp >= start; cp--)
			if ('\\' != *cp)
				break;

		if (0 == (stesc - cp) % 2) {
			stesc = (char *)cp;
			continue;
		}

		/*
		 * Decide whether to expand or to check only.
		 * For \* the result is looked up later; for \B, \w
		 * and \n it is formatted into ubuf.  \B and \w take
		 * a delimited argument, so remember the delimiter.
		 */

		term = '\0';
		cp = stesc + 1;
		switch (*cp) {
		case '*':
			res = NULL;
			break;
		case 'B':
			/* FALLTHROUGH */
		case 'w':
			term = cp[1];
			/* FALLTHROUGH */
		case 'n':
			res = ubuf;
			break;
		default:
			/* Not expanded here; validate the syntax only. */
			if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
				mandoc_vmsg(MANDOCERR_ESC_BAD,
				    r->parse, ln, (int)(stesc - *bufp),
				    "%.*s", (int)(cp - stesc), stesc);
			continue;
		}

		if (EXPAND_LIMIT < ++expand_count) {
			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
			    ln, (int)(stesc - *bufp), NULL);
			return(ROFF_IGN);
		}

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 * A maxl of 0 means "up to the terminator term".
		 */

		if ('\0' == term) {
			switch (*++cp) {
			case '\0':
				maxl = 0;
				break;
			case '(':
				cp++;
				maxl = 2;
				break;
			case '[':
				cp++;
				term = ']';
				maxl = 0;
				break;
			default:
				maxl = 1;
				break;
			}
		} else {
			/* Skip the escape letter and the delimiter. */
			cp += 2;
			maxl = 0;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		arg_complete = 1;
		for (naml = 0; 0 == maxl || naml < maxl; naml++, cp++) {
			if ('\0' == *cp) {
				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
				    ln, (int)(stesc - *bufp), stesc);
				arg_complete = 0;
				break;
			}
			if (0 == maxl && *cp == term) {
				/* Step over the terminator. */
				cp++;
				break;
			}
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		switch (stesc[1]) {
		case '*':
			if (arg_complete)
				res = roff_getstrn(r, stnam, naml);
			break;
		case 'B':
			/*
			 * Yields 1 if the whole argument parses as a
			 * numeric expression, 0 otherwise.
			 */
			npos = 0;
			ubuf[0] = arg_complete &&
			    roff_evalnum(stnam, &npos, NULL, 0) &&
			    stnam + npos + 1 == cp ? '1' : '0';
			ubuf[1] = '\0';
			break;
		case 'n':
			if (arg_complete)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, stnam, naml));
			else
				ubuf[0] = '\0';
			break;
		case 'w':
			/* use even incomplete args */
			/* The width is taken as 24 units per character. */
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
			    24 * (int)naml);
			break;
		}

		/* Only \* can get here without a result. */

		if (NULL == res) {
			mandoc_vmsg(MANDOCERR_STR_UNDEF,
			    r->parse, ln, (int)(stesc - *bufp),
			    "%.*s", (int)naml, stnam);
			res = "";
		}

		/*
		 * Replace the escape sequence by the string:
		 * cut the old buffer at the backslash and glue
		 * head, replacement and tail together.
		 */

		*stesc = '\0';
		*szp = mandoc_asprintf(&nbuf, "%s%s%s",
		    *bufp, res, cp) + 1;

		/*
		 * Prepare for the next replacement: continue the
		 * backward scan right after the inserted text.
		 */

		start = nbuf + pos;
		stesc = nbuf + (stesc - *bufp) + strlen(res);
		free(*bufp);
		*bufp = nbuf;
	}
	return(ROFF_CONT);
}
656
657 /*
658 * Process text streams:
659 * Convert all breakable hyphens into ASCII_HYPH.
660 * Decrement and spring input line trap.
661 */
662 static enum rofferr
663 roff_parsetext(char **bufp, size_t *szp, int pos, int *offs)
664 {
665 size_t sz;
666 const char *start;
667 char *p;
668 int isz;
669 enum mandoc_esc esc;
670
671 start = p = *bufp + pos;
672
673 while ('\0' != *p) {
674 sz = strcspn(p, "-\\");
675 p += sz;
676
677 if ('\0' == *p)
678 break;
679
680 if ('\\' == *p) {
681 /* Skip over escapes. */
682 p++;
683 esc = mandoc_escape((const char **)&p, NULL, NULL);
684 if (ESCAPE_ERROR == esc)
685 break;
686 continue;
687 } else if (p == start) {
688 p++;
689 continue;
690 }
691
692 if (isalpha((unsigned char)p[-1]) &&
693 isalpha((unsigned char)p[1]))
694 *p = ASCII_HYPH;
695 p++;
696 }
697
698 /* Spring the input line trap. */
699 if (1 == roffit_lines) {
700 isz = mandoc_asprintf(&p, "%s\n.%s", *bufp, roffit_macro);
701 free(*bufp);
702 *bufp = p;
703 *szp = isz + 1;
704 *offs = 0;
705 free(roffit_macro);
706 roffit_lines = 0;
707 return(ROFF_REPARSE);
708 } else if (1 < roffit_lines)
709 --roffit_lines;
710 return(ROFF_CONT);
711 }
712
713 enum rofferr
714 roff_parseln(struct roff *r, int ln, char **bufp,
715 size_t *szp, int pos, int *offs)
716 {
717 enum rofft t;
718 enum rofferr e;
719 int ppos, ctl;
720
721 /*
722 * Run the reserved-word filter only if we have some reserved
723 * words to fill in.
724 */
725
726 e = roff_res(r, bufp, szp, ln, pos);
727 if (ROFF_IGN == e)
728 return(e);
729 assert(ROFF_CONT == e);
730
731 ppos = pos;
732 ctl = roff_getcontrol(r, *bufp, &pos);
733
734 /*
735 * First, if a scope is open and we're not a macro, pass the
736 * text through the macro's filter. If a scope isn't open and
737 * we're not a macro, just let it through.
738 * Finally, if there's an equation scope open, divert it into it
739 * no matter our state.
740 */
741
742 if (r->last && ! ctl) {
743 t = r->last->tok;
744 assert(roffs[t].text);
745 e = (*roffs[t].text)(r, t, bufp, szp, ln, pos, pos, offs);
746 assert(ROFF_IGN == e || ROFF_CONT == e);
747 if (ROFF_CONT != e)
748 return(e);
749 }
750 if (r->eqn)
751 return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
752 if ( ! ctl) {
753 if (r->tbl)
754 return(tbl_read(r->tbl, ln, *bufp, pos));
755 return(roff_parsetext(bufp, szp, pos, offs));
756 }
757
758 /*
759 * If a scope is open, go to the child handler for that macro,
760 * as it may want to preprocess before doing anything with it.
761 * Don't do so if an equation is open.
762 */
763
764 if (r->last) {
765 t = r->last->tok;
766 assert(roffs[t].sub);
767 return((*roffs[t].sub)(r, t, bufp, szp,
768 ln, ppos, pos, offs));
769 }
770
771 /*
772 * Lastly, as we've no scope open, try to look up and execute
773 * the new macro. If no macro is found, simply return and let
774 * the compilers handle it.
775 */
776
777 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos, ln, ppos)))
778 return(ROFF_CONT);
779
780 assert(roffs[t].proc);
781 return((*roffs[t].proc)(r, t, bufp, szp, ln, ppos, pos, offs));
782 }
783
784 void
785 roff_endparse(struct roff *r)
786 {
787
788 if (r->last)
789 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
790 r->last->line, r->last->col, NULL);
791
792 if (r->eqn) {
793 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
794 r->eqn->eqn.ln, r->eqn->eqn.pos, NULL);
795 eqn_end(&r->eqn);
796 }
797
798 if (r->tbl) {
799 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
800 r->tbl->line, r->tbl->pos, NULL);
801 tbl_end(&r->tbl);
802 }
803 }
804
805 /*
806 * Parse a roff node's type from the input buffer. This must be in the
807 * form of ".foo xxx" in the usual way.
808 */
809 static enum rofft
810 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
811 {
812 char *cp;
813 const char *mac;
814 size_t maclen;
815 enum rofft t;
816
817 cp = buf + *pos;
818
819 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
820 return(ROFF_MAX);
821
822 mac = cp;
823 maclen = roff_getname(r, &cp, ln, ppos);
824
825 t = (r->current_string = roff_getstrn(r, mac, maclen))
826 ? ROFF_USERDEF : roffhash_find(mac, maclen);
827
828 if (ROFF_MAX != t)
829 *pos = cp - buf;
830
831 return(t);
832 }
833
834 static enum rofferr
835 roff_cblock(ROFF_ARGS)
836 {
837
838 /*
839 * A block-close `..' should only be invoked as a child of an
840 * ignore macro, otherwise raise a warning and just ignore it.
841 */
842
843 if (NULL == r->last) {
844 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
845 return(ROFF_IGN);
846 }
847
848 switch (r->last->tok) {
849 case ROFF_am:
850 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
851 /* FALLTHROUGH */
852 case ROFF_ami:
853 /* FALLTHROUGH */
854 case ROFF_de:
855 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
856 /* FALLTHROUGH */
857 case ROFF_dei:
858 /* FALLTHROUGH */
859 case ROFF_ig:
860 break;
861 default:
862 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
863 return(ROFF_IGN);
864 }
865
866 if ((*bufp)[pos])
867 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
868 ".. %s", *bufp + pos);
869
870 roffnode_pop(r);
871 roffnode_cleanscope(r);
872 return(ROFF_IGN);
873
874 }
875
876 static void
877 roffnode_cleanscope(struct roff *r)
878 {
879
880 while (r->last) {
881 if (--r->last->endspan != 0)
882 break;
883 roffnode_pop(r);
884 }
885 }
886
887 static void
888 roff_ccond(struct roff *r, int ln, int ppos)
889 {
890
891 if (NULL == r->last) {
892 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
893 return;
894 }
895
896 switch (r->last->tok) {
897 case ROFF_el:
898 /* FALLTHROUGH */
899 case ROFF_ie:
900 /* FALLTHROUGH */
901 case ROFF_if:
902 break;
903 default:
904 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
905 return;
906 }
907
908 if (r->last->endspan > -1) {
909 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
910 return;
911 }
912
913 roffnode_pop(r);
914 roffnode_cleanscope(r);
915 return;
916 }
917
918 static enum rofferr
919 roff_block(ROFF_ARGS)
920 {
921 const char *name;
922 char *iname, *cp;
923 size_t namesz;
924
925 /* Ignore groff compatibility mode for now. */
926
927 if (ROFF_de1 == tok)
928 tok = ROFF_de;
929 else if (ROFF_am1 == tok)
930 tok = ROFF_am;
931
932 /* Parse the macro name argument. */
933
934 cp = *bufp + pos;
935 if (ROFF_ig == tok) {
936 iname = NULL;
937 namesz = 0;
938 } else {
939 iname = cp;
940 namesz = roff_getname(r, &cp, ln, ppos);
941 iname[namesz] = '\0';
942 }
943
944 /* Resolve the macro name argument if it is indirect. */
945
946 if (namesz && (ROFF_dei == tok || ROFF_ami == tok)) {
947 if (NULL == (name = roff_getstrn(r, iname, namesz))) {
948 mandoc_vmsg(MANDOCERR_STR_UNDEF,
949 r->parse, ln, (int)(iname - *bufp),
950 "%.*s", (int)namesz, iname);
951 namesz = 0;
952 } else
953 namesz = strlen(name);
954 } else
955 name = iname;
956
957 if (0 == namesz && ROFF_ig != tok) {
958 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
959 ln, ppos, roffs[tok].name);
960 return(ROFF_IGN);
961 }
962
963 roffnode_push(r, tok, name, ln, ppos);
964
965 /*
966 * At the beginning of a `de' macro, clear the existing string
967 * with the same name, if there is one. New content will be
968 * appended from roff_block_text() in multiline mode.
969 */
970
971 if (ROFF_de == tok || ROFF_dei == tok)
972 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
973
974 if ('\0' == *cp)
975 return(ROFF_IGN);
976
977 /* Get the custom end marker. */
978
979 iname = cp;
980 namesz = roff_getname(r, &cp, ln, ppos);
981
982 /* Resolve the end marker if it is indirect. */
983
984 if (namesz && (ROFF_dei == tok || ROFF_ami == tok)) {
985 if (NULL == (name = roff_getstrn(r, iname, namesz))) {
986 mandoc_vmsg(MANDOCERR_STR_UNDEF,
987 r->parse, ln, (int)(iname - *bufp),
988 "%.*s", (int)namesz, iname);
989 namesz = 0;
990 } else
991 namesz = strlen(name);
992 } else
993 name = iname;
994
995 if (namesz)
996 r->last->end = mandoc_strndup(name, namesz);
997
998 if ('\0' != *cp)
999 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
1000 ln, pos, ".%s ... %s", roffs[tok].name, cp);
1001
1002 return(ROFF_IGN);
1003 }
1004
1005 static enum rofferr
1006 roff_block_sub(ROFF_ARGS)
1007 {
1008 enum rofft t;
1009 int i, j;
1010
1011 /*
1012 * First check whether a custom macro exists at this level. If
1013 * it does, then check against it. This is some of groff's
1014 * stranger behaviours. If we encountered a custom end-scope
1015 * tag and that tag also happens to be a "real" macro, then we
1016 * need to try interpreting it again as a real macro. If it's
1017 * not, then return ignore. Else continue.
1018 */
1019
1020 if (r->last->end) {
1021 for (i = pos, j = 0; r->last->end[j]; j++, i++)
1022 if ((*bufp)[i] != r->last->end[j])
1023 break;
1024
1025 if ('\0' == r->last->end[j] &&
1026 ('\0' == (*bufp)[i] ||
1027 ' ' == (*bufp)[i] ||
1028 '\t' == (*bufp)[i])) {
1029 roffnode_pop(r);
1030 roffnode_cleanscope(r);
1031
1032 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
1033 i++;
1034
1035 pos = i;
1036 if (ROFF_MAX != roff_parse(r, *bufp, &pos, ln, ppos))
1037 return(ROFF_RERUN);
1038 return(ROFF_IGN);
1039 }
1040 }
1041
1042 /*
1043 * If we have no custom end-query or lookup failed, then try
1044 * pulling it out of the hashtable.
1045 */
1046
1047 t = roff_parse(r, *bufp, &pos, ln, ppos);
1048
1049 if (ROFF_cblock != t) {
1050 if (ROFF_ig != tok)
1051 roff_setstr(r, r->last->name, *bufp + ppos, 2);
1052 return(ROFF_IGN);
1053 }
1054
1055 assert(roffs[t].proc);
1056 return((*roffs[t].proc)(r, t, bufp, szp, ln, ppos, pos, offs));
1057 }
1058
1059 static enum rofferr
1060 roff_block_text(ROFF_ARGS)
1061 {
1062
1063 if (ROFF_ig != tok)
1064 roff_setstr(r, r->last->name, *bufp + pos, 2);
1065
1066 return(ROFF_IGN);
1067 }
1068
1069 static enum rofferr
1070 roff_cond_sub(ROFF_ARGS)
1071 {
1072 enum rofft t;
1073 char *ep;
1074 int rr;
1075
1076 rr = r->last->rule;
1077 roffnode_cleanscope(r);
1078 t = roff_parse(r, *bufp, &pos, ln, ppos);
1079
1080 /*
1081 * Fully handle known macros when they are structurally
1082 * required or when the conditional evaluated to true.
1083 */
1084
1085 if ((ROFF_MAX != t) &&
1086 (rr || ROFFMAC_STRUCT & roffs[t].flags)) {
1087 assert(roffs[t].proc);
1088 return((*roffs[t].proc)(r, t, bufp, szp,
1089 ln, ppos, pos, offs));
1090 }
1091
1092 /*
1093 * If `\}' occurs on a macro line without a preceding macro,
1094 * drop the line completely.
1095 */
1096
1097 ep = *bufp + pos;
1098 if ('\\' == ep[0] && '}' == ep[1])
1099 rr = 0;
1100
1101 /* Always check for the closing delimiter `\}'. */
1102
1103 while (NULL != (ep = strchr(ep, '\\'))) {
1104 if ('}' == *(++ep)) {
1105 *ep = '&';
1106 roff_ccond(r, ln, ep - *bufp - 1);
1107 }
1108 ++ep;
1109 }
1110 return(rr ? ROFF_CONT : ROFF_IGN);
1111 }
1112
1113 static enum rofferr
1114 roff_cond_text(ROFF_ARGS)
1115 {
1116 char *ep;
1117 int rr;
1118
1119 rr = r->last->rule;
1120 roffnode_cleanscope(r);
1121
1122 ep = *bufp + pos;
1123 while (NULL != (ep = strchr(ep, '\\'))) {
1124 if ('}' == *(++ep)) {
1125 *ep = '&';
1126 roff_ccond(r, ln, ep - *bufp - 1);
1127 }
1128 ++ep;
1129 }
1130 return(rr ? ROFF_CONT : ROFF_IGN);
1131 }
1132
/*
 * Parse a single signed integer number.  Stop at the first non-digit.
 * If there is at least one digit, return success (1), store the
 * number in *res and advance the parse point *pos; else return
 * failure (0), store 0 in *res and leave the parse point unchanged.
 * The res argument may be NULL if the value is of no interest.
 *
 * Overflows are ignored: the value wraps around.  The digits are
 * accumulated in unsigned arithmetic to get that wrap-around without
 * relying on signed overflow, which is undefined behaviour in C.
 */
static int
roff_getnum(const char *v, int *pos, int *res)
{
	unsigned int	 acc;
	int		 myres, n, p;

	if (NULL == res)
		res = &myres;

	p = *pos;
	n = v[p] == '-';
	if (n)
		p++;

	*res = 0;
	for (acc = 0; isdigit((unsigned char)v[p]); p++)
		acc = 10u * acc + (unsigned int)(v[p] - '0');
	if (p == *pos + n)
		return 0;

	/* Negate in unsigned, too: -INT_MIN would overflow. */

	if (n)
		acc = 0u - acc;
	*res = (int)acc;

	*pos = p;
	return 1;
}
1163
/*
 * Evaluate a string comparison condition.
 * The first character is the delimiter.
 * Succeed if the string up to its second occurrence
 * matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or lacking that, to the end of the line.
 * The cursor is never moved beyond the terminating NUL;
 * on empty input, fail without moving it at all.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*s1, *s2, *s3;
	int		 match;

	match = 0;
	s1 = v + *pos;		/* initial delimiter */

	/* Without a delimiter, there is nothing to scan. */

	if ('\0' == *s1)
		return(match);

	s2 = s1 + 1;		/* for scanning the first string */
	s3 = strchr(s2, *s1);	/* for scanning the second string */

	if (NULL == s3)		/* found no middle delimiter */
		goto out;

	while ('\0' != *++s3) {
		if (*s2 != *s3) {  /* mismatch */
			s3 = strchr(s3, *s1);
			break;
		}
		if (*s3 == *s1) {  /* found the final delimiter */
			match = 1;
			break;
		}
		s2++;
	}

out:
	if (NULL == s3)
		s3 = strchr(s2, '\0');
	else if ('\0' != *s3)	/* step over the final delimiter only */
		s3++;
	*pos = s3 - v;
	return(match);
}
1206
/*
 * Evaluate an optionally negated single character, numerical,
 * or string condition, starting at v + *pos, and advance *pos
 * beyond it.  The one-letter conditions `n' and `o' count as true,
 * `c', `d', `e', `r' and `t' as false.  A numerical condition is
 * true if the number is positive.  Returns 1 if the condition,
 * after applying the negation, holds, or 0 otherwise.
 */
static int
roff_evalcond(const char *v, int *pos)
{
	int	 wanttrue, number;

	wanttrue = ('!' != v[*pos]);
	if ( ! wanttrue)
		(*pos)++;

	switch (v[*pos]) {
	case 'n':
		/* FALLTHROUGH */
	case 'o':
		(*pos)++;
		return(wanttrue);
	case 'c':
		/* FALLTHROUGH */
	case 'd':
		/* FALLTHROUGH */
	case 'e':
		/* FALLTHROUGH */
	case 'r':
		/* FALLTHROUGH */
	case 't':
		(*pos)++;
		return( ! wanttrue);
	default:
		break;
	}

	/* Whatever is not a number is taken as a string comparison. */

	if ( ! roff_evalnum(v, pos, &number, 0))
		return(roff_evalstrcond(v, pos) == wanttrue);

	return((number > 0) == wanttrue);
}
1248
1249 static enum rofferr
1250 roff_line_ignore(ROFF_ARGS)
1251 {
1252
1253 return(ROFF_IGN);
1254 }
1255
1256 static enum rofferr
1257 roff_cond(ROFF_ARGS)
1258 {
1259
1260 roffnode_push(r, tok, NULL, ln, ppos);
1261
1262 /*
1263 * An `.el' has no conditional body: it will consume the value
1264 * of the current rstack entry set in prior `ie' calls or
1265 * defaults to DENY.
1266 *
1267 * If we're not an `el', however, then evaluate the conditional.
1268 */
1269
1270 r->last->rule = ROFF_el == tok ?
1271 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
1272 roff_evalcond(*bufp, &pos);
1273
1274 /*
1275 * An if-else will put the NEGATION of the current evaluated
1276 * conditional into the stack of rules.
1277 */
1278
1279 if (ROFF_ie == tok) {
1280 if (r->rstackpos == RSTACK_MAX - 1) {
1281 mandoc_msg(MANDOCERR_MEM,
1282 r->parse, ln, ppos, NULL);
1283 return(ROFF_ERR);
1284 }
1285 r->rstack[++r->rstackpos] = !r->last->rule;
1286 }
1287
1288 /* If the parent has false as its rule, then so do we. */
1289
1290 if (r->last->parent && !r->last->parent->rule)
1291 r->last->rule = 0;
1292
1293 /*
1294 * Determine scope.
1295 * If there is nothing on the line after the conditional,
1296 * not even whitespace, use next-line scope.
1297 */
1298
1299 if ('\0' == (*bufp)[pos]) {
1300 r->last->endspan = 2;
1301 goto out;
1302 }
1303
1304 while (' ' == (*bufp)[pos])
1305 pos++;
1306
1307 /* An opening brace requests multiline scope. */
1308
1309 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1310 r->last->endspan = -1;
1311 pos += 2;
1312 goto out;
1313 }
1314
1315 /*
1316 * Anything else following the conditional causes
1317 * single-line scope. Warn if the scope contains
1318 * nothing but trailing whitespace.
1319 */
1320
1321 if ('\0' == (*bufp)[pos])
1322 mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
1323 ln, ppos, roffs[tok].name);
1324
1325 r->last->endspan = 1;
1326
1327 out:
1328 *offs = pos;
1329 return(ROFF_RERUN);
1330 }
1331
1332 static enum rofferr
1333 roff_ds(ROFF_ARGS)
1334 {
1335 char *string;
1336 const char *name;
1337 size_t namesz;
1338
1339 /*
1340 * The first word is the name of the string.
1341 * If it is empty or terminated by an escape sequence,
1342 * abort the `ds' request without defining anything.
1343 */
1344
1345 name = string = *bufp + pos;
1346 if ('\0' == *name)
1347 return(ROFF_IGN);
1348
1349 namesz = roff_getname(r, &string, ln, pos);
1350 if ('\\' == name[namesz])
1351 return(ROFF_IGN);
1352
1353 /* Read past the initial double-quote, if any. */
1354 if ('"' == *string)
1355 string++;
1356
1357 /* The rest is the value. */
1358 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
1359 ROFF_as == tok);
1360 return(ROFF_IGN);
1361 }
1362
/*
 * Parse a single operator, one or two characters long.
 * If the operator is recognized, store its one-character code
 * in *res, advance the parse point past it, and return that code;
 * else return 0 and leave the parse point unchanged.
 * Two-character operators are encoded as follows:
 * "<=" as 'l', "<>" as '!', "<?" as 'i', ">=" as 'g',
 * ">?" as 'a', and "==" as '='.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	int	 len;	/* number of characters in the operator */
	char	 next;

	*res = v[*pos];
	len = 1;

	if ('<' == *res) {
		next = v[*pos + 1];
		if ('=' == next) {
			*res = 'l';
			len = 2;
		} else if ('>' == next) {
			*res = '!';
			len = 2;
		} else if ('?' == next) {
			*res = 'i';
			len = 2;
		}
	} else if ('>' == *res) {
		next = v[*pos + 1];
		if ('=' == next) {
			*res = 'g';
			len = 2;
		} else if ('?' == next) {
			*res = 'a';
			len = 2;
		}
	} else if ('=' == *res) {
		if ('=' == v[*pos + 1])
			len = 2;
	} else if ('+' != *res && '-' != *res && '*' != *res &&
	    '/' != *res && '%' != *res && '&' != *res && ':' != *res)
		return(0);

	*pos += len;
	return(*res);
}
1432
/*
 * Evaluate either a parenthesized numeric expression
 * or a single signed integer number.
 * Returns 1 on success and 0 on failure.
 */
static int
roff_evalpar(const char *v, int *pos, int *res)
{

	if ('(' == v[*pos]) {
		(*pos)++;
		if ( ! roff_evalnum(v, pos, res, 1))
			return(0);

		if (')' == v[*pos]) {
			(*pos)++;
			return(1);
		}

		/*
		 * The closing parenthesis is missing.
		 * That is an error in validation mode (res == NULL),
		 * but ignored in evaluation mode.
		 */
		return(NULL != res);
	}

	return(roff_getnum(v, pos, res));
}
1461
/*
 * Evaluate a complete numeric expression.
 * Proceed left to right, there is no concept of precedence.
 *
 * v is the string to parse and *pos the parse point; if pos is
 * NULL, parsing starts at the beginning of v.  If res is NULL, the
 * expression is only parsed and no result is stored.  If skipwhite
 * is non-zero, whitespace around operands and operators is skipped.
 * Returns 1 on success and 0 if an operand cannot be parsed.
 *
 * Division and modulus by zero yield 0 rather than invoking
 * undefined behaviour, and a divisor of -1 is special-cased
 * such that dividing the smallest integer cannot trap.
 */
static int
roff_evalnum(const char *v, int *pos, int *res, int skipwhite)
{
	int		 mypos, operand2;
	char		 operator;

	if (NULL == pos) {
		mypos = 0;
		pos = &mypos;
	}

	if (skipwhite)
		while (isspace((unsigned char)v[*pos]))
			(*pos)++;

	if ( ! roff_evalpar(v, pos, res))
		return(0);

	while (1) {
		if (skipwhite)
			while (isspace((unsigned char)v[*pos]))
				(*pos)++;

		/* The expression ends where no operator follows. */
		if ( ! roff_getop(v, pos, &operator))
			break;

		if (skipwhite)
			while (isspace((unsigned char)v[*pos]))
				(*pos)++;

		if ( ! roff_evalpar(v, pos, &operand2))
			return(0);

		if (skipwhite)
			while (isspace((unsigned char)v[*pos]))
				(*pos)++;

		/* When merely parsing, there is nothing to calculate. */
		if (NULL == res)
			continue;

		switch (operator) {
		case '+':
			*res += operand2;
			break;
		case '-':
			*res -= operand2;
			break;
		case '*':
			*res *= operand2;
			break;
		case '/':
			if (0 == operand2)
				*res = 0;
			else if (-1 == operand2)
				/* x / -1 == -x, negated without overflow. */
				*res = (int)(0u - (unsigned int)*res);
			else
				*res /= operand2;
			break;
		case '%':
			/* x % -1 == 0 for every x. */
			if (0 == operand2 || -1 == operand2)
				*res = 0;
			else
				*res %= operand2;
			break;
		case '<':
			*res = *res < operand2;
			break;
		case '>':
			*res = *res > operand2;
			break;
		case 'l':	/* <= */
			*res = *res <= operand2;
			break;
		case 'g':	/* >= */
			*res = *res >= operand2;
			break;
		case '=':
			*res = *res == operand2;
			break;
		case '!':	/* <> */
			*res = *res != operand2;
			break;
		case '&':
			*res = *res && operand2;
			break;
		case ':':
			*res = *res || operand2;
			break;
		case 'i':	/* <? (minimum) */
			if (operand2 < *res)
				*res = operand2;
			break;
		case 'a':	/* >? (maximum) */
			if (operand2 > *res)
				*res = operand2;
			break;
		default:
			/* roff_getop() returns no other codes. */
			abort();
		}
	}
	return(1);
}
1560
1561 void
1562 roff_setreg(struct roff *r, const char *name, int val, char sign)
1563 {
1564 struct roffreg *reg;
1565
1566 /* Search for an existing register with the same name. */
1567 reg = r->regtab;
1568
1569 while (reg && strcmp(name, reg->key.p))
1570 reg = reg->next;
1571
1572 if (NULL == reg) {
1573 /* Create a new register. */
1574 reg = mandoc_malloc(sizeof(struct roffreg));
1575 reg->key.p = mandoc_strdup(name);
1576 reg->key.sz = strlen(name);
1577 reg->val = 0;
1578 reg->next = r->regtab;
1579 r->regtab = reg;
1580 }
1581
1582 if ('+' == sign)
1583 reg->val += val;
1584 else if ('-' == sign)
1585 reg->val -= val;
1586 else
1587 reg->val = val;
1588 }
1589
/*
 * Look up a predefined read-only number register.
 * Only the first character of name is inspected.
 * Returns the value of the register, or -1 if the character does
 * not name a predefined register.  Should a predefined register
 * with the value -1 ever be needed, a different special value
 * will have to be chosen.
 */
static int
roff_getregro(const char *name)
{
	const char	 id = *name;

	/* ASCII approximation mode is always off. */
	if ('A' == id)
		return(0);

	/* Groff compatibility mode is always on. */
	if ('g' == id)
		return(1);

	/* Fixed horizontal resolution. */
	if ('H' == id)
		return(24);

	/* Always adjust left margin only. */
	if ('j' == id)
		return(0);

	/* Some output device is always defined. */
	if ('T' == id)
		return(1);

	/* Fixed vertical resolution. */
	if ('V' == id)
		return(40);

	return(-1);
}
1617
1618 int
1619 roff_getreg(const struct roff *r, const char *name)
1620 {
1621 struct roffreg *reg;
1622 int val;
1623
1624 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
1625 val = roff_getregro(name + 1);
1626 if (-1 != val)
1627 return (val);
1628 }
1629
1630 for (reg = r->regtab; reg; reg = reg->next)
1631 if (0 == strcmp(name, reg->key.p))
1632 return(reg->val);
1633
1634 return(0);
1635 }
1636
1637 static int
1638 roff_getregn(const struct roff *r, const char *name, size_t len)
1639 {
1640 struct roffreg *reg;
1641 int val;
1642
1643 if ('.' == name[0] && 2 == len) {
1644 val = roff_getregro(name + 1);
1645 if (-1 != val)
1646 return (val);
1647 }
1648
1649 for (reg = r->regtab; reg; reg = reg->next)
1650 if (len == reg->key.sz &&
1651 0 == strncmp(name, reg->key.p, len))
1652 return(reg->val);
1653
1654 return(0);
1655 }
1656
1657 static void
1658 roff_freereg(struct roffreg *reg)
1659 {
1660 struct roffreg *old_reg;
1661
1662 while (NULL != reg) {
1663 free(reg->key.p);
1664 old_reg = reg;
1665 reg = reg->next;
1666 free(old_reg);
1667 }
1668 }
1669
1670 static enum rofferr
1671 roff_nr(ROFF_ARGS)
1672 {
1673 char *key, *val;
1674 size_t keysz;
1675 int iv;
1676 char sign;
1677
1678 key = val = *bufp + pos;
1679 if ('\0' == *key)
1680 return(ROFF_IGN);
1681
1682 keysz = roff_getname(r, &val, ln, pos);
1683 if ('\\' == key[keysz])
1684 return(ROFF_IGN);
1685 key[keysz] = '\0';
1686
1687 sign = *val;
1688 if ('+' == sign || '-' == sign)
1689 val++;
1690
1691 if (roff_evalnum(val, NULL, &iv, 0))
1692 roff_setreg(r, key, iv, sign);
1693
1694 return(ROFF_IGN);
1695 }
1696
1697 static enum rofferr
1698 roff_rr(ROFF_ARGS)
1699 {
1700 struct roffreg *reg, **prev;
1701 char *name, *cp;
1702 size_t namesz;
1703
1704 name = cp = *bufp + pos;
1705 if ('\0' == *name)
1706 return(ROFF_IGN);
1707 namesz = roff_getname(r, &cp, ln, pos);
1708 name[namesz] = '\0';
1709
1710 prev = &r->regtab;
1711 while (1) {
1712 reg = *prev;
1713 if (NULL == reg || !strcmp(name, reg->key.p))
1714 break;
1715 prev = &reg->next;
1716 }
1717 if (NULL != reg) {
1718 *prev = reg->next;
1719 free(reg->key.p);
1720 free(reg);
1721 }
1722 return(ROFF_IGN);
1723 }
1724
1725 static enum rofferr
1726 roff_rm(ROFF_ARGS)
1727 {
1728 const char *name;
1729 char *cp;
1730 size_t namesz;
1731
1732 cp = *bufp + pos;
1733 while ('\0' != *cp) {
1734 name = cp;
1735 namesz = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1736 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
1737 if ('\\' == name[namesz])
1738 break;
1739 }
1740 return(ROFF_IGN);
1741 }
1742
1743 static enum rofferr
1744 roff_it(ROFF_ARGS)
1745 {
1746 char *cp;
1747 size_t len;
1748 int iv;
1749
1750 /* Parse the number of lines. */
1751 cp = *bufp + pos;
1752 len = strcspn(cp, " \t");
1753 cp[len] = '\0';
1754 if ((iv = mandoc_strntoi(cp, len, 10)) <= 0) {
1755 mandoc_msg(MANDOCERR_NUMERIC, r->parse,
1756 ln, ppos, *bufp + 1);
1757 return(ROFF_IGN);
1758 }
1759 cp += len + 1;
1760
1761 /* Arm the input line trap. */
1762 roffit_lines = iv;
1763 roffit_macro = mandoc_strdup(cp);
1764 return(ROFF_IGN);
1765 }
1766
1767 static enum rofferr
1768 roff_Dd(ROFF_ARGS)
1769 {
1770 const char *const *cp;
1771
1772 if (0 == ((MPARSE_MDOC | MPARSE_QUICK) & r->options))
1773 for (cp = __mdoc_reserved; *cp; cp++)
1774 roff_setstr(r, *cp, NULL, 0);
1775
1776 return(ROFF_CONT);
1777 }
1778
1779 static enum rofferr
1780 roff_TH(ROFF_ARGS)
1781 {
1782 const char *const *cp;
1783
1784 if (0 == (MPARSE_QUICK & r->options))
1785 for (cp = __man_reserved; *cp; cp++)
1786 roff_setstr(r, *cp, NULL, 0);
1787
1788 return(ROFF_CONT);
1789 }
1790
1791 static enum rofferr
1792 roff_TE(ROFF_ARGS)
1793 {
1794
1795 if (NULL == r->tbl)
1796 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1797 else
1798 tbl_end(&r->tbl);
1799
1800 return(ROFF_IGN);
1801 }
1802
1803 static enum rofferr
1804 roff_T_(ROFF_ARGS)
1805 {
1806
1807 if (NULL == r->tbl)
1808 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1809 else
1810 tbl_restart(ppos, ln, r->tbl);
1811
1812 return(ROFF_IGN);
1813 }
1814
#if 0
/*
 * Currently compiled out.
 * Call eqn_end() on the open equation, if any, and return 1
 * if it reports ROFF_EQN; return 0 in all other cases.
 */
static int
roff_closeeqn(struct roff *r)
{

	if (NULL == r->eqn)
		return(0);

	return(ROFF_EQN == eqn_end(&r->eqn));
}
#endif
1823
1824 static void
1825 roff_openeqn(struct roff *r, const char *name, int line,
1826 int offs, const char *buf)
1827 {
1828 struct eqn_node *e;
1829 int poff;
1830
1831 assert(NULL == r->eqn);
1832 e = eqn_alloc(name, offs, line, r->parse);
1833
1834 if (r->last_eqn)
1835 r->last_eqn->next = e;
1836 else
1837 r->first_eqn = r->last_eqn = e;
1838
1839 r->eqn = r->last_eqn = e;
1840
1841 if (buf) {
1842 poff = 0;
1843 eqn_read(&r->eqn, line, buf, offs, &poff);
1844 }
1845 }
1846
1847 static enum rofferr
1848 roff_EQ(ROFF_ARGS)
1849 {
1850
1851 roff_openeqn(r, *bufp + pos, ln, ppos, NULL);
1852 return(ROFF_IGN);
1853 }
1854
1855 static enum rofferr
1856 roff_EN(ROFF_ARGS)
1857 {
1858
1859 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1860 return(ROFF_IGN);
1861 }
1862
1863 static enum rofferr
1864 roff_TS(ROFF_ARGS)
1865 {
1866 struct tbl_node *tbl;
1867
1868 if (r->tbl) {
1869 mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
1870 tbl_end(&r->tbl);
1871 }
1872
1873 tbl = tbl_alloc(ppos, ln, r->parse);
1874
1875 if (r->last_tbl)
1876 r->last_tbl->next = tbl;
1877 else
1878 r->first_tbl = r->last_tbl = tbl;
1879
1880 r->tbl = r->last_tbl = tbl;
1881 return(ROFF_IGN);
1882 }
1883
1884 static enum rofferr
1885 roff_cc(ROFF_ARGS)
1886 {
1887 const char *p;
1888
1889 p = *bufp + pos;
1890
1891 if ('\0' == *p || '.' == (r->control = *p++))
1892 r->control = 0;
1893
1894 if ('\0' != *p)
1895 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1896
1897 return(ROFF_IGN);
1898 }
1899
1900 static enum rofferr
1901 roff_tr(ROFF_ARGS)
1902 {
1903 const char *p, *first, *second;
1904 size_t fsz, ssz;
1905 enum mandoc_esc esc;
1906
1907 p = *bufp + pos;
1908
1909 if ('\0' == *p) {
1910 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1911 return(ROFF_IGN);
1912 }
1913
1914 while ('\0' != *p) {
1915 fsz = ssz = 1;
1916
1917 first = p++;
1918 if ('\\' == *first) {
1919 esc = mandoc_escape(&p, NULL, NULL);
1920 if (ESCAPE_ERROR == esc) {
1921 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
1922 ln, (int)(p - *bufp), first);
1923 return(ROFF_IGN);
1924 }
1925 fsz = (size_t)(p - first);
1926 }
1927
1928 second = p++;
1929 if ('\\' == *second) {
1930 esc = mandoc_escape(&p, NULL, NULL);
1931 if (ESCAPE_ERROR == esc) {
1932 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
1933 ln, (int)(p - *bufp), second);
1934 return(ROFF_IGN);
1935 }
1936 ssz = (size_t)(p - second);
1937 } else if ('\0' == *second) {
1938 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
1939 ln, (int)(p - *bufp), NULL);
1940 second = " ";
1941 p--;
1942 }
1943
1944 if (fsz > 1) {
1945 roff_setstrn(&r->xmbtab, first, fsz,
1946 second, ssz, 0);
1947 continue;
1948 }
1949
1950 if (NULL == r->xtab)
1951 r->xtab = mandoc_calloc(128,
1952 sizeof(struct roffstr));
1953
1954 free(r->xtab[(int)*first].p);
1955 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
1956 r->xtab[(int)*first].sz = ssz;
1957 }
1958
1959 return(ROFF_IGN);
1960 }
1961
1962 static enum rofferr
1963 roff_so(ROFF_ARGS)
1964 {
1965 char *name;
1966
1967 name = *bufp + pos;
1968 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, ".so %s", name);
1969
1970 /*
1971 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1972 * opening anything that's not in our cwd or anything beneath
1973 * it. Thus, explicitly disallow traversing up the file-system
1974 * or using absolute paths.
1975 */
1976
1977 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1978 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
1979 ".so %s", name);
1980 return(ROFF_ERR);
1981 }
1982
1983 *offs = pos;
1984 return(ROFF_SO);
1985 }
1986
1987 static enum rofferr
1988 roff_userdef(ROFF_ARGS)
1989 {
1990 const char *arg[9];
1991 char *cp, *n1, *n2;
1992 int i;
1993
1994 /*
1995 * Collect pointers to macro argument strings
1996 * and NUL-terminate them.
1997 */
1998 cp = *bufp + pos;
1999 for (i = 0; i < 9; i++)
2000 arg[i] = '\0' == *cp ? "" :
2001 mandoc_getarg(r->parse, &cp, ln, &pos);
2002
2003 /*
2004 * Expand macro arguments.
2005 */
2006 *szp = 0;
2007 n1 = cp = mandoc_strdup(r->current_string);
2008 while (NULL != (cp = strstr(cp, "\\$"))) {
2009 i = cp[2] - '1';
2010 if (0 > i || 8 < i) {
2011 /* Not an argument invocation. */
2012 cp += 2;
2013 continue;
2014 }
2015 *cp = '\0';
2016 *szp = mandoc_asprintf(&n2, "%s%s%s",
2017 n1, arg[i], cp + 3) + 1;
2018 cp = n2 + (cp - n1);
2019 free(n1);
2020 n1 = n2;
2021 }
2022
2023 /*
2024 * Replace the macro invocation
2025 * by the expanded macro.
2026 */
2027 free(*bufp);
2028 *bufp = n1;
2029 if (0 == *szp)
2030 *szp = strlen(*bufp) + 1;
2031
2032 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
2033 ROFF_REPARSE : ROFF_APPEND);
2034 }
2035
2036 static size_t
2037 roff_getname(struct roff *r, char **cpp, int ln, int pos)
2038 {
2039 char *name, *cp;
2040 size_t namesz;
2041
2042 name = *cpp;
2043 if ('\0' == *name)
2044 return(0);
2045
2046 /* Read until end of name and terminate it with NUL. */
2047 for (cp = name; 1; cp++) {
2048 if ('\0' == *cp || ' ' == *cp) {
2049 namesz = cp - name;
2050 break;
2051 }
2052 if ('\\' != *cp)
2053 continue;
2054 namesz = cp - name;
2055 if ('{' == cp[1] || '}' == cp[1])
2056 break;
2057 cp++;
2058 if ('\\' == *cp)
2059 continue;
2060 mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
2061 mandoc_escape((const char **)&cp, NULL, NULL);
2062 break;
2063 }
2064
2065 /* Read past spaces. */
2066 while (' ' == *cp)
2067 cp++;
2068
2069 *cpp = cp;
2070 return(namesz);
2071 }
2072
2073 /*
2074 * Store *string into the user-defined string called *name.
2075 * To clear an existing entry, call with (*r, *name, NULL, 0).
2076 * append == 0: replace mode
2077 * append == 1: single-line append mode
2078 * append == 2: multiline append mode, append '\n' after each call
2079 */
2080 static void
2081 roff_setstr(struct roff *r, const char *name, const char *string,
2082 int append)
2083 {
2084
2085 roff_setstrn(&r->strtab, name, strlen(name), string,
2086 string ? strlen(string) : 0, append);
2087 }
2088
2089 static void
2090 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
2091 const char *string, size_t stringsz, int append)
2092 {
2093 struct roffkv *n;
2094 char *c;
2095 int i;
2096 size_t oldch, newch;
2097
2098 /* Search for an existing string with the same name. */
2099 n = *r;
2100
2101 while (n && (namesz != n->key.sz ||
2102 strncmp(n->key.p, name, namesz)))
2103 n = n->next;
2104
2105 if (NULL == n) {
2106 /* Create a new string table entry. */
2107 n = mandoc_malloc(sizeof(struct roffkv));
2108 n->key.p = mandoc_strndup(name, namesz);
2109 n->key.sz = namesz;
2110 n->val.p = NULL;
2111 n->val.sz = 0;
2112 n->next = *r;
2113 *r = n;
2114 } else if (0 == append) {
2115 free(n->val.p);
2116 n->val.p = NULL;
2117 n->val.sz = 0;
2118 }
2119
2120 if (NULL == string)
2121 return;
2122
2123 /*
2124 * One additional byte for the '\n' in multiline mode,
2125 * and one for the terminating '\0'.
2126 */
2127 newch = stringsz + (1 < append ? 2u : 1u);
2128
2129 if (NULL == n->val.p) {
2130 n->val.p = mandoc_malloc(newch);
2131 *n->val.p = '\0';
2132 oldch = 0;
2133 } else {
2134 oldch = n->val.sz;
2135 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
2136 }
2137
2138 /* Skip existing content in the destination buffer. */
2139 c = n->val.p + (int)oldch;
2140
2141 /* Append new content to the destination buffer. */
2142 i = 0;
2143 while (i < (int)stringsz) {
2144 /*
2145 * Rudimentary roff copy mode:
2146 * Handle escaped backslashes.
2147 */
2148 if ('\\' == string[i] && '\\' == string[i + 1])
2149 i++;
2150 *c++ = string[i++];
2151 }
2152
2153 /* Append terminating bytes. */
2154 if (1 < append)
2155 *c++ = '\n';
2156
2157 *c = '\0';
2158 n->val.sz = (int)(c - n->val.p);
2159 }
2160
2161 static const char *
2162 roff_getstrn(const struct roff *r, const char *name, size_t len)
2163 {
2164 const struct roffkv *n;
2165 int i;
2166
2167 for (n = r->strtab; n; n = n->next)
2168 if (0 == strncmp(name, n->key.p, len) &&
2169 '\0' == n->key.p[(int)len])
2170 return(n->val.p);
2171
2172 for (i = 0; i < PREDEFS_MAX; i++)
2173 if (0 == strncmp(name, predefs[i].name, len) &&
2174 '\0' == predefs[i].name[(int)len])
2175 return(predefs[i].str);
2176
2177 return(NULL);
2178 }
2179
2180 static void
2181 roff_freestr(struct roffkv *r)
2182 {
2183 struct roffkv *n, *nn;
2184
2185 for (n = r; n; n = nn) {
2186 free(n->key.p);
2187 free(n->val.p);
2188 nn = n->next;
2189 free(n);
2190 }
2191 }
2192
2193 const struct tbl_span *
2194 roff_span(const struct roff *r)
2195 {
2196
2197 return(r->tbl ? tbl_span(r->tbl) : NULL);
2198 }
2199
2200 const struct eqn *
2201 roff_eqn(const struct roff *r)
2202 {
2203
2204 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
2205 }
2206
2207 /*
2208 * Duplicate an input string, making the appropriate character
2209 * conversations (as stipulated by `tr') along the way.
2210 * Returns a heap-allocated string with all the replacements made.
2211 */
2212 char *
2213 roff_strdup(const struct roff *r, const char *p)
2214 {
2215 const struct roffkv *cp;
2216 char *res;
2217 const char *pp;
2218 size_t ssz, sz;
2219 enum mandoc_esc esc;
2220
2221 if (NULL == r->xmbtab && NULL == r->xtab)
2222 return(mandoc_strdup(p));
2223 else if ('\0' == *p)
2224 return(mandoc_strdup(""));
2225
2226 /*
2227 * Step through each character looking for term matches
2228 * (remember that a `tr' can be invoked with an escape, which is
2229 * a glyph but the escape is multi-character).
2230 * We only do this if the character hash has been initialised
2231 * and the string is >0 length.
2232 */
2233
2234 res = NULL;
2235 ssz = 0;
2236
2237 while ('\0' != *p) {
2238 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
2239 sz = r->xtab[(int)*p].sz;
2240 res = mandoc_realloc(res, ssz + sz + 1);
2241 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
2242 ssz += sz;
2243 p++;
2244 continue;
2245 } else if ('\\' != *p) {
2246 res = mandoc_realloc(res, ssz + 2);
2247 res[ssz++] = *p++;
2248 continue;
2249 }
2250
2251 /* Search for term matches. */
2252 for (cp = r->xmbtab; cp; cp = cp->next)
2253 if (0 == strncmp(p, cp->key.p, cp->key.sz))
2254 break;
2255
2256 if (NULL != cp) {
2257 /*
2258 * A match has been found.
2259 * Append the match to the array and move
2260 * forward by its keysize.
2261 */
2262 res = mandoc_realloc(res,
2263 ssz + cp->val.sz + 1);
2264 memcpy(res + ssz, cp->val.p, cp->val.sz);
2265 ssz += cp->val.sz;
2266 p += (int)cp->key.sz;
2267 continue;
2268 }
2269
2270 /*
2271 * Handle escapes carefully: we need to copy
2272 * over just the escape itself, or else we might
2273 * do replacements within the escape itself.
2274 * Make sure to pass along the bogus string.
2275 */
2276 pp = p++;
2277 esc = mandoc_escape(&p, NULL, NULL);
2278 if (ESCAPE_ERROR == esc) {
2279 sz = strlen(pp);
2280 res = mandoc_realloc(res, ssz + sz + 1);
2281 memcpy(res + ssz, pp, sz);
2282 break;
2283 }
2284 /*
2285 * We bail out on bad escapes.
2286 * No need to warn: we already did so when
2287 * roff_res() was called.
2288 */
2289 sz = (int)(p - pp);
2290 res = mandoc_realloc(res, ssz + sz + 1);
2291 memcpy(res + ssz, pp, sz);
2292 ssz += sz;
2293 }
2294
2295 res[(int)ssz] = '\0';
2296 return(res);
2297 }
2298
2299 /*
2300 * Find out whether a line is a macro line or not.
2301 * If it is, adjust the current position and return one; if it isn't,
2302 * return zero and don't change the current position.
2303 * If the control character has been set with `.cc', then let that grain
2304 * precedence.
2305 * This is slighly contrary to groff, where using the non-breaking
2306 * control character when `cc' has been invoked will cause the
2307 * non-breaking macro contents to be printed verbatim.
2308 */
2309 int
2310 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
2311 {
2312 int pos;
2313
2314 pos = *ppos;
2315
2316 if (0 != r->control && cp[pos] == r->control)
2317 pos++;
2318 else if (0 != r->control)
2319 return(0);
2320 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
2321 pos += 2;
2322 else if ('.' == cp[pos] || '\'' == cp[pos])
2323 pos++;
2324 else
2325 return(0);
2326
2327 while (' ' == cp[pos] || '\t' == cp[pos])
2328 pos++;
2329
2330 *ppos = pos;
2331 return(1);
2332 }