]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
2b01f5936ffd25b784b0967724251a4b23deff56
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.212 2014/06/29 22:14:10 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include "mandoc.h"
29 #include "mandoc_aux.h"
30 #include "libroff.h"
31 #include "libmandoc.h"
32
/*
 * Capacity of the rule stack in struct roff that remembers the
 * outcome of `ie' requests; this bounds if-else nesting depth.
 */
#define	RSTACK_MAX	128

/*
 * Upper bound on the number of expansions performed for one input
 * line, so that self-referential definitions cannot loop forever.
 */
#define	EXPAND_LIMIT	1000
38
/*
 * Tokens for the requests and macros handled by this file.
 * The values are used as indices into the roffs[] table, so the
 * order of the enumerators must match the order of that table.
 */
enum	rofft {
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_as,
	ROFF_cc,
	ROFF_ce,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_fam,
	ROFF_hw,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_ps,
	ROFF_rm,
	ROFF_rr,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,
	ROFF_Dd,
	ROFF_TH,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,
	ROFF_EQ,
	ROFF_EN,
	ROFF_cblock,	/* the block-closing request */
	ROFF_USERDEF,	/* a user-defined macro */
	ROFF_MAX	/* number of tokens; also means "not found" */
};
80
/*
 * Minimal string buffer: a pointer plus its cached length.
 */
struct	roffstr {
	char		*p;	/* nil-terminated buffer */
	size_t		 sz;	/* saved strlen(p) */
};
88
89 /*
90 * A key-value roffstr pair as part of a singly-linked list.
91 */
92 struct roffkv {
93 struct roffstr key;
94 struct roffstr val;
95 struct roffkv *next; /* next in list */
96 };
97
98 /*
99 * A single number register as part of a singly-linked list.
100 */
101 struct roffreg {
102 struct roffstr key;
103 int val;
104 struct roffreg *next;
105 };
106
107 struct roff {
108 struct mparse *parse; /* parse point */
109 int options; /* parse options */
110 struct roffnode *last; /* leaf of stack */
111 int rstack[RSTACK_MAX]; /* stack of !`ie' rules */
112 char control; /* control character */
113 int rstackpos; /* position in rstack */
114 struct roffreg *regtab; /* number registers */
115 struct roffkv *strtab; /* user-defined strings & macros */
116 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */
117 struct roffstr *xtab; /* single-byte trans table (`tr') */
118 const char *current_string; /* value of last called user macro */
119 struct tbl_node *first_tbl; /* first table parsed */
120 struct tbl_node *last_tbl; /* last table parsed */
121 struct tbl_node *tbl; /* current table being parsed */
122 struct eqn_node *last_eqn; /* last equation parsed */
123 struct eqn_node *first_eqn; /* first equation parsed */
124 struct eqn_node *eqn; /* current equation being parsed */
125 };
126
127 struct roffnode {
128 enum rofft tok; /* type of node */
129 struct roffnode *parent; /* up one in stack */
130 int line; /* parse line */
131 int col; /* parse col */
132 char *name; /* node name, e.g. macro name */
133 char *end; /* end-rules: custom token */
134 int endspan; /* end-rules: next-line or infty */
135 int rule; /* current evaluation rule */
136 };
137
/*
 * Common parameter list shared by all request handlers.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum rofft tok, /* tok of macro */ \
			 char **bufp, /* input buffer */ \
			 size_t *szp, /* size of input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Signature of a request handler. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
148
149 struct roffmac {
150 const char *name; /* macro name */
151 roffproc proc; /* process new macro */
152 roffproc text; /* process as child text of macro */
153 roffproc sub; /* process as child of macro */
154 int flags;
155 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
156 struct roffmac *next;
157 };
158
/*
 * One predefined string: an input name and its replacement.
 */
struct	predef {
	const char	*name;	/* predefined input name */
	const char	*str;	/* replacement symbol */
};

/* Expands to one struct predef initializer, trailing comma included. */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
166
/* Forward declarations of all file-local functions. */

static	enum rofft	 roffhash_find(const char *, size_t);
static	void		 roffhash_init(void);
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum rofft,
				const char *, int, int);
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_cc(ROFF_ARGS);
static	void		 roff_ccond(struct roff *, int, int);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	int		 roff_evalcond(const char *, int *);
static	int		 roff_evalnum(const char *, int *, int *, int);
static	int		 roff_evalpar(const char *, int *, int *);
static	int		 roff_evalstrcond(const char *, int *);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	size_t		 roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(const struct roff *,
				const char *, size_t);
static	int		 roff_getregro(const char *name);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	enum rofferr	 roff_it(ROFF_ARGS);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	void		 roff_openeqn(struct roff *, const char *,
				int, int, const char *);
static	enum rofft	 roff_parse(struct roff *, const char *, int *);
static	enum rofferr	 roff_parsetext(char **, size_t *, int, int *);
static	enum rofferr	 roff_res(struct roff *,
				char **, size_t *, int, int);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	enum rofferr	 roff_rr(ROFF_ARGS);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_tr(ROFF_ARGS);
static	enum rofferr	 roff_Dd(ROFF_ARGS);
static	enum rofferr	 roff_TH(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
223
/*
 * The request hash table has one bucket per character in the
 * range ASCII_LO to ASCII_HI inclusive; see roffhash_find().
 */

#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

static	struct roffmac	*hash[HASHWIDTH];
231
232 static struct roffmac roffs[ROFF_MAX] = {
233 { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
234 { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
235 { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
236 { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
237 { "as", roff_ds, NULL, NULL, 0, NULL },
238 { "cc", roff_cc, NULL, NULL, 0, NULL },
239 { "ce", roff_line_ignore, NULL, NULL, 0, NULL },
240 { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
241 { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
242 { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
243 { "ds", roff_ds, NULL, NULL, 0, NULL },
244 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
245 { "fam", roff_line_ignore, NULL, NULL, 0, NULL },
246 { "hw", roff_line_ignore, NULL, NULL, 0, NULL },
247 { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
248 { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
249 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
250 { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
251 { "it", roff_it, NULL, NULL, 0, NULL },
252 { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
253 { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
254 { "nr", roff_nr, NULL, NULL, 0, NULL },
255 { "ns", roff_line_ignore, NULL, NULL, 0, NULL },
256 { "ps", roff_line_ignore, NULL, NULL, 0, NULL },
257 { "rm", roff_rm, NULL, NULL, 0, NULL },
258 { "rr", roff_rr, NULL, NULL, 0, NULL },
259 { "so", roff_so, NULL, NULL, 0, NULL },
260 { "ta", roff_line_ignore, NULL, NULL, 0, NULL },
261 { "tr", roff_tr, NULL, NULL, 0, NULL },
262 { "Dd", roff_Dd, NULL, NULL, 0, NULL },
263 { "TH", roff_TH, NULL, NULL, 0, NULL },
264 { "TS", roff_TS, NULL, NULL, 0, NULL },
265 { "TE", roff_TE, NULL, NULL, 0, NULL },
266 { "T&", roff_T_, NULL, NULL, 0, NULL },
267 { "EQ", roff_EQ, NULL, NULL, 0, NULL },
268 { "EN", roff_EN, NULL, NULL, 0, NULL },
269 { ".", roff_cblock, NULL, NULL, 0, NULL },
270 { NULL, roff_userdef, NULL, NULL, 0, NULL },
271 };
272
/*
 * NULL-terminated list of mdoc macro names.
 * Not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH
 */
const	char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
	"Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
	"Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em",
	"En", "Eo", "Er", "Es", "Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp",
	"Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "Pq",
	"Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv",
	"Sc", "Sh", "Sm", "So", "Sq",
	"Ss", "St", "Sx", "Sy",
	"Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
	"%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
295
/*
 * NULL-terminated list of man macro names.
 * Not currently implemented: BT DE DS ME MT PT SY TQ YS
 */
const	char *const __man_reserved[] = {
	"AT", "B", "BI", "BR", "DT",
	"EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR",
	"LP", "OP", "P", "PD", "PP",
	"R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS",
	"TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR",
	NULL
};
305
306 /* Array of injected predefined strings. */
307 #define PREDEFS_MAX 38
308 static const struct predef predefs[PREDEFS_MAX] = {
309 #include "predefs.in"
310 };
311
/*
 * Map a request name to its bucket in hash[]: the offset of the
 * first character from ASCII_LO.  See roffhash_find().
 * The argument is parenthesized so that any pointer expression,
 * not just a plain identifier, expands correctly.
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)

/* State of the pending input line trap, see roff_parsetext(). */
static	int	 roffit_lines;	/* number of lines to delay */
static	char	*roffit_macro;	/* nil-terminated macro line */
317
318
319 static void
320 roffhash_init(void)
321 {
322 struct roffmac *n;
323 int buc, i;
324
325 for (i = 0; i < (int)ROFF_USERDEF; i++) {
326 assert(roffs[i].name[0] >= ASCII_LO);
327 assert(roffs[i].name[0] <= ASCII_HI);
328
329 buc = ROFF_HASH(roffs[i].name);
330
331 if (NULL != (n = hash[buc])) {
332 for ( ; n->next; n = n->next)
333 /* Do nothing. */ ;
334 n->next = &roffs[i];
335 } else
336 hash[buc] = &roffs[i];
337 }
338 }
339
340 /*
341 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
342 * the nil-terminated string name could be found.
343 */
344 static enum rofft
345 roffhash_find(const char *p, size_t s)
346 {
347 int buc;
348 struct roffmac *n;
349
350 /*
351 * libroff has an extremely simple hashtable, for the time
352 * being, which simply keys on the first character, which must
353 * be printable, then walks a chain. It works well enough until
354 * optimised.
355 */
356
357 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
358 return(ROFF_MAX);
359
360 buc = ROFF_HASH(p);
361
362 if (NULL == (n = hash[buc]))
363 return(ROFF_MAX);
364 for ( ; n; n = n->next)
365 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
366 return((enum rofft)(n - roffs));
367
368 return(ROFF_MAX);
369 }
370
371 /*
372 * Pop the current node off of the stack of roff instructions currently
373 * pending.
374 */
375 static void
376 roffnode_pop(struct roff *r)
377 {
378 struct roffnode *p;
379
380 assert(r->last);
381 p = r->last;
382
383 r->last = r->last->parent;
384 free(p->name);
385 free(p->end);
386 free(p);
387 }
388
389 /*
390 * Push a roff node onto the instruction stack. This must later be
391 * removed with roffnode_pop().
392 */
393 static void
394 roffnode_push(struct roff *r, enum rofft tok, const char *name,
395 int line, int col)
396 {
397 struct roffnode *p;
398
399 p = mandoc_calloc(1, sizeof(struct roffnode));
400 p->tok = tok;
401 if (name)
402 p->name = mandoc_strdup(name);
403 p->parent = r->last;
404 p->line = line;
405 p->col = col;
406 p->rule = p->parent ? p->parent->rule : 0;
407
408 r->last = p;
409 }
410
411 static void
412 roff_free1(struct roff *r)
413 {
414 struct tbl_node *tbl;
415 struct eqn_node *e;
416 int i;
417
418 while (NULL != (tbl = r->first_tbl)) {
419 r->first_tbl = tbl->next;
420 tbl_free(tbl);
421 }
422
423 r->first_tbl = r->last_tbl = r->tbl = NULL;
424
425 while (NULL != (e = r->first_eqn)) {
426 r->first_eqn = e->next;
427 eqn_free(e);
428 }
429
430 r->first_eqn = r->last_eqn = r->eqn = NULL;
431
432 while (r->last)
433 roffnode_pop(r);
434
435 roff_freestr(r->strtab);
436 roff_freestr(r->xmbtab);
437
438 r->strtab = r->xmbtab = NULL;
439
440 roff_freereg(r->regtab);
441
442 r->regtab = NULL;
443
444 if (r->xtab)
445 for (i = 0; i < 128; i++)
446 free(r->xtab[i].p);
447
448 free(r->xtab);
449 r->xtab = NULL;
450 }
451
452 void
453 roff_reset(struct roff *r)
454 {
455
456 roff_free1(r);
457 r->control = 0;
458 }
459
/*
 * Destroy a parser: release all data it owns, then the
 * parser state itself.
 */
void
roff_free(struct roff *r)
{

	roff_free1(r);
	free(r);
}
467
468 struct roff *
469 roff_alloc(struct mparse *parse, int options)
470 {
471 struct roff *r;
472
473 r = mandoc_calloc(1, sizeof(struct roff));
474 r->parse = parse;
475 r->options = options;
476 r->rstackpos = -1;
477
478 roffhash_init();
479
480 return(r);
481 }
482
483 /*
484 * In the current line, expand escape sequences that tend to get
485 * used in numerical expressions and conditional requests.
486 * Also check the syntax of the remaining escape sequences.
487 */
488 static enum rofferr
489 roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
490 {
491 char ubuf[24]; /* buffer to print the number */
492 const char *start; /* start of the string to process */
493 char *stesc; /* start of an escape sequence ('\\') */
494 const char *stnam; /* start of the name, after "[(*" */
495 const char *cp; /* end of the name, e.g. before ']' */
496 const char *res; /* the string to be substituted */
497 char *nbuf; /* new buffer to copy bufp to */
498 size_t maxl; /* expected length of the escape name */
499 size_t naml; /* actual length of the escape name */
500 int expand_count; /* to avoid infinite loops */
501 int npos; /* position in numeric expression */
502 int irc; /* return code from roff_evalnum() */
503 char term; /* character terminating the escape */
504
505 expand_count = 0;
506 start = *bufp + pos;
507 stesc = strchr(start, '\0') - 1;
508 while (stesc-- > start) {
509
510 /* Search backwards for the next backslash. */
511
512 if ('\\' != *stesc)
513 continue;
514
515 /* If it is escaped, skip it. */
516
517 for (cp = stesc - 1; cp >= start; cp--)
518 if ('\\' != *cp)
519 break;
520
521 if (0 == (stesc - cp) % 2) {
522 stesc = (char *)cp;
523 continue;
524 }
525
526 /* Decide whether to expand or to check only. */
527
528 term = '\0';
529 cp = stesc + 1;
530 switch (*cp) {
531 case '*':
532 res = NULL;
533 break;
534 case 'B':
535 /* FALLTHROUGH */
536 case 'w':
537 term = cp[1];
538 /* FALLTHROUGH */
539 case 'n':
540 res = ubuf;
541 break;
542 default:
543 if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
544 mandoc_msg(MANDOCERR_BADESCAPE, r->parse,
545 ln, (int)(stesc - *bufp), NULL);
546 continue;
547 }
548
549 if (EXPAND_LIMIT < ++expand_count) {
550 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
551 ln, (int)(stesc - *bufp), NULL);
552 return(ROFF_IGN);
553 }
554
555 /*
556 * The third character decides the length
557 * of the name of the string or register.
558 * Save a pointer to the name.
559 */
560
561 if ('\0' == term) {
562 switch (*++cp) {
563 case '\0':
564 maxl = 0;
565 break;
566 case '(':
567 cp++;
568 maxl = 2;
569 break;
570 case '[':
571 cp++;
572 term = ']';
573 maxl = 0;
574 break;
575 default:
576 maxl = 1;
577 break;
578 }
579 } else {
580 cp += 2;
581 maxl = 0;
582 }
583 stnam = cp;
584
585 /* Advance to the end of the name. */
586
587 for (naml = 0; 0 == maxl || naml < maxl; naml++, cp++) {
588 if ('\0' == *cp) {
589 mandoc_msg(MANDOCERR_BADESCAPE, r->parse,
590 ln, (int)(stesc - *bufp), NULL);
591 break;
592 }
593 if (0 == maxl && *cp == term) {
594 cp++;
595 break;
596 }
597 }
598
599 /*
600 * Retrieve the replacement string; if it is
601 * undefined, resume searching for escapes.
602 */
603
604 switch (stesc[1]) {
605 case '*':
606 res = roff_getstrn(r, stnam, naml);
607 break;
608 case 'B':
609 npos = 0;
610 irc = roff_evalnum(stnam, &npos, NULL, 0);
611 ubuf[0] = irc && stnam + npos + 1 == cp
612 ? '1' : '0';
613 ubuf[1] = '\0';
614 break;
615 case 'n':
616 (void)snprintf(ubuf, sizeof(ubuf), "%d",
617 roff_getregn(r, stnam, naml));
618 break;
619 case 'w':
620 (void)snprintf(ubuf, sizeof(ubuf), "%d",
621 24 * (int)naml);
622 break;
623 }
624
625 if (NULL == res) {
626 mandoc_msg(MANDOCERR_BADESCAPE, r->parse,
627 ln, (int)(stesc - *bufp), NULL);
628 res = "";
629 }
630
631 /* Replace the escape sequence by the string. */
632
633 *stesc = '\0';
634 *szp = mandoc_asprintf(&nbuf, "%s%s%s",
635 *bufp, res, cp) + 1;
636
637 /* Prepare for the next replacement. */
638
639 start = nbuf + pos;
640 stesc = nbuf + (stesc - *bufp) + strlen(res);
641 free(*bufp);
642 *bufp = nbuf;
643 }
644 return(ROFF_CONT);
645 }
646
647 /*
648 * Process text streams:
649 * Convert all breakable hyphens into ASCII_HYPH.
650 * Decrement and spring input line trap.
651 */
652 static enum rofferr
653 roff_parsetext(char **bufp, size_t *szp, int pos, int *offs)
654 {
655 size_t sz;
656 const char *start;
657 char *p;
658 int isz;
659 enum mandoc_esc esc;
660
661 start = p = *bufp + pos;
662
663 while ('\0' != *p) {
664 sz = strcspn(p, "-\\");
665 p += sz;
666
667 if ('\0' == *p)
668 break;
669
670 if ('\\' == *p) {
671 /* Skip over escapes. */
672 p++;
673 esc = mandoc_escape((const char **)&p, NULL, NULL);
674 if (ESCAPE_ERROR == esc)
675 break;
676 continue;
677 } else if (p == start) {
678 p++;
679 continue;
680 }
681
682 if (isalpha((unsigned char)p[-1]) &&
683 isalpha((unsigned char)p[1]))
684 *p = ASCII_HYPH;
685 p++;
686 }
687
688 /* Spring the input line trap. */
689 if (1 == roffit_lines) {
690 isz = mandoc_asprintf(&p, "%s\n.%s", *bufp, roffit_macro);
691 free(*bufp);
692 *bufp = p;
693 *szp = isz + 1;
694 *offs = 0;
695 free(roffit_macro);
696 roffit_lines = 0;
697 return(ROFF_REPARSE);
698 } else if (1 < roffit_lines)
699 --roffit_lines;
700 return(ROFF_CONT);
701 }
702
703 enum rofferr
704 roff_parseln(struct roff *r, int ln, char **bufp,
705 size_t *szp, int pos, int *offs)
706 {
707 enum rofft t;
708 enum rofferr e;
709 int ppos, ctl;
710
711 /*
712 * Run the reserved-word filter only if we have some reserved
713 * words to fill in.
714 */
715
716 e = roff_res(r, bufp, szp, ln, pos);
717 if (ROFF_IGN == e)
718 return(e);
719 assert(ROFF_CONT == e);
720
721 ppos = pos;
722 ctl = roff_getcontrol(r, *bufp, &pos);
723
724 /*
725 * First, if a scope is open and we're not a macro, pass the
726 * text through the macro's filter. If a scope isn't open and
727 * we're not a macro, just let it through.
728 * Finally, if there's an equation scope open, divert it into it
729 * no matter our state.
730 */
731
732 if (r->last && ! ctl) {
733 t = r->last->tok;
734 assert(roffs[t].text);
735 e = (*roffs[t].text)(r, t, bufp, szp, ln, pos, pos, offs);
736 assert(ROFF_IGN == e || ROFF_CONT == e);
737 if (ROFF_CONT != e)
738 return(e);
739 }
740 if (r->eqn)
741 return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
742 if ( ! ctl) {
743 if (r->tbl)
744 return(tbl_read(r->tbl, ln, *bufp, pos));
745 return(roff_parsetext(bufp, szp, pos, offs));
746 }
747
748 /*
749 * If a scope is open, go to the child handler for that macro,
750 * as it may want to preprocess before doing anything with it.
751 * Don't do so if an equation is open.
752 */
753
754 if (r->last) {
755 t = r->last->tok;
756 assert(roffs[t].sub);
757 return((*roffs[t].sub)(r, t, bufp, szp,
758 ln, ppos, pos, offs));
759 }
760
761 /*
762 * Lastly, as we've no scope open, try to look up and execute
763 * the new macro. If no macro is found, simply return and let
764 * the compilers handle it.
765 */
766
767 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
768 return(ROFF_CONT);
769
770 assert(roffs[t].proc);
771 return((*roffs[t].proc)(r, t, bufp, szp, ln, ppos, pos, offs));
772 }
773
774 void
775 roff_endparse(struct roff *r)
776 {
777
778 if (r->last)
779 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
780 r->last->line, r->last->col, NULL);
781
782 if (r->eqn) {
783 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
784 r->eqn->eqn.ln, r->eqn->eqn.pos, NULL);
785 eqn_end(&r->eqn);
786 }
787
788 if (r->tbl) {
789 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
790 r->tbl->line, r->tbl->pos, NULL);
791 tbl_end(&r->tbl);
792 }
793 }
794
795 /*
796 * Parse a roff node's type from the input buffer. This must be in the
797 * form of ".foo xxx" in the usual way.
798 */
799 static enum rofft
800 roff_parse(struct roff *r, const char *buf, int *pos)
801 {
802 const char *mac;
803 size_t maclen;
804 enum rofft t;
805
806 if ('\0' == buf[*pos] || '"' == buf[*pos] ||
807 '\t' == buf[*pos] || ' ' == buf[*pos])
808 return(ROFF_MAX);
809
810 /* We stop the macro parse at an escape, tab, space, or nil. */
811
812 mac = buf + *pos;
813 maclen = strcspn(mac, " \\\t\0");
814
815 t = (r->current_string = roff_getstrn(r, mac, maclen))
816 ? ROFF_USERDEF : roffhash_find(mac, maclen);
817
818 *pos += (int)maclen;
819
820 while (buf[*pos] && ' ' == buf[*pos])
821 (*pos)++;
822
823 return(t);
824 }
825
826 static enum rofferr
827 roff_cblock(ROFF_ARGS)
828 {
829
830 /*
831 * A block-close `..' should only be invoked as a child of an
832 * ignore macro, otherwise raise a warning and just ignore it.
833 */
834
835 if (NULL == r->last) {
836 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
837 return(ROFF_IGN);
838 }
839
840 switch (r->last->tok) {
841 case ROFF_am:
842 /* FALLTHROUGH */
843 case ROFF_ami:
844 /* FALLTHROUGH */
845 case ROFF_am1:
846 /* FALLTHROUGH */
847 case ROFF_de:
848 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
849 /* FALLTHROUGH */
850 case ROFF_dei:
851 /* FALLTHROUGH */
852 case ROFF_ig:
853 break;
854 default:
855 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
856 return(ROFF_IGN);
857 }
858
859 if ((*bufp)[pos])
860 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
861
862 roffnode_pop(r);
863 roffnode_cleanscope(r);
864 return(ROFF_IGN);
865
866 }
867
868 static void
869 roffnode_cleanscope(struct roff *r)
870 {
871
872 while (r->last) {
873 if (--r->last->endspan != 0)
874 break;
875 roffnode_pop(r);
876 }
877 }
878
879 static void
880 roff_ccond(struct roff *r, int ln, int ppos)
881 {
882
883 if (NULL == r->last) {
884 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
885 return;
886 }
887
888 switch (r->last->tok) {
889 case ROFF_el:
890 /* FALLTHROUGH */
891 case ROFF_ie:
892 /* FALLTHROUGH */
893 case ROFF_if:
894 break;
895 default:
896 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
897 return;
898 }
899
900 if (r->last->endspan > -1) {
901 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
902 return;
903 }
904
905 roffnode_pop(r);
906 roffnode_cleanscope(r);
907 return;
908 }
909
910 static enum rofferr
911 roff_block(ROFF_ARGS)
912 {
913 int sv;
914 size_t sz;
915 char *name;
916
917 name = NULL;
918
919 if (ROFF_ig != tok) {
920 if ('\0' == (*bufp)[pos]) {
921 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
922 return(ROFF_IGN);
923 }
924
925 /*
926 * Re-write `de1', since we don't really care about
927 * groff's strange compatibility mode, into `de'.
928 */
929
930 if (ROFF_de1 == tok)
931 tok = ROFF_de;
932 if (ROFF_de == tok)
933 name = *bufp + pos;
934 else
935 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
936 roffs[tok].name);
937
938 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
939 pos++;
940
941 while (isspace((unsigned char)(*bufp)[pos]))
942 (*bufp)[pos++] = '\0';
943 }
944
945 roffnode_push(r, tok, name, ln, ppos);
946
947 /*
948 * At the beginning of a `de' macro, clear the existing string
949 * with the same name, if there is one. New content will be
950 * appended from roff_block_text() in multiline mode.
951 */
952
953 if (ROFF_de == tok)
954 roff_setstr(r, name, "", 0);
955
956 if ('\0' == (*bufp)[pos])
957 return(ROFF_IGN);
958
959 /* If present, process the custom end-of-line marker. */
960
961 sv = pos;
962 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
963 pos++;
964
965 /*
966 * Note: groff does NOT like escape characters in the input.
967 * Instead of detecting this, we're just going to let it fly and
968 * to hell with it.
969 */
970
971 assert(pos > sv);
972 sz = (size_t)(pos - sv);
973
974 if (1 == sz && '.' == (*bufp)[sv])
975 return(ROFF_IGN);
976
977 r->last->end = mandoc_malloc(sz + 1);
978
979 memcpy(r->last->end, *bufp + sv, sz);
980 r->last->end[(int)sz] = '\0';
981
982 if ((*bufp)[pos])
983 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
984
985 return(ROFF_IGN);
986 }
987
988 static enum rofferr
989 roff_block_sub(ROFF_ARGS)
990 {
991 enum rofft t;
992 int i, j;
993
994 /*
995 * First check whether a custom macro exists at this level. If
996 * it does, then check against it. This is some of groff's
997 * stranger behaviours. If we encountered a custom end-scope
998 * tag and that tag also happens to be a "real" macro, then we
999 * need to try interpreting it again as a real macro. If it's
1000 * not, then return ignore. Else continue.
1001 */
1002
1003 if (r->last->end) {
1004 for (i = pos, j = 0; r->last->end[j]; j++, i++)
1005 if ((*bufp)[i] != r->last->end[j])
1006 break;
1007
1008 if ('\0' == r->last->end[j] &&
1009 ('\0' == (*bufp)[i] ||
1010 ' ' == (*bufp)[i] ||
1011 '\t' == (*bufp)[i])) {
1012 roffnode_pop(r);
1013 roffnode_cleanscope(r);
1014
1015 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
1016 i++;
1017
1018 pos = i;
1019 if (ROFF_MAX != roff_parse(r, *bufp, &pos))
1020 return(ROFF_RERUN);
1021 return(ROFF_IGN);
1022 }
1023 }
1024
1025 /*
1026 * If we have no custom end-query or lookup failed, then try
1027 * pulling it out of the hashtable.
1028 */
1029
1030 t = roff_parse(r, *bufp, &pos);
1031
1032 /*
1033 * Macros other than block-end are only significant
1034 * in `de' blocks; elsewhere, simply throw them away.
1035 */
1036 if (ROFF_cblock != t) {
1037 if (ROFF_de == tok)
1038 roff_setstr(r, r->last->name, *bufp + ppos, 2);
1039 return(ROFF_IGN);
1040 }
1041
1042 assert(roffs[t].proc);
1043 return((*roffs[t].proc)(r, t, bufp, szp, ln, ppos, pos, offs));
1044 }
1045
1046 static enum rofferr
1047 roff_block_text(ROFF_ARGS)
1048 {
1049
1050 if (ROFF_de == tok)
1051 roff_setstr(r, r->last->name, *bufp + pos, 2);
1052
1053 return(ROFF_IGN);
1054 }
1055
1056 static enum rofferr
1057 roff_cond_sub(ROFF_ARGS)
1058 {
1059 enum rofft t;
1060 char *ep;
1061 int rr;
1062
1063 rr = r->last->rule;
1064 roffnode_cleanscope(r);
1065 t = roff_parse(r, *bufp, &pos);
1066
1067 /*
1068 * Fully handle known macros when they are structurally
1069 * required or when the conditional evaluated to true.
1070 */
1071
1072 if ((ROFF_MAX != t) &&
1073 (rr || ROFFMAC_STRUCT & roffs[t].flags)) {
1074 assert(roffs[t].proc);
1075 return((*roffs[t].proc)(r, t, bufp, szp,
1076 ln, ppos, pos, offs));
1077 }
1078
1079 /*
1080 * If `\}' occurs on a macro line without a preceding macro,
1081 * drop the line completely.
1082 */
1083
1084 ep = *bufp + pos;
1085 if ('\\' == ep[0] && '}' == ep[1])
1086 rr = 0;
1087
1088 /* Always check for the closing delimiter `\}'. */
1089
1090 while (NULL != (ep = strchr(ep, '\\'))) {
1091 if ('}' == *(++ep)) {
1092 *ep = '&';
1093 roff_ccond(r, ln, ep - *bufp - 1);
1094 }
1095 ++ep;
1096 }
1097 return(rr ? ROFF_CONT : ROFF_IGN);
1098 }
1099
1100 static enum rofferr
1101 roff_cond_text(ROFF_ARGS)
1102 {
1103 char *ep;
1104 int rr;
1105
1106 rr = r->last->rule;
1107 roffnode_cleanscope(r);
1108
1109 ep = *bufp + pos;
1110 while (NULL != (ep = strchr(ep, '\\'))) {
1111 if ('}' == *(++ep)) {
1112 *ep = '&';
1113 roff_ccond(r, ln, ep - *bufp - 1);
1114 }
1115 ++ep;
1116 }
1117 return(rr ? ROFF_CONT : ROFF_IGN);
1118 }
1119
/*
 * Parse a single signed decimal integer at v[*pos].
 * Stop at the first non-digit.
 * If there is at least one digit, store the number in *res,
 * advance *pos past it, and return 1.  Otherwise, return 0
 * and leave *pos unchanged.  In both cases, *res is overwritten;
 * on failure, it is set to 0.  The res argument may be NULL if
 * the caller is not interested in the value.
 * Numbers too large for an int wrap around: they are accumulated
 * in unsigned arithmetic because signed overflow would be
 * undefined behaviour.
 */
static int
roff_getnum(const char *v, int *pos, int *res)
{
	unsigned int	 acc;
	int		 myres, n, p;

	if (NULL == res)
		res = &myres;
	*res = 0;

	p = *pos;
	n = v[p] == '-';
	if (n)
		p++;

	for (acc = 0; isdigit((unsigned char)v[p]); p++)
		acc = 10 * acc + (unsigned int)(v[p] - '0');
	if (p == *pos + n)
		return(0);

	if (n)
		acc = 0U - acc;
	*res = (int)acc;

	*pos = p;
	return(1);
}
1150
/*
 * Evaluate a string comparison condition.
 * The first character is the delimiter.
 * Succeed if the string up to its second occurrence
 * matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or lacking that, to the end of the line, but never beyond
 * the terminating nil.  On empty input, there is not even
 * a delimiter; fail and leave the cursor alone.
 * Returns 1 on a match and 0 otherwise.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*s1, *s2, *s3;
	int		 match;

	match = 0;
	s1 = v + *pos;		/* initial delimiter */

	if ('\0' == *s1)	/* nothing to look at */
		return(0);

	s2 = s1 + 1;		/* for scanning the first string */
	s3 = strchr(s2, *s1);	/* for scanning the second string */

	if (NULL == s3)		/* found no middle delimiter */
		goto out;

	while ('\0' != *++s3) {
		if (*s2 != *s3) {  /* mismatch */
			s3 = strchr(s3, *s1);
			break;
		}
		if (*s3 == *s1) {  /* found the final delimiter */
			match = 1;
			break;
		}
		s2++;
	}

out:
	if (NULL == s3)
		s3 = strchr(s2, '\0');
	else if ('\0' != *s3)	/* step over the final delimiter */
		s3++;
	*pos = (int)(s3 - v);
	return(match);
}
1193
/*
 * Evaluate an optionally negated single character, numerical,
 * or string condition starting at v[*pos], advancing *pos
 * past the part of the condition that was consumed.
 * The conditions `n' and `o' are treated as true and the
 * conditions `c', `d', `e', `r' and `t' as false.
 * A missing condition is false, whether negated or not.
 * Returns 1 if the condition holds and 0 otherwise.
 */
static int
roff_evalcond(const char *v, int *pos)
{
	int	 wanttrue, number;

	if ('!' == v[*pos]) {
		wanttrue = 0;
		(*pos)++;
	} else
		wanttrue = 1;

	switch (v[*pos]) {
	case '\0':
		/*
		 * There is nothing to evaluate; in particular,
		 * there is no delimiter for a string comparison.
		 */
		return(0);
	case 'n':
		/* FALLTHROUGH */
	case 'o':
		(*pos)++;
		return(wanttrue);
	case 'c':
		/* FALLTHROUGH */
	case 'd':
		/* FALLTHROUGH */
	case 'e':
		/* FALLTHROUGH */
	case 'r':
		/* FALLTHROUGH */
	case 't':
		(*pos)++;
		return(!wanttrue);
	default:
		break;
	}

	/* A number is true if positive; anything else is a string. */

	if (roff_evalnum(v, pos, &number, 0))
		return((number > 0) == wanttrue);
	else
		return(roff_evalstrcond(v, pos) == wanttrue);
}
1235
1236 static enum rofferr
1237 roff_line_ignore(ROFF_ARGS)
1238 {
1239
1240 return(ROFF_IGN);
1241 }
1242
1243 static enum rofferr
1244 roff_cond(ROFF_ARGS)
1245 {
1246
1247 roffnode_push(r, tok, NULL, ln, ppos);
1248
1249 /*
1250 * An `.el' has no conditional body: it will consume the value
1251 * of the current rstack entry set in prior `ie' calls or
1252 * defaults to DENY.
1253 *
1254 * If we're not an `el', however, then evaluate the conditional.
1255 */
1256
1257 r->last->rule = ROFF_el == tok ?
1258 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
1259 roff_evalcond(*bufp, &pos);
1260
1261 /*
1262 * An if-else will put the NEGATION of the current evaluated
1263 * conditional into the stack of rules.
1264 */
1265
1266 if (ROFF_ie == tok) {
1267 if (r->rstackpos == RSTACK_MAX - 1) {
1268 mandoc_msg(MANDOCERR_MEM,
1269 r->parse, ln, ppos, NULL);
1270 return(ROFF_ERR);
1271 }
1272 r->rstack[++r->rstackpos] = !r->last->rule;
1273 }
1274
1275 /* If the parent has false as its rule, then so do we. */
1276
1277 if (r->last->parent && !r->last->parent->rule)
1278 r->last->rule = 0;
1279
1280 /*
1281 * Determine scope.
1282 * If there is nothing on the line after the conditional,
1283 * not even whitespace, use next-line scope.
1284 */
1285
1286 if ('\0' == (*bufp)[pos]) {
1287 r->last->endspan = 2;
1288 goto out;
1289 }
1290
1291 while (' ' == (*bufp)[pos])
1292 pos++;
1293
1294 /* An opening brace requests multiline scope. */
1295
1296 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1297 r->last->endspan = -1;
1298 pos += 2;
1299 goto out;
1300 }
1301
1302 /*
1303 * Anything else following the conditional causes
1304 * single-line scope. Warn if the scope contains
1305 * nothing but trailing whitespace.
1306 */
1307
1308 if ('\0' == (*bufp)[pos])
1309 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
1310
1311 r->last->endspan = 1;
1312
1313 out:
1314 *offs = pos;
1315 return(ROFF_RERUN);
1316 }
1317
1318 static enum rofferr
1319 roff_ds(ROFF_ARGS)
1320 {
1321 char *string;
1322 const char *name;
1323 size_t namesz;
1324
1325 /*
1326 * The first word is the name of the string.
1327 * If it is empty or terminated by an escape sequence,
1328 * abort the `ds' request without defining anything.
1329 */
1330
1331 name = string = *bufp + pos;
1332 if ('\0' == *name)
1333 return(ROFF_IGN);
1334
1335 namesz = roff_getname(r, &string, ln, pos);
1336 if ('\\' == name[namesz])
1337 return(ROFF_IGN);
1338
1339 /* Read past the initial double-quote, if any. */
1340 if ('"' == *string)
1341 string++;
1342
1343 /* The rest is the value. */
1344 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
1345 ROFF_as == tok);
1346 return(ROFF_IGN);
1347 }
1348
/*
 * Parse one operator of one or two characters at v[*pos].
 * On success, store a one-character code for the operator in *res,
 * move *pos past the operator, and return that code (non-zero).
 * Two-character operators are encoded as follows:
 * "<=" as 'l', "<>" as '!', "<?" as 'i', ">=" as 'g', ">?" as 'a',
 * and "==" as '='.
 * On failure, return 0 and leave *pos alone; *res then holds
 * the unrecognized character.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*cp;
	int		 len;

	cp = v + *pos;
	*res = *cp;
	len = 1;

	switch (*cp) {
	case '+':
		/* FALLTHROUGH */
	case '-':
		/* FALLTHROUGH */
	case '*':
		/* FALLTHROUGH */
	case '/':
		/* FALLTHROUGH */
	case '%':
		/* FALLTHROUGH */
	case '&':
		/* FALLTHROUGH */
	case ':':
		break;
	case '<':
		if ('=' == cp[1]) {
			*res = 'l';
			len = 2;
		} else if ('>' == cp[1]) {
			*res = '!';
			len = 2;
		} else if ('?' == cp[1]) {
			*res = 'i';
			len = 2;
		}
		break;
	case '>':
		if ('=' == cp[1]) {
			*res = 'g';
			len = 2;
		} else if ('?' == cp[1]) {
			*res = 'a';
			len = 2;
		}
		break;
	case '=':
		if ('=' == cp[1])
			len = 2;
		break;
	default:
		return(0);
	}

	*pos += len;
	return(*res);
}
1418
/*
 * Evaluate one operand at v[*pos]: either a numeric expression
 * enclosed in parentheses or a single signed integer number.
 * Returns 1 on success and 0 on failure.
 */
static int
roff_evalpar(const char *v, int *pos, int *res)
{

	/* No parenthesis: the operand is a plain number. */
	if ('(' != v[*pos])
		return(roff_getnum(v, pos, res));

	++*pos;
	if (0 == roff_evalnum(v, pos, res, 1))
		return(0);

	/*
	 * A missing closing parenthesis is tolerated in
	 * evaluation mode (res given), but is an error
	 * in validation mode (res is NULL).
	 */
	if (')' != v[*pos])
		return(NULL != res);

	++*pos;
	return(1);
}
1447
/*
 * Evaluate a complete numeric expression.
 * Proceed left to right, there is no concept of precedence.
 *
 * v is the string to parse.  If pos is NULL, parsing starts at
 * the beginning of v and the end position is not reported;
 * otherwise, parsing starts at v[*pos] and *pos is advanced.
 * If res is NULL, the expression is only parsed, not calculated.
 * If skipwhite is non-zero, white space around operands and
 * operators is skipped.
 * Division and modulus by zero yield 0 rather than invoking
 * undefined behaviour.
 * Returns 1 on success and 0 if an operand cannot be parsed.
 */
static int
roff_evalnum(const char *v, int *pos, int *res, int skipwhite)
{
	int		 mypos, operand2;
	char		 operator;

	if (NULL == pos) {
		mypos = 0;
		pos = &mypos;
	}

	if (skipwhite)
		while (isspace((unsigned char)v[*pos]))
			(*pos)++;

	/* The first operand directly initializes the result. */
	if ( ! roff_evalpar(v, pos, res))
		return(0);

	/* Fold in (operator, operand) pairs as long as there are any. */
	while (1) {
		if (skipwhite)
			while (isspace((unsigned char)v[*pos]))
				(*pos)++;

		if ( ! roff_getop(v, pos, &operator))
			break;

		if (skipwhite)
			while (isspace((unsigned char)v[*pos]))
				(*pos)++;

		if ( ! roff_evalpar(v, pos, &operand2))
			return(0);

		if (skipwhite)
			while (isspace((unsigned char)v[*pos]))
				(*pos)++;

		/* Validation only: nothing to calculate. */
		if (NULL == res)
			continue;

		switch (operator) {
		case '+':
			*res += operand2;
			break;
		case '-':
			*res -= operand2;
			break;
		case '*':
			*res *= operand2;
			break;
		case '/':
			/* Dividing by zero is undefined in C. */
			if (0 == operand2)
				*res = 0;
			else
				*res /= operand2;
			break;
		case '%':
			/* So is the remainder of a division by zero. */
			if (0 == operand2)
				*res = 0;
			else
				*res %= operand2;
			break;
		case '<':
			*res = *res < operand2;
			break;
		case '>':
			*res = *res > operand2;
			break;
		case 'l':
			*res = *res <= operand2;
			break;
		case 'g':
			*res = *res >= operand2;
			break;
		case '=':
			*res = *res == operand2;
			break;
		case '!':
			*res = *res != operand2;
			break;
		case '&':
			*res = *res && operand2;
			break;
		case ':':
			*res = *res || operand2;
			break;
		case 'i':
			/* Minimum. */
			if (operand2 < *res)
				*res = operand2;
			break;
		case 'a':
			/* Maximum. */
			if (operand2 > *res)
				*res = operand2;
			break;
		default:
			abort();
		}
	}
	return(1);
}
1546
1547 void
1548 roff_setreg(struct roff *r, const char *name, int val, char sign)
1549 {
1550 struct roffreg *reg;
1551
1552 /* Search for an existing register with the same name. */
1553 reg = r->regtab;
1554
1555 while (reg && strcmp(name, reg->key.p))
1556 reg = reg->next;
1557
1558 if (NULL == reg) {
1559 /* Create a new register. */
1560 reg = mandoc_malloc(sizeof(struct roffreg));
1561 reg->key.p = mandoc_strdup(name);
1562 reg->key.sz = strlen(name);
1563 reg->val = 0;
1564 reg->next = r->regtab;
1565 r->regtab = reg;
1566 }
1567
1568 if ('+' == sign)
1569 reg->val += val;
1570 else if ('-' == sign)
1571 reg->val -= val;
1572 else
1573 reg->val = val;
1574 }
1575
/*
 * Look up a predefined read-only number register by the first
 * character of name.
 * Returns the value of the register, or -1 if there is no such
 * predefined register.  Should a predefined register with the
 * value -1 ever be needed, a different marker has to be chosen.
 */
static int
roff_getregro(const char *name)
{
	int	 val;

	switch (*name) {
	case 'A':  /* ASCII approximation mode is always off. */
		/* FALLTHROUGH */
	case 'j':  /* Always adjust left margin only. */
		val = 0;
		break;
	case 'g':  /* Groff compatibility mode is always on. */
		/* FALLTHROUGH */
	case 'T':  /* Some output device is always defined. */
		val = 1;
		break;
	case 'H':  /* Fixed horizontal resolution. */
		val = 24;
		break;
	case 'V':  /* Fixed vertical resolution. */
		val = 40;
		break;
	default:
		val = -1;
		break;
	}
	return(val);
}
1603
1604 int
1605 roff_getreg(const struct roff *r, const char *name)
1606 {
1607 struct roffreg *reg;
1608 int val;
1609
1610 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
1611 val = roff_getregro(name + 1);
1612 if (-1 != val)
1613 return (val);
1614 }
1615
1616 for (reg = r->regtab; reg; reg = reg->next)
1617 if (0 == strcmp(name, reg->key.p))
1618 return(reg->val);
1619
1620 return(0);
1621 }
1622
1623 static int
1624 roff_getregn(const struct roff *r, const char *name, size_t len)
1625 {
1626 struct roffreg *reg;
1627 int val;
1628
1629 if ('.' == name[0] && 2 == len) {
1630 val = roff_getregro(name + 1);
1631 if (-1 != val)
1632 return (val);
1633 }
1634
1635 for (reg = r->regtab; reg; reg = reg->next)
1636 if (len == reg->key.sz &&
1637 0 == strncmp(name, reg->key.p, len))
1638 return(reg->val);
1639
1640 return(0);
1641 }
1642
1643 static void
1644 roff_freereg(struct roffreg *reg)
1645 {
1646 struct roffreg *old_reg;
1647
1648 while (NULL != reg) {
1649 free(reg->key.p);
1650 old_reg = reg;
1651 reg = reg->next;
1652 free(old_reg);
1653 }
1654 }
1655
1656 static enum rofferr
1657 roff_nr(ROFF_ARGS)
1658 {
1659 char *key, *val;
1660 size_t keysz;
1661 int iv;
1662 char sign;
1663
1664 key = val = *bufp + pos;
1665 if ('\0' == *key)
1666 return(ROFF_IGN);
1667
1668 keysz = roff_getname(r, &val, ln, pos);
1669 if ('\\' == key[keysz])
1670 return(ROFF_IGN);
1671 key[keysz] = '\0';
1672
1673 sign = *val;
1674 if ('+' == sign || '-' == sign)
1675 val++;
1676
1677 if (roff_evalnum(val, NULL, &iv, 0))
1678 roff_setreg(r, key, iv, sign);
1679
1680 return(ROFF_IGN);
1681 }
1682
1683 static enum rofferr
1684 roff_rr(ROFF_ARGS)
1685 {
1686 struct roffreg *reg, **prev;
1687 char *name, *cp;
1688 size_t namesz;
1689
1690 name = cp = *bufp + pos;
1691 if ('\0' == *name)
1692 return(ROFF_IGN);
1693 namesz = roff_getname(r, &cp, ln, pos);
1694 name[namesz] = '\0';
1695
1696 prev = &r->regtab;
1697 while (1) {
1698 reg = *prev;
1699 if (NULL == reg || !strcmp(name, reg->key.p))
1700 break;
1701 prev = &reg->next;
1702 }
1703 if (NULL != reg) {
1704 *prev = reg->next;
1705 free(reg->key.p);
1706 free(reg);
1707 }
1708 return(ROFF_IGN);
1709 }
1710
1711 static enum rofferr
1712 roff_rm(ROFF_ARGS)
1713 {
1714 const char *name;
1715 char *cp;
1716 size_t namesz;
1717
1718 cp = *bufp + pos;
1719 while ('\0' != *cp) {
1720 name = cp;
1721 namesz = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1722 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
1723 if ('\\' == name[namesz])
1724 break;
1725 }
1726 return(ROFF_IGN);
1727 }
1728
1729 static enum rofferr
1730 roff_it(ROFF_ARGS)
1731 {
1732 char *cp;
1733 size_t len;
1734 int iv;
1735
1736 /* Parse the number of lines. */
1737 cp = *bufp + pos;
1738 len = strcspn(cp, " \t");
1739 cp[len] = '\0';
1740 if ((iv = mandoc_strntoi(cp, len, 10)) <= 0) {
1741 mandoc_msg(MANDOCERR_NUMERIC, r->parse,
1742 ln, ppos, *bufp + 1);
1743 return(ROFF_IGN);
1744 }
1745 cp += len + 1;
1746
1747 /* Arm the input line trap. */
1748 roffit_lines = iv;
1749 roffit_macro = mandoc_strdup(cp);
1750 return(ROFF_IGN);
1751 }
1752
1753 static enum rofferr
1754 roff_Dd(ROFF_ARGS)
1755 {
1756 const char *const *cp;
1757
1758 if (0 == ((MPARSE_MDOC | MPARSE_QUICK) & r->options))
1759 for (cp = __mdoc_reserved; *cp; cp++)
1760 roff_setstr(r, *cp, NULL, 0);
1761
1762 return(ROFF_CONT);
1763 }
1764
1765 static enum rofferr
1766 roff_TH(ROFF_ARGS)
1767 {
1768 const char *const *cp;
1769
1770 if (0 == (MPARSE_QUICK & r->options))
1771 for (cp = __man_reserved; *cp; cp++)
1772 roff_setstr(r, *cp, NULL, 0);
1773
1774 return(ROFF_CONT);
1775 }
1776
1777 static enum rofferr
1778 roff_TE(ROFF_ARGS)
1779 {
1780
1781 if (NULL == r->tbl)
1782 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1783 else
1784 tbl_end(&r->tbl);
1785
1786 return(ROFF_IGN);
1787 }
1788
1789 static enum rofferr
1790 roff_T_(ROFF_ARGS)
1791 {
1792
1793 if (NULL == r->tbl)
1794 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1795 else
1796 tbl_restart(ppos, ln, r->tbl);
1797
1798 return(ROFF_IGN);
1799 }
1800
#if 0
/*
 * Currently compiled out.
 * End the open equation, if any; return 1 if eqn_end()
 * reports ROFF_EQN and 0 in all other cases.
 */
static int
roff_closeeqn(struct roff *r)
{

	if (NULL == r->eqn)
		return(0);
	if (ROFF_EQN == eqn_end(&r->eqn))
		return(1);
	return(0);
}
#endif
1809
1810 static void
1811 roff_openeqn(struct roff *r, const char *name, int line,
1812 int offs, const char *buf)
1813 {
1814 struct eqn_node *e;
1815 int poff;
1816
1817 assert(NULL == r->eqn);
1818 e = eqn_alloc(name, offs, line, r->parse);
1819
1820 if (r->last_eqn)
1821 r->last_eqn->next = e;
1822 else
1823 r->first_eqn = r->last_eqn = e;
1824
1825 r->eqn = r->last_eqn = e;
1826
1827 if (buf) {
1828 poff = 0;
1829 eqn_read(&r->eqn, line, buf, offs, &poff);
1830 }
1831 }
1832
1833 static enum rofferr
1834 roff_EQ(ROFF_ARGS)
1835 {
1836
1837 roff_openeqn(r, *bufp + pos, ln, ppos, NULL);
1838 return(ROFF_IGN);
1839 }
1840
1841 static enum rofferr
1842 roff_EN(ROFF_ARGS)
1843 {
1844
1845 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1846 return(ROFF_IGN);
1847 }
1848
1849 static enum rofferr
1850 roff_TS(ROFF_ARGS)
1851 {
1852 struct tbl_node *tbl;
1853
1854 if (r->tbl) {
1855 mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
1856 tbl_end(&r->tbl);
1857 }
1858
1859 tbl = tbl_alloc(ppos, ln, r->parse);
1860
1861 if (r->last_tbl)
1862 r->last_tbl->next = tbl;
1863 else
1864 r->first_tbl = r->last_tbl = tbl;
1865
1866 r->tbl = r->last_tbl = tbl;
1867 return(ROFF_IGN);
1868 }
1869
1870 static enum rofferr
1871 roff_cc(ROFF_ARGS)
1872 {
1873 const char *p;
1874
1875 p = *bufp + pos;
1876
1877 if ('\0' == *p || '.' == (r->control = *p++))
1878 r->control = 0;
1879
1880 if ('\0' != *p)
1881 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1882
1883 return(ROFF_IGN);
1884 }
1885
1886 static enum rofferr
1887 roff_tr(ROFF_ARGS)
1888 {
1889 const char *p, *first, *second;
1890 size_t fsz, ssz;
1891 enum mandoc_esc esc;
1892
1893 p = *bufp + pos;
1894
1895 if ('\0' == *p) {
1896 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1897 return(ROFF_IGN);
1898 }
1899
1900 while ('\0' != *p) {
1901 fsz = ssz = 1;
1902
1903 first = p++;
1904 if ('\\' == *first) {
1905 esc = mandoc_escape(&p, NULL, NULL);
1906 if (ESCAPE_ERROR == esc) {
1907 mandoc_msg(MANDOCERR_BADESCAPE,
1908 r->parse, ln,
1909 (int)(p - *bufp), NULL);
1910 return(ROFF_IGN);
1911 }
1912 fsz = (size_t)(p - first);
1913 }
1914
1915 second = p++;
1916 if ('\\' == *second) {
1917 esc = mandoc_escape(&p, NULL, NULL);
1918 if (ESCAPE_ERROR == esc) {
1919 mandoc_msg(MANDOCERR_BADESCAPE,
1920 r->parse, ln,
1921 (int)(p - *bufp), NULL);
1922 return(ROFF_IGN);
1923 }
1924 ssz = (size_t)(p - second);
1925 } else if ('\0' == *second) {
1926 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
1927 ln, (int)(p - *bufp), NULL);
1928 second = " ";
1929 p--;
1930 }
1931
1932 if (fsz > 1) {
1933 roff_setstrn(&r->xmbtab, first, fsz,
1934 second, ssz, 0);
1935 continue;
1936 }
1937
1938 if (NULL == r->xtab)
1939 r->xtab = mandoc_calloc(128,
1940 sizeof(struct roffstr));
1941
1942 free(r->xtab[(int)*first].p);
1943 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
1944 r->xtab[(int)*first].sz = ssz;
1945 }
1946
1947 return(ROFF_IGN);
1948 }
1949
1950 static enum rofferr
1951 roff_so(ROFF_ARGS)
1952 {
1953 char *name;
1954
1955 name = *bufp + pos;
1956 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, ".so %s", name);
1957
1958 /*
1959 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1960 * opening anything that's not in our cwd or anything beneath
1961 * it. Thus, explicitly disallow traversing up the file-system
1962 * or using absolute paths.
1963 */
1964
1965 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1966 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
1967 ".so %s", name);
1968 return(ROFF_ERR);
1969 }
1970
1971 *offs = pos;
1972 return(ROFF_SO);
1973 }
1974
1975 static enum rofferr
1976 roff_userdef(ROFF_ARGS)
1977 {
1978 const char *arg[9];
1979 char *cp, *n1, *n2;
1980 int i;
1981
1982 /*
1983 * Collect pointers to macro argument strings
1984 * and NUL-terminate them.
1985 */
1986 cp = *bufp + pos;
1987 for (i = 0; i < 9; i++)
1988 arg[i] = '\0' == *cp ? "" :
1989 mandoc_getarg(r->parse, &cp, ln, &pos);
1990
1991 /*
1992 * Expand macro arguments.
1993 */
1994 *szp = 0;
1995 n1 = cp = mandoc_strdup(r->current_string);
1996 while (NULL != (cp = strstr(cp, "\\$"))) {
1997 i = cp[2] - '1';
1998 if (0 > i || 8 < i) {
1999 /* Not an argument invocation. */
2000 cp += 2;
2001 continue;
2002 }
2003 *cp = '\0';
2004 *szp = mandoc_asprintf(&n2, "%s%s%s",
2005 n1, arg[i], cp + 3) + 1;
2006 cp = n2 + (cp - n1);
2007 free(n1);
2008 n1 = n2;
2009 }
2010
2011 /*
2012 * Replace the macro invocation
2013 * by the expanded macro.
2014 */
2015 free(*bufp);
2016 *bufp = n1;
2017 if (0 == *szp)
2018 *szp = strlen(*bufp) + 1;
2019
2020 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
2021 ROFF_REPARSE : ROFF_APPEND);
2022 }
2023
2024 static size_t
2025 roff_getname(struct roff *r, char **cpp, int ln, int pos)
2026 {
2027 char *name, *cp;
2028 size_t namesz;
2029
2030 name = *cpp;
2031 if ('\0' == *name)
2032 return(0);
2033
2034 /* Read until end of name and terminate it with NUL. */
2035 for (cp = name; 1; cp++) {
2036 if ('\0' == *cp || ' ' == *cp) {
2037 namesz = cp - name;
2038 break;
2039 }
2040 if ('\\' != *cp)
2041 continue;
2042 cp++;
2043 if ('\\' == *cp)
2044 continue;
2045 namesz = cp - name - 1;
2046 mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
2047 mandoc_escape((const char **)&cp, NULL, NULL);
2048 break;
2049 }
2050
2051 /* Read past spaces. */
2052 while (' ' == *cp)
2053 cp++;
2054
2055 *cpp = cp;
2056 return(namesz);
2057 }
2058
2059 /*
2060 * Store *string into the user-defined string called *name.
2061 * To clear an existing entry, call with (*r, *name, NULL, 0).
2062 * append == 0: replace mode
2063 * append == 1: single-line append mode
2064 * append == 2: multiline append mode, append '\n' after each call
2065 */
2066 static void
2067 roff_setstr(struct roff *r, const char *name, const char *string,
2068 int append)
2069 {
2070
2071 roff_setstrn(&r->strtab, name, strlen(name), string,
2072 string ? strlen(string) : 0, append);
2073 }
2074
2075 static void
2076 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
2077 const char *string, size_t stringsz, int append)
2078 {
2079 struct roffkv *n;
2080 char *c;
2081 int i;
2082 size_t oldch, newch;
2083
2084 /* Search for an existing string with the same name. */
2085 n = *r;
2086
2087 while (n && (namesz != n->key.sz ||
2088 strncmp(n->key.p, name, namesz)))
2089 n = n->next;
2090
2091 if (NULL == n) {
2092 /* Create a new string table entry. */
2093 n = mandoc_malloc(sizeof(struct roffkv));
2094 n->key.p = mandoc_strndup(name, namesz);
2095 n->key.sz = namesz;
2096 n->val.p = NULL;
2097 n->val.sz = 0;
2098 n->next = *r;
2099 *r = n;
2100 } else if (0 == append) {
2101 free(n->val.p);
2102 n->val.p = NULL;
2103 n->val.sz = 0;
2104 }
2105
2106 if (NULL == string)
2107 return;
2108
2109 /*
2110 * One additional byte for the '\n' in multiline mode,
2111 * and one for the terminating '\0'.
2112 */
2113 newch = stringsz + (1 < append ? 2u : 1u);
2114
2115 if (NULL == n->val.p) {
2116 n->val.p = mandoc_malloc(newch);
2117 *n->val.p = '\0';
2118 oldch = 0;
2119 } else {
2120 oldch = n->val.sz;
2121 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
2122 }
2123
2124 /* Skip existing content in the destination buffer. */
2125 c = n->val.p + (int)oldch;
2126
2127 /* Append new content to the destination buffer. */
2128 i = 0;
2129 while (i < (int)stringsz) {
2130 /*
2131 * Rudimentary roff copy mode:
2132 * Handle escaped backslashes.
2133 */
2134 if ('\\' == string[i] && '\\' == string[i + 1])
2135 i++;
2136 *c++ = string[i++];
2137 }
2138
2139 /* Append terminating bytes. */
2140 if (1 < append)
2141 *c++ = '\n';
2142
2143 *c = '\0';
2144 n->val.sz = (int)(c - n->val.p);
2145 }
2146
2147 static const char *
2148 roff_getstrn(const struct roff *r, const char *name, size_t len)
2149 {
2150 const struct roffkv *n;
2151 int i;
2152
2153 for (n = r->strtab; n; n = n->next)
2154 if (0 == strncmp(name, n->key.p, len) &&
2155 '\0' == n->key.p[(int)len])
2156 return(n->val.p);
2157
2158 for (i = 0; i < PREDEFS_MAX; i++)
2159 if (0 == strncmp(name, predefs[i].name, len) &&
2160 '\0' == predefs[i].name[(int)len])
2161 return(predefs[i].str);
2162
2163 return(NULL);
2164 }
2165
2166 static void
2167 roff_freestr(struct roffkv *r)
2168 {
2169 struct roffkv *n, *nn;
2170
2171 for (n = r; n; n = nn) {
2172 free(n->key.p);
2173 free(n->val.p);
2174 nn = n->next;
2175 free(n);
2176 }
2177 }
2178
2179 const struct tbl_span *
2180 roff_span(const struct roff *r)
2181 {
2182
2183 return(r->tbl ? tbl_span(r->tbl) : NULL);
2184 }
2185
2186 const struct eqn *
2187 roff_eqn(const struct roff *r)
2188 {
2189
2190 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
2191 }
2192
2193 /*
2194 * Duplicate an input string, making the appropriate character
2195 * conversations (as stipulated by `tr') along the way.
2196 * Returns a heap-allocated string with all the replacements made.
2197 */
2198 char *
2199 roff_strdup(const struct roff *r, const char *p)
2200 {
2201 const struct roffkv *cp;
2202 char *res;
2203 const char *pp;
2204 size_t ssz, sz;
2205 enum mandoc_esc esc;
2206
2207 if (NULL == r->xmbtab && NULL == r->xtab)
2208 return(mandoc_strdup(p));
2209 else if ('\0' == *p)
2210 return(mandoc_strdup(""));
2211
2212 /*
2213 * Step through each character looking for term matches
2214 * (remember that a `tr' can be invoked with an escape, which is
2215 * a glyph but the escape is multi-character).
2216 * We only do this if the character hash has been initialised
2217 * and the string is >0 length.
2218 */
2219
2220 res = NULL;
2221 ssz = 0;
2222
2223 while ('\0' != *p) {
2224 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
2225 sz = r->xtab[(int)*p].sz;
2226 res = mandoc_realloc(res, ssz + sz + 1);
2227 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
2228 ssz += sz;
2229 p++;
2230 continue;
2231 } else if ('\\' != *p) {
2232 res = mandoc_realloc(res, ssz + 2);
2233 res[ssz++] = *p++;
2234 continue;
2235 }
2236
2237 /* Search for term matches. */
2238 for (cp = r->xmbtab; cp; cp = cp->next)
2239 if (0 == strncmp(p, cp->key.p, cp->key.sz))
2240 break;
2241
2242 if (NULL != cp) {
2243 /*
2244 * A match has been found.
2245 * Append the match to the array and move
2246 * forward by its keysize.
2247 */
2248 res = mandoc_realloc(res,
2249 ssz + cp->val.sz + 1);
2250 memcpy(res + ssz, cp->val.p, cp->val.sz);
2251 ssz += cp->val.sz;
2252 p += (int)cp->key.sz;
2253 continue;
2254 }
2255
2256 /*
2257 * Handle escapes carefully: we need to copy
2258 * over just the escape itself, or else we might
2259 * do replacements within the escape itself.
2260 * Make sure to pass along the bogus string.
2261 */
2262 pp = p++;
2263 esc = mandoc_escape(&p, NULL, NULL);
2264 if (ESCAPE_ERROR == esc) {
2265 sz = strlen(pp);
2266 res = mandoc_realloc(res, ssz + sz + 1);
2267 memcpy(res + ssz, pp, sz);
2268 break;
2269 }
2270 /*
2271 * We bail out on bad escapes.
2272 * No need to warn: we already did so when
2273 * roff_res() was called.
2274 */
2275 sz = (int)(p - pp);
2276 res = mandoc_realloc(res, ssz + sz + 1);
2277 memcpy(res + ssz, pp, sz);
2278 ssz += sz;
2279 }
2280
2281 res[(int)ssz] = '\0';
2282 return(res);
2283 }
2284
2285 /*
2286 * Find out whether a line is a macro line or not.
2287 * If it is, adjust the current position and return one; if it isn't,
2288 * return zero and don't change the current position.
2289 * If the control character has been set with `.cc', then let that grain
2290 * precedence.
2291 * This is slighly contrary to groff, where using the non-breaking
2292 * control character when `cc' has been invoked will cause the
2293 * non-breaking macro contents to be printed verbatim.
2294 */
2295 int
2296 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
2297 {
2298 int pos;
2299
2300 pos = *ppos;
2301
2302 if (0 != r->control && cp[pos] == r->control)
2303 pos++;
2304 else if (0 != r->control)
2305 return(0);
2306 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
2307 pos += 2;
2308 else if ('.' == cp[pos] || '\'' == cp[pos])
2309 pos++;
2310 else
2311 return(0);
2312
2313 while (' ' == cp[pos] || '\t' == cp[pos])
2314 pos++;
2315
2316 *ppos = pos;
2317 return(1);
2318 }