]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
30e923098edf90b9cf067b56a2690f2d8a8554d0
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.211 2014/06/29 21:20:31 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include "mandoc.h"
29 #include "mandoc_aux.h"
30 #include "libroff.h"
31 #include "libmandoc.h"
32
33 /* Maximum number of nested if-else conditionals. */
34 #define RSTACK_MAX 128
35
36 /* Maximum number of string expansions per line, to break infinite loops. */
37 #define EXPAND_LIMIT 1000
38
/*
 * Tokens for the roff requests and macros handled in this file.
 * The values index the roffs[] table, so the order of the
 * enumerators must match the order of the entries in that table.
 */
enum rofft {
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_as,
	ROFF_cc,
	ROFF_ce,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_fam,
	ROFF_hw,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_ps,
	ROFF_rm,
	ROFF_rr,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,
	ROFF_Dd,
	ROFF_TH,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,
	ROFF_EQ,
	ROFF_EN,
	ROFF_cblock,	/* block close; named "." in roffs[] */
	ROFF_USERDEF,	/* user-defined macro; has no name in roffs[] */
	ROFF_MAX	/* table size; also the "not found" result */
};
80
/*
 * An incredibly-simple string buffer:
 * a character pointer together with its cached length.
 */
struct roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
88
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * struct roff uses such lists for its string table (strtab)
 * and for its multi-byte translation table (xmbtab).
 */
struct roffkv {
	struct roffstr	 key;
	struct roffstr	 val;
	struct roffkv	*next; /* next in list */
};
97
/*
 * A single number register as part of a singly-linked list.
 * The list head is kept in the regtab member of struct roff.
 */
struct roffreg {
	struct roffstr	 key; /* register name */
	int		 val; /* current register value */
	struct roffreg	*next; /* next in list */
};
106
/*
 * State of one roff parser.  Created by roff_alloc(), emptied for
 * reuse by roff_reset(), and destroyed by roff_free().
 */
struct roff {
	struct mparse	*parse; /* parse point */
	int		 options; /* parse options */
	struct roffnode	*last; /* leaf of stack */
	int		 rstack[RSTACK_MAX]; /* stack of !`ie' rules */
	char		 control; /* control character */
	int		 rstackpos; /* position in rstack; -1 if empty */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* last equation parsed */
	struct eqn_node	*first_eqn; /* first equation parsed */
	struct eqn_node	*eqn; /* current equation being parsed */
};
126
/*
 * One entry of the stack of open roff scopes (blocks and
 * conditionals).  Entries are created by roffnode_push() and
 * destroyed by roffnode_pop(); the top of the stack is r->last.
 */
struct roffnode {
	enum rofft	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* end-rules: custom token */
	/*
	 * roffnode_cleanscope() decrements endspan and pops the node
	 * once it reaches zero; roff_ccond() only closes nodes whose
	 * endspan is negative.
	 */
	int		 endspan; /* end-rules: next-line or infty */
	int		 rule; /* current evaluation rule */
};
137
/*
 * Parameter list shared by all request handlers, so that they can
 * be stored in and called through the roffproc function pointers
 * of the roffs[] table.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum rofft tok, /* tok of macro */ \
			 char **bufp, /* input buffer */ \
			 size_t *szp, /* size of input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Type of a request handler taking the ROFF_ARGS parameter list. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
148
/*
 * Description of one request or macro: its name, its handlers,
 * and the link used to chain it into a hash[] bucket.
 */
struct roffmac {
	const char	*name; /* macro name */
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
	struct roffmac	*next; /* next entry in the same hash bucket */
};
158
/* One entry of the predefs[] table: a name and its replacement. */
struct predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};
163
/*
 * Expands to one struct predef initializer followed by a comma.
 * NOTE(review): presumably this is the macro invoked by the lines
 * of "predefs.in", which is included inside the predefs[]
 * initializer; confirm against that file.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
166
167 static enum rofft roffhash_find(const char *, size_t);
168 static void roffhash_init(void);
169 static void roffnode_cleanscope(struct roff *);
170 static void roffnode_pop(struct roff *);
171 static void roffnode_push(struct roff *, enum rofft,
172 const char *, int, int);
173 static enum rofferr roff_block(ROFF_ARGS);
174 static enum rofferr roff_block_text(ROFF_ARGS);
175 static enum rofferr roff_block_sub(ROFF_ARGS);
176 static enum rofferr roff_cblock(ROFF_ARGS);
177 static enum rofferr roff_cc(ROFF_ARGS);
178 static void roff_ccond(struct roff *, int, int);
179 static enum rofferr roff_cond(ROFF_ARGS);
180 static enum rofferr roff_cond_text(ROFF_ARGS);
181 static enum rofferr roff_cond_sub(ROFF_ARGS);
182 static enum rofferr roff_ds(ROFF_ARGS);
183 static int roff_evalcond(const char *, int *);
184 static int roff_evalnum(const char *, int *, int *, int);
185 static int roff_evalpar(const char *, int *, int *);
186 static int roff_evalstrcond(const char *, int *);
187 static void roff_free1(struct roff *);
188 static void roff_freereg(struct roffreg *);
189 static void roff_freestr(struct roffkv *);
190 static char *roff_getname(struct roff *, char **, int, int);
191 static int roff_getnum(const char *, int *, int *);
192 static int roff_getop(const char *, int *, char *);
193 static int roff_getregn(const struct roff *,
194 const char *, size_t);
195 static int roff_getregro(const char *name);
196 static const char *roff_getstrn(const struct roff *,
197 const char *, size_t);
198 static enum rofferr roff_it(ROFF_ARGS);
199 static enum rofferr roff_line_ignore(ROFF_ARGS);
200 static enum rofferr roff_nr(ROFF_ARGS);
201 static void roff_openeqn(struct roff *, const char *,
202 int, int, const char *);
203 static enum rofft roff_parse(struct roff *, const char *, int *);
204 static enum rofferr roff_parsetext(char **, size_t *, int, int *);
205 static enum rofferr roff_res(struct roff *,
206 char **, size_t *, int, int);
207 static enum rofferr roff_rm(ROFF_ARGS);
208 static enum rofferr roff_rr(ROFF_ARGS);
209 static void roff_setstr(struct roff *,
210 const char *, const char *, int);
211 static void roff_setstrn(struct roffkv **, const char *,
212 size_t, const char *, size_t, int);
213 static enum rofferr roff_so(ROFF_ARGS);
214 static enum rofferr roff_tr(ROFF_ARGS);
215 static enum rofferr roff_Dd(ROFF_ARGS);
216 static enum rofferr roff_TH(ROFF_ARGS);
217 static enum rofferr roff_TE(ROFF_ARGS);
218 static enum rofferr roff_TS(ROFF_ARGS);
219 static enum rofferr roff_EQ(ROFF_ARGS);
220 static enum rofferr roff_EN(ROFF_ARGS);
221 static enum rofferr roff_T_(ROFF_ARGS);
222 static enum rofferr roff_userdef(ROFF_ARGS);
223
/* See roffhash_find() */

/*
 * Range of the characters that may start a request name.
 * There is one hash bucket per character in this range.
 */
#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

/* Bucket heads; the chains are built by roffhash_init(). */
static	struct roffmac	*hash[HASHWIDTH];
231
/*
 * Table of all requests and macros handled here, indexed by
 * enum rofft; the entries must stay in the order of that enum.
 * Columns: name, proc, text, sub, flags, next.
 * Entries that can open a scope (blocks and conditionals) provide
 * text and sub handlers; roff_parseln() asserts their presence
 * for the token of the innermost open scope.
 */
static	struct roffmac	 roffs[ROFF_MAX] = {
	{ "ad", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "as", roff_ds, NULL, NULL, 0, NULL },
	{ "cc", roff_cc, NULL, NULL, 0, NULL },
	{ "ce", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ds", roff_ds, NULL, NULL, 0, NULL },
	{ "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "fam", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hw", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hy", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "it", roff_it, NULL, NULL, 0, NULL },
	{ "ne", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nh", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nr", roff_nr, NULL, NULL, 0, NULL },
	{ "ns", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ps", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rm", roff_rm, NULL, NULL, 0, NULL },
	{ "rr", roff_rr, NULL, NULL, 0, NULL },
	{ "so", roff_so, NULL, NULL, 0, NULL },
	{ "ta", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tr", roff_tr, NULL, NULL, 0, NULL },
	{ "Dd", roff_Dd, NULL, NULL, 0, NULL },
	{ "TH", roff_TH, NULL, NULL, 0, NULL },
	{ "TS", roff_TS, NULL, NULL, 0, NULL },
	{ "TE", roff_TE, NULL, NULL, 0, NULL },
	{ "T&", roff_T_, NULL, NULL, 0, NULL },
	{ "EQ", roff_EQ, NULL, NULL, 0, NULL },
	{ "EN", roff_EN, NULL, NULL, 0, NULL },
	{ ".", roff_cblock, NULL, NULL, 0, NULL },
	{ NULL, roff_userdef, NULL, NULL, 0, NULL },
};
272
/*
 * NULL-terminated list of names.
 * NOTE(review): presumably the macro names of the mdoc language
 * that user-defined macros should not shadow; the code using this
 * list is not in this part of the file, confirm there.
 */
/* not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH */
const char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
	"Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
	"Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em",
	"En", "Eo", "Er", "Es", "Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp",
	"Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "Pq",
	"Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv",
	"Sc", "Sh", "Sm", "So", "Sq",
	"Ss", "St", "Sx", "Sy",
	"Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
	"%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
295
/*
 * NULL-terminated list of names.
 * NOTE(review): presumably the macro names of the man language
 * that user-defined macros should not shadow; the code using this
 * list is not in this part of the file, confirm there.
 */
/* not currently implemented: BT DE DS ME MT PT SY TQ YS */
const char *const __man_reserved[] = {
	"AT", "B", "BI", "BR", "DT",
	"EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR",
	"LP", "OP", "P", "PD", "PP",
	"R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS",
	"TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR",
	NULL
};
305
/*
 * Array of injected predefined strings.
 * The initializers come from "predefs.in"; PREDEFS_MAX has to be
 * at least the number of entries that file provides.
 */
#define PREDEFS_MAX 38
static const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
311
/*
 * See roffhash_find().
 * Bucket index of the name p: its first character relative to
 * ASCII_LO.  Callers must first check that p[0] lies within
 * ASCII_LO..ASCII_HI.
 */
#define	ROFF_HASH(p)	(p[0] - ASCII_LO)
314
/*
 * State of the input line trap, shared by all parsers because it
 * is kept in file-scope variables.  roff_parsetext() counts
 * roffit_lines down once per text line and, when it is 1, appends
 * roffit_macro to the line as a new control line and frees it.
 */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
317
318
319 static void
320 roffhash_init(void)
321 {
322 struct roffmac *n;
323 int buc, i;
324
325 for (i = 0; i < (int)ROFF_USERDEF; i++) {
326 assert(roffs[i].name[0] >= ASCII_LO);
327 assert(roffs[i].name[0] <= ASCII_HI);
328
329 buc = ROFF_HASH(roffs[i].name);
330
331 if (NULL != (n = hash[buc])) {
332 for ( ; n->next; n = n->next)
333 /* Do nothing. */ ;
334 n->next = &roffs[i];
335 } else
336 hash[buc] = &roffs[i];
337 }
338 }
339
340 /*
341 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
342 * the nil-terminated string name could be found.
343 */
344 static enum rofft
345 roffhash_find(const char *p, size_t s)
346 {
347 int buc;
348 struct roffmac *n;
349
350 /*
351 * libroff has an extremely simple hashtable, for the time
352 * being, which simply keys on the first character, which must
353 * be printable, then walks a chain. It works well enough until
354 * optimised.
355 */
356
357 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
358 return(ROFF_MAX);
359
360 buc = ROFF_HASH(p);
361
362 if (NULL == (n = hash[buc]))
363 return(ROFF_MAX);
364 for ( ; n; n = n->next)
365 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
366 return((enum rofft)(n - roffs));
367
368 return(ROFF_MAX);
369 }
370
371 /*
372 * Pop the current node off of the stack of roff instructions currently
373 * pending.
374 */
375 static void
376 roffnode_pop(struct roff *r)
377 {
378 struct roffnode *p;
379
380 assert(r->last);
381 p = r->last;
382
383 r->last = r->last->parent;
384 free(p->name);
385 free(p->end);
386 free(p);
387 }
388
389 /*
390 * Push a roff node onto the instruction stack. This must later be
391 * removed with roffnode_pop().
392 */
393 static void
394 roffnode_push(struct roff *r, enum rofft tok, const char *name,
395 int line, int col)
396 {
397 struct roffnode *p;
398
399 p = mandoc_calloc(1, sizeof(struct roffnode));
400 p->tok = tok;
401 if (name)
402 p->name = mandoc_strdup(name);
403 p->parent = r->last;
404 p->line = line;
405 p->col = col;
406 p->rule = p->parent ? p->parent->rule : 0;
407
408 r->last = p;
409 }
410
411 static void
412 roff_free1(struct roff *r)
413 {
414 struct tbl_node *tbl;
415 struct eqn_node *e;
416 int i;
417
418 while (NULL != (tbl = r->first_tbl)) {
419 r->first_tbl = tbl->next;
420 tbl_free(tbl);
421 }
422
423 r->first_tbl = r->last_tbl = r->tbl = NULL;
424
425 while (NULL != (e = r->first_eqn)) {
426 r->first_eqn = e->next;
427 eqn_free(e);
428 }
429
430 r->first_eqn = r->last_eqn = r->eqn = NULL;
431
432 while (r->last)
433 roffnode_pop(r);
434
435 roff_freestr(r->strtab);
436 roff_freestr(r->xmbtab);
437
438 r->strtab = r->xmbtab = NULL;
439
440 roff_freereg(r->regtab);
441
442 r->regtab = NULL;
443
444 if (r->xtab)
445 for (i = 0; i < 128; i++)
446 free(r->xtab[i].p);
447
448 free(r->xtab);
449 r->xtab = NULL;
450 }
451
452 void
453 roff_reset(struct roff *r)
454 {
455
456 roff_free1(r);
457 r->control = 0;
458 }
459
/*
 * Destroy a parser: release everything it owns,
 * then the parser object itself.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);
	free(r);
}
467
468 struct roff *
469 roff_alloc(struct mparse *parse, int options)
470 {
471 struct roff *r;
472
473 r = mandoc_calloc(1, sizeof(struct roff));
474 r->parse = parse;
475 r->options = options;
476 r->rstackpos = -1;
477
478 roffhash_init();
479
480 return(r);
481 }
482
/*
 * In the current line, expand escape sequences that tend to get
 * used in numerical expressions and conditional requests.
 * Also check the syntax of the remaining escape sequences.
 *
 * The line is scanned from right to left, starting at pos.
 * The sequences \* (string), \n (register), \B (numeric
 * expression check) and \w (width) are replaced by their value;
 * each replacement allocates a new buffer, frees the old one and
 * updates *bufp and *szp.  All other sequences are only passed
 * to mandoc_escape() for a syntax check.
 *
 * Returns ROFF_IGN if more than EXPAND_LIMIT replacements were
 * needed on this line, ROFF_CONT otherwise.
 */
static enum rofferr
roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
{
	char		 ubuf[24]; /* buffer to print the number */
	const char	*start;	/* start of the string to process */
	char		*stesc;	/* start of an escape sequence ('\\') */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy bufp to */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	int		 expand_count;	/* to avoid infinite loops */
	int		 npos;	/* position in numeric expression */
	int		 irc;	/* return code from roff_evalnum() */
	char		 term;	/* character terminating the escape */

	expand_count = 0;
	start = *bufp + pos;
	/*
	 * stesc starts on the last byte of the line; because of the
	 * post-decrement in the loop condition, the first byte that
	 * is examined is the one before it.
	 */
	stesc = strchr(start, '\0') - 1;
	while (stesc-- > start) {

		/* Search backwards for the next backslash. */

		if ('\\' != *stesc)
			continue;

		/* If it is escaped, skip it. */

		/*
		 * cp stops on the nearest preceding byte that is not
		 * a backslash, so stesc - cp is the length of the run
		 * of backslashes ending at stesc.  If that length is
		 * even, this backslash is the second half of a pair;
		 * continue the search in front of the whole run.
		 */
		for (cp = stesc - 1; cp >= start; cp--)
			if ('\\' != *cp)
				break;

		if (0 == (stesc - cp) % 2) {
			stesc = (char *)cp;
			continue;
		}

		/* Decide whether to expand or to check only. */

		/*
		 * \B and \w take an argument enclosed in a delimiter
		 * character of the user's choice, remembered in term;
		 * \* and \n take a name.  For \B, \w and \n the
		 * replacement is printed into ubuf further down.
		 */
		term = '\0';
		cp = stesc + 1;
		switch (*cp) {
		case '*':
			res = NULL;
			break;
		case 'B':
			/* FALLTHROUGH */
		case 'w':
			term = cp[1];
			/* FALLTHROUGH */
		case 'n':
			res = ubuf;
			break;
		default:
			if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
				mandoc_msg(MANDOCERR_BADESCAPE, r->parse,
				    ln, (int)(stesc - *bufp), NULL);
			continue;
		}

		if (EXPAND_LIMIT < ++expand_count) {
			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
			    ln, (int)(stesc - *bufp), NULL);
			return(ROFF_IGN);
		}

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 */

		/*
		 * maxl is the fixed name length (1, or 2 after '(');
		 * maxl == 0 means the name extends up to term.
		 * In the delimited case, cp skips the escape letter
		 * and the opening delimiter.
		 */
		if ('\0' == term) {
			switch (*++cp) {
			case '\0':
				maxl = 0;
				break;
			case '(':
				cp++;
				maxl = 2;
				break;
			case '[':
				cp++;
				term = ']';
				maxl = 0;
				break;
			default:
				maxl = 1;
				break;
			}
		} else {
			cp += 2;
			maxl = 0;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		/*
		 * On a regular exit, cp points behind the complete
		 * escape sequence, including the terminator if there
		 * is one.  Running into the end of the line first is
		 * reported as a bad escape.
		 */
		for (naml = 0; 0 == maxl || naml < maxl; naml++, cp++) {
			if ('\0' == *cp) {
				mandoc_msg(MANDOCERR_BADESCAPE, r->parse,
				    ln, (int)(stesc - *bufp), NULL);
				break;
			}
			if (0 == maxl && *cp == term) {
				cp++;
				break;
			}
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		switch (stesc[1]) {
		case '*':
			res = roff_getstrn(r, stnam, naml);
			break;
		case 'B':
			/*
			 * The result is '1' only if roff_evalnum()
			 * succeeds and stops right in front of the
			 * closing delimiter.
			 */
			npos = 0;
			irc = roff_evalnum(stnam, &npos, NULL, 0);
			ubuf[0] = irc && stnam + npos + 1 == cp
			    ? '1' : '0';
			ubuf[1] = '\0';
			break;
		case 'n':
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
			    roff_getregn(r, stnam, naml));
			break;
		case 'w':
			/* The width is 24 units per byte of argument. */
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
			    24 * (int)naml);
			break;
		}

		/* An undefined string expands to nothing. */
		if (NULL == res) {
			mandoc_msg(MANDOCERR_BADESCAPE, r->parse,
			    ln, (int)(stesc - *bufp), NULL);
			res = "";
		}

		/* Replace the escape sequence by the string. */

		/*
		 * Cutting the old buffer at the backslash lets the
		 * format join the text before the escape, the
		 * replacement, and the text after the escape.
		 */
		*stesc = '\0';
		*szp = mandoc_asprintf(&nbuf, "%s%s%s",
		    *bufp, res, cp) + 1;

		/* Prepare for the next replacement. */

		/*
		 * Continue at the end of the inserted text, such that
		 * escape sequences contained in the replacement are
		 * processed as well.
		 */
		start = nbuf + pos;
		stesc = nbuf + (stesc - *bufp) + strlen(res);
		free(*bufp);
		*bufp = nbuf;
	}
	return(ROFF_CONT);
}
646
647 /*
648 * Process text streams:
649 * Convert all breakable hyphens into ASCII_HYPH.
650 * Decrement and spring input line trap.
651 */
652 static enum rofferr
653 roff_parsetext(char **bufp, size_t *szp, int pos, int *offs)
654 {
655 size_t sz;
656 const char *start;
657 char *p;
658 int isz;
659 enum mandoc_esc esc;
660
661 start = p = *bufp + pos;
662
663 while ('\0' != *p) {
664 sz = strcspn(p, "-\\");
665 p += sz;
666
667 if ('\0' == *p)
668 break;
669
670 if ('\\' == *p) {
671 /* Skip over escapes. */
672 p++;
673 esc = mandoc_escape((const char **)&p, NULL, NULL);
674 if (ESCAPE_ERROR == esc)
675 break;
676 continue;
677 } else if (p == start) {
678 p++;
679 continue;
680 }
681
682 if (isalpha((unsigned char)p[-1]) &&
683 isalpha((unsigned char)p[1]))
684 *p = ASCII_HYPH;
685 p++;
686 }
687
688 /* Spring the input line trap. */
689 if (1 == roffit_lines) {
690 isz = mandoc_asprintf(&p, "%s\n.%s", *bufp, roffit_macro);
691 free(*bufp);
692 *bufp = p;
693 *szp = isz + 1;
694 *offs = 0;
695 free(roffit_macro);
696 roffit_lines = 0;
697 return(ROFF_REPARSE);
698 } else if (1 < roffit_lines)
699 --roffit_lines;
700 return(ROFF_CONT);
701 }
702
703 enum rofferr
704 roff_parseln(struct roff *r, int ln, char **bufp,
705 size_t *szp, int pos, int *offs)
706 {
707 enum rofft t;
708 enum rofferr e;
709 int ppos, ctl;
710
711 /*
712 * Run the reserved-word filter only if we have some reserved
713 * words to fill in.
714 */
715
716 e = roff_res(r, bufp, szp, ln, pos);
717 if (ROFF_IGN == e)
718 return(e);
719 assert(ROFF_CONT == e);
720
721 ppos = pos;
722 ctl = roff_getcontrol(r, *bufp, &pos);
723
724 /*
725 * First, if a scope is open and we're not a macro, pass the
726 * text through the macro's filter. If a scope isn't open and
727 * we're not a macro, just let it through.
728 * Finally, if there's an equation scope open, divert it into it
729 * no matter our state.
730 */
731
732 if (r->last && ! ctl) {
733 t = r->last->tok;
734 assert(roffs[t].text);
735 e = (*roffs[t].text)(r, t, bufp, szp, ln, pos, pos, offs);
736 assert(ROFF_IGN == e || ROFF_CONT == e);
737 if (ROFF_CONT != e)
738 return(e);
739 }
740 if (r->eqn)
741 return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
742 if ( ! ctl) {
743 if (r->tbl)
744 return(tbl_read(r->tbl, ln, *bufp, pos));
745 return(roff_parsetext(bufp, szp, pos, offs));
746 }
747
748 /*
749 * If a scope is open, go to the child handler for that macro,
750 * as it may want to preprocess before doing anything with it.
751 * Don't do so if an equation is open.
752 */
753
754 if (r->last) {
755 t = r->last->tok;
756 assert(roffs[t].sub);
757 return((*roffs[t].sub)(r, t, bufp, szp,
758 ln, ppos, pos, offs));
759 }
760
761 /*
762 * Lastly, as we've no scope open, try to look up and execute
763 * the new macro. If no macro is found, simply return and let
764 * the compilers handle it.
765 */
766
767 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
768 return(ROFF_CONT);
769
770 assert(roffs[t].proc);
771 return((*roffs[t].proc)(r, t, bufp, szp, ln, ppos, pos, offs));
772 }
773
774 void
775 roff_endparse(struct roff *r)
776 {
777
778 if (r->last)
779 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
780 r->last->line, r->last->col, NULL);
781
782 if (r->eqn) {
783 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
784 r->eqn->eqn.ln, r->eqn->eqn.pos, NULL);
785 eqn_end(&r->eqn);
786 }
787
788 if (r->tbl) {
789 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
790 r->tbl->line, r->tbl->pos, NULL);
791 tbl_end(&r->tbl);
792 }
793 }
794
795 /*
796 * Parse a roff node's type from the input buffer. This must be in the
797 * form of ".foo xxx" in the usual way.
798 */
799 static enum rofft
800 roff_parse(struct roff *r, const char *buf, int *pos)
801 {
802 const char *mac;
803 size_t maclen;
804 enum rofft t;
805
806 if ('\0' == buf[*pos] || '"' == buf[*pos] ||
807 '\t' == buf[*pos] || ' ' == buf[*pos])
808 return(ROFF_MAX);
809
810 /* We stop the macro parse at an escape, tab, space, or nil. */
811
812 mac = buf + *pos;
813 maclen = strcspn(mac, " \\\t\0");
814
815 t = (r->current_string = roff_getstrn(r, mac, maclen))
816 ? ROFF_USERDEF : roffhash_find(mac, maclen);
817
818 *pos += (int)maclen;
819
820 while (buf[*pos] && ' ' == buf[*pos])
821 (*pos)++;
822
823 return(t);
824 }
825
826 static enum rofferr
827 roff_cblock(ROFF_ARGS)
828 {
829
830 /*
831 * A block-close `..' should only be invoked as a child of an
832 * ignore macro, otherwise raise a warning and just ignore it.
833 */
834
835 if (NULL == r->last) {
836 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
837 return(ROFF_IGN);
838 }
839
840 switch (r->last->tok) {
841 case ROFF_am:
842 /* FALLTHROUGH */
843 case ROFF_ami:
844 /* FALLTHROUGH */
845 case ROFF_am1:
846 /* FALLTHROUGH */
847 case ROFF_de:
848 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
849 /* FALLTHROUGH */
850 case ROFF_dei:
851 /* FALLTHROUGH */
852 case ROFF_ig:
853 break;
854 default:
855 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
856 return(ROFF_IGN);
857 }
858
859 if ((*bufp)[pos])
860 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
861
862 roffnode_pop(r);
863 roffnode_cleanscope(r);
864 return(ROFF_IGN);
865
866 }
867
868 static void
869 roffnode_cleanscope(struct roff *r)
870 {
871
872 while (r->last) {
873 if (--r->last->endspan != 0)
874 break;
875 roffnode_pop(r);
876 }
877 }
878
879 static void
880 roff_ccond(struct roff *r, int ln, int ppos)
881 {
882
883 if (NULL == r->last) {
884 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
885 return;
886 }
887
888 switch (r->last->tok) {
889 case ROFF_el:
890 /* FALLTHROUGH */
891 case ROFF_ie:
892 /* FALLTHROUGH */
893 case ROFF_if:
894 break;
895 default:
896 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
897 return;
898 }
899
900 if (r->last->endspan > -1) {
901 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
902 return;
903 }
904
905 roffnode_pop(r);
906 roffnode_cleanscope(r);
907 return;
908 }
909
910 static enum rofferr
911 roff_block(ROFF_ARGS)
912 {
913 int sv;
914 size_t sz;
915 char *name;
916
917 name = NULL;
918
919 if (ROFF_ig != tok) {
920 if ('\0' == (*bufp)[pos]) {
921 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
922 return(ROFF_IGN);
923 }
924
925 /*
926 * Re-write `de1', since we don't really care about
927 * groff's strange compatibility mode, into `de'.
928 */
929
930 if (ROFF_de1 == tok)
931 tok = ROFF_de;
932 if (ROFF_de == tok)
933 name = *bufp + pos;
934 else
935 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
936 roffs[tok].name);
937
938 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
939 pos++;
940
941 while (isspace((unsigned char)(*bufp)[pos]))
942 (*bufp)[pos++] = '\0';
943 }
944
945 roffnode_push(r, tok, name, ln, ppos);
946
947 /*
948 * At the beginning of a `de' macro, clear the existing string
949 * with the same name, if there is one. New content will be
950 * appended from roff_block_text() in multiline mode.
951 */
952
953 if (ROFF_de == tok)
954 roff_setstr(r, name, "", 0);
955
956 if ('\0' == (*bufp)[pos])
957 return(ROFF_IGN);
958
959 /* If present, process the custom end-of-line marker. */
960
961 sv = pos;
962 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
963 pos++;
964
965 /*
966 * Note: groff does NOT like escape characters in the input.
967 * Instead of detecting this, we're just going to let it fly and
968 * to hell with it.
969 */
970
971 assert(pos > sv);
972 sz = (size_t)(pos - sv);
973
974 if (1 == sz && '.' == (*bufp)[sv])
975 return(ROFF_IGN);
976
977 r->last->end = mandoc_malloc(sz + 1);
978
979 memcpy(r->last->end, *bufp + sv, sz);
980 r->last->end[(int)sz] = '\0';
981
982 if ((*bufp)[pos])
983 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
984
985 return(ROFF_IGN);
986 }
987
988 static enum rofferr
989 roff_block_sub(ROFF_ARGS)
990 {
991 enum rofft t;
992 int i, j;
993
994 /*
995 * First check whether a custom macro exists at this level. If
996 * it does, then check against it. This is some of groff's
997 * stranger behaviours. If we encountered a custom end-scope
998 * tag and that tag also happens to be a "real" macro, then we
999 * need to try interpreting it again as a real macro. If it's
1000 * not, then return ignore. Else continue.
1001 */
1002
1003 if (r->last->end) {
1004 for (i = pos, j = 0; r->last->end[j]; j++, i++)
1005 if ((*bufp)[i] != r->last->end[j])
1006 break;
1007
1008 if ('\0' == r->last->end[j] &&
1009 ('\0' == (*bufp)[i] ||
1010 ' ' == (*bufp)[i] ||
1011 '\t' == (*bufp)[i])) {
1012 roffnode_pop(r);
1013 roffnode_cleanscope(r);
1014
1015 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
1016 i++;
1017
1018 pos = i;
1019 if (ROFF_MAX != roff_parse(r, *bufp, &pos))
1020 return(ROFF_RERUN);
1021 return(ROFF_IGN);
1022 }
1023 }
1024
1025 /*
1026 * If we have no custom end-query or lookup failed, then try
1027 * pulling it out of the hashtable.
1028 */
1029
1030 t = roff_parse(r, *bufp, &pos);
1031
1032 /*
1033 * Macros other than block-end are only significant
1034 * in `de' blocks; elsewhere, simply throw them away.
1035 */
1036 if (ROFF_cblock != t) {
1037 if (ROFF_de == tok)
1038 roff_setstr(r, r->last->name, *bufp + ppos, 2);
1039 return(ROFF_IGN);
1040 }
1041
1042 assert(roffs[t].proc);
1043 return((*roffs[t].proc)(r, t, bufp, szp, ln, ppos, pos, offs));
1044 }
1045
1046 static enum rofferr
1047 roff_block_text(ROFF_ARGS)
1048 {
1049
1050 if (ROFF_de == tok)
1051 roff_setstr(r, r->last->name, *bufp + pos, 2);
1052
1053 return(ROFF_IGN);
1054 }
1055
1056 static enum rofferr
1057 roff_cond_sub(ROFF_ARGS)
1058 {
1059 enum rofft t;
1060 char *ep;
1061 int rr;
1062
1063 rr = r->last->rule;
1064 roffnode_cleanscope(r);
1065 t = roff_parse(r, *bufp, &pos);
1066
1067 /*
1068 * Fully handle known macros when they are structurally
1069 * required or when the conditional evaluated to true.
1070 */
1071
1072 if ((ROFF_MAX != t) &&
1073 (rr || ROFFMAC_STRUCT & roffs[t].flags)) {
1074 assert(roffs[t].proc);
1075 return((*roffs[t].proc)(r, t, bufp, szp,
1076 ln, ppos, pos, offs));
1077 }
1078
1079 /*
1080 * If `\}' occurs on a macro line without a preceding macro,
1081 * drop the line completely.
1082 */
1083
1084 ep = *bufp + pos;
1085 if ('\\' == ep[0] && '}' == ep[1])
1086 rr = 0;
1087
1088 /* Always check for the closing delimiter `\}'. */
1089
1090 while (NULL != (ep = strchr(ep, '\\'))) {
1091 if ('}' == *(++ep)) {
1092 *ep = '&';
1093 roff_ccond(r, ln, ep - *bufp - 1);
1094 }
1095 ++ep;
1096 }
1097 return(rr ? ROFF_CONT : ROFF_IGN);
1098 }
1099
1100 static enum rofferr
1101 roff_cond_text(ROFF_ARGS)
1102 {
1103 char *ep;
1104 int rr;
1105
1106 rr = r->last->rule;
1107 roffnode_cleanscope(r);
1108
1109 ep = *bufp + pos;
1110 while (NULL != (ep = strchr(ep, '\\'))) {
1111 if ('}' == *(++ep)) {
1112 *ep = '&';
1113 roff_ccond(r, ln, ep - *bufp - 1);
1114 }
1115 ++ep;
1116 }
1117 return(rr ? ROFF_CONT : ROFF_IGN);
1118 }
1119
/*
 * Parse a single signed integer number.  Stop at the first non-digit.
 * If there is at least one digit, return success and advance the
 * parse point, else return failure and leave the parse point
 * unchanged.  On failure, the result is set to 0.
 * The result may be NULL if only the parse point is of interest.
 *
 * Numbers too large for an int wrap around.  The digits are
 * accumulated in an unsigned variable because overflow of signed
 * integers, including the negation of INT_MIN, is undefined in C;
 * the final conversion to int is implementation-defined and wraps
 * on the usual two's complement platforms.
 */
static int
roff_getnum(const char *v, int *pos, int *res)
{
	unsigned int	 acc;
	int		 myres, neg, p;

	if (NULL == res)
		res = &myres;

	p = *pos;
	neg = '-' == v[p];
	if (neg)
		p++;

	acc = 0;
	for ( ; isdigit((unsigned char)v[p]); p++)
		acc = 10u * acc + (unsigned int)(v[p] - '0');

	/* No digit at all, with or without a sign. */
	if (p == *pos + neg) {
		*res = 0;
		return 0;
	}

	if (neg)
		acc = 0u - acc;
	*res = (int)acc;

	*pos = p;
	return 1;
}
1150
/*
 * Evaluate a string comparison condition.
 * The first character is the delimiter.
 * Succeed if the string up to its second occurrence
 * matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or lacking that, to the end of the line.
 * The cursor is never moved beyond the end of the line;
 * if it already is at the end of the line, there is no
 * delimiter, so fail without moving the cursor.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*s1, *s2, *s3;
	int		 match;

	match = 0;
	s1 = v + *pos;		/* initial delimiter */

	if ('\0' == *s1)	/* nothing to compare at all */
		return(0);

	s2 = s1 + 1;		/* for scanning the first string */
	s3 = strchr(s2, *s1);	/* for scanning the second string */

	if (NULL == s3)		/* found no middle delimiter */
		goto out;

	while ('\0' != *++s3) {
		if (*s2 != *s3) {  /* mismatch */
			s3 = strchr(s3, *s1);
			break;
		}
		if (*s3 == *s1) {  /* found the final delimiter */
			match = 1;
			break;
		}
		s2++;
	}

out:
	/*
	 * Here, s3 is NULL if a delimiter is missing after a
	 * mismatch or right from the start, points to the nil if
	 * the line ended while the strings still matched, and
	 * points to the final delimiter otherwise.  Only step
	 * over a delimiter, never over the nil.
	 */
	if (NULL == s3)
		s3 = strchr(s2, '\0');
	else if ('\0' != *s3)
		s3++;
	*pos = (int)(s3 - v);
	return(match);
}
1193
/*
 * Evaluate an optionally negated single character, numerical,
 * or string condition, and advance the cursor behind it.
 *
 * A leading `!' negates the result of the single character,
 * numerical and string conditions.  The single character
 * conditions `n' and `o' are true, while `c', `d', `e', `r' and
 * `t' are false.  A numerical condition is true if the number is
 * positive.  Anything else is taken as a string comparison.
 *
 * A missing condition is false, even after `!'.  It must not
 * reach roff_evalstrcond(), which would look for a delimiter
 * and for the strings behind the end of the line.
 */
static int
roff_evalcond(const char *v, int *pos)
{
	int	 wanttrue, number;

	if ('!' == v[*pos]) {
		wanttrue = 0;
		(*pos)++;
	} else
		wanttrue = 1;

	switch (v[*pos]) {
	case '\0':
		return(0);
	case 'n':
		/* FALLTHROUGH */
	case 'o':
		(*pos)++;
		return(wanttrue);
	case 'c':
		/* FALLTHROUGH */
	case 'd':
		/* FALLTHROUGH */
	case 'e':
		/* FALLTHROUGH */
	case 'r':
		/* FALLTHROUGH */
	case 't':
		(*pos)++;
		return(!wanttrue);
	default:
		break;
	}

	if (roff_evalnum(v, pos, &number, 0))
		return((number > 0) == wanttrue);
	else
		return(roff_evalstrcond(v, pos) == wanttrue);
}
1235
1236 static enum rofferr
1237 roff_line_ignore(ROFF_ARGS)
1238 {
1239
1240 return(ROFF_IGN);
1241 }
1242
1243 static enum rofferr
1244 roff_cond(ROFF_ARGS)
1245 {
1246
1247 roffnode_push(r, tok, NULL, ln, ppos);
1248
1249 /*
1250 * An `.el' has no conditional body: it will consume the value
1251 * of the current rstack entry set in prior `ie' calls or
1252 * defaults to DENY.
1253 *
1254 * If we're not an `el', however, then evaluate the conditional.
1255 */
1256
1257 r->last->rule = ROFF_el == tok ?
1258 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
1259 roff_evalcond(*bufp, &pos);
1260
1261 /*
1262 * An if-else will put the NEGATION of the current evaluated
1263 * conditional into the stack of rules.
1264 */
1265
1266 if (ROFF_ie == tok) {
1267 if (r->rstackpos == RSTACK_MAX - 1) {
1268 mandoc_msg(MANDOCERR_MEM,
1269 r->parse, ln, ppos, NULL);
1270 return(ROFF_ERR);
1271 }
1272 r->rstack[++r->rstackpos] = !r->last->rule;
1273 }
1274
1275 /* If the parent has false as its rule, then so do we. */
1276
1277 if (r->last->parent && !r->last->parent->rule)
1278 r->last->rule = 0;
1279
1280 /*
1281 * Determine scope.
1282 * If there is nothing on the line after the conditional,
1283 * not even whitespace, use next-line scope.
1284 */
1285
1286 if ('\0' == (*bufp)[pos]) {
1287 r->last->endspan = 2;
1288 goto out;
1289 }
1290
1291 while (' ' == (*bufp)[pos])
1292 pos++;
1293
1294 /* An opening brace requests multiline scope. */
1295
1296 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1297 r->last->endspan = -1;
1298 pos += 2;
1299 goto out;
1300 }
1301
1302 /*
1303 * Anything else following the conditional causes
1304 * single-line scope. Warn if the scope contains
1305 * nothing but trailing whitespace.
1306 */
1307
1308 if ('\0' == (*bufp)[pos])
1309 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
1310
1311 r->last->endspan = 1;
1312
1313 out:
1314 *offs = pos;
1315 return(ROFF_RERUN);
1316 }
1317
1318 static enum rofferr
1319 roff_ds(ROFF_ARGS)
1320 {
1321 char *name, *string;
1322
1323 /*
1324 * A symbol is named by the first word following the macro
1325 * invocation up to a space. Its value is anything after the
1326 * name's trailing whitespace and optional double-quote. Thus,
1327 *
1328 * [.ds foo "bar " ]
1329 *
1330 * will have `bar " ' as its value.
1331 */
1332
1333 string = *bufp + pos;
1334 name = roff_getname(r, &string, ln, pos);
1335 if ('\0' == *name)
1336 return(ROFF_IGN);
1337
1338 /* Read past initial double-quote. */
1339 if ('"' == *string)
1340 string++;
1341
1342 /* The rest is the value. */
1343 roff_setstr(r, name, string, ROFF_as == tok);
1344 return(ROFF_IGN);
1345 }
1346
/*
 * Parse a single operator, one or two characters long,
 * starting at v[*pos].
 * If the operator is recognized, store its code in *res, advance
 * the parse point past it, and return the (non-zero) code.
 * Otherwise, return 0 and leave the parse point unchanged.
 * One-character operators are their own code; the two-character
 * operators map to: "<=" 'l', "<>" '!', "<?" 'i', ">=" 'g',
 * ">?" 'a', and "==" '='.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*cp;
	int		 len;

	cp = v + *pos;
	*res = *cp;
	len = 1;

	if ('<' == *cp) {
		if ('=' == cp[1]) {
			*res = 'l';
			len = 2;
		} else if ('>' == cp[1]) {
			*res = '!';
			len = 2;
		} else if ('?' == cp[1]) {
			*res = 'i';
			len = 2;
		}
	} else if ('>' == *cp) {
		if ('=' == cp[1]) {
			*res = 'g';
			len = 2;
		} else if ('?' == cp[1]) {
			*res = 'a';
			len = 2;
		}
	} else if ('=' == *cp) {
		/* "==" is a synonym for "=". */
		if ('=' == cp[1])
			len = 2;
	} else if ('\0' == *cp || NULL == strchr("+-*/%&:", *cp))
		return 0;

	*pos += len;
	return *res;
}
1416
/*
 * Evaluate one operand: either a parenthesized numeric
 * expression or whatever roff_getnum() accepts.
 * Returns 1 on success and 0 on failure.
 */
static int
roff_evalpar(const char *v, int *pos, int *res)
{

	if ('(' == v[*pos]) {
		(*pos)++;
		if (0 == roff_evalnum(v, pos, res, 1))
			return 0;

		/*
		 * A missing closing parenthesis is an error
		 * in validation mode (res is NULL),
		 * but is ignored in evaluation mode.
		 */

		if (')' != v[*pos])
			return NULL != res;

		(*pos)++;
		return 1;
	}

	return roff_getnum(v, pos, res);
}
1445
/*
 * Evaluate a complete numeric expression.
 * Proceed left to right, there is no concept of precedence.
 *
 * If pos is NULL, parsing starts at offset 0 of v and the final
 * parse position is not reported back.
 * If res is NULL, the expression is parsed but nothing is computed.
 * If skipwhite is non-zero, white space around operands and
 * operators is skipped.
 * Division and modulus by zero yield 0.
 * Returns 1 on success and 0 if an operand cannot be parsed.
 */
static int
roff_evalnum(const char *v, int *pos, int *res, int skipwhite)
{
	int		 mypos, operand2;
	char		 operator;

	if (NULL == pos) {
		mypos = 0;
		pos = &mypos;
	}

	if (skipwhite)
		while (isspace((unsigned char)v[*pos]))
			(*pos)++;

	if ( ! roff_evalpar(v, pos, res))
		return(0);

	while (1) {
		if (skipwhite)
			while (isspace((unsigned char)v[*pos]))
				(*pos)++;

		/* No further operator: the expression ends here. */
		if ( ! roff_getop(v, pos, &operator))
			break;

		if (skipwhite)
			while (isspace((unsigned char)v[*pos]))
				(*pos)++;

		if ( ! roff_evalpar(v, pos, &operand2))
			return(0);

		if (skipwhite)
			while (isspace((unsigned char)v[*pos]))
				(*pos)++;

		/* Parse only, do not compute. */
		if (NULL == res)
			continue;

		switch (operator) {
		case '+':
			*res += operand2;
			break;
		case '-':
			*res -= operand2;
			break;
		case '*':
			*res *= operand2;
			break;
		case '/':
			/* Dividing by zero is undefined in C. */
			if (0 == operand2)
				*res = 0;
			else
				*res /= operand2;
			break;
		case '%':
			/* So is the remainder of a division by zero. */
			if (0 == operand2)
				*res = 0;
			else
				*res %= operand2;
			break;
		case '<':
			*res = *res < operand2;
			break;
		case '>':
			*res = *res > operand2;
			break;
		case 'l':
			*res = *res <= operand2;
			break;
		case 'g':
			*res = *res >= operand2;
			break;
		case '=':
			*res = *res == operand2;
			break;
		case '!':
			*res = *res != operand2;
			break;
		case '&':
			*res = *res && operand2;
			break;
		case ':':
			*res = *res || operand2;
			break;
		case 'i':
			/* Minimum. */
			if (operand2 < *res)
				*res = operand2;
			break;
		case 'a':
			/* Maximum. */
			if (operand2 > *res)
				*res = operand2;
			break;
		default:
			/* roff_getop() returned an unexpected code. */
			abort();
		}
	}
	return(1);
}
1544
1545 void
1546 roff_setreg(struct roff *r, const char *name, int val, char sign)
1547 {
1548 struct roffreg *reg;
1549
1550 /* Search for an existing register with the same name. */
1551 reg = r->regtab;
1552
1553 while (reg && strcmp(name, reg->key.p))
1554 reg = reg->next;
1555
1556 if (NULL == reg) {
1557 /* Create a new register. */
1558 reg = mandoc_malloc(sizeof(struct roffreg));
1559 reg->key.p = mandoc_strdup(name);
1560 reg->key.sz = strlen(name);
1561 reg->val = 0;
1562 reg->next = r->regtab;
1563 r->regtab = reg;
1564 }
1565
1566 if ('+' == sign)
1567 reg->val += val;
1568 else if ('-' == sign)
1569 reg->val -= val;
1570 else
1571 reg->val = val;
1572 }
1573
/*
 * Look up a predefined read-only number register.
 * Only the first byte of name is inspected.
 * Returns the value of the register, or -1 if the register is
 * not predefined.  Should a predefined register with the value -1
 * ever be needed, another special value would have to be chosen.
 */
static int
roff_getregro(const char *name)
{
	static const struct {
		char	 key;
		int	 val;
	} predef[] = {
		{ 'A',  0 },	/* ASCII approximation mode is always off. */
		{ 'g',  1 },	/* Groff compatibility mode is always on. */
		{ 'H', 24 },	/* Fixed horizontal resolution. */
		{ 'j',  0 },	/* Always adjust left margin only. */
		{ 'T',  1 },	/* Some output device is always defined. */
		{ 'V', 40 }	/* Fixed vertical resolution. */
	};
	size_t	 i;

	for (i = 0; i < sizeof(predef) / sizeof(predef[0]); i++)
		if (predef[i].key == *name)
			return predef[i].val;

	return -1;
}
1601
1602 int
1603 roff_getreg(const struct roff *r, const char *name)
1604 {
1605 struct roffreg *reg;
1606 int val;
1607
1608 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
1609 val = roff_getregro(name + 1);
1610 if (-1 != val)
1611 return (val);
1612 }
1613
1614 for (reg = r->regtab; reg; reg = reg->next)
1615 if (0 == strcmp(name, reg->key.p))
1616 return(reg->val);
1617
1618 return(0);
1619 }
1620
1621 static int
1622 roff_getregn(const struct roff *r, const char *name, size_t len)
1623 {
1624 struct roffreg *reg;
1625 int val;
1626
1627 if ('.' == name[0] && 2 == len) {
1628 val = roff_getregro(name + 1);
1629 if (-1 != val)
1630 return (val);
1631 }
1632
1633 for (reg = r->regtab; reg; reg = reg->next)
1634 if (len == reg->key.sz &&
1635 0 == strncmp(name, reg->key.p, len))
1636 return(reg->val);
1637
1638 return(0);
1639 }
1640
1641 static void
1642 roff_freereg(struct roffreg *reg)
1643 {
1644 struct roffreg *old_reg;
1645
1646 while (NULL != reg) {
1647 free(reg->key.p);
1648 old_reg = reg;
1649 reg = reg->next;
1650 free(old_reg);
1651 }
1652 }
1653
1654 static enum rofferr
1655 roff_nr(ROFF_ARGS)
1656 {
1657 const char *key;
1658 char *val;
1659 int iv;
1660 char sign;
1661
1662 val = *bufp + pos;
1663 key = roff_getname(r, &val, ln, pos);
1664
1665 sign = *val;
1666 if ('+' == sign || '-' == sign)
1667 val++;
1668
1669 if (roff_evalnum(val, NULL, &iv, 0))
1670 roff_setreg(r, key, iv, sign);
1671
1672 return(ROFF_IGN);
1673 }
1674
1675 static enum rofferr
1676 roff_rr(ROFF_ARGS)
1677 {
1678 struct roffreg *reg, **prev;
1679 const char *name;
1680 char *cp;
1681
1682 cp = *bufp + pos;
1683 name = roff_getname(r, &cp, ln, pos);
1684
1685 prev = &r->regtab;
1686 while (1) {
1687 reg = *prev;
1688 if (NULL == reg || !strcmp(name, reg->key.p))
1689 break;
1690 prev = &reg->next;
1691 }
1692 if (NULL != reg) {
1693 *prev = reg->next;
1694 free(reg->key.p);
1695 free(reg);
1696 }
1697 return(ROFF_IGN);
1698 }
1699
1700 static enum rofferr
1701 roff_rm(ROFF_ARGS)
1702 {
1703 const char *name;
1704 char *cp;
1705
1706 cp = *bufp + pos;
1707 while ('\0' != *cp) {
1708 name = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1709 if ('\0' != *name)
1710 roff_setstr(r, name, NULL, 0);
1711 }
1712 return(ROFF_IGN);
1713 }
1714
1715 static enum rofferr
1716 roff_it(ROFF_ARGS)
1717 {
1718 char *cp;
1719 size_t len;
1720 int iv;
1721
1722 /* Parse the number of lines. */
1723 cp = *bufp + pos;
1724 len = strcspn(cp, " \t");
1725 cp[len] = '\0';
1726 if ((iv = mandoc_strntoi(cp, len, 10)) <= 0) {
1727 mandoc_msg(MANDOCERR_NUMERIC, r->parse,
1728 ln, ppos, *bufp + 1);
1729 return(ROFF_IGN);
1730 }
1731 cp += len + 1;
1732
1733 /* Arm the input line trap. */
1734 roffit_lines = iv;
1735 roffit_macro = mandoc_strdup(cp);
1736 return(ROFF_IGN);
1737 }
1738
1739 static enum rofferr
1740 roff_Dd(ROFF_ARGS)
1741 {
1742 const char *const *cp;
1743
1744 if (0 == ((MPARSE_MDOC | MPARSE_QUICK) & r->options))
1745 for (cp = __mdoc_reserved; *cp; cp++)
1746 roff_setstr(r, *cp, NULL, 0);
1747
1748 return(ROFF_CONT);
1749 }
1750
1751 static enum rofferr
1752 roff_TH(ROFF_ARGS)
1753 {
1754 const char *const *cp;
1755
1756 if (0 == (MPARSE_QUICK & r->options))
1757 for (cp = __man_reserved; *cp; cp++)
1758 roff_setstr(r, *cp, NULL, 0);
1759
1760 return(ROFF_CONT);
1761 }
1762
1763 static enum rofferr
1764 roff_TE(ROFF_ARGS)
1765 {
1766
1767 if (NULL == r->tbl)
1768 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1769 else
1770 tbl_end(&r->tbl);
1771
1772 return(ROFF_IGN);
1773 }
1774
1775 static enum rofferr
1776 roff_T_(ROFF_ARGS)
1777 {
1778
1779 if (NULL == r->tbl)
1780 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1781 else
1782 tbl_restart(ppos, ln, r->tbl);
1783
1784 return(ROFF_IGN);
1785 }
1786
#if 0
/*
 * Currently compiled out.
 * Returns 1 if an equation is open and eqn_end() reports
 * ROFF_EQN for it, 0 otherwise.
 */
static int
roff_closeeqn(struct roff *r)
{

	if (NULL == r->eqn)
		return 0;
	return ROFF_EQN == eqn_end(&r->eqn);
}
#endif
1795
1796 static void
1797 roff_openeqn(struct roff *r, const char *name, int line,
1798 int offs, const char *buf)
1799 {
1800 struct eqn_node *e;
1801 int poff;
1802
1803 assert(NULL == r->eqn);
1804 e = eqn_alloc(name, offs, line, r->parse);
1805
1806 if (r->last_eqn)
1807 r->last_eqn->next = e;
1808 else
1809 r->first_eqn = r->last_eqn = e;
1810
1811 r->eqn = r->last_eqn = e;
1812
1813 if (buf) {
1814 poff = 0;
1815 eqn_read(&r->eqn, line, buf, offs, &poff);
1816 }
1817 }
1818
1819 static enum rofferr
1820 roff_EQ(ROFF_ARGS)
1821 {
1822
1823 roff_openeqn(r, *bufp + pos, ln, ppos, NULL);
1824 return(ROFF_IGN);
1825 }
1826
1827 static enum rofferr
1828 roff_EN(ROFF_ARGS)
1829 {
1830
1831 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1832 return(ROFF_IGN);
1833 }
1834
1835 static enum rofferr
1836 roff_TS(ROFF_ARGS)
1837 {
1838 struct tbl_node *tbl;
1839
1840 if (r->tbl) {
1841 mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
1842 tbl_end(&r->tbl);
1843 }
1844
1845 tbl = tbl_alloc(ppos, ln, r->parse);
1846
1847 if (r->last_tbl)
1848 r->last_tbl->next = tbl;
1849 else
1850 r->first_tbl = r->last_tbl = tbl;
1851
1852 r->tbl = r->last_tbl = tbl;
1853 return(ROFF_IGN);
1854 }
1855
1856 static enum rofferr
1857 roff_cc(ROFF_ARGS)
1858 {
1859 const char *p;
1860
1861 p = *bufp + pos;
1862
1863 if ('\0' == *p || '.' == (r->control = *p++))
1864 r->control = 0;
1865
1866 if ('\0' != *p)
1867 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1868
1869 return(ROFF_IGN);
1870 }
1871
1872 static enum rofferr
1873 roff_tr(ROFF_ARGS)
1874 {
1875 const char *p, *first, *second;
1876 size_t fsz, ssz;
1877 enum mandoc_esc esc;
1878
1879 p = *bufp + pos;
1880
1881 if ('\0' == *p) {
1882 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1883 return(ROFF_IGN);
1884 }
1885
1886 while ('\0' != *p) {
1887 fsz = ssz = 1;
1888
1889 first = p++;
1890 if ('\\' == *first) {
1891 esc = mandoc_escape(&p, NULL, NULL);
1892 if (ESCAPE_ERROR == esc) {
1893 mandoc_msg(MANDOCERR_BADESCAPE,
1894 r->parse, ln,
1895 (int)(p - *bufp), NULL);
1896 return(ROFF_IGN);
1897 }
1898 fsz = (size_t)(p - first);
1899 }
1900
1901 second = p++;
1902 if ('\\' == *second) {
1903 esc = mandoc_escape(&p, NULL, NULL);
1904 if (ESCAPE_ERROR == esc) {
1905 mandoc_msg(MANDOCERR_BADESCAPE,
1906 r->parse, ln,
1907 (int)(p - *bufp), NULL);
1908 return(ROFF_IGN);
1909 }
1910 ssz = (size_t)(p - second);
1911 } else if ('\0' == *second) {
1912 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
1913 ln, (int)(p - *bufp), NULL);
1914 second = " ";
1915 p--;
1916 }
1917
1918 if (fsz > 1) {
1919 roff_setstrn(&r->xmbtab, first, fsz,
1920 second, ssz, 0);
1921 continue;
1922 }
1923
1924 if (NULL == r->xtab)
1925 r->xtab = mandoc_calloc(128,
1926 sizeof(struct roffstr));
1927
1928 free(r->xtab[(int)*first].p);
1929 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
1930 r->xtab[(int)*first].sz = ssz;
1931 }
1932
1933 return(ROFF_IGN);
1934 }
1935
1936 static enum rofferr
1937 roff_so(ROFF_ARGS)
1938 {
1939 char *name;
1940
1941 name = *bufp + pos;
1942 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, ".so %s", name);
1943
1944 /*
1945 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1946 * opening anything that's not in our cwd or anything beneath
1947 * it. Thus, explicitly disallow traversing up the file-system
1948 * or using absolute paths.
1949 */
1950
1951 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1952 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
1953 ".so %s", name);
1954 return(ROFF_ERR);
1955 }
1956
1957 *offs = pos;
1958 return(ROFF_SO);
1959 }
1960
1961 static enum rofferr
1962 roff_userdef(ROFF_ARGS)
1963 {
1964 const char *arg[9];
1965 char *cp, *n1, *n2;
1966 int i;
1967
1968 /*
1969 * Collect pointers to macro argument strings
1970 * and NUL-terminate them.
1971 */
1972 cp = *bufp + pos;
1973 for (i = 0; i < 9; i++)
1974 arg[i] = '\0' == *cp ? "" :
1975 mandoc_getarg(r->parse, &cp, ln, &pos);
1976
1977 /*
1978 * Expand macro arguments.
1979 */
1980 *szp = 0;
1981 n1 = cp = mandoc_strdup(r->current_string);
1982 while (NULL != (cp = strstr(cp, "\\$"))) {
1983 i = cp[2] - '1';
1984 if (0 > i || 8 < i) {
1985 /* Not an argument invocation. */
1986 cp += 2;
1987 continue;
1988 }
1989 *cp = '\0';
1990 *szp = mandoc_asprintf(&n2, "%s%s%s",
1991 n1, arg[i], cp + 3) + 1;
1992 cp = n2 + (cp - n1);
1993 free(n1);
1994 n1 = n2;
1995 }
1996
1997 /*
1998 * Replace the macro invocation
1999 * by the expanded macro.
2000 */
2001 free(*bufp);
2002 *bufp = n1;
2003 if (0 == *szp)
2004 *szp = strlen(*bufp) + 1;
2005
2006 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
2007 ROFF_REPARSE : ROFF_APPEND);
2008 }
2009
2010 static char *
2011 roff_getname(struct roff *r, char **cpp, int ln, int pos)
2012 {
2013 char *name, *cp;
2014
2015 name = *cpp;
2016 if ('\0' == *name)
2017 return(name);
2018
2019 /* Read until end of name. */
2020 for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
2021 if ('\\' != *cp)
2022 continue;
2023 cp++;
2024 if ('\\' == *cp)
2025 continue;
2026 mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
2027 *cp = '\0';
2028 name = cp;
2029 }
2030
2031 /* Nil-terminate name. */
2032 if ('\0' != *cp)
2033 *(cp++) = '\0';
2034
2035 /* Read past spaces. */
2036 while (' ' == *cp)
2037 cp++;
2038
2039 *cpp = cp;
2040 return(name);
2041 }
2042
2043 /*
2044 * Store *string into the user-defined string called *name.
2045 * To clear an existing entry, call with (*r, *name, NULL, 0).
2046 * append == 0: replace mode
2047 * append == 1: single-line append mode
2048 * append == 2: multiline append mode, append '\n' after each call
2049 */
2050 static void
2051 roff_setstr(struct roff *r, const char *name, const char *string,
2052 int append)
2053 {
2054
2055 roff_setstrn(&r->strtab, name, strlen(name), string,
2056 string ? strlen(string) : 0, append);
2057 }
2058
2059 static void
2060 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
2061 const char *string, size_t stringsz, int append)
2062 {
2063 struct roffkv *n;
2064 char *c;
2065 int i;
2066 size_t oldch, newch;
2067
2068 /* Search for an existing string with the same name. */
2069 n = *r;
2070
2071 while (n && (namesz != n->key.sz ||
2072 strncmp(n->key.p, name, namesz)))
2073 n = n->next;
2074
2075 if (NULL == n) {
2076 /* Create a new string table entry. */
2077 n = mandoc_malloc(sizeof(struct roffkv));
2078 n->key.p = mandoc_strndup(name, namesz);
2079 n->key.sz = namesz;
2080 n->val.p = NULL;
2081 n->val.sz = 0;
2082 n->next = *r;
2083 *r = n;
2084 } else if (0 == append) {
2085 free(n->val.p);
2086 n->val.p = NULL;
2087 n->val.sz = 0;
2088 }
2089
2090 if (NULL == string)
2091 return;
2092
2093 /*
2094 * One additional byte for the '\n' in multiline mode,
2095 * and one for the terminating '\0'.
2096 */
2097 newch = stringsz + (1 < append ? 2u : 1u);
2098
2099 if (NULL == n->val.p) {
2100 n->val.p = mandoc_malloc(newch);
2101 *n->val.p = '\0';
2102 oldch = 0;
2103 } else {
2104 oldch = n->val.sz;
2105 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
2106 }
2107
2108 /* Skip existing content in the destination buffer. */
2109 c = n->val.p + (int)oldch;
2110
2111 /* Append new content to the destination buffer. */
2112 i = 0;
2113 while (i < (int)stringsz) {
2114 /*
2115 * Rudimentary roff copy mode:
2116 * Handle escaped backslashes.
2117 */
2118 if ('\\' == string[i] && '\\' == string[i + 1])
2119 i++;
2120 *c++ = string[i++];
2121 }
2122
2123 /* Append terminating bytes. */
2124 if (1 < append)
2125 *c++ = '\n';
2126
2127 *c = '\0';
2128 n->val.sz = (int)(c - n->val.p);
2129 }
2130
2131 static const char *
2132 roff_getstrn(const struct roff *r, const char *name, size_t len)
2133 {
2134 const struct roffkv *n;
2135 int i;
2136
2137 for (n = r->strtab; n; n = n->next)
2138 if (0 == strncmp(name, n->key.p, len) &&
2139 '\0' == n->key.p[(int)len])
2140 return(n->val.p);
2141
2142 for (i = 0; i < PREDEFS_MAX; i++)
2143 if (0 == strncmp(name, predefs[i].name, len) &&
2144 '\0' == predefs[i].name[(int)len])
2145 return(predefs[i].str);
2146
2147 return(NULL);
2148 }
2149
2150 static void
2151 roff_freestr(struct roffkv *r)
2152 {
2153 struct roffkv *n, *nn;
2154
2155 for (n = r; n; n = nn) {
2156 free(n->key.p);
2157 free(n->val.p);
2158 nn = n->next;
2159 free(n);
2160 }
2161 }
2162
2163 const struct tbl_span *
2164 roff_span(const struct roff *r)
2165 {
2166
2167 return(r->tbl ? tbl_span(r->tbl) : NULL);
2168 }
2169
2170 const struct eqn *
2171 roff_eqn(const struct roff *r)
2172 {
2173
2174 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
2175 }
2176
2177 /*
2178 * Duplicate an input string, making the appropriate character
2179 * conversations (as stipulated by `tr') along the way.
2180 * Returns a heap-allocated string with all the replacements made.
2181 */
2182 char *
2183 roff_strdup(const struct roff *r, const char *p)
2184 {
2185 const struct roffkv *cp;
2186 char *res;
2187 const char *pp;
2188 size_t ssz, sz;
2189 enum mandoc_esc esc;
2190
2191 if (NULL == r->xmbtab && NULL == r->xtab)
2192 return(mandoc_strdup(p));
2193 else if ('\0' == *p)
2194 return(mandoc_strdup(""));
2195
2196 /*
2197 * Step through each character looking for term matches
2198 * (remember that a `tr' can be invoked with an escape, which is
2199 * a glyph but the escape is multi-character).
2200 * We only do this if the character hash has been initialised
2201 * and the string is >0 length.
2202 */
2203
2204 res = NULL;
2205 ssz = 0;
2206
2207 while ('\0' != *p) {
2208 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
2209 sz = r->xtab[(int)*p].sz;
2210 res = mandoc_realloc(res, ssz + sz + 1);
2211 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
2212 ssz += sz;
2213 p++;
2214 continue;
2215 } else if ('\\' != *p) {
2216 res = mandoc_realloc(res, ssz + 2);
2217 res[ssz++] = *p++;
2218 continue;
2219 }
2220
2221 /* Search for term matches. */
2222 for (cp = r->xmbtab; cp; cp = cp->next)
2223 if (0 == strncmp(p, cp->key.p, cp->key.sz))
2224 break;
2225
2226 if (NULL != cp) {
2227 /*
2228 * A match has been found.
2229 * Append the match to the array and move
2230 * forward by its keysize.
2231 */
2232 res = mandoc_realloc(res,
2233 ssz + cp->val.sz + 1);
2234 memcpy(res + ssz, cp->val.p, cp->val.sz);
2235 ssz += cp->val.sz;
2236 p += (int)cp->key.sz;
2237 continue;
2238 }
2239
2240 /*
2241 * Handle escapes carefully: we need to copy
2242 * over just the escape itself, or else we might
2243 * do replacements within the escape itself.
2244 * Make sure to pass along the bogus string.
2245 */
2246 pp = p++;
2247 esc = mandoc_escape(&p, NULL, NULL);
2248 if (ESCAPE_ERROR == esc) {
2249 sz = strlen(pp);
2250 res = mandoc_realloc(res, ssz + sz + 1);
2251 memcpy(res + ssz, pp, sz);
2252 break;
2253 }
2254 /*
2255 * We bail out on bad escapes.
2256 * No need to warn: we already did so when
2257 * roff_res() was called.
2258 */
2259 sz = (int)(p - pp);
2260 res = mandoc_realloc(res, ssz + sz + 1);
2261 memcpy(res + ssz, pp, sz);
2262 ssz += sz;
2263 }
2264
2265 res[(int)ssz] = '\0';
2266 return(res);
2267 }
2268
2269 /*
2270 * Find out whether a line is a macro line or not.
2271 * If it is, adjust the current position and return one; if it isn't,
2272 * return zero and don't change the current position.
2273 * If the control character has been set with `.cc', then let that grain
2274 * precedence.
2275 * This is slighly contrary to groff, where using the non-breaking
2276 * control character when `cc' has been invoked will cause the
2277 * non-breaking macro contents to be printed verbatim.
2278 */
2279 int
2280 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
2281 {
2282 int pos;
2283
2284 pos = *ppos;
2285
2286 if (0 != r->control && cp[pos] == r->control)
2287 pos++;
2288 else if (0 != r->control)
2289 return(0);
2290 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
2291 pos += 2;
2292 else if ('.' == cp[pos] || '\'' == cp[pos])
2293 pos++;
2294 else
2295 return(0);
2296
2297 while (' ' == cp[pos] || '\t' == cp[pos])
2298 pos++;
2299
2300 *ppos = pos;
2301 return(1);
2302 }