]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
Improve and test the messages about empty macros,
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.215 2014/07/01 00:32:29 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include "mandoc.h"
29 #include "mandoc_aux.h"
30 #include "libroff.h"
31 #include "libmandoc.h"
32
33 /* Maximum number of nested if-else conditionals. */
34 #define RSTACK_MAX 128
35
36 /* Maximum number of string expansions per line, to break infinite loops. */
37 #define EXPAND_LIMIT 1000
38
/*
 * Tokens for the requests and macros handled in this file.
 * The values double as indices into the roffs[] table, so the order
 * of the enumerators must match the order of the table entries.
 */
enum	rofft {
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_as,
	ROFF_cc,
	ROFF_ce,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_fam,
	ROFF_hw,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_ps,
	ROFF_rm,
	ROFF_rr,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,
	ROFF_Dd,
	ROFF_TH,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_, /* the `T&' macro */
	ROFF_EQ,
	ROFF_EN,
	ROFF_cblock, /* the block closer `..' */
	ROFF_USERDEF, /* user-defined macro; not entered into the hash */
	ROFF_MAX /* number of tokens; also returned for "not found" */
};
80
/*
 * An incredibly-simple string buffer.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};

/*
 * A key-value roffstr pair as part of a singly-linked list.
 */
struct	roffkv {
	struct roffstr	 key; /* name to look up */
	struct roffstr	 val; /* contents stored under that name */
	struct roffkv	*next; /* next in list */
};

/*
 * A single number register as part of a singly-linked list.
 */
struct	roffreg {
	struct roffstr	 key; /* register name */
	int		 val; /* current register value */
	struct roffreg	*next; /* next in list */
};
106
/*
 * The roff parser context: one is created by roff_alloc(), emptied
 * between inputs by roff_reset() and destroyed by roff_free().
 */
struct	roff {
	struct mparse	*parse; /* parse point */
	int		 options; /* parse options */
	struct roffnode	*last; /* leaf of stack */
	int		 rstack[RSTACK_MAX]; /* stack of !`ie' rules */
	char		 control; /* control character */
	int		 rstackpos; /* position in rstack; -1 when empty */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* last equation parsed */
	struct eqn_node	*first_eqn; /* first equation parsed */
	struct eqn_node	*eqn; /* current equation being parsed */
};
126
/*
 * One entry on the stack of open blocks and conditionals.
 * Entries are created by roffnode_push() and released by roffnode_pop().
 */
struct	roffnode {
	enum rofft	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* end-rules: custom token */
	/*
	 * roff_cond() sets endspan to 2 for next-line scope, 1 for
	 * single-line scope and -1 for a brace-delimited scope;
	 * roffnode_cleanscope() decrements it and pops the node
	 * when it reaches zero.
	 */
	int		 endspan; /* end-rules: next-line or infty */
	int		 rule; /* current evaluation rule */
};
137
/*
 * Parameter list shared by all request handlers, so that each of them
 * can be stored in a roffproc function pointer.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum rofft tok, /* tok of macro */ \
			 char **bufp, /* input buffer */ \
			 size_t *szp, /* size of input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Signature of a request handler. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
148
/*
 * Description of one request or macro: its name and the handlers to
 * call when it is encountered, or when input arrives while it is the
 * innermost open scope.
 */
struct	roffmac {
	const char	*name; /* macro name */
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
	/* roff_cond_sub() runs such macros even if the condition is false. */
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
	struct roffmac	*next; /* next macro in the same hash bucket */
};
158
/*
 * One predefined string: a name and the text it is replaced with.
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/* Expands to one struct predef initializer, including the trailing comma. */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
166
167 static enum rofft roffhash_find(const char *, size_t);
168 static void roffhash_init(void);
169 static void roffnode_cleanscope(struct roff *);
170 static void roffnode_pop(struct roff *);
171 static void roffnode_push(struct roff *, enum rofft,
172 const char *, int, int);
173 static enum rofferr roff_block(ROFF_ARGS);
174 static enum rofferr roff_block_text(ROFF_ARGS);
175 static enum rofferr roff_block_sub(ROFF_ARGS);
176 static enum rofferr roff_cblock(ROFF_ARGS);
177 static enum rofferr roff_cc(ROFF_ARGS);
178 static void roff_ccond(struct roff *, int, int);
179 static enum rofferr roff_cond(ROFF_ARGS);
180 static enum rofferr roff_cond_text(ROFF_ARGS);
181 static enum rofferr roff_cond_sub(ROFF_ARGS);
182 static enum rofferr roff_ds(ROFF_ARGS);
183 static int roff_evalcond(const char *, int *);
184 static int roff_evalnum(const char *, int *, int *, int);
185 static int roff_evalpar(const char *, int *, int *);
186 static int roff_evalstrcond(const char *, int *);
187 static void roff_free1(struct roff *);
188 static void roff_freereg(struct roffreg *);
189 static void roff_freestr(struct roffkv *);
190 static size_t roff_getname(struct roff *, char **, int, int);
191 static int roff_getnum(const char *, int *, int *);
192 static int roff_getop(const char *, int *, char *);
193 static int roff_getregn(const struct roff *,
194 const char *, size_t);
195 static int roff_getregro(const char *name);
196 static const char *roff_getstrn(const struct roff *,
197 const char *, size_t);
198 static enum rofferr roff_it(ROFF_ARGS);
199 static enum rofferr roff_line_ignore(ROFF_ARGS);
200 static enum rofferr roff_nr(ROFF_ARGS);
201 static void roff_openeqn(struct roff *, const char *,
202 int, int, const char *);
203 static enum rofft roff_parse(struct roff *, char *, int *,
204 int, int);
205 static enum rofferr roff_parsetext(char **, size_t *, int, int *);
206 static enum rofferr roff_res(struct roff *,
207 char **, size_t *, int, int);
208 static enum rofferr roff_rm(ROFF_ARGS);
209 static enum rofferr roff_rr(ROFF_ARGS);
210 static void roff_setstr(struct roff *,
211 const char *, const char *, int);
212 static void roff_setstrn(struct roffkv **, const char *,
213 size_t, const char *, size_t, int);
214 static enum rofferr roff_so(ROFF_ARGS);
215 static enum rofferr roff_tr(ROFF_ARGS);
216 static enum rofferr roff_Dd(ROFF_ARGS);
217 static enum rofferr roff_TH(ROFF_ARGS);
218 static enum rofferr roff_TE(ROFF_ARGS);
219 static enum rofferr roff_TS(ROFF_ARGS);
220 static enum rofferr roff_EQ(ROFF_ARGS);
221 static enum rofferr roff_EN(ROFF_ARGS);
222 static enum rofferr roff_T_(ROFF_ARGS);
223 static enum rofferr roff_userdef(ROFF_ARGS);
224
/* See roffhash_find() */

/* Range of first characters a macro name may start with. */
#define	ASCII_HI	 126
#define	ASCII_LO	 33
/* One bucket per permitted first character. */
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

/* Bucket heads; each chain is linked through roffmac.next. */
static	struct roffmac	*hash[HASHWIDTH];
232
/*
 * Table of all known requests and macros, indexed by enum rofft:
 * the entries must appear in exactly the order of the enumerators.
 * Columns are name, proc, text, sub, flags, next; the next pointers
 * are filled in by roffhash_init().
 */
static	struct roffmac	 roffs[ROFF_MAX] = {
	{ "ad", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "as", roff_ds, NULL, NULL, 0, NULL },
	{ "cc", roff_cc, NULL, NULL, 0, NULL },
	{ "ce", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ds", roff_ds, NULL, NULL, 0, NULL },
	{ "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "fam", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hw", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hy", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "it", roff_it, NULL, NULL, 0, NULL },
	{ "ne", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nh", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nr", roff_nr, NULL, NULL, 0, NULL },
	{ "ns", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ps", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rm", roff_rm, NULL, NULL, 0, NULL },
	{ "rr", roff_rr, NULL, NULL, 0, NULL },
	{ "so", roff_so, NULL, NULL, 0, NULL },
	{ "ta", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tr", roff_tr, NULL, NULL, 0, NULL },
	{ "Dd", roff_Dd, NULL, NULL, 0, NULL },
	{ "TH", roff_TH, NULL, NULL, 0, NULL },
	{ "TS", roff_TS, NULL, NULL, 0, NULL },
	{ "TE", roff_TE, NULL, NULL, 0, NULL },
	{ "T&", roff_T_, NULL, NULL, 0, NULL },
	{ "EQ", roff_EQ, NULL, NULL, 0, NULL },
	{ "EN", roff_EN, NULL, NULL, 0, NULL },
	{ ".", roff_cblock, NULL, NULL, 0, NULL },
	/* ROFF_USERDEF has no fixed name and is never hashed. */
	{ NULL, roff_userdef, NULL, NULL, 0, NULL },
};
273
/*
 * NULL-terminated list of mdoc macro names.
 * NOTE(review): the consumer of this array is not visible in this
 * part of the file; presumably it marks names as reserved - confirm.
 */
/* not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH */
const	char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
	"Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
	"Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em",
	"En", "Eo", "Er", "Es", "Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp",
	"Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "Pq",
	"Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv",
	"Sc", "Sh", "Sm", "So", "Sq",
	"Ss", "St", "Sx", "Sy",
	"Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
	"%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};

/*
 * NULL-terminated list of man macro names.
 * NOTE(review): as above, the consumer is not visible here - confirm.
 */
/* not currently implemented: BT DE DS ME MT PT SY TQ YS */
const	char *const __man_reserved[] = {
	"AT", "B", "BI", "BR", "DT",
	"EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR",
	"LP", "OP", "P", "PD", "PP",
	"R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS",
	"TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR",
	NULL
};
306
/* Array of injected predefined strings. */
/* PREDEFS_MAX must be at least the number of PREDEF() lines in predefs.in. */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};

/* See roffhash_find() */
/* Bucket index of a name: its first character, offset by ASCII_LO. */
#define	ROFF_HASH(p)	(p[0] - ASCII_LO)

/*
 * State of the input line trap.  Both variables have file scope,
 * so they are shared by all struct roff instances.
 */
static	int	 roffit_lines; /* number of lines to delay */
static	char	*roffit_macro; /* nil-terminated macro line */
318
319
320 static void
321 roffhash_init(void)
322 {
323 struct roffmac *n;
324 int buc, i;
325
326 for (i = 0; i < (int)ROFF_USERDEF; i++) {
327 assert(roffs[i].name[0] >= ASCII_LO);
328 assert(roffs[i].name[0] <= ASCII_HI);
329
330 buc = ROFF_HASH(roffs[i].name);
331
332 if (NULL != (n = hash[buc])) {
333 for ( ; n->next; n = n->next)
334 /* Do nothing. */ ;
335 n->next = &roffs[i];
336 } else
337 hash[buc] = &roffs[i];
338 }
339 }
340
341 /*
342 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
343 * the nil-terminated string name could be found.
344 */
345 static enum rofft
346 roffhash_find(const char *p, size_t s)
347 {
348 int buc;
349 struct roffmac *n;
350
351 /*
352 * libroff has an extremely simple hashtable, for the time
353 * being, which simply keys on the first character, which must
354 * be printable, then walks a chain. It works well enough until
355 * optimised.
356 */
357
358 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
359 return(ROFF_MAX);
360
361 buc = ROFF_HASH(p);
362
363 if (NULL == (n = hash[buc]))
364 return(ROFF_MAX);
365 for ( ; n; n = n->next)
366 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
367 return((enum rofft)(n - roffs));
368
369 return(ROFF_MAX);
370 }
371
372 /*
373 * Pop the current node off of the stack of roff instructions currently
374 * pending.
375 */
376 static void
377 roffnode_pop(struct roff *r)
378 {
379 struct roffnode *p;
380
381 assert(r->last);
382 p = r->last;
383
384 r->last = r->last->parent;
385 free(p->name);
386 free(p->end);
387 free(p);
388 }
389
390 /*
391 * Push a roff node onto the instruction stack. This must later be
392 * removed with roffnode_pop().
393 */
394 static void
395 roffnode_push(struct roff *r, enum rofft tok, const char *name,
396 int line, int col)
397 {
398 struct roffnode *p;
399
400 p = mandoc_calloc(1, sizeof(struct roffnode));
401 p->tok = tok;
402 if (name)
403 p->name = mandoc_strdup(name);
404 p->parent = r->last;
405 p->line = line;
406 p->col = col;
407 p->rule = p->parent ? p->parent->rule : 0;
408
409 r->last = p;
410 }
411
412 static void
413 roff_free1(struct roff *r)
414 {
415 struct tbl_node *tbl;
416 struct eqn_node *e;
417 int i;
418
419 while (NULL != (tbl = r->first_tbl)) {
420 r->first_tbl = tbl->next;
421 tbl_free(tbl);
422 }
423
424 r->first_tbl = r->last_tbl = r->tbl = NULL;
425
426 while (NULL != (e = r->first_eqn)) {
427 r->first_eqn = e->next;
428 eqn_free(e);
429 }
430
431 r->first_eqn = r->last_eqn = r->eqn = NULL;
432
433 while (r->last)
434 roffnode_pop(r);
435
436 roff_freestr(r->strtab);
437 roff_freestr(r->xmbtab);
438
439 r->strtab = r->xmbtab = NULL;
440
441 roff_freereg(r->regtab);
442
443 r->regtab = NULL;
444
445 if (r->xtab)
446 for (i = 0; i < 128; i++)
447 free(r->xtab[i].p);
448
449 free(r->xtab);
450 r->xtab = NULL;
451 }
452
453 void
454 roff_reset(struct roff *r)
455 {
456
457 roff_free1(r);
458 r->control = 0;
459 }
460
/*
 * Destroy a parser context: release everything it owns,
 * then the context itself.  The pointer must not be used afterwards.
 */
void
roff_free(struct roff *r)
{

	roff_free1(r);
	free(r);
}
468
469 struct roff *
470 roff_alloc(struct mparse *parse, int options)
471 {
472 struct roff *r;
473
474 r = mandoc_calloc(1, sizeof(struct roff));
475 r->parse = parse;
476 r->options = options;
477 r->rstackpos = -1;
478
479 roffhash_init();
480
481 return(r);
482 }
483
/*
 * In the current line, expand escape sequences that tend to get
 * used in numerical expressions and conditional requests.
 * Also check the syntax of the remaining escape sequences.
 *
 * The line is scanned from right to left, starting at pos.
 * Each expansion replaces *bufp by a newly allocated buffer and
 * updates *szp; the old buffer is freed.
 * Returns ROFF_CONT on success, or ROFF_IGN if more than
 * EXPAND_LIMIT expansions were needed on this line.
 */
static enum rofferr
roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
{
	char		 ubuf[24]; /* buffer to print the number */
	const char	*start;	/* start of the string to process */
	char		*stesc;	/* start of an escape sequence ('\\') */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy bufp to */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	int		 expand_count;	/* to avoid infinite loops */
	int		 npos;	/* position in numeric expression */
	int		 irc;	/* return code from roff_evalnum() */
	char		 term;	/* character terminating the escape */

	expand_count = 0;
	start = *bufp + pos;
	/*
	 * Begin at the last character.  Because of the post-decrement
	 * in the loop condition, the first character inspected is the
	 * one before it: a backslash needs a character following it.
	 */
	stesc = strchr(start, '\0') - 1;
	while (stesc-- > start) {

		/* Search backwards for the next backslash. */

		if ('\\' != *stesc)
			continue;

		/* If it is escaped, skip it. */

		for (cp = stesc - 1; cp >= start; cp--)
			if ('\\' != *cp)
				break;

		/*
		 * stesc - cp is the length of the run of backslashes
		 * ending at stesc; if it is even, this backslash is
		 * itself escaped by the one before it.
		 */
		if (0 == (stesc - cp) % 2) {
			stesc = (char *)cp;
			continue;
		}

		/* Decide whether to expand or to check only. */

		term = '\0';
		cp = stesc + 1;
		switch (*cp) {
		case '*':
			/* The string is looked up further below. */
			res = NULL;
			break;
		case 'B':
			/* FALLTHROUGH */
		case 'w':
			/* The argument is enclosed in a delimiter. */
			term = cp[1];
			/* FALLTHROUGH */
		case 'n':
			res = ubuf;
			break;
		default:
			if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
				mandoc_msg(MANDOCERR_BADESCAPE, r->parse,
				    ln, (int)(stesc - *bufp), NULL);
			continue;
		}

		if (EXPAND_LIMIT < ++expand_count) {
			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
			    ln, (int)(stesc - *bufp), NULL);
			return(ROFF_IGN);
		}

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 */

		if ('\0' == term) {
			switch (*++cp) {
			case '\0':
				maxl = 0;
				break;
			case '(':
				cp++;
				maxl = 2;
				break;
			case '[':
				cp++;
				term = ']';
				maxl = 0;
				break;
			default:
				maxl = 1;
				break;
			}
		} else {
			/* Skip the escape letter and the delimiter. */
			cp += 2;
			maxl = 0;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		/* A maxl of zero means: scan up to the terminator. */
		for (naml = 0; 0 == maxl || naml < maxl; naml++, cp++) {
			if ('\0' == *cp) {
				mandoc_msg(MANDOCERR_BADESCAPE, r->parse,
				    ln, (int)(stesc - *bufp), NULL);
				break;
			}
			if (0 == maxl && *cp == term) {
				cp++;
				break;
			}
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		switch (stesc[1]) {
		case '*':
			res = roff_getstrn(r, stnam, naml);
			break;
		case 'B':
			/*
			 * Yield 1 only if the expression is valid
			 * and extends right up to the delimiter.
			 */
			npos = 0;
			irc = roff_evalnum(stnam, &npos, NULL, 0);
			ubuf[0] = irc && stnam + npos + 1 == cp
			    ? '1' : '0';
			ubuf[1] = '\0';
			break;
		case 'n':
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
			    roff_getregn(r, stnam, naml));
			break;
		case 'w':
			/* The width is taken as 24 units per byte. */
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
			    24 * (int)naml);
			break;
		}

		if (NULL == res) {
			mandoc_msg(MANDOCERR_BADESCAPE, r->parse,
			    ln, (int)(stesc - *bufp), NULL);
			res = "";
		}

		/* Replace the escape sequence by the string. */

		/* Cut the line at the escape, then glue the parts. */
		*stesc = '\0';
		*szp = mandoc_asprintf(&nbuf, "%s%s%s",
		    *bufp, res, cp) + 1;

		/* Prepare for the next replacement. */

		/*
		 * Continue from the end of the inserted text, such that
		 * escapes contained in it get expanded as well.
		 */
		start = nbuf + pos;
		stesc = nbuf + (stesc - *bufp) + strlen(res);
		free(*bufp);
		*bufp = nbuf;
	}
	return(ROFF_CONT);
}
647
648 /*
649 * Process text streams:
650 * Convert all breakable hyphens into ASCII_HYPH.
651 * Decrement and spring input line trap.
652 */
653 static enum rofferr
654 roff_parsetext(char **bufp, size_t *szp, int pos, int *offs)
655 {
656 size_t sz;
657 const char *start;
658 char *p;
659 int isz;
660 enum mandoc_esc esc;
661
662 start = p = *bufp + pos;
663
664 while ('\0' != *p) {
665 sz = strcspn(p, "-\\");
666 p += sz;
667
668 if ('\0' == *p)
669 break;
670
671 if ('\\' == *p) {
672 /* Skip over escapes. */
673 p++;
674 esc = mandoc_escape((const char **)&p, NULL, NULL);
675 if (ESCAPE_ERROR == esc)
676 break;
677 continue;
678 } else if (p == start) {
679 p++;
680 continue;
681 }
682
683 if (isalpha((unsigned char)p[-1]) &&
684 isalpha((unsigned char)p[1]))
685 *p = ASCII_HYPH;
686 p++;
687 }
688
689 /* Spring the input line trap. */
690 if (1 == roffit_lines) {
691 isz = mandoc_asprintf(&p, "%s\n.%s", *bufp, roffit_macro);
692 free(*bufp);
693 *bufp = p;
694 *szp = isz + 1;
695 *offs = 0;
696 free(roffit_macro);
697 roffit_lines = 0;
698 return(ROFF_REPARSE);
699 } else if (1 < roffit_lines)
700 --roffit_lines;
701 return(ROFF_CONT);
702 }
703
704 enum rofferr
705 roff_parseln(struct roff *r, int ln, char **bufp,
706 size_t *szp, int pos, int *offs)
707 {
708 enum rofft t;
709 enum rofferr e;
710 int ppos, ctl;
711
712 /*
713 * Run the reserved-word filter only if we have some reserved
714 * words to fill in.
715 */
716
717 e = roff_res(r, bufp, szp, ln, pos);
718 if (ROFF_IGN == e)
719 return(e);
720 assert(ROFF_CONT == e);
721
722 ppos = pos;
723 ctl = roff_getcontrol(r, *bufp, &pos);
724
725 /*
726 * First, if a scope is open and we're not a macro, pass the
727 * text through the macro's filter. If a scope isn't open and
728 * we're not a macro, just let it through.
729 * Finally, if there's an equation scope open, divert it into it
730 * no matter our state.
731 */
732
733 if (r->last && ! ctl) {
734 t = r->last->tok;
735 assert(roffs[t].text);
736 e = (*roffs[t].text)(r, t, bufp, szp, ln, pos, pos, offs);
737 assert(ROFF_IGN == e || ROFF_CONT == e);
738 if (ROFF_CONT != e)
739 return(e);
740 }
741 if (r->eqn)
742 return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
743 if ( ! ctl) {
744 if (r->tbl)
745 return(tbl_read(r->tbl, ln, *bufp, pos));
746 return(roff_parsetext(bufp, szp, pos, offs));
747 }
748
749 /*
750 * If a scope is open, go to the child handler for that macro,
751 * as it may want to preprocess before doing anything with it.
752 * Don't do so if an equation is open.
753 */
754
755 if (r->last) {
756 t = r->last->tok;
757 assert(roffs[t].sub);
758 return((*roffs[t].sub)(r, t, bufp, szp,
759 ln, ppos, pos, offs));
760 }
761
762 /*
763 * Lastly, as we've no scope open, try to look up and execute
764 * the new macro. If no macro is found, simply return and let
765 * the compilers handle it.
766 */
767
768 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos, ln, ppos)))
769 return(ROFF_CONT);
770
771 assert(roffs[t].proc);
772 return((*roffs[t].proc)(r, t, bufp, szp, ln, ppos, pos, offs));
773 }
774
775 void
776 roff_endparse(struct roff *r)
777 {
778
779 if (r->last)
780 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
781 r->last->line, r->last->col, NULL);
782
783 if (r->eqn) {
784 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
785 r->eqn->eqn.ln, r->eqn->eqn.pos, NULL);
786 eqn_end(&r->eqn);
787 }
788
789 if (r->tbl) {
790 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
791 r->tbl->line, r->tbl->pos, NULL);
792 tbl_end(&r->tbl);
793 }
794 }
795
796 /*
797 * Parse a roff node's type from the input buffer. This must be in the
798 * form of ".foo xxx" in the usual way.
799 */
800 static enum rofft
801 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
802 {
803 char *cp;
804 const char *mac;
805 size_t maclen;
806 enum rofft t;
807
808 cp = buf + *pos;
809
810 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
811 return(ROFF_MAX);
812
813 mac = cp;
814 maclen = roff_getname(r, &cp, ln, ppos);
815
816 t = (r->current_string = roff_getstrn(r, mac, maclen))
817 ? ROFF_USERDEF : roffhash_find(mac, maclen);
818
819 if (ROFF_MAX != t)
820 *pos = cp - buf;
821
822 return(t);
823 }
824
825 static enum rofferr
826 roff_cblock(ROFF_ARGS)
827 {
828
829 /*
830 * A block-close `..' should only be invoked as a child of an
831 * ignore macro, otherwise raise a warning and just ignore it.
832 */
833
834 if (NULL == r->last) {
835 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
836 return(ROFF_IGN);
837 }
838
839 switch (r->last->tok) {
840 case ROFF_am:
841 /* FALLTHROUGH */
842 case ROFF_ami:
843 /* FALLTHROUGH */
844 case ROFF_am1:
845 /* FALLTHROUGH */
846 case ROFF_de:
847 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
848 /* FALLTHROUGH */
849 case ROFF_dei:
850 /* FALLTHROUGH */
851 case ROFF_ig:
852 break;
853 default:
854 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
855 return(ROFF_IGN);
856 }
857
858 if ((*bufp)[pos])
859 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
860
861 roffnode_pop(r);
862 roffnode_cleanscope(r);
863 return(ROFF_IGN);
864
865 }
866
867 static void
868 roffnode_cleanscope(struct roff *r)
869 {
870
871 while (r->last) {
872 if (--r->last->endspan != 0)
873 break;
874 roffnode_pop(r);
875 }
876 }
877
878 static void
879 roff_ccond(struct roff *r, int ln, int ppos)
880 {
881
882 if (NULL == r->last) {
883 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
884 return;
885 }
886
887 switch (r->last->tok) {
888 case ROFF_el:
889 /* FALLTHROUGH */
890 case ROFF_ie:
891 /* FALLTHROUGH */
892 case ROFF_if:
893 break;
894 default:
895 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
896 return;
897 }
898
899 if (r->last->endspan > -1) {
900 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
901 return;
902 }
903
904 roffnode_pop(r);
905 roffnode_cleanscope(r);
906 return;
907 }
908
909 static enum rofferr
910 roff_block(ROFF_ARGS)
911 {
912 char *name, *cp;
913 size_t namesz;
914
915 name = cp = *bufp + pos;
916 namesz = 0;
917
918 if (ROFF_ig != tok) {
919 if ('\0' == *cp) {
920 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
921 return(ROFF_IGN);
922 }
923
924 /*
925 * Re-write `de1', since we don't really care about
926 * groff's strange compatibility mode, into `de'.
927 */
928
929 if (ROFF_de1 == tok)
930 tok = ROFF_de;
931 else if (ROFF_de != tok)
932 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
933 roffs[tok].name);
934
935 namesz = roff_getname(r, &cp, ln, ppos);
936 name[namesz] = '\0';
937 } else
938 name = NULL;
939
940 roffnode_push(r, tok, name, ln, ppos);
941
942 /*
943 * At the beginning of a `de' macro, clear the existing string
944 * with the same name, if there is one. New content will be
945 * appended from roff_block_text() in multiline mode.
946 */
947
948 if (namesz && ROFF_de == tok)
949 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
950
951 if ('\0' == *cp)
952 return(ROFF_IGN);
953
954 /* If present, process the custom end-of-line marker. */
955
956 name = cp;
957 namesz = roff_getname(r, &cp, ln, ppos);
958 if (namesz)
959 r->last->end = mandoc_strndup(name, namesz);
960
961 if ('\0' != *cp)
962 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
963
964 return(ROFF_IGN);
965 }
966
967 static enum rofferr
968 roff_block_sub(ROFF_ARGS)
969 {
970 enum rofft t;
971 int i, j;
972
973 /*
974 * First check whether a custom macro exists at this level. If
975 * it does, then check against it. This is some of groff's
976 * stranger behaviours. If we encountered a custom end-scope
977 * tag and that tag also happens to be a "real" macro, then we
978 * need to try interpreting it again as a real macro. If it's
979 * not, then return ignore. Else continue.
980 */
981
982 if (r->last->end) {
983 for (i = pos, j = 0; r->last->end[j]; j++, i++)
984 if ((*bufp)[i] != r->last->end[j])
985 break;
986
987 if ('\0' == r->last->end[j] &&
988 ('\0' == (*bufp)[i] ||
989 ' ' == (*bufp)[i] ||
990 '\t' == (*bufp)[i])) {
991 roffnode_pop(r);
992 roffnode_cleanscope(r);
993
994 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
995 i++;
996
997 pos = i;
998 if (ROFF_MAX != roff_parse(r, *bufp, &pos, ln, ppos))
999 return(ROFF_RERUN);
1000 return(ROFF_IGN);
1001 }
1002 }
1003
1004 /*
1005 * If we have no custom end-query or lookup failed, then try
1006 * pulling it out of the hashtable.
1007 */
1008
1009 t = roff_parse(r, *bufp, &pos, ln, ppos);
1010
1011 /*
1012 * Macros other than block-end are only significant
1013 * in `de' blocks; elsewhere, simply throw them away.
1014 */
1015 if (ROFF_cblock != t) {
1016 if (ROFF_de == tok)
1017 roff_setstr(r, r->last->name, *bufp + ppos, 2);
1018 return(ROFF_IGN);
1019 }
1020
1021 assert(roffs[t].proc);
1022 return((*roffs[t].proc)(r, t, bufp, szp, ln, ppos, pos, offs));
1023 }
1024
1025 static enum rofferr
1026 roff_block_text(ROFF_ARGS)
1027 {
1028
1029 if (ROFF_de == tok)
1030 roff_setstr(r, r->last->name, *bufp + pos, 2);
1031
1032 return(ROFF_IGN);
1033 }
1034
1035 static enum rofferr
1036 roff_cond_sub(ROFF_ARGS)
1037 {
1038 enum rofft t;
1039 char *ep;
1040 int rr;
1041
1042 rr = r->last->rule;
1043 roffnode_cleanscope(r);
1044 t = roff_parse(r, *bufp, &pos, ln, ppos);
1045
1046 /*
1047 * Fully handle known macros when they are structurally
1048 * required or when the conditional evaluated to true.
1049 */
1050
1051 if ((ROFF_MAX != t) &&
1052 (rr || ROFFMAC_STRUCT & roffs[t].flags)) {
1053 assert(roffs[t].proc);
1054 return((*roffs[t].proc)(r, t, bufp, szp,
1055 ln, ppos, pos, offs));
1056 }
1057
1058 /*
1059 * If `\}' occurs on a macro line without a preceding macro,
1060 * drop the line completely.
1061 */
1062
1063 ep = *bufp + pos;
1064 if ('\\' == ep[0] && '}' == ep[1])
1065 rr = 0;
1066
1067 /* Always check for the closing delimiter `\}'. */
1068
1069 while (NULL != (ep = strchr(ep, '\\'))) {
1070 if ('}' == *(++ep)) {
1071 *ep = '&';
1072 roff_ccond(r, ln, ep - *bufp - 1);
1073 }
1074 ++ep;
1075 }
1076 return(rr ? ROFF_CONT : ROFF_IGN);
1077 }
1078
1079 static enum rofferr
1080 roff_cond_text(ROFF_ARGS)
1081 {
1082 char *ep;
1083 int rr;
1084
1085 rr = r->last->rule;
1086 roffnode_cleanscope(r);
1087
1088 ep = *bufp + pos;
1089 while (NULL != (ep = strchr(ep, '\\'))) {
1090 if ('}' == *(++ep)) {
1091 *ep = '&';
1092 roff_ccond(r, ln, ep - *bufp - 1);
1093 }
1094 ++ep;
1095 }
1096 return(rr ? ROFF_CONT : ROFF_IGN);
1097 }
1098
/*
 * Read a decimal integer with an optional leading minus sign from
 * v, starting at index *pos and stopping at the first non-digit.
 * If at least one digit is found, store the value in *res (unless
 * res is NULL), move *pos behind the number and return 1.
 * Otherwise, return 0 and leave *pos unchanged; *res is zeroed
 * even then.  Overflow is not detected.
 */
static int
roff_getnum(const char *v, int *pos, int *res)
{
	int	 scratch, negative, cur;

	if (res == NULL)
		res = &scratch;

	cur = *pos;
	negative = (v[cur] == '-');
	if (negative)
		cur++;

	*res = 0;
	while (isdigit((unsigned char)v[cur])) {
		*res = 10 * *res + v[cur] - '0';
		cur++;
	}

	/* Nothing but possibly a sign: not a number. */
	if (cur == *pos + negative)
		return 0;

	if (negative)
		*res = -*res;

	*pos = cur;
	return 1;
}
1129
/*
 * Evaluate a string comparison condition.
 * The first character is the delimiter.
 * Succeed if the string up to its second occurrence
 * matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or lacking that, to the end of the line.
 * The cursor never moves beyond the terminating NUL; in particular,
 * an empty condition fails and leaves the cursor where it is.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*s1, *s2, *s3;
	int		 match;

	match = 0;
	s1 = v + *pos;		/* initial delimiter */

	/* Without a delimiter, there is nothing to scan. */

	if ('\0' == *s1)
		return(0);

	s2 = s1 + 1;		/* for scanning the first string */
	s3 = strchr(s2, *s1);	/* for scanning the second string */

	if (NULL == s3)		/* found no middle delimiter */
		goto out;

	while ('\0' != *++s3) {
		if (*s2 != *s3) {  /* mismatch */
			s3 = strchr(s3, *s1);
			break;
		}
		if (*s3 == *s1) {  /* found the final delimiter */
			match = 1;
			break;
		}
		s2++;
	}

out:
	if (NULL == s3)
		s3 = strchr(s2, '\0');
	else if ('\0' != *s3)	/* step over the final delimiter only */
		s3++;
	*pos = s3 - v;
	return(match);
}
1172
/*
 * Evaluate an optionally negated single character, numerical,
 * or string condition, starting at v[*pos], and advance *pos.
 * The conditions `n' and `o' always hold, while `c', `d', `e',
 * `r' and `t' never do.  A numerical condition holds if the number
 * is positive.  An empty condition does not hold; it is decided
 * right here, such that the string comparison is never started
 * on an empty string.
 * Returns non-zero if the condition, including negation, holds.
 */
static int
roff_evalcond(const char *v, int *pos)
{
	int	 wanttrue, number;

	if ('!' == v[*pos]) {
		wanttrue = 0;
		(*pos)++;
	} else
		wanttrue = 1;

	switch (v[*pos]) {
	case '\0':
		/* Nothing to evaluate: false, or true if negated. */
		return(!wanttrue);
	case 'n':
		/* FALLTHROUGH */
	case 'o':
		(*pos)++;
		return(wanttrue);
	case 'c':
		/* FALLTHROUGH */
	case 'd':
		/* FALLTHROUGH */
	case 'e':
		/* FALLTHROUGH */
	case 'r':
		/* FALLTHROUGH */
	case 't':
		(*pos)++;
		return(!wanttrue);
	default:
		break;
	}

	/* Try a numerical expression first, then a string comparison. */

	if (roff_evalnum(v, pos, &number, 0))
		return((number > 0) == wanttrue);
	else
		return(roff_evalstrcond(v, pos) == wanttrue);
}
1214
/*
 * Handler for requests that are recognized but have no effect:
 * discard the whole line without looking at the arguments.
 */
static enum rofferr
roff_line_ignore(ROFF_ARGS)
{

	return(ROFF_IGN);
}
1221
1222 static enum rofferr
1223 roff_cond(ROFF_ARGS)
1224 {
1225
1226 roffnode_push(r, tok, NULL, ln, ppos);
1227
1228 /*
1229 * An `.el' has no conditional body: it will consume the value
1230 * of the current rstack entry set in prior `ie' calls or
1231 * defaults to DENY.
1232 *
1233 * If we're not an `el', however, then evaluate the conditional.
1234 */
1235
1236 r->last->rule = ROFF_el == tok ?
1237 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
1238 roff_evalcond(*bufp, &pos);
1239
1240 /*
1241 * An if-else will put the NEGATION of the current evaluated
1242 * conditional into the stack of rules.
1243 */
1244
1245 if (ROFF_ie == tok) {
1246 if (r->rstackpos == RSTACK_MAX - 1) {
1247 mandoc_msg(MANDOCERR_MEM,
1248 r->parse, ln, ppos, NULL);
1249 return(ROFF_ERR);
1250 }
1251 r->rstack[++r->rstackpos] = !r->last->rule;
1252 }
1253
1254 /* If the parent has false as its rule, then so do we. */
1255
1256 if (r->last->parent && !r->last->parent->rule)
1257 r->last->rule = 0;
1258
1259 /*
1260 * Determine scope.
1261 * If there is nothing on the line after the conditional,
1262 * not even whitespace, use next-line scope.
1263 */
1264
1265 if ('\0' == (*bufp)[pos]) {
1266 r->last->endspan = 2;
1267 goto out;
1268 }
1269
1270 while (' ' == (*bufp)[pos])
1271 pos++;
1272
1273 /* An opening brace requests multiline scope. */
1274
1275 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1276 r->last->endspan = -1;
1277 pos += 2;
1278 goto out;
1279 }
1280
1281 /*
1282 * Anything else following the conditional causes
1283 * single-line scope. Warn if the scope contains
1284 * nothing but trailing whitespace.
1285 */
1286
1287 if ('\0' == (*bufp)[pos])
1288 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
1289
1290 r->last->endspan = 1;
1291
1292 out:
1293 *offs = pos;
1294 return(ROFF_RERUN);
1295 }
1296
1297 static enum rofferr
1298 roff_ds(ROFF_ARGS)
1299 {
1300 char *string;
1301 const char *name;
1302 size_t namesz;
1303
1304 /*
1305 * The first word is the name of the string.
1306 * If it is empty or terminated by an escape sequence,
1307 * abort the `ds' request without defining anything.
1308 */
1309
1310 name = string = *bufp + pos;
1311 if ('\0' == *name)
1312 return(ROFF_IGN);
1313
1314 namesz = roff_getname(r, &string, ln, pos);
1315 if ('\\' == name[namesz])
1316 return(ROFF_IGN);
1317
1318 /* Read past the initial double-quote, if any. */
1319 if ('"' == *string)
1320 string++;
1321
1322 /* The rest is the value. */
1323 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
1324 ROFF_as == tok);
1325 return(ROFF_IGN);
1326 }
1327
/*
 * Parse a single operator, one or two characters long.
 * If the operator is recognized, store its one-byte code in *res,
 * advance the parse point past it, and return that code.
 * Otherwise, return 0 and leave the parse point unchanged;
 * *res then holds the unrecognized byte.
 * Two-character operators are encoded as follows:
 * `<=' as 'l', `<>' as '!', `<?' as 'i', `>=' as 'g', `>?' as 'a',
 * and `==' as '='.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	char	 first, second;
	int	 width;

	first = v[*pos];
	*res = first;
	width = 1;

	if ('<' == first) {
		second = v[*pos + 1];
		if ('=' == second) {
			*res = 'l';
			width = 2;
		} else if ('>' == second) {
			*res = '!';
			width = 2;
		} else if ('?' == second) {
			*res = 'i';
			width = 2;
		}
	} else if ('>' == first) {
		second = v[*pos + 1];
		if ('=' == second) {
			*res = 'g';
			width = 2;
		} else if ('?' == second) {
			*res = 'a';
			width = 2;
		}
	} else if ('=' == first) {
		if ('=' == v[*pos + 1])
			width = 2;
	} else {
		switch (first) {
		case '+':
		case '-':
		case '*':
		case '/':
		case '%':
		case '&':
		case ':':
			break;
		default:
			return 0;
		}
	}

	*pos += width;
	return *res;
}
1397
/*
 * Evaluate either a parenthesized numeric expression
 * or a single signed integer number.
 * Return 1 on success and 0 on failure.
 */
static int
roff_evalpar(const char *v, int *pos, int *res)
{

	/* Without an opening parenthesis, expect a plain number. */
	if ('(' != v[*pos])
		return roff_getnum(v, pos, res);

	(*pos)++;
	if ( ! roff_evalnum(v, pos, res, 1))
		return 0;

	if (')' == v[*pos]) {
		(*pos)++;
		return 1;
	}

	/*
	 * The closing parenthesis is missing.
	 * That is an error in validation mode (no result pointer),
	 * but tolerated in evaluation mode.
	 */
	return NULL != res;
}
1426
/*
 * Evaluate a complete numeric expression.
 * Proceed left to right, there is no concept of precedence.
 *
 * v:         the string containing the expression
 * pos:       in/out parse position in v; if NULL, parsing starts
 *            at the beginning of v and the end position is discarded
 * res:       where to store the result; if NULL, the expression
 *            is only checked for validity and nothing is computed
 * skipwhite: if non-zero, white space is skipped around operands
 *            and operators
 *
 * Return 1 if an expression was parsed, 0 otherwise.
 * Division and modulus by zero yield a result of 0
 * rather than invoking undefined behaviour.
 */
static int
roff_evalnum(const char *v, int *pos, int *res, int skipwhite)
{
	int		 mypos, operand2;
	char		 operator;

	if (NULL == pos) {
		mypos = 0;
		pos = &mypos;
	}

	if (skipwhite)
		while (isspace((unsigned char)v[*pos]))
			(*pos)++;

	if ( ! roff_evalpar(v, pos, res))
		return(0);

	while (1) {
		if (skipwhite)
			while (isspace((unsigned char)v[*pos]))
				(*pos)++;

		if ( ! roff_getop(v, pos, &operator))
			break;

		if (skipwhite)
			while (isspace((unsigned char)v[*pos]))
				(*pos)++;

		if ( ! roff_evalpar(v, pos, &operand2))
			return(0);

		if (skipwhite)
			while (isspace((unsigned char)v[*pos]))
				(*pos)++;

		/* Validation only: parse on, but compute nothing. */
		if (NULL == res)
			continue;

		switch (operator) {
		case '+':
			*res += operand2;
			break;
		case '-':
			*res -= operand2;
			break;
		case '*':
			*res *= operand2;
			break;
		case '/':
			/* Dividing by zero must not crash the parser. */
			if (0 == operand2) {
				*res = 0;
				break;
			}
			*res /= operand2;
			break;
		case '%':
			/* Same protection as for division. */
			if (0 == operand2) {
				*res = 0;
				break;
			}
			*res %= operand2;
			break;
		case '<':
			*res = *res < operand2;
			break;
		case '>':
			*res = *res > operand2;
			break;
		case 'l':
			*res = *res <= operand2;
			break;
		case 'g':
			*res = *res >= operand2;
			break;
		case '=':
			*res = *res == operand2;
			break;
		case '!':
			*res = *res != operand2;
			break;
		case '&':
			*res = *res && operand2;
			break;
		case ':':
			*res = *res || operand2;
			break;
		case 'i':	/* minimum */
			if (operand2 < *res)
				*res = operand2;
			break;
		case 'a':	/* maximum */
			if (operand2 > *res)
				*res = operand2;
			break;
		default:
			abort();
		}
	}
	return(1);
}
1525
1526 void
1527 roff_setreg(struct roff *r, const char *name, int val, char sign)
1528 {
1529 struct roffreg *reg;
1530
1531 /* Search for an existing register with the same name. */
1532 reg = r->regtab;
1533
1534 while (reg && strcmp(name, reg->key.p))
1535 reg = reg->next;
1536
1537 if (NULL == reg) {
1538 /* Create a new register. */
1539 reg = mandoc_malloc(sizeof(struct roffreg));
1540 reg->key.p = mandoc_strdup(name);
1541 reg->key.sz = strlen(name);
1542 reg->val = 0;
1543 reg->next = r->regtab;
1544 r->regtab = reg;
1545 }
1546
1547 if ('+' == sign)
1548 reg->val += val;
1549 else if ('-' == sign)
1550 reg->val -= val;
1551 else
1552 reg->val = val;
1553 }
1554
/*
 * Look up a predefined read-only number register.
 * Only the first byte of name is inspected; callers pass the
 * register name without the leading dot.
 * Return the value of the register, or -1 if no predefined
 * register of that name exists.  Should a predefined register
 * with the value -1 ever be needed, a different special value
 * will have to be chosen.
 */
static int
roff_getregro(const char *name)
{
	static const struct {
		char	 key;
		int	 val;
	} ro[] = {
		{ 'A',  0 },	/* ASCII approximation mode is always off. */
		{ 'g',  1 },	/* Groff compatibility mode is always on. */
		{ 'H', 24 },	/* Fixed horizontal resolution. */
		{ 'j',  0 },	/* Always adjust left margin only. */
		{ 'T',  1 },	/* Some output device is always defined. */
		{ 'V', 40 }	/* Fixed vertical resolution. */
	};
	size_t	 i;

	for (i = 0; i < sizeof(ro) / sizeof(ro[0]); i++)
		if (*name == ro[i].key)
			return ro[i].val;

	return -1;
}
1582
1583 int
1584 roff_getreg(const struct roff *r, const char *name)
1585 {
1586 struct roffreg *reg;
1587 int val;
1588
1589 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
1590 val = roff_getregro(name + 1);
1591 if (-1 != val)
1592 return (val);
1593 }
1594
1595 for (reg = r->regtab; reg; reg = reg->next)
1596 if (0 == strcmp(name, reg->key.p))
1597 return(reg->val);
1598
1599 return(0);
1600 }
1601
1602 static int
1603 roff_getregn(const struct roff *r, const char *name, size_t len)
1604 {
1605 struct roffreg *reg;
1606 int val;
1607
1608 if ('.' == name[0] && 2 == len) {
1609 val = roff_getregro(name + 1);
1610 if (-1 != val)
1611 return (val);
1612 }
1613
1614 for (reg = r->regtab; reg; reg = reg->next)
1615 if (len == reg->key.sz &&
1616 0 == strncmp(name, reg->key.p, len))
1617 return(reg->val);
1618
1619 return(0);
1620 }
1621
1622 static void
1623 roff_freereg(struct roffreg *reg)
1624 {
1625 struct roffreg *old_reg;
1626
1627 while (NULL != reg) {
1628 free(reg->key.p);
1629 old_reg = reg;
1630 reg = reg->next;
1631 free(old_reg);
1632 }
1633 }
1634
1635 static enum rofferr
1636 roff_nr(ROFF_ARGS)
1637 {
1638 char *key, *val;
1639 size_t keysz;
1640 int iv;
1641 char sign;
1642
1643 key = val = *bufp + pos;
1644 if ('\0' == *key)
1645 return(ROFF_IGN);
1646
1647 keysz = roff_getname(r, &val, ln, pos);
1648 if ('\\' == key[keysz])
1649 return(ROFF_IGN);
1650 key[keysz] = '\0';
1651
1652 sign = *val;
1653 if ('+' == sign || '-' == sign)
1654 val++;
1655
1656 if (roff_evalnum(val, NULL, &iv, 0))
1657 roff_setreg(r, key, iv, sign);
1658
1659 return(ROFF_IGN);
1660 }
1661
1662 static enum rofferr
1663 roff_rr(ROFF_ARGS)
1664 {
1665 struct roffreg *reg, **prev;
1666 char *name, *cp;
1667 size_t namesz;
1668
1669 name = cp = *bufp + pos;
1670 if ('\0' == *name)
1671 return(ROFF_IGN);
1672 namesz = roff_getname(r, &cp, ln, pos);
1673 name[namesz] = '\0';
1674
1675 prev = &r->regtab;
1676 while (1) {
1677 reg = *prev;
1678 if (NULL == reg || !strcmp(name, reg->key.p))
1679 break;
1680 prev = &reg->next;
1681 }
1682 if (NULL != reg) {
1683 *prev = reg->next;
1684 free(reg->key.p);
1685 free(reg);
1686 }
1687 return(ROFF_IGN);
1688 }
1689
1690 static enum rofferr
1691 roff_rm(ROFF_ARGS)
1692 {
1693 const char *name;
1694 char *cp;
1695 size_t namesz;
1696
1697 cp = *bufp + pos;
1698 while ('\0' != *cp) {
1699 name = cp;
1700 namesz = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1701 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
1702 if ('\\' == name[namesz])
1703 break;
1704 }
1705 return(ROFF_IGN);
1706 }
1707
1708 static enum rofferr
1709 roff_it(ROFF_ARGS)
1710 {
1711 char *cp;
1712 size_t len;
1713 int iv;
1714
1715 /* Parse the number of lines. */
1716 cp = *bufp + pos;
1717 len = strcspn(cp, " \t");
1718 cp[len] = '\0';
1719 if ((iv = mandoc_strntoi(cp, len, 10)) <= 0) {
1720 mandoc_msg(MANDOCERR_NUMERIC, r->parse,
1721 ln, ppos, *bufp + 1);
1722 return(ROFF_IGN);
1723 }
1724 cp += len + 1;
1725
1726 /* Arm the input line trap. */
1727 roffit_lines = iv;
1728 roffit_macro = mandoc_strdup(cp);
1729 return(ROFF_IGN);
1730 }
1731
1732 static enum rofferr
1733 roff_Dd(ROFF_ARGS)
1734 {
1735 const char *const *cp;
1736
1737 if (0 == ((MPARSE_MDOC | MPARSE_QUICK) & r->options))
1738 for (cp = __mdoc_reserved; *cp; cp++)
1739 roff_setstr(r, *cp, NULL, 0);
1740
1741 return(ROFF_CONT);
1742 }
1743
1744 static enum rofferr
1745 roff_TH(ROFF_ARGS)
1746 {
1747 const char *const *cp;
1748
1749 if (0 == (MPARSE_QUICK & r->options))
1750 for (cp = __man_reserved; *cp; cp++)
1751 roff_setstr(r, *cp, NULL, 0);
1752
1753 return(ROFF_CONT);
1754 }
1755
1756 static enum rofferr
1757 roff_TE(ROFF_ARGS)
1758 {
1759
1760 if (NULL == r->tbl)
1761 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1762 else
1763 tbl_end(&r->tbl);
1764
1765 return(ROFF_IGN);
1766 }
1767
1768 static enum rofferr
1769 roff_T_(ROFF_ARGS)
1770 {
1771
1772 if (NULL == r->tbl)
1773 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1774 else
1775 tbl_restart(ppos, ln, r->tbl);
1776
1777 return(ROFF_IGN);
1778 }
1779
#if 0
/*
 * Currently disabled.
 * End the open equation, if any; return 1 if one was open
 * and eqn_end() returned ROFF_EQN, or 0 otherwise.
 */
static int
roff_closeeqn(struct roff *r)
{

	if (NULL == r->eqn)
		return 0;

	return ROFF_EQN == eqn_end(&r->eqn);
}
#endif
1788
1789 static void
1790 roff_openeqn(struct roff *r, const char *name, int line,
1791 int offs, const char *buf)
1792 {
1793 struct eqn_node *e;
1794 int poff;
1795
1796 assert(NULL == r->eqn);
1797 e = eqn_alloc(name, offs, line, r->parse);
1798
1799 if (r->last_eqn)
1800 r->last_eqn->next = e;
1801 else
1802 r->first_eqn = r->last_eqn = e;
1803
1804 r->eqn = r->last_eqn = e;
1805
1806 if (buf) {
1807 poff = 0;
1808 eqn_read(&r->eqn, line, buf, offs, &poff);
1809 }
1810 }
1811
1812 static enum rofferr
1813 roff_EQ(ROFF_ARGS)
1814 {
1815
1816 roff_openeqn(r, *bufp + pos, ln, ppos, NULL);
1817 return(ROFF_IGN);
1818 }
1819
1820 static enum rofferr
1821 roff_EN(ROFF_ARGS)
1822 {
1823
1824 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1825 return(ROFF_IGN);
1826 }
1827
1828 static enum rofferr
1829 roff_TS(ROFF_ARGS)
1830 {
1831 struct tbl_node *tbl;
1832
1833 if (r->tbl) {
1834 mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
1835 tbl_end(&r->tbl);
1836 }
1837
1838 tbl = tbl_alloc(ppos, ln, r->parse);
1839
1840 if (r->last_tbl)
1841 r->last_tbl->next = tbl;
1842 else
1843 r->first_tbl = r->last_tbl = tbl;
1844
1845 r->tbl = r->last_tbl = tbl;
1846 return(ROFF_IGN);
1847 }
1848
1849 static enum rofferr
1850 roff_cc(ROFF_ARGS)
1851 {
1852 const char *p;
1853
1854 p = *bufp + pos;
1855
1856 if ('\0' == *p || '.' == (r->control = *p++))
1857 r->control = 0;
1858
1859 if ('\0' != *p)
1860 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1861
1862 return(ROFF_IGN);
1863 }
1864
1865 static enum rofferr
1866 roff_tr(ROFF_ARGS)
1867 {
1868 const char *p, *first, *second;
1869 size_t fsz, ssz;
1870 enum mandoc_esc esc;
1871
1872 p = *bufp + pos;
1873
1874 if ('\0' == *p) {
1875 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1876 return(ROFF_IGN);
1877 }
1878
1879 while ('\0' != *p) {
1880 fsz = ssz = 1;
1881
1882 first = p++;
1883 if ('\\' == *first) {
1884 esc = mandoc_escape(&p, NULL, NULL);
1885 if (ESCAPE_ERROR == esc) {
1886 mandoc_msg(MANDOCERR_BADESCAPE,
1887 r->parse, ln,
1888 (int)(p - *bufp), NULL);
1889 return(ROFF_IGN);
1890 }
1891 fsz = (size_t)(p - first);
1892 }
1893
1894 second = p++;
1895 if ('\\' == *second) {
1896 esc = mandoc_escape(&p, NULL, NULL);
1897 if (ESCAPE_ERROR == esc) {
1898 mandoc_msg(MANDOCERR_BADESCAPE,
1899 r->parse, ln,
1900 (int)(p - *bufp), NULL);
1901 return(ROFF_IGN);
1902 }
1903 ssz = (size_t)(p - second);
1904 } else if ('\0' == *second) {
1905 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
1906 ln, (int)(p - *bufp), NULL);
1907 second = " ";
1908 p--;
1909 }
1910
1911 if (fsz > 1) {
1912 roff_setstrn(&r->xmbtab, first, fsz,
1913 second, ssz, 0);
1914 continue;
1915 }
1916
1917 if (NULL == r->xtab)
1918 r->xtab = mandoc_calloc(128,
1919 sizeof(struct roffstr));
1920
1921 free(r->xtab[(int)*first].p);
1922 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
1923 r->xtab[(int)*first].sz = ssz;
1924 }
1925
1926 return(ROFF_IGN);
1927 }
1928
1929 static enum rofferr
1930 roff_so(ROFF_ARGS)
1931 {
1932 char *name;
1933
1934 name = *bufp + pos;
1935 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, ".so %s", name);
1936
1937 /*
1938 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1939 * opening anything that's not in our cwd or anything beneath
1940 * it. Thus, explicitly disallow traversing up the file-system
1941 * or using absolute paths.
1942 */
1943
1944 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1945 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
1946 ".so %s", name);
1947 return(ROFF_ERR);
1948 }
1949
1950 *offs = pos;
1951 return(ROFF_SO);
1952 }
1953
1954 static enum rofferr
1955 roff_userdef(ROFF_ARGS)
1956 {
1957 const char *arg[9];
1958 char *cp, *n1, *n2;
1959 int i;
1960
1961 /*
1962 * Collect pointers to macro argument strings
1963 * and NUL-terminate them.
1964 */
1965 cp = *bufp + pos;
1966 for (i = 0; i < 9; i++)
1967 arg[i] = '\0' == *cp ? "" :
1968 mandoc_getarg(r->parse, &cp, ln, &pos);
1969
1970 /*
1971 * Expand macro arguments.
1972 */
1973 *szp = 0;
1974 n1 = cp = mandoc_strdup(r->current_string);
1975 while (NULL != (cp = strstr(cp, "\\$"))) {
1976 i = cp[2] - '1';
1977 if (0 > i || 8 < i) {
1978 /* Not an argument invocation. */
1979 cp += 2;
1980 continue;
1981 }
1982 *cp = '\0';
1983 *szp = mandoc_asprintf(&n2, "%s%s%s",
1984 n1, arg[i], cp + 3) + 1;
1985 cp = n2 + (cp - n1);
1986 free(n1);
1987 n1 = n2;
1988 }
1989
1990 /*
1991 * Replace the macro invocation
1992 * by the expanded macro.
1993 */
1994 free(*bufp);
1995 *bufp = n1;
1996 if (0 == *szp)
1997 *szp = strlen(*bufp) + 1;
1998
1999 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
2000 ROFF_REPARSE : ROFF_APPEND);
2001 }
2002
2003 static size_t
2004 roff_getname(struct roff *r, char **cpp, int ln, int pos)
2005 {
2006 char *name, *cp;
2007 size_t namesz;
2008
2009 name = *cpp;
2010 if ('\0' == *name)
2011 return(0);
2012
2013 /* Read until end of name and terminate it with NUL. */
2014 for (cp = name; 1; cp++) {
2015 if ('\0' == *cp || ' ' == *cp) {
2016 namesz = cp - name;
2017 break;
2018 }
2019 if ('\\' != *cp)
2020 continue;
2021 namesz = cp - name;
2022 if ('{' == cp[1] || '}' == cp[1])
2023 break;
2024 cp++;
2025 if ('\\' == *cp)
2026 continue;
2027 mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
2028 mandoc_escape((const char **)&cp, NULL, NULL);
2029 break;
2030 }
2031
2032 /* Read past spaces. */
2033 while (' ' == *cp)
2034 cp++;
2035
2036 *cpp = cp;
2037 return(namesz);
2038 }
2039
2040 /*
2041 * Store *string into the user-defined string called *name.
2042 * To clear an existing entry, call with (*r, *name, NULL, 0).
2043 * append == 0: replace mode
2044 * append == 1: single-line append mode
2045 * append == 2: multiline append mode, append '\n' after each call
2046 */
2047 static void
2048 roff_setstr(struct roff *r, const char *name, const char *string,
2049 int append)
2050 {
2051
2052 roff_setstrn(&r->strtab, name, strlen(name), string,
2053 string ? strlen(string) : 0, append);
2054 }
2055
2056 static void
2057 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
2058 const char *string, size_t stringsz, int append)
2059 {
2060 struct roffkv *n;
2061 char *c;
2062 int i;
2063 size_t oldch, newch;
2064
2065 /* Search for an existing string with the same name. */
2066 n = *r;
2067
2068 while (n && (namesz != n->key.sz ||
2069 strncmp(n->key.p, name, namesz)))
2070 n = n->next;
2071
2072 if (NULL == n) {
2073 /* Create a new string table entry. */
2074 n = mandoc_malloc(sizeof(struct roffkv));
2075 n->key.p = mandoc_strndup(name, namesz);
2076 n->key.sz = namesz;
2077 n->val.p = NULL;
2078 n->val.sz = 0;
2079 n->next = *r;
2080 *r = n;
2081 } else if (0 == append) {
2082 free(n->val.p);
2083 n->val.p = NULL;
2084 n->val.sz = 0;
2085 }
2086
2087 if (NULL == string)
2088 return;
2089
2090 /*
2091 * One additional byte for the '\n' in multiline mode,
2092 * and one for the terminating '\0'.
2093 */
2094 newch = stringsz + (1 < append ? 2u : 1u);
2095
2096 if (NULL == n->val.p) {
2097 n->val.p = mandoc_malloc(newch);
2098 *n->val.p = '\0';
2099 oldch = 0;
2100 } else {
2101 oldch = n->val.sz;
2102 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
2103 }
2104
2105 /* Skip existing content in the destination buffer. */
2106 c = n->val.p + (int)oldch;
2107
2108 /* Append new content to the destination buffer. */
2109 i = 0;
2110 while (i < (int)stringsz) {
2111 /*
2112 * Rudimentary roff copy mode:
2113 * Handle escaped backslashes.
2114 */
2115 if ('\\' == string[i] && '\\' == string[i + 1])
2116 i++;
2117 *c++ = string[i++];
2118 }
2119
2120 /* Append terminating bytes. */
2121 if (1 < append)
2122 *c++ = '\n';
2123
2124 *c = '\0';
2125 n->val.sz = (int)(c - n->val.p);
2126 }
2127
2128 static const char *
2129 roff_getstrn(const struct roff *r, const char *name, size_t len)
2130 {
2131 const struct roffkv *n;
2132 int i;
2133
2134 for (n = r->strtab; n; n = n->next)
2135 if (0 == strncmp(name, n->key.p, len) &&
2136 '\0' == n->key.p[(int)len])
2137 return(n->val.p);
2138
2139 for (i = 0; i < PREDEFS_MAX; i++)
2140 if (0 == strncmp(name, predefs[i].name, len) &&
2141 '\0' == predefs[i].name[(int)len])
2142 return(predefs[i].str);
2143
2144 return(NULL);
2145 }
2146
2147 static void
2148 roff_freestr(struct roffkv *r)
2149 {
2150 struct roffkv *n, *nn;
2151
2152 for (n = r; n; n = nn) {
2153 free(n->key.p);
2154 free(n->val.p);
2155 nn = n->next;
2156 free(n);
2157 }
2158 }
2159
2160 const struct tbl_span *
2161 roff_span(const struct roff *r)
2162 {
2163
2164 return(r->tbl ? tbl_span(r->tbl) : NULL);
2165 }
2166
2167 const struct eqn *
2168 roff_eqn(const struct roff *r)
2169 {
2170
2171 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
2172 }
2173
2174 /*
2175 * Duplicate an input string, making the appropriate character
2176 * conversations (as stipulated by `tr') along the way.
2177 * Returns a heap-allocated string with all the replacements made.
2178 */
2179 char *
2180 roff_strdup(const struct roff *r, const char *p)
2181 {
2182 const struct roffkv *cp;
2183 char *res;
2184 const char *pp;
2185 size_t ssz, sz;
2186 enum mandoc_esc esc;
2187
2188 if (NULL == r->xmbtab && NULL == r->xtab)
2189 return(mandoc_strdup(p));
2190 else if ('\0' == *p)
2191 return(mandoc_strdup(""));
2192
2193 /*
2194 * Step through each character looking for term matches
2195 * (remember that a `tr' can be invoked with an escape, which is
2196 * a glyph but the escape is multi-character).
2197 * We only do this if the character hash has been initialised
2198 * and the string is >0 length.
2199 */
2200
2201 res = NULL;
2202 ssz = 0;
2203
2204 while ('\0' != *p) {
2205 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
2206 sz = r->xtab[(int)*p].sz;
2207 res = mandoc_realloc(res, ssz + sz + 1);
2208 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
2209 ssz += sz;
2210 p++;
2211 continue;
2212 } else if ('\\' != *p) {
2213 res = mandoc_realloc(res, ssz + 2);
2214 res[ssz++] = *p++;
2215 continue;
2216 }
2217
2218 /* Search for term matches. */
2219 for (cp = r->xmbtab; cp; cp = cp->next)
2220 if (0 == strncmp(p, cp->key.p, cp->key.sz))
2221 break;
2222
2223 if (NULL != cp) {
2224 /*
2225 * A match has been found.
2226 * Append the match to the array and move
2227 * forward by its keysize.
2228 */
2229 res = mandoc_realloc(res,
2230 ssz + cp->val.sz + 1);
2231 memcpy(res + ssz, cp->val.p, cp->val.sz);
2232 ssz += cp->val.sz;
2233 p += (int)cp->key.sz;
2234 continue;
2235 }
2236
2237 /*
2238 * Handle escapes carefully: we need to copy
2239 * over just the escape itself, or else we might
2240 * do replacements within the escape itself.
2241 * Make sure to pass along the bogus string.
2242 */
2243 pp = p++;
2244 esc = mandoc_escape(&p, NULL, NULL);
2245 if (ESCAPE_ERROR == esc) {
2246 sz = strlen(pp);
2247 res = mandoc_realloc(res, ssz + sz + 1);
2248 memcpy(res + ssz, pp, sz);
2249 break;
2250 }
2251 /*
2252 * We bail out on bad escapes.
2253 * No need to warn: we already did so when
2254 * roff_res() was called.
2255 */
2256 sz = (int)(p - pp);
2257 res = mandoc_realloc(res, ssz + sz + 1);
2258 memcpy(res + ssz, pp, sz);
2259 ssz += sz;
2260 }
2261
2262 res[(int)ssz] = '\0';
2263 return(res);
2264 }
2265
2266 /*
2267 * Find out whether a line is a macro line or not.
2268 * If it is, adjust the current position and return one; if it isn't,
2269 * return zero and don't change the current position.
2270 * If the control character has been set with `.cc', then let that grain
2271 * precedence.
2272 * This is slighly contrary to groff, where using the non-breaking
2273 * control character when `cc' has been invoked will cause the
2274 * non-breaking macro contents to be printed verbatim.
2275 */
2276 int
2277 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
2278 {
2279 int pos;
2280
2281 pos = *ppos;
2282
2283 if (0 != r->control && cp[pos] == r->control)
2284 pos++;
2285 else if (0 != r->control)
2286 return(0);
2287 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
2288 pos += 2;
2289 else if ('.' == cp[pos] || '\'' == cp[pos])
2290 pos++;
2291 else
2292 return(0);
2293
2294 while (' ' == cp[pos] || '\t' == cp[pos])
2295 pos++;
2296
2297 *ppos = pos;
2298 return(1);
2299 }