]> git.cameronkatri.com Git - mandoc.git/blob - eqn.c
Clean up eqn(7) error handling:
[mandoc.git] / eqn.c
1 /* $Id: eqn.c,v 1.57 2015/01/28 21:11:53 schwarze Exp $ */
2 /*
3 * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <limits.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <time.h>
28
29 #include "mandoc.h"
30 #include "mandoc_aux.h"
31 #include "libmandoc.h"
32 #include "libroff.h"
33
/* Upper bound on the number of define expansions applied to one token. */
#define	EQN_NEST_MAX	 128

/*
 * Non-zero if the two counted byte strings have the same length
 * and the same content.  The size arguments are evaluated twice.
 */
#define	STRNEQ(p1, sz1, p2, sz2) \
	((sz1) == (sz2) && strncmp((p1), (p2), (sz1)) == 0)
37
/*
 * Keywords recognised by the tokenizer.
 * The values double as indices into eqn_toks[], so the order of the
 * enumerators must stay in step with that table.
 */
enum	eqn_tok {
	/* Marks placed above or below the preceding box. */
	EQN_TOK_DYAD = 0, EQN_TOK_VEC, EQN_TOK_UNDER, EQN_TOK_BAR,
	EQN_TOK_TILDE, EQN_TOK_HAT, EQN_TOK_DOT, EQN_TOK_DOTDOT,

	/* Keywords followed by one argument that the parser discards. */
	EQN_TOK_FWD, EQN_TOK_BACK, EQN_TOK_DOWN, EQN_TOK_UP,

	/* Font and size selection for the following box. */
	EQN_TOK_FAT, EQN_TOK_ROMAN, EQN_TOK_ITALIC, EQN_TOK_BOLD,
	EQN_TOK_SIZE,

	/* Positional operators. */
	EQN_TOK_SUB, EQN_TOK_SUP, EQN_TOK_SQRT, EQN_TOK_OVER,
	EQN_TOK_FROM, EQN_TOK_TO,

	/* Grouping. */
	EQN_TOK_BRACE_OPEN, EQN_TOK_BRACE_CLOSE,

	/* Global settings and alignment keywords. */
	EQN_TOK_GSIZE, EQN_TOK_GFONT, EQN_TOK_MARK, EQN_TOK_LINEUP,

	/* Delimited lists. */
	EQN_TOK_LEFT, EQN_TOK_RIGHT,

	/* Piles, matrices and their columns. */
	EQN_TOK_PILE, EQN_TOK_LPILE, EQN_TOK_RPILE, EQN_TOK_CPILE,
	EQN_TOK_MATRIX, EQN_TOK_CCOL, EQN_TOK_LCOL, EQN_TOK_RCOL,

	/* Control statements. */
	EQN_TOK_DELIM, EQN_TOK_DEFINE, EQN_TOK_TDEFINE, EQN_TOK_NDEFINE,
	EQN_TOK_UNDEF,

	/* End of input; has no keyword string. */
	EQN_TOK_EOF,

	EQN_TOK_ABOVE,

	/* Table size; also returned for text that is not a keyword. */
	EQN_TOK__MAX
};
87
88 static const char *eqn_toks[EQN_TOK__MAX] = {
89 "dyad", /* EQN_TOK_DYAD */
90 "vec", /* EQN_TOK_VEC */
91 "under", /* EQN_TOK_UNDER */
92 "bar", /* EQN_TOK_BAR */
93 "tilde", /* EQN_TOK_TILDE */
94 "hat", /* EQN_TOK_HAT */
95 "dot", /* EQN_TOK_DOT */
96 "dotdot", /* EQN_TOK_DOTDOT */
97 "fwd", /* EQN_TOK_FWD * */
98 "back", /* EQN_TOK_BACK */
99 "down", /* EQN_TOK_DOWN */
100 "up", /* EQN_TOK_UP */
101 "fat", /* EQN_TOK_FAT */
102 "roman", /* EQN_TOK_ROMAN */
103 "italic", /* EQN_TOK_ITALIC */
104 "bold", /* EQN_TOK_BOLD */
105 "size", /* EQN_TOK_SIZE */
106 "sub", /* EQN_TOK_SUB */
107 "sup", /* EQN_TOK_SUP */
108 "sqrt", /* EQN_TOK_SQRT */
109 "over", /* EQN_TOK_OVER */
110 "from", /* EQN_TOK_FROM */
111 "to", /* EQN_TOK_TO */
112 "{", /* EQN_TOK_BRACE_OPEN */
113 "}", /* EQN_TOK_BRACE_CLOSE */
114 "gsize", /* EQN_TOK_GSIZE */
115 "gfont", /* EQN_TOK_GFONT */
116 "mark", /* EQN_TOK_MARK */
117 "lineup", /* EQN_TOK_LINEUP */
118 "left", /* EQN_TOK_LEFT */
119 "right", /* EQN_TOK_RIGHT */
120 "pile", /* EQN_TOK_PILE */
121 "lpile", /* EQN_TOK_LPILE */
122 "rpile", /* EQN_TOK_RPILE */
123 "cpile", /* EQN_TOK_CPILE */
124 "matrix", /* EQN_TOK_MATRIX */
125 "ccol", /* EQN_TOK_CCOL */
126 "lcol", /* EQN_TOK_LCOL */
127 "rcol", /* EQN_TOK_RCOL */
128 "delim", /* EQN_TOK_DELIM */
129 "define", /* EQN_TOK_DEFINE */
130 "tdefine", /* EQN_TOK_TDEFINE */
131 "ndefine", /* EQN_TOK_NDEFINE */
132 "undef", /* EQN_TOK_UNDEF */
133 NULL, /* EQN_TOK_EOF */
134 "above", /* EQN_TOK_ABOVE */
135 };
136
/*
 * Identifiers for the predefined eqn(7) symbol names.
 * The eqnsyms[] table lists its entries in this same order; the code
 * in this file only uses EQNSYM__MAX, as the table size and loop bound.
 */
enum	eqn_symt {
	/* Lower-case Greek letters. */
	EQNSYM_alpha,
	EQNSYM_beta,
	EQNSYM_chi,
	EQNSYM_delta,
	EQNSYM_epsilon,
	EQNSYM_eta,
	EQNSYM_gamma,
	EQNSYM_iota,
	EQNSYM_kappa,
	EQNSYM_lambda,
	EQNSYM_mu,
	EQNSYM_nu,
	EQNSYM_omega,
	EQNSYM_omicron,
	EQNSYM_phi,
	EQNSYM_pi,
	EQNSYM_psi,	/* was misspelled EQNSYM_ps */
	EQNSYM_rho,
	EQNSYM_sigma,
	EQNSYM_tau,
	EQNSYM_theta,
	EQNSYM_upsilon,
	EQNSYM_xi,
	EQNSYM_zeta,
	/* Upper-case Greek letters. */
	EQNSYM_DELTA,
	EQNSYM_GAMMA,
	EQNSYM_LAMBDA,
	EQNSYM_OMEGA,
	EQNSYM_PHI,
	EQNSYM_PI,
	EQNSYM_PSI,
	EQNSYM_SIGMA,
	EQNSYM_THETA,
	EQNSYM_UPSILON,
	EQNSYM_XI,
	/* Operators and other named symbols. */
	EQNSYM_inter,
	EQNSYM_union,
	EQNSYM_prod,
	EQNSYM_int,
	EQNSYM_sum,
	EQNSYM_grad,
	EQNSYM_del,
	EQNSYM_times,
	EQNSYM_cdot,
	EQNSYM_nothing,
	EQNSYM_approx,
	EQNSYM_prime,
	EQNSYM_half,
	EQNSYM_partial,
	EQNSYM_inf,
	/* Multi-character operator spellings. */
	EQNSYM_muchgreat,
	EQNSYM_muchless,
	EQNSYM_larrow,
	EQNSYM_rarrow,
	EQNSYM_pm,
	EQNSYM_nequal,
	EQNSYM_equiv,
	EQNSYM_lessequal,
	EQNSYM_moreequal,
	EQNSYM__MAX
};
199
/*
 * One predefined symbol: the word as written in eqn(7) input and the
 * character name that the parser wraps as "\[name]" in the output text.
 */
struct	eqnsym {
	const char	*str;	/* input spelling */
	const char	*sym;	/* name placed inside \[...] */
};
204
205 static const struct eqnsym eqnsyms[EQNSYM__MAX] = {
206 { "alpha", "*a" }, /* EQNSYM_alpha */
207 { "beta", "*b" }, /* EQNSYM_beta */
208 { "chi", "*x" }, /* EQNSYM_chi */
209 { "delta", "*d" }, /* EQNSYM_delta */
210 { "epsilon", "*e" }, /* EQNSYM_epsilon */
211 { "eta", "*y" }, /* EQNSYM_eta */
212 { "gamma", "*g" }, /* EQNSYM_gamma */
213 { "iota", "*i" }, /* EQNSYM_iota */
214 { "kappa", "*k" }, /* EQNSYM_kappa */
215 { "lambda", "*l" }, /* EQNSYM_lambda */
216 { "mu", "*m" }, /* EQNSYM_mu */
217 { "nu", "*n" }, /* EQNSYM_nu */
218 { "omega", "*w" }, /* EQNSYM_omega */
219 { "omicron", "*o" }, /* EQNSYM_omicron */
220 { "phi", "*f" }, /* EQNSYM_phi */
221 { "pi", "*p" }, /* EQNSYM_pi */
222 { "psi", "*q" }, /* EQNSYM_psi */
223 { "rho", "*r" }, /* EQNSYM_rho */
224 { "sigma", "*s" }, /* EQNSYM_sigma */
225 { "tau", "*t" }, /* EQNSYM_tau */
226 { "theta", "*h" }, /* EQNSYM_theta */
227 { "upsilon", "*u" }, /* EQNSYM_upsilon */
228 { "xi", "*c" }, /* EQNSYM_xi */
229 { "zeta", "*z" }, /* EQNSYM_zeta */
230 { "DELTA", "*D" }, /* EQNSYM_DELTA */
231 { "GAMMA", "*G" }, /* EQNSYM_GAMMA */
232 { "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */
233 { "OMEGA", "*W" }, /* EQNSYM_OMEGA */
234 { "PHI", "*F" }, /* EQNSYM_PHI */
235 { "PI", "*P" }, /* EQNSYM_PI */
236 { "PSI", "*Q" }, /* EQNSYM_PSI */
237 { "SIGMA", "*S" }, /* EQNSYM_SIGMA */
238 { "THETA", "*H" }, /* EQNSYM_THETA */
239 { "UPSILON", "*U" }, /* EQNSYM_UPSILON */
240 { "XI", "*C" }, /* EQNSYM_XI */
241 { "inter", "ca" }, /* EQNSYM_inter */
242 { "union", "cu" }, /* EQNSYM_union */
243 { "prod", "product" }, /* EQNSYM_prod */
244 { "int", "integral" }, /* EQNSYM_int */
245 { "sum", "sum" }, /* EQNSYM_sum */
246 { "grad", "gr" }, /* EQNSYM_grad */
247 { "del", "gr" }, /* EQNSYM_del */
248 { "times", "mu" }, /* EQNSYM_times */
249 { "cdot", "pc" }, /* EQNSYM_cdot */
250 { "nothing", "&" }, /* EQNSYM_nothing */
251 { "approx", "~~" }, /* EQNSYM_approx */
252 { "prime", "aq" }, /* EQNSYM_prime */
253 { "half", "12" }, /* EQNSYM_half */
254 { "partial", "pd" }, /* EQNSYM_partial */
255 { "inf", "if" }, /* EQNSYM_inf */
256 { ">>", ">>" }, /* EQNSYM_muchgreat */
257 { "<<", "<<" }, /* EQNSYM_muchless */
258 { "<-", "<-" }, /* EQNSYM_larrow */
259 { "->", "->" }, /* EQNSYM_rarrow */
260 { "+-", "+-" }, /* EQNSYM_pm */
261 { "!=", "!=" }, /* EQNSYM_nequal */
262 { "==", "==" }, /* EQNSYM_equiv */
263 { "<=", "<=" }, /* EQNSYM_lessequal */
264 { ">=", ">=" }, /* EQNSYM_moreequal */
265 };
266
/* Box tree construction and destruction. */
static	struct eqn_box	*eqn_box_alloc(struct eqn_node *ep,
				struct eqn_box *parent);
static	void		 eqn_box_free(struct eqn_box *bp);
static	struct eqn_box	*eqn_box_makebinary(struct eqn_node *ep,
				enum eqn_post pos, struct eqn_box *parent);

/* Control statements. */
static	void		 eqn_def(struct eqn_node *ep);
static	struct eqn_def	*eqn_def_find(struct eqn_node *ep,
				const char *key, size_t sz);
static	void		 eqn_delim(struct eqn_node *ep);
static	void		 eqn_undef(struct eqn_node *ep);

/* Tokenizing and parsing. */
static	const char	*eqn_next(struct eqn_node *ep, char quote,
				size_t *sz, int repl);
static	const char	*eqn_nextrawtok(struct eqn_node *ep, size_t *sz);
static	const char	*eqn_nexttok(struct eqn_node *ep, size_t *sz);
static	enum rofferr	 eqn_parse(struct eqn_node *ep,
				struct eqn_box *parent);
static	enum eqn_tok	 eqn_tok_parse(struct eqn_node *ep, char **p);

280
281
282 enum rofferr
283 eqn_read(struct eqn_node **epp, int ln,
284 const char *p, int pos, int *offs)
285 {
286 size_t sz;
287 struct eqn_node *ep;
288 enum rofferr er;
289
290 ep = *epp;
291
292 /*
293 * If we're the terminating mark, unset our equation status and
294 * validate the full equation.
295 */
296
297 if (0 == strncmp(p, ".EN", 3)) {
298 er = eqn_end(epp);
299 p += 3;
300 while (' ' == *p || '\t' == *p)
301 p++;
302 if ('\0' == *p)
303 return(er);
304 mandoc_vmsg(MANDOCERR_ARG_SKIP, ep->parse,
305 ln, pos, "EN %s", p);
306 return(er);
307 }
308
309 /*
310 * Build up the full string, replacing all newlines with regular
311 * whitespace.
312 */
313
314 sz = strlen(p + pos) + 1;
315 ep->data = mandoc_realloc(ep->data, ep->sz + sz + 1);
316
317 /* First invocation: nil terminate the string. */
318
319 if (0 == ep->sz)
320 *ep->data = '\0';
321
322 ep->sz += sz;
323 strlcat(ep->data, p + pos, ep->sz + 1);
324 strlcat(ep->data, " ", ep->sz + 1);
325 return(ROFF_IGN);
326 }
327
328 struct eqn_node *
329 eqn_alloc(int pos, int line, struct mparse *parse)
330 {
331 struct eqn_node *p;
332
333 p = mandoc_calloc(1, sizeof(struct eqn_node));
334
335 p->parse = parse;
336 p->eqn.ln = line;
337 p->eqn.pos = pos;
338 p->gsize = EQN_DEFSIZE;
339
340 return(p);
341 }
342
343 /*
344 * Find the key "key" of the give size within our eqn-defined values.
345 */
346 static struct eqn_def *
347 eqn_def_find(struct eqn_node *ep, const char *key, size_t sz)
348 {
349 int i;
350
351 for (i = 0; i < (int)ep->defsz; i++)
352 if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key,
353 ep->defs[i].keysz, key, sz))
354 return(&ep->defs[i]);
355
356 return(NULL);
357 }
358
359 /*
360 * Get the next token from the input stream using the given quote
361 * character.
362 * Optionally make any replacements.
363 */
364 static const char *
365 eqn_next(struct eqn_node *ep, char quote, size_t *sz, int repl)
366 {
367 char *start, *next;
368 int q, diff, lim;
369 size_t ssz, dummy;
370 struct eqn_def *def;
371
372 if (NULL == sz)
373 sz = &dummy;
374
375 lim = 0;
376 ep->rew = ep->cur;
377 again:
378 /* Prevent self-definitions. */
379
380 if (lim >= EQN_NEST_MAX) {
381 mandoc_msg(MANDOCERR_ROFFLOOP, ep->parse,
382 ep->eqn.ln, ep->eqn.pos, NULL);
383 return(NULL);
384 }
385
386 ep->cur = ep->rew;
387 start = &ep->data[(int)ep->cur];
388 q = 0;
389
390 if ('\0' == *start)
391 return(NULL);
392
393 if (quote == *start) {
394 ep->cur++;
395 q = 1;
396 }
397
398 start = &ep->data[(int)ep->cur];
399
400 if ( ! q) {
401 if ('{' == *start || '}' == *start)
402 ssz = 1;
403 else
404 ssz = strcspn(start + 1, " ^~\"{}\t") + 1;
405 next = start + (int)ssz;
406 if ('\0' == *next)
407 next = NULL;
408 } else
409 next = strchr(start, quote);
410
411 if (NULL != next) {
412 *sz = (size_t)(next - start);
413 ep->cur += *sz;
414 if (q)
415 ep->cur++;
416 while (' ' == ep->data[(int)ep->cur] ||
417 '\t' == ep->data[(int)ep->cur] ||
418 '^' == ep->data[(int)ep->cur] ||
419 '~' == ep->data[(int)ep->cur])
420 ep->cur++;
421 } else {
422 if (q)
423 mandoc_msg(MANDOCERR_ARG_QUOTE, ep->parse,
424 ep->eqn.ln, ep->eqn.pos, NULL);
425 next = strchr(start, '\0');
426 *sz = (size_t)(next - start);
427 ep->cur += *sz;
428 }
429
430 /* Quotes aren't expanded for values. */
431
432 if (q || ! repl)
433 return(start);
434
435 if (NULL != (def = eqn_def_find(ep, start, *sz))) {
436 diff = def->valsz - *sz;
437
438 if (def->valsz > *sz) {
439 ep->sz += diff;
440 ep->data = mandoc_realloc(ep->data, ep->sz + 1);
441 ep->data[ep->sz] = '\0';
442 start = &ep->data[(int)ep->rew];
443 }
444
445 diff = def->valsz - *sz;
446 memmove(start + *sz + diff, start + *sz,
447 (strlen(start) - *sz) + 1);
448 memcpy(start, def->val, def->valsz);
449 goto again;
450 }
451
452 return(start);
453 }
454
/*
 * Fetch the next token, delimited by double quotes if quoted,
 * with definitions expanded.
 */
static const char *
eqn_nexttok(struct eqn_node *ep, size_t *sz)
{
	const int	 expand = 1;

	return(eqn_next(ep, '"', sz, expand));
}
465
/*
 * Fetch the next token, delimited by double quotes if quoted,
 * without expanding definitions.
 */
static const char *
eqn_nextrawtok(struct eqn_node *ep, size_t *sz)
{
	const int	 expand = 0;

	return(eqn_next(ep, '"', sz, expand));
}
475
476 /*
477 * Parse a token from the stream of text.
478 * A token consists of one of the recognised eqn(7) strings.
479 * Strings are separated by delimiting marks.
480 * This returns EQN_TOK_EOF when there are no more tokens.
481 * If the token is an unrecognised string literal, then it returns
482 * EQN_TOK__MAX and sets the "p" pointer to an allocated, nil-terminated
483 * string.
484 * This must be later freed with free(3).
485 */
486 static enum eqn_tok
487 eqn_tok_parse(struct eqn_node *ep, char **p)
488 {
489 const char *start;
490 size_t i, sz;
491 int quoted;
492
493 if (NULL != p)
494 *p = NULL;
495
496 quoted = ep->data[ep->cur] == '"';
497
498 if (NULL == (start = eqn_nexttok(ep, &sz)))
499 return(EQN_TOK_EOF);
500
501 if (quoted) {
502 if (p != NULL)
503 *p = mandoc_strndup(start, sz);
504 return(EQN_TOK__MAX);
505 }
506
507 for (i = 0; i < EQN_TOK__MAX; i++) {
508 if (NULL == eqn_toks[i])
509 continue;
510 if (STRNEQ(start, sz, eqn_toks[i], strlen(eqn_toks[i])))
511 break;
512 }
513
514 if (i == EQN_TOK__MAX && NULL != p)
515 *p = mandoc_strndup(start, sz);
516
517 return(i);
518 }
519
520 static void
521 eqn_box_free(struct eqn_box *bp)
522 {
523
524 if (bp->first)
525 eqn_box_free(bp->first);
526 if (bp->next)
527 eqn_box_free(bp->next);
528
529 free(bp->text);
530 free(bp->left);
531 free(bp->right);
532 free(bp->top);
533 free(bp->bottom);
534 free(bp);
535 }
536
537 /*
538 * Allocate a box as the last child of the parent node.
539 */
540 static struct eqn_box *
541 eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent)
542 {
543 struct eqn_box *bp;
544
545 bp = mandoc_calloc(1, sizeof(struct eqn_box));
546 bp->parent = parent;
547 bp->parent->args++;
548 bp->expectargs = UINT_MAX;
549 bp->size = ep->gsize;
550
551 if (NULL != parent->first) {
552 parent->last->next = bp;
553 bp->prev = parent->last;
554 } else
555 parent->first = bp;
556
557 parent->last = bp;
558 return(bp);
559 }
560
561 /*
562 * Reparent the current last node (of the current parent) under a new
563 * EQN_SUBEXPR as the first element.
564 * Then return the new parent.
565 * The new EQN_SUBEXPR will have a two-child limit.
566 */
567 static struct eqn_box *
568 eqn_box_makebinary(struct eqn_node *ep,
569 enum eqn_post pos, struct eqn_box *parent)
570 {
571 struct eqn_box *b, *newb;
572
573 assert(NULL != parent->last);
574 b = parent->last;
575 if (parent->last == parent->first)
576 parent->first = NULL;
577 parent->args--;
578 parent->last = b->prev;
579 b->prev = NULL;
580 newb = eqn_box_alloc(ep, parent);
581 newb->pos = pos;
582 newb->type = EQN_SUBEXPR;
583 newb->expectargs = 2;
584 newb->args = 1;
585 newb->first = newb->last = b;
586 newb->first->next = NULL;
587 b->parent = newb;
588 return(newb);
589 }
590
591 /*
592 * Parse the "delim" control statement.
593 */
594 static void
595 eqn_delim(struct eqn_node *ep)
596 {
597 const char *start;
598 size_t sz;
599
600 if ((start = eqn_nextrawtok(ep, &sz)) == NULL)
601 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
602 ep->eqn.ln, ep->eqn.pos, "delim");
603 else if (strncmp(start, "off", 3) == 0)
604 ep->delim = 0;
605 else if (strncmp(start, "on", 2) == 0) {
606 if (ep->odelim && ep->cdelim)
607 ep->delim = 1;
608 } else if (start[1] != '\0') {
609 ep->odelim = start[0];
610 ep->cdelim = start[1];
611 ep->delim = 1;
612 }
613 }
614
615 /*
616 * Undefine a previously-defined string.
617 */
618 static void
619 eqn_undef(struct eqn_node *ep)
620 {
621 const char *start;
622 struct eqn_def *def;
623 size_t sz;
624
625 if ((start = eqn_nextrawtok(ep, &sz)) == NULL) {
626 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
627 ep->eqn.ln, ep->eqn.pos, "undef");
628 return;
629 }
630 if ((def = eqn_def_find(ep, start, sz)) == NULL)
631 return;
632 free(def->key);
633 free(def->val);
634 def->key = def->val = NULL;
635 def->keysz = def->valsz = 0;
636 }
637
638 static void
639 eqn_def(struct eqn_node *ep)
640 {
641 const char *start;
642 size_t sz;
643 struct eqn_def *def;
644 int i;
645
646 if ((start = eqn_nextrawtok(ep, &sz)) == NULL) {
647 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
648 ep->eqn.ln, ep->eqn.pos, "define");
649 return;
650 }
651
652 /*
653 * Search for a key that already exists.
654 * Create a new key if none is found.
655 */
656 if (NULL == (def = eqn_def_find(ep, start, sz))) {
657 /* Find holes in string array. */
658 for (i = 0; i < (int)ep->defsz; i++)
659 if (0 == ep->defs[i].keysz)
660 break;
661
662 if (i == (int)ep->defsz) {
663 ep->defsz++;
664 ep->defs = mandoc_reallocarray(ep->defs,
665 ep->defsz, sizeof(struct eqn_def));
666 ep->defs[i].key = ep->defs[i].val = NULL;
667 }
668
669 def = ep->defs + i;
670 free(def->key);
671 def->key = mandoc_strndup(start, sz);
672 def->keysz = sz;
673 }
674
675 start = eqn_next(ep, ep->data[(int)ep->cur], &sz, 0);
676 if (start == NULL) {
677 mandoc_vmsg(MANDOCERR_REQ_EMPTY, ep->parse,
678 ep->eqn.ln, ep->eqn.pos, "define %s", def->key);
679 free(def->key);
680 free(def->val);
681 def->key = def->val = NULL;
682 def->keysz = def->valsz = 0;
683 return;
684 }
685 free(def->val);
686 def->val = mandoc_strndup(start, sz);
687 def->valsz = sz;
688 }
689
690 /*
691 * Recursively parse an eqn(7) expression.
692 */
693 static enum rofferr
694 eqn_parse(struct eqn_node *ep, struct eqn_box *parent)
695 {
696 char sym[64];
697 struct eqn_box *cur;
698 const char *start;
699 char *p;
700 size_t i, sz;
701 enum eqn_tok tok, subtok;
702 enum eqn_post pos;
703 int size;
704
705 assert(parent != NULL);
706
707 /*
708 * Empty equation.
709 * Do not add it to the high-level syntax tree.
710 */
711
712 if (ep->data == NULL)
713 return(ROFF_IGN);
714
715 next_tok:
716 tok = eqn_tok_parse(ep, &p);
717
718 this_tok:
719 switch (tok) {
720 case (EQN_TOK_UNDEF):
721 eqn_undef(ep);
722 break;
723 case (EQN_TOK_NDEFINE):
724 case (EQN_TOK_DEFINE):
725 eqn_def(ep);
726 break;
727 case (EQN_TOK_TDEFINE):
728 if (eqn_nextrawtok(ep, NULL) == NULL ||
729 eqn_next(ep, ep->data[(int)ep->cur], NULL, 0) == NULL)
730 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
731 ep->eqn.ln, ep->eqn.pos, "tdefine");
732 break;
733 case (EQN_TOK_DELIM):
734 eqn_delim(ep);
735 break;
736 case (EQN_TOK_GFONT):
737 if (eqn_nextrawtok(ep, NULL) == NULL)
738 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
739 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
740 break;
741 case (EQN_TOK_MARK):
742 case (EQN_TOK_LINEUP):
743 /* Ignore these. */
744 break;
745 case (EQN_TOK_DYAD):
746 case (EQN_TOK_VEC):
747 case (EQN_TOK_UNDER):
748 case (EQN_TOK_BAR):
749 case (EQN_TOK_TILDE):
750 case (EQN_TOK_HAT):
751 case (EQN_TOK_DOT):
752 case (EQN_TOK_DOTDOT):
753 if (parent->last == NULL) {
754 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
755 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
756 cur = eqn_box_alloc(ep, parent);
757 cur->type = EQN_TEXT;
758 cur->text = mandoc_strdup("");
759 }
760 parent = eqn_box_makebinary(ep, EQNPOS_NONE, parent);
761 parent->type = EQN_LISTONE;
762 parent->expectargs = 1;
763 switch (tok) {
764 case (EQN_TOK_DOTDOT):
765 strlcpy(sym, "\\[ad]", sizeof(sym));
766 break;
767 case (EQN_TOK_VEC):
768 strlcpy(sym, "\\[->]", sizeof(sym));
769 break;
770 case (EQN_TOK_DYAD):
771 strlcpy(sym, "\\[<>]", sizeof(sym));
772 break;
773 case (EQN_TOK_TILDE):
774 strlcpy(sym, "\\[a~]", sizeof(sym));
775 break;
776 case (EQN_TOK_UNDER):
777 strlcpy(sym, "\\[ul]", sizeof(sym));
778 break;
779 case (EQN_TOK_BAR):
780 strlcpy(sym, "\\[rl]", sizeof(sym));
781 break;
782 case (EQN_TOK_DOT):
783 strlcpy(sym, "\\[a.]", sizeof(sym));
784 break;
785 case (EQN_TOK_HAT):
786 strlcpy(sym, "\\[ha]", sizeof(sym));
787 break;
788 default:
789 abort();
790 }
791
792 switch (tok) {
793 case (EQN_TOK_DOTDOT):
794 case (EQN_TOK_VEC):
795 case (EQN_TOK_DYAD):
796 case (EQN_TOK_TILDE):
797 case (EQN_TOK_BAR):
798 case (EQN_TOK_DOT):
799 case (EQN_TOK_HAT):
800 parent->top = mandoc_strdup(sym);
801 break;
802 case (EQN_TOK_UNDER):
803 parent->bottom = mandoc_strdup(sym);
804 break;
805 default:
806 abort();
807 }
808 parent = parent->parent;
809 break;
810 case (EQN_TOK_FWD):
811 case (EQN_TOK_BACK):
812 case (EQN_TOK_DOWN):
813 case (EQN_TOK_UP):
814 subtok = eqn_tok_parse(ep, NULL);
815 if (subtok != EQN_TOK__MAX) {
816 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
817 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
818 tok = subtok;
819 goto this_tok;
820 }
821 break;
822 case (EQN_TOK_FAT):
823 case (EQN_TOK_ROMAN):
824 case (EQN_TOK_ITALIC):
825 case (EQN_TOK_BOLD):
826 while (parent->args == parent->expectargs)
827 parent = parent->parent;
828 /*
829 * These values apply to the next word or sequence of
830 * words; thus, we mark that we'll have a child with
831 * exactly one of those.
832 */
833 parent = eqn_box_alloc(ep, parent);
834 parent->type = EQN_LISTONE;
835 parent->expectargs = 1;
836 switch (tok) {
837 case (EQN_TOK_FAT):
838 parent->font = EQNFONT_FAT;
839 break;
840 case (EQN_TOK_ROMAN):
841 parent->font = EQNFONT_ROMAN;
842 break;
843 case (EQN_TOK_ITALIC):
844 parent->font = EQNFONT_ITALIC;
845 break;
846 case (EQN_TOK_BOLD):
847 parent->font = EQNFONT_BOLD;
848 break;
849 default:
850 abort();
851 }
852 break;
853 case (EQN_TOK_SIZE):
854 case (EQN_TOK_GSIZE):
855 /* Accept two values: integral size and a single. */
856 if (NULL == (start = eqn_nexttok(ep, &sz))) {
857 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
858 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
859 break;
860 }
861 size = mandoc_strntoi(start, sz, 10);
862 if (-1 == size) {
863 mandoc_msg(MANDOCERR_IT_NONUM, ep->parse,
864 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
865 break;
866 }
867 if (EQN_TOK_GSIZE == tok) {
868 ep->gsize = size;
869 break;
870 }
871 parent = eqn_box_alloc(ep, parent);
872 parent->type = EQN_LISTONE;
873 parent->expectargs = 1;
874 parent->size = size;
875 break;
876 case (EQN_TOK_FROM):
877 case (EQN_TOK_TO):
878 case (EQN_TOK_SUB):
879 case (EQN_TOK_SUP):
880 /*
881 * We have a left-right-associative expression.
882 * Repivot under a positional node, open a child scope
883 * and keep on reading.
884 */
885 if (parent->last == NULL) {
886 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
887 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
888 cur = eqn_box_alloc(ep, parent);
889 cur->type = EQN_TEXT;
890 cur->text = mandoc_strdup("");
891 }
892 /* Handle the "subsup" and "fromto" positions. */
893 if (EQN_TOK_SUP == tok && parent->pos == EQNPOS_SUB) {
894 parent->expectargs = 3;
895 parent->pos = EQNPOS_SUBSUP;
896 break;
897 }
898 if (EQN_TOK_TO == tok && parent->pos == EQNPOS_FROM) {
899 parent->expectargs = 3;
900 parent->pos = EQNPOS_FROMTO;
901 break;
902 }
903 switch (tok) {
904 case (EQN_TOK_FROM):
905 pos = EQNPOS_FROM;
906 break;
907 case (EQN_TOK_TO):
908 pos = EQNPOS_TO;
909 break;
910 case (EQN_TOK_SUP):
911 pos = EQNPOS_SUP;
912 break;
913 case (EQN_TOK_SUB):
914 pos = EQNPOS_SUB;
915 break;
916 default:
917 abort();
918 }
919 parent = eqn_box_makebinary(ep, pos, parent);
920 break;
921 case (EQN_TOK_SQRT):
922 while (parent->args == parent->expectargs)
923 parent = parent->parent;
924 /*
925 * Accept a left-right-associative set of arguments just
926 * like sub and sup and friends but without rebalancing
927 * under a pivot.
928 */
929 parent = eqn_box_alloc(ep, parent);
930 parent->type = EQN_SUBEXPR;
931 parent->pos = EQNPOS_SQRT;
932 parent->expectargs = 1;
933 break;
934 case (EQN_TOK_OVER):
935 /*
936 * We have a right-left-associative fraction.
937 * Close out anything that's currently open, then
938 * rebalance and continue reading.
939 */
940 if (parent->last == NULL) {
941 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
942 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
943 cur = eqn_box_alloc(ep, parent);
944 cur->type = EQN_TEXT;
945 cur->text = mandoc_strdup("");
946 }
947 while (EQN_SUBEXPR == parent->type)
948 parent = parent->parent;
949 parent = eqn_box_makebinary(ep, EQNPOS_OVER, parent);
950 break;
951 case (EQN_TOK_RIGHT):
952 case (EQN_TOK_BRACE_CLOSE):
953 /*
954 * Close out the existing brace.
955 * FIXME: this is a shitty sentinel: we should really
956 * have a native EQN_BRACE type or whatnot.
957 */
958 for (cur = parent; cur != NULL; cur = cur->parent)
959 if (cur->type == EQN_LIST &&
960 (tok == EQN_TOK_BRACE_CLOSE ||
961 cur->left != NULL))
962 break;
963 if (cur == NULL) {
964 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->parse,
965 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
966 break;
967 }
968 parent = cur;
969 if (EQN_TOK_RIGHT == tok) {
970 if (NULL == (start = eqn_nexttok(ep, &sz))) {
971 mandoc_msg(MANDOCERR_REQ_EMPTY,
972 ep->parse, ep->eqn.ln,
973 ep->eqn.pos, eqn_toks[tok]);
974 break;
975 }
976 /* Handling depends on right/left. */
977 if (STRNEQ(start, sz, "ceiling", 7)) {
978 strlcpy(sym, "\\[rc]", sizeof(sym));
979 parent->right = mandoc_strdup(sym);
980 } else if (STRNEQ(start, sz, "floor", 5)) {
981 strlcpy(sym, "\\[rf]", sizeof(sym));
982 parent->right = mandoc_strdup(sym);
983 } else
984 parent->right = mandoc_strndup(start, sz);
985 }
986 parent = parent->parent;
987 if (EQN_TOK_BRACE_CLOSE == tok && parent &&
988 (parent->type == EQN_PILE ||
989 parent->type == EQN_MATRIX))
990 parent = parent->parent;
991 /* Close out any "singleton" lists. */
992 while (parent->type == EQN_LISTONE &&
993 parent->args == parent->expectargs)
994 parent = parent->parent;
995 break;
996 case (EQN_TOK_BRACE_OPEN):
997 case (EQN_TOK_LEFT):
998 /*
999 * If we already have something in the stack and we're
1000 * in an expression, then rewind til we're not any more
1001 * (just like with the text node).
1002 */
1003 while (parent->args == parent->expectargs)
1004 parent = parent->parent;
1005 if (EQN_TOK_LEFT == tok &&
1006 (start = eqn_nexttok(ep, &sz)) == NULL) {
1007 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
1008 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
1009 break;
1010 }
1011 parent = eqn_box_alloc(ep, parent);
1012 parent->type = EQN_LIST;
1013 if (EQN_TOK_LEFT == tok) {
1014 if (STRNEQ(start, sz, "ceiling", 7)) {
1015 strlcpy(sym, "\\[lc]", sizeof(sym));
1016 parent->left = mandoc_strdup(sym);
1017 } else if (STRNEQ(start, sz, "floor", 5)) {
1018 strlcpy(sym, "\\[lf]", sizeof(sym));
1019 parent->left = mandoc_strdup(sym);
1020 } else
1021 parent->left = mandoc_strndup(start, sz);
1022 }
1023 break;
1024 case (EQN_TOK_PILE):
1025 case (EQN_TOK_LPILE):
1026 case (EQN_TOK_RPILE):
1027 case (EQN_TOK_CPILE):
1028 case (EQN_TOK_CCOL):
1029 case (EQN_TOK_LCOL):
1030 case (EQN_TOK_RCOL):
1031 while (parent->args == parent->expectargs)
1032 parent = parent->parent;
1033 parent = eqn_box_alloc(ep, parent);
1034 parent->type = EQN_PILE;
1035 parent->expectargs = 1;
1036 break;
1037 case (EQN_TOK_ABOVE):
1038 for (cur = parent; cur != NULL; cur = cur->parent)
1039 if (cur->type == EQN_PILE)
1040 break;
1041 if (cur == NULL) {
1042 mandoc_msg(MANDOCERR_IT_STRAY, ep->parse,
1043 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
1044 break;
1045 }
1046 parent = eqn_box_alloc(ep, cur);
1047 parent->type = EQN_LIST;
1048 break;
1049 case (EQN_TOK_MATRIX):
1050 while (parent->args == parent->expectargs)
1051 parent = parent->parent;
1052 parent = eqn_box_alloc(ep, parent);
1053 parent->type = EQN_MATRIX;
1054 parent->expectargs = 1;
1055 break;
1056 case (EQN_TOK_EOF):
1057 /*
1058 * End of file!
1059 * TODO: make sure we're not in an open subexpression.
1060 */
1061 return(ROFF_EQN);
1062 default:
1063 assert(tok == EQN_TOK__MAX);
1064 assert(NULL != p);
1065 /*
1066 * If we already have something in the stack and we're
1067 * in an expression, then rewind til we're not any more.
1068 */
1069 while (parent->args == parent->expectargs)
1070 parent = parent->parent;
1071 cur = eqn_box_alloc(ep, parent);
1072 cur->type = EQN_TEXT;
1073 for (i = 0; i < EQNSYM__MAX; i++)
1074 if (0 == strcmp(eqnsyms[i].str, p)) {
1075 (void)snprintf(sym, sizeof(sym),
1076 "\\[%s]", eqnsyms[i].sym);
1077 cur->text = mandoc_strdup(sym);
1078 free(p);
1079 break;
1080 }
1081
1082 if (i == EQNSYM__MAX)
1083 cur->text = p;
1084 /*
1085 * Post-process list status.
1086 */
1087 while (parent->type == EQN_LISTONE &&
1088 parent->args == parent->expectargs)
1089 parent = parent->parent;
1090 break;
1091 }
1092 goto next_tok;
1093 }
1094
1095 enum rofferr
1096 eqn_end(struct eqn_node **epp)
1097 {
1098 struct eqn_node *ep;
1099
1100 ep = *epp;
1101 *epp = NULL;
1102
1103 ep->eqn.root = mandoc_calloc(1, sizeof(struct eqn_box));
1104 ep->eqn.root->expectargs = UINT_MAX;
1105 return(eqn_parse(ep, ep->eqn.root));
1106 }
1107
1108 void
1109 eqn_free(struct eqn_node *p)
1110 {
1111 int i;
1112
1113 eqn_box_free(p->eqn.root);
1114
1115 for (i = 0; i < (int)p->defsz; i++) {
1116 free(p->defs[i].key);
1117 free(p->defs[i].val);
1118 }
1119
1120 free(p->data);
1121 free(p->defs);
1122 free(p);
1123 }