]> git.cameronkatri.com Git - mandoc.git/blob - eqn.c
The concept of endianness seems to be somewhat newfangled, so the
[mandoc.git] / eqn.c
1 /* $Id: eqn.c,v 1.61 2016/01/08 00:50:45 schwarze Exp $ */
2 /*
3 * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <limits.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <time.h>
28
29 #include "mandoc.h"
30 #include "mandoc_aux.h"
31 #include "libmandoc.h"
32 #include "libroff.h"
33
/*
 * Upper bound on the number of definition substitutions performed
 * while reading one token.
 */
#define	EQN_NEST_MAX	 128

/*
 * True when two length-counted strings have the same length and the
 * same leading bytes.  Note that "sz1" is evaluated twice, so do not
 * pass an expression with side effects for it.
 */
#define	STRNEQ(p1, sz1, p2, sz2) \
	((sz1) == (sz2) && strncmp((p1), (p2), (sz1)) == 0)
37
/*
 * Tokens understood by the parser.
 * EQN_TOK__MAX doubles as the "not a keyword" result of
 * eqn_tok_parse(); EQN_TOK_EOF has no textual form.
 */
enum eqn_tok {
	/* Marks attached to the preceding box. */
	EQN_TOK_DYAD = 0,
	EQN_TOK_VEC,
	EQN_TOK_UNDER,
	EQN_TOK_BAR,
	EQN_TOK_TILDE,
	EQN_TOK_HAT,
	EQN_TOK_DOT,
	EQN_TOK_DOTDOT,
	/* Keywords taking one argument that the parser discards. */
	EQN_TOK_FWD,
	EQN_TOK_BACK,
	EQN_TOK_DOWN,
	EQN_TOK_UP,
	/* Fonts and sizes. */
	EQN_TOK_FAT,
	EQN_TOK_ROMAN,
	EQN_TOK_ITALIC,
	EQN_TOK_BOLD,
	EQN_TOK_SIZE,
	/* Positional operators. */
	EQN_TOK_SUB,
	EQN_TOK_SUP,
	EQN_TOK_SQRT,
	EQN_TOK_OVER,
	EQN_TOK_FROM,
	EQN_TOK_TO,
	/* Grouping. */
	EQN_TOK_BRACE_OPEN,
	EQN_TOK_BRACE_CLOSE,
	/* Global settings and ignored alignment marks. */
	EQN_TOK_GSIZE,
	EQN_TOK_GFONT,
	EQN_TOK_MARK,
	EQN_TOK_LINEUP,
	/* Delimited lists, piles and matrices. */
	EQN_TOK_LEFT,
	EQN_TOK_RIGHT,
	EQN_TOK_PILE,
	EQN_TOK_LPILE,
	EQN_TOK_RPILE,
	EQN_TOK_CPILE,
	EQN_TOK_MATRIX,
	EQN_TOK_CCOL,
	EQN_TOK_LCOL,
	EQN_TOK_RCOL,
	/* Control statements. */
	EQN_TOK_DELIM,
	EQN_TOK_DEFINE,
	EQN_TOK_TDEFINE,
	EQN_TOK_NDEFINE,
	EQN_TOK_UNDEF,
	/* End of input. */
	EQN_TOK_EOF,
	EQN_TOK_ABOVE,
	EQN_TOK__MAX
};

/*
 * Source spelling of each token, indexed by enum eqn_tok.
 * A NULL entry means the token never appears literally in the input.
 */
static const char *eqn_toks[EQN_TOK__MAX] = {
	[EQN_TOK_DYAD]		= "dyad",
	[EQN_TOK_VEC]		= "vec",
	[EQN_TOK_UNDER]		= "under",
	[EQN_TOK_BAR]		= "bar",
	[EQN_TOK_TILDE]		= "tilde",
	[EQN_TOK_HAT]		= "hat",
	[EQN_TOK_DOT]		= "dot",
	[EQN_TOK_DOTDOT]	= "dotdot",
	[EQN_TOK_FWD]		= "fwd",
	[EQN_TOK_BACK]		= "back",
	[EQN_TOK_DOWN]		= "down",
	[EQN_TOK_UP]		= "up",
	[EQN_TOK_FAT]		= "fat",
	[EQN_TOK_ROMAN]		= "roman",
	[EQN_TOK_ITALIC]	= "italic",
	[EQN_TOK_BOLD]		= "bold",
	[EQN_TOK_SIZE]		= "size",
	[EQN_TOK_SUB]		= "sub",
	[EQN_TOK_SUP]		= "sup",
	[EQN_TOK_SQRT]		= "sqrt",
	[EQN_TOK_OVER]		= "over",
	[EQN_TOK_FROM]		= "from",
	[EQN_TOK_TO]		= "to",
	[EQN_TOK_BRACE_OPEN]	= "{",
	[EQN_TOK_BRACE_CLOSE]	= "}",
	[EQN_TOK_GSIZE]		= "gsize",
	[EQN_TOK_GFONT]		= "gfont",
	[EQN_TOK_MARK]		= "mark",
	[EQN_TOK_LINEUP]	= "lineup",
	[EQN_TOK_LEFT]		= "left",
	[EQN_TOK_RIGHT]		= "right",
	[EQN_TOK_PILE]		= "pile",
	[EQN_TOK_LPILE]		= "lpile",
	[EQN_TOK_RPILE]		= "rpile",
	[EQN_TOK_CPILE]		= "cpile",
	[EQN_TOK_MATRIX]	= "matrix",
	[EQN_TOK_CCOL]		= "ccol",
	[EQN_TOK_LCOL]		= "lcol",
	[EQN_TOK_RCOL]		= "rcol",
	[EQN_TOK_DELIM]		= "delim",
	[EQN_TOK_DEFINE]	= "define",
	[EQN_TOK_TDEFINE]	= "tdefine",
	[EQN_TOK_NDEFINE]	= "ndefine",
	[EQN_TOK_UNDEF]		= "undef",
	[EQN_TOK_EOF]		= NULL,
	[EQN_TOK_ABOVE]		= "above",
};
136
/*
 * Named symbols that the parser replaces by a special character.
 * Each enumerator is the index of the matching entry in eqnsyms[].
 */
enum eqn_symt {
	EQNSYM_alpha,
	EQNSYM_beta,
	EQNSYM_chi,
	EQNSYM_delta,
	EQNSYM_epsilon,
	EQNSYM_eta,
	EQNSYM_gamma,
	EQNSYM_iota,
	EQNSYM_kappa,
	EQNSYM_lambda,
	EQNSYM_mu,
	EQNSYM_nu,
	EQNSYM_omega,
	EQNSYM_omicron,
	EQNSYM_phi,
	EQNSYM_pi,
	EQNSYM_psi,	/* was misspelled "EQNSYM_ps" */
	EQNSYM_rho,
	EQNSYM_sigma,
	EQNSYM_tau,
	EQNSYM_theta,
	EQNSYM_upsilon,
	EQNSYM_xi,
	EQNSYM_zeta,
	EQNSYM_DELTA,
	EQNSYM_GAMMA,
	EQNSYM_LAMBDA,
	EQNSYM_OMEGA,
	EQNSYM_PHI,
	EQNSYM_PI,
	EQNSYM_PSI,
	EQNSYM_SIGMA,
	EQNSYM_THETA,
	EQNSYM_UPSILON,
	EQNSYM_XI,
	EQNSYM_inter,
	EQNSYM_union,
	EQNSYM_prod,
	EQNSYM_int,
	EQNSYM_sum,
	EQNSYM_grad,
	EQNSYM_del,
	EQNSYM_times,
	EQNSYM_cdot,
	EQNSYM_nothing,
	EQNSYM_approx,
	EQNSYM_prime,
	EQNSYM_half,
	EQNSYM_partial,
	EQNSYM_inf,
	EQNSYM_muchgreat,
	EQNSYM_muchless,
	EQNSYM_larrow,
	EQNSYM_rarrow,
	EQNSYM_pm,
	EQNSYM_nequal,
	EQNSYM_equiv,
	EQNSYM_lessequal,
	EQNSYM_moreequal,
	EQNSYM_minus,
	EQNSYM__MAX
};

/*
 * One symbol mapping: "str" is the word as written in the equation,
 * "sym" is the name placed between "\[" and "]" in the output text.
 */
struct eqnsym {
	const char	*str;
	const char	*sym;
};

/*
 * Symbol table, indexed by enum eqn_symt.
 * The explicit indices keep the table tied to the enumeration even
 * if either one is reordered.
 */
static const struct eqnsym eqnsyms[EQNSYM__MAX] = {
	[EQNSYM_alpha]		= { "alpha", "*a" },
	[EQNSYM_beta]		= { "beta", "*b" },
	[EQNSYM_chi]		= { "chi", "*x" },
	[EQNSYM_delta]		= { "delta", "*d" },
	[EQNSYM_epsilon]	= { "epsilon", "*e" },
	[EQNSYM_eta]		= { "eta", "*y" },
	[EQNSYM_gamma]		= { "gamma", "*g" },
	[EQNSYM_iota]		= { "iota", "*i" },
	[EQNSYM_kappa]		= { "kappa", "*k" },
	[EQNSYM_lambda]		= { "lambda", "*l" },
	[EQNSYM_mu]		= { "mu", "*m" },
	[EQNSYM_nu]		= { "nu", "*n" },
	[EQNSYM_omega]		= { "omega", "*w" },
	[EQNSYM_omicron]	= { "omicron", "*o" },
	[EQNSYM_phi]		= { "phi", "*f" },
	[EQNSYM_pi]		= { "pi", "*p" },
	[EQNSYM_psi]		= { "psi", "*q" },
	[EQNSYM_rho]		= { "rho", "*r" },
	[EQNSYM_sigma]		= { "sigma", "*s" },
	[EQNSYM_tau]		= { "tau", "*t" },
	[EQNSYM_theta]		= { "theta", "*h" },
	[EQNSYM_upsilon]	= { "upsilon", "*u" },
	[EQNSYM_xi]		= { "xi", "*c" },
	[EQNSYM_zeta]		= { "zeta", "*z" },
	[EQNSYM_DELTA]		= { "DELTA", "*D" },
	[EQNSYM_GAMMA]		= { "GAMMA", "*G" },
	[EQNSYM_LAMBDA]		= { "LAMBDA", "*L" },
	[EQNSYM_OMEGA]		= { "OMEGA", "*W" },
	[EQNSYM_PHI]		= { "PHI", "*F" },
	[EQNSYM_PI]		= { "PI", "*P" },
	[EQNSYM_PSI]		= { "PSI", "*Q" },
	[EQNSYM_SIGMA]		= { "SIGMA", "*S" },
	[EQNSYM_THETA]		= { "THETA", "*H" },
	[EQNSYM_UPSILON]	= { "UPSILON", "*U" },
	[EQNSYM_XI]		= { "XI", "*C" },
	[EQNSYM_inter]		= { "inter", "ca" },
	[EQNSYM_union]		= { "union", "cu" },
	[EQNSYM_prod]		= { "prod", "product" },
	[EQNSYM_int]		= { "int", "integral" },
	[EQNSYM_sum]		= { "sum", "sum" },
	[EQNSYM_grad]		= { "grad", "gr" },
	[EQNSYM_del]		= { "del", "gr" },
	[EQNSYM_times]		= { "times", "mu" },
	[EQNSYM_cdot]		= { "cdot", "pc" },
	[EQNSYM_nothing]	= { "nothing", "&" },
	[EQNSYM_approx]		= { "approx", "~~" },
	[EQNSYM_prime]		= { "prime", "fm" },
	[EQNSYM_half]		= { "half", "12" },
	[EQNSYM_partial]	= { "partial", "pd" },
	[EQNSYM_inf]		= { "inf", "if" },
	[EQNSYM_muchgreat]	= { ">>", ">>" },
	[EQNSYM_muchless]	= { "<<", "<<" },
	[EQNSYM_larrow]		= { "<-", "<-" },
	[EQNSYM_rarrow]		= { "->", "->" },
	[EQNSYM_pm]		= { "+-", "+-" },
	[EQNSYM_nequal]		= { "!=", "!=" },
	[EQNSYM_equiv]		= { "==", "==" },
	[EQNSYM_lessequal]	= { "<=", "<=" },
	[EQNSYM_moreequal]	= { ">=", ">=" },
	[EQNSYM_minus]		= { "-", "mi" },
};
268
/* Forward declarations of the file-local helpers, in alphabetical order. */
static	struct eqn_box	*eqn_box_alloc(struct eqn_node *ep,
				struct eqn_box *parent);
static	void		 eqn_box_free(struct eqn_box *bp);
static	struct eqn_box	*eqn_box_makebinary(struct eqn_node *ep,
				enum eqn_post pos, struct eqn_box *parent);
static	void		 eqn_def(struct eqn_node *ep);
static	struct eqn_def	*eqn_def_find(struct eqn_node *ep,
				const char *key, size_t sz);
static	void		 eqn_delim(struct eqn_node *ep);
static	const char	*eqn_next(struct eqn_node *ep, char quote,
				size_t *sz, int repl);
static	const char	*eqn_nextrawtok(struct eqn_node *ep, size_t *sz);
static	const char	*eqn_nexttok(struct eqn_node *ep, size_t *sz);
static	enum rofferr	 eqn_parse(struct eqn_node *ep,
				struct eqn_box *parent);
static	enum eqn_tok	 eqn_tok_parse(struct eqn_node *ep, char **p);
static	void		 eqn_undef(struct eqn_node *ep);
282
283
284 enum rofferr
285 eqn_read(struct eqn_node **epp, int ln,
286 const char *p, int pos, int *offs)
287 {
288 size_t sz;
289 struct eqn_node *ep;
290 enum rofferr er;
291
292 ep = *epp;
293
294 /*
295 * If we're the terminating mark, unset our equation status and
296 * validate the full equation.
297 */
298
299 if (0 == strncmp(p, ".EN", 3)) {
300 er = eqn_end(epp);
301 p += 3;
302 while (' ' == *p || '\t' == *p)
303 p++;
304 if ('\0' == *p)
305 return er;
306 mandoc_vmsg(MANDOCERR_ARG_SKIP, ep->parse,
307 ln, pos, "EN %s", p);
308 return er;
309 }
310
311 /*
312 * Build up the full string, replacing all newlines with regular
313 * whitespace.
314 */
315
316 sz = strlen(p + pos) + 1;
317 ep->data = mandoc_realloc(ep->data, ep->sz + sz + 1);
318
319 /* First invocation: nil terminate the string. */
320
321 if (0 == ep->sz)
322 *ep->data = '\0';
323
324 ep->sz += sz;
325 strlcat(ep->data, p + pos, ep->sz + 1);
326 strlcat(ep->data, " ", ep->sz + 1);
327 return ROFF_IGN;
328 }
329
330 struct eqn_node *
331 eqn_alloc(int pos, int line, struct mparse *parse)
332 {
333 struct eqn_node *p;
334
335 p = mandoc_calloc(1, sizeof(struct eqn_node));
336
337 p->parse = parse;
338 p->eqn.ln = line;
339 p->eqn.pos = pos;
340 p->gsize = EQN_DEFSIZE;
341
342 return p;
343 }
344
345 /*
346 * Find the key "key" of the give size within our eqn-defined values.
347 */
348 static struct eqn_def *
349 eqn_def_find(struct eqn_node *ep, const char *key, size_t sz)
350 {
351 int i;
352
353 for (i = 0; i < (int)ep->defsz; i++)
354 if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key,
355 ep->defs[i].keysz, key, sz))
356 return &ep->defs[i];
357
358 return NULL;
359 }
360
361 /*
362 * Get the next token from the input stream using the given quote
363 * character.
364 * Optionally make any replacements.
365 */
366 static const char *
367 eqn_next(struct eqn_node *ep, char quote, size_t *sz, int repl)
368 {
369 char *start, *next;
370 int q, diff, lim;
371 size_t ssz, dummy;
372 struct eqn_def *def;
373
374 if (NULL == sz)
375 sz = &dummy;
376
377 lim = 0;
378 ep->rew = ep->cur;
379 again:
380 /* Prevent self-definitions. */
381
382 if (lim >= EQN_NEST_MAX) {
383 mandoc_msg(MANDOCERR_ROFFLOOP, ep->parse,
384 ep->eqn.ln, ep->eqn.pos, NULL);
385 return NULL;
386 }
387
388 ep->cur = ep->rew;
389 start = &ep->data[(int)ep->cur];
390 q = 0;
391
392 if ('\0' == *start)
393 return NULL;
394
395 if (quote == *start) {
396 ep->cur++;
397 q = 1;
398 }
399
400 start = &ep->data[(int)ep->cur];
401
402 if ( ! q) {
403 if ('{' == *start || '}' == *start)
404 ssz = 1;
405 else
406 ssz = strcspn(start + 1, " ^~\"{}\t") + 1;
407 next = start + (int)ssz;
408 if ('\0' == *next)
409 next = NULL;
410 } else
411 next = strchr(start, quote);
412
413 if (NULL != next) {
414 *sz = (size_t)(next - start);
415 ep->cur += *sz;
416 if (q)
417 ep->cur++;
418 while (' ' == ep->data[(int)ep->cur] ||
419 '\t' == ep->data[(int)ep->cur] ||
420 '^' == ep->data[(int)ep->cur] ||
421 '~' == ep->data[(int)ep->cur])
422 ep->cur++;
423 } else {
424 if (q)
425 mandoc_msg(MANDOCERR_ARG_QUOTE, ep->parse,
426 ep->eqn.ln, ep->eqn.pos, NULL);
427 next = strchr(start, '\0');
428 *sz = (size_t)(next - start);
429 ep->cur += *sz;
430 }
431
432 /* Quotes aren't expanded for values. */
433
434 if (q || ! repl)
435 return start;
436
437 if (NULL != (def = eqn_def_find(ep, start, *sz))) {
438 diff = def->valsz - *sz;
439
440 if (def->valsz > *sz) {
441 ep->sz += diff;
442 ep->data = mandoc_realloc(ep->data, ep->sz + 1);
443 ep->data[ep->sz] = '\0';
444 start = &ep->data[(int)ep->rew];
445 }
446
447 diff = def->valsz - *sz;
448 memmove(start + *sz + diff, start + *sz,
449 (strlen(start) - *sz) + 1);
450 memcpy(start, def->val, def->valsz);
451 lim++;
452 goto again;
453 }
454
455 return start;
456 }
457
/*
 * Read the next token, treating '"' as the quote character and
 * expanding defined names.
 */
static const char *
eqn_nexttok(struct eqn_node *ep, size_t *sz)
{
	const int	 expand = 1;

	return eqn_next(ep, '"', sz, expand);
}
468
/*
 * Read the next token, treating '"' as the quote character, but
 * leave defined names unexpanded.
 */
static const char *
eqn_nextrawtok(struct eqn_node *ep, size_t *sz)
{
	const int	 expand = 0;

	return eqn_next(ep, '"', sz, expand);
}
478
479 /*
480 * Parse a token from the stream of text.
481 * A token consists of one of the recognised eqn(7) strings.
482 * Strings are separated by delimiting marks.
483 * This returns EQN_TOK_EOF when there are no more tokens.
484 * If the token is an unrecognised string literal, then it returns
485 * EQN_TOK__MAX and sets the "p" pointer to an allocated, nil-terminated
486 * string.
487 * This must be later freed with free(3).
488 */
489 static enum eqn_tok
490 eqn_tok_parse(struct eqn_node *ep, char **p)
491 {
492 const char *start;
493 size_t i, sz;
494 int quoted;
495
496 if (NULL != p)
497 *p = NULL;
498
499 quoted = ep->data[ep->cur] == '"';
500
501 if (NULL == (start = eqn_nexttok(ep, &sz)))
502 return EQN_TOK_EOF;
503
504 if (quoted) {
505 if (p != NULL)
506 *p = mandoc_strndup(start, sz);
507 return EQN_TOK__MAX;
508 }
509
510 for (i = 0; i < EQN_TOK__MAX; i++) {
511 if (NULL == eqn_toks[i])
512 continue;
513 if (STRNEQ(start, sz, eqn_toks[i], strlen(eqn_toks[i])))
514 break;
515 }
516
517 if (i == EQN_TOK__MAX && NULL != p)
518 *p = mandoc_strndup(start, sz);
519
520 return i;
521 }
522
523 static void
524 eqn_box_free(struct eqn_box *bp)
525 {
526
527 if (bp->first)
528 eqn_box_free(bp->first);
529 if (bp->next)
530 eqn_box_free(bp->next);
531
532 free(bp->text);
533 free(bp->left);
534 free(bp->right);
535 free(bp->top);
536 free(bp->bottom);
537 free(bp);
538 }
539
540 /*
541 * Allocate a box as the last child of the parent node.
542 */
543 static struct eqn_box *
544 eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent)
545 {
546 struct eqn_box *bp;
547
548 bp = mandoc_calloc(1, sizeof(struct eqn_box));
549 bp->parent = parent;
550 bp->parent->args++;
551 bp->expectargs = UINT_MAX;
552 bp->size = ep->gsize;
553
554 if (NULL != parent->first) {
555 parent->last->next = bp;
556 bp->prev = parent->last;
557 } else
558 parent->first = bp;
559
560 parent->last = bp;
561 return bp;
562 }
563
564 /*
565 * Reparent the current last node (of the current parent) under a new
566 * EQN_SUBEXPR as the first element.
567 * Then return the new parent.
568 * The new EQN_SUBEXPR will have a two-child limit.
569 */
570 static struct eqn_box *
571 eqn_box_makebinary(struct eqn_node *ep,
572 enum eqn_post pos, struct eqn_box *parent)
573 {
574 struct eqn_box *b, *newb;
575
576 assert(NULL != parent->last);
577 b = parent->last;
578 if (parent->last == parent->first)
579 parent->first = NULL;
580 parent->args--;
581 parent->last = b->prev;
582 b->prev = NULL;
583 newb = eqn_box_alloc(ep, parent);
584 newb->pos = pos;
585 newb->type = EQN_SUBEXPR;
586 newb->expectargs = 2;
587 newb->args = 1;
588 newb->first = newb->last = b;
589 newb->first->next = NULL;
590 b->parent = newb;
591 return newb;
592 }
593
594 /*
595 * Parse the "delim" control statement.
596 */
597 static void
598 eqn_delim(struct eqn_node *ep)
599 {
600 const char *start;
601 size_t sz;
602
603 if ((start = eqn_nextrawtok(ep, &sz)) == NULL)
604 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
605 ep->eqn.ln, ep->eqn.pos, "delim");
606 else if (strncmp(start, "off", 3) == 0)
607 ep->delim = 0;
608 else if (strncmp(start, "on", 2) == 0) {
609 if (ep->odelim && ep->cdelim)
610 ep->delim = 1;
611 } else if (start[1] != '\0') {
612 ep->odelim = start[0];
613 ep->cdelim = start[1];
614 ep->delim = 1;
615 }
616 }
617
618 /*
619 * Undefine a previously-defined string.
620 */
621 static void
622 eqn_undef(struct eqn_node *ep)
623 {
624 const char *start;
625 struct eqn_def *def;
626 size_t sz;
627
628 if ((start = eqn_nextrawtok(ep, &sz)) == NULL) {
629 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
630 ep->eqn.ln, ep->eqn.pos, "undef");
631 return;
632 }
633 if ((def = eqn_def_find(ep, start, sz)) == NULL)
634 return;
635 free(def->key);
636 free(def->val);
637 def->key = def->val = NULL;
638 def->keysz = def->valsz = 0;
639 }
640
641 static void
642 eqn_def(struct eqn_node *ep)
643 {
644 const char *start;
645 size_t sz;
646 struct eqn_def *def;
647 int i;
648
649 if ((start = eqn_nextrawtok(ep, &sz)) == NULL) {
650 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
651 ep->eqn.ln, ep->eqn.pos, "define");
652 return;
653 }
654
655 /*
656 * Search for a key that already exists.
657 * Create a new key if none is found.
658 */
659 if (NULL == (def = eqn_def_find(ep, start, sz))) {
660 /* Find holes in string array. */
661 for (i = 0; i < (int)ep->defsz; i++)
662 if (0 == ep->defs[i].keysz)
663 break;
664
665 if (i == (int)ep->defsz) {
666 ep->defsz++;
667 ep->defs = mandoc_reallocarray(ep->defs,
668 ep->defsz, sizeof(struct eqn_def));
669 ep->defs[i].key = ep->defs[i].val = NULL;
670 }
671
672 def = ep->defs + i;
673 free(def->key);
674 def->key = mandoc_strndup(start, sz);
675 def->keysz = sz;
676 }
677
678 start = eqn_next(ep, ep->data[(int)ep->cur], &sz, 0);
679 if (start == NULL) {
680 mandoc_vmsg(MANDOCERR_REQ_EMPTY, ep->parse,
681 ep->eqn.ln, ep->eqn.pos, "define %s", def->key);
682 free(def->key);
683 free(def->val);
684 def->key = def->val = NULL;
685 def->keysz = def->valsz = 0;
686 return;
687 }
688 free(def->val);
689 def->val = mandoc_strndup(start, sz);
690 def->valsz = sz;
691 }
692
693 /*
694 * Recursively parse an eqn(7) expression.
695 */
696 static enum rofferr
697 eqn_parse(struct eqn_node *ep, struct eqn_box *parent)
698 {
699 char sym[64];
700 struct eqn_box *cur;
701 const char *start;
702 char *p;
703 size_t i, sz;
704 enum eqn_tok tok, subtok;
705 enum eqn_post pos;
706 int size;
707
708 assert(parent != NULL);
709
710 /*
711 * Empty equation.
712 * Do not add it to the high-level syntax tree.
713 */
714
715 if (ep->data == NULL)
716 return ROFF_IGN;
717
718 next_tok:
719 tok = eqn_tok_parse(ep, &p);
720
721 this_tok:
722 switch (tok) {
723 case (EQN_TOK_UNDEF):
724 eqn_undef(ep);
725 break;
726 case (EQN_TOK_NDEFINE):
727 case (EQN_TOK_DEFINE):
728 eqn_def(ep);
729 break;
730 case (EQN_TOK_TDEFINE):
731 if (eqn_nextrawtok(ep, NULL) == NULL ||
732 eqn_next(ep, ep->data[(int)ep->cur], NULL, 0) == NULL)
733 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
734 ep->eqn.ln, ep->eqn.pos, "tdefine");
735 break;
736 case (EQN_TOK_DELIM):
737 eqn_delim(ep);
738 break;
739 case (EQN_TOK_GFONT):
740 if (eqn_nextrawtok(ep, NULL) == NULL)
741 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
742 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
743 break;
744 case (EQN_TOK_MARK):
745 case (EQN_TOK_LINEUP):
746 /* Ignore these. */
747 break;
748 case (EQN_TOK_DYAD):
749 case (EQN_TOK_VEC):
750 case (EQN_TOK_UNDER):
751 case (EQN_TOK_BAR):
752 case (EQN_TOK_TILDE):
753 case (EQN_TOK_HAT):
754 case (EQN_TOK_DOT):
755 case (EQN_TOK_DOTDOT):
756 if (parent->last == NULL) {
757 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
758 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
759 cur = eqn_box_alloc(ep, parent);
760 cur->type = EQN_TEXT;
761 cur->text = mandoc_strdup("");
762 }
763 parent = eqn_box_makebinary(ep, EQNPOS_NONE, parent);
764 parent->type = EQN_LISTONE;
765 parent->expectargs = 1;
766 switch (tok) {
767 case (EQN_TOK_DOTDOT):
768 strlcpy(sym, "\\[ad]", sizeof(sym));
769 break;
770 case (EQN_TOK_VEC):
771 strlcpy(sym, "\\[->]", sizeof(sym));
772 break;
773 case (EQN_TOK_DYAD):
774 strlcpy(sym, "\\[<>]", sizeof(sym));
775 break;
776 case (EQN_TOK_TILDE):
777 strlcpy(sym, "\\[a~]", sizeof(sym));
778 break;
779 case (EQN_TOK_UNDER):
780 strlcpy(sym, "\\[ul]", sizeof(sym));
781 break;
782 case (EQN_TOK_BAR):
783 strlcpy(sym, "\\[rl]", sizeof(sym));
784 break;
785 case (EQN_TOK_DOT):
786 strlcpy(sym, "\\[a.]", sizeof(sym));
787 break;
788 case (EQN_TOK_HAT):
789 strlcpy(sym, "\\[ha]", sizeof(sym));
790 break;
791 default:
792 abort();
793 }
794
795 switch (tok) {
796 case (EQN_TOK_DOTDOT):
797 case (EQN_TOK_VEC):
798 case (EQN_TOK_DYAD):
799 case (EQN_TOK_TILDE):
800 case (EQN_TOK_BAR):
801 case (EQN_TOK_DOT):
802 case (EQN_TOK_HAT):
803 parent->top = mandoc_strdup(sym);
804 break;
805 case (EQN_TOK_UNDER):
806 parent->bottom = mandoc_strdup(sym);
807 break;
808 default:
809 abort();
810 }
811 parent = parent->parent;
812 break;
813 case (EQN_TOK_FWD):
814 case (EQN_TOK_BACK):
815 case (EQN_TOK_DOWN):
816 case (EQN_TOK_UP):
817 subtok = eqn_tok_parse(ep, NULL);
818 if (subtok != EQN_TOK__MAX) {
819 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
820 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
821 tok = subtok;
822 goto this_tok;
823 }
824 break;
825 case (EQN_TOK_FAT):
826 case (EQN_TOK_ROMAN):
827 case (EQN_TOK_ITALIC):
828 case (EQN_TOK_BOLD):
829 while (parent->args == parent->expectargs)
830 parent = parent->parent;
831 /*
832 * These values apply to the next word or sequence of
833 * words; thus, we mark that we'll have a child with
834 * exactly one of those.
835 */
836 parent = eqn_box_alloc(ep, parent);
837 parent->type = EQN_LISTONE;
838 parent->expectargs = 1;
839 switch (tok) {
840 case (EQN_TOK_FAT):
841 parent->font = EQNFONT_FAT;
842 break;
843 case (EQN_TOK_ROMAN):
844 parent->font = EQNFONT_ROMAN;
845 break;
846 case (EQN_TOK_ITALIC):
847 parent->font = EQNFONT_ITALIC;
848 break;
849 case (EQN_TOK_BOLD):
850 parent->font = EQNFONT_BOLD;
851 break;
852 default:
853 abort();
854 }
855 break;
856 case (EQN_TOK_SIZE):
857 case (EQN_TOK_GSIZE):
858 /* Accept two values: integral size and a single. */
859 if (NULL == (start = eqn_nexttok(ep, &sz))) {
860 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
861 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
862 break;
863 }
864 size = mandoc_strntoi(start, sz, 10);
865 if (-1 == size) {
866 mandoc_msg(MANDOCERR_IT_NONUM, ep->parse,
867 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
868 break;
869 }
870 if (EQN_TOK_GSIZE == tok) {
871 ep->gsize = size;
872 break;
873 }
874 parent = eqn_box_alloc(ep, parent);
875 parent->type = EQN_LISTONE;
876 parent->expectargs = 1;
877 parent->size = size;
878 break;
879 case (EQN_TOK_FROM):
880 case (EQN_TOK_TO):
881 case (EQN_TOK_SUB):
882 case (EQN_TOK_SUP):
883 /*
884 * We have a left-right-associative expression.
885 * Repivot under a positional node, open a child scope
886 * and keep on reading.
887 */
888 if (parent->last == NULL) {
889 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
890 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
891 cur = eqn_box_alloc(ep, parent);
892 cur->type = EQN_TEXT;
893 cur->text = mandoc_strdup("");
894 }
895 /* Handle the "subsup" and "fromto" positions. */
896 if (EQN_TOK_SUP == tok && parent->pos == EQNPOS_SUB) {
897 parent->expectargs = 3;
898 parent->pos = EQNPOS_SUBSUP;
899 break;
900 }
901 if (EQN_TOK_TO == tok && parent->pos == EQNPOS_FROM) {
902 parent->expectargs = 3;
903 parent->pos = EQNPOS_FROMTO;
904 break;
905 }
906 switch (tok) {
907 case (EQN_TOK_FROM):
908 pos = EQNPOS_FROM;
909 break;
910 case (EQN_TOK_TO):
911 pos = EQNPOS_TO;
912 break;
913 case (EQN_TOK_SUP):
914 pos = EQNPOS_SUP;
915 break;
916 case (EQN_TOK_SUB):
917 pos = EQNPOS_SUB;
918 break;
919 default:
920 abort();
921 }
922 parent = eqn_box_makebinary(ep, pos, parent);
923 break;
924 case (EQN_TOK_SQRT):
925 while (parent->args == parent->expectargs)
926 parent = parent->parent;
927 /*
928 * Accept a left-right-associative set of arguments just
929 * like sub and sup and friends but without rebalancing
930 * under a pivot.
931 */
932 parent = eqn_box_alloc(ep, parent);
933 parent->type = EQN_SUBEXPR;
934 parent->pos = EQNPOS_SQRT;
935 parent->expectargs = 1;
936 break;
937 case (EQN_TOK_OVER):
938 /*
939 * We have a right-left-associative fraction.
940 * Close out anything that's currently open, then
941 * rebalance and continue reading.
942 */
943 if (parent->last == NULL) {
944 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
945 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
946 cur = eqn_box_alloc(ep, parent);
947 cur->type = EQN_TEXT;
948 cur->text = mandoc_strdup("");
949 }
950 while (EQN_SUBEXPR == parent->type)
951 parent = parent->parent;
952 parent = eqn_box_makebinary(ep, EQNPOS_OVER, parent);
953 break;
954 case (EQN_TOK_RIGHT):
955 case (EQN_TOK_BRACE_CLOSE):
956 /*
957 * Close out the existing brace.
958 * FIXME: this is a shitty sentinel: we should really
959 * have a native EQN_BRACE type or whatnot.
960 */
961 for (cur = parent; cur != NULL; cur = cur->parent)
962 if (cur->type == EQN_LIST &&
963 (tok == EQN_TOK_BRACE_CLOSE ||
964 cur->left != NULL))
965 break;
966 if (cur == NULL) {
967 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->parse,
968 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
969 break;
970 }
971 parent = cur;
972 if (EQN_TOK_RIGHT == tok) {
973 if (NULL == (start = eqn_nexttok(ep, &sz))) {
974 mandoc_msg(MANDOCERR_REQ_EMPTY,
975 ep->parse, ep->eqn.ln,
976 ep->eqn.pos, eqn_toks[tok]);
977 break;
978 }
979 /* Handling depends on right/left. */
980 if (STRNEQ(start, sz, "ceiling", 7)) {
981 strlcpy(sym, "\\[rc]", sizeof(sym));
982 parent->right = mandoc_strdup(sym);
983 } else if (STRNEQ(start, sz, "floor", 5)) {
984 strlcpy(sym, "\\[rf]", sizeof(sym));
985 parent->right = mandoc_strdup(sym);
986 } else
987 parent->right = mandoc_strndup(start, sz);
988 }
989 parent = parent->parent;
990 if (tok == EQN_TOK_BRACE_CLOSE &&
991 (parent->type == EQN_PILE ||
992 parent->type == EQN_MATRIX))
993 parent = parent->parent;
994 /* Close out any "singleton" lists. */
995 while (parent->type == EQN_LISTONE &&
996 parent->args == parent->expectargs)
997 parent = parent->parent;
998 break;
999 case (EQN_TOK_BRACE_OPEN):
1000 case (EQN_TOK_LEFT):
1001 /*
1002 * If we already have something in the stack and we're
1003 * in an expression, then rewind til we're not any more
1004 * (just like with the text node).
1005 */
1006 while (parent->args == parent->expectargs)
1007 parent = parent->parent;
1008 if (EQN_TOK_LEFT == tok &&
1009 (start = eqn_nexttok(ep, &sz)) == NULL) {
1010 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
1011 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
1012 break;
1013 }
1014 parent = eqn_box_alloc(ep, parent);
1015 parent->type = EQN_LIST;
1016 if (EQN_TOK_LEFT == tok) {
1017 if (STRNEQ(start, sz, "ceiling", 7)) {
1018 strlcpy(sym, "\\[lc]", sizeof(sym));
1019 parent->left = mandoc_strdup(sym);
1020 } else if (STRNEQ(start, sz, "floor", 5)) {
1021 strlcpy(sym, "\\[lf]", sizeof(sym));
1022 parent->left = mandoc_strdup(sym);
1023 } else
1024 parent->left = mandoc_strndup(start, sz);
1025 }
1026 break;
1027 case (EQN_TOK_PILE):
1028 case (EQN_TOK_LPILE):
1029 case (EQN_TOK_RPILE):
1030 case (EQN_TOK_CPILE):
1031 case (EQN_TOK_CCOL):
1032 case (EQN_TOK_LCOL):
1033 case (EQN_TOK_RCOL):
1034 while (parent->args == parent->expectargs)
1035 parent = parent->parent;
1036 parent = eqn_box_alloc(ep, parent);
1037 parent->type = EQN_PILE;
1038 parent->expectargs = 1;
1039 break;
1040 case (EQN_TOK_ABOVE):
1041 for (cur = parent; cur != NULL; cur = cur->parent)
1042 if (cur->type == EQN_PILE)
1043 break;
1044 if (cur == NULL) {
1045 mandoc_msg(MANDOCERR_IT_STRAY, ep->parse,
1046 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
1047 break;
1048 }
1049 parent = eqn_box_alloc(ep, cur);
1050 parent->type = EQN_LIST;
1051 break;
1052 case (EQN_TOK_MATRIX):
1053 while (parent->args == parent->expectargs)
1054 parent = parent->parent;
1055 parent = eqn_box_alloc(ep, parent);
1056 parent->type = EQN_MATRIX;
1057 parent->expectargs = 1;
1058 break;
1059 case (EQN_TOK_EOF):
1060 /*
1061 * End of file!
1062 * TODO: make sure we're not in an open subexpression.
1063 */
1064 return ROFF_EQN;
1065 default:
1066 assert(tok == EQN_TOK__MAX);
1067 assert(NULL != p);
1068 /*
1069 * If we already have something in the stack and we're
1070 * in an expression, then rewind til we're not any more.
1071 */
1072 while (parent->args == parent->expectargs)
1073 parent = parent->parent;
1074 cur = eqn_box_alloc(ep, parent);
1075 cur->type = EQN_TEXT;
1076 for (i = 0; i < EQNSYM__MAX; i++)
1077 if (0 == strcmp(eqnsyms[i].str, p)) {
1078 (void)snprintf(sym, sizeof(sym),
1079 "\\[%s]", eqnsyms[i].sym);
1080 cur->text = mandoc_strdup(sym);
1081 free(p);
1082 break;
1083 }
1084
1085 if (i == EQNSYM__MAX)
1086 cur->text = p;
1087 /*
1088 * Post-process list status.
1089 */
1090 while (parent->type == EQN_LISTONE &&
1091 parent->args == parent->expectargs)
1092 parent = parent->parent;
1093 break;
1094 }
1095 goto next_tok;
1096 }
1097
1098 enum rofferr
1099 eqn_end(struct eqn_node **epp)
1100 {
1101 struct eqn_node *ep;
1102
1103 ep = *epp;
1104 *epp = NULL;
1105
1106 ep->eqn.root = mandoc_calloc(1, sizeof(struct eqn_box));
1107 ep->eqn.root->expectargs = UINT_MAX;
1108 return eqn_parse(ep, ep->eqn.root);
1109 }
1110
1111 void
1112 eqn_free(struct eqn_node *p)
1113 {
1114 int i;
1115
1116 eqn_box_free(p->eqn.root);
1117
1118 for (i = 0; i < (int)p->defsz; i++) {
1119 free(p->defs[i].key);
1120 free(p->defs[i].val);
1121 }
1122
1123 free(p->data);
1124 free(p->defs);
1125 free(p);
1126 }