]> git.cameronkatri.com Git - mandoc.git/blob - eqn.c
quoted words are not parsed for defined keys
[mandoc.git] / eqn.c
1 /* $Id: eqn.c,v 1.65 2017/06/21 18:38:26 schwarze Exp $ */
2 /*
3 * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <limits.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <time.h>
28
29 #include "mandoc.h"
30 #include "mandoc_aux.h"
31 #include "libmandoc.h"
32 #include "libroff.h"
33
/* Upper bound on chained "define" expansions for a single token. */
#define	EQN_NEST_MAX	 128

/*
 * Non-zero when two length-delimited strings are equal: the sizes
 * must match and strncmp(3) must find no difference within sz1 bytes.
 */
#define	STRNEQ(p1, sz1, p2, sz2) \
	((sz1) == (sz2) && strncmp((p1), (p2), (sz1)) == 0)
37
/*
 * Token classes returned by the tokenizer.
 * Everything below EQN_TOK__MAX is a keyword and doubles as an index
 * into eqn_toks[]; the values after EQN_TOK__MAX have no keyword string.
 */
enum eqn_tok {
	EQN_TOK_DYAD = 0,
	EQN_TOK_VEC,
	EQN_TOK_UNDER,
	EQN_TOK_BAR,
	EQN_TOK_TILDE,
	EQN_TOK_HAT,
	EQN_TOK_DOT,
	EQN_TOK_DOTDOT,
	EQN_TOK_FWD,
	EQN_TOK_BACK,
	EQN_TOK_DOWN,
	EQN_TOK_UP,
	EQN_TOK_FAT,
	EQN_TOK_ROMAN,
	EQN_TOK_ITALIC,
	EQN_TOK_BOLD,
	EQN_TOK_SIZE,
	EQN_TOK_SUB,
	EQN_TOK_SUP,
	EQN_TOK_SQRT,
	EQN_TOK_OVER,
	EQN_TOK_FROM,
	EQN_TOK_TO,
	EQN_TOK_BRACE_OPEN,
	EQN_TOK_BRACE_CLOSE,
	EQN_TOK_GSIZE,
	EQN_TOK_GFONT,
	EQN_TOK_MARK,
	EQN_TOK_LINEUP,
	EQN_TOK_LEFT,
	EQN_TOK_RIGHT,
	EQN_TOK_PILE,
	EQN_TOK_LPILE,
	EQN_TOK_RPILE,
	EQN_TOK_CPILE,
	EQN_TOK_MATRIX,
	EQN_TOK_CCOL,
	EQN_TOK_LCOL,
	EQN_TOK_RCOL,
	EQN_TOK_DELIM,
	EQN_TOK_DEFINE,
	EQN_TOK_TDEFINE,
	EQN_TOK_NDEFINE,
	EQN_TOK_UNDEF,
	EQN_TOK_ABOVE,
	EQN_TOK__MAX,	/* number of keywords; also "plain text" */
	EQN_TOK_FUNC,	/* text that names a known function */
	EQN_TOK_EOF	/* input exhausted */
};

/* Keyword spelling for each keyword token, indexed by enum eqn_tok. */
static const char *eqn_toks[EQN_TOK__MAX] = {
	[EQN_TOK_DYAD]		= "dyad",
	[EQN_TOK_VEC]		= "vec",
	[EQN_TOK_UNDER]		= "under",
	[EQN_TOK_BAR]		= "bar",
	[EQN_TOK_TILDE]		= "tilde",
	[EQN_TOK_HAT]		= "hat",
	[EQN_TOK_DOT]		= "dot",
	[EQN_TOK_DOTDOT]	= "dotdot",
	[EQN_TOK_FWD]		= "fwd",
	[EQN_TOK_BACK]		= "back",
	[EQN_TOK_DOWN]		= "down",
	[EQN_TOK_UP]		= "up",
	[EQN_TOK_FAT]		= "fat",
	[EQN_TOK_ROMAN]		= "roman",
	[EQN_TOK_ITALIC]	= "italic",
	[EQN_TOK_BOLD]		= "bold",
	[EQN_TOK_SIZE]		= "size",
	[EQN_TOK_SUB]		= "sub",
	[EQN_TOK_SUP]		= "sup",
	[EQN_TOK_SQRT]		= "sqrt",
	[EQN_TOK_OVER]		= "over",
	[EQN_TOK_FROM]		= "from",
	[EQN_TOK_TO]		= "to",
	[EQN_TOK_BRACE_OPEN]	= "{",
	[EQN_TOK_BRACE_CLOSE]	= "}",
	[EQN_TOK_GSIZE]		= "gsize",
	[EQN_TOK_GFONT]		= "gfont",
	[EQN_TOK_MARK]		= "mark",
	[EQN_TOK_LINEUP]	= "lineup",
	[EQN_TOK_LEFT]		= "left",
	[EQN_TOK_RIGHT]		= "right",
	[EQN_TOK_PILE]		= "pile",
	[EQN_TOK_LPILE]		= "lpile",
	[EQN_TOK_RPILE]		= "rpile",
	[EQN_TOK_CPILE]		= "cpile",
	[EQN_TOK_MATRIX]	= "matrix",
	[EQN_TOK_CCOL]		= "ccol",
	[EQN_TOK_LCOL]		= "lcol",
	[EQN_TOK_RCOL]		= "rcol",
	[EQN_TOK_DELIM]		= "delim",
	[EQN_TOK_DEFINE]	= "define",
	[EQN_TOK_TDEFINE]	= "tdefine",
	[EQN_TOK_NDEFINE]	= "ndefine",
	[EQN_TOK_UNDEF]		= "undef",
	[EQN_TOK_ABOVE]		= "above",
};
136
/*
 * Names that the tokenizer reports as EQN_TOK_FUNC rather than as
 * plain text.
 */
static const char *const eqn_func[] = {
	"acos",	"acsc",	"and",	"arc",	"asec",	"asin",	"atan",
	"cos",	"cosh",	"coth",	"csc",
	"det",	"exp",	"for",	"if",
	"lim",	"ln",	"log",
	"max",	"min",
	"sec",	"sin",	"sinh",
	"tan",	"tanh",
	"Im",	"Re",
};
143
/*
 * Indices into eqnsyms[], one per predefined symbol name.
 */
enum eqn_symt {
	EQNSYM_alpha,
	EQNSYM_beta,
	EQNSYM_chi,
	EQNSYM_delta,
	EQNSYM_epsilon,
	EQNSYM_eta,
	EQNSYM_gamma,
	EQNSYM_iota,
	EQNSYM_kappa,
	EQNSYM_lambda,
	EQNSYM_mu,
	EQNSYM_nu,
	EQNSYM_omega,
	EQNSYM_omicron,
	EQNSYM_phi,
	EQNSYM_pi,
	EQNSYM_psi,
	EQNSYM_rho,
	EQNSYM_sigma,
	EQNSYM_tau,
	EQNSYM_theta,
	EQNSYM_upsilon,
	EQNSYM_xi,
	EQNSYM_zeta,
	EQNSYM_DELTA,
	EQNSYM_GAMMA,
	EQNSYM_LAMBDA,
	EQNSYM_OMEGA,
	EQNSYM_PHI,
	EQNSYM_PI,
	EQNSYM_PSI,
	EQNSYM_SIGMA,
	EQNSYM_THETA,
	EQNSYM_UPSILON,
	EQNSYM_XI,
	EQNSYM_inter,
	EQNSYM_union,
	EQNSYM_prod,
	EQNSYM_int,
	EQNSYM_sum,
	EQNSYM_grad,
	EQNSYM_del,
	EQNSYM_times,
	EQNSYM_cdot,
	EQNSYM_nothing,
	EQNSYM_approx,
	EQNSYM_prime,
	EQNSYM_half,
	EQNSYM_partial,
	EQNSYM_inf,
	EQNSYM_muchgreat,
	EQNSYM_muchless,
	EQNSYM_larrow,
	EQNSYM_rarrow,
	EQNSYM_pm,
	EQNSYM_nequal,
	EQNSYM_equiv,
	EQNSYM_lessequal,
	EQNSYM_moreequal,
	EQNSYM_minus,
	EQNSYM__MAX
};

/*
 * One predefined symbol: "str" is the word as written in the eqn
 * input, "sym" is the character name that the tokenizer wraps in
 * "\[...]" when it emits the symbol as text.
 */
struct eqnsym {
	const char	*str;
	const char	*sym;
};

/*
 * Table of predefined symbols.
 * Entries are keyed by enum eqn_symt so that the table and the
 * enumeration cannot silently drift apart.
 */
static const struct eqnsym eqnsyms[EQNSYM__MAX] = {
	[EQNSYM_alpha]		= { "alpha", "*a" },
	[EQNSYM_beta]		= { "beta", "*b" },
	[EQNSYM_chi]		= { "chi", "*x" },
	[EQNSYM_delta]		= { "delta", "*d" },
	[EQNSYM_epsilon]	= { "epsilon", "*e" },
	[EQNSYM_eta]		= { "eta", "*y" },
	[EQNSYM_gamma]		= { "gamma", "*g" },
	[EQNSYM_iota]		= { "iota", "*i" },
	[EQNSYM_kappa]		= { "kappa", "*k" },
	[EQNSYM_lambda]		= { "lambda", "*l" },
	[EQNSYM_mu]		= { "mu", "*m" },
	[EQNSYM_nu]		= { "nu", "*n" },
	[EQNSYM_omega]		= { "omega", "*w" },
	[EQNSYM_omicron]	= { "omicron", "*o" },
	[EQNSYM_phi]		= { "phi", "*f" },
	[EQNSYM_pi]		= { "pi", "*p" },
	[EQNSYM_psi]		= { "psi", "*q" },
	[EQNSYM_rho]		= { "rho", "*r" },
	[EQNSYM_sigma]		= { "sigma", "*s" },
	[EQNSYM_tau]		= { "tau", "*t" },
	[EQNSYM_theta]		= { "theta", "*h" },
	[EQNSYM_upsilon]	= { "upsilon", "*u" },
	[EQNSYM_xi]		= { "xi", "*c" },
	[EQNSYM_zeta]		= { "zeta", "*z" },
	[EQNSYM_DELTA]		= { "DELTA", "*D" },
	[EQNSYM_GAMMA]		= { "GAMMA", "*G" },
	[EQNSYM_LAMBDA]		= { "LAMBDA", "*L" },
	[EQNSYM_OMEGA]		= { "OMEGA", "*W" },
	[EQNSYM_PHI]		= { "PHI", "*F" },
	[EQNSYM_PI]		= { "PI", "*P" },
	[EQNSYM_PSI]		= { "PSI", "*Q" },
	[EQNSYM_SIGMA]		= { "SIGMA", "*S" },
	[EQNSYM_THETA]		= { "THETA", "*H" },
	[EQNSYM_UPSILON]	= { "UPSILON", "*U" },
	[EQNSYM_XI]		= { "XI", "*C" },
	[EQNSYM_inter]		= { "inter", "ca" },
	[EQNSYM_union]		= { "union", "cu" },
	[EQNSYM_prod]		= { "prod", "product" },
	[EQNSYM_int]		= { "int", "integral" },
	[EQNSYM_sum]		= { "sum", "sum" },
	[EQNSYM_grad]		= { "grad", "gr" },
	[EQNSYM_del]		= { "del", "gr" },
	[EQNSYM_times]		= { "times", "mu" },
	[EQNSYM_cdot]		= { "cdot", "pc" },
	[EQNSYM_nothing]	= { "nothing", "&" },
	[EQNSYM_approx]		= { "approx", "~~" },
	[EQNSYM_prime]		= { "prime", "fm" },
	[EQNSYM_half]		= { "half", "12" },
	[EQNSYM_partial]	= { "partial", "pd" },
	[EQNSYM_inf]		= { "inf", "if" },
	[EQNSYM_muchgreat]	= { ">>", ">>" },
	[EQNSYM_muchless]	= { "<<", "<<" },
	[EQNSYM_larrow]		= { "<-", "<-" },
	[EQNSYM_rarrow]		= { "->", "->" },
	[EQNSYM_pm]		= { "+-", "+-" },
	[EQNSYM_nequal]		= { "!=", "!=" },
	[EQNSYM_equiv]		= { "==", "==" },
	[EQNSYM_lessequal]	= { "<=", "<=" },
	[EQNSYM_moreequal]	= { ">=", ">=" },
	[EQNSYM_minus]		= { "-", "mi" },
};
275
/* Forward declarations of the file-local helpers, sorted by name. */
static	struct eqn_box	*eqn_box_alloc(struct eqn_node *ep,
				struct eqn_box *parent);
static	void		 eqn_box_free(struct eqn_box *bp);
static	struct eqn_box	*eqn_box_makebinary(struct eqn_node *ep,
				enum eqn_post pos, struct eqn_box *parent);
static	void		 eqn_def(struct eqn_node *ep);
static	struct eqn_def	*eqn_def_find(struct eqn_node *ep,
				const char *key, size_t sz);
static	void		 eqn_delim(struct eqn_node *ep);
static	const char	*eqn_next(struct eqn_node *ep, char quote,
				size_t *sz, int repl);
static	const char	*eqn_nextrawtok(struct eqn_node *ep, size_t *sz);
static	const char	*eqn_nexttok(struct eqn_node *ep, size_t *sz);
static	enum rofferr	 eqn_parse(struct eqn_node *ep,
				struct eqn_box *parent);
static	enum eqn_tok	 eqn_tok_parse(struct eqn_node *ep, char **p);
static	void		 eqn_undef(struct eqn_node *ep);
289
290
291 enum rofferr
292 eqn_read(struct eqn_node **epp, int ln,
293 const char *p, int pos, int *offs)
294 {
295 size_t sz;
296 struct eqn_node *ep;
297 enum rofferr er;
298
299 ep = *epp;
300
301 /*
302 * If we're the terminating mark, unset our equation status and
303 * validate the full equation.
304 */
305
306 if (0 == strncmp(p, ".EN", 3)) {
307 er = eqn_end(epp);
308 p += 3;
309 while (' ' == *p || '\t' == *p)
310 p++;
311 if ('\0' == *p)
312 return er;
313 mandoc_vmsg(MANDOCERR_ARG_SKIP, ep->parse,
314 ln, pos, "EN %s", p);
315 return er;
316 }
317
318 /*
319 * Build up the full string, replacing all newlines with regular
320 * whitespace.
321 */
322
323 sz = strlen(p + pos) + 1;
324 ep->data = mandoc_realloc(ep->data, ep->sz + sz + 1);
325
326 /* First invocation: nil terminate the string. */
327
328 if (0 == ep->sz)
329 *ep->data = '\0';
330
331 ep->sz += sz;
332 strlcat(ep->data, p + pos, ep->sz + 1);
333 strlcat(ep->data, " ", ep->sz + 1);
334 return ROFF_IGN;
335 }
336
337 struct eqn_node *
338 eqn_alloc(int pos, int line, struct mparse *parse)
339 {
340 struct eqn_node *p;
341
342 p = mandoc_calloc(1, sizeof(struct eqn_node));
343
344 p->parse = parse;
345 p->eqn.ln = line;
346 p->eqn.pos = pos;
347 p->gsize = EQN_DEFSIZE;
348
349 return p;
350 }
351
352 /*
353 * Find the key "key" of the give size within our eqn-defined values.
354 */
355 static struct eqn_def *
356 eqn_def_find(struct eqn_node *ep, const char *key, size_t sz)
357 {
358 int i;
359
360 for (i = 0; i < (int)ep->defsz; i++)
361 if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key,
362 ep->defs[i].keysz, key, sz))
363 return &ep->defs[i];
364
365 return NULL;
366 }
367
368 /*
369 * Get the next token from the input stream using the given quote
370 * character.
371 * Optionally make any replacements.
372 */
373 static const char *
374 eqn_next(struct eqn_node *ep, char quote, size_t *sz, int repl)
375 {
376 static size_t last_len;
377 static int lim;
378
379 char *start, *next;
380 int q, diff;
381 size_t ssz, dummy;
382 struct eqn_def *def;
383
384 if (NULL == sz)
385 sz = &dummy;
386
387 if (ep->cur >= last_len)
388 lim = 0;
389 ep->rew = ep->cur;
390 again:
391 /* Prevent self-definitions. */
392
393 if (lim >= EQN_NEST_MAX) {
394 mandoc_msg(MANDOCERR_ROFFLOOP, ep->parse,
395 ep->eqn.ln, ep->eqn.pos, NULL);
396 return NULL;
397 }
398
399 ep->cur = ep->rew;
400 start = &ep->data[(int)ep->cur];
401 q = 0;
402
403 if ('\0' == *start)
404 return NULL;
405
406 if (quote == *start) {
407 ep->cur++;
408 q = 1;
409 }
410
411 start = &ep->data[(int)ep->cur];
412
413 if ( ! q) {
414 if ('{' == *start || '}' == *start)
415 ssz = 1;
416 else
417 ssz = strcspn(start + 1, " ^~\"{}\t") + 1;
418 next = start + (int)ssz;
419 if ('\0' == *next)
420 next = NULL;
421 } else
422 next = strchr(start, quote);
423
424 if (NULL != next) {
425 *sz = (size_t)(next - start);
426 ep->cur += *sz;
427 if (q)
428 ep->cur++;
429 while (' ' == ep->data[(int)ep->cur] ||
430 '\t' == ep->data[(int)ep->cur] ||
431 '^' == ep->data[(int)ep->cur] ||
432 '~' == ep->data[(int)ep->cur])
433 ep->cur++;
434 } else {
435 if (q)
436 mandoc_msg(MANDOCERR_ARG_QUOTE, ep->parse,
437 ep->eqn.ln, ep->eqn.pos, NULL);
438 next = strchr(start, '\0');
439 *sz = (size_t)(next - start);
440 ep->cur += *sz;
441 }
442
443 /* Quotes aren't expanded for values. */
444
445 if (q || ! repl)
446 return start;
447
448 if (NULL != (def = eqn_def_find(ep, start, *sz))) {
449 diff = def->valsz - *sz;
450
451 if (def->valsz > *sz) {
452 ep->sz += diff;
453 ep->data = mandoc_realloc(ep->data, ep->sz + 1);
454 ep->data[ep->sz] = '\0';
455 start = &ep->data[(int)ep->rew];
456 }
457
458 diff = def->valsz - *sz;
459 memmove(start + *sz + diff, start + *sz,
460 (strlen(start) - *sz) + 1);
461 memcpy(start, def->val, def->valsz);
462 last_len = start - ep->data + def->valsz;
463 lim++;
464 goto again;
465 }
466
467 return start;
468 }
469
/*
 * Fetch the next token, treating '"' as the quote character and
 * expanding defined keys.
 */
static const char *
eqn_nexttok(struct eqn_node *ep, size_t *sz)
{
	const int	 expand = 1;

	return eqn_next(ep, '"', sz, expand);
}
480
/*
 * Fetch the next token, treating '"' as the quote character,
 * without expanding defined keys.
 */
static const char *
eqn_nextrawtok(struct eqn_node *ep, size_t *sz)
{
	const int	 expand = 0;

	return eqn_next(ep, '"', sz, expand);
}
490
491 /*
492 * Parse a token from the stream of text.
493 * A token consists of one of the recognised eqn(7) strings.
494 * Strings are separated by delimiting marks.
495 * This returns EQN_TOK_EOF when there are no more tokens.
496 * If the token is an unrecognised string literal, then it returns
497 * EQN_TOK__MAX and sets the "p" pointer to an allocated, nil-terminated
498 * string.
499 * This must be later freed with free(3).
500 */
501 static enum eqn_tok
502 eqn_tok_parse(struct eqn_node *ep, char **p)
503 {
504 const char *start;
505 size_t i, sz;
506 int quoted;
507
508 if (p != NULL)
509 *p = NULL;
510
511 quoted = ep->data[ep->cur] == '"';
512
513 if ((start = eqn_nexttok(ep, &sz)) == NULL)
514 return EQN_TOK_EOF;
515
516 if (quoted) {
517 if (p != NULL)
518 *p = mandoc_strndup(start, sz);
519 return EQN_TOK__MAX;
520 }
521
522 for (i = 0; i < EQN_TOK__MAX; i++)
523 if (STRNEQ(start, sz, eqn_toks[i], strlen(eqn_toks[i])))
524 return i;
525
526 for (i = 0; i < EQNSYM__MAX; i++) {
527 if (STRNEQ(start, sz,
528 eqnsyms[i].str, strlen(eqnsyms[i].str))) {
529 mandoc_asprintf(p, "\\[%s]", eqnsyms[i].sym);
530 return EQN_TOK__MAX;
531 }
532 }
533
534 if (p != NULL)
535 *p = mandoc_strndup(start, sz);
536
537 for (i = 0; i < sizeof(eqn_func)/sizeof(*eqn_func); i++)
538 if (STRNEQ(start, sz, eqn_func[i], strlen(eqn_func[i])))
539 return EQN_TOK_FUNC;
540
541 return EQN_TOK__MAX;
542 }
543
544 static void
545 eqn_box_free(struct eqn_box *bp)
546 {
547
548 if (bp->first)
549 eqn_box_free(bp->first);
550 if (bp->next)
551 eqn_box_free(bp->next);
552
553 free(bp->text);
554 free(bp->left);
555 free(bp->right);
556 free(bp->top);
557 free(bp->bottom);
558 free(bp);
559 }
560
561 /*
562 * Allocate a box as the last child of the parent node.
563 */
564 static struct eqn_box *
565 eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent)
566 {
567 struct eqn_box *bp;
568
569 bp = mandoc_calloc(1, sizeof(struct eqn_box));
570 bp->parent = parent;
571 bp->parent->args++;
572 bp->expectargs = UINT_MAX;
573 bp->size = ep->gsize;
574
575 if (NULL != parent->first) {
576 parent->last->next = bp;
577 bp->prev = parent->last;
578 } else
579 parent->first = bp;
580
581 parent->last = bp;
582 return bp;
583 }
584
585 /*
586 * Reparent the current last node (of the current parent) under a new
587 * EQN_SUBEXPR as the first element.
588 * Then return the new parent.
589 * The new EQN_SUBEXPR will have a two-child limit.
590 */
591 static struct eqn_box *
592 eqn_box_makebinary(struct eqn_node *ep,
593 enum eqn_post pos, struct eqn_box *parent)
594 {
595 struct eqn_box *b, *newb;
596
597 assert(NULL != parent->last);
598 b = parent->last;
599 if (parent->last == parent->first)
600 parent->first = NULL;
601 parent->args--;
602 parent->last = b->prev;
603 b->prev = NULL;
604 newb = eqn_box_alloc(ep, parent);
605 newb->pos = pos;
606 newb->type = EQN_SUBEXPR;
607 newb->expectargs = 2;
608 newb->args = 1;
609 newb->first = newb->last = b;
610 newb->first->next = NULL;
611 b->parent = newb;
612 return newb;
613 }
614
615 /*
616 * Parse the "delim" control statement.
617 */
618 static void
619 eqn_delim(struct eqn_node *ep)
620 {
621 const char *start;
622 size_t sz;
623
624 if ((start = eqn_nextrawtok(ep, &sz)) == NULL)
625 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
626 ep->eqn.ln, ep->eqn.pos, "delim");
627 else if (strncmp(start, "off", 3) == 0)
628 ep->delim = 0;
629 else if (strncmp(start, "on", 2) == 0) {
630 if (ep->odelim && ep->cdelim)
631 ep->delim = 1;
632 } else if (start[1] != '\0') {
633 ep->odelim = start[0];
634 ep->cdelim = start[1];
635 ep->delim = 1;
636 }
637 }
638
639 /*
640 * Undefine a previously-defined string.
641 */
642 static void
643 eqn_undef(struct eqn_node *ep)
644 {
645 const char *start;
646 struct eqn_def *def;
647 size_t sz;
648
649 if ((start = eqn_nextrawtok(ep, &sz)) == NULL) {
650 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
651 ep->eqn.ln, ep->eqn.pos, "undef");
652 return;
653 }
654 if ((def = eqn_def_find(ep, start, sz)) == NULL)
655 return;
656 free(def->key);
657 free(def->val);
658 def->key = def->val = NULL;
659 def->keysz = def->valsz = 0;
660 }
661
662 static void
663 eqn_def(struct eqn_node *ep)
664 {
665 const char *start;
666 size_t sz;
667 struct eqn_def *def;
668 int i;
669
670 if ((start = eqn_nextrawtok(ep, &sz)) == NULL) {
671 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
672 ep->eqn.ln, ep->eqn.pos, "define");
673 return;
674 }
675
676 /*
677 * Search for a key that already exists.
678 * Create a new key if none is found.
679 */
680 if (NULL == (def = eqn_def_find(ep, start, sz))) {
681 /* Find holes in string array. */
682 for (i = 0; i < (int)ep->defsz; i++)
683 if (0 == ep->defs[i].keysz)
684 break;
685
686 if (i == (int)ep->defsz) {
687 ep->defsz++;
688 ep->defs = mandoc_reallocarray(ep->defs,
689 ep->defsz, sizeof(struct eqn_def));
690 ep->defs[i].key = ep->defs[i].val = NULL;
691 }
692
693 def = ep->defs + i;
694 free(def->key);
695 def->key = mandoc_strndup(start, sz);
696 def->keysz = sz;
697 }
698
699 start = eqn_next(ep, ep->data[(int)ep->cur], &sz, 0);
700 if (start == NULL) {
701 mandoc_vmsg(MANDOCERR_REQ_EMPTY, ep->parse,
702 ep->eqn.ln, ep->eqn.pos, "define %s", def->key);
703 free(def->key);
704 free(def->val);
705 def->key = def->val = NULL;
706 def->keysz = def->valsz = 0;
707 return;
708 }
709 free(def->val);
710 def->val = mandoc_strndup(start, sz);
711 def->valsz = sz;
712 }
713
714 /*
715 * Recursively parse an eqn(7) expression.
716 */
717 static enum rofferr
718 eqn_parse(struct eqn_node *ep, struct eqn_box *parent)
719 {
720 char sym[64];
721 struct eqn_box *cur;
722 const char *start;
723 char *p;
724 size_t sz;
725 enum eqn_tok tok, subtok;
726 enum eqn_post pos;
727 int size;
728
729 assert(parent != NULL);
730
731 /*
732 * Empty equation.
733 * Do not add it to the high-level syntax tree.
734 */
735
736 if (ep->data == NULL)
737 return ROFF_IGN;
738
739 next_tok:
740 tok = eqn_tok_parse(ep, &p);
741
742 this_tok:
743 switch (tok) {
744 case EQN_TOK_UNDEF:
745 eqn_undef(ep);
746 break;
747 case EQN_TOK_NDEFINE:
748 case EQN_TOK_DEFINE:
749 eqn_def(ep);
750 break;
751 case EQN_TOK_TDEFINE:
752 if (eqn_nextrawtok(ep, NULL) == NULL ||
753 eqn_next(ep, ep->data[(int)ep->cur], NULL, 0) == NULL)
754 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
755 ep->eqn.ln, ep->eqn.pos, "tdefine");
756 break;
757 case EQN_TOK_DELIM:
758 eqn_delim(ep);
759 break;
760 case EQN_TOK_GFONT:
761 if (eqn_nextrawtok(ep, NULL) == NULL)
762 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
763 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
764 break;
765 case EQN_TOK_MARK:
766 case EQN_TOK_LINEUP:
767 /* Ignore these. */
768 break;
769 case EQN_TOK_DYAD:
770 case EQN_TOK_VEC:
771 case EQN_TOK_UNDER:
772 case EQN_TOK_BAR:
773 case EQN_TOK_TILDE:
774 case EQN_TOK_HAT:
775 case EQN_TOK_DOT:
776 case EQN_TOK_DOTDOT:
777 if (parent->last == NULL) {
778 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
779 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
780 cur = eqn_box_alloc(ep, parent);
781 cur->type = EQN_TEXT;
782 cur->text = mandoc_strdup("");
783 }
784 parent = eqn_box_makebinary(ep, EQNPOS_NONE, parent);
785 parent->type = EQN_LISTONE;
786 parent->expectargs = 1;
787 switch (tok) {
788 case EQN_TOK_DOTDOT:
789 strlcpy(sym, "\\[ad]", sizeof(sym));
790 break;
791 case EQN_TOK_VEC:
792 strlcpy(sym, "\\[->]", sizeof(sym));
793 break;
794 case EQN_TOK_DYAD:
795 strlcpy(sym, "\\[<>]", sizeof(sym));
796 break;
797 case EQN_TOK_TILDE:
798 strlcpy(sym, "\\[a~]", sizeof(sym));
799 break;
800 case EQN_TOK_UNDER:
801 strlcpy(sym, "\\[ul]", sizeof(sym));
802 break;
803 case EQN_TOK_BAR:
804 strlcpy(sym, "\\[rl]", sizeof(sym));
805 break;
806 case EQN_TOK_DOT:
807 strlcpy(sym, "\\[a.]", sizeof(sym));
808 break;
809 case EQN_TOK_HAT:
810 strlcpy(sym, "\\[ha]", sizeof(sym));
811 break;
812 default:
813 abort();
814 }
815
816 switch (tok) {
817 case EQN_TOK_DOTDOT:
818 case EQN_TOK_VEC:
819 case EQN_TOK_DYAD:
820 case EQN_TOK_TILDE:
821 case EQN_TOK_BAR:
822 case EQN_TOK_DOT:
823 case EQN_TOK_HAT:
824 parent->top = mandoc_strdup(sym);
825 break;
826 case EQN_TOK_UNDER:
827 parent->bottom = mandoc_strdup(sym);
828 break;
829 default:
830 abort();
831 }
832 parent = parent->parent;
833 break;
834 case EQN_TOK_FWD:
835 case EQN_TOK_BACK:
836 case EQN_TOK_DOWN:
837 case EQN_TOK_UP:
838 subtok = eqn_tok_parse(ep, NULL);
839 if (subtok != EQN_TOK__MAX) {
840 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
841 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
842 tok = subtok;
843 goto this_tok;
844 }
845 break;
846 case EQN_TOK_FAT:
847 case EQN_TOK_ROMAN:
848 case EQN_TOK_ITALIC:
849 case EQN_TOK_BOLD:
850 while (parent->args == parent->expectargs)
851 parent = parent->parent;
852 /*
853 * These values apply to the next word or sequence of
854 * words; thus, we mark that we'll have a child with
855 * exactly one of those.
856 */
857 parent = eqn_box_alloc(ep, parent);
858 parent->type = EQN_LISTONE;
859 parent->expectargs = 1;
860 switch (tok) {
861 case EQN_TOK_FAT:
862 parent->font = EQNFONT_FAT;
863 break;
864 case EQN_TOK_ROMAN:
865 parent->font = EQNFONT_ROMAN;
866 break;
867 case EQN_TOK_ITALIC:
868 parent->font = EQNFONT_ITALIC;
869 break;
870 case EQN_TOK_BOLD:
871 parent->font = EQNFONT_BOLD;
872 break;
873 default:
874 abort();
875 }
876 break;
877 case EQN_TOK_SIZE:
878 case EQN_TOK_GSIZE:
879 /* Accept two values: integral size and a single. */
880 if (NULL == (start = eqn_nexttok(ep, &sz))) {
881 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
882 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
883 break;
884 }
885 size = mandoc_strntoi(start, sz, 10);
886 if (-1 == size) {
887 mandoc_msg(MANDOCERR_IT_NONUM, ep->parse,
888 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
889 break;
890 }
891 if (EQN_TOK_GSIZE == tok) {
892 ep->gsize = size;
893 break;
894 }
895 parent = eqn_box_alloc(ep, parent);
896 parent->type = EQN_LISTONE;
897 parent->expectargs = 1;
898 parent->size = size;
899 break;
900 case EQN_TOK_FROM:
901 case EQN_TOK_TO:
902 case EQN_TOK_SUB:
903 case EQN_TOK_SUP:
904 /*
905 * We have a left-right-associative expression.
906 * Repivot under a positional node, open a child scope
907 * and keep on reading.
908 */
909 if (parent->last == NULL) {
910 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
911 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
912 cur = eqn_box_alloc(ep, parent);
913 cur->type = EQN_TEXT;
914 cur->text = mandoc_strdup("");
915 }
916 /* Handle the "subsup" and "fromto" positions. */
917 if (EQN_TOK_SUP == tok && parent->pos == EQNPOS_SUB) {
918 parent->expectargs = 3;
919 parent->pos = EQNPOS_SUBSUP;
920 break;
921 }
922 if (EQN_TOK_TO == tok && parent->pos == EQNPOS_FROM) {
923 parent->expectargs = 3;
924 parent->pos = EQNPOS_FROMTO;
925 break;
926 }
927 switch (tok) {
928 case EQN_TOK_FROM:
929 pos = EQNPOS_FROM;
930 break;
931 case EQN_TOK_TO:
932 pos = EQNPOS_TO;
933 break;
934 case EQN_TOK_SUP:
935 pos = EQNPOS_SUP;
936 break;
937 case EQN_TOK_SUB:
938 pos = EQNPOS_SUB;
939 break;
940 default:
941 abort();
942 }
943 parent = eqn_box_makebinary(ep, pos, parent);
944 break;
945 case EQN_TOK_SQRT:
946 while (parent->args == parent->expectargs)
947 parent = parent->parent;
948 /*
949 * Accept a left-right-associative set of arguments just
950 * like sub and sup and friends but without rebalancing
951 * under a pivot.
952 */
953 parent = eqn_box_alloc(ep, parent);
954 parent->type = EQN_SUBEXPR;
955 parent->pos = EQNPOS_SQRT;
956 parent->expectargs = 1;
957 break;
958 case EQN_TOK_OVER:
959 /*
960 * We have a right-left-associative fraction.
961 * Close out anything that's currently open, then
962 * rebalance and continue reading.
963 */
964 if (parent->last == NULL) {
965 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
966 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
967 cur = eqn_box_alloc(ep, parent);
968 cur->type = EQN_TEXT;
969 cur->text = mandoc_strdup("");
970 }
971 while (EQN_SUBEXPR == parent->type)
972 parent = parent->parent;
973 parent = eqn_box_makebinary(ep, EQNPOS_OVER, parent);
974 break;
975 case EQN_TOK_RIGHT:
976 case EQN_TOK_BRACE_CLOSE:
977 /*
978 * Close out the existing brace.
979 * FIXME: this is a shitty sentinel: we should really
980 * have a native EQN_BRACE type or whatnot.
981 */
982 for (cur = parent; cur != NULL; cur = cur->parent)
983 if (cur->type == EQN_LIST &&
984 (tok == EQN_TOK_BRACE_CLOSE ||
985 cur->left != NULL))
986 break;
987 if (cur == NULL) {
988 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->parse,
989 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
990 break;
991 }
992 parent = cur;
993 if (EQN_TOK_RIGHT == tok) {
994 if (NULL == (start = eqn_nexttok(ep, &sz))) {
995 mandoc_msg(MANDOCERR_REQ_EMPTY,
996 ep->parse, ep->eqn.ln,
997 ep->eqn.pos, eqn_toks[tok]);
998 break;
999 }
1000 /* Handling depends on right/left. */
1001 if (STRNEQ(start, sz, "ceiling", 7)) {
1002 strlcpy(sym, "\\[rc]", sizeof(sym));
1003 parent->right = mandoc_strdup(sym);
1004 } else if (STRNEQ(start, sz, "floor", 5)) {
1005 strlcpy(sym, "\\[rf]", sizeof(sym));
1006 parent->right = mandoc_strdup(sym);
1007 } else
1008 parent->right = mandoc_strndup(start, sz);
1009 }
1010 parent = parent->parent;
1011 if (tok == EQN_TOK_BRACE_CLOSE &&
1012 (parent->type == EQN_PILE ||
1013 parent->type == EQN_MATRIX))
1014 parent = parent->parent;
1015 /* Close out any "singleton" lists. */
1016 while (parent->type == EQN_LISTONE &&
1017 parent->args == parent->expectargs)
1018 parent = parent->parent;
1019 break;
1020 case EQN_TOK_BRACE_OPEN:
1021 case EQN_TOK_LEFT:
1022 /*
1023 * If we already have something in the stack and we're
1024 * in an expression, then rewind til we're not any more
1025 * (just like with the text node).
1026 */
1027 while (parent->args == parent->expectargs)
1028 parent = parent->parent;
1029 if (EQN_TOK_LEFT == tok &&
1030 (start = eqn_nexttok(ep, &sz)) == NULL) {
1031 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
1032 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
1033 break;
1034 }
1035 parent = eqn_box_alloc(ep, parent);
1036 parent->type = EQN_LIST;
1037 if (EQN_TOK_LEFT == tok) {
1038 if (STRNEQ(start, sz, "ceiling", 7)) {
1039 strlcpy(sym, "\\[lc]", sizeof(sym));
1040 parent->left = mandoc_strdup(sym);
1041 } else if (STRNEQ(start, sz, "floor", 5)) {
1042 strlcpy(sym, "\\[lf]", sizeof(sym));
1043 parent->left = mandoc_strdup(sym);
1044 } else
1045 parent->left = mandoc_strndup(start, sz);
1046 }
1047 break;
1048 case EQN_TOK_PILE:
1049 case EQN_TOK_LPILE:
1050 case EQN_TOK_RPILE:
1051 case EQN_TOK_CPILE:
1052 case EQN_TOK_CCOL:
1053 case EQN_TOK_LCOL:
1054 case EQN_TOK_RCOL:
1055 while (parent->args == parent->expectargs)
1056 parent = parent->parent;
1057 parent = eqn_box_alloc(ep, parent);
1058 parent->type = EQN_PILE;
1059 parent->expectargs = 1;
1060 break;
1061 case EQN_TOK_ABOVE:
1062 for (cur = parent; cur != NULL; cur = cur->parent)
1063 if (cur->type == EQN_PILE)
1064 break;
1065 if (cur == NULL) {
1066 mandoc_msg(MANDOCERR_IT_STRAY, ep->parse,
1067 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
1068 break;
1069 }
1070 parent = eqn_box_alloc(ep, cur);
1071 parent->type = EQN_LIST;
1072 break;
1073 case EQN_TOK_MATRIX:
1074 while (parent->args == parent->expectargs)
1075 parent = parent->parent;
1076 parent = eqn_box_alloc(ep, parent);
1077 parent->type = EQN_MATRIX;
1078 parent->expectargs = 1;
1079 break;
1080 case EQN_TOK_EOF:
1081 /*
1082 * End of file!
1083 * TODO: make sure we're not in an open subexpression.
1084 */
1085 return ROFF_EQN;
1086 case EQN_TOK_FUNC:
1087 case EQN_TOK__MAX:
1088 assert(p != NULL);
1089 /*
1090 * If we already have something in the stack and we're
1091 * in an expression, then rewind til we're not any more.
1092 */
1093 while (parent->args == parent->expectargs)
1094 parent = parent->parent;
1095 if (tok == EQN_TOK_FUNC) {
1096 for (cur = parent; cur != NULL; cur = cur->parent)
1097 if (cur->font != EQNFONT_NONE)
1098 break;
1099 if (cur == NULL || cur->font != EQNFONT_ROMAN) {
1100 parent = eqn_box_alloc(ep, parent);
1101 parent->type = EQN_LISTONE;
1102 parent->font = EQNFONT_ROMAN;
1103 parent->expectargs = 1;
1104 }
1105 }
1106 cur = eqn_box_alloc(ep, parent);
1107 cur->type = EQN_TEXT;
1108 cur->text = p;
1109
1110 /*
1111 * Post-process list status.
1112 */
1113 while (parent->type == EQN_LISTONE &&
1114 parent->args == parent->expectargs)
1115 parent = parent->parent;
1116 break;
1117 default:
1118 abort();
1119 }
1120 goto next_tok;
1121 }
1122
1123 enum rofferr
1124 eqn_end(struct eqn_node **epp)
1125 {
1126 struct eqn_node *ep;
1127
1128 ep = *epp;
1129 *epp = NULL;
1130
1131 ep->eqn.root = mandoc_calloc(1, sizeof(struct eqn_box));
1132 ep->eqn.root->expectargs = UINT_MAX;
1133 return eqn_parse(ep, ep->eqn.root);
1134 }
1135
1136 void
1137 eqn_free(struct eqn_node *p)
1138 {
1139 int i;
1140
1141 eqn_box_free(p->eqn.root);
1142
1143 for (i = 0; i < (int)p->defsz; i++) {
1144 free(p->defs[i].key);
1145 free(p->defs[i].val);
1146 }
1147
1148 free(p->data);
1149 free(p->defs);
1150 free(p);
1151 }