]> git.cameronkatri.com Git - mandoc.git/blob - eqn.c
When checking the validity of cross references with -Tlint,
[mandoc.git] / eqn.c
1 /* $Id: eqn.c,v 1.72 2017/06/29 16:31:15 schwarze Exp $ */
2 /*
3 * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <time.h>
29
30 #include "mandoc.h"
31 #include "mandoc_aux.h"
32 #include "libmandoc.h"
33 #include "libroff.h"
34
/* Upper bound on consecutive define substitutions before giving up. */
#define	EQN_NEST_MAX	 128

/*
 * True if the two length-delimited byte strings have the same size
 * and the same content.  Note that sz1 is evaluated twice.
 */
#define	STRNEQ(p1, sz1, p2, sz2) \
	((sz1) == (sz2) && strncmp((p1), (p2), (sz1)) == 0)
38
/*
 * Token types returned by eqn_next().
 * The constants before EQN_TOK__MAX are keywords and double as
 * indices into the eqn_toks[] spelling table, so their order must
 * match that table.  EQN_TOK__MAX itself is returned for a word
 * that is not a keyword; the constants after it have no keyword
 * spelling.
 */
enum	eqn_tok {
	EQN_TOK_DYAD = 0,
	EQN_TOK_VEC,
	EQN_TOK_UNDER,
	EQN_TOK_BAR,
	EQN_TOK_TILDE,
	EQN_TOK_HAT,
	EQN_TOK_DOT,
	EQN_TOK_DOTDOT,
	EQN_TOK_FWD,
	EQN_TOK_BACK,
	EQN_TOK_DOWN,
	EQN_TOK_UP,
	EQN_TOK_FAT,
	EQN_TOK_ROMAN,
	EQN_TOK_ITALIC,
	EQN_TOK_BOLD,
	EQN_TOK_SIZE,
	EQN_TOK_SUB,
	EQN_TOK_SUP,
	EQN_TOK_SQRT,
	EQN_TOK_OVER,
	EQN_TOK_FROM,
	EQN_TOK_TO,
	EQN_TOK_BRACE_OPEN,
	EQN_TOK_BRACE_CLOSE,
	EQN_TOK_GSIZE,
	EQN_TOK_GFONT,
	EQN_TOK_MARK,
	EQN_TOK_LINEUP,
	EQN_TOK_LEFT,
	EQN_TOK_RIGHT,
	EQN_TOK_PILE,
	EQN_TOK_LPILE,
	EQN_TOK_RPILE,
	EQN_TOK_CPILE,
	EQN_TOK_MATRIX,
	EQN_TOK_CCOL,
	EQN_TOK_LCOL,
	EQN_TOK_RCOL,
	EQN_TOK_DELIM,
	EQN_TOK_DEFINE,
	EQN_TOK_TDEFINE,
	EQN_TOK_NDEFINE,
	EQN_TOK_UNDEF,
	EQN_TOK_ABOVE,
	EQN_TOK__MAX,		/* keyword count; also "plain word" */
	EQN_TOK_FUNC,		/* word listed in eqn_func[] */
	EQN_TOK_QUOTED,		/* quoted string */
	EQN_TOK_SYM,		/* word listed in eqnsyms[] */
	EQN_TOK_EOF		/* end of the equation input */
};
91
92 static const char *eqn_toks[EQN_TOK__MAX] = {
93 "dyad", /* EQN_TOK_DYAD */
94 "vec", /* EQN_TOK_VEC */
95 "under", /* EQN_TOK_UNDER */
96 "bar", /* EQN_TOK_BAR */
97 "tilde", /* EQN_TOK_TILDE */
98 "hat", /* EQN_TOK_HAT */
99 "dot", /* EQN_TOK_DOT */
100 "dotdot", /* EQN_TOK_DOTDOT */
101 "fwd", /* EQN_TOK_FWD * */
102 "back", /* EQN_TOK_BACK */
103 "down", /* EQN_TOK_DOWN */
104 "up", /* EQN_TOK_UP */
105 "fat", /* EQN_TOK_FAT */
106 "roman", /* EQN_TOK_ROMAN */
107 "italic", /* EQN_TOK_ITALIC */
108 "bold", /* EQN_TOK_BOLD */
109 "size", /* EQN_TOK_SIZE */
110 "sub", /* EQN_TOK_SUB */
111 "sup", /* EQN_TOK_SUP */
112 "sqrt", /* EQN_TOK_SQRT */
113 "over", /* EQN_TOK_OVER */
114 "from", /* EQN_TOK_FROM */
115 "to", /* EQN_TOK_TO */
116 "{", /* EQN_TOK_BRACE_OPEN */
117 "}", /* EQN_TOK_BRACE_CLOSE */
118 "gsize", /* EQN_TOK_GSIZE */
119 "gfont", /* EQN_TOK_GFONT */
120 "mark", /* EQN_TOK_MARK */
121 "lineup", /* EQN_TOK_LINEUP */
122 "left", /* EQN_TOK_LEFT */
123 "right", /* EQN_TOK_RIGHT */
124 "pile", /* EQN_TOK_PILE */
125 "lpile", /* EQN_TOK_LPILE */
126 "rpile", /* EQN_TOK_RPILE */
127 "cpile", /* EQN_TOK_CPILE */
128 "matrix", /* EQN_TOK_MATRIX */
129 "ccol", /* EQN_TOK_CCOL */
130 "lcol", /* EQN_TOK_LCOL */
131 "rcol", /* EQN_TOK_RCOL */
132 "delim", /* EQN_TOK_DELIM */
133 "define", /* EQN_TOK_DEFINE */
134 "tdefine", /* EQN_TOK_TDEFINE */
135 "ndefine", /* EQN_TOK_NDEFINE */
136 "undef", /* EQN_TOK_UNDEF */
137 "above", /* EQN_TOK_ABOVE */
138 };
139
/*
 * Words reported as EQN_TOK_FUNC by eqn_next().
 * Matching is exact and case-sensitive.
 */
static	const char *const eqn_func[] = {
	"acos",	"acsc",	"and",	"arc",	"asec",	"asin", "atan",
	"cos",	"cosh", "coth",	"csc",	"det",	"exp",	"for",
	"if",	"lim",	"ln",	"log",	"max",	"min",
	"sec",	"sin",	"sinh",	"tan",	"tanh",	"Im",	"Re",
};
146
/*
 * Indices into the eqnsyms[] table; the order here must match the
 * order of the table entries.  EQNSYM__MAX is the number of entries.
 */
enum	eqn_symt {
	EQNSYM_alpha = 0,
	EQNSYM_beta,
	EQNSYM_chi,
	EQNSYM_delta,
	EQNSYM_epsilon,
	EQNSYM_eta,
	EQNSYM_gamma,
	EQNSYM_iota,
	EQNSYM_kappa,
	EQNSYM_lambda,
	EQNSYM_mu,
	EQNSYM_nu,
	EQNSYM_omega,
	EQNSYM_omicron,
	EQNSYM_phi,
	EQNSYM_pi,
	EQNSYM_psi,	/* was misspelt EQNSYM_ps */
	EQNSYM_rho,
	EQNSYM_sigma,
	EQNSYM_tau,
	EQNSYM_theta,
	EQNSYM_upsilon,
	EQNSYM_xi,
	EQNSYM_zeta,
	EQNSYM_DELTA,
	EQNSYM_GAMMA,
	EQNSYM_LAMBDA,
	EQNSYM_OMEGA,
	EQNSYM_PHI,
	EQNSYM_PI,
	EQNSYM_PSI,
	EQNSYM_SIGMA,
	EQNSYM_THETA,
	EQNSYM_UPSILON,
	EQNSYM_XI,
	EQNSYM_inter,
	EQNSYM_union,
	EQNSYM_prod,
	EQNSYM_int,
	EQNSYM_sum,
	EQNSYM_grad,
	EQNSYM_del,
	EQNSYM_times,
	EQNSYM_cdot,
	EQNSYM_nothing,
	EQNSYM_approx,
	EQNSYM_prime,
	EQNSYM_half,
	EQNSYM_partial,
	EQNSYM_inf,
	EQNSYM_muchgreat,
	EQNSYM_muchless,
	EQNSYM_larrow,
	EQNSYM_rarrow,
	EQNSYM_pm,
	EQNSYM_nequal,
	EQNSYM_equiv,
	EQNSYM_lessequal,
	EQNSYM_moreequal,
	EQNSYM_minus,
	EQNSYM__MAX
};
210
/*
 * One symbol translation: eqn_next() compares the input word
 * against str and, on a match, emits sym wrapped as "\[sym]".
 */
struct	eqnsym {
	const char	*str;	/* word as written in the eqn input */
	const char	*sym;	/* name placed inside the \[...] escape */
};
215
/*
 * Symbol translation table, searched linearly by eqn_next().
 * Entries are kept in the order of enum eqn_symt.
 */
static	const struct eqnsym eqnsyms[EQNSYM__MAX] = {
	{ "alpha", "*a" }, /* EQNSYM_alpha */
	{ "beta", "*b" }, /* EQNSYM_beta */
	{ "chi", "*x" }, /* EQNSYM_chi */
	{ "delta", "*d" }, /* EQNSYM_delta */
	{ "epsilon", "*e" }, /* EQNSYM_epsilon */
	{ "eta", "*y" }, /* EQNSYM_eta */
	{ "gamma", "*g" }, /* EQNSYM_gamma */
	{ "iota", "*i" }, /* EQNSYM_iota */
	{ "kappa", "*k" }, /* EQNSYM_kappa */
	{ "lambda", "*l" }, /* EQNSYM_lambda */
	{ "mu", "*m" }, /* EQNSYM_mu */
	{ "nu", "*n" }, /* EQNSYM_nu */
	{ "omega", "*w" }, /* EQNSYM_omega */
	{ "omicron", "*o" }, /* EQNSYM_omicron */
	{ "phi", "*f" }, /* EQNSYM_phi */
	{ "pi", "*p" }, /* EQNSYM_pi */
	{ "psi", "*q" }, /* EQNSYM_psi */
	{ "rho", "*r" }, /* EQNSYM_rho */
	{ "sigma", "*s" }, /* EQNSYM_sigma */
	{ "tau", "*t" }, /* EQNSYM_tau */
	{ "theta", "*h" }, /* EQNSYM_theta */
	{ "upsilon", "*u" }, /* EQNSYM_upsilon */
	{ "xi", "*c" }, /* EQNSYM_xi */
	{ "zeta", "*z" }, /* EQNSYM_zeta */
	{ "DELTA", "*D" }, /* EQNSYM_DELTA */
	{ "GAMMA", "*G" }, /* EQNSYM_GAMMA */
	{ "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */
	{ "OMEGA", "*W" }, /* EQNSYM_OMEGA */
	{ "PHI", "*F" }, /* EQNSYM_PHI */
	{ "PI", "*P" }, /* EQNSYM_PI */
	{ "PSI", "*Q" }, /* EQNSYM_PSI */
	{ "SIGMA", "*S" }, /* EQNSYM_SIGMA */
	{ "THETA", "*H" }, /* EQNSYM_THETA */
	{ "UPSILON", "*U" }, /* EQNSYM_UPSILON */
	{ "XI", "*C" }, /* EQNSYM_XI */
	{ "inter", "ca" }, /* EQNSYM_inter */
	{ "union", "cu" }, /* EQNSYM_union */
	{ "prod", "product" }, /* EQNSYM_prod */
	{ "int", "integral" }, /* EQNSYM_int */
	{ "sum", "sum" }, /* EQNSYM_sum */
	{ "grad", "gr" }, /* EQNSYM_grad */
	{ "del", "gr" }, /* EQNSYM_del */
	{ "times", "mu" }, /* EQNSYM_times */
	{ "cdot", "pc" }, /* EQNSYM_cdot */
	{ "nothing", "&" }, /* EQNSYM_nothing */
	{ "approx", "~~" }, /* EQNSYM_approx */
	{ "prime", "fm" }, /* EQNSYM_prime */
	{ "half", "12" }, /* EQNSYM_half */
	{ "partial", "pd" }, /* EQNSYM_partial */
	{ "inf", "if" }, /* EQNSYM_inf */
	{ ">>", ">>" }, /* EQNSYM_muchgreat */
	{ "<<", "<<" }, /* EQNSYM_muchless */
	{ "<-", "<-" }, /* EQNSYM_larrow */
	{ "->", "->" }, /* EQNSYM_rarrow */
	{ "+-", "+-" }, /* EQNSYM_pm */
	{ "!=", "!=" }, /* EQNSYM_nequal */
	{ "==", "==" }, /* EQNSYM_equiv */
	{ "<=", "<=" }, /* EQNSYM_lessequal */
	{ ">=", ">=" }, /* EQNSYM_moreequal */
	{ "-", "mi" }, /* EQNSYM_minus */
};
278
/*
 * How eqn_next() delimits and interprets the next token.
 */
enum	parse_mode {
	MODE_QUOTED,	/* first byte is the delimiter; no interpretation */
	MODE_NOSUB,	/* plain word; no define substitution */
	MODE_SUB,	/* plain word with define substitution */
	MODE_TOK	/* like MODE_SUB, then classify the word */
};
285
/* Forward declarations of the file-local helpers defined below. */
static	struct eqn_box	*eqn_box_alloc(struct eqn_node *, struct eqn_box *);
static	void		 eqn_box_free(struct eqn_box *);
static	struct eqn_box	*eqn_box_makebinary(struct eqn_node *,
				enum eqn_post, struct eqn_box *);
static	void		 eqn_def(struct eqn_node *);
static	struct eqn_def	*eqn_def_find(struct eqn_node *);
static	void		 eqn_delim(struct eqn_node *);
static	enum eqn_tok	 eqn_next(struct eqn_node *, enum parse_mode);
static	enum rofferr	 eqn_parse(struct eqn_node *, struct eqn_box *);
static	void		 eqn_undef(struct eqn_node *);
296
297
298 enum rofferr
299 eqn_read(struct eqn_node **epp, int ln,
300 const char *p, int pos, int *offs)
301 {
302 size_t sz;
303 struct eqn_node *ep;
304 enum rofferr er;
305
306 ep = *epp;
307
308 /*
309 * If we're the terminating mark, unset our equation status and
310 * validate the full equation.
311 */
312
313 if (0 == strncmp(p, ".EN", 3)) {
314 er = eqn_end(epp);
315 p += 3;
316 while (' ' == *p || '\t' == *p)
317 p++;
318 if ('\0' == *p)
319 return er;
320 mandoc_vmsg(MANDOCERR_ARG_SKIP, ep->parse,
321 ln, pos, "EN %s", p);
322 return er;
323 }
324
325 /*
326 * Build up the full string, replacing all newlines with regular
327 * whitespace.
328 */
329
330 sz = strlen(p + pos) + 1;
331 ep->data = mandoc_realloc(ep->data, ep->sz + sz + 1);
332
333 /* First invocation: nil terminate the string. */
334
335 if (0 == ep->sz)
336 *ep->data = '\0';
337
338 ep->sz += sz;
339 strlcat(ep->data, p + pos, ep->sz + 1);
340 strlcat(ep->data, " ", ep->sz + 1);
341 return ROFF_IGN;
342 }
343
344 struct eqn_node *
345 eqn_alloc(int pos, int line, struct mparse *parse)
346 {
347 struct eqn_node *p;
348
349 p = mandoc_calloc(1, sizeof(struct eqn_node));
350
351 p->parse = parse;
352 p->eqn.ln = line;
353 p->eqn.pos = pos;
354 p->gsize = EQN_DEFSIZE;
355
356 return p;
357 }
358
359 /*
360 * Find the key "key" of the give size within our eqn-defined values.
361 */
362 static struct eqn_def *
363 eqn_def_find(struct eqn_node *ep)
364 {
365 int i;
366
367 for (i = 0; i < (int)ep->defsz; i++)
368 if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key,
369 ep->defs[i].keysz, ep->start, ep->toksz))
370 return &ep->defs[i];
371
372 return NULL;
373 }
374
/*
 * Parse a token from the input text.  The modes are:
 * MODE_QUOTED: Use *ep->start as the delimiter; the token ends
 *   before its next occurrence.  Do not interpret the token in any
 *   way and return EQN_TOK_QUOTED.  All other modes behave like
 *   MODE_QUOTED when *ep->start is '"'.
 * MODE_NOSUB: If *ep->start is a curly brace, the token ends after it;
 *   otherwise, it ends before the next whitespace or brace.
 *   Do not interpret the token and return EQN_TOK__MAX.
 * MODE_SUB: Like MODE_NOSUB, but try to interpret the token as an
 *   alias created with define.  If it is an alias, replace it with
 *   its string value and reparse.
 * MODE_TOK: Like MODE_SUB, but also check the token against the list
 *   of tokens, and if there is a match, return that token.  Otherwise,
 *   if the token matches a symbol, return EQN_TOK_SYM; if it matches
 *   a function name, EQN_TOK_FUNC, or else EQN_TOK__MAX.  Except for
 *   a token match, ep->start is set to an allocated string that the
 *   caller is expected to free.
 * All modes skip whitespace following the end of the token.
 * At the end of the input, ep->toksz is set to 0 and EQN_TOK_EOF
 * is returned.  In the modes other than MODE_TOK, ep->start points
 * into ep->data and the token is ep->toksz bytes long.
 */
static enum eqn_tok
eqn_next(struct eqn_node *ep, enum parse_mode mode)
{
	/*
	 * Substitution bookkeeping.  Being function-static, it is
	 * shared by all calls, whatever eqn_node they operate on.
	 */
	static int	 last_len, lim;

	struct eqn_def	*def;
	size_t		 start;
	int		 diff, i, quoted;
	enum eqn_tok	 tok;

	/*
	 * Reset the recursion counter after advancing
	 * beyond the end of the previous substitution.
	 */
	if (ep->end - ep->data >= last_len)
		lim = 0;

	ep->start = ep->end;
	quoted = mode == MODE_QUOTED;
	for (;;) {
		switch (*ep->start) {
		case '\0':
			ep->toksz = 0;
			return EQN_TOK_EOF;
		case '"':
			quoted = 1;
			break;
		default:
			break;
		}
		if (quoted) {
			ep->end = strchr(ep->start + 1, *ep->start);
			ep->start++;  /* Skip opening quote. */
			if (ep->end == NULL) {
				/* Unterminated: take the rest of the input. */
				mandoc_msg(MANDOCERR_ARG_QUOTE, ep->parse,
				    ep->eqn.ln, ep->eqn.pos, NULL);
				ep->end = strchr(ep->start, '\0');
			}
		} else {
			/* A brace is a token by itself. */
			ep->end = ep->start + 1;
			if (*ep->start != '{' && *ep->start != '}')
				ep->end += strcspn(ep->end, " ^~\"{}\t");
		}
		ep->toksz = ep->end - ep->start;
		if (quoted && *ep->end != '\0')
			ep->end++;  /* Skip closing quote. */
		while (*ep->end != '\0' && strchr(" \t^~", *ep->end) != NULL)
			ep->end++;
		if (quoted)  /* Cannot return, may have to strndup. */
			break;
		if (mode == MODE_NOSUB)
			return EQN_TOK__MAX;
		if ((def = eqn_def_find(ep)) == NULL)
			break;
		if (++lim > EQN_NEST_MAX) {
			mandoc_msg(MANDOCERR_ROFFLOOP, ep->parse,
			    ep->eqn.ln, ep->eqn.pos, NULL);
			return EQN_TOK_EOF;
		}

		/* Replace a defined name with its string value. */
		if ((diff = def->valsz - ep->toksz) > 0) {
			/* Growing: ep->data may move, so rebase ep->start. */
			start = ep->start - ep->data;
			ep->sz += diff;
			ep->data = mandoc_realloc(ep->data, ep->sz + 1);
			ep->start = ep->data + start;
		}
		if (diff)
			/* Shift the tail, including its terminating NUL. */
			memmove(ep->start + def->valsz, ep->start + ep->toksz,
			    strlen(ep->start + ep->toksz) + 1);
		memcpy(ep->start, def->val, def->valsz);
		last_len = ep->start - ep->data + def->valsz;
		/* Loop around to reparse starting at the replacement. */
	}
	if (mode != MODE_TOK)
		return quoted ? EQN_TOK_QUOTED : EQN_TOK__MAX;
	if (quoted) {
		ep->start = mandoc_strndup(ep->start, ep->toksz);
		return EQN_TOK_QUOTED;
	}
	/* Keywords are returned without allocating anything. */
	for (tok = 0; tok < EQN_TOK__MAX; tok++)
		if (STRNEQ(ep->start, ep->toksz,
		    eqn_toks[tok], strlen(eqn_toks[tok])))
			return tok;

	/* Symbols are replaced by an allocated "\[name]" escape. */
	for (i = 0; i < EQNSYM__MAX; i++) {
		if (STRNEQ(ep->start, ep->toksz,
		    eqnsyms[i].str, strlen(eqnsyms[i].str))) {
			mandoc_asprintf(&ep->start,
			    "\\[%s]", eqnsyms[i].sym);
			return EQN_TOK_SYM;
		}
	}
	/* Anything else is handed back as an allocated copy. */
	ep->start = mandoc_strndup(ep->start, ep->toksz);
	for (i = 0; i < (int)(sizeof(eqn_func)/sizeof(*eqn_func)); i++)
		if (STRNEQ(ep->start, ep->toksz,
		    eqn_func[i], strlen(eqn_func[i])))
			return EQN_TOK_FUNC;
	return EQN_TOK__MAX;
}
494
495 static void
496 eqn_box_free(struct eqn_box *bp)
497 {
498
499 if (bp->first)
500 eqn_box_free(bp->first);
501 if (bp->next)
502 eqn_box_free(bp->next);
503
504 free(bp->text);
505 free(bp->left);
506 free(bp->right);
507 free(bp->top);
508 free(bp->bottom);
509 free(bp);
510 }
511
512 /*
513 * Allocate a box as the last child of the parent node.
514 */
515 static struct eqn_box *
516 eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent)
517 {
518 struct eqn_box *bp;
519
520 bp = mandoc_calloc(1, sizeof(struct eqn_box));
521 bp->parent = parent;
522 bp->parent->args++;
523 bp->expectargs = UINT_MAX;
524 bp->font = bp->parent->font;
525 bp->size = ep->gsize;
526
527 if (NULL != parent->first) {
528 parent->last->next = bp;
529 bp->prev = parent->last;
530 } else
531 parent->first = bp;
532
533 parent->last = bp;
534 return bp;
535 }
536
537 /*
538 * Reparent the current last node (of the current parent) under a new
539 * EQN_SUBEXPR as the first element.
540 * Then return the new parent.
541 * The new EQN_SUBEXPR will have a two-child limit.
542 */
543 static struct eqn_box *
544 eqn_box_makebinary(struct eqn_node *ep,
545 enum eqn_post pos, struct eqn_box *parent)
546 {
547 struct eqn_box *b, *newb;
548
549 assert(NULL != parent->last);
550 b = parent->last;
551 if (parent->last == parent->first)
552 parent->first = NULL;
553 parent->args--;
554 parent->last = b->prev;
555 b->prev = NULL;
556 newb = eqn_box_alloc(ep, parent);
557 newb->pos = pos;
558 newb->type = EQN_SUBEXPR;
559 newb->expectargs = 2;
560 newb->args = 1;
561 newb->first = newb->last = b;
562 newb->first->next = NULL;
563 b->parent = newb;
564 return newb;
565 }
566
567 /*
568 * Parse the "delim" control statement.
569 */
570 static void
571 eqn_delim(struct eqn_node *ep)
572 {
573 if (ep->end[0] == '\0' || ep->end[1] == '\0') {
574 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
575 ep->eqn.ln, ep->eqn.pos, "delim");
576 if (ep->end[0] != '\0')
577 ep->end++;
578 } else if (strncmp(ep->end, "off", 3) == 0) {
579 ep->delim = 0;
580 ep->end += 3;
581 } else if (strncmp(ep->end, "on", 2) == 0) {
582 if (ep->odelim && ep->cdelim)
583 ep->delim = 1;
584 ep->end += 2;
585 } else {
586 ep->odelim = *ep->end++;
587 ep->cdelim = *ep->end++;
588 ep->delim = 1;
589 }
590 }
591
592 /*
593 * Undefine a previously-defined string.
594 */
595 static void
596 eqn_undef(struct eqn_node *ep)
597 {
598 struct eqn_def *def;
599
600 if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
601 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
602 ep->eqn.ln, ep->eqn.pos, "undef");
603 return;
604 }
605 if ((def = eqn_def_find(ep)) == NULL)
606 return;
607 free(def->key);
608 free(def->val);
609 def->key = def->val = NULL;
610 def->keysz = def->valsz = 0;
611 }
612
613 static void
614 eqn_def(struct eqn_node *ep)
615 {
616 struct eqn_def *def;
617 int i;
618
619 if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
620 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
621 ep->eqn.ln, ep->eqn.pos, "define");
622 return;
623 }
624
625 /*
626 * Search for a key that already exists.
627 * Create a new key if none is found.
628 */
629 if ((def = eqn_def_find(ep)) == NULL) {
630 /* Find holes in string array. */
631 for (i = 0; i < (int)ep->defsz; i++)
632 if (0 == ep->defs[i].keysz)
633 break;
634
635 if (i == (int)ep->defsz) {
636 ep->defsz++;
637 ep->defs = mandoc_reallocarray(ep->defs,
638 ep->defsz, sizeof(struct eqn_def));
639 ep->defs[i].key = ep->defs[i].val = NULL;
640 }
641
642 def = ep->defs + i;
643 free(def->key);
644 def->key = mandoc_strndup(ep->start, ep->toksz);
645 def->keysz = ep->toksz;
646 }
647
648 if (eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) {
649 mandoc_vmsg(MANDOCERR_REQ_EMPTY, ep->parse,
650 ep->eqn.ln, ep->eqn.pos, "define %s", def->key);
651 free(def->key);
652 free(def->val);
653 def->key = def->val = NULL;
654 def->keysz = def->valsz = 0;
655 return;
656 }
657 free(def->val);
658 def->val = mandoc_strndup(ep->start, ep->toksz);
659 def->valsz = ep->toksz;
660 }
661
662 /*
663 * Recursively parse an eqn(7) expression.
664 */
665 static enum rofferr
666 eqn_parse(struct eqn_node *ep, struct eqn_box *parent)
667 {
668 struct eqn_box *cur, *nbox, *split;
669 const char *cp, *cpn;
670 char *p;
671 enum eqn_tok tok;
672 enum eqn_post pos;
673 enum { CCL_LET, CCL_DIG, CCL_PUN } ccl, ccln;
674 int size;
675
676 assert(parent != NULL);
677
678 /*
679 * Empty equation.
680 * Do not add it to the high-level syntax tree.
681 */
682
683 if (ep->data == NULL)
684 return ROFF_IGN;
685
686 ep->start = ep->end = ep->data + strspn(ep->data, " ^~");
687
688 next_tok:
689 tok = eqn_next(ep, MODE_TOK);
690 switch (tok) {
691 case EQN_TOK_UNDEF:
692 eqn_undef(ep);
693 break;
694 case EQN_TOK_NDEFINE:
695 case EQN_TOK_DEFINE:
696 eqn_def(ep);
697 break;
698 case EQN_TOK_TDEFINE:
699 if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF ||
700 eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF)
701 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
702 ep->eqn.ln, ep->eqn.pos, "tdefine");
703 break;
704 case EQN_TOK_DELIM:
705 eqn_delim(ep);
706 break;
707 case EQN_TOK_GFONT:
708 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
709 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
710 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
711 break;
712 case EQN_TOK_MARK:
713 case EQN_TOK_LINEUP:
714 /* Ignore these. */
715 break;
716 case EQN_TOK_DYAD:
717 case EQN_TOK_VEC:
718 case EQN_TOK_UNDER:
719 case EQN_TOK_BAR:
720 case EQN_TOK_TILDE:
721 case EQN_TOK_HAT:
722 case EQN_TOK_DOT:
723 case EQN_TOK_DOTDOT:
724 if (parent->last == NULL) {
725 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
726 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
727 cur = eqn_box_alloc(ep, parent);
728 cur->type = EQN_TEXT;
729 cur->text = mandoc_strdup("");
730 }
731 parent = eqn_box_makebinary(ep, EQNPOS_NONE, parent);
732 parent->type = EQN_LISTONE;
733 parent->expectargs = 1;
734 parent->font = EQNFONT_ROMAN;
735 switch (tok) {
736 case EQN_TOK_DOTDOT:
737 parent->top = mandoc_strdup("\\[ad]");
738 break;
739 case EQN_TOK_VEC:
740 parent->top = mandoc_strdup("\\[->]");
741 break;
742 case EQN_TOK_DYAD:
743 parent->top = mandoc_strdup("\\[<>]");
744 break;
745 case EQN_TOK_TILDE:
746 parent->top = mandoc_strdup("\\[a~]");
747 break;
748 case EQN_TOK_UNDER:
749 parent->bottom = mandoc_strdup("\\[ul]");
750 break;
751 case EQN_TOK_BAR:
752 parent->top = mandoc_strdup("\\[rl]");
753 break;
754 case EQN_TOK_DOT:
755 parent->top = mandoc_strdup("\\[a.]");
756 break;
757 case EQN_TOK_HAT:
758 parent->top = mandoc_strdup("\\[ha]");
759 break;
760 default:
761 abort();
762 }
763 parent = parent->parent;
764 break;
765 case EQN_TOK_FWD:
766 case EQN_TOK_BACK:
767 case EQN_TOK_DOWN:
768 case EQN_TOK_UP:
769 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
770 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
771 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
772 break;
773 case EQN_TOK_FAT:
774 case EQN_TOK_ROMAN:
775 case EQN_TOK_ITALIC:
776 case EQN_TOK_BOLD:
777 while (parent->args == parent->expectargs)
778 parent = parent->parent;
779 /*
780 * These values apply to the next word or sequence of
781 * words; thus, we mark that we'll have a child with
782 * exactly one of those.
783 */
784 parent = eqn_box_alloc(ep, parent);
785 parent->type = EQN_LISTONE;
786 parent->expectargs = 1;
787 switch (tok) {
788 case EQN_TOK_FAT:
789 parent->font = EQNFONT_FAT;
790 break;
791 case EQN_TOK_ROMAN:
792 parent->font = EQNFONT_ROMAN;
793 break;
794 case EQN_TOK_ITALIC:
795 parent->font = EQNFONT_ITALIC;
796 break;
797 case EQN_TOK_BOLD:
798 parent->font = EQNFONT_BOLD;
799 break;
800 default:
801 abort();
802 }
803 break;
804 case EQN_TOK_SIZE:
805 case EQN_TOK_GSIZE:
806 /* Accept two values: integral size and a single. */
807 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
808 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
809 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
810 break;
811 }
812 size = mandoc_strntoi(ep->start, ep->toksz, 10);
813 if (-1 == size) {
814 mandoc_msg(MANDOCERR_IT_NONUM, ep->parse,
815 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
816 break;
817 }
818 if (EQN_TOK_GSIZE == tok) {
819 ep->gsize = size;
820 break;
821 }
822 parent = eqn_box_alloc(ep, parent);
823 parent->type = EQN_LISTONE;
824 parent->expectargs = 1;
825 parent->size = size;
826 break;
827 case EQN_TOK_FROM:
828 case EQN_TOK_TO:
829 case EQN_TOK_SUB:
830 case EQN_TOK_SUP:
831 /*
832 * We have a left-right-associative expression.
833 * Repivot under a positional node, open a child scope
834 * and keep on reading.
835 */
836 if (parent->last == NULL) {
837 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
838 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
839 cur = eqn_box_alloc(ep, parent);
840 cur->type = EQN_TEXT;
841 cur->text = mandoc_strdup("");
842 }
843 /* Handle the "subsup" and "fromto" positions. */
844 if (EQN_TOK_SUP == tok && parent->pos == EQNPOS_SUB) {
845 parent->expectargs = 3;
846 parent->pos = EQNPOS_SUBSUP;
847 break;
848 }
849 if (EQN_TOK_TO == tok && parent->pos == EQNPOS_FROM) {
850 parent->expectargs = 3;
851 parent->pos = EQNPOS_FROMTO;
852 break;
853 }
854 switch (tok) {
855 case EQN_TOK_FROM:
856 pos = EQNPOS_FROM;
857 break;
858 case EQN_TOK_TO:
859 pos = EQNPOS_TO;
860 break;
861 case EQN_TOK_SUP:
862 pos = EQNPOS_SUP;
863 break;
864 case EQN_TOK_SUB:
865 pos = EQNPOS_SUB;
866 break;
867 default:
868 abort();
869 }
870 parent = eqn_box_makebinary(ep, pos, parent);
871 break;
872 case EQN_TOK_SQRT:
873 while (parent->args == parent->expectargs)
874 parent = parent->parent;
875 /*
876 * Accept a left-right-associative set of arguments just
877 * like sub and sup and friends but without rebalancing
878 * under a pivot.
879 */
880 parent = eqn_box_alloc(ep, parent);
881 parent->type = EQN_SUBEXPR;
882 parent->pos = EQNPOS_SQRT;
883 parent->expectargs = 1;
884 break;
885 case EQN_TOK_OVER:
886 /*
887 * We have a right-left-associative fraction.
888 * Close out anything that's currently open, then
889 * rebalance and continue reading.
890 */
891 if (parent->last == NULL) {
892 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
893 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
894 cur = eqn_box_alloc(ep, parent);
895 cur->type = EQN_TEXT;
896 cur->text = mandoc_strdup("");
897 }
898 while (EQN_SUBEXPR == parent->type)
899 parent = parent->parent;
900 parent = eqn_box_makebinary(ep, EQNPOS_OVER, parent);
901 break;
902 case EQN_TOK_RIGHT:
903 case EQN_TOK_BRACE_CLOSE:
904 /*
905 * Close out the existing brace.
906 * FIXME: this is a shitty sentinel: we should really
907 * have a native EQN_BRACE type or whatnot.
908 */
909 for (cur = parent; cur != NULL; cur = cur->parent)
910 if (cur->type == EQN_LIST &&
911 (tok == EQN_TOK_BRACE_CLOSE ||
912 cur->left != NULL))
913 break;
914 if (cur == NULL) {
915 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->parse,
916 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
917 break;
918 }
919 parent = cur;
920 if (EQN_TOK_RIGHT == tok) {
921 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
922 mandoc_msg(MANDOCERR_REQ_EMPTY,
923 ep->parse, ep->eqn.ln,
924 ep->eqn.pos, eqn_toks[tok]);
925 break;
926 }
927 /* Handling depends on right/left. */
928 if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
929 parent->right = mandoc_strdup("\\[rc]");
930 else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
931 parent->right = mandoc_strdup("\\[rf]");
932 else
933 parent->right =
934 mandoc_strndup(ep->start, ep->toksz);
935 }
936 parent = parent->parent;
937 if (tok == EQN_TOK_BRACE_CLOSE &&
938 (parent->type == EQN_PILE ||
939 parent->type == EQN_MATRIX))
940 parent = parent->parent;
941 /* Close out any "singleton" lists. */
942 while (parent->type == EQN_LISTONE &&
943 parent->args == parent->expectargs)
944 parent = parent->parent;
945 break;
946 case EQN_TOK_BRACE_OPEN:
947 case EQN_TOK_LEFT:
948 /*
949 * If we already have something in the stack and we're
950 * in an expression, then rewind til we're not any more
951 * (just like with the text node).
952 */
953 while (parent->args == parent->expectargs)
954 parent = parent->parent;
955 if (EQN_TOK_LEFT == tok &&
956 eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
957 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
958 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
959 break;
960 }
961 parent = eqn_box_alloc(ep, parent);
962 parent->type = EQN_LIST;
963 if (EQN_TOK_LEFT == tok) {
964 if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
965 parent->left = mandoc_strdup("\\[lc]");
966 else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
967 parent->left = mandoc_strdup("\\[lf]");
968 else
969 parent->left =
970 mandoc_strndup(ep->start, ep->toksz);
971 }
972 break;
973 case EQN_TOK_PILE:
974 case EQN_TOK_LPILE:
975 case EQN_TOK_RPILE:
976 case EQN_TOK_CPILE:
977 case EQN_TOK_CCOL:
978 case EQN_TOK_LCOL:
979 case EQN_TOK_RCOL:
980 while (parent->args == parent->expectargs)
981 parent = parent->parent;
982 parent = eqn_box_alloc(ep, parent);
983 parent->type = EQN_PILE;
984 parent->expectargs = 1;
985 break;
986 case EQN_TOK_ABOVE:
987 for (cur = parent; cur != NULL; cur = cur->parent)
988 if (cur->type == EQN_PILE)
989 break;
990 if (cur == NULL) {
991 mandoc_msg(MANDOCERR_IT_STRAY, ep->parse,
992 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
993 break;
994 }
995 parent = eqn_box_alloc(ep, cur);
996 parent->type = EQN_LIST;
997 break;
998 case EQN_TOK_MATRIX:
999 while (parent->args == parent->expectargs)
1000 parent = parent->parent;
1001 parent = eqn_box_alloc(ep, parent);
1002 parent->type = EQN_MATRIX;
1003 parent->expectargs = 1;
1004 break;
1005 case EQN_TOK_EOF:
1006 /*
1007 * End of file!
1008 * TODO: make sure we're not in an open subexpression.
1009 */
1010 return ROFF_EQN;
1011 case EQN_TOK__MAX:
1012 case EQN_TOK_FUNC:
1013 case EQN_TOK_QUOTED:
1014 case EQN_TOK_SYM:
1015 p = ep->start;
1016 assert(p != NULL);
1017 /*
1018 * If we already have something in the stack and we're
1019 * in an expression, then rewind til we're not any more.
1020 */
1021 while (parent->args == parent->expectargs)
1022 parent = parent->parent;
1023 cur = eqn_box_alloc(ep, parent);
1024 cur->type = EQN_TEXT;
1025 cur->text = p;
1026 switch (tok) {
1027 case EQN_TOK_FUNC:
1028 cur->font = EQNFONT_ROMAN;
1029 break;
1030 case EQN_TOK_QUOTED:
1031 if (cur->font == EQNFONT_NONE)
1032 cur->font = EQNFONT_ITALIC;
1033 break;
1034 case EQN_TOK_SYM:
1035 break;
1036 default:
1037 if (cur->font != EQNFONT_NONE || *p == '\0')
1038 break;
1039 cpn = p - 1;
1040 ccln = CCL_LET;
1041 split = NULL;
1042 for (;;) {
1043 /* Advance to next character. */
1044 cp = cpn++;
1045 ccl = ccln;
1046 ccln = isalpha((unsigned char)*cpn) ? CCL_LET :
1047 isdigit((unsigned char)*cpn) ||
1048 (*cpn == '.' && (ccl == CCL_DIG ||
1049 isdigit((unsigned char)cpn[1]))) ?
1050 CCL_DIG : CCL_PUN;
1051 /* No boundary before first character. */
1052 if (cp < p)
1053 continue;
1054 cur->font = ccl == CCL_LET ?
1055 EQNFONT_ITALIC : EQNFONT_ROMAN;
1056 if (*cp == '\\')
1057 mandoc_escape(&cpn, NULL, NULL);
1058 /* No boundary after last character. */
1059 if (*cpn == '\0')
1060 break;
1061 if (ccln == ccl)
1062 continue;
1063 /* Boundary found, split the text. */
1064 if (parent->args == parent->expectargs) {
1065 /* Remove the text from the tree. */
1066 if (cur->prev == NULL)
1067 parent->first = cur->next;
1068 else
1069 cur->prev->next = NULL;
1070 parent->last = cur->prev;
1071 parent->args--;
1072 /* Set up a list instead. */
1073 split = eqn_box_alloc(ep, parent);
1074 split->type = EQN_LIST;
1075 /* Insert the word into the list. */
1076 split->first = split->last = cur;
1077 cur->parent = split;
1078 cur->prev = NULL;
1079 parent = split;
1080 }
1081 /* Append a new text box. */
1082 nbox = eqn_box_alloc(ep, parent);
1083 nbox->type = EQN_TEXT;
1084 nbox->text = mandoc_strdup(cpn);
1085 /* Truncate the old box. */
1086 p = mandoc_strndup(cur->text,
1087 cpn - cur->text);
1088 free(cur->text);
1089 cur->text = p;
1090 /* Setup to process the new box. */
1091 cur = nbox;
1092 p = nbox->text;
1093 cpn = p - 1;
1094 ccln = CCL_LET;
1095 }
1096 if (split != NULL)
1097 parent = split->parent;
1098 break;
1099 }
1100 /*
1101 * Post-process list status.
1102 */
1103 while (parent->type == EQN_LISTONE &&
1104 parent->args == parent->expectargs)
1105 parent = parent->parent;
1106 break;
1107 default:
1108 abort();
1109 }
1110 goto next_tok;
1111 }
1112
1113 enum rofferr
1114 eqn_end(struct eqn_node **epp)
1115 {
1116 struct eqn_node *ep;
1117
1118 ep = *epp;
1119 *epp = NULL;
1120
1121 ep->eqn.root = mandoc_calloc(1, sizeof(struct eqn_box));
1122 ep->eqn.root->expectargs = UINT_MAX;
1123 return eqn_parse(ep, ep->eqn.root);
1124 }
1125
1126 void
1127 eqn_free(struct eqn_node *p)
1128 {
1129 int i;
1130
1131 eqn_box_free(p->eqn.root);
1132
1133 for (i = 0; i < (int)p->defsz; i++) {
1134 free(p->defs[i].key);
1135 free(p->defs[i].val);
1136 }
1137
1138 free(p->data);
1139 free(p->defs);
1140 free(p);
1141 }