]> git.cameronkatri.com Git - mandoc.git/blob - eqn.c
operating system dependent message about unknown architecture;
[mandoc.git] / eqn.c
1 /* $Id: eqn.c,v 1.69 2017/06/23 21:04:57 schwarze Exp $
2 /*
3 * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <time.h>
29
30 #include "mandoc.h"
31 #include "mandoc_aux.h"
32 #include "libmandoc.h"
33 #include "libroff.h"
34
/* Upper limit for the expansion counter kept by eqn_next(). */
#define	EQN_NEST_MAX	 128

/*
 * Compare two counted strings: true only if both lengths agree and
 * the first "sz1" bytes match.  Note that "sz1" is evaluated twice.
 */
#define	STRNEQ(p1, sz1, p2, sz2) \
	((sz1) == (sz2) && strncmp((p1), (p2), (sz1)) == 0)
38
/*
 * Token classes produced by eqn_tok_parse().
 * The values below EQN_TOK__MAX double as indices into eqn_toks[];
 * the values above it have no fixed spelling.
 */
enum eqn_tok {
	/* Marks placed above or below the preceding box. */
	EQN_TOK_DYAD = 0, EQN_TOK_VEC, EQN_TOK_UNDER, EQN_TOK_BAR,
	EQN_TOK_TILDE, EQN_TOK_HAT, EQN_TOK_DOT, EQN_TOK_DOTDOT,

	/* Keywords taking one argument that the parser discards. */
	EQN_TOK_FWD, EQN_TOK_BACK, EQN_TOK_DOWN, EQN_TOK_UP,

	/* Font and size selection. */
	EQN_TOK_FAT, EQN_TOK_ROMAN, EQN_TOK_ITALIC, EQN_TOK_BOLD,
	EQN_TOK_SIZE,

	/* Positional operators. */
	EQN_TOK_SUB, EQN_TOK_SUP, EQN_TOK_SQRT, EQN_TOK_OVER,
	EQN_TOK_FROM, EQN_TOK_TO,

	/* Grouping. */
	EQN_TOK_BRACE_OPEN, EQN_TOK_BRACE_CLOSE,

	/* Global settings and ignored keywords. */
	EQN_TOK_GSIZE, EQN_TOK_GFONT, EQN_TOK_MARK, EQN_TOK_LINEUP,

	/* Delimiters, piles and matrices. */
	EQN_TOK_LEFT, EQN_TOK_RIGHT,
	EQN_TOK_PILE, EQN_TOK_LPILE, EQN_TOK_RPILE, EQN_TOK_CPILE,
	EQN_TOK_MATRIX, EQN_TOK_CCOL, EQN_TOK_LCOL, EQN_TOK_RCOL,

	/* Control statements. */
	EQN_TOK_DELIM, EQN_TOK_DEFINE, EQN_TOK_TDEFINE,
	EQN_TOK_NDEFINE, EQN_TOK_UNDEF,

	EQN_TOK_ABOVE,

	EQN_TOK__MAX,	/* number of keywords; also: unclassified text */
	EQN_TOK_FUNC,	/* a name listed in eqn_func[] */
	EQN_TOK_QUOTED,	/* a token that started with a double quote */
	EQN_TOK_SYM,	/* a name listed in eqnsyms[] */
	EQN_TOK_EOF	/* no more tokens */
};
91
92 static const char *eqn_toks[EQN_TOK__MAX] = {
93 "dyad", /* EQN_TOK_DYAD */
94 "vec", /* EQN_TOK_VEC */
95 "under", /* EQN_TOK_UNDER */
96 "bar", /* EQN_TOK_BAR */
97 "tilde", /* EQN_TOK_TILDE */
98 "hat", /* EQN_TOK_HAT */
99 "dot", /* EQN_TOK_DOT */
100 "dotdot", /* EQN_TOK_DOTDOT */
101 "fwd", /* EQN_TOK_FWD * */
102 "back", /* EQN_TOK_BACK */
103 "down", /* EQN_TOK_DOWN */
104 "up", /* EQN_TOK_UP */
105 "fat", /* EQN_TOK_FAT */
106 "roman", /* EQN_TOK_ROMAN */
107 "italic", /* EQN_TOK_ITALIC */
108 "bold", /* EQN_TOK_BOLD */
109 "size", /* EQN_TOK_SIZE */
110 "sub", /* EQN_TOK_SUB */
111 "sup", /* EQN_TOK_SUP */
112 "sqrt", /* EQN_TOK_SQRT */
113 "over", /* EQN_TOK_OVER */
114 "from", /* EQN_TOK_FROM */
115 "to", /* EQN_TOK_TO */
116 "{", /* EQN_TOK_BRACE_OPEN */
117 "}", /* EQN_TOK_BRACE_CLOSE */
118 "gsize", /* EQN_TOK_GSIZE */
119 "gfont", /* EQN_TOK_GFONT */
120 "mark", /* EQN_TOK_MARK */
121 "lineup", /* EQN_TOK_LINEUP */
122 "left", /* EQN_TOK_LEFT */
123 "right", /* EQN_TOK_RIGHT */
124 "pile", /* EQN_TOK_PILE */
125 "lpile", /* EQN_TOK_LPILE */
126 "rpile", /* EQN_TOK_RPILE */
127 "cpile", /* EQN_TOK_CPILE */
128 "matrix", /* EQN_TOK_MATRIX */
129 "ccol", /* EQN_TOK_CCOL */
130 "lcol", /* EQN_TOK_LCOL */
131 "rcol", /* EQN_TOK_RCOL */
132 "delim", /* EQN_TOK_DELIM */
133 "define", /* EQN_TOK_DEFINE */
134 "tdefine", /* EQN_TOK_TDEFINE */
135 "ndefine", /* EQN_TOK_NDEFINE */
136 "undef", /* EQN_TOK_UNDEF */
137 "above", /* EQN_TOK_ABOVE */
138 };
139
/*
 * Names that eqn_tok_parse() reports as EQN_TOK_FUNC; eqn_parse()
 * gives such text boxes the roman font.
 */
static const char *const eqn_func[] = {
	"acos",
	"acsc",
	"and",
	"arc",
	"asec",
	"asin",
	"atan",
	"cos",
	"cosh",
	"coth",
	"csc",
	"det",
	"exp",
	"for",
	"if",
	"lim",
	"ln",
	"log",
	"max",
	"min",
	"sec",
	"sin",
	"sinh",
	"tan",
	"tanh",
	"Im",
	"Re",
};
146
/*
 * Indices into the eqnsyms[] table; EQNSYM__MAX is the number of
 * entries.  The order must match the order of the table rows.
 */
enum eqn_symt {
	EQNSYM_alpha,
	EQNSYM_beta,
	EQNSYM_chi,
	EQNSYM_delta,
	EQNSYM_epsilon,
	EQNSYM_eta,
	EQNSYM_gamma,
	EQNSYM_iota,
	EQNSYM_kappa,
	EQNSYM_lambda,
	EQNSYM_mu,
	EQNSYM_nu,
	EQNSYM_omega,
	EQNSYM_omicron,
	EQNSYM_phi,
	EQNSYM_pi,
	EQNSYM_psi,
	/* Former misspelling of EQNSYM_psi, kept as an alias. */
	EQNSYM_ps = EQNSYM_psi,
	EQNSYM_rho,
	EQNSYM_sigma,
	EQNSYM_tau,
	EQNSYM_theta,
	EQNSYM_upsilon,
	EQNSYM_xi,
	EQNSYM_zeta,
	EQNSYM_DELTA,
	EQNSYM_GAMMA,
	EQNSYM_LAMBDA,
	EQNSYM_OMEGA,
	EQNSYM_PHI,
	EQNSYM_PI,
	EQNSYM_PSI,
	EQNSYM_SIGMA,
	EQNSYM_THETA,
	EQNSYM_UPSILON,
	EQNSYM_XI,
	EQNSYM_inter,
	EQNSYM_union,
	EQNSYM_prod,
	EQNSYM_int,
	EQNSYM_sum,
	EQNSYM_grad,
	EQNSYM_del,
	EQNSYM_times,
	EQNSYM_cdot,
	EQNSYM_nothing,
	EQNSYM_approx,
	EQNSYM_prime,
	EQNSYM_half,
	EQNSYM_partial,
	EQNSYM_inf,
	EQNSYM_muchgreat,
	EQNSYM_muchless,
	EQNSYM_larrow,
	EQNSYM_rarrow,
	EQNSYM_pm,
	EQNSYM_nequal,
	EQNSYM_equiv,
	EQNSYM_lessequal,
	EQNSYM_moreequal,
	EQNSYM_minus,
	EQNSYM__MAX
};
210
/* One row of the symbol table eqnsyms[]. */
struct eqnsym {
	const char *str;	/* spelling of the symbol in eqn input */
	const char *sym;	/* name that eqn_tok_parse() puts into "\[...]" */
};
215
216 static const struct eqnsym eqnsyms[EQNSYM__MAX] = {
217 { "alpha", "*a" }, /* EQNSYM_alpha */
218 { "beta", "*b" }, /* EQNSYM_beta */
219 { "chi", "*x" }, /* EQNSYM_chi */
220 { "delta", "*d" }, /* EQNSYM_delta */
221 { "epsilon", "*e" }, /* EQNSYM_epsilon */
222 { "eta", "*y" }, /* EQNSYM_eta */
223 { "gamma", "*g" }, /* EQNSYM_gamma */
224 { "iota", "*i" }, /* EQNSYM_iota */
225 { "kappa", "*k" }, /* EQNSYM_kappa */
226 { "lambda", "*l" }, /* EQNSYM_lambda */
227 { "mu", "*m" }, /* EQNSYM_mu */
228 { "nu", "*n" }, /* EQNSYM_nu */
229 { "omega", "*w" }, /* EQNSYM_omega */
230 { "omicron", "*o" }, /* EQNSYM_omicron */
231 { "phi", "*f" }, /* EQNSYM_phi */
232 { "pi", "*p" }, /* EQNSYM_pi */
233 { "psi", "*q" }, /* EQNSYM_psi */
234 { "rho", "*r" }, /* EQNSYM_rho */
235 { "sigma", "*s" }, /* EQNSYM_sigma */
236 { "tau", "*t" }, /* EQNSYM_tau */
237 { "theta", "*h" }, /* EQNSYM_theta */
238 { "upsilon", "*u" }, /* EQNSYM_upsilon */
239 { "xi", "*c" }, /* EQNSYM_xi */
240 { "zeta", "*z" }, /* EQNSYM_zeta */
241 { "DELTA", "*D" }, /* EQNSYM_DELTA */
242 { "GAMMA", "*G" }, /* EQNSYM_GAMMA */
243 { "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */
244 { "OMEGA", "*W" }, /* EQNSYM_OMEGA */
245 { "PHI", "*F" }, /* EQNSYM_PHI */
246 { "PI", "*P" }, /* EQNSYM_PI */
247 { "PSI", "*Q" }, /* EQNSYM_PSI */
248 { "SIGMA", "*S" }, /* EQNSYM_SIGMA */
249 { "THETA", "*H" }, /* EQNSYM_THETA */
250 { "UPSILON", "*U" }, /* EQNSYM_UPSILON */
251 { "XI", "*C" }, /* EQNSYM_XI */
252 { "inter", "ca" }, /* EQNSYM_inter */
253 { "union", "cu" }, /* EQNSYM_union */
254 { "prod", "product" }, /* EQNSYM_prod */
255 { "int", "integral" }, /* EQNSYM_int */
256 { "sum", "sum" }, /* EQNSYM_sum */
257 { "grad", "gr" }, /* EQNSYM_grad */
258 { "del", "gr" }, /* EQNSYM_del */
259 { "times", "mu" }, /* EQNSYM_times */
260 { "cdot", "pc" }, /* EQNSYM_cdot */
261 { "nothing", "&" }, /* EQNSYM_nothing */
262 { "approx", "~~" }, /* EQNSYM_approx */
263 { "prime", "fm" }, /* EQNSYM_prime */
264 { "half", "12" }, /* EQNSYM_half */
265 { "partial", "pd" }, /* EQNSYM_partial */
266 { "inf", "if" }, /* EQNSYM_inf */
267 { ">>", ">>" }, /* EQNSYM_muchgreat */
268 { "<<", "<<" }, /* EQNSYM_muchless */
269 { "<-", "<-" }, /* EQNSYM_larrow */
270 { "->", "->" }, /* EQNSYM_rarrow */
271 { "+-", "+-" }, /* EQNSYM_pm */
272 { "!=", "!=" }, /* EQNSYM_nequal */
273 { "==", "==" }, /* EQNSYM_equiv */
274 { "<=", "<=" }, /* EQNSYM_lessequal */
275 { ">=", ">=" }, /* EQNSYM_moreequal */
276 { "-", "mi" }, /* EQNSYM_minus */
277 };
278
/* Box tree construction and destruction. */
static	struct eqn_box	*eqn_box_alloc(struct eqn_node *ep,
				struct eqn_box *parent);
static	struct eqn_box	*eqn_box_makebinary(struct eqn_node *ep,
				enum eqn_post pos, struct eqn_box *parent);
static	void		 eqn_box_free(struct eqn_box *bp);

/* Control statements: define, undef, delim. */
static	void		 eqn_def(struct eqn_node *ep);
static	struct eqn_def	*eqn_def_find(struct eqn_node *ep,
				const char *key, size_t sz);
static	void		 eqn_undef(struct eqn_node *ep);
static	void		 eqn_delim(struct eqn_node *ep);

/* Tokenizer. */
static	const char	*eqn_next(struct eqn_node *ep, char quote,
				size_t *sz, int repl);
static	const char	*eqn_nextrawtok(struct eqn_node *ep, size_t *sz);
static	const char	*eqn_nexttok(struct eqn_node *ep, size_t *sz);
static	enum eqn_tok	 eqn_tok_parse(struct eqn_node *ep, char **p);

/* Parser. */
static	enum rofferr	 eqn_parse(struct eqn_node *ep,
				struct eqn_box *parent);
292
293
294 enum rofferr
295 eqn_read(struct eqn_node **epp, int ln,
296 const char *p, int pos, int *offs)
297 {
298 size_t sz;
299 struct eqn_node *ep;
300 enum rofferr er;
301
302 ep = *epp;
303
304 /*
305 * If we're the terminating mark, unset our equation status and
306 * validate the full equation.
307 */
308
309 if (0 == strncmp(p, ".EN", 3)) {
310 er = eqn_end(epp);
311 p += 3;
312 while (' ' == *p || '\t' == *p)
313 p++;
314 if ('\0' == *p)
315 return er;
316 mandoc_vmsg(MANDOCERR_ARG_SKIP, ep->parse,
317 ln, pos, "EN %s", p);
318 return er;
319 }
320
321 /*
322 * Build up the full string, replacing all newlines with regular
323 * whitespace.
324 */
325
326 sz = strlen(p + pos) + 1;
327 ep->data = mandoc_realloc(ep->data, ep->sz + sz + 1);
328
329 /* First invocation: nil terminate the string. */
330
331 if (0 == ep->sz)
332 *ep->data = '\0';
333
334 ep->sz += sz;
335 strlcat(ep->data, p + pos, ep->sz + 1);
336 strlcat(ep->data, " ", ep->sz + 1);
337 return ROFF_IGN;
338 }
339
340 struct eqn_node *
341 eqn_alloc(int pos, int line, struct mparse *parse)
342 {
343 struct eqn_node *p;
344
345 p = mandoc_calloc(1, sizeof(struct eqn_node));
346
347 p->parse = parse;
348 p->eqn.ln = line;
349 p->eqn.pos = pos;
350 p->gsize = EQN_DEFSIZE;
351
352 return p;
353 }
354
355 /*
356 * Find the key "key" of the give size within our eqn-defined values.
357 */
358 static struct eqn_def *
359 eqn_def_find(struct eqn_node *ep, const char *key, size_t sz)
360 {
361 int i;
362
363 for (i = 0; i < (int)ep->defsz; i++)
364 if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key,
365 ep->defs[i].keysz, key, sz))
366 return &ep->defs[i];
367
368 return NULL;
369 }
370
371 /*
372 * Get the next token from the input stream using the given quote
373 * character.
374 * Optionally make any replacements.
375 */
376 static const char *
377 eqn_next(struct eqn_node *ep, char quote, size_t *sz, int repl)
378 {
379 static size_t last_len;
380 static int lim;
381
382 char *start, *next;
383 int q, diff;
384 size_t ssz, dummy;
385 struct eqn_def *def;
386
387 if (NULL == sz)
388 sz = &dummy;
389
390 if (ep->cur >= last_len)
391 lim = 0;
392 ep->rew = ep->cur;
393 again:
394 /* Prevent self-definitions. */
395
396 if (lim >= EQN_NEST_MAX) {
397 mandoc_msg(MANDOCERR_ROFFLOOP, ep->parse,
398 ep->eqn.ln, ep->eqn.pos, NULL);
399 return NULL;
400 }
401
402 ep->cur = ep->rew;
403 start = &ep->data[(int)ep->cur];
404 q = 0;
405
406 if ('\0' == *start)
407 return NULL;
408
409 if (quote == *start) {
410 ep->cur++;
411 q = 1;
412 }
413
414 start = &ep->data[(int)ep->cur];
415
416 if ( ! q) {
417 if ('{' == *start || '}' == *start)
418 ssz = 1;
419 else
420 ssz = strcspn(start + 1, " ^~\"{}\t") + 1;
421 next = start + (int)ssz;
422 if ('\0' == *next)
423 next = NULL;
424 } else
425 next = strchr(start, quote);
426
427 if (NULL != next) {
428 *sz = (size_t)(next - start);
429 ep->cur += *sz;
430 if (q)
431 ep->cur++;
432 while (' ' == ep->data[(int)ep->cur] ||
433 '\t' == ep->data[(int)ep->cur] ||
434 '^' == ep->data[(int)ep->cur] ||
435 '~' == ep->data[(int)ep->cur])
436 ep->cur++;
437 } else {
438 if (q)
439 mandoc_msg(MANDOCERR_ARG_QUOTE, ep->parse,
440 ep->eqn.ln, ep->eqn.pos, NULL);
441 next = strchr(start, '\0');
442 *sz = (size_t)(next - start);
443 ep->cur += *sz;
444 }
445
446 /* Quotes aren't expanded for values. */
447
448 if (q || ! repl)
449 return start;
450
451 if (NULL != (def = eqn_def_find(ep, start, *sz))) {
452 diff = def->valsz - *sz;
453
454 if (def->valsz > *sz) {
455 ep->sz += diff;
456 ep->data = mandoc_realloc(ep->data, ep->sz + 1);
457 ep->data[ep->sz] = '\0';
458 start = &ep->data[(int)ep->rew];
459 }
460
461 diff = def->valsz - *sz;
462 memmove(start + *sz + diff, start + *sz,
463 (strlen(start) - *sz) + 1);
464 memcpy(start, def->val, def->valsz);
465 last_len = start - ep->data + def->valsz;
466 lim++;
467 goto again;
468 }
469
470 return start;
471 }
472
/*
 * Fetch the next token with the double quote as the quote character
 * and with "define" replacement enabled.
 */
static const char *
eqn_nexttok(struct eqn_node *ep, size_t *sz)
{
	const char	*tok;

	tok = eqn_next(ep, '"', sz, 1);
	return tok;
}
483
/*
 * Fetch the next token with the double quote as the quote character,
 * but without "define" replacement.
 */
static const char *
eqn_nextrawtok(struct eqn_node *ep, size_t *sz)
{
	const char	*tok;

	tok = eqn_next(ep, '"', sz, 0);
	return tok;
}
493
494 /*
495 * Parse a token from the stream of text.
496 * A token consists of one of the recognised eqn(7) strings.
497 * Strings are separated by delimiting marks.
498 * This returns EQN_TOK_EOF when there are no more tokens.
499 * If the token is an unrecognised string literal, then it returns
500 * EQN_TOK__MAX and sets the "p" pointer to an allocated, nil-terminated
501 * string.
502 * This must be later freed with free(3).
503 */
504 static enum eqn_tok
505 eqn_tok_parse(struct eqn_node *ep, char **p)
506 {
507 const char *start;
508 size_t i, sz;
509 int quoted;
510
511 if (p != NULL)
512 *p = NULL;
513
514 quoted = ep->data[ep->cur] == '"';
515
516 if ((start = eqn_nexttok(ep, &sz)) == NULL)
517 return EQN_TOK_EOF;
518
519 if (quoted) {
520 if (p != NULL)
521 *p = mandoc_strndup(start, sz);
522 return EQN_TOK_QUOTED;
523 }
524
525 for (i = 0; i < EQN_TOK__MAX; i++)
526 if (STRNEQ(start, sz, eqn_toks[i], strlen(eqn_toks[i])))
527 return i;
528
529 for (i = 0; i < EQNSYM__MAX; i++) {
530 if (STRNEQ(start, sz,
531 eqnsyms[i].str, strlen(eqnsyms[i].str))) {
532 mandoc_asprintf(p, "\\[%s]", eqnsyms[i].sym);
533 return EQN_TOK_SYM;
534 }
535 }
536
537 if (p != NULL)
538 *p = mandoc_strndup(start, sz);
539
540 for (i = 0; i < sizeof(eqn_func)/sizeof(*eqn_func); i++)
541 if (STRNEQ(start, sz, eqn_func[i], strlen(eqn_func[i])))
542 return EQN_TOK_FUNC;
543
544 return EQN_TOK__MAX;
545 }
546
547 static void
548 eqn_box_free(struct eqn_box *bp)
549 {
550
551 if (bp->first)
552 eqn_box_free(bp->first);
553 if (bp->next)
554 eqn_box_free(bp->next);
555
556 free(bp->text);
557 free(bp->left);
558 free(bp->right);
559 free(bp->top);
560 free(bp->bottom);
561 free(bp);
562 }
563
564 /*
565 * Allocate a box as the last child of the parent node.
566 */
567 static struct eqn_box *
568 eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent)
569 {
570 struct eqn_box *bp;
571
572 bp = mandoc_calloc(1, sizeof(struct eqn_box));
573 bp->parent = parent;
574 bp->parent->args++;
575 bp->expectargs = UINT_MAX;
576 bp->font = bp->parent->font;
577 bp->size = ep->gsize;
578
579 if (NULL != parent->first) {
580 parent->last->next = bp;
581 bp->prev = parent->last;
582 } else
583 parent->first = bp;
584
585 parent->last = bp;
586 return bp;
587 }
588
589 /*
590 * Reparent the current last node (of the current parent) under a new
591 * EQN_SUBEXPR as the first element.
592 * Then return the new parent.
593 * The new EQN_SUBEXPR will have a two-child limit.
594 */
595 static struct eqn_box *
596 eqn_box_makebinary(struct eqn_node *ep,
597 enum eqn_post pos, struct eqn_box *parent)
598 {
599 struct eqn_box *b, *newb;
600
601 assert(NULL != parent->last);
602 b = parent->last;
603 if (parent->last == parent->first)
604 parent->first = NULL;
605 parent->args--;
606 parent->last = b->prev;
607 b->prev = NULL;
608 newb = eqn_box_alloc(ep, parent);
609 newb->pos = pos;
610 newb->type = EQN_SUBEXPR;
611 newb->expectargs = 2;
612 newb->args = 1;
613 newb->first = newb->last = b;
614 newb->first->next = NULL;
615 b->parent = newb;
616 return newb;
617 }
618
619 /*
620 * Parse the "delim" control statement.
621 */
622 static void
623 eqn_delim(struct eqn_node *ep)
624 {
625 const char *start;
626 size_t sz;
627
628 if ((start = eqn_nextrawtok(ep, &sz)) == NULL)
629 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
630 ep->eqn.ln, ep->eqn.pos, "delim");
631 else if (strncmp(start, "off", 3) == 0)
632 ep->delim = 0;
633 else if (strncmp(start, "on", 2) == 0) {
634 if (ep->odelim && ep->cdelim)
635 ep->delim = 1;
636 } else if (start[1] != '\0') {
637 ep->odelim = start[0];
638 ep->cdelim = start[1];
639 ep->delim = 1;
640 }
641 }
642
643 /*
644 * Undefine a previously-defined string.
645 */
646 static void
647 eqn_undef(struct eqn_node *ep)
648 {
649 const char *start;
650 struct eqn_def *def;
651 size_t sz;
652
653 if ((start = eqn_nextrawtok(ep, &sz)) == NULL) {
654 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
655 ep->eqn.ln, ep->eqn.pos, "undef");
656 return;
657 }
658 if ((def = eqn_def_find(ep, start, sz)) == NULL)
659 return;
660 free(def->key);
661 free(def->val);
662 def->key = def->val = NULL;
663 def->keysz = def->valsz = 0;
664 }
665
666 static void
667 eqn_def(struct eqn_node *ep)
668 {
669 const char *start;
670 size_t sz;
671 struct eqn_def *def;
672 int i;
673
674 if ((start = eqn_nextrawtok(ep, &sz)) == NULL) {
675 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
676 ep->eqn.ln, ep->eqn.pos, "define");
677 return;
678 }
679
680 /*
681 * Search for a key that already exists.
682 * Create a new key if none is found.
683 */
684 if (NULL == (def = eqn_def_find(ep, start, sz))) {
685 /* Find holes in string array. */
686 for (i = 0; i < (int)ep->defsz; i++)
687 if (0 == ep->defs[i].keysz)
688 break;
689
690 if (i == (int)ep->defsz) {
691 ep->defsz++;
692 ep->defs = mandoc_reallocarray(ep->defs,
693 ep->defsz, sizeof(struct eqn_def));
694 ep->defs[i].key = ep->defs[i].val = NULL;
695 }
696
697 def = ep->defs + i;
698 free(def->key);
699 def->key = mandoc_strndup(start, sz);
700 def->keysz = sz;
701 }
702
703 start = eqn_next(ep, ep->data[(int)ep->cur], &sz, 0);
704 if (start == NULL) {
705 mandoc_vmsg(MANDOCERR_REQ_EMPTY, ep->parse,
706 ep->eqn.ln, ep->eqn.pos, "define %s", def->key);
707 free(def->key);
708 free(def->val);
709 def->key = def->val = NULL;
710 def->keysz = def->valsz = 0;
711 return;
712 }
713 free(def->val);
714 def->val = mandoc_strndup(start, sz);
715 def->valsz = sz;
716 }
717
718 /*
719 * Recursively parse an eqn(7) expression.
720 */
721 static enum rofferr
722 eqn_parse(struct eqn_node *ep, struct eqn_box *parent)
723 {
724 char sym[64];
725 struct eqn_box *cur, *nbox;
726 const char *cp, *cpn, *start;
727 char *p;
728 size_t sz;
729 enum eqn_tok tok, subtok;
730 enum eqn_post pos;
731 enum { CCL_LET, CCL_DIG, CCL_PUN } ccl, ccln;
732 int size;
733
734 assert(parent != NULL);
735
736 /*
737 * Empty equation.
738 * Do not add it to the high-level syntax tree.
739 */
740
741 if (ep->data == NULL)
742 return ROFF_IGN;
743
744 next_tok:
745 tok = eqn_tok_parse(ep, &p);
746
747 this_tok:
748 switch (tok) {
749 case EQN_TOK_UNDEF:
750 eqn_undef(ep);
751 break;
752 case EQN_TOK_NDEFINE:
753 case EQN_TOK_DEFINE:
754 eqn_def(ep);
755 break;
756 case EQN_TOK_TDEFINE:
757 if (eqn_nextrawtok(ep, NULL) == NULL ||
758 eqn_next(ep, ep->data[(int)ep->cur], NULL, 0) == NULL)
759 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
760 ep->eqn.ln, ep->eqn.pos, "tdefine");
761 break;
762 case EQN_TOK_DELIM:
763 eqn_delim(ep);
764 break;
765 case EQN_TOK_GFONT:
766 if (eqn_nextrawtok(ep, NULL) == NULL)
767 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
768 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
769 break;
770 case EQN_TOK_MARK:
771 case EQN_TOK_LINEUP:
772 /* Ignore these. */
773 break;
774 case EQN_TOK_DYAD:
775 case EQN_TOK_VEC:
776 case EQN_TOK_UNDER:
777 case EQN_TOK_BAR:
778 case EQN_TOK_TILDE:
779 case EQN_TOK_HAT:
780 case EQN_TOK_DOT:
781 case EQN_TOK_DOTDOT:
782 if (parent->last == NULL) {
783 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
784 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
785 cur = eqn_box_alloc(ep, parent);
786 cur->type = EQN_TEXT;
787 cur->text = mandoc_strdup("");
788 }
789 parent = eqn_box_makebinary(ep, EQNPOS_NONE, parent);
790 parent->type = EQN_LISTONE;
791 parent->expectargs = 1;
792 parent->font = EQNFONT_ROMAN;
793 switch (tok) {
794 case EQN_TOK_DOTDOT:
795 strlcpy(sym, "\\[ad]", sizeof(sym));
796 break;
797 case EQN_TOK_VEC:
798 strlcpy(sym, "\\[->]", sizeof(sym));
799 break;
800 case EQN_TOK_DYAD:
801 strlcpy(sym, "\\[<>]", sizeof(sym));
802 break;
803 case EQN_TOK_TILDE:
804 strlcpy(sym, "\\[a~]", sizeof(sym));
805 break;
806 case EQN_TOK_UNDER:
807 strlcpy(sym, "\\[ul]", sizeof(sym));
808 break;
809 case EQN_TOK_BAR:
810 strlcpy(sym, "\\[rl]", sizeof(sym));
811 break;
812 case EQN_TOK_DOT:
813 strlcpy(sym, "\\[a.]", sizeof(sym));
814 break;
815 case EQN_TOK_HAT:
816 strlcpy(sym, "\\[ha]", sizeof(sym));
817 break;
818 default:
819 abort();
820 }
821
822 switch (tok) {
823 case EQN_TOK_DOTDOT:
824 case EQN_TOK_VEC:
825 case EQN_TOK_DYAD:
826 case EQN_TOK_TILDE:
827 case EQN_TOK_BAR:
828 case EQN_TOK_DOT:
829 case EQN_TOK_HAT:
830 parent->top = mandoc_strdup(sym);
831 break;
832 case EQN_TOK_UNDER:
833 parent->bottom = mandoc_strdup(sym);
834 break;
835 default:
836 abort();
837 }
838 parent = parent->parent;
839 break;
840 case EQN_TOK_FWD:
841 case EQN_TOK_BACK:
842 case EQN_TOK_DOWN:
843 case EQN_TOK_UP:
844 subtok = eqn_tok_parse(ep, NULL);
845 if (subtok != EQN_TOK__MAX) {
846 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
847 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
848 tok = subtok;
849 goto this_tok;
850 }
851 break;
852 case EQN_TOK_FAT:
853 case EQN_TOK_ROMAN:
854 case EQN_TOK_ITALIC:
855 case EQN_TOK_BOLD:
856 while (parent->args == parent->expectargs)
857 parent = parent->parent;
858 /*
859 * These values apply to the next word or sequence of
860 * words; thus, we mark that we'll have a child with
861 * exactly one of those.
862 */
863 parent = eqn_box_alloc(ep, parent);
864 parent->type = EQN_LISTONE;
865 parent->expectargs = 1;
866 switch (tok) {
867 case EQN_TOK_FAT:
868 parent->font = EQNFONT_FAT;
869 break;
870 case EQN_TOK_ROMAN:
871 parent->font = EQNFONT_ROMAN;
872 break;
873 case EQN_TOK_ITALIC:
874 parent->font = EQNFONT_ITALIC;
875 break;
876 case EQN_TOK_BOLD:
877 parent->font = EQNFONT_BOLD;
878 break;
879 default:
880 abort();
881 }
882 break;
883 case EQN_TOK_SIZE:
884 case EQN_TOK_GSIZE:
885 /* Accept two values: integral size and a single. */
886 if (NULL == (start = eqn_nexttok(ep, &sz))) {
887 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
888 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
889 break;
890 }
891 size = mandoc_strntoi(start, sz, 10);
892 if (-1 == size) {
893 mandoc_msg(MANDOCERR_IT_NONUM, ep->parse,
894 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
895 break;
896 }
897 if (EQN_TOK_GSIZE == tok) {
898 ep->gsize = size;
899 break;
900 }
901 parent = eqn_box_alloc(ep, parent);
902 parent->type = EQN_LISTONE;
903 parent->expectargs = 1;
904 parent->size = size;
905 break;
906 case EQN_TOK_FROM:
907 case EQN_TOK_TO:
908 case EQN_TOK_SUB:
909 case EQN_TOK_SUP:
910 /*
911 * We have a left-right-associative expression.
912 * Repivot under a positional node, open a child scope
913 * and keep on reading.
914 */
915 if (parent->last == NULL) {
916 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
917 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
918 cur = eqn_box_alloc(ep, parent);
919 cur->type = EQN_TEXT;
920 cur->text = mandoc_strdup("");
921 }
922 /* Handle the "subsup" and "fromto" positions. */
923 if (EQN_TOK_SUP == tok && parent->pos == EQNPOS_SUB) {
924 parent->expectargs = 3;
925 parent->pos = EQNPOS_SUBSUP;
926 break;
927 }
928 if (EQN_TOK_TO == tok && parent->pos == EQNPOS_FROM) {
929 parent->expectargs = 3;
930 parent->pos = EQNPOS_FROMTO;
931 break;
932 }
933 switch (tok) {
934 case EQN_TOK_FROM:
935 pos = EQNPOS_FROM;
936 break;
937 case EQN_TOK_TO:
938 pos = EQNPOS_TO;
939 break;
940 case EQN_TOK_SUP:
941 pos = EQNPOS_SUP;
942 break;
943 case EQN_TOK_SUB:
944 pos = EQNPOS_SUB;
945 break;
946 default:
947 abort();
948 }
949 parent = eqn_box_makebinary(ep, pos, parent);
950 break;
951 case EQN_TOK_SQRT:
952 while (parent->args == parent->expectargs)
953 parent = parent->parent;
954 /*
955 * Accept a left-right-associative set of arguments just
956 * like sub and sup and friends but without rebalancing
957 * under a pivot.
958 */
959 parent = eqn_box_alloc(ep, parent);
960 parent->type = EQN_SUBEXPR;
961 parent->pos = EQNPOS_SQRT;
962 parent->expectargs = 1;
963 break;
964 case EQN_TOK_OVER:
965 /*
966 * We have a right-left-associative fraction.
967 * Close out anything that's currently open, then
968 * rebalance and continue reading.
969 */
970 if (parent->last == NULL) {
971 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
972 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
973 cur = eqn_box_alloc(ep, parent);
974 cur->type = EQN_TEXT;
975 cur->text = mandoc_strdup("");
976 }
977 while (EQN_SUBEXPR == parent->type)
978 parent = parent->parent;
979 parent = eqn_box_makebinary(ep, EQNPOS_OVER, parent);
980 break;
981 case EQN_TOK_RIGHT:
982 case EQN_TOK_BRACE_CLOSE:
983 /*
984 * Close out the existing brace.
985 * FIXME: this is a shitty sentinel: we should really
986 * have a native EQN_BRACE type or whatnot.
987 */
988 for (cur = parent; cur != NULL; cur = cur->parent)
989 if (cur->type == EQN_LIST &&
990 (tok == EQN_TOK_BRACE_CLOSE ||
991 cur->left != NULL))
992 break;
993 if (cur == NULL) {
994 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->parse,
995 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
996 break;
997 }
998 parent = cur;
999 if (EQN_TOK_RIGHT == tok) {
1000 if (NULL == (start = eqn_nexttok(ep, &sz))) {
1001 mandoc_msg(MANDOCERR_REQ_EMPTY,
1002 ep->parse, ep->eqn.ln,
1003 ep->eqn.pos, eqn_toks[tok]);
1004 break;
1005 }
1006 /* Handling depends on right/left. */
1007 if (STRNEQ(start, sz, "ceiling", 7)) {
1008 strlcpy(sym, "\\[rc]", sizeof(sym));
1009 parent->right = mandoc_strdup(sym);
1010 } else if (STRNEQ(start, sz, "floor", 5)) {
1011 strlcpy(sym, "\\[rf]", sizeof(sym));
1012 parent->right = mandoc_strdup(sym);
1013 } else
1014 parent->right = mandoc_strndup(start, sz);
1015 }
1016 parent = parent->parent;
1017 if (tok == EQN_TOK_BRACE_CLOSE &&
1018 (parent->type == EQN_PILE ||
1019 parent->type == EQN_MATRIX))
1020 parent = parent->parent;
1021 /* Close out any "singleton" lists. */
1022 while (parent->type == EQN_LISTONE &&
1023 parent->args == parent->expectargs)
1024 parent = parent->parent;
1025 break;
1026 case EQN_TOK_BRACE_OPEN:
1027 case EQN_TOK_LEFT:
1028 /*
1029 * If we already have something in the stack and we're
1030 * in an expression, then rewind til we're not any more
1031 * (just like with the text node).
1032 */
1033 while (parent->args == parent->expectargs)
1034 parent = parent->parent;
1035 if (EQN_TOK_LEFT == tok &&
1036 (start = eqn_nexttok(ep, &sz)) == NULL) {
1037 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
1038 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
1039 break;
1040 }
1041 parent = eqn_box_alloc(ep, parent);
1042 parent->type = EQN_LIST;
1043 if (EQN_TOK_LEFT == tok) {
1044 if (STRNEQ(start, sz, "ceiling", 7)) {
1045 strlcpy(sym, "\\[lc]", sizeof(sym));
1046 parent->left = mandoc_strdup(sym);
1047 } else if (STRNEQ(start, sz, "floor", 5)) {
1048 strlcpy(sym, "\\[lf]", sizeof(sym));
1049 parent->left = mandoc_strdup(sym);
1050 } else
1051 parent->left = mandoc_strndup(start, sz);
1052 }
1053 break;
1054 case EQN_TOK_PILE:
1055 case EQN_TOK_LPILE:
1056 case EQN_TOK_RPILE:
1057 case EQN_TOK_CPILE:
1058 case EQN_TOK_CCOL:
1059 case EQN_TOK_LCOL:
1060 case EQN_TOK_RCOL:
1061 while (parent->args == parent->expectargs)
1062 parent = parent->parent;
1063 parent = eqn_box_alloc(ep, parent);
1064 parent->type = EQN_PILE;
1065 parent->expectargs = 1;
1066 break;
1067 case EQN_TOK_ABOVE:
1068 for (cur = parent; cur != NULL; cur = cur->parent)
1069 if (cur->type == EQN_PILE)
1070 break;
1071 if (cur == NULL) {
1072 mandoc_msg(MANDOCERR_IT_STRAY, ep->parse,
1073 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
1074 break;
1075 }
1076 parent = eqn_box_alloc(ep, cur);
1077 parent->type = EQN_LIST;
1078 break;
1079 case EQN_TOK_MATRIX:
1080 while (parent->args == parent->expectargs)
1081 parent = parent->parent;
1082 parent = eqn_box_alloc(ep, parent);
1083 parent->type = EQN_MATRIX;
1084 parent->expectargs = 1;
1085 break;
1086 case EQN_TOK_EOF:
1087 /*
1088 * End of file!
1089 * TODO: make sure we're not in an open subexpression.
1090 */
1091 return ROFF_EQN;
1092 case EQN_TOK__MAX:
1093 case EQN_TOK_FUNC:
1094 case EQN_TOK_QUOTED:
1095 case EQN_TOK_SYM:
1096 assert(p != NULL);
1097 /*
1098 * If we already have something in the stack and we're
1099 * in an expression, then rewind til we're not any more.
1100 */
1101 while (parent->args == parent->expectargs)
1102 parent = parent->parent;
1103 cur = eqn_box_alloc(ep, parent);
1104 cur->type = EQN_TEXT;
1105 cur->text = p;
1106 switch (tok) {
1107 case EQN_TOK_FUNC:
1108 cur->font = EQNFONT_ROMAN;
1109 break;
1110 case EQN_TOK_QUOTED:
1111 if (cur->font == EQNFONT_NONE)
1112 cur->font = EQNFONT_ITALIC;
1113 break;
1114 case EQN_TOK_SYM:
1115 break;
1116 default:
1117 if (cur->font != EQNFONT_NONE || *p == '\0')
1118 break;
1119 cpn = p - 1;
1120 ccln = CCL_LET;
1121 for (;;) {
1122 /* Advance to next character. */
1123 cp = cpn++;
1124 ccl = ccln;
1125 ccln = isalpha((unsigned char)*cpn) ? CCL_LET :
1126 isdigit((unsigned char)*cpn) ||
1127 (*cpn == '.' && (ccl == CCL_DIG ||
1128 isdigit((unsigned char)cpn[1]))) ?
1129 CCL_DIG : CCL_PUN;
1130 /* No boundary before first character. */
1131 if (cp < p)
1132 continue;
1133 cur->font = ccl == CCL_LET ?
1134 EQNFONT_ITALIC : EQNFONT_ROMAN;
1135 if (*cp == '\\')
1136 mandoc_escape(&cpn, NULL, NULL);
1137 /* No boundary after last character. */
1138 if (*cpn == '\0')
1139 break;
1140 if (ccln == ccl)
1141 continue;
1142 /* Boundary found, split the text. */
1143 if (parent->args == parent->expectargs) {
1144 /* Remove the text from the tree. */
1145 if (cur->prev == NULL)
1146 parent->first = cur->next;
1147 else
1148 cur->prev->next = NULL;
1149 parent->last = cur->prev;
1150 parent->args--;
1151 /* Set up a list instead. */
1152 nbox = eqn_box_alloc(ep, parent);
1153 nbox->type = EQN_LIST;
1154 /* Insert the word into the list. */
1155 nbox->first = nbox->last = cur;
1156 cur->parent = nbox;
1157 cur->prev = NULL;
1158 parent = nbox;
1159 }
1160 /* Append a new text box. */
1161 nbox = eqn_box_alloc(ep, parent);
1162 nbox->type = EQN_TEXT;
1163 nbox->text = mandoc_strdup(cpn);
1164 /* Truncate the old box. */
1165 p = mandoc_strndup(cur->text,
1166 cpn - cur->text);
1167 free(cur->text);
1168 cur->text = p;
1169 /* Setup to process the new box. */
1170 cur = nbox;
1171 p = nbox->text;
1172 cpn = p - 1;
1173 ccln = CCL_LET;
1174 }
1175 break;
1176 }
1177 /*
1178 * Post-process list status.
1179 */
1180 while (parent->type == EQN_LISTONE &&
1181 parent->args == parent->expectargs)
1182 parent = parent->parent;
1183 break;
1184 default:
1185 abort();
1186 }
1187 goto next_tok;
1188 }
1189
1190 enum rofferr
1191 eqn_end(struct eqn_node **epp)
1192 {
1193 struct eqn_node *ep;
1194
1195 ep = *epp;
1196 *epp = NULL;
1197
1198 ep->eqn.root = mandoc_calloc(1, sizeof(struct eqn_box));
1199 ep->eqn.root->expectargs = UINT_MAX;
1200 return eqn_parse(ep, ep->eqn.root);
1201 }
1202
1203 void
1204 eqn_free(struct eqn_node *p)
1205 {
1206 int i;
1207
1208 eqn_box_free(p->eqn.root);
1209
1210 for (i = 0; i < (int)p->defsz; i++) {
1211 free(p->defs[i].key);
1212 free(p->defs[i].val);
1213 }
1214
1215 free(p->data);
1216 free(p->defs);
1217 free(p);
1218 }