]> git.cameronkatri.com Git - mandoc.git/blob - mdoc.c
23f0d7bb5410e5fb97e83f79352b8a3a1a2faac4
[mandoc.git] / mdoc.c
1 /* $Id: mdoc.c,v 1.48 2009/02/23 12:45:19 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the
7 * above copyright notice and this permission notice appear in all
8 * copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12 * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13 * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
16 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 * PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include <assert.h>
20 #include <ctype.h>
21 #include <err.h>
22 #include <stdarg.h>
23 #include <stdlib.h>
24 #include <stdio.h>
25 #include <string.h>
26
27 #include "private.h"
28
29 /*
30 * Main caller in the libmdoc library. This begins the parsing routine,
31 * handles allocation of data, and so forth. Most of the "work" is done
32 * in macro.c and validate.c.
33 */
34
/*
 * Forward declarations of the helpers that are private to this file.
 */

/* Deep copy and release of struct mdoc_arg vectors. */
static	struct mdoc_arg	 *argdup(size_t argsz, const struct mdoc_arg *args);
static	void		  argfree(size_t sz, struct mdoc_arg *p);
static	void		  argcpy(struct mdoc_arg *dst,
				const struct mdoc_arg *src);

/* Node allocation and insertion into the parse tree. */
static	struct mdoc_node *mdoc_node_alloc(const struct mdoc *mdoc);
static	int		  mdoc_node_append(struct mdoc *mdoc,
				struct mdoc_node *p);

/* Release of the per-type payload held in a node. */
static	void		  mdoc_elem_free(struct mdoc_elem *p);
static	void		  mdoc_block_free(struct mdoc_block *p);
static	void		  mdoc_text_free(struct mdoc_text *p);
45
46
47 const char *const __mdoc_macronames[MDOC_MAX] = {
48 "\\\"", "Dd", "Dt", "Os",
49 "Sh", "Ss", "Pp", "D1",
50 "Dl", "Bd", "Ed", "Bl",
51 "El", "It", "Ad", "An",
52 "Ar", "Cd", "Cm", "Dv",
53 "Er", "Ev", "Ex", "Fa",
54 "Fd", "Fl", "Fn", "Ft",
55 "Ic", "In", "Li", "Nd",
56 "Nm", "Op", "Ot", "Pa",
57 "Rv", "St", "Va", "Vt",
58 /* LINTED */
59 "Xr", "\%A", "\%B", "\%D",
60 /* LINTED */
61 "\%I", "\%J", "\%N", "\%O",
62 /* LINTED */
63 "\%P", "\%R", "\%T", "\%V",
64 "Ac", "Ao", "Aq", "At",
65 "Bc", "Bf", "Bo", "Bq",
66 "Bsx", "Bx", "Db", "Dc",
67 "Do", "Dq", "Ec", "Ef",
68 "Em", "Eo", "Fx", "Ms",
69 "No", "Ns", "Nx", "Ox",
70 "Pc", "Pf", "Po", "Pq",
71 "Qc", "Ql", "Qo", "Qq",
72 "Re", "Rs", "Sc", "So",
73 "Sq", "Sm", "Sx", "Sy",
74 "Tn", "Ux", "Xc", "Xo",
75 "Fo", "Fc", "Oo", "Oc",
76 "Bk", "Ek", "Bt", "Hf",
77 "Fr", "Ud",
78 };
79
80 const char *const __mdoc_argnames[MDOC_ARG_MAX] = {
81 "split", "nosplit", "ragged",
82 "unfilled", "literal", "file",
83 "offset", "bullet", "dash",
84 "hyphen", "item", "enum",
85 "tag", "diag", "hang",
86 "ohang", "inset", "column",
87 "width", "compact", "std",
88 "p1003.1-88", "p1003.1-90", "p1003.1-96",
89 "p1003.1-2001", "p1003.1-2004", "p1003.1",
90 "p1003.1b", "p1003.1b-93", "p1003.1c-95",
91 "p1003.1g-2000", "p1003.2-92", "p1387.2-95",
92 "p1003.2", "p1387.2", "isoC-90",
93 "isoC-amd1", "isoC-tcor1", "isoC-tcor2",
94 "isoC-99", "ansiC", "ansiC-89",
95 "ansiC-99", "ieee754", "iso8802-3",
96 "xpg3", "xpg4", "xpg4.2",
97 "xpg4.3", "xbd5", "xcu5",
98 "xsh5", "xns5", "xns5.2d2.0",
99 "xcurses4.2", "susv2", "susv3",
100 "svid4", "filled", "words",
101 "emphasis", "symbolic",
102 };
103
104 const char * const *mdoc_macronames = __mdoc_macronames;
105 const char * const *mdoc_argnames = __mdoc_argnames;
106
107
108 const struct mdoc_node *
109 mdoc_node(const struct mdoc *mdoc)
110 {
111
112 return(mdoc->first);
113 }
114
115
116 const struct mdoc_meta *
117 mdoc_meta(const struct mdoc *mdoc)
118 {
119
120 return(&mdoc->meta);
121 }
122
123
124 void
125 mdoc_free(struct mdoc *mdoc)
126 {
127
128 if (mdoc->first)
129 mdoc_node_freelist(mdoc->first);
130 if (mdoc->htab)
131 mdoc_tokhash_free(mdoc->htab);
132 if (mdoc->meta.title)
133 free(mdoc->meta.title);
134 if (mdoc->meta.os)
135 free(mdoc->meta.os);
136 if (mdoc->meta.name)
137 free(mdoc->meta.name);
138
139 free(mdoc);
140 }
141
142
143 struct mdoc *
144 mdoc_alloc(void *data, const struct mdoc_cb *cb)
145 {
146 struct mdoc *p;
147
148 p = xcalloc(1, sizeof(struct mdoc));
149
150 p->data = data;
151 if (cb)
152 (void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
153
154 p->last = xcalloc(1, sizeof(struct mdoc_node));
155 p->last->type = MDOC_ROOT;
156 p->first = p->last;
157
158 p->next = MDOC_NEXT_CHILD;
159 p->htab = mdoc_tokhash_alloc();
160
161 return(p);
162 }
163
164
165 int
166 mdoc_endparse(struct mdoc *mdoc)
167 {
168
169 if (MDOC_HALT & mdoc->flags)
170 return(0);
171 if (NULL == mdoc->first)
172 return(1);
173
174 assert(mdoc->last);
175 if ( ! macro_end(mdoc)) {
176 mdoc->flags |= MDOC_HALT;
177 return(0);
178 }
179 return(1);
180 }
181
182
183 int
184 mdoc_parseln(struct mdoc *mdoc, int line, char *buf)
185 {
186 int c, i;
187 char tmp[5];
188
189 if (MDOC_HALT & mdoc->flags)
190 return(0);
191
192 mdoc->linetok = 0;
193
194 if ('.' != *buf) {
195 if (SEC_PROLOGUE == mdoc->lastnamed)
196 return(mdoc_perr(mdoc, line, 0, "text disallowed in document prologue"));
197 if ( ! mdoc_word_alloc(mdoc, line, 0, buf))
198 return(0);
199 mdoc->next = MDOC_NEXT_SIBLING;
200 return(1);
201 }
202
203 if (buf[1] && '\\' == buf[1])
204 if (buf[2] && '\"' == buf[2])
205 return(1);
206
207 i = 1;
208 while (buf[i] && ! isspace((int)buf[i]) && i < (int)sizeof(tmp))
209 i++;
210
211 if (i == (int)sizeof(tmp)) {
212 mdoc->flags |= MDOC_HALT;
213 return(mdoc_perr(mdoc, line, 1, "unknown macro"));
214 } else if (i <= 2) {
215 mdoc->flags |= MDOC_HALT;
216 return(mdoc_perr(mdoc, line, 1, "unknown macro"));
217 }
218
219 i--;
220
221 (void)memcpy(tmp, buf + 1, (size_t)i);
222 tmp[i++] = 0;
223
224 if (MDOC_MAX == (c = mdoc_find(mdoc, tmp))) {
225 mdoc->flags |= MDOC_HALT;
226 return(mdoc_perr(mdoc, line, 1, "unknown macro"));
227 }
228
229 while (buf[i] && isspace((int)buf[i]))
230 i++;
231
232 if ( ! mdoc_macro(mdoc, c, line, 1, &i, buf)) {
233 mdoc->flags |= MDOC_HALT;
234 return(0);
235 }
236 return(1);
237 }
238
239
240 void
241 mdoc_vmsg(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
242 {
243 char buf[256];
244 va_list ap;
245
246 if (NULL == mdoc->cb.mdoc_msg)
247 return;
248
249 va_start(ap, fmt);
250 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
251 va_end(ap);
252 (*mdoc->cb.mdoc_msg)(mdoc->data, ln, pos, buf);
253 }
254
255
256 int
257 mdoc_verr(struct mdoc *mdoc, int ln, int pos,
258 const char *fmt, ...)
259 {
260 char buf[256];
261 va_list ap;
262
263 if (NULL == mdoc->cb.mdoc_err)
264 return(0);
265
266 va_start(ap, fmt);
267 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
268 va_end(ap);
269 return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf));
270 }
271
272
273 int
274 mdoc_vwarn(struct mdoc *mdoc, int ln, int pos,
275 enum mdoc_warn type, const char *fmt, ...)
276 {
277 char buf[256];
278 va_list ap;
279
280 if (NULL == mdoc->cb.mdoc_warn)
281 return(0);
282
283 va_start(ap, fmt);
284 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
285 va_end(ap);
286 return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, type, buf));
287 }
288
289
290 int
291 mdoc_macro(struct mdoc *mdoc, int tok,
292 int ln, int ppos, int *pos, char *buf)
293 {
294
295 assert(mdoc_macros[tok].fp);
296
297 if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
298 SEC_PROLOGUE != mdoc->lastnamed)
299 return(mdoc_perr(mdoc, ln, ppos, "macro disallowed in document body"));
300 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
301 SEC_PROLOGUE == mdoc->lastnamed)
302 return(mdoc_perr(mdoc, ln, ppos, "macro disallowed in document prologue"));
303 if (1 != ppos && ! (MDOC_CALLABLE & mdoc_macros[tok].flags))
304 return(mdoc_perr(mdoc, ln, ppos, "macro not callable"));
305 return((*mdoc_macros[tok].fp)(mdoc, tok, ln, ppos, pos, buf));
306 }
307
308
309 static int
310 mdoc_node_append(struct mdoc *mdoc, struct mdoc_node *p)
311 {
312 const char *nn, *nt, *on, *ot, *act;
313
314 assert(mdoc->last);
315 assert(mdoc->first);
316 assert(MDOC_ROOT != p->type);
317
318 /* See if we exceed the suggest line-max. */
319
320 switch (p->type) {
321 case (MDOC_TEXT):
322 /* FALLTHROUGH */
323 case (MDOC_ELEM):
324 /* FALLTHROUGH */
325 case (MDOC_BLOCK):
326 mdoc->linetok++;
327 break;
328 default:
329 break;
330 }
331
332 /* This sort-of works (re-opening of text macros...). */
333 if (mdoc->linetok > MDOC_LINEARG_SOFTMAX)
334 if ( ! mdoc_nwarn(mdoc, p, WARN_COMPAT,
335 "suggested %d tokens per line exceeded (has %d)",
336 MDOC_LINEARG_SOFTMAX, mdoc->linetok))
337 return(0);
338
339 if (MDOC_TEXT == mdoc->last->type)
340 on = "<text>";
341 else if (MDOC_ROOT == mdoc->last->type)
342 on = "<root>";
343 else
344 on = mdoc_macronames[mdoc->last->tok];
345
346 if (MDOC_TEXT == p->type)
347 nn = "<text>";
348 else if (MDOC_ROOT == p->type)
349 nn = "<root>";
350 else
351 nn = mdoc_macronames[p->tok];
352
353 ot = mdoc_type2a(mdoc->last->type);
354 nt = mdoc_type2a(p->type);
355
356 switch (mdoc->next) {
357 case (MDOC_NEXT_SIBLING):
358 mdoc->last->next = p;
359 p->prev = mdoc->last;
360 p->parent = mdoc->last->parent;
361 act = "sibling";
362 break;
363 case (MDOC_NEXT_CHILD):
364 mdoc->last->child = p;
365 p->parent = mdoc->last;
366 act = "child";
367 break;
368 default:
369 abort();
370 /* NOTREACHED */
371 }
372
373 if ( ! mdoc_valid_pre(mdoc, p))
374 return(0);
375
376 switch (p->type) {
377 case (MDOC_HEAD):
378 assert(MDOC_BLOCK == p->parent->type);
379 p->parent->data.block.head = p;
380 break;
381 case (MDOC_TAIL):
382 assert(MDOC_BLOCK == p->parent->type);
383 p->parent->data.block.tail = p;
384 break;
385 case (MDOC_BODY):
386 assert(MDOC_BLOCK == p->parent->type);
387 p->parent->data.block.body = p;
388 break;
389 default:
390 break;
391 }
392
393 mdoc->last = p;
394 mdoc_msg(mdoc, "parse: %s `%s' %s of %s `%s'",
395 nt, nn, act, ot, on);
396 return(1);
397 }
398
399
400 static struct mdoc_node *
401 mdoc_node_alloc(const struct mdoc *mdoc)
402 {
403 struct mdoc_node *p;
404
405 p = xcalloc(1, sizeof(struct mdoc_node));
406 p->sec = mdoc->lastsec;
407
408 return(p);
409 }
410
411
412 int
413 mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, int tok)
414 {
415 struct mdoc_node *p;
416
417 assert(mdoc->first);
418 assert(mdoc->last);
419
420 p = mdoc_node_alloc(mdoc);
421
422 p->line = line;
423 p->pos = pos;
424 p->type = MDOC_TAIL;
425 p->tok = tok;
426
427 return(mdoc_node_append(mdoc, p));
428 }
429
430
431 int
432 mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, int tok)
433 {
434 struct mdoc_node *p;
435
436 assert(mdoc->first);
437 assert(mdoc->last);
438
439 p = mdoc_node_alloc(mdoc);
440
441 p->line = line;
442 p->pos = pos;
443 p->type = MDOC_HEAD;
444 p->tok = tok;
445
446 return(mdoc_node_append(mdoc, p));
447 }
448
449
450 int
451 mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, int tok)
452 {
453 struct mdoc_node *p;
454
455 assert(mdoc->first);
456 assert(mdoc->last);
457
458 p = mdoc_node_alloc(mdoc);
459
460 p->line = line;
461 p->pos = pos;
462 p->type = MDOC_BODY;
463 p->tok = tok;
464
465 return(mdoc_node_append(mdoc, p));
466 }
467
468
469 int
470 mdoc_root_alloc(struct mdoc *mdoc)
471 {
472 struct mdoc_node *p;
473
474 p = mdoc_node_alloc(mdoc);
475
476 p->type = MDOC_ROOT;
477
478 return(mdoc_node_append(mdoc, p));
479 }
480
481
482 int
483 mdoc_block_alloc(struct mdoc *mdoc, int line, int pos,
484 int tok, size_t argsz, const struct mdoc_arg *args)
485 {
486 struct mdoc_node *p;
487
488 p = mdoc_node_alloc(mdoc);
489
490 p->pos = pos;
491 p->line = line;
492 p->type = MDOC_BLOCK;
493 p->tok = tok;
494 p->data.block.argc = argsz;
495 p->data.block.argv = argdup(argsz, args);
496
497 return(mdoc_node_append(mdoc, p));
498 }
499
500
501 int
502 mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos,
503 int tok, size_t argsz, const struct mdoc_arg *args)
504 {
505 struct mdoc_node *p;
506
507 p = mdoc_node_alloc(mdoc);
508
509 p->line = line;
510 p->pos = pos;
511 p->type = MDOC_ELEM;
512 p->tok = tok;
513 p->data.elem.argc = argsz;
514 p->data.elem.argv = argdup(argsz, args);
515
516 return(mdoc_node_append(mdoc, p));
517 }
518
519
520 int
521 mdoc_word_alloc(struct mdoc *mdoc,
522 int line, int pos, const char *word)
523 {
524 struct mdoc_node *p;
525
526 p = mdoc_node_alloc(mdoc);
527
528 p->line = line;
529 p->pos = pos;
530 p->type = MDOC_TEXT;
531 p->data.text.string = xstrdup(word);
532
533 return(mdoc_node_append(mdoc, p));
534 }
535
536
537 static void
538 argfree(size_t sz, struct mdoc_arg *p)
539 {
540 int i, j;
541
542 if (0 == sz)
543 return;
544
545 assert(p);
546 /* LINTED */
547 for (i = 0; i < (int)sz; i++)
548 if (p[i].sz > 0) {
549 assert(p[i].value);
550 /* LINTED */
551 for (j = 0; j < (int)p[i].sz; j++)
552 free(p[i].value[j]);
553 free(p[i].value);
554 }
555 free(p);
556 }
557
558
559 static void
560 mdoc_elem_free(struct mdoc_elem *p)
561 {
562
563 argfree(p->argc, p->argv);
564 }
565
566
567 static void
568 mdoc_block_free(struct mdoc_block *p)
569 {
570
571 argfree(p->argc, p->argv);
572 }
573
574
575 static void
576 mdoc_text_free(struct mdoc_text *p)
577 {
578
579 if (p->string)
580 free(p->string);
581 }
582
583
584 void
585 mdoc_node_free(struct mdoc_node *p)
586 {
587
588 switch (p->type) {
589 case (MDOC_TEXT):
590 mdoc_text_free(&p->data.text);
591 break;
592 case (MDOC_ELEM):
593 mdoc_elem_free(&p->data.elem);
594 break;
595 case (MDOC_BLOCK):
596 mdoc_block_free(&p->data.block);
597 break;
598 default:
599 break;
600 }
601
602 free(p);
603 }
604
605
606 void
607 mdoc_node_freelist(struct mdoc_node *p)
608 {
609
610 if (p->child)
611 mdoc_node_freelist(p->child);
612 if (p->next)
613 mdoc_node_freelist(p->next);
614
615 mdoc_node_free(p);
616 }
617
618
619 int
620 mdoc_find(const struct mdoc *mdoc, const char *key)
621 {
622
623 return(mdoc_tokhash_find(mdoc->htab, key));
624 }
625
626
627 static void
628 argcpy(struct mdoc_arg *dst, const struct mdoc_arg *src)
629 {
630 int i;
631
632 dst->line = src->line;
633 dst->pos = src->pos;
634 dst->arg = src->arg;
635 if (0 == (dst->sz = src->sz))
636 return;
637 dst->value = xcalloc(dst->sz, sizeof(char *));
638 for (i = 0; i < (int)dst->sz; i++)
639 dst->value[i] = xstrdup(src->value[i]);
640 }
641
642
643 static struct mdoc_arg *
644 argdup(size_t argsz, const struct mdoc_arg *args)
645 {
646 struct mdoc_arg *pp;
647 int i;
648
649 if (0 == argsz)
650 return(NULL);
651
652 pp = xcalloc((size_t)argsz, sizeof(struct mdoc_arg));
653 for (i = 0; i < (int)argsz; i++)
654 argcpy(&pp[i], &args[i]);
655
656 return(pp);
657 }
658
659
660 /* FIXME: deprecate. */
661 char *
662 mdoc_node2a(struct mdoc_node *node)
663 {
664 static char buf[64];
665
666 assert(node);
667
668 buf[0] = 0;
669 (void)xstrlcat(buf, mdoc_type2a(node->type), 64);
670 if (MDOC_ROOT == node->type)
671 return(buf);
672 (void)xstrlcat(buf, " `", 64);
673 if (MDOC_TEXT == node->type)
674 (void)xstrlcat(buf, node->data.text.string, 64);
675 else
676 (void)xstrlcat(buf, mdoc_macronames[node->tok], 64);
677 (void)xstrlcat(buf, "'", 64);
678
679 return(buf);
680 }
681
682