]> git.cameronkatri.com Git - mandoc.git/blob - mdoc.c
58ad387412d0d5fca4ee6f547c25042bc3ed8723
[mandoc.git] / mdoc.c
1 /* $Id: mdoc.c,v 1.51 2009/03/05 13:12:12 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the
7 * above copyright notice and this permission notice appear in all
8 * copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12 * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13 * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
16 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 * PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include <assert.h>
20 #include <ctype.h>
21 #include <err.h>
22 #include <stdarg.h>
23 #include <stdlib.h>
24 #include <stdio.h>
25 #include <string.h>
26
27 #include "private.h"
28
29 /*
30 * Main caller in the libmdoc library. This begins the parsing routine,
31 * handles allocation of data, and so forth. Most of the "work" is done
32 * in macro.c and validate.c.
33 */
34
/* Argument-vector helpers: deep copy and release of mdoc_arg arrays. */
static	struct mdoc_arg	 *argdup(size_t, const struct mdoc_arg *);
static	void		  argfree(size_t, struct mdoc_arg *);
static	void		  argcpy(struct mdoc_arg *,
				const struct mdoc_arg *);

/* Node allocation/linking and release of per-type node payloads. */
static	struct mdoc_node *mdoc_node_alloc(const struct mdoc *);
static	int		  mdoc_node_append(struct mdoc *,
				struct mdoc_node *);
static	void		  mdoc_elem_free(struct mdoc_elem *);
static	void		  mdoc_block_free(struct mdoc_block *);
static	void		  mdoc_text_free(struct mdoc_text *);
45
46
47 const char *const __mdoc_macronames[MDOC_MAX] = {
48 "\\\"", "Dd", "Dt", "Os",
49 "Sh", "Ss", "Pp", "D1",
50 "Dl", "Bd", "Ed", "Bl",
51 "El", "It", "Ad", "An",
52 "Ar", "Cd", "Cm", "Dv",
53 "Er", "Ev", "Ex", "Fa",
54 "Fd", "Fl", "Fn", "Ft",
55 "Ic", "In", "Li", "Nd",
56 "Nm", "Op", "Ot", "Pa",
57 "Rv", "St", "Va", "Vt",
58 /* LINTED */
59 "Xr", "\%A", "\%B", "\%D",
60 /* LINTED */
61 "\%I", "\%J", "\%N", "\%O",
62 /* LINTED */
63 "\%P", "\%R", "\%T", "\%V",
64 "Ac", "Ao", "Aq", "At",
65 "Bc", "Bf", "Bo", "Bq",
66 "Bsx", "Bx", "Db", "Dc",
67 "Do", "Dq", "Ec", "Ef",
68 "Em", "Eo", "Fx", "Ms",
69 "No", "Ns", "Nx", "Ox",
70 "Pc", "Pf", "Po", "Pq",
71 "Qc", "Ql", "Qo", "Qq",
72 "Re", "Rs", "Sc", "So",
73 "Sq", "Sm", "Sx", "Sy",
74 "Tn", "Ux", "Xc", "Xo",
75 "Fo", "Fc", "Oo", "Oc",
76 "Bk", "Ek", "Bt", "Hf",
77 "Fr", "Ud",
78 };
79
80 const char *const __mdoc_argnames[MDOC_ARG_MAX] = {
81 "split", "nosplit", "ragged",
82 "unfilled", "literal", "file",
83 "offset", "bullet", "dash",
84 "hyphen", "item", "enum",
85 "tag", "diag", "hang",
86 "ohang", "inset", "column",
87 "width", "compact", "std",
88 "p1003.1-88", "p1003.1-90", "p1003.1-96",
89 "p1003.1-2001", "p1003.1-2004", "p1003.1",
90 "p1003.1b", "p1003.1b-93", "p1003.1c-95",
91 "p1003.1g-2000", "p1003.2-92", "p1387.2-95",
92 "p1003.2", "p1387.2", "isoC-90",
93 "isoC-amd1", "isoC-tcor1", "isoC-tcor2",
94 "isoC-99", "ansiC", "ansiC-89",
95 "ansiC-99", "ieee754", "iso8802-3",
96 "xpg3", "xpg4", "xpg4.2",
97 "xpg4.3", "xbd5", "xcu5",
98 "xsh5", "xns5", "xns5.2d2.0",
99 "xcurses4.2", "susv2", "susv3",
100 "svid4", "filled", "words",
101 "emphasis", "symbolic",
102 };
103
104 const char * const *mdoc_macronames = __mdoc_macronames;
105 const char * const *mdoc_argnames = __mdoc_argnames;
106
107
108 const struct mdoc_node *
109 mdoc_node(const struct mdoc *mdoc)
110 {
111
112 return(mdoc->first);
113 }
114
115
116 const struct mdoc_meta *
117 mdoc_meta(const struct mdoc *mdoc)
118 {
119
120 return(&mdoc->meta);
121 }
122
123
124 void
125 mdoc_free(struct mdoc *mdoc)
126 {
127
128 if (mdoc->first)
129 mdoc_node_freelist(mdoc->first);
130 if (mdoc->htab)
131 mdoc_tokhash_free(mdoc->htab);
132 if (mdoc->meta.title)
133 free(mdoc->meta.title);
134 if (mdoc->meta.os)
135 free(mdoc->meta.os);
136 if (mdoc->meta.name)
137 free(mdoc->meta.name);
138
139 free(mdoc);
140 }
141
142
143 struct mdoc *
144 mdoc_alloc(void *data, const struct mdoc_cb *cb)
145 {
146 struct mdoc *p;
147
148 p = xcalloc(1, sizeof(struct mdoc));
149
150 p->data = data;
151 if (cb)
152 (void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
153
154 p->last = xcalloc(1, sizeof(struct mdoc_node));
155 p->last->type = MDOC_ROOT;
156 p->first = p->last;
157
158 p->next = MDOC_NEXT_CHILD;
159 p->htab = mdoc_tokhash_alloc();
160
161 return(p);
162 }
163
164
165 int
166 mdoc_endparse(struct mdoc *mdoc)
167 {
168
169 if (MDOC_HALT & mdoc->flags)
170 return(0);
171 if (NULL == mdoc->first)
172 return(1);
173
174 assert(mdoc->last);
175 if ( ! macro_end(mdoc)) {
176 mdoc->flags |= MDOC_HALT;
177 return(0);
178 }
179 return(1);
180 }
181
182
183 /*
184 * Main line-parsing routine. If the line is a macro-line (started with
185 * a '.' control character), then pass along to the parser, which parses
186 * subsequent macros until the end of line. If normal text, simply
187 * append the entire line to the chain.
188 */
189 int
190 mdoc_parseln(struct mdoc *mdoc, int line, char *buf)
191 {
192 int c, i;
193 char tmp[5];
194
195 if (MDOC_HALT & mdoc->flags)
196 return(0);
197
198 mdoc->linetok = 0;
199
200 if ('.' != *buf) {
201 /*
202 * Free-form text. Not allowed in the prologue.
203 */
204 if (SEC_PROLOGUE == mdoc->lastnamed)
205 return(mdoc_perr(mdoc, line, 0,
206 "no text in prologue"));
207
208 if ( ! mdoc_word_alloc(mdoc, line, 0, buf))
209 return(0);
210 mdoc->next = MDOC_NEXT_SIBLING;
211 return(1);
212 }
213
214 /*
215 * Control-character detected. Begin the parsing sequence.
216 */
217
218 if (buf[1] && '\\' == buf[1])
219 if (buf[2] && '\"' == buf[2])
220 return(1);
221
222 i = 1;
223 while (buf[i] && ! isspace((u_char)buf[i]) &&
224 i < (int)sizeof(tmp))
225 i++;
226
227 if (i == (int)sizeof(tmp)) {
228 mdoc->flags |= MDOC_HALT;
229 return(mdoc_perr(mdoc, line, 1, "unknown macro"));
230 } else if (i <= 2) {
231 mdoc->flags |= MDOC_HALT;
232 return(mdoc_perr(mdoc, line, 1, "unknown macro"));
233 }
234
235 i--;
236
237 (void)memcpy(tmp, buf + 1, (size_t)i);
238 tmp[i++] = 0;
239
240 if (MDOC_MAX == (c = mdoc_find(mdoc, tmp))) {
241 mdoc->flags |= MDOC_HALT;
242 return(mdoc_perr(mdoc, line, 1, "unknown macro"));
243 }
244
245 while (buf[i] && isspace((u_char)buf[i]))
246 i++;
247
248 if ( ! mdoc_macro(mdoc, c, line, 1, &i, buf)) {
249 mdoc->flags |= MDOC_HALT;
250 return(0);
251 }
252
253 return(1);
254 }
255
256
257 void
258 mdoc_vmsg(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
259 {
260 char buf[256];
261 va_list ap;
262
263 if (NULL == mdoc->cb.mdoc_msg)
264 return;
265
266 va_start(ap, fmt);
267 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
268 va_end(ap);
269 (*mdoc->cb.mdoc_msg)(mdoc->data, ln, pos, buf);
270 }
271
272
273 int
274 mdoc_verr(struct mdoc *mdoc, int ln, int pos,
275 const char *fmt, ...)
276 {
277 char buf[256];
278 va_list ap;
279
280 if (NULL == mdoc->cb.mdoc_err)
281 return(0);
282
283 va_start(ap, fmt);
284 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
285 va_end(ap);
286 return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf));
287 }
288
289
290 int
291 mdoc_vwarn(struct mdoc *mdoc, int ln, int pos,
292 enum mdoc_warn type, const char *fmt, ...)
293 {
294 char buf[256];
295 va_list ap;
296
297 if (NULL == mdoc->cb.mdoc_warn)
298 return(0);
299
300 va_start(ap, fmt);
301 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
302 va_end(ap);
303 return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, type, buf));
304 }
305
306
307 int
308 mdoc_macro(struct mdoc *mdoc, int tok,
309 int ln, int ppos, int *pos, char *buf)
310 {
311
312 assert(mdoc_macros[tok].fp);
313
314 if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
315 SEC_PROLOGUE != mdoc->lastnamed)
316 return(mdoc_perr(mdoc, ln, ppos, "macro disallowed in document body"));
317 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
318 SEC_PROLOGUE == mdoc->lastnamed)
319 return(mdoc_perr(mdoc, ln, ppos, "macro disallowed in document prologue"));
320 if (1 != ppos && ! (MDOC_CALLABLE & mdoc_macros[tok].flags))
321 return(mdoc_perr(mdoc, ln, ppos, "macro not callable"));
322 return((*mdoc_macros[tok].fp)(mdoc, tok, ln, ppos, pos, buf));
323 }
324
325
326 static int
327 mdoc_node_append(struct mdoc *mdoc, struct mdoc_node *p)
328 {
329 const char *nn, *nt, *on, *ot, *act;
330
331 assert(mdoc->last);
332 assert(mdoc->first);
333 assert(MDOC_ROOT != p->type);
334
335 /* See if we exceed the suggest line-max. */
336
337 switch (p->type) {
338 case (MDOC_TEXT):
339 /* FALLTHROUGH */
340 case (MDOC_ELEM):
341 /* FALLTHROUGH */
342 case (MDOC_BLOCK):
343 mdoc->linetok++;
344 break;
345 default:
346 break;
347 }
348
349 /* This sort-of works (re-opening of text macros...). */
350 if (mdoc->linetok > MDOC_LINEARG_SOFTMAX)
351 if ( ! mdoc_nwarn(mdoc, p, WARN_COMPAT,
352 "suggested %d tokens per line exceeded (has %d)",
353 MDOC_LINEARG_SOFTMAX, mdoc->linetok))
354 return(0);
355
356 if (MDOC_TEXT == mdoc->last->type)
357 on = "<text>";
358 else if (MDOC_ROOT == mdoc->last->type)
359 on = "<root>";
360 else
361 on = mdoc_macronames[mdoc->last->tok];
362
363 if (MDOC_TEXT == p->type)
364 nn = "<text>";
365 else if (MDOC_ROOT == p->type)
366 nn = "<root>";
367 else
368 nn = mdoc_macronames[p->tok];
369
370 ot = mdoc_type2a(mdoc->last->type);
371 nt = mdoc_type2a(p->type);
372
373 switch (mdoc->next) {
374 case (MDOC_NEXT_SIBLING):
375 mdoc->last->next = p;
376 p->prev = mdoc->last;
377 p->parent = mdoc->last->parent;
378 act = "sibling";
379 break;
380 case (MDOC_NEXT_CHILD):
381 mdoc->last->child = p;
382 p->parent = mdoc->last;
383 act = "child";
384 break;
385 default:
386 abort();
387 /* NOTREACHED */
388 }
389
390 if ( ! mdoc_valid_pre(mdoc, p))
391 return(0);
392
393 switch (p->type) {
394 case (MDOC_HEAD):
395 assert(MDOC_BLOCK == p->parent->type);
396 p->parent->data.block.head = p;
397 break;
398 case (MDOC_TAIL):
399 assert(MDOC_BLOCK == p->parent->type);
400 p->parent->data.block.tail = p;
401 break;
402 case (MDOC_BODY):
403 assert(MDOC_BLOCK == p->parent->type);
404 p->parent->data.block.body = p;
405 break;
406 default:
407 break;
408 }
409
410 mdoc->last = p;
411 mdoc_msg(mdoc, "parse: %s `%s' %s of %s `%s'",
412 nt, nn, act, ot, on);
413 return(1);
414 }
415
416
417 static struct mdoc_node *
418 mdoc_node_alloc(const struct mdoc *mdoc)
419 {
420 struct mdoc_node *p;
421
422 p = xcalloc(1, sizeof(struct mdoc_node));
423 p->sec = mdoc->lastsec;
424
425 return(p);
426 }
427
428
429 int
430 mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, int tok)
431 {
432 struct mdoc_node *p;
433
434 assert(mdoc->first);
435 assert(mdoc->last);
436
437 p = mdoc_node_alloc(mdoc);
438
439 p->line = line;
440 p->pos = pos;
441 p->type = MDOC_TAIL;
442 p->tok = tok;
443
444 return(mdoc_node_append(mdoc, p));
445 }
446
447
448 int
449 mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, int tok)
450 {
451 struct mdoc_node *p;
452
453 assert(mdoc->first);
454 assert(mdoc->last);
455
456 p = mdoc_node_alloc(mdoc);
457
458 p->line = line;
459 p->pos = pos;
460 p->type = MDOC_HEAD;
461 p->tok = tok;
462
463 return(mdoc_node_append(mdoc, p));
464 }
465
466
467 int
468 mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, int tok)
469 {
470 struct mdoc_node *p;
471
472 assert(mdoc->first);
473 assert(mdoc->last);
474
475 p = mdoc_node_alloc(mdoc);
476
477 p->line = line;
478 p->pos = pos;
479 p->type = MDOC_BODY;
480 p->tok = tok;
481
482 return(mdoc_node_append(mdoc, p));
483 }
484
485
486 int
487 mdoc_root_alloc(struct mdoc *mdoc)
488 {
489 struct mdoc_node *p;
490
491 p = mdoc_node_alloc(mdoc);
492
493 p->type = MDOC_ROOT;
494
495 return(mdoc_node_append(mdoc, p));
496 }
497
498
499 int
500 mdoc_block_alloc(struct mdoc *mdoc, int line, int pos,
501 int tok, size_t argsz, const struct mdoc_arg *args)
502 {
503 struct mdoc_node *p;
504
505 p = mdoc_node_alloc(mdoc);
506
507 p->pos = pos;
508 p->line = line;
509 p->type = MDOC_BLOCK;
510 p->tok = tok;
511 p->data.block.argc = argsz;
512 p->data.block.argv = argdup(argsz, args);
513
514 return(mdoc_node_append(mdoc, p));
515 }
516
517
518 int
519 mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos,
520 int tok, size_t argsz, const struct mdoc_arg *args)
521 {
522 struct mdoc_node *p;
523
524 p = mdoc_node_alloc(mdoc);
525
526 p->line = line;
527 p->pos = pos;
528 p->type = MDOC_ELEM;
529 p->tok = tok;
530 p->data.elem.argc = argsz;
531 p->data.elem.argv = argdup(argsz, args);
532
533 return(mdoc_node_append(mdoc, p));
534 }
535
536
537 int
538 mdoc_word_alloc(struct mdoc *mdoc,
539 int line, int pos, const char *word)
540 {
541 struct mdoc_node *p;
542
543 p = mdoc_node_alloc(mdoc);
544
545 p->line = line;
546 p->pos = pos;
547 p->type = MDOC_TEXT;
548 p->data.text.string = xstrdup(word);
549
550 return(mdoc_node_append(mdoc, p));
551 }
552
553
554 static void
555 argfree(size_t sz, struct mdoc_arg *p)
556 {
557 int i, j;
558
559 if (0 == sz)
560 return;
561
562 assert(p);
563 /* LINTED */
564 for (i = 0; i < (int)sz; i++)
565 if (p[i].sz > 0) {
566 assert(p[i].value);
567 /* LINTED */
568 for (j = 0; j < (int)p[i].sz; j++)
569 free(p[i].value[j]);
570 free(p[i].value);
571 }
572 free(p);
573 }
574
575
576 static void
577 mdoc_elem_free(struct mdoc_elem *p)
578 {
579
580 argfree(p->argc, p->argv);
581 }
582
583
584 static void
585 mdoc_block_free(struct mdoc_block *p)
586 {
587
588 argfree(p->argc, p->argv);
589 }
590
591
592 static void
593 mdoc_text_free(struct mdoc_text *p)
594 {
595
596 if (p->string)
597 free(p->string);
598 }
599
600
601 void
602 mdoc_node_free(struct mdoc_node *p)
603 {
604
605 switch (p->type) {
606 case (MDOC_TEXT):
607 mdoc_text_free(&p->data.text);
608 break;
609 case (MDOC_ELEM):
610 mdoc_elem_free(&p->data.elem);
611 break;
612 case (MDOC_BLOCK):
613 mdoc_block_free(&p->data.block);
614 break;
615 default:
616 break;
617 }
618
619 free(p);
620 }
621
622
623 void
624 mdoc_node_freelist(struct mdoc_node *p)
625 {
626
627 if (p->child)
628 mdoc_node_freelist(p->child);
629 if (p->next)
630 mdoc_node_freelist(p->next);
631
632 mdoc_node_free(p);
633 }
634
635
636 int
637 mdoc_find(const struct mdoc *mdoc, const char *key)
638 {
639
640 return(mdoc_tokhash_find(mdoc->htab, key));
641 }
642
643
644 static void
645 argcpy(struct mdoc_arg *dst, const struct mdoc_arg *src)
646 {
647 int i;
648
649 dst->line = src->line;
650 dst->pos = src->pos;
651 dst->arg = src->arg;
652 if (0 == (dst->sz = src->sz))
653 return;
654 dst->value = xcalloc(dst->sz, sizeof(char *));
655 for (i = 0; i < (int)dst->sz; i++)
656 dst->value[i] = xstrdup(src->value[i]);
657 }
658
659
660 static struct mdoc_arg *
661 argdup(size_t argsz, const struct mdoc_arg *args)
662 {
663 struct mdoc_arg *pp;
664 int i;
665
666 if (0 == argsz)
667 return(NULL);
668
669 pp = xcalloc((size_t)argsz, sizeof(struct mdoc_arg));
670 for (i = 0; i < (int)argsz; i++)
671 argcpy(&pp[i], &args[i]);
672
673 return(pp);
674 }
675
676
677 /* FIXME: deprecate. */
678 char *
679 mdoc_node2a(struct mdoc_node *node)
680 {
681 static char buf[64];
682
683 assert(node);
684
685 buf[0] = 0;
686 (void)xstrlcat(buf, mdoc_type2a(node->type), 64);
687 if (MDOC_ROOT == node->type)
688 return(buf);
689 (void)xstrlcat(buf, " `", 64);
690 if (MDOC_TEXT == node->type)
691 (void)xstrlcat(buf, node->data.text.string, 64);
692 else
693 (void)xstrlcat(buf, mdoc_macronames[node->tok], 64);
694 (void)xstrlcat(buf, "'", 64);
695
696 return(buf);
697 }
698
699