]> git.cameronkatri.com Git - mandoc.git/blob - mdoc.c
af8a7fcce8802cbf3d24cc12ff02881db4c0b06d
[mandoc.git] / mdoc.c
1 /* $Id: mdoc.c,v 1.49 2009/03/01 23:14:15 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the
7 * above copyright notice and this permission notice appear in all
8 * copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12 * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13 * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
16 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 * PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include <assert.h>
20 #include <ctype.h>
21 #include <err.h>
22 #include <stdarg.h>
23 #include <stdlib.h>
24 #include <stdio.h>
25 #include <string.h>
26
27 #include "private.h"
28
29 /*
30 * Main caller in the libmdoc library. This begins the parsing routine,
31 * handles allocation of data, and so forth. Most of the "work" is done
32 * in macro.c and validate.c.
33 */
34
/* Deep copy and release of macro argument vectors. */
static	struct mdoc_arg	 *argdup(size_t, const struct mdoc_arg *);
static	void		  argfree(size_t, struct mdoc_arg *);
static	void		  argcpy(struct mdoc_arg *,
				const struct mdoc_arg *);

/* Node construction and insertion into the parse tree. */
static	struct mdoc_node *mdoc_node_alloc(const struct mdoc *);
static	int		  mdoc_node_append(struct mdoc *,
				struct mdoc_node *);

/* Per-type payload release, dispatched from mdoc_node_free(). */
static	void		  mdoc_elem_free(struct mdoc_elem *);
static	void		  mdoc_block_free(struct mdoc_block *);
static	void		  mdoc_text_free(struct mdoc_text *);
45
46
47 const char *const __mdoc_macronames[MDOC_MAX] = {
48 "\\\"", "Dd", "Dt", "Os",
49 "Sh", "Ss", "Pp", "D1",
50 "Dl", "Bd", "Ed", "Bl",
51 "El", "It", "Ad", "An",
52 "Ar", "Cd", "Cm", "Dv",
53 "Er", "Ev", "Ex", "Fa",
54 "Fd", "Fl", "Fn", "Ft",
55 "Ic", "In", "Li", "Nd",
56 "Nm", "Op", "Ot", "Pa",
57 "Rv", "St", "Va", "Vt",
58 /* LINTED */
59 "Xr", "\%A", "\%B", "\%D",
60 /* LINTED */
61 "\%I", "\%J", "\%N", "\%O",
62 /* LINTED */
63 "\%P", "\%R", "\%T", "\%V",
64 "Ac", "Ao", "Aq", "At",
65 "Bc", "Bf", "Bo", "Bq",
66 "Bsx", "Bx", "Db", "Dc",
67 "Do", "Dq", "Ec", "Ef",
68 "Em", "Eo", "Fx", "Ms",
69 "No", "Ns", "Nx", "Ox",
70 "Pc", "Pf", "Po", "Pq",
71 "Qc", "Ql", "Qo", "Qq",
72 "Re", "Rs", "Sc", "So",
73 "Sq", "Sm", "Sx", "Sy",
74 "Tn", "Ux", "Xc", "Xo",
75 "Fo", "Fc", "Oo", "Oc",
76 "Bk", "Ek", "Bt", "Hf",
77 "Fr", "Ud",
78 };
79
80 const char *const __mdoc_argnames[MDOC_ARG_MAX] = {
81 "split", "nosplit", "ragged",
82 "unfilled", "literal", "file",
83 "offset", "bullet", "dash",
84 "hyphen", "item", "enum",
85 "tag", "diag", "hang",
86 "ohang", "inset", "column",
87 "width", "compact", "std",
88 "p1003.1-88", "p1003.1-90", "p1003.1-96",
89 "p1003.1-2001", "p1003.1-2004", "p1003.1",
90 "p1003.1b", "p1003.1b-93", "p1003.1c-95",
91 "p1003.1g-2000", "p1003.2-92", "p1387.2-95",
92 "p1003.2", "p1387.2", "isoC-90",
93 "isoC-amd1", "isoC-tcor1", "isoC-tcor2",
94 "isoC-99", "ansiC", "ansiC-89",
95 "ansiC-99", "ieee754", "iso8802-3",
96 "xpg3", "xpg4", "xpg4.2",
97 "xpg4.3", "xbd5", "xcu5",
98 "xsh5", "xns5", "xns5.2d2.0",
99 "xcurses4.2", "susv2", "susv3",
100 "svid4", "filled", "words",
101 "emphasis", "symbolic",
102 };
103
104 const char * const *mdoc_macronames = __mdoc_macronames;
105 const char * const *mdoc_argnames = __mdoc_argnames;
106
107
108 const struct mdoc_node *
109 mdoc_node(const struct mdoc *mdoc)
110 {
111
112 return(mdoc->first);
113 }
114
115
116 const struct mdoc_meta *
117 mdoc_meta(const struct mdoc *mdoc)
118 {
119
120 return(&mdoc->meta);
121 }
122
123
124 void
125 mdoc_free(struct mdoc *mdoc)
126 {
127
128 if (mdoc->first)
129 mdoc_node_freelist(mdoc->first);
130 if (mdoc->htab)
131 mdoc_tokhash_free(mdoc->htab);
132 if (mdoc->meta.title)
133 free(mdoc->meta.title);
134 if (mdoc->meta.os)
135 free(mdoc->meta.os);
136 if (mdoc->meta.name)
137 free(mdoc->meta.name);
138
139 free(mdoc);
140 }
141
142
143 struct mdoc *
144 mdoc_alloc(void *data, const struct mdoc_cb *cb)
145 {
146 struct mdoc *p;
147
148 p = xcalloc(1, sizeof(struct mdoc));
149
150 p->data = data;
151 if (cb)
152 (void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
153
154 p->last = xcalloc(1, sizeof(struct mdoc_node));
155 p->last->type = MDOC_ROOT;
156 p->first = p->last;
157
158 p->next = MDOC_NEXT_CHILD;
159 p->htab = mdoc_tokhash_alloc();
160
161 return(p);
162 }
163
164
165 int
166 mdoc_endparse(struct mdoc *mdoc)
167 {
168
169 if (MDOC_HALT & mdoc->flags)
170 return(0);
171 if (NULL == mdoc->first)
172 return(1);
173
174 assert(mdoc->last);
175 if ( ! macro_end(mdoc)) {
176 mdoc->flags |= MDOC_HALT;
177 return(0);
178 }
179 return(1);
180 }
181
182
183 int
184 mdoc_parseln(struct mdoc *mdoc, int line, char *buf)
185 {
186 int c, i;
187 char tmp[5];
188
189 if (MDOC_HALT & mdoc->flags)
190 return(0);
191
192 mdoc->linetok = 0;
193
194 /*
195 * FIXME: should puke on whitespace in non-literal displays.
196 */
197
198 if ('.' != *buf) {
199 if (SEC_PROLOGUE == mdoc->lastnamed)
200 return(mdoc_perr(mdoc, line, 0,
201 "no text in document prologue"));
202 if ( ! mdoc_word_alloc(mdoc, line, 0, buf))
203 return(0);
204 mdoc->next = MDOC_NEXT_SIBLING;
205 return(1);
206 }
207
208 if (buf[1] && '\\' == buf[1])
209 if (buf[2] && '\"' == buf[2])
210 return(1);
211
212 i = 1;
213 while (buf[i] && ! isspace((int)buf[i]) && i < (int)sizeof(tmp))
214 i++;
215
216 if (i == (int)sizeof(tmp)) {
217 mdoc->flags |= MDOC_HALT;
218 return(mdoc_perr(mdoc, line, 1, "unknown macro"));
219 } else if (i <= 2) {
220 mdoc->flags |= MDOC_HALT;
221 return(mdoc_perr(mdoc, line, 1, "unknown macro"));
222 }
223
224 i--;
225
226 (void)memcpy(tmp, buf + 1, (size_t)i);
227 tmp[i++] = 0;
228
229 if (MDOC_MAX == (c = mdoc_find(mdoc, tmp))) {
230 mdoc->flags |= MDOC_HALT;
231 return(mdoc_perr(mdoc, line, 1, "unknown macro"));
232 }
233
234 while (buf[i] && isspace((int)buf[i]))
235 i++;
236
237 if ( ! mdoc_macro(mdoc, c, line, 1, &i, buf)) {
238 mdoc->flags |= MDOC_HALT;
239 return(0);
240 }
241 return(1);
242 }
243
244
245 void
246 mdoc_vmsg(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
247 {
248 char buf[256];
249 va_list ap;
250
251 if (NULL == mdoc->cb.mdoc_msg)
252 return;
253
254 va_start(ap, fmt);
255 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
256 va_end(ap);
257 (*mdoc->cb.mdoc_msg)(mdoc->data, ln, pos, buf);
258 }
259
260
261 int
262 mdoc_verr(struct mdoc *mdoc, int ln, int pos,
263 const char *fmt, ...)
264 {
265 char buf[256];
266 va_list ap;
267
268 if (NULL == mdoc->cb.mdoc_err)
269 return(0);
270
271 va_start(ap, fmt);
272 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
273 va_end(ap);
274 return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf));
275 }
276
277
278 int
279 mdoc_vwarn(struct mdoc *mdoc, int ln, int pos,
280 enum mdoc_warn type, const char *fmt, ...)
281 {
282 char buf[256];
283 va_list ap;
284
285 if (NULL == mdoc->cb.mdoc_warn)
286 return(0);
287
288 va_start(ap, fmt);
289 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
290 va_end(ap);
291 return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, type, buf));
292 }
293
294
295 int
296 mdoc_macro(struct mdoc *mdoc, int tok,
297 int ln, int ppos, int *pos, char *buf)
298 {
299
300 assert(mdoc_macros[tok].fp);
301
302 if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
303 SEC_PROLOGUE != mdoc->lastnamed)
304 return(mdoc_perr(mdoc, ln, ppos, "macro disallowed in document body"));
305 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
306 SEC_PROLOGUE == mdoc->lastnamed)
307 return(mdoc_perr(mdoc, ln, ppos, "macro disallowed in document prologue"));
308 if (1 != ppos && ! (MDOC_CALLABLE & mdoc_macros[tok].flags))
309 return(mdoc_perr(mdoc, ln, ppos, "macro not callable"));
310 return((*mdoc_macros[tok].fp)(mdoc, tok, ln, ppos, pos, buf));
311 }
312
313
314 static int
315 mdoc_node_append(struct mdoc *mdoc, struct mdoc_node *p)
316 {
317 const char *nn, *nt, *on, *ot, *act;
318
319 assert(mdoc->last);
320 assert(mdoc->first);
321 assert(MDOC_ROOT != p->type);
322
323 /* See if we exceed the suggest line-max. */
324
325 switch (p->type) {
326 case (MDOC_TEXT):
327 /* FALLTHROUGH */
328 case (MDOC_ELEM):
329 /* FALLTHROUGH */
330 case (MDOC_BLOCK):
331 mdoc->linetok++;
332 break;
333 default:
334 break;
335 }
336
337 /* This sort-of works (re-opening of text macros...). */
338 if (mdoc->linetok > MDOC_LINEARG_SOFTMAX)
339 if ( ! mdoc_nwarn(mdoc, p, WARN_COMPAT,
340 "suggested %d tokens per line exceeded (has %d)",
341 MDOC_LINEARG_SOFTMAX, mdoc->linetok))
342 return(0);
343
344 if (MDOC_TEXT == mdoc->last->type)
345 on = "<text>";
346 else if (MDOC_ROOT == mdoc->last->type)
347 on = "<root>";
348 else
349 on = mdoc_macronames[mdoc->last->tok];
350
351 if (MDOC_TEXT == p->type)
352 nn = "<text>";
353 else if (MDOC_ROOT == p->type)
354 nn = "<root>";
355 else
356 nn = mdoc_macronames[p->tok];
357
358 ot = mdoc_type2a(mdoc->last->type);
359 nt = mdoc_type2a(p->type);
360
361 switch (mdoc->next) {
362 case (MDOC_NEXT_SIBLING):
363 mdoc->last->next = p;
364 p->prev = mdoc->last;
365 p->parent = mdoc->last->parent;
366 act = "sibling";
367 break;
368 case (MDOC_NEXT_CHILD):
369 mdoc->last->child = p;
370 p->parent = mdoc->last;
371 act = "child";
372 break;
373 default:
374 abort();
375 /* NOTREACHED */
376 }
377
378 if ( ! mdoc_valid_pre(mdoc, p))
379 return(0);
380
381 switch (p->type) {
382 case (MDOC_HEAD):
383 assert(MDOC_BLOCK == p->parent->type);
384 p->parent->data.block.head = p;
385 break;
386 case (MDOC_TAIL):
387 assert(MDOC_BLOCK == p->parent->type);
388 p->parent->data.block.tail = p;
389 break;
390 case (MDOC_BODY):
391 assert(MDOC_BLOCK == p->parent->type);
392 p->parent->data.block.body = p;
393 break;
394 default:
395 break;
396 }
397
398 mdoc->last = p;
399 mdoc_msg(mdoc, "parse: %s `%s' %s of %s `%s'",
400 nt, nn, act, ot, on);
401 return(1);
402 }
403
404
405 static struct mdoc_node *
406 mdoc_node_alloc(const struct mdoc *mdoc)
407 {
408 struct mdoc_node *p;
409
410 p = xcalloc(1, sizeof(struct mdoc_node));
411 p->sec = mdoc->lastsec;
412
413 return(p);
414 }
415
416
417 int
418 mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, int tok)
419 {
420 struct mdoc_node *p;
421
422 assert(mdoc->first);
423 assert(mdoc->last);
424
425 p = mdoc_node_alloc(mdoc);
426
427 p->line = line;
428 p->pos = pos;
429 p->type = MDOC_TAIL;
430 p->tok = tok;
431
432 return(mdoc_node_append(mdoc, p));
433 }
434
435
436 int
437 mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, int tok)
438 {
439 struct mdoc_node *p;
440
441 assert(mdoc->first);
442 assert(mdoc->last);
443
444 p = mdoc_node_alloc(mdoc);
445
446 p->line = line;
447 p->pos = pos;
448 p->type = MDOC_HEAD;
449 p->tok = tok;
450
451 return(mdoc_node_append(mdoc, p));
452 }
453
454
455 int
456 mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, int tok)
457 {
458 struct mdoc_node *p;
459
460 assert(mdoc->first);
461 assert(mdoc->last);
462
463 p = mdoc_node_alloc(mdoc);
464
465 p->line = line;
466 p->pos = pos;
467 p->type = MDOC_BODY;
468 p->tok = tok;
469
470 return(mdoc_node_append(mdoc, p));
471 }
472
473
474 int
475 mdoc_root_alloc(struct mdoc *mdoc)
476 {
477 struct mdoc_node *p;
478
479 p = mdoc_node_alloc(mdoc);
480
481 p->type = MDOC_ROOT;
482
483 return(mdoc_node_append(mdoc, p));
484 }
485
486
487 int
488 mdoc_block_alloc(struct mdoc *mdoc, int line, int pos,
489 int tok, size_t argsz, const struct mdoc_arg *args)
490 {
491 struct mdoc_node *p;
492
493 p = mdoc_node_alloc(mdoc);
494
495 p->pos = pos;
496 p->line = line;
497 p->type = MDOC_BLOCK;
498 p->tok = tok;
499 p->data.block.argc = argsz;
500 p->data.block.argv = argdup(argsz, args);
501
502 return(mdoc_node_append(mdoc, p));
503 }
504
505
506 int
507 mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos,
508 int tok, size_t argsz, const struct mdoc_arg *args)
509 {
510 struct mdoc_node *p;
511
512 p = mdoc_node_alloc(mdoc);
513
514 p->line = line;
515 p->pos = pos;
516 p->type = MDOC_ELEM;
517 p->tok = tok;
518 p->data.elem.argc = argsz;
519 p->data.elem.argv = argdup(argsz, args);
520
521 return(mdoc_node_append(mdoc, p));
522 }
523
524
525 int
526 mdoc_word_alloc(struct mdoc *mdoc,
527 int line, int pos, const char *word)
528 {
529 struct mdoc_node *p;
530
531 p = mdoc_node_alloc(mdoc);
532
533 p->line = line;
534 p->pos = pos;
535 p->type = MDOC_TEXT;
536 p->data.text.string = xstrdup(word);
537
538 return(mdoc_node_append(mdoc, p));
539 }
540
541
542 static void
543 argfree(size_t sz, struct mdoc_arg *p)
544 {
545 int i, j;
546
547 if (0 == sz)
548 return;
549
550 assert(p);
551 /* LINTED */
552 for (i = 0; i < (int)sz; i++)
553 if (p[i].sz > 0) {
554 assert(p[i].value);
555 /* LINTED */
556 for (j = 0; j < (int)p[i].sz; j++)
557 free(p[i].value[j]);
558 free(p[i].value);
559 }
560 free(p);
561 }
562
563
564 static void
565 mdoc_elem_free(struct mdoc_elem *p)
566 {
567
568 argfree(p->argc, p->argv);
569 }
570
571
572 static void
573 mdoc_block_free(struct mdoc_block *p)
574 {
575
576 argfree(p->argc, p->argv);
577 }
578
579
580 static void
581 mdoc_text_free(struct mdoc_text *p)
582 {
583
584 if (p->string)
585 free(p->string);
586 }
587
588
589 void
590 mdoc_node_free(struct mdoc_node *p)
591 {
592
593 switch (p->type) {
594 case (MDOC_TEXT):
595 mdoc_text_free(&p->data.text);
596 break;
597 case (MDOC_ELEM):
598 mdoc_elem_free(&p->data.elem);
599 break;
600 case (MDOC_BLOCK):
601 mdoc_block_free(&p->data.block);
602 break;
603 default:
604 break;
605 }
606
607 free(p);
608 }
609
610
611 void
612 mdoc_node_freelist(struct mdoc_node *p)
613 {
614
615 if (p->child)
616 mdoc_node_freelist(p->child);
617 if (p->next)
618 mdoc_node_freelist(p->next);
619
620 mdoc_node_free(p);
621 }
622
623
624 int
625 mdoc_find(const struct mdoc *mdoc, const char *key)
626 {
627
628 return(mdoc_tokhash_find(mdoc->htab, key));
629 }
630
631
632 static void
633 argcpy(struct mdoc_arg *dst, const struct mdoc_arg *src)
634 {
635 int i;
636
637 dst->line = src->line;
638 dst->pos = src->pos;
639 dst->arg = src->arg;
640 if (0 == (dst->sz = src->sz))
641 return;
642 dst->value = xcalloc(dst->sz, sizeof(char *));
643 for (i = 0; i < (int)dst->sz; i++)
644 dst->value[i] = xstrdup(src->value[i]);
645 }
646
647
648 static struct mdoc_arg *
649 argdup(size_t argsz, const struct mdoc_arg *args)
650 {
651 struct mdoc_arg *pp;
652 int i;
653
654 if (0 == argsz)
655 return(NULL);
656
657 pp = xcalloc((size_t)argsz, sizeof(struct mdoc_arg));
658 for (i = 0; i < (int)argsz; i++)
659 argcpy(&pp[i], &args[i]);
660
661 return(pp);
662 }
663
664
665 /* FIXME: deprecate. */
666 char *
667 mdoc_node2a(struct mdoc_node *node)
668 {
669 static char buf[64];
670
671 assert(node);
672
673 buf[0] = 0;
674 (void)xstrlcat(buf, mdoc_type2a(node->type), 64);
675 if (MDOC_ROOT == node->type)
676 return(buf);
677 (void)xstrlcat(buf, " `", 64);
678 if (MDOC_TEXT == node->type)
679 (void)xstrlcat(buf, node->data.text.string, 64);
680 else
681 (void)xstrlcat(buf, mdoc_macronames[node->tok], 64);
682 (void)xstrlcat(buf, "'", 64);
683
684 return(buf);
685 }
686
687