]> git.cameronkatri.com Git - mandoc.git/blob - mdoc.c
ca3f5752a648092b916352875150daa8c2043bc9
[mandoc.git] / mdoc.c
1 /* $Id: mdoc.c,v 1.50 2009/03/02 17:14:46 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the
7 * above copyright notice and this permission notice appear in all
8 * copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12 * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13 * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
16 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 * PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include <assert.h>
20 #include <ctype.h>
21 #include <err.h>
22 #include <stdarg.h>
23 #include <stdlib.h>
24 #include <stdio.h>
25 #include <string.h>
26
27 #include "private.h"
28
29 /*
30 * Main caller in the libmdoc library. This begins the parsing routine,
31 * handles allocation of data, and so forth. Most of the "work" is done
32 * in macro.c and validate.c.
33 */
34
/*
 * Forward declarations for the file-local helpers defined further down.
 * Every static function in this file is listed, including
 * mdoc_block_free(), so that definition order does not matter.
 */

/* Argument-vector handling: deep copy and release. */
static	struct mdoc_arg	 *argdup(size_t argsz, const struct mdoc_arg *args);
static	void		  argfree(size_t sz, struct mdoc_arg *p);
static	void		  argcpy(struct mdoc_arg *dst,
				const struct mdoc_arg *src);

/* Node allocation and linkage into the parse tree. */
static	struct mdoc_node *mdoc_node_alloc(const struct mdoc *mdoc);
static	int		  mdoc_node_append(struct mdoc *mdoc,
				struct mdoc_node *p);

/* Release of the type-specific payload carried by a node. */
static	void		  mdoc_elem_free(struct mdoc_elem *p);
static	void		  mdoc_block_free(struct mdoc_block *p);
static	void		  mdoc_text_free(struct mdoc_text *p);
45
46
47 const char *const __mdoc_macronames[MDOC_MAX] = {
48 "\\\"", "Dd", "Dt", "Os",
49 "Sh", "Ss", "Pp", "D1",
50 "Dl", "Bd", "Ed", "Bl",
51 "El", "It", "Ad", "An",
52 "Ar", "Cd", "Cm", "Dv",
53 "Er", "Ev", "Ex", "Fa",
54 "Fd", "Fl", "Fn", "Ft",
55 "Ic", "In", "Li", "Nd",
56 "Nm", "Op", "Ot", "Pa",
57 "Rv", "St", "Va", "Vt",
58 /* LINTED */
59 "Xr", "\%A", "\%B", "\%D",
60 /* LINTED */
61 "\%I", "\%J", "\%N", "\%O",
62 /* LINTED */
63 "\%P", "\%R", "\%T", "\%V",
64 "Ac", "Ao", "Aq", "At",
65 "Bc", "Bf", "Bo", "Bq",
66 "Bsx", "Bx", "Db", "Dc",
67 "Do", "Dq", "Ec", "Ef",
68 "Em", "Eo", "Fx", "Ms",
69 "No", "Ns", "Nx", "Ox",
70 "Pc", "Pf", "Po", "Pq",
71 "Qc", "Ql", "Qo", "Qq",
72 "Re", "Rs", "Sc", "So",
73 "Sq", "Sm", "Sx", "Sy",
74 "Tn", "Ux", "Xc", "Xo",
75 "Fo", "Fc", "Oo", "Oc",
76 "Bk", "Ek", "Bt", "Hf",
77 "Fr", "Ud",
78 };
79
80 const char *const __mdoc_argnames[MDOC_ARG_MAX] = {
81 "split", "nosplit", "ragged",
82 "unfilled", "literal", "file",
83 "offset", "bullet", "dash",
84 "hyphen", "item", "enum",
85 "tag", "diag", "hang",
86 "ohang", "inset", "column",
87 "width", "compact", "std",
88 "p1003.1-88", "p1003.1-90", "p1003.1-96",
89 "p1003.1-2001", "p1003.1-2004", "p1003.1",
90 "p1003.1b", "p1003.1b-93", "p1003.1c-95",
91 "p1003.1g-2000", "p1003.2-92", "p1387.2-95",
92 "p1003.2", "p1387.2", "isoC-90",
93 "isoC-amd1", "isoC-tcor1", "isoC-tcor2",
94 "isoC-99", "ansiC", "ansiC-89",
95 "ansiC-99", "ieee754", "iso8802-3",
96 "xpg3", "xpg4", "xpg4.2",
97 "xpg4.3", "xbd5", "xcu5",
98 "xsh5", "xns5", "xns5.2d2.0",
99 "xcurses4.2", "susv2", "susv3",
100 "svid4", "filled", "words",
101 "emphasis", "symbolic",
102 };
103
104 const char * const *mdoc_macronames = __mdoc_macronames;
105 const char * const *mdoc_argnames = __mdoc_argnames;
106
107
108 const struct mdoc_node *
109 mdoc_node(const struct mdoc *mdoc)
110 {
111
112 return(mdoc->first);
113 }
114
115
116 const struct mdoc_meta *
117 mdoc_meta(const struct mdoc *mdoc)
118 {
119
120 return(&mdoc->meta);
121 }
122
123
124 void
125 mdoc_free(struct mdoc *mdoc)
126 {
127
128 if (mdoc->first)
129 mdoc_node_freelist(mdoc->first);
130 if (mdoc->htab)
131 mdoc_tokhash_free(mdoc->htab);
132 if (mdoc->meta.title)
133 free(mdoc->meta.title);
134 if (mdoc->meta.os)
135 free(mdoc->meta.os);
136 if (mdoc->meta.name)
137 free(mdoc->meta.name);
138
139 free(mdoc);
140 }
141
142
143 struct mdoc *
144 mdoc_alloc(void *data, const struct mdoc_cb *cb)
145 {
146 struct mdoc *p;
147
148 p = xcalloc(1, sizeof(struct mdoc));
149
150 p->data = data;
151 if (cb)
152 (void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
153
154 p->last = xcalloc(1, sizeof(struct mdoc_node));
155 p->last->type = MDOC_ROOT;
156 p->first = p->last;
157
158 p->next = MDOC_NEXT_CHILD;
159 p->htab = mdoc_tokhash_alloc();
160
161 return(p);
162 }
163
164
165 int
166 mdoc_endparse(struct mdoc *mdoc)
167 {
168
169 if (MDOC_HALT & mdoc->flags)
170 return(0);
171 if (NULL == mdoc->first)
172 return(1);
173
174 assert(mdoc->last);
175 if ( ! macro_end(mdoc)) {
176 mdoc->flags |= MDOC_HALT;
177 return(0);
178 }
179 return(1);
180 }
181
182
183 /*
184 * Main line-parsing routine. If the line is a macro-line (started with
185 * a '.' control character), then pass along to the parser, which parses
186 * subsequent macros until the end of line. If normal text, simply
187 * append the entire line to the chain.
188 */
189 int
190 mdoc_parseln(struct mdoc *mdoc, int line, char *buf)
191 {
192 int c, i;
193 char tmp[5];
194
195 if (MDOC_HALT & mdoc->flags)
196 return(0);
197
198 mdoc->linetok = 0;
199
200 if ('.' != *buf) {
201 /*
202 * Free-form text. Not allowed in the prologue.
203 */
204 if (SEC_PROLOGUE == mdoc->lastnamed)
205 return(mdoc_perr(mdoc, line, 0,
206 "no text in prologue"));
207
208 if ( ! mdoc_word_alloc(mdoc, line, 0, buf))
209 return(0);
210 mdoc->next = MDOC_NEXT_SIBLING;
211 return(1);
212 }
213
214 /*
215 * Control-character detected. Begin the parsing sequence.
216 */
217
218 if (buf[1] && '\\' == buf[1])
219 if (buf[2] && '\"' == buf[2])
220 return(1);
221
222 i = 1;
223 while (buf[i] && ! isspace((int)buf[i]) && i < (int)sizeof(tmp))
224 i++;
225
226 if (i == (int)sizeof(tmp)) {
227 mdoc->flags |= MDOC_HALT;
228 return(mdoc_perr(mdoc, line, 1, "unknown macro"));
229 } else if (i <= 2) {
230 mdoc->flags |= MDOC_HALT;
231 return(mdoc_perr(mdoc, line, 1, "unknown macro"));
232 }
233
234 i--;
235
236 (void)memcpy(tmp, buf + 1, (size_t)i);
237 tmp[i++] = 0;
238
239 if (MDOC_MAX == (c = mdoc_find(mdoc, tmp))) {
240 mdoc->flags |= MDOC_HALT;
241 return(mdoc_perr(mdoc, line, 1, "unknown macro"));
242 }
243
244 while (buf[i] && isspace((int)buf[i]))
245 i++;
246
247 if ( ! mdoc_macro(mdoc, c, line, 1, &i, buf)) {
248 mdoc->flags |= MDOC_HALT;
249 return(0);
250 }
251
252 return(1);
253 }
254
255
256 void
257 mdoc_vmsg(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
258 {
259 char buf[256];
260 va_list ap;
261
262 if (NULL == mdoc->cb.mdoc_msg)
263 return;
264
265 va_start(ap, fmt);
266 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
267 va_end(ap);
268 (*mdoc->cb.mdoc_msg)(mdoc->data, ln, pos, buf);
269 }
270
271
272 int
273 mdoc_verr(struct mdoc *mdoc, int ln, int pos,
274 const char *fmt, ...)
275 {
276 char buf[256];
277 va_list ap;
278
279 if (NULL == mdoc->cb.mdoc_err)
280 return(0);
281
282 va_start(ap, fmt);
283 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
284 va_end(ap);
285 return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf));
286 }
287
288
289 int
290 mdoc_vwarn(struct mdoc *mdoc, int ln, int pos,
291 enum mdoc_warn type, const char *fmt, ...)
292 {
293 char buf[256];
294 va_list ap;
295
296 if (NULL == mdoc->cb.mdoc_warn)
297 return(0);
298
299 va_start(ap, fmt);
300 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
301 va_end(ap);
302 return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, type, buf));
303 }
304
305
306 int
307 mdoc_macro(struct mdoc *mdoc, int tok,
308 int ln, int ppos, int *pos, char *buf)
309 {
310
311 assert(mdoc_macros[tok].fp);
312
313 if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
314 SEC_PROLOGUE != mdoc->lastnamed)
315 return(mdoc_perr(mdoc, ln, ppos, "macro disallowed in document body"));
316 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
317 SEC_PROLOGUE == mdoc->lastnamed)
318 return(mdoc_perr(mdoc, ln, ppos, "macro disallowed in document prologue"));
319 if (1 != ppos && ! (MDOC_CALLABLE & mdoc_macros[tok].flags))
320 return(mdoc_perr(mdoc, ln, ppos, "macro not callable"));
321 return((*mdoc_macros[tok].fp)(mdoc, tok, ln, ppos, pos, buf));
322 }
323
324
325 static int
326 mdoc_node_append(struct mdoc *mdoc, struct mdoc_node *p)
327 {
328 const char *nn, *nt, *on, *ot, *act;
329
330 assert(mdoc->last);
331 assert(mdoc->first);
332 assert(MDOC_ROOT != p->type);
333
334 /* See if we exceed the suggest line-max. */
335
336 switch (p->type) {
337 case (MDOC_TEXT):
338 /* FALLTHROUGH */
339 case (MDOC_ELEM):
340 /* FALLTHROUGH */
341 case (MDOC_BLOCK):
342 mdoc->linetok++;
343 break;
344 default:
345 break;
346 }
347
348 /* This sort-of works (re-opening of text macros...). */
349 if (mdoc->linetok > MDOC_LINEARG_SOFTMAX)
350 if ( ! mdoc_nwarn(mdoc, p, WARN_COMPAT,
351 "suggested %d tokens per line exceeded (has %d)",
352 MDOC_LINEARG_SOFTMAX, mdoc->linetok))
353 return(0);
354
355 if (MDOC_TEXT == mdoc->last->type)
356 on = "<text>";
357 else if (MDOC_ROOT == mdoc->last->type)
358 on = "<root>";
359 else
360 on = mdoc_macronames[mdoc->last->tok];
361
362 if (MDOC_TEXT == p->type)
363 nn = "<text>";
364 else if (MDOC_ROOT == p->type)
365 nn = "<root>";
366 else
367 nn = mdoc_macronames[p->tok];
368
369 ot = mdoc_type2a(mdoc->last->type);
370 nt = mdoc_type2a(p->type);
371
372 switch (mdoc->next) {
373 case (MDOC_NEXT_SIBLING):
374 mdoc->last->next = p;
375 p->prev = mdoc->last;
376 p->parent = mdoc->last->parent;
377 act = "sibling";
378 break;
379 case (MDOC_NEXT_CHILD):
380 mdoc->last->child = p;
381 p->parent = mdoc->last;
382 act = "child";
383 break;
384 default:
385 abort();
386 /* NOTREACHED */
387 }
388
389 if ( ! mdoc_valid_pre(mdoc, p))
390 return(0);
391
392 switch (p->type) {
393 case (MDOC_HEAD):
394 assert(MDOC_BLOCK == p->parent->type);
395 p->parent->data.block.head = p;
396 break;
397 case (MDOC_TAIL):
398 assert(MDOC_BLOCK == p->parent->type);
399 p->parent->data.block.tail = p;
400 break;
401 case (MDOC_BODY):
402 assert(MDOC_BLOCK == p->parent->type);
403 p->parent->data.block.body = p;
404 break;
405 default:
406 break;
407 }
408
409 mdoc->last = p;
410 mdoc_msg(mdoc, "parse: %s `%s' %s of %s `%s'",
411 nt, nn, act, ot, on);
412 return(1);
413 }
414
415
416 static struct mdoc_node *
417 mdoc_node_alloc(const struct mdoc *mdoc)
418 {
419 struct mdoc_node *p;
420
421 p = xcalloc(1, sizeof(struct mdoc_node));
422 p->sec = mdoc->lastsec;
423
424 return(p);
425 }
426
427
428 int
429 mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, int tok)
430 {
431 struct mdoc_node *p;
432
433 assert(mdoc->first);
434 assert(mdoc->last);
435
436 p = mdoc_node_alloc(mdoc);
437
438 p->line = line;
439 p->pos = pos;
440 p->type = MDOC_TAIL;
441 p->tok = tok;
442
443 return(mdoc_node_append(mdoc, p));
444 }
445
446
447 int
448 mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, int tok)
449 {
450 struct mdoc_node *p;
451
452 assert(mdoc->first);
453 assert(mdoc->last);
454
455 p = mdoc_node_alloc(mdoc);
456
457 p->line = line;
458 p->pos = pos;
459 p->type = MDOC_HEAD;
460 p->tok = tok;
461
462 return(mdoc_node_append(mdoc, p));
463 }
464
465
466 int
467 mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, int tok)
468 {
469 struct mdoc_node *p;
470
471 assert(mdoc->first);
472 assert(mdoc->last);
473
474 p = mdoc_node_alloc(mdoc);
475
476 p->line = line;
477 p->pos = pos;
478 p->type = MDOC_BODY;
479 p->tok = tok;
480
481 return(mdoc_node_append(mdoc, p));
482 }
483
484
485 int
486 mdoc_root_alloc(struct mdoc *mdoc)
487 {
488 struct mdoc_node *p;
489
490 p = mdoc_node_alloc(mdoc);
491
492 p->type = MDOC_ROOT;
493
494 return(mdoc_node_append(mdoc, p));
495 }
496
497
498 int
499 mdoc_block_alloc(struct mdoc *mdoc, int line, int pos,
500 int tok, size_t argsz, const struct mdoc_arg *args)
501 {
502 struct mdoc_node *p;
503
504 p = mdoc_node_alloc(mdoc);
505
506 p->pos = pos;
507 p->line = line;
508 p->type = MDOC_BLOCK;
509 p->tok = tok;
510 p->data.block.argc = argsz;
511 p->data.block.argv = argdup(argsz, args);
512
513 return(mdoc_node_append(mdoc, p));
514 }
515
516
517 int
518 mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos,
519 int tok, size_t argsz, const struct mdoc_arg *args)
520 {
521 struct mdoc_node *p;
522
523 p = mdoc_node_alloc(mdoc);
524
525 p->line = line;
526 p->pos = pos;
527 p->type = MDOC_ELEM;
528 p->tok = tok;
529 p->data.elem.argc = argsz;
530 p->data.elem.argv = argdup(argsz, args);
531
532 return(mdoc_node_append(mdoc, p));
533 }
534
535
536 int
537 mdoc_word_alloc(struct mdoc *mdoc,
538 int line, int pos, const char *word)
539 {
540 struct mdoc_node *p;
541
542 p = mdoc_node_alloc(mdoc);
543
544 p->line = line;
545 p->pos = pos;
546 p->type = MDOC_TEXT;
547 p->data.text.string = xstrdup(word);
548
549 return(mdoc_node_append(mdoc, p));
550 }
551
552
553 static void
554 argfree(size_t sz, struct mdoc_arg *p)
555 {
556 int i, j;
557
558 if (0 == sz)
559 return;
560
561 assert(p);
562 /* LINTED */
563 for (i = 0; i < (int)sz; i++)
564 if (p[i].sz > 0) {
565 assert(p[i].value);
566 /* LINTED */
567 for (j = 0; j < (int)p[i].sz; j++)
568 free(p[i].value[j]);
569 free(p[i].value);
570 }
571 free(p);
572 }
573
574
575 static void
576 mdoc_elem_free(struct mdoc_elem *p)
577 {
578
579 argfree(p->argc, p->argv);
580 }
581
582
583 static void
584 mdoc_block_free(struct mdoc_block *p)
585 {
586
587 argfree(p->argc, p->argv);
588 }
589
590
591 static void
592 mdoc_text_free(struct mdoc_text *p)
593 {
594
595 if (p->string)
596 free(p->string);
597 }
598
599
600 void
601 mdoc_node_free(struct mdoc_node *p)
602 {
603
604 switch (p->type) {
605 case (MDOC_TEXT):
606 mdoc_text_free(&p->data.text);
607 break;
608 case (MDOC_ELEM):
609 mdoc_elem_free(&p->data.elem);
610 break;
611 case (MDOC_BLOCK):
612 mdoc_block_free(&p->data.block);
613 break;
614 default:
615 break;
616 }
617
618 free(p);
619 }
620
621
622 void
623 mdoc_node_freelist(struct mdoc_node *p)
624 {
625
626 if (p->child)
627 mdoc_node_freelist(p->child);
628 if (p->next)
629 mdoc_node_freelist(p->next);
630
631 mdoc_node_free(p);
632 }
633
634
635 int
636 mdoc_find(const struct mdoc *mdoc, const char *key)
637 {
638
639 return(mdoc_tokhash_find(mdoc->htab, key));
640 }
641
642
643 static void
644 argcpy(struct mdoc_arg *dst, const struct mdoc_arg *src)
645 {
646 int i;
647
648 dst->line = src->line;
649 dst->pos = src->pos;
650 dst->arg = src->arg;
651 if (0 == (dst->sz = src->sz))
652 return;
653 dst->value = xcalloc(dst->sz, sizeof(char *));
654 for (i = 0; i < (int)dst->sz; i++)
655 dst->value[i] = xstrdup(src->value[i]);
656 }
657
658
659 static struct mdoc_arg *
660 argdup(size_t argsz, const struct mdoc_arg *args)
661 {
662 struct mdoc_arg *pp;
663 int i;
664
665 if (0 == argsz)
666 return(NULL);
667
668 pp = xcalloc((size_t)argsz, sizeof(struct mdoc_arg));
669 for (i = 0; i < (int)argsz; i++)
670 argcpy(&pp[i], &args[i]);
671
672 return(pp);
673 }
674
675
676 /* FIXME: deprecate. */
677 char *
678 mdoc_node2a(struct mdoc_node *node)
679 {
680 static char buf[64];
681
682 assert(node);
683
684 buf[0] = 0;
685 (void)xstrlcat(buf, mdoc_type2a(node->type), 64);
686 if (MDOC_ROOT == node->type)
687 return(buf);
688 (void)xstrlcat(buf, " `", 64);
689 if (MDOC_TEXT == node->type)
690 (void)xstrlcat(buf, node->data.text.string, 64);
691 else
692 (void)xstrlcat(buf, mdoc_macronames[node->tok], 64);
693 (void)xstrlcat(buf, "'", 64);
694
695 return(buf);
696 }
697
698