]> git.cameronkatri.com Git - mandoc.git/blob - mdoc.c
*** empty log message ***
[mandoc.git] / mdoc.c
1 /* $Id: mdoc.c,v 1.11 2008/12/30 13:43:53 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the
7 * above copyright notice and this permission notice appear in all
8 * copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12 * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13 * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
16 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 * PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include <assert.h>
20 #include <ctype.h>
21 #include <err.h>
22 #include <stdarg.h>
23 #include <stdlib.h>
24 #include <stdio.h>
25 #include <string.h>
26
27 #include "private.h"
28
/*
 * Macro name strings, MDOC_MAX entries.  The order of the names matches
 * the per-entry comments in __mdoc_macros[] below.  mdoc_node_append()
 * indexes this table, through the mdoc_macronames alias, by a node's
 * `tok' value to label its trace messages.
 */
29 const char *const __mdoc_macronames[MDOC_MAX] = {
30 "\\\"", "Dd", "Dt", "Os",
31 "Sh", "Ss", "Pp", "D1",
32 "Dl", "Bd", "Ed", "Bl",
33 "El", "It", "Ad", "An",
34 "Ar", "Cd", "Cm", "Dv",
35 "Er", "Ev", "Ex", "Fa",
36 "Fd", "Fl", "Fn", "Ft",
37 "Ic", "In", "Li", "Nd",
38 "Nm", "Op", "Ot", "Pa",
39 "Rv", "St", "Va", "Vt",
40 /* LINTED */
41 "Xr", "\%A", "\%B", "\%D",
42 /* LINTED */
43 "\%I", "\%J", "\%N", "\%O",
44 /* LINTED */
45 "\%P", "\%R", "\%T", "\%V",
46 "Ac", "Ao", "Aq", "At",
47 "Bc", "Bf", "Bo", "Bq",
48 "Bsx", "Bx", "Db", "Dc",
49 "Do", "Dq", "Ec", "Ef",
50 "Em", "Eo", "Fx", "Ms",
51 "No", "Ns", "Nx", "Ox",
52 "Pc", "Pf", "Po", "Pq",
53 "Qc", "Ql", "Qo", "Qq",
54 "Re", "Rs", "Sc", "So",
55 "Sq", "Sm", "Sx", "Sy",
56 "Tn", "Ux", "Xc", "Xo",
57 "Fo", "Fc", "Oo", "Oc",
58 "Bk", "Ek", "Bt", "Hf",
59 "Fr", "Ud",
60 };
61
/*
 * Argument name strings, MDOC_ARG_MAX entries, exported through the
 * mdoc_argnames alias.  None of the code visible in this file indexes
 * the table.
 * NOTE(review): presumably indexed by the argument identifier stored in
 * the `arg' member of struct mdoc_arg -- confirm against private.h.
 */
62 const char *const __mdoc_argnames[MDOC_ARG_MAX] = {
63 "split", "nosplit", "ragged",
64 "unfilled", "literal", "file",
65 "offset", "bullet", "dash",
66 "hyphen", "item", "enum",
67 "tag", "diag", "hang",
68 "ohang", "inset", "column",
69 "width", "compact", "std",
70 "p1003.1-88", "p1003.1-90", "p1003.1-96",
71 "p1003.1-2001", "p1003.1-2004", "p1003.1",
72 "p1003.1b", "p1003.1b-93", "p1003.1c-95",
73 "p1003.1g-2000", "p1003.2-92", "p1387.2-95",
74 "p1003.2", "p1387.2", "isoC-90",
75 "isoC-amd1", "isoC-tcor1", "isoC-tcor2",
76 "isoC-99", "ansiC", "ansiC-89",
77 "ansiC-99", "ieee754", "iso8802-3",
78 "xpg3", "xpg4", "xpg4.2",
79 "xpg4.3", "xbd5", "xcu5",
80 "xsh5", "xns5", "xns5.2d2.0",
81 "xcurses4.2", "susv2", "susv3",
82 "svid4", "filled", "words",
83 };
84
/*
 * Per-macro dispatch table, MDOC_MAX entries, in the same order as
 * __mdoc_macronames[] (see the name in each trailing comment).  Each
 * entry is { handler, flags }; mdoc_macro() reads them as `fp' and
 * `flags':
 *  - a NULL handler makes mdoc_macro() report ERR_MACRO_NOTSUP;
 *  - MDOC_CALLABLE must be set for mdoc_macro() to accept the macro at
 *    a position other than 1, otherwise it reports ERR_MACRO_NOTCALL.
 * NOTE(review): MDOC_EXPLICIT is not consulted anywhere in the code
 * visible here; presumably the scoped handlers read it -- confirm.
 */
85 const struct mdoc_macro __mdoc_macros[MDOC_MAX] = {
86 { NULL, 0 }, /* \" */
87 { macro_prologue_ddate, 0 }, /* Dd */
88 { macro_prologue_dtitle, 0 }, /* Dt */
89 { macro_prologue_os, 0 }, /* Os */
90 { macro_scoped_implicit, 0 }, /* Sh */
91 { macro_scoped_implicit, 0 }, /* Ss */
92 { macro_text, 0 }, /* Pp */
93 { macro_scoped_line, 0 }, /* D1 */
94 { macro_scoped_line, 0 }, /* Dl */
95 { macro_scoped_explicit, MDOC_EXPLICIT }, /* Bd */
96 { macro_scoped_explicit, 0 }, /* Ed */
97 { macro_scoped_explicit, MDOC_EXPLICIT }, /* Bl */
98 { macro_scoped_explicit, 0 }, /* El */
99 { NULL, 0 }, /* It */
100 { macro_text, MDOC_CALLABLE }, /* Ad */
101 { NULL, 0 }, /* An */
102 { macro_text, MDOC_CALLABLE }, /* Ar */
103 { NULL, 0 }, /* Cd */
104 { macro_text, MDOC_CALLABLE }, /* Cm */
105 { macro_text, MDOC_CALLABLE }, /* Dv */
106 { macro_text, MDOC_CALLABLE }, /* Er */
107 { macro_text, MDOC_CALLABLE }, /* Ev */
108 { macro_constant_argv, 0 }, /* Ex */
109 { macro_text, MDOC_CALLABLE }, /* Fa */
110 { NULL, 0 }, /* Fd */
111 { macro_text, MDOC_CALLABLE }, /* Fl */
112 { NULL, 0 }, /* Fn */
113 { macro_text, 0 }, /* Ft */
114 { macro_text, MDOC_CALLABLE }, /* Ic */
115 { NULL, 0 }, /* In */
116 { macro_text, MDOC_CALLABLE }, /* Li */
117 { macro_constant, 0 }, /* Nd */
118 { NULL, 0 }, /* Nm */
119 { NULL, 0 }, /* Op */
120 { NULL, 0 }, /* Ot */
121 { macro_text, MDOC_CALLABLE }, /* Pa */
122 { macro_constant_argv, 0 }, /* Rv */
123 { NULL, 0 }, /* St */
124 { macro_text, MDOC_CALLABLE }, /* Va */
125 { macro_text, MDOC_CALLABLE }, /* Vt */
126 { NULL, 0 }, /* Xr */
127 { NULL, 0 }, /* %A */
128 { NULL, 0 }, /* %B */
129 { NULL, 0 }, /* %D */
130 { NULL, 0 }, /* %I */
131 { NULL, 0 }, /* %J */
132 { NULL, 0 }, /* %N */
133 { NULL, 0 }, /* %O */
134 { NULL, 0 }, /* %P */
135 { NULL, 0 }, /* %R */
136 { NULL, 0 }, /* %T */
137 { NULL, 0 }, /* %V */
138 { NULL, 0 }, /* Ac */
139 { NULL, 0 }, /* Ao */
140 { macro_scoped_pline, MDOC_CALLABLE }, /* Aq */
141 { macro_constant, 0 }, /* At */
142 { NULL, 0 }, /* Bc */
143 { NULL, 0 }, /* Bf */
144 { NULL, 0 }, /* Bo */
145 { macro_scoped_pline, MDOC_CALLABLE }, /* Bq */
146 { macro_constant_delimited, 0 }, /* Bsx */
147 { macro_constant_delimited, 0 }, /* Bx */
148 { NULL, 0 }, /* Db */
149 { NULL, 0 }, /* Dc */
150 { NULL, 0 }, /* Do */
151 { macro_scoped_pline, MDOC_CALLABLE }, /* Dq */
152 { NULL, 0 }, /* Ec */
153 { NULL, 0 }, /* Ef */
154 { macro_text, MDOC_CALLABLE }, /* Em */
155 { NULL, 0 }, /* Eo */
156 { macro_constant_delimited, 0 }, /* Fx */
157 { macro_text, 0 }, /* Ms */
158 { NULL, 0 }, /* No */
159 { NULL, 0 }, /* Ns */
160 { macro_constant_delimited, 0 }, /* Nx */
161 { macro_constant_delimited, 0 }, /* Ox */
162 { NULL, 0 }, /* Pc */
163 { NULL, 0 }, /* Pf */
164 { NULL, 0 }, /* Po */
165 { macro_scoped_pline, MDOC_CALLABLE }, /* Pq */
166 { NULL, 0 }, /* Qc */
167 { macro_scoped_pline, MDOC_CALLABLE }, /* Ql */
168 { NULL, 0 }, /* Qo */
169 { macro_scoped_pline, MDOC_CALLABLE }, /* Qq */
170 { NULL, 0 }, /* Re */
171 { NULL, 0 }, /* Rs */
172 { NULL, 0 }, /* Sc */
173 { NULL, 0 }, /* So */
174 { macro_scoped_pline, MDOC_CALLABLE }, /* Sq */
175 { NULL, 0 }, /* Sm */
176 { macro_text, MDOC_CALLABLE }, /* Sx */
177 { macro_text, MDOC_CALLABLE }, /* Sy */
178 { macro_text, MDOC_CALLABLE }, /* Tn */
179 { macro_constant_delimited, 0 }, /* Ux */
180 { NULL, 0 }, /* Xc */
181 { NULL, 0 }, /* Xo */
182 { NULL, 0 }, /* Fo */
183 { NULL, 0 }, /* Fc */
184 { NULL, 0 }, /* Oo */
185 { NULL, 0 }, /* Oc */
186 { NULL, 0 }, /* Bk */
187 { NULL, 0 }, /* Ek */
188 { macro_constant, 0 }, /* Bt */
189 { macro_constant, 0 }, /* Hf */
190 { NULL, 0 }, /* Fr */
191 { macro_constant, 0 }, /* Ud */
192 };
193
/*
 * Read-only pointer aliases for the three tables defined above.  The
 * functions in this file go through these names (mdoc_macronames,
 * mdoc_macros) rather than the double-underscore arrays.
 */
194 const char * const *mdoc_macronames = __mdoc_macronames;
195 const char * const *mdoc_argnames = __mdoc_argnames;
196 const struct mdoc_macro * const mdoc_macros = __mdoc_macros;
197
198
/*
 * Forward declarations of file-local helpers.
 * argdup/argcpy/argfree and paramdup/paramfree copy and release the
 * argument and parameter vectors stored in nodes.
 */
199 static struct mdoc_arg *argdup(size_t, const struct mdoc_arg *);
200 static void argfree(size_t, struct mdoc_arg *);
201 static void argcpy(struct mdoc_arg *,
202 const struct mdoc_arg *);
203 static char **paramdup(size_t, const char **);
204 static void paramfree(size_t, char **);
205
/*
 * Node-tree helpers.  mdoc_block_free() and mdoc_head_free() have no
 * prototype here; they are defined before their only visible use in
 * mdoc_node_free().
 */
206 static void mdoc_node_freelist(struct mdoc_node *);
207 static void mdoc_node_append(struct mdoc *, int,
208 struct mdoc_node *);
209 static void mdoc_elem_free(struct mdoc_elem *);
210 static void mdoc_text_free(struct mdoc_text *);
211
212
213 const struct mdoc_node *
214 mdoc_result(struct mdoc *mdoc)
215 {
216
217 return(mdoc->first);
218 }
219
220
221 void
222 mdoc_free(struct mdoc *mdoc)
223 {
224
225 if (mdoc->first)
226 mdoc_node_freelist(mdoc->first);
227 if (mdoc->htab)
228 mdoc_tokhash_free(mdoc->htab);
229
230 free(mdoc);
231 }
232
233
234 struct mdoc *
235 mdoc_alloc(void *data, const struct mdoc_cb *cb)
236 {
237 struct mdoc *p;
238
239 p = xcalloc(1, sizeof(struct mdoc));
240
241 p->data = data;
242 (void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
243
244 p->htab = mdoc_tokhash_alloc();
245 return(p);
246 }
247
248
249 int
250 mdoc_parseln(struct mdoc *mdoc, char *buf)
251 {
252 int c, i;
253 char tmp[5];
254
255 if ('.' != *buf) {
256 mdoc_word_alloc(mdoc, 0, buf);
257 return(1);
258 }
259
260 if (buf[1] && '\\' == buf[1])
261 if (buf[2] && '\"' == buf[2])
262 return(1);
263
264 i = 1;
265 while (buf[i] && ! isspace(buf[i]) && i < (int)sizeof(tmp))
266 i++;
267
268 if (i == (int)sizeof(tmp))
269 return(mdoc_err(mdoc, -1, 1, ERR_MACRO_NOTSUP));
270 else if (i <= 2)
271 return(mdoc_err(mdoc, -1, 1, ERR_MACRO_NOTSUP));
272
273 i--;
274
275 (void)memcpy(tmp, buf + 1, (size_t)i);
276 tmp[i++] = 0;
277
278 if (MDOC_MAX == (c = mdoc_find(mdoc, tmp)))
279 return(mdoc_err(mdoc, c, 1, ERR_MACRO_NOTSUP));
280
281 while (buf[i] && isspace(buf[i]))
282 i++;
283
284 return(mdoc_macro(mdoc, c, 1, &i, buf));
285 }
286
287
288 void
289 mdoc_msg(struct mdoc *mdoc, int pos, const char *fmt, ...)
290 {
291 va_list ap;
292 char buf[256];
293
294 if (NULL == mdoc->cb.mdoc_msg)
295 return;
296
297 va_start(ap, fmt);
298 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
299 va_end(ap);
300
301 (*mdoc->cb.mdoc_msg)(mdoc->data, pos, buf);
302 }
303
304
305 int
306 mdoc_err(struct mdoc *mdoc, int tok, int pos, enum mdoc_err type)
307 {
308
309 if (NULL == mdoc->cb.mdoc_err)
310 return(0);
311 return((*mdoc->cb.mdoc_err)(mdoc->data, tok, pos, type));
312 }
313
314
315 int
316 mdoc_warn(struct mdoc *mdoc, int tok, int pos, enum mdoc_warn type)
317 {
318
319 if (NULL == mdoc->cb.mdoc_warn)
320 return(0);
321 return((*mdoc->cb.mdoc_warn)(mdoc->data, tok, pos, type));
322 }
323
324
325 int
326 mdoc_macro(struct mdoc *mdoc, int tok, int ppos, int *pos, char *buf)
327 {
328
329 if (NULL == (mdoc_macros[tok].fp)) {
330 (void)mdoc_err(mdoc, tok, ppos, ERR_MACRO_NOTSUP);
331 return(0);
332 }
333
334 if (1 != ppos && ! (MDOC_CALLABLE & mdoc_macros[tok].flags)) {
335 (void)mdoc_err(mdoc, tok, ppos, ERR_MACRO_NOTCALL);
336 return(0);
337 }
338
339 /*mdoc_msg(mdoc, ppos, "calling `%s'", mdoc_macronames[tok]);*/
340
341 return((*mdoc_macros[tok].fp)(mdoc, tok, ppos, pos, buf));
342 }
343
344
345 static void
346 mdoc_node_append(struct mdoc *mdoc, int pos, struct mdoc_node *p)
347 {
348 const char *nn, *on, *nt, *ot, *act;
349
350 switch (p->type) {
351 case (MDOC_TEXT):
352 nn = p->data.text.string;
353 nt = "text";
354 break;
355 case (MDOC_BODY):
356 nn = mdoc_macronames[p->data.body.tok];
357 nt = "body";
358 break;
359 case (MDOC_ELEM):
360 nn = mdoc_macronames[p->data.elem.tok];
361 nt = "elem";
362 break;
363 case (MDOC_HEAD):
364 nn = mdoc_macronames[p->data.head.tok];
365 nt = "head";
366 break;
367 case (MDOC_BLOCK):
368 nn = mdoc_macronames[p->data.block.tok];
369 nt = "block";
370 break;
371 default:
372 abort();
373 /* NOTREACHED */
374 }
375
376 if (NULL == mdoc->first) {
377 assert(NULL == mdoc->last);
378 mdoc->first = p;
379 mdoc->last = p;
380 mdoc_msg(mdoc, pos, "parse: root %s `%s'", nt, nn);
381 return;
382 }
383
384 switch (mdoc->last->type) {
385 case (MDOC_TEXT):
386 on = "<text>";
387 ot = "text";
388 break;
389 case (MDOC_BODY):
390 on = mdoc_macronames[mdoc->last->data.body.tok];
391 ot = "body";
392 break;
393 case (MDOC_ELEM):
394 on = mdoc_macronames[mdoc->last->data.elem.tok];
395 ot = "elem";
396 break;
397 case (MDOC_HEAD):
398 on = mdoc_macronames[mdoc->last->data.head.tok];
399 ot = "head";
400 break;
401 case (MDOC_BLOCK):
402 on = mdoc_macronames[mdoc->last->data.block.tok];
403 ot = "block";
404 break;
405 default:
406 abort();
407 /* NOTREACHED */
408 }
409
410 switch (p->type) {
411 case (MDOC_BODY):
412 p->parent = mdoc->last->parent;
413 mdoc->last->next = p;
414 p->prev = mdoc->last;
415 act = "sibling";
416 break;
417
418 case (MDOC_HEAD):
419 assert(mdoc->last->type == MDOC_BLOCK);
420 p->parent = mdoc->last;
421 mdoc->last->child = p;
422 act = "child";
423 break;
424
425 default:
426 switch (mdoc->last->type) {
427 case (MDOC_BODY):
428 /* FALLTHROUGH */
429 case (MDOC_HEAD):
430 p->parent = mdoc->last;
431 mdoc->last->child = p;
432 act = "child";
433 break;
434 default:
435 p->parent = mdoc->last->parent;
436 p->prev = mdoc->last;
437 mdoc->last->next = p;
438 act = "sibling";
439 break;
440 }
441 break;
442 }
443
444 mdoc_msg(mdoc, pos, "parse: %s `%s' %s %s `%s'",
445 nt, nn, act, ot, on);
446 mdoc->last = p;
447 }
448
449
450 /* FIXME: deprecate paramsz, params. */
451 void
452 mdoc_head_alloc(struct mdoc *mdoc, int pos, int tok,
453 size_t paramsz, const char **params)
454 {
455 struct mdoc_node *p;
456
457 assert(mdoc->first);
458 assert(mdoc->last);
459 assert(mdoc->last->type == MDOC_BLOCK);
460 assert(mdoc->last->data.block.tok == tok);
461
462 p = xcalloc(1, sizeof(struct mdoc_node));
463 p->type = MDOC_HEAD;
464 p->data.head.tok = tok;
465 p->data.head.sz = paramsz;
466 p->data.head.args = paramdup(paramsz, params);
467
468 mdoc_node_append(mdoc, pos, p);
469 }
470
471
472 void
473 mdoc_body_alloc(struct mdoc *mdoc, int pos, int tok)
474 {
475 struct mdoc_node *p;
476
477 assert(mdoc->first);
478 assert(mdoc->last);
479 assert((mdoc->last->type == MDOC_BLOCK) ||
480 (mdoc->last->type == MDOC_HEAD));
481 if (mdoc->last->type == MDOC_BLOCK)
482 assert(mdoc->last->data.block.tok == tok);
483 else
484 assert(mdoc->last->data.head.tok == tok);
485
486 p = xcalloc(1, sizeof(struct mdoc_node));
487
488 p->type = MDOC_BODY;
489 p->data.body.tok = tok;
490
491 mdoc_node_append(mdoc, pos, p);
492 }
493
494
495 void
496 mdoc_block_alloc(struct mdoc *mdoc, int pos, int tok,
497 size_t argsz, const struct mdoc_arg *args)
498 {
499 struct mdoc_node *p;
500
501 p = xcalloc(1, sizeof(struct mdoc_node));
502
503 p->type = MDOC_BLOCK;
504 p->data.block.tok = tok;
505 p->data.block.argc = argsz;
506 p->data.block.argv = argdup(argsz, args);
507
508 mdoc_node_append(mdoc, pos, p);
509 }
510
511
512 void
513 mdoc_elem_alloc(struct mdoc *mdoc, int pos, int tok,
514 size_t argsz, const struct mdoc_arg *args,
515 size_t paramsz, const char **params)
516 {
517 struct mdoc_node *p;
518
519 p = xcalloc(1, sizeof(struct mdoc_node));
520 p->type = MDOC_ELEM;
521 p->data.elem.tok = tok;
522 p->data.elem.sz = paramsz;
523 p->data.elem.args = paramdup(paramsz, params);
524 p->data.elem.argc = argsz;
525 p->data.elem.argv = argdup(argsz, args);
526
527 mdoc_node_append(mdoc, pos, p);
528 }
529
530
531 void
532 mdoc_word_alloc(struct mdoc *mdoc, int pos, const char *word)
533 {
534 struct mdoc_node *p;
535
536 p = xcalloc(1, sizeof(struct mdoc_node));
537 p->type = MDOC_TEXT;
538 p->data.text.string = xstrdup(word);
539
540 mdoc_node_append(mdoc, pos, p);
541 }
542
543
544 static void
545 argfree(size_t sz, struct mdoc_arg *p)
546 {
547 int i, j;
548
549 if (0 == sz)
550 return;
551
552 assert(p);
553 /* LINTED */
554 for (i = 0; i < (int)sz; i++)
555 if (p[i].sz > 0) {
556 assert(p[i].value);
557 /* LINTED */
558 for (j = 0; j < (int)p[i].sz; j++)
559 free(p[i].value[j]);
560 }
561 free(p);
562 }
563
564
565 static void
566 mdoc_elem_free(struct mdoc_elem *p)
567 {
568
569 paramfree(p->sz, p->args);
570 argfree(p->argc, p->argv);
571 }
572
573
574 static void
575 mdoc_block_free(struct mdoc_block *p)
576 {
577
578 argfree(p->argc, p->argv);
579 }
580
581
582 static void
583 mdoc_text_free(struct mdoc_text *p)
584 {
585
586 if (p->string)
587 free(p->string);
588 }
589
590
591 static void
592 mdoc_head_free(struct mdoc_head *p)
593 {
594
595 paramfree(p->sz, p->args);
596 }
597
598
599 void
600 mdoc_node_free(struct mdoc_node *p)
601 {
602
603 switch (p->type) {
604 case (MDOC_TEXT):
605 mdoc_text_free(&p->data.text);
606 break;
607 case (MDOC_ELEM):
608 mdoc_elem_free(&p->data.elem);
609 break;
610 case (MDOC_BLOCK):
611 mdoc_block_free(&p->data.block);
612 break;
613 case (MDOC_HEAD):
614 mdoc_head_free(&p->data.head);
615 break;
616 default:
617 break;
618 }
619
620 free(p);
621 }
622
623
624 static void
625 mdoc_node_freelist(struct mdoc_node *p)
626 {
627
628 if (p->child)
629 mdoc_node_freelist(p->child);
630 if (p->next)
631 mdoc_node_freelist(p->next);
632
633 mdoc_node_free(p);
634 }
635
636
637 int
638 mdoc_find(const struct mdoc *mdoc, const char *key)
639 {
640
641 return(mdoc_tokhash_find(mdoc->htab, key));
642 }
643
644
645 static void
646 argcpy(struct mdoc_arg *dst, const struct mdoc_arg *src)
647 {
648 int i;
649
650 dst->arg = src->arg;
651 if (0 == (dst->sz = src->sz))
652 return;
653 dst->value = xcalloc(dst->sz, sizeof(char *));
654 for (i = 0; i < (int)dst->sz; i++)
655 dst->value[i] = xstrdup(src->value[i]);
656 }
657
658
659 static struct mdoc_arg *
660 argdup(size_t argsz, const struct mdoc_arg *args)
661 {
662 struct mdoc_arg *pp;
663 int i;
664
665 if (0 == argsz)
666 return(NULL);
667
668 pp = xcalloc((size_t)argsz, sizeof(struct mdoc_arg));
669 for (i = 0; i < (int)argsz; i++)
670 argcpy(&pp[i], &args[i]);
671
672 return(pp);
673 }
674
675
/*
 * Release a parameter vector of `sz' strings: each string, then the
 * vector `p' itself.  A zero `sz' is a no-op; otherwise `p' must be
 * non-NULL.
 */
static void
paramfree(size_t sz, char **p)
{
	char		**cur;
	int		  left;

	if (0 == sz)
		return;

	assert(p);

	cur = p;
	left = (int)sz;
	while (left-- > 0)
		free(*cur++);

	free(p);
}
690
691
/*
 * Return a newly allocated vector holding duplicates of the `sz'
 * strings in `p', or NULL when `sz' is zero.  The copy is released
 * with paramfree().
 */
static char **
paramdup(size_t sz, const char **p)
{
	char		**copy;
	int		  n;

	if (0 == sz)
		return(NULL);

	copy = xcalloc(sz, sizeof(char *));

	n = 0;
	while (n < (int)sz) {
		copy[n] = xstrdup(p[n]);
		n++;
	}

	return(copy);
}