]> git.cameronkatri.com Git - mandoc.git/blob - mdoc.c
48756059b3ceaf5000aaa79c8bcc302234579abc
[mandoc.git] / mdoc.c
1 /* $Id: mdoc.c,v 1.14 2009/01/02 14:06:16 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the
7 * above copyright notice and this permission notice appear in all
8 * copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12 * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13 * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
16 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 * PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include <assert.h>
20 #include <ctype.h>
21 #include <err.h>
22 #include <stdarg.h>
23 #include <stdlib.h>
24 #include <stdio.h>
25 #include <string.h>
26
27 #include "private.h"
28
29 const char *const __mdoc_macronames[MDOC_MAX] = {
30 "\\\"", "Dd", "Dt", "Os",
31 "Sh", "Ss", "Pp", "D1",
32 "Dl", "Bd", "Ed", "Bl",
33 "El", "It", "Ad", "An",
34 "Ar", "Cd", "Cm", "Dv",
35 "Er", "Ev", "Ex", "Fa",
36 "Fd", "Fl", "Fn", "Ft",
37 "Ic", "In", "Li", "Nd",
38 "Nm", "Op", "Ot", "Pa",
39 "Rv", "St", "Va", "Vt",
40 /* LINTED */
41 "Xr", "\%A", "\%B", "\%D",
42 /* LINTED */
43 "\%I", "\%J", "\%N", "\%O",
44 /* LINTED */
45 "\%P", "\%R", "\%T", "\%V",
46 "Ac", "Ao", "Aq", "At",
47 "Bc", "Bf", "Bo", "Bq",
48 "Bsx", "Bx", "Db", "Dc",
49 "Do", "Dq", "Ec", "Ef",
50 "Em", "Eo", "Fx", "Ms",
51 "No", "Ns", "Nx", "Ox",
52 "Pc", "Pf", "Po", "Pq",
53 "Qc", "Ql", "Qo", "Qq",
54 "Re", "Rs", "Sc", "So",
55 "Sq", "Sm", "Sx", "Sy",
56 "Tn", "Ux", "Xc", "Xo",
57 "Fo", "Fc", "Oo", "Oc",
58 "Bk", "Ek", "Bt", "Hf",
59 "Fr", "Ud",
60 };
61
62 const char *const __mdoc_argnames[MDOC_ARG_MAX] = {
63 "split", "nosplit", "ragged",
64 "unfilled", "literal", "file",
65 "offset", "bullet", "dash",
66 "hyphen", "item", "enum",
67 "tag", "diag", "hang",
68 "ohang", "inset", "column",
69 "width", "compact", "std",
70 "p1003.1-88", "p1003.1-90", "p1003.1-96",
71 "p1003.1-2001", "p1003.1-2004", "p1003.1",
72 "p1003.1b", "p1003.1b-93", "p1003.1c-95",
73 "p1003.1g-2000", "p1003.2-92", "p1387.2-95",
74 "p1003.2", "p1387.2", "isoC-90",
75 "isoC-amd1", "isoC-tcor1", "isoC-tcor2",
76 "isoC-99", "ansiC", "ansiC-89",
77 "ansiC-99", "ieee754", "iso8802-3",
78 "xpg3", "xpg4", "xpg4.2",
79 "xpg4.3", "xbd5", "xcu5",
80 "xsh5", "xns5", "xns5.2d2.0",
81 "xcurses4.2", "susv2", "susv3",
82 "svid4", "filled", "words",
83 "emphasis", "symbolic",
84 };
85
86 const struct mdoc_macro __mdoc_macros[MDOC_MAX] = {
87 { NULL, 0 }, /* \" */
88 { NULL, MDOC_PROLOGUE }, /* Dd */
89 { NULL, MDOC_PROLOGUE }, /* Dt */
90 { NULL, MDOC_PROLOGUE }, /* Os */
91 { macro_scoped, 0 }, /* Sh */
92 { macro_scoped, 0 }, /* Ss */
93 { macro_text, 0 }, /* Pp */
94 { macro_scoped_line, 0 }, /* D1 */
95 { macro_scoped_line, 0 }, /* Dl */
96 { macro_scoped, MDOC_EXPLICIT }, /* Bd */
97 { macro_close_explicit, 0 }, /* Ed */
98 { macro_scoped, MDOC_EXPLICIT }, /* Bl */
99 { macro_close_explicit, 0 }, /* El */
100 { NULL, 0 }, /* It */
101 { macro_text, MDOC_CALLABLE }, /* Ad */
102 { macro_constant, 0 }, /* An */
103 { macro_text, MDOC_CALLABLE }, /* Ar */
104 { macro_constant, MDOC_QUOTABLE }, /* Cd */
105 { macro_text, MDOC_CALLABLE }, /* Cm */
106 { macro_text, MDOC_CALLABLE }, /* Dv */
107 { macro_text, MDOC_CALLABLE }, /* Er */
108 { macro_text, MDOC_CALLABLE }, /* Ev */
109 { macro_constant, 0 }, /* Ex */
110 { macro_text, MDOC_CALLABLE }, /* Fa */
111 { macro_constant, 0 }, /* Fd */
112 { macro_text, MDOC_CALLABLE }, /* Fl */
113 { macro_text, MDOC_CALLABLE | MDOC_QUOTABLE }, /* Fn */
114 { macro_text, 0 }, /* Ft */
115 { macro_text, MDOC_CALLABLE }, /* Ic */
116 { macro_constant, 0 }, /* In */
117 { macro_text, MDOC_CALLABLE }, /* Li */
118 { macro_constant, 0 }, /* Nd */
119 { NULL, 0 }, /* Nm */
120 { macro_scoped_line, MDOC_CALLABLE }, /* Op */
121 { macro_obsolete, 0 }, /* Ot */
122 { macro_text, MDOC_CALLABLE }, /* Pa */
123 { macro_constant, 0 }, /* Rv */
124 { macro_constant, 0 }, /* St */
125 { macro_text, MDOC_CALLABLE }, /* Va */
126 { macro_text, MDOC_CALLABLE }, /* Vt */
127 { macro_text, MDOC_CALLABLE }, /* Xr */
128 { macro_constant, MDOC_QUOTABLE }, /* %A */
129 { macro_constant, MDOC_QUOTABLE }, /* %B */
130 { macro_constant, MDOC_QUOTABLE }, /* %D */
131 { macro_constant, MDOC_QUOTABLE }, /* %I */
132 { macro_constant, MDOC_QUOTABLE }, /* %J */
133 { macro_constant, MDOC_QUOTABLE }, /* %N */
134 { macro_constant, MDOC_QUOTABLE }, /* %O */
135 { macro_constant, MDOC_QUOTABLE }, /* %P */
136 { macro_constant, MDOC_QUOTABLE }, /* %R */
137 { macro_constant, MDOC_QUOTABLE }, /* %T */
138 { macro_constant, MDOC_QUOTABLE }, /* %V */
139 { NULL, 0 }, /* Ac */
140 { NULL, 0 }, /* Ao */
141 { macro_scoped_line, MDOC_CALLABLE }, /* Aq */
142 { macro_constant, 0 }, /* At */
143 { NULL, 0 }, /* Bc */
144 { macro_scoped, 0 }, /* Bf */
145 { NULL, 0 }, /* Bo */
146 { macro_scoped_line, MDOC_CALLABLE }, /* Bq */
147 { macro_constant_delimited, 0 }, /* Bsx */
148 { macro_constant_delimited, 0 }, /* Bx */
149 { macro_constant, 0 }, /* Db */
150 { NULL, 0 }, /* Dc */
151 { NULL, 0 }, /* Do */
152 { macro_scoped_line, MDOC_CALLABLE }, /* Dq */
153 { NULL, 0 }, /* Ec */
154 { macro_close_explicit, 0 }, /* Ef */
155 { macro_text, MDOC_CALLABLE }, /* Em */
156 { NULL, 0 }, /* Eo */
157 { macro_constant_delimited, 0 }, /* Fx */
158 { macro_text, 0 }, /* Ms */
159 { macro_constant_delimited, MDOC_CALLABLE }, /* No */
160 { macro_constant_delimited, MDOC_CALLABLE }, /* Ns */
161 { macro_constant_delimited, 0 }, /* Nx */
162 { macro_constant_delimited, 0 }, /* Ox */
163 { NULL, 0 }, /* Pc */
164 { macro_constant, 0 }, /* Pf */
165 { NULL, 0 }, /* Po */
166 { macro_scoped_line, MDOC_CALLABLE }, /* Pq */
167 { NULL, 0 }, /* Qc */
168 { macro_scoped_line, MDOC_CALLABLE }, /* Ql */
169 { NULL, 0 }, /* Qo */
170 { macro_scoped_line, MDOC_CALLABLE }, /* Qq */
171 { macro_scoped, MDOC_EXPLICIT }, /* Re */
172 { macro_scoped, MDOC_EXPLICIT }, /* Rs */
173 { NULL, 0 }, /* Sc */
174 { NULL, 0 }, /* So */
175 { macro_scoped_line, MDOC_CALLABLE }, /* Sq */
176 { macro_constant, 0 }, /* Sm */
177 { macro_text, MDOC_CALLABLE }, /* Sx */
178 { macro_text, MDOC_CALLABLE }, /* Sy */
179 { macro_text, MDOC_CALLABLE }, /* Tn */
180 { macro_constant_delimited, 0 }, /* Ux */
181 { NULL, 0 }, /* Xc */
182 { NULL, 0 }, /* Xo */
183 { NULL, 0 }, /* Fo */
184 { NULL, 0 }, /* Fc */
185 { NULL, 0 }, /* Oo */
186 { NULL, 0 }, /* Oc */
187 { NULL, 0 }, /* Bk */
188 { NULL, 0 }, /* Ek */
189 { macro_constant, 0 }, /* Bt */
190 { macro_constant, 0 }, /* Hf */
191 { macro_obsolete, 0 }, /* Fr */
192 { macro_constant, 0 }, /* Ud */
193 };
194
195 const char * const *mdoc_macronames = __mdoc_macronames;
196 const char * const *mdoc_argnames = __mdoc_argnames;
197 const struct mdoc_macro * const mdoc_macros = __mdoc_macros;
198
199
/* Deep-copy and release helpers for macro argument vectors. */
static	struct mdoc_arg	*argdup(size_t argsz, const struct mdoc_arg *args);
static	void		 argfree(size_t sz, struct mdoc_arg *p);
static	void		 argcpy(struct mdoc_arg *dst,
				const struct mdoc_arg *src);

/* Deep-copy and release helpers for plain string vectors. */
static	char	       **paramdup(size_t sz, const char **p);
static	void		 paramfree(size_t sz, char **p);

/* Tree construction and per-node-type teardown. */
static	void		 mdoc_node_freelist(struct mdoc_node *p);
static	void		 mdoc_node_append(struct mdoc *mdoc, int pos,
				struct mdoc_node *p);
static	void		 mdoc_elem_free(struct mdoc_elem *p);
static	void		 mdoc_block_free(struct mdoc_block *p);
static	void		 mdoc_text_free(struct mdoc_text *p);
212
213
214 const struct mdoc_node *
215 mdoc_result(struct mdoc *mdoc)
216 {
217
218 return(mdoc->first);
219 }
220
221
222 void
223 mdoc_free(struct mdoc *mdoc)
224 {
225
226 if (mdoc->first)
227 mdoc_node_freelist(mdoc->first);
228 if (mdoc->htab)
229 mdoc_tokhash_free(mdoc->htab);
230
231 free(mdoc);
232 }
233
234
235 struct mdoc *
236 mdoc_alloc(void *data, const struct mdoc_cb *cb)
237 {
238 struct mdoc *p;
239
240 p = xcalloc(1, sizeof(struct mdoc));
241
242 p->data = data;
243 (void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
244
245 p->htab = mdoc_tokhash_alloc();
246 return(p);
247 }
248
249
250 int
251 mdoc_parseln(struct mdoc *mdoc, char *buf)
252 {
253 int c, i;
254 char tmp[5];
255
256 if ('.' != *buf) {
257 mdoc_word_alloc(mdoc, 0, buf);
258 mdoc->next = MDOC_NEXT_SIBLING;
259 return(1);
260 }
261
262 if (buf[1] && '\\' == buf[1])
263 if (buf[2] && '\"' == buf[2])
264 return(1);
265
266 i = 1;
267 while (buf[i] && ! isspace(buf[i]) && i < (int)sizeof(tmp))
268 i++;
269
270 if (i == (int)sizeof(tmp))
271 return(mdoc_err(mdoc, -1, 1, ERR_MACRO_NOTSUP));
272 else if (i <= 2)
273 return(mdoc_err(mdoc, -1, 1, ERR_MACRO_NOTSUP));
274
275 i--;
276
277 (void)memcpy(tmp, buf + 1, (size_t)i);
278 tmp[i++] = 0;
279
280 if (MDOC_MAX == (c = mdoc_find(mdoc, tmp)))
281 return(mdoc_err(mdoc, c, 1, ERR_MACRO_NOTSUP));
282
283 while (buf[i] && isspace(buf[i]))
284 i++;
285
286 return(mdoc_macro(mdoc, c, 1, &i, buf));
287 }
288
289
290 void
291 mdoc_msg(struct mdoc *mdoc, int pos, const char *fmt, ...)
292 {
293 va_list ap;
294 char buf[256];
295
296 if (NULL == mdoc->cb.mdoc_msg)
297 return;
298
299 va_start(ap, fmt);
300 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
301 va_end(ap);
302
303 (*mdoc->cb.mdoc_msg)(mdoc->data, pos, buf);
304 }
305
306
307 int
308 mdoc_err(struct mdoc *mdoc, int tok, int pos, enum mdoc_err type)
309 {
310
311 if (NULL == mdoc->cb.mdoc_err)
312 return(0);
313 return((*mdoc->cb.mdoc_err)(mdoc->data, tok, pos, type));
314 }
315
316
317 int
318 mdoc_warn(struct mdoc *mdoc, int tok, int pos, enum mdoc_warn type)
319 {
320
321 if (NULL == mdoc->cb.mdoc_warn)
322 return(0);
323 return((*mdoc->cb.mdoc_warn)(mdoc->data, tok, pos, type));
324 }
325
326
327 int
328 mdoc_macro(struct mdoc *mdoc, int tok, int ppos, int *pos, char *buf)
329 {
330
331 if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
332 SEC_PROLOGUE == mdoc->sec_lastn)
333 return(macro_prologue(mdoc, tok, ppos, pos, buf));
334
335 if (SEC_PROLOGUE == mdoc->sec_lastn)
336 return(mdoc_err(mdoc, tok, ppos, ERR_SEC_PROLOGUE));
337
338 if (NULL == (mdoc_macros[tok].fp)) {
339 (void)mdoc_err(mdoc, tok, ppos, ERR_MACRO_NOTSUP);
340 return(0);
341 }
342
343 if (1 != ppos && ! (MDOC_CALLABLE & mdoc_macros[tok].flags)) {
344 (void)mdoc_err(mdoc, tok, ppos, ERR_MACRO_NOTCALL);
345 return(0);
346 }
347
348 return((*mdoc_macros[tok].fp)(mdoc, tok, ppos, pos, buf));
349 }
350
351
352 static void
353 mdoc_node_append(struct mdoc *mdoc, int pos, struct mdoc_node *p)
354 {
355 const char *nn, *on, *nt, *ot, *act;
356
357 switch (p->type) {
358 case (MDOC_TEXT):
359 nn = p->data.text.string;
360 nt = "text";
361 break;
362 case (MDOC_BODY):
363 nn = mdoc_macronames[p->data.body.tok];
364 nt = "body";
365 break;
366 case (MDOC_ELEM):
367 nn = mdoc_macronames[p->data.elem.tok];
368 nt = "elem";
369 break;
370 case (MDOC_HEAD):
371 nn = mdoc_macronames[p->data.head.tok];
372 nt = "head";
373 break;
374 case (MDOC_BLOCK):
375 nn = mdoc_macronames[p->data.block.tok];
376 nt = "block";
377 break;
378 default:
379 abort();
380 /* NOTREACHED */
381 }
382
383 if (NULL == mdoc->first) {
384 assert(NULL == mdoc->last);
385 mdoc->first = p;
386 mdoc->last = p;
387 mdoc_msg(mdoc, pos, "parse: root %s `%s'", nt, nn);
388 return;
389 }
390
391 switch (mdoc->last->type) {
392 case (MDOC_TEXT):
393 on = "<text>";
394 ot = "text";
395 break;
396 case (MDOC_BODY):
397 on = mdoc_macronames[mdoc->last->data.body.tok];
398 ot = "body";
399 break;
400 case (MDOC_ELEM):
401 on = mdoc_macronames[mdoc->last->data.elem.tok];
402 ot = "elem";
403 break;
404 case (MDOC_HEAD):
405 on = mdoc_macronames[mdoc->last->data.head.tok];
406 ot = "head";
407 break;
408 case (MDOC_BLOCK):
409 on = mdoc_macronames[mdoc->last->data.block.tok];
410 ot = "block";
411 break;
412 default:
413 abort();
414 /* NOTREACHED */
415 }
416
417 switch (mdoc->next) {
418 case (MDOC_NEXT_SIBLING):
419 mdoc->last->next = p;
420 p->prev = mdoc->last;
421 p->parent = mdoc->last->parent;
422 act = "sibling";
423 break;
424 case (MDOC_NEXT_CHILD):
425 mdoc->last->child = p;
426 p->parent = mdoc->last;
427 act = "child";
428 break;
429 default:
430 abort();
431 /* NOTREACHED */
432 }
433
434 mdoc_msg(mdoc, pos, "parse: %s `%s' %s of %s `%s'",
435 nt, nn, act, ot, on);
436
437 mdoc->last = p;
438 }
439
440
441 void
442 mdoc_head_alloc(struct mdoc *mdoc, int pos, int tok)
443 {
444 struct mdoc_node *p;
445
446 assert(mdoc->first);
447 assert(mdoc->last);
448
449 p = xcalloc(1, sizeof(struct mdoc_node));
450
451 p->type = MDOC_HEAD;
452 p->data.head.tok = tok;
453
454 mdoc_node_append(mdoc, pos, p);
455 }
456
457
458 void
459 mdoc_body_alloc(struct mdoc *mdoc, int pos, int tok)
460 {
461 struct mdoc_node *p;
462
463 assert(mdoc->first);
464 assert(mdoc->last);
465
466 p = xcalloc(1, sizeof(struct mdoc_node));
467
468 p->type = MDOC_BODY;
469 p->data.body.tok = tok;
470
471 mdoc_node_append(mdoc, pos, p);
472 }
473
474
475 void
476 mdoc_block_alloc(struct mdoc *mdoc, int pos, int tok,
477 size_t argsz, const struct mdoc_arg *args)
478 {
479 struct mdoc_node *p;
480
481 p = xcalloc(1, sizeof(struct mdoc_node));
482
483 p->type = MDOC_BLOCK;
484 p->data.block.tok = tok;
485 p->data.block.argc = argsz;
486 p->data.block.argv = argdup(argsz, args);
487
488 mdoc_node_append(mdoc, pos, p);
489 }
490
491
492 void
493 mdoc_elem_alloc(struct mdoc *mdoc, int pos, int tok,
494 size_t argsz, const struct mdoc_arg *args,
495 size_t paramsz, const char **params)
496 {
497 struct mdoc_node *p;
498
499 p = xcalloc(1, sizeof(struct mdoc_node));
500 p->type = MDOC_ELEM;
501 p->data.elem.tok = tok;
502 p->data.elem.sz = paramsz;
503 p->data.elem.args = paramdup(paramsz, params);
504 p->data.elem.argc = argsz;
505 p->data.elem.argv = argdup(argsz, args);
506
507 mdoc_node_append(mdoc, pos, p);
508 }
509
510
511 void
512 mdoc_word_alloc(struct mdoc *mdoc, int pos, const char *word)
513 {
514 struct mdoc_node *p;
515
516 p = xcalloc(1, sizeof(struct mdoc_node));
517 p->type = MDOC_TEXT;
518 p->data.text.string = xstrdup(word);
519
520 mdoc_node_append(mdoc, pos, p);
521 }
522
523
524 static void
525 argfree(size_t sz, struct mdoc_arg *p)
526 {
527 int i, j;
528
529 if (0 == sz)
530 return;
531
532 assert(p);
533 /* LINTED */
534 for (i = 0; i < (int)sz; i++)
535 if (p[i].sz > 0) {
536 assert(p[i].value);
537 /* LINTED */
538 for (j = 0; j < (int)p[i].sz; j++)
539 free(p[i].value[j]);
540 }
541 free(p);
542 }
543
544
545 static void
546 mdoc_elem_free(struct mdoc_elem *p)
547 {
548
549 paramfree(p->sz, p->args);
550 argfree(p->argc, p->argv);
551 }
552
553
554 static void
555 mdoc_block_free(struct mdoc_block *p)
556 {
557
558 argfree(p->argc, p->argv);
559 }
560
561
562 static void
563 mdoc_text_free(struct mdoc_text *p)
564 {
565
566 if (p->string)
567 free(p->string);
568 }
569
570
571 void
572 mdoc_node_free(struct mdoc_node *p)
573 {
574
575 switch (p->type) {
576 case (MDOC_TEXT):
577 mdoc_text_free(&p->data.text);
578 break;
579 case (MDOC_ELEM):
580 mdoc_elem_free(&p->data.elem);
581 break;
582 case (MDOC_BLOCK):
583 mdoc_block_free(&p->data.block);
584 break;
585 default:
586 break;
587 }
588
589 free(p);
590 }
591
592
593 static void
594 mdoc_node_freelist(struct mdoc_node *p)
595 {
596
597 if (p->child)
598 mdoc_node_freelist(p->child);
599 if (p->next)
600 mdoc_node_freelist(p->next);
601
602 mdoc_node_free(p);
603 }
604
605
606 int
607 mdoc_find(const struct mdoc *mdoc, const char *key)
608 {
609
610 return(mdoc_tokhash_find(mdoc->htab, key));
611 }
612
613
614 static void
615 argcpy(struct mdoc_arg *dst, const struct mdoc_arg *src)
616 {
617 int i;
618
619 dst->arg = src->arg;
620 if (0 == (dst->sz = src->sz))
621 return;
622 dst->value = xcalloc(dst->sz, sizeof(char *));
623 for (i = 0; i < (int)dst->sz; i++)
624 dst->value[i] = xstrdup(src->value[i]);
625 }
626
627
628 static struct mdoc_arg *
629 argdup(size_t argsz, const struct mdoc_arg *args)
630 {
631 struct mdoc_arg *pp;
632 int i;
633
634 if (0 == argsz)
635 return(NULL);
636
637 pp = xcalloc((size_t)argsz, sizeof(struct mdoc_arg));
638 for (i = 0; i < (int)argsz; i++)
639 argcpy(&pp[i], &args[i]);
640
641 return(pp);
642 }
643
644
/*
 * Release a string vector built by paramdup(): each of the `sz'
 * strings, then the vector itself.  A count of zero means there is
 * nothing to free.
 */
static void
paramfree(size_t sz, char **p)
{
	int		 left;

	if (0 == sz)
		return;

	assert(p);

	/* Walk backwards; the order of the free() calls is irrelevant. */
	left = (int)sz;
	while (left-- > 0)
		free(p[left]);

	free(p);
}
659
660
/*
 * Return a freshly allocated vector holding private copies of the `sz'
 * strings in `p', or NULL when `sz' is zero.  The result is released
 * with paramfree().
 */
static char **
paramdup(size_t sz, const char **p)
{
	char		**copy;
	int		  n;

	if (0 == sz)
		return NULL;

	copy = xcalloc(sz, sizeof(char *));

	n = 0;
	while (n < (int)sz) {
		copy[n] = xstrdup(p[n]);
		n++;
	}

	return copy;
}
675 }