]> git.cameronkatri.com Git - mandoc.git/blob - mdoc.c
Backed-out STRIP_XO by default.
[mandoc.git] / mdoc.c
1 /* $Id: mdoc.c,v 1.69 2009/03/21 09:42:07 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the
7 * above copyright notice and this permission notice appear in all
8 * copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12 * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13 * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
16 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 * PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include <assert.h>
20 #include <ctype.h>
21 #include <err.h>
22 #include <stdarg.h>
23 #include <stdlib.h>
24 #include <stdio.h>
25 #include <string.h>
26
27 #include "private.h"
28
29 /*
30 * Main caller in the libmdoc library. This begins the parsing routine,
31 * handles allocation of data, and so forth. Most of the "work" is done
32 * in macro.c, validate.c and action.c.
33 */
34
/* Node construction helpers. */
static struct mdoc_node	 *mdoc_node_alloc(const struct mdoc *mdoc);
static int		  mdoc_node_append(struct mdoc *mdoc,
				struct mdoc_node *p);

/* Per-line parsers: free-form text, macro lines, unknown-macro reports. */
static int		  parsetext(struct mdoc *m, int line, char *buf);
static int		  parsemacro(struct mdoc *m, int ln, char *buf);
static int		  macrowarn(struct mdoc *m, int ln, const char *buf);
42
43
44 const char *const __mdoc_macronames[MDOC_MAX] = {
45 "\\\"", "Dd", "Dt", "Os",
46 "Sh", "Ss", "Pp", "D1",
47 "Dl", "Bd", "Ed", "Bl",
48 "El", "It", "Ad", "An",
49 "Ar", "Cd", "Cm", "Dv",
50 "Er", "Ev", "Ex", "Fa",
51 "Fd", "Fl", "Fn", "Ft",
52 "Ic", "In", "Li", "Nd",
53 "Nm", "Op", "Ot", "Pa",
54 "Rv", "St", "Va", "Vt",
55 /* LINTED */
56 "Xr", "\%A", "\%B", "\%D",
57 /* LINTED */
58 "\%I", "\%J", "\%N", "\%O",
59 /* LINTED */
60 "\%P", "\%R", "\%T", "\%V",
61 "Ac", "Ao", "Aq", "At",
62 "Bc", "Bf", "Bo", "Bq",
63 "Bsx", "Bx", "Db", "Dc",
64 "Do", "Dq", "Ec", "Ef",
65 "Em", "Eo", "Fx", "Ms",
66 "No", "Ns", "Nx", "Ox",
67 "Pc", "Pf", "Po", "Pq",
68 "Qc", "Ql", "Qo", "Qq",
69 "Re", "Rs", "Sc", "So",
70 "Sq", "Sm", "Sx", "Sy",
71 "Tn", "Ux", "Xc", "Xo",
72 "Fo", "Fc", "Oo", "Oc",
73 "Bk", "Ek", "Bt", "Hf",
74 "Fr", "Ud", "Lb", "Ap",
75 "Lp", "Lk", "Mt", "Brq",
76 /* LINTED */
77 "Bro", "Brc", "\%C", "Es",
78 /* LINTED */
79 "En", "Dx", "\%Q"
80 };
81
82 const char *const __mdoc_argnames[MDOC_ARG_MAX] = {
83 "split", "nosplit", "ragged",
84 "unfilled", "literal", "file",
85 "offset", "bullet", "dash",
86 "hyphen", "item", "enum",
87 "tag", "diag", "hang",
88 "ohang", "inset", "column",
89 "width", "compact", "std",
90 "filled", "words", "emphasis",
91 "symbolic", "nested"
92 };
93
94 const char * const *mdoc_macronames = __mdoc_macronames;
95 const char * const *mdoc_argnames = __mdoc_argnames;
96
97
98 /*
99 * Get the first (root) node of the parse tree.
100 */
101 const struct mdoc_node *
102 mdoc_node(const struct mdoc *mdoc)
103 {
104
105 if (MDOC_HALT & mdoc->flags)
106 return(NULL);
107 if (mdoc->first)
108 assert(MDOC_ROOT == mdoc->first->type);
109 return(mdoc->first);
110 }
111
112
113 const struct mdoc_meta *
114 mdoc_meta(const struct mdoc *mdoc)
115 {
116
117 if (MDOC_HALT & mdoc->flags)
118 return(NULL);
119 return(&mdoc->meta);
120 }
121
122
123 /*
124 * Free up all resources contributed by a parse: the node tree, meta-data and
125 * so on. Then reallocate the root node for another parse.
126 */
127 void
128 mdoc_reset(struct mdoc *mdoc)
129 {
130
131 if (mdoc->first)
132 mdoc_node_freelist(mdoc->first);
133 if (mdoc->meta.title)
134 free(mdoc->meta.title);
135 if (mdoc->meta.os)
136 free(mdoc->meta.os);
137 if (mdoc->meta.name)
138 free(mdoc->meta.name);
139 if (mdoc->meta.arch)
140 free(mdoc->meta.arch);
141 if (mdoc->meta.vol)
142 free(mdoc->meta.vol);
143
144 bzero(&mdoc->meta, sizeof(struct mdoc_meta));
145 mdoc->flags = 0;
146 mdoc->lastnamed = mdoc->lastsec = 0;
147
148 mdoc->first = mdoc->last =
149 xcalloc(1, sizeof(struct mdoc_node));
150 mdoc->last->type = MDOC_ROOT;
151 mdoc->next = MDOC_NEXT_CHILD;
152 }
153
154
155 /*
156 * Completely free up all resources.
157 */
158 void
159 mdoc_free(struct mdoc *mdoc)
160 {
161
162 if (mdoc->first)
163 mdoc_node_freelist(mdoc->first);
164 if (mdoc->meta.title)
165 free(mdoc->meta.title);
166 if (mdoc->meta.os)
167 free(mdoc->meta.os);
168 if (mdoc->meta.name)
169 free(mdoc->meta.name);
170 if (mdoc->meta.arch)
171 free(mdoc->meta.arch);
172 if (mdoc->meta.vol)
173 free(mdoc->meta.vol);
174
175 if (mdoc->htab)
176 mdoc_tokhash_free(mdoc->htab);
177
178 free(mdoc);
179 }
180
181
182 struct mdoc *
183 mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb)
184 {
185 struct mdoc *p;
186
187 p = xcalloc(1, sizeof(struct mdoc));
188
189 p->data = data;
190 if (cb)
191 (void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
192
193 p->last = p->first =
194 xcalloc(1, sizeof(struct mdoc_node));
195 p->last->type = MDOC_ROOT;
196 p->pflags = pflags;
197 p->next = MDOC_NEXT_CHILD;
198 p->htab = mdoc_tokhash_alloc();
199 return(p);
200 }
201
202
203 /*
204 * Climb back up the parse tree, validating open scopes. Mostly calls
205 * through to macro_end in macro.c.
206 */
207 int
208 mdoc_endparse(struct mdoc *mdoc)
209 {
210
211 if (MDOC_HALT & mdoc->flags)
212 return(0);
213 if (NULL == mdoc->first)
214 return(1);
215
216 assert(mdoc->last);
217 if ( ! macro_end(mdoc)) {
218 mdoc->flags |= MDOC_HALT;
219 return(0);
220 }
221 return(1);
222 }
223
224
225 /*
226 * Main parse routine. Parses a single line -- really just hands off to
227 * the macro or text parser.
228 */
229 int
230 mdoc_parseln(struct mdoc *m, int ln, char *buf)
231 {
232
233 /* If in error-mode, then we parse no more. */
234
235 if (MDOC_HALT & m->flags)
236 return(0);
237
238 return('.' == *buf ? parsemacro(m, ln, buf) :
239 parsetext(m, ln, buf));
240 }
241
242
243 void
244 mdoc_vmsg(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
245 {
246 char buf[256];
247 va_list ap;
248
249 if (NULL == mdoc->cb.mdoc_msg)
250 return;
251
252 va_start(ap, fmt);
253 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
254 va_end(ap);
255 (*mdoc->cb.mdoc_msg)(mdoc->data, ln, pos, buf);
256 }
257
258
259 int
260 mdoc_verr(struct mdoc *mdoc, int ln, int pos,
261 const char *fmt, ...)
262 {
263 char buf[256];
264 va_list ap;
265
266 if (NULL == mdoc->cb.mdoc_err)
267 return(0);
268
269 va_start(ap, fmt);
270 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
271 va_end(ap);
272 return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf));
273 }
274
275
276 int
277 mdoc_vwarn(struct mdoc *mdoc, int ln, int pos,
278 enum mdoc_warn type, const char *fmt, ...)
279 {
280 char buf[256];
281 va_list ap;
282
283 if (NULL == mdoc->cb.mdoc_warn)
284 return(0);
285
286 va_start(ap, fmt);
287 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
288 va_end(ap);
289 return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, type, buf));
290 }
291
292
293 int
294 mdoc_macro(struct mdoc *m, int tok,
295 int ln, int pp, int *pos, char *buf)
296 {
297
298 /* FIXME - these should happen during validation. */
299
300 if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
301 SEC_PROLOGUE != m->lastnamed)
302 return(mdoc_perr(m, ln, pp,
303 "disallowed in document body"));
304
305 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
306 SEC_PROLOGUE == m->lastnamed)
307 return(mdoc_perr(m, ln, pp,
308 "disallowed in prologue"));
309
310 if (1 != pp && ! (MDOC_CALLABLE & mdoc_macros[tok].flags))
311 return(mdoc_perr(m, ln, pp, "%s not callable",
312 mdoc_macronames[tok]));
313
314 return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf));
315 }
316
317
318 static int
319 mdoc_node_append(struct mdoc *mdoc, struct mdoc_node *p)
320 {
321
322 assert(mdoc->last);
323 assert(mdoc->first);
324 assert(MDOC_ROOT != p->type);
325
326 switch (mdoc->next) {
327 case (MDOC_NEXT_SIBLING):
328 mdoc->last->next = p;
329 p->prev = mdoc->last;
330 p->parent = mdoc->last->parent;
331 break;
332 case (MDOC_NEXT_CHILD):
333 mdoc->last->child = p;
334 p->parent = mdoc->last;
335 break;
336 default:
337 abort();
338 /* NOTREACHED */
339 }
340
341 if ( ! mdoc_valid_pre(mdoc, p))
342 return(0);
343 if ( ! mdoc_action_pre(mdoc, p))
344 return(0);
345
346 switch (p->type) {
347 case (MDOC_HEAD):
348 assert(MDOC_BLOCK == p->parent->type);
349 p->parent->head = p;
350 break;
351 case (MDOC_TAIL):
352 assert(MDOC_BLOCK == p->parent->type);
353 p->parent->tail = p;
354 break;
355 case (MDOC_BODY):
356 assert(MDOC_BLOCK == p->parent->type);
357 p->parent->body = p;
358 break;
359 default:
360 break;
361 }
362
363 mdoc->last = p;
364 return(1);
365 }
366
367
368 static struct mdoc_node *
369 mdoc_node_alloc(const struct mdoc *mdoc)
370 {
371 struct mdoc_node *p;
372
373 p = xcalloc(1, sizeof(struct mdoc_node));
374 p->sec = mdoc->lastsec;
375
376 return(p);
377 }
378
379
380 int
381 mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, int tok)
382 {
383 struct mdoc_node *p;
384
385 assert(mdoc->first);
386 assert(mdoc->last);
387
388 p = mdoc_node_alloc(mdoc);
389
390 p->line = line;
391 p->pos = pos;
392 p->type = MDOC_TAIL;
393 p->tok = tok;
394
395 return(mdoc_node_append(mdoc, p));
396 }
397
398
399 int
400 mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, int tok)
401 {
402 struct mdoc_node *p;
403
404 assert(mdoc->first);
405 assert(mdoc->last);
406
407 p = mdoc_node_alloc(mdoc);
408
409 p->line = line;
410 p->pos = pos;
411 p->type = MDOC_HEAD;
412 p->tok = tok;
413
414 return(mdoc_node_append(mdoc, p));
415 }
416
417
418 int
419 mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, int tok)
420 {
421 struct mdoc_node *p;
422
423 assert(mdoc->first);
424 assert(mdoc->last);
425
426 p = mdoc_node_alloc(mdoc);
427
428 p->line = line;
429 p->pos = pos;
430 p->type = MDOC_BODY;
431 p->tok = tok;
432
433 return(mdoc_node_append(mdoc, p));
434 }
435
436
437 int
438 mdoc_root_alloc(struct mdoc *mdoc)
439 {
440 struct mdoc_node *p;
441
442 p = mdoc_node_alloc(mdoc);
443
444 p->type = MDOC_ROOT;
445
446 return(mdoc_node_append(mdoc, p));
447 }
448
449
450 int
451 mdoc_block_alloc(struct mdoc *mdoc, int line, int pos,
452 int tok, struct mdoc_arg *args)
453 {
454 struct mdoc_node *p;
455
456 p = mdoc_node_alloc(mdoc);
457
458 p->pos = pos;
459 p->line = line;
460 p->type = MDOC_BLOCK;
461 p->tok = tok;
462 p->args = args;
463
464 if (args)
465 (args->refcnt)++;
466
467 return(mdoc_node_append(mdoc, p));
468 }
469
470
471 int
472 mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos,
473 int tok, struct mdoc_arg *args)
474 {
475 struct mdoc_node *p;
476
477 p = mdoc_node_alloc(mdoc);
478
479 p->line = line;
480 p->pos = pos;
481 p->type = MDOC_ELEM;
482 p->tok = tok;
483 p->args = args;
484
485 if (args)
486 (args->refcnt)++;
487
488 return(mdoc_node_append(mdoc, p));
489 }
490
491
492 int
493 mdoc_word_alloc(struct mdoc *mdoc,
494 int line, int pos, const char *word)
495 {
496 struct mdoc_node *p;
497
498 p = mdoc_node_alloc(mdoc);
499
500 p->line = line;
501 p->pos = pos;
502 p->type = MDOC_TEXT;
503 p->string = xstrdup(word);
504
505 return(mdoc_node_append(mdoc, p));
506 }
507
508
509 void
510 mdoc_node_free(struct mdoc_node *p)
511 {
512
513 if (p->string)
514 free(p->string);
515 if (p->args)
516 mdoc_argv_free(p->args);
517 free(p);
518 }
519
520
521 void
522 mdoc_node_freelist(struct mdoc_node *p)
523 {
524
525 if (p->child)
526 mdoc_node_freelist(p->child);
527 if (p->next)
528 mdoc_node_freelist(p->next);
529
530 mdoc_node_free(p);
531 }
532
533
534 /*
535 * Parse free-form text, that is, a line that does not begin with the
536 * control character.
537 */
538 static int
539 parsetext(struct mdoc *m, int line, char *buf)
540 {
541
542 if (SEC_PROLOGUE == m->lastnamed)
543 return(mdoc_perr(m, line, 0,
544 "text disallowed in prologue"));
545
546 if (0 == buf[0] && ! (MDOC_LITERAL & m->flags))
547 return(mdoc_perr(m, line, 0,
548 "blank lines only in literal context"));
549
550 if ( ! mdoc_word_alloc(m, line, 0, buf))
551 return(0);
552
553 m->next = MDOC_NEXT_SIBLING;
554 return(1);
555 }
556
557
558 static int
559 macrowarn(struct mdoc *m, int ln, const char *buf)
560 {
561 if ( ! (MDOC_IGN_MACRO & m->pflags))
562 return(mdoc_perr(m, ln, 1, "unknown macro: %s%s",
563 buf, strlen(buf) > 3 ? "..." : ""));
564 return(mdoc_pwarn(m, ln, 1, WARN_SYNTAX,
565 "unknown macro: %s%s",
566 buf, strlen(buf) > 3 ? "..." : ""));
567 }
568
569
570
571 /*
572 * Parse a macro line, that is, a line beginning with the control
573 * character.
574 */
575 int
576 parsemacro(struct mdoc *m, int ln, char *buf)
577 {
578 int i, c;
579 char mac[5];
580
581 /* Comments and empties are quickly ignored. */
582
583 if (0 == buf[1])
584 return(1);
585
586 if (' ' == buf[1]) {
587 i = 2;
588 while (buf[i] && ' ' == buf[i])
589 i++;
590 if (0 == buf[i])
591 return(1);
592 return(mdoc_perr(m, ln, 1, "invalid syntax"));
593 }
594
595 if (buf[1] && '\\' == buf[1])
596 if (buf[2] && '\"' == buf[2])
597 return(1);
598
599 /* Copy the first word into a nil-terminated buffer. */
600
601 for (i = 1; i < 5; i++) {
602 if (0 == (mac[i - 1] = buf[i]))
603 break;
604 else if (' ' == buf[i])
605 break;
606 }
607
608 mac[i - 1] = 0;
609
610 if (i == 5 || i <= 2) {
611 if ( ! macrowarn(m, ln, mac))
612 goto err;
613 return(1);
614 }
615
616 if (MDOC_MAX == (c = mdoc_tokhash_find(m->htab, mac))) {
617 if ( ! macrowarn(m, ln, mac))
618 goto err;
619 return(1);
620 }
621
622 /* The macro is sane. Jump to the next word. */
623
624 while (buf[i] && ' ' == buf[i])
625 i++;
626
627 /* Begin recursive parse sequence. */
628
629 if ( ! mdoc_macro(m, c, ln, 1, &i, buf))
630 goto err;
631
632 /*
633 * If we're in literal mode, then add a newline to the end of
634 * macro lines. Our frontends will interpret this correctly
635 * (it's documented in mdoc.3).
636 */
637
638 return(1);
639
640 err: /* Error out. */
641
642 m->flags |= MDOC_HALT;
643 return(0);
644 }