]> git.cameronkatri.com Git - mandoc.git/blob - man.c
Profit from the unified struct roff_man and reduce the number of
[mandoc.git] / man.c
1 /* $Id: man.c,v 1.154 2015/04/18 16:34:25 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2013, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2011 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include "config.h"
20
21 #include <sys/types.h>
22
23 #include <assert.h>
24 #include <ctype.h>
25 #include <stdarg.h>
26 #include <stdlib.h>
27 #include <stdio.h>
28 #include <string.h>
29
30 #include "mandoc_aux.h"
31 #include "mandoc.h"
32 #include "roff.h"
33 #include "man.h"
34 #include "libmandoc.h"
35 #include "libman.h"
36
37 const char *const __man_macronames[MAN_MAX] = {
38 "br", "TH", "SH", "SS",
39 "TP", "LP", "PP", "P",
40 "IP", "HP", "SM", "SB",
41 "BI", "IB", "BR", "RB",
42 "R", "B", "I", "IR",
43 "RI", "sp", "nf",
44 "fi", "RE", "RS", "DT",
45 "UC", "PD", "AT", "in",
46 "ft", "OP", "EX", "EE",
47 "UR", "UE", "ll"
48 };
49
50 const char * const *man_macronames = __man_macronames;
51
/* Forward declarations of the helpers private to this file. */
static void man_alloc1(struct roff_man *man);
static void man_breakscope(struct roff_man *man, int tok);
static void man_descope(struct roff_man *man, int line, int offs);
static void man_free1(struct roff_man *man);
static struct roff_node *man_node_alloc(struct roff_man *man, int line,
    int pos, enum roff_type type, int tok);
static void man_node_append(struct roff_man *man, struct roff_node *p);
static void man_node_free(struct roff_node *p);
static void man_node_unlink(struct roff_man *man, struct roff_node *n);
static int man_ptext(struct roff_man *man, int line, char *buf, int offs);
static int man_pmacro(struct roff_man *man, int ln, char *buf, int offs);
65
66
67 const struct roff_node *
68 man_node(const struct roff_man *man)
69 {
70
71 return(man->first);
72 }
73
74 const struct roff_meta *
75 man_meta(const struct roff_man *man)
76 {
77
78 return(&man->meta);
79 }
80
/*
 * Bring a parser back to its freshly allocated state: release the
 * current tree and meta strings, then set up a new empty root.
 * The parser structure itself is kept.
 */
void
man_reset(struct roff_man *man)
{
	man_free1(man);		/* drop tree and meta strings */
	man_alloc1(man);	/* start over with an empty root */
}
88
/*
 * Destroy a parser: release everything it holds via man_free1(),
 * then the parser structure itself.  "man" must not be used afterwards.
 */
void
man_free(struct roff_man *man)
{
	man_free1(man);		/* tree and meta strings */
	free(man);		/* the structure itself */
}
96
97 struct roff_man *
98 man_alloc(struct roff *roff, struct mparse *parse,
99 const char *defos, int quick)
100 {
101 struct roff_man *p;
102
103 p = mandoc_calloc(1, sizeof(*p));
104
105 man_hash_init();
106 p->parse = parse;
107 p->defos = defos;
108 p->quick = quick;
109 p->roff = roff;
110
111 man_alloc1(p);
112 return(p);
113 }
114
/*
 * Signal the end of input to the parser.
 * All the work is delegated to man_macroend().
 */
void
man_endparse(struct roff_man *man)
{
	man_macroend(man);
}
121
122 int
123 man_parseln(struct roff_man *man, int ln, char *buf, int offs)
124 {
125
126 if (man->last->type != ROFFT_EQN || ln > man->last->line)
127 man->flags |= MAN_NEWLINE;
128
129 return (roff_getcontrol(man->roff, buf, &offs) ?
130 man_pmacro(man, ln, buf, offs) :
131 man_ptext(man, ln, buf, offs));
132 }
133
134 static void
135 man_free1(struct roff_man *man)
136 {
137
138 if (man->first)
139 man_node_delete(man, man->first);
140 free(man->meta.title);
141 free(man->meta.os);
142 free(man->meta.date);
143 free(man->meta.vol);
144 free(man->meta.msec);
145 }
146
147 static void
148 man_alloc1(struct roff_man *man)
149 {
150
151 memset(&man->meta, 0, sizeof(man->meta));
152 man->macroset = MACROSET_MAN;
153 man->flags = 0;
154 man->last = mandoc_calloc(1, sizeof(*man->last));
155 man->first = man->last;
156 man->last->type = ROFFT_ROOT;
157 man->last->tok = MAN_MAX;
158 man->next = ROFF_NEXT_CHILD;
159 }
160
161
162 static void
163 man_node_append(struct roff_man *man, struct roff_node *p)
164 {
165
166 assert(man->last);
167 assert(man->first);
168 assert(p->type != ROFFT_ROOT);
169
170 switch (man->next) {
171 case ROFF_NEXT_SIBLING:
172 man->last->next = p;
173 p->prev = man->last;
174 p->parent = man->last->parent;
175 break;
176 case ROFF_NEXT_CHILD:
177 man->last->child = p;
178 p->parent = man->last;
179 break;
180 default:
181 abort();
182 /* NOTREACHED */
183 }
184
185 assert(p->parent);
186 p->parent->nchild++;
187
188 switch (p->type) {
189 case ROFFT_BLOCK:
190 if (p->tok == MAN_SH || p->tok == MAN_SS)
191 man->flags &= ~MAN_LITERAL;
192 break;
193 case ROFFT_HEAD:
194 assert(p->parent->type == ROFFT_BLOCK);
195 p->parent->head = p;
196 break;
197 case ROFFT_BODY:
198 assert(p->parent->type == ROFFT_BLOCK);
199 p->parent->body = p;
200 break;
201 default:
202 break;
203 }
204
205 man->last = p;
206
207 switch (p->type) {
208 case ROFFT_TBL:
209 /* FALLTHROUGH */
210 case ROFFT_TEXT:
211 man_valid_post(man);
212 break;
213 default:
214 break;
215 }
216 }
217
218 static struct roff_node *
219 man_node_alloc(struct roff_man *man, int line, int pos,
220 enum roff_type type, int tok)
221 {
222 struct roff_node *p;
223
224 p = mandoc_calloc(1, sizeof(*p));
225 p->line = line;
226 p->pos = pos;
227 p->type = type;
228 p->tok = tok;
229
230 if (man->flags & MAN_NEWLINE)
231 p->flags |= MAN_LINE;
232 man->flags &= ~MAN_NEWLINE;
233 return(p);
234 }
235
236 void
237 man_elem_alloc(struct roff_man *man, int line, int pos, int tok)
238 {
239 struct roff_node *p;
240
241 p = man_node_alloc(man, line, pos, ROFFT_ELEM, tok);
242 man_node_append(man, p);
243 man->next = ROFF_NEXT_CHILD;
244 }
245
246 void
247 man_head_alloc(struct roff_man *man, int line, int pos, int tok)
248 {
249 struct roff_node *p;
250
251 p = man_node_alloc(man, line, pos, ROFFT_HEAD, tok);
252 man_node_append(man, p);
253 man->next = ROFF_NEXT_CHILD;
254 }
255
256 void
257 man_body_alloc(struct roff_man *man, int line, int pos, int tok)
258 {
259 struct roff_node *p;
260
261 p = man_node_alloc(man, line, pos, ROFFT_BODY, tok);
262 man_node_append(man, p);
263 man->next = ROFF_NEXT_CHILD;
264 }
265
266 void
267 man_block_alloc(struct roff_man *man, int line, int pos, int tok)
268 {
269 struct roff_node *p;
270
271 p = man_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
272 man_node_append(man, p);
273 man->next = ROFF_NEXT_CHILD;
274 }
275
276 void
277 man_word_alloc(struct roff_man *man, int line, int pos, const char *word)
278 {
279 struct roff_node *n;
280
281 n = man_node_alloc(man, line, pos, ROFFT_TEXT, MAN_MAX);
282 n->string = roff_strdup(man->roff, word);
283 man_node_append(man, n);
284 man->next = ROFF_NEXT_SIBLING;
285 }
286
287 void
288 man_word_append(struct roff_man *man, const char *word)
289 {
290 struct roff_node *n;
291 char *addstr, *newstr;
292
293 n = man->last;
294 addstr = roff_strdup(man->roff, word);
295 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
296 free(addstr);
297 free(n->string);
298 n->string = newstr;
299 man->next = ROFF_NEXT_SIBLING;
300 }
301
302 /*
303 * Free all of the resources held by a node. This does NOT unlink a
304 * node from its context; for that, see man_node_unlink().
305 */
306 static void
307 man_node_free(struct roff_node *p)
308 {
309
310 free(p->string);
311 free(p);
312 }
313
314 void
315 man_node_delete(struct roff_man *man, struct roff_node *p)
316 {
317
318 while (p->child)
319 man_node_delete(man, p->child);
320
321 man_node_unlink(man, p);
322 man_node_free(p);
323 }
324
325 void
326 man_addeqn(struct roff_man *man, const struct eqn *ep)
327 {
328 struct roff_node *n;
329
330 n = man_node_alloc(man, ep->ln, ep->pos, ROFFT_EQN, MAN_MAX);
331 n->eqn = ep;
332 if (ep->ln > man->last->line)
333 n->flags |= MAN_LINE;
334 man_node_append(man, n);
335 man->next = ROFF_NEXT_SIBLING;
336 man_descope(man, ep->ln, ep->pos);
337 }
338
339 void
340 man_addspan(struct roff_man *man, const struct tbl_span *sp)
341 {
342 struct roff_node *n;
343
344 man_breakscope(man, MAN_MAX);
345 n = man_node_alloc(man, sp->line, 0, ROFFT_TBL, MAN_MAX);
346 n->span = sp;
347 man_node_append(man, n);
348 man->next = ROFF_NEXT_SIBLING;
349 man_descope(man, sp->line, 0);
350 }
351
352 static void
353 man_descope(struct roff_man *man, int line, int offs)
354 {
355 /*
356 * Co-ordinate what happens with having a next-line scope open:
357 * first close out the element scope (if applicable), then close
358 * out the block scope (also if applicable).
359 */
360
361 if (man->flags & MAN_ELINE) {
362 man->flags &= ~MAN_ELINE;
363 man_unscope(man, man->last->parent);
364 }
365 if ( ! (man->flags & MAN_BLINE))
366 return;
367 man->flags &= ~MAN_BLINE;
368 man_unscope(man, man->last->parent);
369 man_body_alloc(man, line, offs, man->last->tok);
370 }
371
372 static int
373 man_ptext(struct roff_man *man, int line, char *buf, int offs)
374 {
375 int i;
376
377 /* Literal free-form text whitespace is preserved. */
378
379 if (man->flags & MAN_LITERAL) {
380 man_word_alloc(man, line, offs, buf + offs);
381 man_descope(man, line, offs);
382 return(1);
383 }
384
385 for (i = offs; buf[i] == ' '; i++)
386 /* Skip leading whitespace. */ ;
387
388 /*
389 * Blank lines are ignored right after headings
390 * but add a single vertical space elsewhere.
391 */
392
393 if (buf[i] == '\0') {
394 /* Allocate a blank entry. */
395 if (man->last->tok != MAN_SH &&
396 man->last->tok != MAN_SS) {
397 man_elem_alloc(man, line, offs, MAN_sp);
398 man->next = ROFF_NEXT_SIBLING;
399 }
400 return(1);
401 }
402
403 /*
404 * Warn if the last un-escaped character is whitespace. Then
405 * strip away the remaining spaces (tabs stay!).
406 */
407
408 i = (int)strlen(buf);
409 assert(i);
410
411 if (' ' == buf[i - 1] || '\t' == buf[i - 1]) {
412 if (i > 1 && '\\' != buf[i - 2])
413 mandoc_msg(MANDOCERR_SPACE_EOL, man->parse,
414 line, i - 1, NULL);
415
416 for (--i; i && ' ' == buf[i]; i--)
417 /* Spin back to non-space. */ ;
418
419 /* Jump ahead of escaped whitespace. */
420 i += '\\' == buf[i] ? 2 : 1;
421
422 buf[i] = '\0';
423 }
424 man_word_alloc(man, line, offs, buf + offs);
425
426 /*
427 * End-of-sentence check. If the last character is an unescaped
428 * EOS character, then flag the node as being the end of a
429 * sentence. The front-end will know how to interpret this.
430 */
431
432 assert(i);
433 if (mandoc_eos(buf, (size_t)i))
434 man->last->flags |= MAN_EOS;
435
436 man_descope(man, line, offs);
437 return(1);
438 }
439
440 static int
441 man_pmacro(struct roff_man *man, int ln, char *buf, int offs)
442 {
443 struct roff_node *n;
444 const char *cp;
445 int tok;
446 int i, ppos;
447 int bline;
448 char mac[5];
449
450 ppos = offs;
451
452 /*
453 * Copy the first word into a nil-terminated buffer.
454 * Stop when a space, tab, escape, or eoln is encountered.
455 */
456
457 i = 0;
458 while (i < 4 && strchr(" \t\\", buf[offs]) == NULL)
459 mac[i++] = buf[offs++];
460
461 mac[i] = '\0';
462
463 tok = (i > 0 && i < 4) ? man_hash_find(mac) : MAN_MAX;
464
465 if (tok == MAN_MAX) {
466 mandoc_msg(MANDOCERR_MACRO, man->parse,
467 ln, ppos, buf + ppos - 1);
468 return(1);
469 }
470
471 /* Skip a leading escape sequence or tab. */
472
473 switch (buf[offs]) {
474 case '\\':
475 cp = buf + offs + 1;
476 mandoc_escape(&cp, NULL, NULL);
477 offs = cp - buf;
478 break;
479 case '\t':
480 offs++;
481 break;
482 default:
483 break;
484 }
485
486 /* Jump to the next non-whitespace word. */
487
488 while (buf[offs] && buf[offs] == ' ')
489 offs++;
490
491 /*
492 * Trailing whitespace. Note that tabs are allowed to be passed
493 * into the parser as "text", so we only warn about spaces here.
494 */
495
496 if (buf[offs] == '\0' && buf[offs - 1] == ' ')
497 mandoc_msg(MANDOCERR_SPACE_EOL, man->parse,
498 ln, offs - 1, NULL);
499
500 /*
501 * Some macros break next-line scopes; otherwise, remember
502 * whether we are in next-line scope for a block head.
503 */
504
505 man_breakscope(man, tok);
506 bline = man->flags & MAN_BLINE;
507
508 /* Call to handler... */
509
510 assert(man_macros[tok].fp);
511 (*man_macros[tok].fp)(man, tok, ln, ppos, &offs, buf);
512
513 /* In quick mode (for mandocdb), abort after the NAME section. */
514
515 if (man->quick && tok == MAN_SH) {
516 n = man->last;
517 if (n->type == ROFFT_BODY &&
518 strcmp(n->prev->child->string, "NAME"))
519 return(2);
520 }
521
522 /*
523 * If we are in a next-line scope for a block head,
524 * close it out now and switch to the body,
525 * unless the next-line scope is allowed to continue.
526 */
527
528 if ( ! bline || man->flags & MAN_ELINE ||
529 man_macros[tok].flags & MAN_NSCOPED)
530 return(1);
531
532 assert(man->flags & MAN_BLINE);
533 man->flags &= ~MAN_BLINE;
534
535 man_unscope(man, man->last->parent);
536 man_body_alloc(man, ln, ppos, man->last->tok);
537 return(1);
538 }
539
540 void
541 man_breakscope(struct roff_man *man, int tok)
542 {
543 struct roff_node *n;
544
545 /*
546 * An element next line scope is open,
547 * and the new macro is not allowed inside elements.
548 * Delete the element that is being broken.
549 */
550
551 if (man->flags & MAN_ELINE && (tok == MAN_MAX ||
552 ! (man_macros[tok].flags & MAN_NSCOPED))) {
553 n = man->last;
554 assert(n->type != ROFFT_TEXT);
555 if (man_macros[n->tok].flags & MAN_NSCOPED)
556 n = n->parent;
557
558 mandoc_vmsg(MANDOCERR_BLK_LINE, man->parse,
559 n->line, n->pos, "%s breaks %s",
560 tok == MAN_MAX ? "TS" : man_macronames[tok],
561 man_macronames[n->tok]);
562
563 man_node_delete(man, n);
564 man->flags &= ~MAN_ELINE;
565 }
566
567 /*
568 * A block header next line scope is open,
569 * and the new macro is not allowed inside block headers.
570 * Delete the block that is being broken.
571 */
572
573 if (man->flags & MAN_BLINE && (tok == MAN_MAX ||
574 man_macros[tok].flags & MAN_BSCOPE)) {
575 n = man->last;
576 if (n->type == ROFFT_TEXT)
577 n = n->parent;
578 if ( ! (man_macros[n->tok].flags & MAN_BSCOPE))
579 n = n->parent;
580
581 assert(n->type == ROFFT_HEAD);
582 n = n->parent;
583 assert(n->type == ROFFT_BLOCK);
584 assert(man_macros[n->tok].flags & MAN_SCOPED);
585
586 mandoc_vmsg(MANDOCERR_BLK_LINE, man->parse,
587 n->line, n->pos, "%s breaks %s",
588 tok == MAN_MAX ? "TS" : man_macronames[tok],
589 man_macronames[n->tok]);
590
591 man_node_delete(man, n);
592 man->flags &= ~MAN_BLINE;
593 }
594 }
595
596 /*
597 * Unlink a node from its context. If "man" is provided, the last parse
598 * point will also be adjusted accordingly.
599 */
600 static void
601 man_node_unlink(struct roff_man *man, struct roff_node *n)
602 {
603
604 /* Adjust siblings. */
605
606 if (n->prev)
607 n->prev->next = n->next;
608 if (n->next)
609 n->next->prev = n->prev;
610
611 /* Adjust parent. */
612
613 if (n->parent) {
614 n->parent->nchild--;
615 if (n->parent->child == n)
616 n->parent->child = n->prev ? n->prev : n->next;
617 }
618
619 /* Adjust parse point, if applicable. */
620
621 if (man && man->last == n) {
622 /*XXX: this can occur when bailing from validation. */
623 /*assert(NULL == n->next);*/
624 if (n->prev) {
625 man->last = n->prev;
626 man->next = ROFF_NEXT_SIBLING;
627 } else {
628 man->last = n->parent;
629 man->next = ROFF_NEXT_CHILD;
630 }
631 }
632
633 if (man && man->first == n)
634 man->first = NULL;
635 }
636
637 const struct mparse *
638 man_mparse(const struct roff_man *man)
639 {
640
641 assert(man && man->parse);
642 return(man->parse);
643 }
644
645 void
646 man_deroff(char **dest, const struct roff_node *n)
647 {
648 char *cp;
649 size_t sz;
650
651 if (n->type != ROFFT_TEXT) {
652 for (n = n->child; n; n = n->next)
653 man_deroff(dest, n);
654 return;
655 }
656
657 /* Skip leading whitespace and escape sequences. */
658
659 cp = n->string;
660 while ('\0' != *cp) {
661 if ('\\' == *cp) {
662 cp++;
663 mandoc_escape((const char **)&cp, NULL, NULL);
664 } else if (isspace((unsigned char)*cp))
665 cp++;
666 else
667 break;
668 }
669
670 /* Skip trailing whitespace. */
671
672 for (sz = strlen(cp); sz; sz--)
673 if (0 == isspace((unsigned char)cp[sz-1]))
674 break;
675
676 /* Skip empty strings. */
677
678 if (0 == sz)
679 return;
680
681 if (NULL == *dest) {
682 *dest = mandoc_strndup(cp, sz);
683 return;
684 }
685
686 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
687 free(*dest);
688 *dest = cp;
689 }