]> git.cameronkatri.com Git - mandoc.git/blob - man_macro.c
First step towards parser unification:
[mandoc.git] / man_macro.c
1 /* $Id: man_macro.c,v 1.101 2015/04/02 21:36:50 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2012, 2013, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2013 Franco Fichtner <franco@lastsummer.de>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include "config.h"
20
21 #include <sys/types.h>
22
23 #include <assert.h>
24 #include <ctype.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include "mandoc.h"
29 #include "roff.h"
30 #include "man.h"
31 #include "libmandoc.h"
32 #include "libman.h"
33
/*
 * Verdict returned by rew_dohalt() for each node that rew_scope()
 * visits while climbing from man->last towards the root.
 */
enum rew {
	REW_REWIND,	/* stop climbing and rewind up to this node */
	REW_NOHALT,	/* keep climbing to the parent */
	REW_HALT	/* stop climbing and do not rewind at all */
};
39
/* Scope rewinding helpers. */
static	enum rew	 rew_block(enum mant ntok, enum roff_type type,
				const struct man_node *n);
static	enum rew	 rew_dohalt(enum mant tok, enum roff_type type,
				const struct man_node *n);
static	void		 rew_scope(enum roff_type type,
				struct man *man, enum mant tok);

/* Argument fetcher shared by all macro handlers. */
static	int		 man_args(struct man *man, int line,
				int *pos, char *buf, char **v);

/* Macro handlers installed in the macro table. */
static	void		 blk_close(MACRO_PROT_ARGS);
static	void		 blk_exp(MACRO_PROT_ARGS);
static	void		 blk_imp(MACRO_PROT_ARGS);
static	void		 in_line_eoln(MACRO_PROT_ARGS);
54 const struct man_macro __man_macros[MAN_MAX] = {
55 { in_line_eoln, MAN_NSCOPED }, /* br */
56 { in_line_eoln, MAN_BSCOPE }, /* TH */
57 { blk_imp, MAN_BSCOPE | MAN_SCOPED }, /* SH */
58 { blk_imp, MAN_BSCOPE | MAN_SCOPED }, /* SS */
59 { blk_imp, MAN_BSCOPE | MAN_SCOPED }, /* TP */
60 { blk_imp, MAN_BSCOPE }, /* LP */
61 { blk_imp, MAN_BSCOPE }, /* PP */
62 { blk_imp, MAN_BSCOPE }, /* P */
63 { blk_imp, MAN_BSCOPE }, /* IP */
64 { blk_imp, MAN_BSCOPE }, /* HP */
65 { in_line_eoln, MAN_SCOPED | MAN_JOIN }, /* SM */
66 { in_line_eoln, MAN_SCOPED | MAN_JOIN }, /* SB */
67 { in_line_eoln, 0 }, /* BI */
68 { in_line_eoln, 0 }, /* IB */
69 { in_line_eoln, 0 }, /* BR */
70 { in_line_eoln, 0 }, /* RB */
71 { in_line_eoln, MAN_SCOPED | MAN_JOIN }, /* R */
72 { in_line_eoln, MAN_SCOPED | MAN_JOIN }, /* B */
73 { in_line_eoln, MAN_SCOPED | MAN_JOIN }, /* I */
74 { in_line_eoln, 0 }, /* IR */
75 { in_line_eoln, 0 }, /* RI */
76 { in_line_eoln, MAN_NSCOPED }, /* sp */
77 { in_line_eoln, MAN_BSCOPE }, /* nf */
78 { in_line_eoln, MAN_BSCOPE }, /* fi */
79 { blk_close, MAN_BSCOPE }, /* RE */
80 { blk_exp, MAN_BSCOPE }, /* RS */
81 { in_line_eoln, 0 }, /* DT */
82 { in_line_eoln, 0 }, /* UC */
83 { in_line_eoln, MAN_NSCOPED }, /* PD */
84 { in_line_eoln, 0 }, /* AT */
85 { in_line_eoln, 0 }, /* in */
86 { in_line_eoln, 0 }, /* ft */
87 { in_line_eoln, 0 }, /* OP */
88 { in_line_eoln, MAN_BSCOPE }, /* EX */
89 { in_line_eoln, MAN_BSCOPE }, /* EE */
90 { blk_exp, MAN_BSCOPE }, /* UR */
91 { blk_close, MAN_BSCOPE }, /* UE */
92 { in_line_eoln, 0 }, /* ll */
93 };
94
95 const struct man_macro * const man_macros = __man_macros;
96
97
98 void
99 man_unscope(struct man *man, const struct man_node *to)
100 {
101 struct man_node *n;
102
103 to = to->parent;
104 n = man->last;
105 while (n != to) {
106
107 /* Reached the end of the document? */
108
109 if (to == NULL && ! (n->flags & MAN_VALID)) {
110 if (man->flags & (MAN_BLINE | MAN_ELINE) &&
111 man_macros[n->tok].flags & MAN_SCOPED) {
112 mandoc_vmsg(MANDOCERR_BLK_LINE,
113 man->parse, n->line, n->pos,
114 "EOF breaks %s",
115 man_macronames[n->tok]);
116 if (man->flags & MAN_ELINE)
117 man->flags &= ~MAN_ELINE;
118 else {
119 assert(n->type == ROFFT_HEAD);
120 n = n->parent;
121 man->flags &= ~MAN_BLINE;
122 }
123 man->last = n;
124 n = n->parent;
125 man_node_delete(man, man->last);
126 continue;
127 }
128 if (n->type == ROFFT_BLOCK &&
129 man_macros[n->tok].fp == blk_exp)
130 mandoc_msg(MANDOCERR_BLK_NOEND,
131 man->parse, n->line, n->pos,
132 man_macronames[n->tok]);
133 }
134
135 /*
136 * We might delete the man->last node
137 * in the post-validation phase.
138 * Save a pointer to the parent such that
139 * we know where to continue the iteration.
140 */
141
142 man->last = n;
143 n = n->parent;
144 man_valid_post(man);
145 }
146
147 /*
148 * If we ended up at the parent of the node we were
149 * supposed to rewind to, that means the target node
150 * got deleted, so add the next node we parse as a child
151 * of the parent instead of as a sibling of the target.
152 */
153
154 man->next = (man->last == to) ?
155 MAN_NEXT_CHILD : MAN_NEXT_SIBLING;
156 }
157
158 static enum rew
159 rew_block(enum mant ntok, enum roff_type type, const struct man_node *n)
160 {
161
162 if (type == ROFFT_BLOCK && n->parent->tok == ntok &&
163 n->parent->type == ROFFT_BODY)
164 return(REW_REWIND);
165 return(ntok == n->tok ? REW_HALT : REW_NOHALT);
166 }
167
168 /*
169 * There are three scope levels: scoped to the root (all), scoped to the
170 * section (all less sections), and scoped to subsections (all less
171 * sections and subsections).
172 */
173 static enum rew
174 rew_dohalt(enum mant tok, enum roff_type type, const struct man_node *n)
175 {
176 enum rew c;
177
178 /* We cannot progress beyond the root ever. */
179 if (n->type == ROFFT_ROOT)
180 return(REW_HALT);
181
182 assert(n->parent);
183
184 /* Normal nodes shouldn't go to the level of the root. */
185 if (n->parent->type == ROFFT_ROOT)
186 return(REW_REWIND);
187
188 /* Already-validated nodes should be closed out. */
189 if (MAN_VALID & n->flags)
190 return(REW_NOHALT);
191
192 /* First: rewind to ourselves. */
193 if (type == n->type && tok == n->tok) {
194 if (man_macros[n->tok].fp == blk_exp)
195 return(REW_HALT);
196 else
197 return(REW_REWIND);
198 }
199
200 /*
201 * Next follow the implicit scope-smashings as defined by man.7:
202 * section, sub-section, etc.
203 */
204
205 switch (tok) {
206 case MAN_SH:
207 break;
208 case MAN_SS:
209 /* Rewind to a section, if a block. */
210 if (REW_NOHALT != (c = rew_block(MAN_SH, type, n)))
211 return(c);
212 break;
213 case MAN_RS:
214 /* Preserve empty paragraphs before RS. */
215 if (0 == n->nchild && (MAN_P == n->tok ||
216 MAN_PP == n->tok || MAN_LP == n->tok))
217 return(REW_HALT);
218 /* Rewind to a subsection, if a block. */
219 if (REW_NOHALT != (c = rew_block(MAN_SS, type, n)))
220 return(c);
221 /* Rewind to a section, if a block. */
222 if (REW_NOHALT != (c = rew_block(MAN_SH, type, n)))
223 return(c);
224 break;
225 default:
226 /* Rewind to an offsetter, if a block. */
227 if (REW_NOHALT != (c = rew_block(MAN_RS, type, n)))
228 return(c);
229 /* Rewind to a subsection, if a block. */
230 if (REW_NOHALT != (c = rew_block(MAN_SS, type, n)))
231 return(c);
232 /* Rewind to a section, if a block. */
233 if (REW_NOHALT != (c = rew_block(MAN_SH, type, n)))
234 return(c);
235 break;
236 }
237
238 return(REW_NOHALT);
239 }
240
241 /*
242 * Rewinding entails ascending the parse tree until a coherent point,
243 * for example, the `SH' macro will close out any intervening `SS'
244 * scopes. When a scope is closed, it must be validated and actioned.
245 */
246 static void
247 rew_scope(enum roff_type type, struct man *man, enum mant tok)
248 {
249 struct man_node *n;
250 enum rew c;
251
252 for (n = man->last; n; n = n->parent) {
253 /*
254 * Whether we should stop immediately (REW_HALT), stop
255 * and rewind until this point (REW_REWIND), or keep
256 * rewinding (REW_NOHALT).
257 */
258 c = rew_dohalt(tok, type, n);
259 if (REW_HALT == c)
260 return;
261 if (REW_REWIND == c)
262 break;
263 }
264
265 /*
266 * Rewind until the current point. Warn if we're a roff
267 * instruction that's mowing over explicit scopes.
268 */
269
270 man_unscope(man, n);
271 }
272
273
274 /*
275 * Close out a generic explicit macro.
276 */
277 void
278 blk_close(MACRO_PROT_ARGS)
279 {
280 enum mant ntok;
281 const struct man_node *nn;
282 char *p;
283 int nrew, target;
284
285 nrew = 1;
286 switch (tok) {
287 case MAN_RE:
288 ntok = MAN_RS;
289 if ( ! man_args(man, line, pos, buf, &p))
290 break;
291 for (nn = man->last->parent; nn; nn = nn->parent)
292 if (nn->tok == ntok && nn->type == ROFFT_BLOCK)
293 nrew++;
294 target = strtol(p, &p, 10);
295 if (*p != '\0')
296 mandoc_vmsg(MANDOCERR_ARG_EXCESS, man->parse,
297 line, p - buf, "RE ... %s", p);
298 if (target == 0)
299 target = 1;
300 nrew -= target;
301 if (nrew < 1) {
302 mandoc_vmsg(MANDOCERR_RE_NOTOPEN, man->parse,
303 line, ppos, "RE %d", target);
304 return;
305 }
306 break;
307 case MAN_UE:
308 ntok = MAN_UR;
309 break;
310 default:
311 abort();
312 /* NOTREACHED */
313 }
314
315 for (nn = man->last->parent; nn; nn = nn->parent)
316 if (nn->tok == ntok && nn->type == ROFFT_BLOCK && ! --nrew)
317 break;
318
319 if (nn == NULL) {
320 mandoc_msg(MANDOCERR_BLK_NOTOPEN, man->parse,
321 line, ppos, man_macronames[tok]);
322 rew_scope(ROFFT_BLOCK, man, MAN_PP);
323 } else {
324 line = man->last->line;
325 ppos = man->last->pos;
326 ntok = man->last->tok;
327 man_unscope(man, nn);
328
329 /* Move a trailing paragraph behind the block. */
330
331 if (ntok == MAN_LP || ntok == MAN_PP || ntok == MAN_P) {
332 *pos = strlen(buf);
333 blk_imp(man, ntok, line, ppos, pos, buf);
334 }
335 }
336 }
337
338 void
339 blk_exp(MACRO_PROT_ARGS)
340 {
341 struct man_node *head;
342 char *p;
343 int la;
344
345 rew_scope(ROFFT_BLOCK, man, tok);
346 man_block_alloc(man, line, ppos, tok);
347 man_head_alloc(man, line, ppos, tok);
348 head = man->last;
349
350 la = *pos;
351 if (man_args(man, line, pos, buf, &p))
352 man_word_alloc(man, line, la, p);
353
354 if (buf[*pos] != '\0')
355 mandoc_vmsg(MANDOCERR_ARG_EXCESS,
356 man->parse, line, *pos, "%s ... %s",
357 man_macronames[tok], buf + *pos);
358
359 man_unscope(man, head);
360 man_body_alloc(man, line, ppos, tok);
361 }
362
363 /*
364 * Parse an implicit-block macro. These contain a ROFFT_HEAD and a
365 * ROFFT_BODY contained within a ROFFT_BLOCK. Rules for closing out other
366 * scopes, such as `SH' closing out an `SS', are defined in the rew
367 * routines.
368 */
369 void
370 blk_imp(MACRO_PROT_ARGS)
371 {
372 int la;
373 char *p;
374 struct man_node *n;
375
376 rew_scope(ROFFT_BODY, man, tok);
377 rew_scope(ROFFT_BLOCK, man, tok);
378 man_block_alloc(man, line, ppos, tok);
379 man_head_alloc(man, line, ppos, tok);
380 n = man->last;
381
382 /* Add line arguments. */
383
384 for (;;) {
385 la = *pos;
386 if ( ! man_args(man, line, pos, buf, &p))
387 break;
388 man_word_alloc(man, line, la, p);
389 }
390
391 /*
392 * For macros having optional next-line scope,
393 * keep the head open if there were no arguments.
394 * For `TP', always keep the head open.
395 */
396
397 if (man_macros[tok].flags & MAN_SCOPED &&
398 (tok == MAN_TP || n == man->last)) {
399 man->flags |= MAN_BLINE;
400 return;
401 }
402
403 /* Close out the head and open the body. */
404
405 rew_scope(ROFFT_HEAD, man, tok);
406 man_body_alloc(man, line, ppos, tok);
407 }
408
409 void
410 in_line_eoln(MACRO_PROT_ARGS)
411 {
412 int la;
413 char *p;
414 struct man_node *n;
415
416 man_elem_alloc(man, line, ppos, tok);
417 n = man->last;
418
419 for (;;) {
420 if (buf[*pos] != '\0' && (tok == MAN_br ||
421 tok == MAN_fi || tok == MAN_nf)) {
422 mandoc_vmsg(MANDOCERR_ARG_SKIP,
423 man->parse, line, *pos, "%s %s",
424 man_macronames[tok], buf + *pos);
425 break;
426 }
427 if (buf[*pos] != '\0' && man->last != n &&
428 (tok == MAN_PD || tok == MAN_ft || tok == MAN_sp)) {
429 mandoc_vmsg(MANDOCERR_ARG_EXCESS,
430 man->parse, line, *pos, "%s ... %s",
431 man_macronames[tok], buf + *pos);
432 break;
433 }
434 la = *pos;
435 if ( ! man_args(man, line, pos, buf, &p))
436 break;
437 if (man_macros[tok].flags & MAN_JOIN &&
438 man->last->type == ROFFT_TEXT)
439 man_word_append(man, p);
440 else
441 man_word_alloc(man, line, la, p);
442 }
443
444 /*
445 * Append MAN_EOS in case the last snipped argument
446 * ends with a dot, e.g. `.IR syslog (3).'
447 */
448
449 if (n != man->last &&
450 mandoc_eos(man->last->string, strlen(man->last->string)))
451 man->last->flags |= MAN_EOS;
452
453 /*
454 * If no arguments are specified and this is MAN_SCOPED (i.e.,
455 * next-line scoped), then set our mode to indicate that we're
456 * waiting for terms to load into our context.
457 */
458
459 if (n == man->last && man_macros[tok].flags & MAN_SCOPED) {
460 assert( ! (man_macros[tok].flags & MAN_NSCOPED));
461 man->flags |= MAN_ELINE;
462 return;
463 }
464
465 assert(man->last->type != ROFFT_ROOT);
466 man->next = MAN_NEXT_SIBLING;
467
468 /*
469 * Rewind our element scope. Note that when TH is pruned, we'll
470 * be back at the root, so make sure that we don't clobber as
471 * its sibling.
472 */
473
474 for ( ; man->last; man->last = man->last->parent) {
475 if (man->last == n)
476 break;
477 if (man->last->type == ROFFT_ROOT)
478 break;
479 man_valid_post(man);
480 }
481
482 assert(man->last);
483
484 /*
485 * Same here regarding whether we're back at the root.
486 */
487
488 if (man->last->type != ROFFT_ROOT)
489 man_valid_post(man);
490 }
491
492
493 void
494 man_macroend(struct man *man)
495 {
496
497 man_unscope(man, man->first);
498 }
499
500 static int
501 man_args(struct man *man, int line, int *pos, char *buf, char **v)
502 {
503 char *start;
504
505 assert(*pos);
506 *v = start = buf + *pos;
507 assert(' ' != *start);
508
509 if ('\0' == *start)
510 return(0);
511
512 *v = mandoc_getarg(man->parse, v, line, pos);
513 return(1);
514 }