]> git.cameronkatri.com Git - mandoc.git/blob - man_macro.c
To better match groff parsing, reject digits and some mathematical
[mandoc.git] / man_macro.c
1 /* $Id: man_macro.c,v 1.148 2022/04/27 17:11:24 schwarze Exp $ */
2 /*
3 * Copyright (c) 2012-2015,2017-2020,2022 Ingo Schwarze <schwarze@openbsd.org>
4 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
5 * Copyright (c) 2013 Franco Fichtner <franco@lastsummer.de>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include "config.h"
20
21 #include <sys/types.h>
22
23 #include <assert.h>
24 #include <ctype.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28
29 #if DEBUG_MEMORY
30 #include "mandoc_dbg.h"
31 #endif
32 #include "mandoc.h"
33 #include "roff.h"
34 #include "man.h"
35 #include "libmandoc.h"
36 #include "roff_int.h"
37 #include "libman.h"
38
/*
 * Forward declarations of the file-local helpers and of the macro
 * handlers that are referenced from the man_macros table.
 */
static	int		 man_args(struct roff_man *, int,
				int *, char *, char **);
static	void		 rew_scope(struct roff_man *, enum roff_tok);

static	void		 blk_close(MACRO_PROT_ARGS);
static	void		 blk_exp(MACRO_PROT_ARGS);
static	void		 blk_imp(MACRO_PROT_ARGS);
static	void		 in_line_eoln(MACRO_PROT_ARGS);
46
47 static const struct man_macro man_macros[MAN_MAX - MAN_TH] = {
48 { in_line_eoln, MAN_XSCOPE }, /* TH */
49 { blk_imp, MAN_XSCOPE | MAN_BSCOPED }, /* SH */
50 { blk_imp, MAN_XSCOPE | MAN_BSCOPED }, /* SS */
51 { blk_imp, MAN_XSCOPE | MAN_BSCOPED }, /* TP */
52 { blk_imp, MAN_XSCOPE | MAN_BSCOPED }, /* TQ */
53 { blk_imp, MAN_XSCOPE }, /* LP */
54 { blk_imp, MAN_XSCOPE }, /* PP */
55 { blk_imp, MAN_XSCOPE }, /* P */
56 { blk_imp, MAN_XSCOPE }, /* IP */
57 { blk_imp, MAN_XSCOPE }, /* HP */
58 { in_line_eoln, MAN_NSCOPED | MAN_ESCOPED | MAN_JOIN }, /* SM */
59 { in_line_eoln, MAN_NSCOPED | MAN_ESCOPED | MAN_JOIN }, /* SB */
60 { in_line_eoln, 0 }, /* BI */
61 { in_line_eoln, 0 }, /* IB */
62 { in_line_eoln, 0 }, /* BR */
63 { in_line_eoln, 0 }, /* RB */
64 { in_line_eoln, MAN_NSCOPED | MAN_ESCOPED | MAN_JOIN }, /* R */
65 { in_line_eoln, MAN_NSCOPED | MAN_ESCOPED | MAN_JOIN }, /* B */
66 { in_line_eoln, MAN_NSCOPED | MAN_ESCOPED | MAN_JOIN }, /* I */
67 { in_line_eoln, 0 }, /* IR */
68 { in_line_eoln, 0 }, /* RI */
69 { blk_close, MAN_XSCOPE }, /* RE */
70 { blk_exp, MAN_XSCOPE }, /* RS */
71 { in_line_eoln, MAN_NSCOPED }, /* DT */
72 { in_line_eoln, MAN_NSCOPED }, /* UC */
73 { in_line_eoln, MAN_NSCOPED }, /* PD */
74 { in_line_eoln, MAN_NSCOPED }, /* AT */
75 { in_line_eoln, MAN_NSCOPED }, /* in */
76 { blk_imp, MAN_XSCOPE }, /* SY */
77 { blk_close, MAN_XSCOPE }, /* YS */
78 { in_line_eoln, 0 }, /* OP */
79 { in_line_eoln, MAN_XSCOPE }, /* EX */
80 { in_line_eoln, MAN_XSCOPE }, /* EE */
81 { blk_exp, MAN_XSCOPE }, /* UR */
82 { blk_close, MAN_XSCOPE }, /* UE */
83 { blk_exp, MAN_XSCOPE }, /* MT */
84 { blk_close, MAN_XSCOPE }, /* ME */
85 };
86
87
88 const struct man_macro *
89 man_macro(enum roff_tok tok)
90 {
91 assert(tok >= MAN_TH && tok <= MAN_MAX);
92 return man_macros + (tok - MAN_TH);
93 }
94
95 void
96 man_unscope(struct roff_man *man, const struct roff_node *to)
97 {
98 struct roff_node *n;
99
100 to = to->parent;
101 n = man->last;
102 while (n != to) {
103
104 /* Reached the end of the document? */
105
106 if (to == NULL && ! (n->flags & NODE_VALID)) {
107 if (man->flags & (MAN_BLINE | MAN_ELINE) &&
108 man_macro(n->tok)->flags &
109 (MAN_BSCOPED | MAN_NSCOPED)) {
110 mandoc_msg(MANDOCERR_BLK_LINE,
111 n->line, n->pos,
112 "EOF breaks %s", roff_name[n->tok]);
113 if (man->flags & MAN_ELINE) {
114 if (n->parent->type == ROFFT_ROOT ||
115 (man_macro(n->parent->tok)->flags &
116 MAN_ESCOPED) == 0)
117 man->flags &= ~MAN_ELINE;
118 } else {
119 assert(n->type == ROFFT_HEAD);
120 n = n->parent;
121 man->flags &= ~MAN_BLINE;
122 }
123 man->last = n;
124 n = n->parent;
125 roff_node_delete(man, man->last);
126 continue;
127 }
128 if (n->type == ROFFT_BLOCK &&
129 man_macro(n->tok)->fp == blk_exp)
130 mandoc_msg(MANDOCERR_BLK_NOEND,
131 n->line, n->pos, "%s",
132 roff_name[n->tok]);
133 }
134
135 /*
136 * We might delete the man->last node
137 * in the post-validation phase.
138 * Save a pointer to the parent such that
139 * we know where to continue the iteration.
140 */
141
142 man->last = n;
143 n = n->parent;
144 man->last->flags |= NODE_VALID;
145 }
146
147 /*
148 * If we ended up at the parent of the node we were
149 * supposed to rewind to, that means the target node
150 * got deleted, so add the next node we parse as a child
151 * of the parent instead of as a sibling of the target.
152 */
153
154 man->next = (man->last == to) ?
155 ROFF_NEXT_CHILD : ROFF_NEXT_SIBLING;
156 }
157
158 /*
159 * Rewinding entails ascending the parse tree until a coherent point,
160 * for example, the `SH' macro will close out any intervening `SS'
161 * scopes. When a scope is closed, it must be validated and actioned.
162 */
163 static void
164 rew_scope(struct roff_man *man, enum roff_tok tok)
165 {
166 struct roff_node *n;
167
168 /* Preserve empty paragraphs before RS. */
169
170 n = man->last;
171 if (tok == MAN_RS && n->child == NULL &&
172 (n->tok == MAN_P || n->tok == MAN_PP || n->tok == MAN_LP))
173 return;
174
175 for (;;) {
176 if (n->type == ROFFT_ROOT)
177 return;
178 if (n->flags & NODE_VALID) {
179 n = n->parent;
180 continue;
181 }
182 if (n->type != ROFFT_BLOCK) {
183 if (n->parent->type == ROFFT_ROOT) {
184 man_unscope(man, n);
185 return;
186 } else {
187 n = n->parent;
188 continue;
189 }
190 }
191 if (tok != MAN_SH && (n->tok == MAN_SH ||
192 (tok != MAN_SS && (n->tok == MAN_SS ||
193 man_macro(n->tok)->fp == blk_exp))))
194 return;
195 man_unscope(man, n);
196 n = man->last;
197 }
198 }
199
200
201 /*
202 * Close out a generic explicit macro.
203 */
204 void
205 blk_close(MACRO_PROT_ARGS)
206 {
207 enum roff_tok ctok, ntok;
208 const struct roff_node *nn;
209 char *p, *ep;
210 int cline, cpos, la, nrew, target;
211
212 nrew = 1;
213 switch (tok) {
214 case MAN_RE:
215 ntok = MAN_RS;
216 la = *pos;
217 if ( ! man_args(man, line, pos, buf, &p))
218 break;
219 for (nn = man->last->parent; nn; nn = nn->parent)
220 if (nn->tok == ntok && nn->type == ROFFT_BLOCK)
221 nrew++;
222 target = strtol(p, &ep, 10);
223 if (*ep != '\0')
224 mandoc_msg(MANDOCERR_ARG_EXCESS, line,
225 la + (buf[la] == '"') + (int)(ep - p),
226 "RE ... %s", ep);
227 free(p);
228 if (target == 0)
229 target = 1;
230 nrew -= target;
231 if (nrew < 1) {
232 mandoc_msg(MANDOCERR_RE_NOTOPEN,
233 line, ppos, "RE %d", target);
234 return;
235 }
236 break;
237 case MAN_YS:
238 ntok = MAN_SY;
239 break;
240 case MAN_UE:
241 ntok = MAN_UR;
242 break;
243 case MAN_ME:
244 ntok = MAN_MT;
245 break;
246 default:
247 abort();
248 }
249
250 for (nn = man->last->parent; nn; nn = nn->parent)
251 if (nn->tok == ntok && nn->type == ROFFT_BLOCK && ! --nrew)
252 break;
253
254 if (nn == NULL) {
255 mandoc_msg(MANDOCERR_BLK_NOTOPEN,
256 line, ppos, "%s", roff_name[tok]);
257 rew_scope(man, MAN_PP);
258 if (tok == MAN_RE) {
259 roff_elem_alloc(man, line, ppos, ROFF_br);
260 man->last->flags |= NODE_LINE |
261 NODE_VALID | NODE_ENDED;
262 man->next = ROFF_NEXT_SIBLING;
263 }
264 return;
265 }
266
267 cline = man->last->line;
268 cpos = man->last->pos;
269 ctok = man->last->tok;
270 man_unscope(man, nn);
271
272 if (tok == MAN_RE && nn->head->aux > 0)
273 roff_setreg(man->roff, "an-margin", nn->head->aux, '-');
274
275 /* Trailing text. */
276
277 if (buf[*pos] != '\0') {
278 roff_word_alloc(man, line, ppos, buf + *pos);
279 man->last->flags |= NODE_DELIMC;
280 if (mandoc_eos(man->last->string, strlen(man->last->string)))
281 man->last->flags |= NODE_EOS;
282 }
283
284 /* Move a trailing paragraph behind the block. */
285
286 if (ctok == MAN_LP || ctok == MAN_PP || ctok == MAN_P) {
287 *pos = strlen(buf);
288 blk_imp(man, ctok, cline, cpos, pos, buf);
289 }
290
291 /* Synopsis blocks need an explicit end marker for spacing. */
292
293 if (tok == MAN_YS && man->last == nn) {
294 roff_elem_alloc(man, line, ppos, tok);
295 man_unscope(man, man->last);
296 }
297 }
298
299 void
300 blk_exp(MACRO_PROT_ARGS)
301 {
302 struct roff_node *head;
303 char *p;
304 int la;
305
306 if (tok == MAN_RS) {
307 rew_scope(man, tok);
308 man->flags |= ROFF_NONOFILL;
309 }
310 roff_block_alloc(man, line, ppos, tok);
311 head = roff_head_alloc(man, line, ppos, tok);
312
313 la = *pos;
314 if (man_args(man, line, pos, buf, &p)) {
315 roff_word_alloc(man, line, la, p);
316 if (tok == MAN_RS) {
317 if (roff_getreg(man->roff, "an-margin") == 0)
318 roff_setreg(man->roff, "an-margin",
319 7 * 24, '=');
320 if ((head->aux = strtod(p, NULL) * 24.0) > 0)
321 roff_setreg(man->roff, "an-margin",
322 head->aux, '+');
323 }
324 free(p);
325 }
326
327 if (buf[*pos] != '\0')
328 mandoc_msg(MANDOCERR_ARG_EXCESS, line, *pos,
329 "%s ... %s", roff_name[tok], buf + *pos);
330
331 man_unscope(man, head);
332 roff_body_alloc(man, line, ppos, tok);
333 man->flags &= ~ROFF_NONOFILL;
334 }
335
336 /*
337 * Parse an implicit-block macro. These contain a ROFFT_HEAD and a
338 * ROFFT_BODY contained within a ROFFT_BLOCK. Rules for closing out other
339 * scopes, such as `SH' closing out an `SS', are defined in the rew
340 * routines.
341 */
342 void
343 blk_imp(MACRO_PROT_ARGS)
344 {
345 int la;
346 char *p;
347 struct roff_node *n;
348
349 rew_scope(man, tok);
350 man->flags |= ROFF_NONOFILL;
351 if (tok == MAN_SH || tok == MAN_SS)
352 man->flags &= ~ROFF_NOFILL;
353 roff_block_alloc(man, line, ppos, tok);
354 n = roff_head_alloc(man, line, ppos, tok);
355
356 /* Add line arguments. */
357
358 for (;;) {
359 la = *pos;
360 if ( ! man_args(man, line, pos, buf, &p))
361 break;
362 roff_word_alloc(man, line, la, p);
363 free(p);
364 }
365
366 /*
367 * For macros having optional next-line scope,
368 * keep the head open if there were no arguments.
369 * For `TP' and `TQ', always keep the head open.
370 */
371
372 if (man_macro(tok)->flags & MAN_BSCOPED &&
373 (tok == MAN_TP || tok == MAN_TQ || n == man->last)) {
374 man->flags |= MAN_BLINE;
375 return;
376 }
377
378 /* Close out the head and open the body. */
379
380 man_unscope(man, n);
381 roff_body_alloc(man, line, ppos, tok);
382 man->flags &= ~ROFF_NONOFILL;
383 }
384
385 void
386 in_line_eoln(MACRO_PROT_ARGS)
387 {
388 int la;
389 char *p;
390 struct roff_node *n;
391
392 roff_elem_alloc(man, line, ppos, tok);
393 n = man->last;
394
395 if (tok == MAN_EX)
396 man->flags |= ROFF_NOFILL;
397 else if (tok == MAN_EE)
398 man->flags &= ~ROFF_NOFILL;
399
400 #if DEBUG_MEMORY
401 if (tok == MAN_TH)
402 mandoc_dbg_name(buf);
403 #endif
404
405 for (;;) {
406 if (buf[*pos] != '\0' && man->last != n && tok == MAN_PD) {
407 mandoc_msg(MANDOCERR_ARG_EXCESS, line, *pos,
408 "%s ... %s", roff_name[tok], buf + *pos);
409 break;
410 }
411 la = *pos;
412 if ( ! man_args(man, line, pos, buf, &p))
413 break;
414 if (man_macro(tok)->flags & MAN_JOIN &&
415 man->last->type == ROFFT_TEXT)
416 roff_word_append(man, p);
417 else
418 roff_word_alloc(man, line, la, p);
419 free(p);
420 }
421
422 /*
423 * Append NODE_EOS in case the last snipped argument
424 * ends with a dot, e.g. `.IR syslog (3).'
425 */
426
427 if (n != man->last &&
428 mandoc_eos(man->last->string, strlen(man->last->string)))
429 man->last->flags |= NODE_EOS;
430
431 /*
432 * If no arguments are specified and this is MAN_ESCOPED (i.e.,
433 * next-line scoped), then set our mode to indicate that we're
434 * waiting for terms to load into our context.
435 */
436
437 if (n == man->last && man_macro(tok)->flags & MAN_ESCOPED) {
438 man->flags |= MAN_ELINE;
439 return;
440 }
441
442 assert(man->last->type != ROFFT_ROOT);
443 man->next = ROFF_NEXT_SIBLING;
444
445 /* Rewind our element scope. */
446
447 for ( ; man->last; man->last = man->last->parent) {
448 man->last->flags |= NODE_VALID;
449 if (man->last == n)
450 break;
451 }
452
453 /* Rewind next-line scoped ancestors, if any. */
454
455 if (man_macro(tok)->flags & MAN_ESCOPED)
456 man_descope(man, line, ppos, NULL);
457 }
458
459 void
460 man_endparse(struct roff_man *man)
461 {
462 man_unscope(man, man->meta.first);
463 }
464
465 static int
466 man_args(struct roff_man *man, int line, int *pos, char *buf, char **v)
467 {
468 char *start;
469
470 assert(*pos);
471 *v = start = buf + *pos;
472 assert(' ' != *start);
473
474 if ('\0' == *start)
475 return 0;
476
477 *v = roff_getarg(man->roff, v, line, pos);
478 return 1;
479 }