]>
git.cameronkatri.com Git - mandoc.git/blob - roff.c
1 /* $Id: roff.c,v 1.78 2010/05/16 22:28:33 kristaps Exp $ */
3 * Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
30 ('.' == (c) || '\'' == (c))
/*
 * Debug trace: print `str', the name of the macro on top of the roff
 * node stack (looked up in roffs[]) and that node's line:column to
 * stderr.  `p' must have a non-NULL `last' node, since it is
 * dereferenced unconditionally.
 */
32 #define ROFF_MDEBUG(p, str) \
33 fprintf(stderr, "%s: %s (%d:%d)\n", (str), \
34 roffs[(p)->last->tok].name, \
35 (p)->last->line, (p)->last->col)
/*
 * No-op variant that evaluates neither argument.
 * NOTE(review): presumably selected by a build-time conditional that
 * is not visible in this excerpt; confirm against the full file.
 */
37 #define ROFF_MDEBUG(p, str) while (/* CONSTCOND */ 0)
56 struct roffnode
*last
; /* leaf of stack */
57 mandocmsg msg
; /* err/warn/fatal messages */
58 void *data
; /* privdata for messages */
62 enum rofft tok
; /* type of node */
63 struct roffnode
*parent
; /* up one in stack */
64 char *end
; /* end-token: custom */
65 int line
; /* parse line */
66 int col
; /* parse col */
/*
 * Parameter list shared by every roff handler: it is used verbatim by
 * the roffproc typedef and by each roff_*() handler prototype, so all
 * handlers receive the same eight arguments in the same order.
 */
70 #define ROFF_ARGS struct roff *r, /* parse ctx */ \
71 enum rofft tok, /* tok of macro */ \
72 char **bufp, /* input buffer */ \
73 size_t *szp, /* size of input buffer */ \
74 int ln, /* parse line */ \
75 int ppos, /* original pos in buffer */ \
76 int pos, /* current pos in buffer */ \
77 int *offs /* reset offset of buffer data */
/*
 * Pointer to a roff handler: takes the common ROFF_ARGS parameter list
 * and returns an enum rofferr status.
 */
79 typedef enum rofferr (*roffproc
)(ROFF_ARGS
);
82 const char *name
; /* macro name */
/*
 * Forward declarations of the handlers referenced by the roffs[] table:
 * "if" uses roff_if and roff_if_text, "ig" uses roff_ig and
 * roff_ig_text, "." uses roff_cblock, and "\}" uses roff_ccond.
 * All share the ROFF_ARGS parameter list.
 */
87 static enum rofferr
roff_if(ROFF_ARGS
);
88 static enum rofferr
roff_if_text(ROFF_ARGS
);
89 static enum rofferr
roff_ig(ROFF_ARGS
);
90 static enum rofferr
roff_ig_text(ROFF_ARGS
);
91 static enum rofferr
roff_cblock(ROFF_ARGS
);
92 static enum rofferr
roff_ccond(ROFF_ARGS
);
94 const struct roffmac roffs
[ROFF_MAX
] = {
95 { "if", roff_if
, roff_if_text
},
96 { "ig", roff_ig
, roff_ig_text
},
97 { ".", roff_cblock
, NULL
},
98 { "\\}", roff_ccond
, NULL
},
101 static void roff_free1(struct roff
*);
102 static enum rofft
roff_hash_find(const char *);
103 static void roffnode_cleanscope(struct roff
*);
104 static int roffnode_push(struct roff
*,
105 enum rofft
, int, int);
106 static void roffnode_pop(struct roff
*);
107 static enum rofft
roff_parse(const char *, int *);
111 * Look up a roff token by its name. Returns ROFF_MAX if no macro with
112 * the given NUL-terminated name could be found.
115 roff_hash_find(const char *p
)
119 /* FIXME: make this be fast and efficient. */
121 for (i
= 0; i
< (int)ROFF_MAX
; i
++)
122 if (0 == strcmp(roffs
[i
].name
, p
))
123 return((enum rofft
)i
);
130 * Pop the current node off of the stack of roff instructions currently
134 roffnode_pop(struct roff
*r
)
140 r
->last
= r
->last
->parent
;
148 * Push a roff node onto the instruction stack. This must later be
149 * removed with roffnode_pop().
152 roffnode_push(struct roff
*r
, enum rofft tok
, int line
, int col
)
156 if (NULL
== (p
= calloc(1, sizeof(struct roffnode
)))) {
157 (*r
->msg
)(MANDOCERR_MEM
, r
->data
, line
, col
, NULL
);
172 roff_free1(struct roff
*r
)
181 roff_reset(struct roff
*r
)
189 roff_free(struct roff
*r
)
198 roff_alloc(const mandocmsg msg
, void *data
)
202 if (NULL
== (r
= calloc(1, sizeof(struct roff
)))) {
203 (*msg
)(MANDOCERR_MEM
, data
, 0, 0, NULL
);
214 roff_parseln(struct roff
*r
, int ln
,
215 char **bufp
, size_t *szp
, int pos
, int *offs
)
220 if (r
->last
&& ! ROFF_CTL((*bufp
)[pos
])) {
222 * If a scope is open and we're not a macro, pass it
223 * through our text detector and continue as quickly as
227 assert(roffs
[t
].text
);
228 return((*roffs
[t
].text
)
229 (r
, t
, bufp
, szp
, ln
, pos
, pos
, offs
));
230 } else if ( ! ROFF_CTL((*bufp
)[pos
]))
232 * Don't do anything if we're free-form text.
236 /* A macro-ish line with a possibly-open macro context. */
240 if (r
->last
&& r
->last
->end
) {
242 * We have a scope open that has a custom end-macro
243 * handler. Try to match it against the input.
246 while (' ' == (*bufp
)[i
] || '\t' == (*bufp
)[i
])
249 for (j
= 0; r
->last
->end
[j
]; j
++, i
++)
250 if ((*bufp
)[i
] != r
->last
->end
[j
])
253 if ('\0' == r
->last
->end
[j
] &&
254 ('\0' == (*bufp
)[i
] ||
256 '\t' == (*bufp
)[i
])) {
258 roffnode_cleanscope(r
);
264 if (ROFF_MAX
== (t
= roff_parse(*bufp
, &pos
))) {
266 * This is one of groff's stranger behaviours. If we
267 * encountered a custom end-scope tag and that tag also
268 * happens to be a "real" macro, then we need to try
269 * interpreting it again as a real macro. If it's not,
270 * then return ignore. Else continue.
274 else if (NULL
== r
->last
)
277 /* FIXME: this assumes that we ignore!? */
281 assert(roffs
[t
].proc
);
282 return((*roffs
[t
].proc
)
283 (r
, t
, bufp
, szp
, ln
, ppos
, pos
, offs
));
288 roff_endparse(struct roff
*r
)
293 return((*r
->msg
)(MANDOCERR_SCOPEEXIT
, r
->data
, r
->last
->line
,
294 r
->last
->col
, NULL
));
299 * Parse a roff node's type from the input buffer. This must be in the
300 * form of ".foo xxx" in the usual way.
303 roff_parse(const char *buf
, int *pos
)
309 assert(ROFF_CTL(buf
[*pos
]));
312 while (buf
[*pos
] && (' ' == buf
[*pos
] || '\t' == buf
[*pos
]))
315 if ('\0' == buf
[*pos
])
318 for (j
= 0; j
< 4; j
++, (*pos
)++)
319 if ('\0' == (mac
[j
] = buf
[*pos
]))
321 else if (' ' == buf
[*pos
])
329 if (ROFF_MAX
== (t
= roff_hash_find(mac
)))
332 while (buf
[*pos
] && ' ' == buf
[*pos
])
341 roff_cblock(ROFF_ARGS
)
344 if (NULL
== r
->last
) {
345 if ( ! (*r
->msg
)(MANDOCERR_NOSCOPE
, r
->data
, ln
, ppos
, NULL
))
350 if (ROFF_ig
!= r
->last
->tok
) {
351 if ( ! (*r
->msg
)(MANDOCERR_NOSCOPE
, r
->data
, ln
, ppos
, NULL
))
357 if ( ! (*r
->msg
)(MANDOCERR_ARGSLOST
, r
->data
, ln
, pos
, NULL
))
360 ROFF_MDEBUG(r
, "closing ignore block");
362 roffnode_cleanscope(r
);
369 roffnode_cleanscope(struct roff
*r
)
373 if (--r
->last
->endspan
< 0)
375 ROFF_MDEBUG(r
, "closing implicit scope");
383 roff_ccond(ROFF_ARGS
)
386 if (NULL
== r
->last
) {
387 if ( ! (*r
->msg
)(MANDOCERR_NOSCOPE
, r
->data
, ln
, ppos
, NULL
))
392 if (ROFF_if
!= r
->last
->tok
) {
393 if ( ! (*r
->msg
)(MANDOCERR_NOSCOPE
, r
->data
, ln
, ppos
, NULL
))
398 if (r
->last
->endspan
> -1) {
399 if ( ! (*r
->msg
)(MANDOCERR_NOSCOPE
, r
->data
, ln
, ppos
, NULL
))
405 if ( ! (*r
->msg
)(MANDOCERR_ARGSLOST
, r
->data
, ln
, pos
, NULL
))
408 ROFF_MDEBUG(r
, "closing explicit scope");
410 roffnode_cleanscope(r
);
422 if ( ! roffnode_push(r
, tok
, ln
, ppos
))
425 if ('\0' == (*bufp
)[pos
]) {
426 ROFF_MDEBUG(r
, "opening ignore block");
431 while ((*bufp
)[pos
] && ' ' != (*bufp
)[pos
] &&
432 '\t' != (*bufp
)[pos
])
436 * Note: groff does NOT like escape characters in the input.
437 * Instead of detecting this, we're just going to let it fly and
442 sz
= (size_t)(pos
- sv
);
444 r
->last
->end
= malloc(sz
+ 1);
446 if (NULL
== r
->last
->end
) {
447 (*r
->msg
)(MANDOCERR_MEM
, r
->data
, ln
, pos
, NULL
);
451 memcpy(r
->last
->end
, *bufp
+ sv
, sz
);
452 r
->last
->end
[(int)sz
] = '\0';
454 ROFF_MDEBUG(r
, "opening explicit ignore block");
457 if ( ! (*r
->msg
)(MANDOCERR_ARGSLOST
, r
->data
, ln
, pos
, NULL
))
466 roff_ig_text(ROFF_ARGS
)
475 roff_if_text(ROFF_ARGS
)
480 if (NULL
== (ep
= strstr(st
, "\\}"))) {
481 roffnode_cleanscope(r
);
485 if (ep
> st
&& '\\' != *(ep
- 1)) {
486 ROFF_MDEBUG(r
, "closing explicit scope (in-line)");
490 roffnode_cleanscope(r
);
502 * Read ahead past the conditional.
503 * FIXME: this does not work, as conditionals don't end on
504 * whitespace, but are parsed according to a formal grammar.
505 * It's good enough for now, however.
508 while ((*bufp
)[pos
] && ' ' != (*bufp
)[pos
])
512 while (' ' == (*bufp
)[pos
])
516 * Roff is weird. If we have just white-space after the
517 * conditional, it's considered the BODY and we exit without
518 * really doing anything. Warn about this. It's probably
522 if ('\0' == (*bufp
)[pos
] && sv
!= pos
) {
523 if ( ! (*r
->msg
)(MANDOCERR_NOARGS
, r
->data
, ln
, ppos
, NULL
))
528 if ( ! roffnode_push(r
, tok
, ln
, ppos
))
531 /* Don't evaluate: just assume NO. */
533 r
->last
->endspan
= 1;
535 if ('\\' == (*bufp
)[pos
] && '{' == (*bufp
)[pos
+ 1]) {
536 ROFF_MDEBUG(r
, "opening explicit scope");
537 r
->last
->endspan
= -1;
540 ROFF_MDEBUG(r
, "opening implicit scope");
543 * If there are no arguments on the line, the next-line scope is
547 if ('\0' == (*bufp
)[pos
])
550 /* Otherwise re-run the roff parser after recalculating. */