]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
`ig' support in all its glory. Try
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.78 2010/05/16 22:28:33 kristaps Exp $ */
2 /*
3 * Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <assert.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <stdio.h>
25
26 #include "mandoc.h"
27 #include "roff.h"
28
/* Non-zero when `c' is a roff control character (`.' or `''). */
#define	ROFF_CTL(c) \
	('.' == (c) || '\'' == (c))

/*
 * Scope tracing hook.  The first variant prints the message together
 * with the name and position of the innermost open node; it is compiled
 * out, leaving an empty loop that merely absorbs the trailing semicolon.
 */
#if 0
#define	ROFF_MDEBUG(p, str) \
	fprintf(stderr, "%s: %s (%d:%d)\n", (str), \
		roffs[(p)->last->tok].name, \
		(p)->last->line, (p)->last->col)
#else
#define	ROFF_MDEBUG(p, str) while (/* CONSTCOND */ 0)
#endif
39
/*
 * Tokens understood by the roff parser.  The values index the roffs
 * table, so the order here must match the order of that table.
 */
enum	rofft {
	ROFF_if,	/* conditional */
	ROFF_ig,	/* ignore block */
	ROFF_cblock,	/* block closer, named "." */
	ROFF_ccond,	/* conditional closer, named "\}" */
#if 0
	ROFF_am,
	ROFF_ami,
	ROFF_de,
	ROFF_dei,
	ROFF_close,
#endif
	ROFF_MAX	/* number of tokens; also the "not found" value */
};
54
55 struct roff {
56 struct roffnode *last; /* leaf of stack */
57 mandocmsg msg; /* err/warn/fatal messages */
58 void *data; /* privdata for messages */
59 };
60
61 struct roffnode {
62 enum rofft tok; /* type of node */
63 struct roffnode *parent; /* up one in stack */
64 char *end; /* end-token: custom */
65 int line; /* parse line */
66 int col; /* parse col */
67 int endspan;
68 };
69
/*
 * Common parameter list shared by every macro and text handler.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum rofft tok, /* tok of macro */ \
			 char **bufp, /* input buffer */ \
			 size_t *szp, /* size of input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Signature of a macro or text handler. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
80
81 struct roffmac {
82 const char *name; /* macro name */
83 roffproc proc;
84 roffproc text;
85 };
86
87 static enum rofferr roff_if(ROFF_ARGS);
88 static enum rofferr roff_if_text(ROFF_ARGS);
89 static enum rofferr roff_ig(ROFF_ARGS);
90 static enum rofferr roff_ig_text(ROFF_ARGS);
91 static enum rofferr roff_cblock(ROFF_ARGS);
92 static enum rofferr roff_ccond(ROFF_ARGS);
93
94 const struct roffmac roffs[ROFF_MAX] = {
95 { "if", roff_if, roff_if_text },
96 { "ig", roff_ig, roff_ig_text },
97 { ".", roff_cblock, NULL },
98 { "\\}", roff_ccond, NULL },
99 };
100
/* Internal helpers: token lookup, line parsing and scope-stack upkeep. */
static	enum rofft	 roff_hash_find(const char *);
static	enum rofft	 roff_parse(const char *, int *);
static	void		 roff_free1(struct roff *);
static	int		 roffnode_push(struct roff *,
				enum rofft, int, int);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_cleanscope(struct roff *);
108
109
110 /*
111 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
112 * the nil-terminated string name could be found.
113 */
114 static enum rofft
115 roff_hash_find(const char *p)
116 {
117 int i;
118
119 /* FIXME: make this be fast and efficient. */
120
121 for (i = 0; i < (int)ROFF_MAX; i++)
122 if (0 == strcmp(roffs[i].name, p))
123 return((enum rofft)i);
124
125 return(ROFF_MAX);
126 }
127
128
129 /*
130 * Pop the current node off of the stack of roff instructions currently
131 * pending.
132 */
133 static void
134 roffnode_pop(struct roff *r)
135 {
136 struct roffnode *p;
137
138 assert(r->last);
139 p = r->last;
140 r->last = r->last->parent;
141 if (p->end)
142 free(p->end);
143 free(p);
144 }
145
146
147 /*
148 * Push a roff node onto the instruction stack. This must later be
149 * removed with roffnode_pop().
150 */
151 static int
152 roffnode_push(struct roff *r, enum rofft tok, int line, int col)
153 {
154 struct roffnode *p;
155
156 if (NULL == (p = calloc(1, sizeof(struct roffnode)))) {
157 (*r->msg)(MANDOCERR_MEM, r->data, line, col, NULL);
158 return(0);
159 }
160
161 p->tok = tok;
162 p->parent = r->last;
163 p->line = line;
164 p->col = col;
165
166 r->last = p;
167 return(1);
168 }
169
170
171 static void
172 roff_free1(struct roff *r)
173 {
174
175 while (r->last)
176 roffnode_pop(r);
177 }
178
179
/*
 * Return the parser to its initial state by discarding all open scopes.
 * The context itself, including its message callback, stays valid.
 */
void
roff_reset(struct roff *r)
{

	roff_free1(r);
}
186
187
/*
 * Destroy a parser context: drop all open scopes, then release the
 * context itself.  `r' must not be used afterwards.
 */
void
roff_free(struct roff *r)
{

	roff_free1(r);
	free(r);
}
195
196
197 struct roff *
198 roff_alloc(const mandocmsg msg, void *data)
199 {
200 struct roff *r;
201
202 if (NULL == (r = calloc(1, sizeof(struct roff)))) {
203 (*msg)(MANDOCERR_MEM, data, 0, 0, NULL);
204 return(0);
205 }
206
207 r->msg = msg;
208 r->data = data;
209 return(r);
210 }
211
212
213 enum rofferr
214 roff_parseln(struct roff *r, int ln,
215 char **bufp, size_t *szp, int pos, int *offs)
216 {
217 enum rofft t;
218 int ppos, i, j, wtf;
219
220 if (r->last && ! ROFF_CTL((*bufp)[pos])) {
221 /*
222 * If a scope is open and we're not a macro, pass it
223 * through our text detector and continue as quickly as
224 * possible.
225 */
226 t = r->last->tok;
227 assert(roffs[t].text);
228 return((*roffs[t].text)
229 (r, t, bufp, szp, ln, pos, pos, offs));
230 } else if ( ! ROFF_CTL((*bufp)[pos]))
231 /*
232 * Don't do anything if we're free-form text.
233 */
234 return(ROFF_CONT);
235
236 /* A macro-ish line with a possibly-open macro context. */
237
238 wtf = 0;
239
240 if (r->last && r->last->end) {
241 /*
242 * We have a scope open that has a custom end-macro
243 * handler. Try to match it against the input.
244 */
245 i = pos + 1;
246 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
247 i++;
248
249 for (j = 0; r->last->end[j]; j++, i++)
250 if ((*bufp)[i] != r->last->end[j])
251 break;
252
253 if ('\0' == r->last->end[j] &&
254 ('\0' == (*bufp)[i] ||
255 ' ' == (*bufp)[i] ||
256 '\t' == (*bufp)[i])) {
257 roffnode_pop(r);
258 roffnode_cleanscope(r);
259 wtf = 1;
260 }
261 }
262
263 ppos = pos;
264 if (ROFF_MAX == (t = roff_parse(*bufp, &pos))) {
265 /*
266 * This is some of groff's stranger behaviours. If we
267 * encountered a custom end-scope tag and that tag also
268 * happens to be a "real" macro, then we need to try
269 * interpreting it again as a real macro. If it's not,
270 * then return ignore. Else continue.
271 */
272 if (wtf)
273 return(ROFF_IGN);
274 else if (NULL == r->last)
275 return(ROFF_CONT);
276
277 /* FIXME: this assumes that we ignore!? */
278 return(ROFF_IGN);
279 }
280
281 assert(roffs[t].proc);
282 return((*roffs[t].proc)
283 (r, t, bufp, szp, ln, ppos, pos, offs));
284 }
285
286
287 int
288 roff_endparse(struct roff *r)
289 {
290
291 if (NULL == r->last)
292 return(1);
293 return((*r->msg)(MANDOCERR_SCOPEEXIT, r->data, r->last->line,
294 r->last->col, NULL));
295 }
296
297
298 /*
299 * Parse a roff node's type from the input buffer. This must be in the
300 * form of ".foo xxx" in the usual way.
301 */
302 static enum rofft
303 roff_parse(const char *buf, int *pos)
304 {
305 int j;
306 char mac[5];
307 enum rofft t;
308
309 assert(ROFF_CTL(buf[*pos]));
310 (*pos)++;
311
312 while (buf[*pos] && (' ' == buf[*pos] || '\t' == buf[*pos]))
313 (*pos)++;
314
315 if ('\0' == buf[*pos])
316 return(ROFF_MAX);
317
318 for (j = 0; j < 4; j++, (*pos)++)
319 if ('\0' == (mac[j] = buf[*pos]))
320 break;
321 else if (' ' == buf[*pos])
322 break;
323
324 if (j == 4 || j < 1)
325 return(ROFF_MAX);
326
327 mac[j] = '\0';
328
329 if (ROFF_MAX == (t = roff_hash_find(mac)))
330 return(t);
331
332 while (buf[*pos] && ' ' == buf[*pos])
333 (*pos)++;
334
335 return(t);
336 }
337
338
339 /* ARGSUSED */
340 static enum rofferr
341 roff_cblock(ROFF_ARGS)
342 {
343
344 if (NULL == r->last) {
345 if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
346 return(ROFF_ERR);
347 return(ROFF_IGN);
348 }
349
350 if (ROFF_ig != r->last->tok) {
351 if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
352 return(ROFF_ERR);
353 return(ROFF_IGN);
354 }
355
356 if ((*bufp)[pos])
357 if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
358 return(ROFF_ERR);
359
360 ROFF_MDEBUG(r, "closing ignore block");
361 roffnode_pop(r);
362 roffnode_cleanscope(r);
363 return(ROFF_IGN);
364
365 }
366
367
368 static void
369 roffnode_cleanscope(struct roff *r)
370 {
371
372 while (r->last) {
373 if (--r->last->endspan < 0)
374 break;
375 ROFF_MDEBUG(r, "closing implicit scope");
376 roffnode_pop(r);
377 }
378 }
379
380
381 /* ARGSUSED */
382 static enum rofferr
383 roff_ccond(ROFF_ARGS)
384 {
385
386 if (NULL == r->last) {
387 if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
388 return(ROFF_ERR);
389 return(ROFF_IGN);
390 }
391
392 if (ROFF_if != r->last->tok) {
393 if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
394 return(ROFF_ERR);
395 return(ROFF_IGN);
396 }
397
398 if (r->last->endspan > -1) {
399 if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
400 return(ROFF_ERR);
401 return(ROFF_IGN);
402 }
403
404 if ((*bufp)[pos])
405 if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
406 return(ROFF_ERR);
407
408 ROFF_MDEBUG(r, "closing explicit scope");
409 roffnode_pop(r);
410 roffnode_cleanscope(r);
411 return(ROFF_IGN);
412 }
413
414
415 /* ARGSUSED */
416 static enum rofferr
417 roff_ig(ROFF_ARGS)
418 {
419 int sv;
420 size_t sz;
421
422 if ( ! roffnode_push(r, tok, ln, ppos))
423 return(ROFF_ERR);
424
425 if ('\0' == (*bufp)[pos]) {
426 ROFF_MDEBUG(r, "opening ignore block");
427 return(ROFF_IGN);
428 }
429
430 sv = pos;
431 while ((*bufp)[pos] && ' ' != (*bufp)[pos] &&
432 '\t' != (*bufp)[pos])
433 pos++;
434
435 /*
436 * Note: groff does NOT like escape characters in the input.
437 * Instead of detecting this, we're just going to let it fly and
438 * to hell with it.
439 */
440
441 assert(pos > sv);
442 sz = (size_t)(pos - sv);
443
444 r->last->end = malloc(sz + 1);
445
446 if (NULL == r->last->end) {
447 (*r->msg)(MANDOCERR_MEM, r->data, ln, pos, NULL);
448 return(ROFF_ERR);
449 }
450
451 memcpy(r->last->end, *bufp + sv, sz);
452 r->last->end[(int)sz] = '\0';
453
454 ROFF_MDEBUG(r, "opening explicit ignore block");
455
456 if ((*bufp)[pos])
457 if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
458 return(ROFF_ERR);
459
460 return(ROFF_IGN);
461 }
462
463
464 /* ARGSUSED */
465 static enum rofferr
466 roff_ig_text(ROFF_ARGS)
467 {
468
469 return(ROFF_IGN);
470 }
471
472
473 /* ARGSUSED */
474 static enum rofferr
475 roff_if_text(ROFF_ARGS)
476 {
477 char *ep, *st;
478
479 st = &(*bufp)[pos];
480 if (NULL == (ep = strstr(st, "\\}"))) {
481 roffnode_cleanscope(r);
482 return(ROFF_IGN);
483 }
484
485 if (ep > st && '\\' != *(ep - 1)) {
486 ROFF_MDEBUG(r, "closing explicit scope (in-line)");
487 roffnode_pop(r);
488 }
489
490 roffnode_cleanscope(r);
491 return(ROFF_IGN);
492 }
493
494
495 /* ARGSUSED */
496 static enum rofferr
497 roff_if(ROFF_ARGS)
498 {
499 int sv;
500
501 /*
502 * Read ahead past the conditional.
503 * FIXME: this does not work, as conditionals don't end on
504 * whitespace, but are parsed according to a formal grammar.
505 * It's good enough for now, however.
506 */
507
508 while ((*bufp)[pos] && ' ' != (*bufp)[pos])
509 pos++;
510
511 sv = pos;
512 while (' ' == (*bufp)[pos])
513 pos++;
514
515 /*
516 * Roff is weird. If we have just white-space after the
517 * conditional, it's considered the BODY and we exit without
518 * really doing anything. Warn about this. It's probably
519 * wrong.
520 */
521
522 if ('\0' == (*bufp)[pos] && sv != pos) {
523 if ( ! (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL))
524 return(ROFF_ERR);
525 return(ROFF_IGN);
526 }
527
528 if ( ! roffnode_push(r, tok, ln, ppos))
529 return(ROFF_ERR);
530
531 /* Don't evaluate: just assume NO. */
532
533 r->last->endspan = 1;
534
535 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
536 ROFF_MDEBUG(r, "opening explicit scope");
537 r->last->endspan = -1;
538 pos += 2;
539 } else
540 ROFF_MDEBUG(r, "opening implicit scope");
541
542 /*
543 * If there are no arguments on the line, the next-line scope is
544 * assumed.
545 */
546
547 if ('\0' == (*bufp)[pos])
548 return(ROFF_IGN);
549
550 /* Otherwise re-run the roff parser after recalculating. */
551
552 *offs = pos;
553 return(ROFF_RERUN);
554 }