]> git.cameronkatri.com Git - mandoc.git/blob - xml.c
Considerably improved roff_text parser.
[mandoc.git] / xml.c
1 /* $Id: xml.c,v 1.4 2008/12/01 15:32:36 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the
7 * above copyright notice and this permission notice appear in all
8 * copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12 * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13 * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
16 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 * PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include <sys/param.h>
20
21 #include <assert.h>
22 #include <ctype.h>
23 #include <err.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include "libmdocml.h"
29 #include "private.h"
30
/*
 * INDENT is the number of output columns accounted for per nesting
 * level; mbuf_indent() also uses it as the maximum number of levels
 * it will actually indent.
 */
#define INDENT 4
/* Column limit that the line-wrapping checks in this file compare against. */
#define COLUMNS 60

/*
 * NOTE(review): strncat() is not a drop-in replacement for strlcat():
 * its size argument limits the bytes appended, not the total size of
 * the destination buffer.  strlcat does not appear to be used in the
 * visible part of this file -- confirm before relying on this alias.
 */
#ifdef __linux__ /* FIXME */
#define strlcat strncat
#endif
37
/*
 * Kind of the most recently emitted output element, recorded in
 * md_xml.last; roffin() consults it to decide whether a separating
 * space is needed.
 */
enum md_tok {
	MD_BLKIN,	/* set by roffhead() and roffblkin() */
	MD_BLKOUT,	/* set by rofftail() and roffblkout() */
	MD_IN,		/* set by roffin() */
	MD_OUT,		/* set by roffout() */
	MD_TEXT,	/* set by roffdata() */
	MD_OVERRIDE	/* set by roffspecial() on ROFF_Ns */
};
46
/*
 * Per-session state of the XML back-end.  Allocated by md_init_xml(),
 * passed to every roff callback as the opaque argument, and freed by
 * md_exit_xml().
 */
struct md_xml {
	const struct md_args	*args;	/* caller arguments; roffmsg() reads ->warnings */
	const struct md_rbuf	*rbuf;	/* input descriptor; roffmsg() reads ->name and ->line */

	struct md_mbuf	*mbuf;		/* output buffer written via md_buf_*() */
	struct rofftree	*tree;		/* parser handle returned by roff_alloc() */
	size_t		 indent;	/* current nesting depth (in levels) */
	size_t		 pos;		/* columns accounted for on the current line */
	enum md_tok	 last;		/* kind of the last element emitted */
	int		 flags;		/* bit-field of the MD_* flags below */
	/* When set, mbuf_data() writes text without word wrapping. */
#define MD_LITERAL (1 << 0) /* FIXME */
};
59
/* Callbacks registered with the roff engine in md_init_xml(). */
static void	 roffmsg(void *arg, enum roffmsg lvl,
			const char *buf, const char *pos, char *msg);
static int	 roffhead(void *arg);
static int	 rofftail(void *arg);
static int	 roffin(void *arg, int tok, int *argc, char **argv);
static int	 roffdata(void *arg, int space, char *buf);
static int	 roffout(void *arg, int tok);
static int	 roffblkin(void *arg, int tok, int *argc, char **argv);
static int	 roffblkout(void *arg, int tok);
static int	 roffspecial(void *arg, int tok);

/* Output helpers; all of them keep md_xml.pos up to date. */
static int	 mbuf_newline(struct md_xml *p);
static int	 mbuf_indent(struct md_xml *p);
static int	 mbuf_data(struct md_xml *p, int space, char *buf);
static int	 mbuf_putstring(struct md_xml *p, const char *buf);
static int	 mbuf_nputstring(struct md_xml *p,
			const char *buf, size_t sz);
static int	 mbuf_puts(struct md_xml *p, const char *buf);
static int	 mbuf_nputs(struct md_xml *p,
			const char *buf, size_t sz);
81
82
/*
 * Write the NUL-terminated string `buf' with character escaping by
 * handing it, together with its length, to mbuf_nputstring().
 * Returns whatever mbuf_nputstring() returns (zero on failure).
 */
static int
mbuf_putstring(struct md_xml *p, const char *buf)
{
	const size_t	 len = strlen(buf);

	return mbuf_nputstring(p, buf, len);
}
89
90
91 static int
92 mbuf_nputstring(struct md_xml *p, const char *buf, size_t sz)
93 {
94 size_t i;
95
96 for (i = 0; i < sz; i++) {
97 switch (buf[i]) {
98 case ('&'):
99 if ( ! md_buf_puts(p->mbuf, "&amp;", 5))
100 return(0);
101 p->pos += 5;
102 break;
103 case ('"'):
104 if ( ! md_buf_puts(p->mbuf, "&quot;", 6))
105 return(0);
106 p->pos += 6;
107 break;
108 default:
109 if ( ! md_buf_putchar(p->mbuf, buf[i]))
110 return(0);
111 p->pos++;
112 break;
113 }
114 }
115 return(1);
116 }
117
118
119 static int
120 mbuf_nputs(struct md_xml *p, const char *buf, size_t sz)
121 {
122
123 p->pos += sz;
124 return(md_buf_puts(p->mbuf, buf, sz));
125 }
126
127
/*
 * Write the NUL-terminated string `buf' verbatim (no escaping) by
 * passing it and its length to mbuf_nputs().
 * Returns whatever mbuf_nputs() returns.
 */
static int
mbuf_puts(struct md_xml *p, const char *buf)
{
	const size_t	 len = strlen(buf);

	return mbuf_nputs(p, buf, len);
}
134
135
136 static int
137 mbuf_indent(struct md_xml *p)
138 {
139 size_t i;
140
141 assert(p->pos == 0);
142
143 /* LINTED */
144 for (i = 0; i < MIN(p->indent, INDENT); i++)
145 if ( ! md_buf_putstring(p->mbuf, " "))
146 return(0);
147
148 p->pos += i * INDENT;
149 return(1);
150 }
151
152
153 static int
154 mbuf_newline(struct md_xml *p)
155 {
156
157 if ( ! md_buf_putchar(p->mbuf, '\n'))
158 return(0);
159
160 p->pos = 0;
161 return(1);
162 }
163
164
165 static int
166 mbuf_data(struct md_xml *p, int space, char *buf)
167 {
168 size_t sz;
169 char *bufp;
170
171 assert(p->mbuf);
172 assert(0 != p->indent);
173
174 if (MD_LITERAL & p->flags)
175 return(mbuf_putstring(p, buf));
176
177 while (*buf) {
178 while (*buf && isspace(*buf))
179 buf++;
180
181 if (0 == *buf)
182 break;
183
184 bufp = buf;
185 while (*buf && ! isspace(*buf))
186 buf++;
187
188 if (0 != *buf)
189 *buf++ = 0;
190
191 sz = strlen(bufp);
192
193 if (0 == p->pos) {
194 if ( ! mbuf_indent(p))
195 return(0);
196 if ( ! mbuf_nputstring(p, bufp, sz))
197 return(0);
198 if (p->indent * INDENT + sz >= COLUMNS) {
199 if ( ! mbuf_newline(p))
200 return(0);
201 continue;
202 }
203 continue;
204 }
205
206 if (space && sz + p->pos >= COLUMNS) {
207 if ( ! mbuf_newline(p))
208 return(0);
209 if ( ! mbuf_indent(p))
210 return(0);
211 } else if (space) {
212 if ( ! mbuf_nputs(p, " ", 1))
213 return(0);
214 }
215
216 if ( ! mbuf_nputstring(p, bufp, sz))
217 return(0);
218
219 if ( ! space && p->pos >= COLUMNS)
220 if ( ! mbuf_newline(p))
221 return(0);
222 }
223
224 return(1);
225 }
226
227
228 int
229 md_line_xml(void *arg, char *buf)
230 {
231 struct md_xml *p;
232
233 p = (struct md_xml *)arg;
234 return(roff_engine(p->tree, buf));
235 }
236
237
238 int
239 md_exit_xml(void *data, int flush)
240 {
241 int c;
242 struct md_xml *p;
243
244 p = (struct md_xml *)data;
245 c = roff_free(p->tree, flush);
246 free(p);
247
248 return(c);
249 }
250
251
252 void *
253 md_init_xml(const struct md_args *args,
254 struct md_mbuf *mbuf, const struct md_rbuf *rbuf)
255 {
256 struct roffcb cb;
257 struct md_xml *p;
258
259 cb.roffhead = roffhead;
260 cb.rofftail = rofftail;
261 cb.roffin = roffin;
262 cb.roffout = roffout;
263 cb.roffblkin = roffblkin;
264 cb.roffblkout = roffblkout;
265 cb.roffspecial = roffspecial;
266 cb.roffmsg = roffmsg;
267 cb.roffdata = roffdata;
268
269 if (NULL == (p = calloc(1, sizeof(struct md_xml))))
270 err(1, "malloc");
271
272 p->args = args;
273 p->mbuf = mbuf;
274 p->rbuf = rbuf;
275
276 assert(mbuf);
277
278 if (NULL == (p->tree = roff_alloc(&cb, p))) {
279 free(p);
280 return(NULL);
281 }
282
283 return(p);
284 }
285
286
287 /* ARGSUSED */
288 static int
289 roffhead(void *arg)
290 {
291 struct md_xml *p;
292
293 assert(arg);
294 p = (struct md_xml *)arg;
295
296 if ( ! mbuf_puts(p, "<?xml version=\"1.0\" "
297 "encoding=\"UTF-8\"?>\n"))
298 return(0);
299 if ( ! mbuf_puts(p, "<mdoc xmlns:block=\"block\" "
300 "xmlns:special=\"special\" "
301 "xmlns:inline=\"inline\">"))
302 return(0);
303
304 p->indent++;
305 p->last = MD_BLKIN;
306 return(mbuf_newline(p));
307 }
308
309
310 static int
311 rofftail(void *arg)
312 {
313 struct md_xml *p;
314
315 assert(arg);
316 p = (struct md_xml *)arg;
317
318 if (0 != p->pos && ! mbuf_newline(p))
319 return(0);
320
321 if ( ! mbuf_puts(p, "</mdoc>"))
322 return(0);
323
324 p->last = MD_BLKOUT;
325 return(mbuf_newline(p));
326 }
327
328
329 /* ARGSUSED */
330 static int
331 roffspecial(void *arg, int tok)
332 {
333 struct md_xml *p;
334
335 assert(arg);
336 p = (struct md_xml *)arg;
337
338 switch (tok) {
339 case (ROFF_Ns):
340 p->last = MD_OVERRIDE;
341 break;
342 default:
343 break;
344 }
345
346 return(1);
347 }
348
349
350 static int
351 roffblkin(void *arg, int tok, int *argc, char **argv)
352 {
353 struct md_xml *p;
354 int i;
355
356 assert(arg);
357 p = (struct md_xml *)arg;
358
359 if (0 != p->pos) {
360 if ( ! mbuf_newline(p))
361 return(0);
362 if ( ! mbuf_indent(p))
363 return(0);
364 } else if ( ! mbuf_indent(p))
365 return(0);
366
367 if ( ! mbuf_nputs(p, "<", 1))
368 return(0);
369 if ( ! mbuf_nputs(p, "block:", 6))
370 return(0);
371 if ( ! mbuf_puts(p, toknames[tok]))
372 return(0);
373
374 /* FIXME: xml won't like standards args (e.g., p1003.1-90). */
375
376 for (i = 0; ROFF_ARGMAX != argc[i]; i++) {
377 if ( ! mbuf_nputs(p, " ", 1))
378 return(0);
379 if ( ! mbuf_puts(p, tokargnames[argc[i]]))
380 return(0);
381 if ( ! mbuf_nputs(p, "=\"", 2))
382 return(0);
383 if ( ! mbuf_putstring(p, argv[i] ? argv[i] : "true"))
384 return(0);
385 if ( ! mbuf_nputs(p, "\"", 1))
386 return(0);
387 }
388
389 if ( ! mbuf_nputs(p, ">", 1))
390 return(0);
391
392 p->last = MD_BLKIN;
393 p->indent++;
394 return(mbuf_newline(p));
395 }
396
397
398 static int
399 roffblkout(void *arg, int tok)
400 {
401 struct md_xml *p;
402
403 assert(arg);
404 p = (struct md_xml *)arg;
405
406 p->indent--;
407
408 if (0 != p->pos) {
409 if ( ! mbuf_newline(p))
410 return(0);
411 if ( ! mbuf_indent(p))
412 return(0);
413 } else if ( ! mbuf_indent(p))
414 return(0);
415
416 if ( ! mbuf_nputs(p, "</", 2))
417 return(0);
418 if ( ! mbuf_nputs(p, "block:", 6))
419 return(0);
420 if ( ! mbuf_puts(p, toknames[tok]))
421 return(0);
422 if ( ! mbuf_nputs(p, ">", 1))
423 return(0);
424
425 p->last = MD_BLKOUT;
426 return(mbuf_newline(p));
427 }
428
429
430 static int
431 roffin(void *arg, int tok, int *argc, char **argv)
432 {
433 struct md_xml *p;
434 int i;
435
436 assert(arg);
437 p = (struct md_xml *)arg;
438
439 /*
440 * FIXME: put all of this in a buffer, then check the buffer
441 * length versus the column width for nicer output. This is a
442 * bit hacky.
443 */
444
445 if (p->pos + 11 > COLUMNS)
446 if ( ! mbuf_newline(p))
447 return(0);
448
449 if (0 != p->pos) {
450 switch (p->last) {
451 case (MD_TEXT):
452 /* FALLTHROUGH */
453 case (MD_OUT):
454 if ( ! mbuf_nputs(p, " ", 1))
455 return(0);
456 break;
457 default:
458 break;
459 }
460 } else if ( ! mbuf_indent(p))
461 return(0);
462
463 p->last = MD_IN;
464
465 if ( ! mbuf_nputs(p, "<", 1))
466 return(0);
467 if ( ! mbuf_nputs(p, "inline:", 7))
468 return(0);
469 if ( ! mbuf_puts(p, toknames[tok]))
470 return(0);
471
472 for (i = 0; ROFF_ARGMAX != argc[i]; i++) {
473 if ( ! mbuf_nputs(p, " ", 1))
474 return(0);
475 if ( ! mbuf_puts(p, tokargnames[argc[i]]))
476 return(0);
477 if ( ! mbuf_nputs(p, "=\"", 2))
478 return(0);
479 if ( ! mbuf_putstring(p, argv[i] ? argv[i] : "true"))
480 return(0);
481 if ( ! mbuf_nputs(p, "\"", 1))
482 return(0);
483 }
484 return(mbuf_nputs(p, ">", 1));
485 }
486
487
488 static int
489 roffout(void *arg, int tok)
490 {
491 struct md_xml *p;
492
493 assert(arg);
494 p = (struct md_xml *)arg;
495
496 /* Continue with a regular out token. */
497
498 if (0 == p->pos && ! mbuf_indent(p))
499 return(0);
500
501 p->last = MD_OUT;
502
503 if ( ! mbuf_nputs(p, "</", 2))
504 return(0);
505 if ( ! mbuf_nputs(p, "inline:", 7))
506 return(0);
507 if ( ! mbuf_puts(p, toknames[tok]))
508 return(0);
509 return(mbuf_nputs(p, ">", 1));
510 }
511
512
513 static void
514 roffmsg(void *arg, enum roffmsg lvl,
515 const char *buf, const char *pos, char *msg)
516 {
517 char *level;
518 struct md_xml *p;
519
520 assert(arg);
521 p = (struct md_xml *)arg;
522
523 switch (lvl) {
524 case (ROFF_WARN):
525 if ( ! (MD_WARN_ALL & p->args->warnings))
526 return;
527 level = "warning";
528 break;
529 case (ROFF_ERROR):
530 level = "error";
531 break;
532 default:
533 abort();
534 }
535
536 if (pos)
537 (void)fprintf(stderr, "%s:%zu: %s: %s (column %zu)\n",
538 p->rbuf->name, p->rbuf->line, level,
539 msg, pos - buf);
540 else
541 (void)fprintf(stderr, "%s: %s: %s\n",
542 p->rbuf->name, level, msg);
543
544 }
545
546
547 static int
548 roffdata(void *arg, int space, char *buf)
549 {
550 struct md_xml *p;
551
552 assert(arg);
553 p = (struct md_xml *)arg;
554 if ( ! mbuf_data(p, space, buf))
555 return(0);
556
557 p->last = MD_TEXT;
558 return(1);
559 }
560