]> git.cameronkatri.com Git - mandoc.git/blob - xml.c
Transition to splitting xml/ml.
[mandoc.git] / xml.c
1 /* $Id: xml.c,v 1.9 2008/12/02 18:26:57 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the
7 * above copyright notice and this permission notice appear in all
8 * copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12 * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13 * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
16 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 * PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include <sys/param.h>
20
21 #include <assert.h>
22 #include <ctype.h>
23 #include <err.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include "libmdocml.h"
29 #include "private.h"
30
31 #define COLUMNS 72
32
/*
 * XML namespace of an emitted tag.  xml_begintag() and xml_endtag()
 * translate this into the prefix placed in front of the tag name.
 */
enum	md_ns {
	MD_NS_BLOCK = 0,	/* Tag is prefixed with "block:". */
	MD_NS_INLINE,		/* Tag is prefixed with "inline:". */
	MD_NS_DEFAULT		/* Tag is emitted without a prefix. */
};
38
/*
 * Kind of element most recently written to the output.  The value is
 * kept in the `last' member of struct md_xml; roffin() reads it to
 * decide whether a separating space is needed before an inline tag.
 */
enum	md_tok {
	MD_BLKIN = 0,		/* Controls spacing. */
	MD_BLKOUT,		/* A block was closed. */
	MD_IN,			/* An inline element was opened. */
	MD_OUT,			/* An inline element was closed. */
	MD_TEXT			/* Text data was written. */
};
46
47 struct md_xml {
48 const struct md_args *args;
49 const struct md_rbuf *rbuf;
50
51 struct md_mbuf *mbuf;
52 struct rofftree *tree;
53 size_t indent;
54 size_t pos;
55 enum md_tok last;
56 int flags;
57 #define MD_LITERAL (1 << 0) /* TODO */
58 #define MD_OVERRIDE_ONE (1 << 1)
59 #define MD_OVERRIDE_ALL (1 << 2)
60 };
61
62 static void roffmsg(void *arg, enum roffmsg,
63 const char *, const char *, char *);
64 static int roffhead(void *);
65 static int rofftail(void *);
66 static int roffin(void *, int, int *, char **);
67 static int roffdata(void *, int, char *);
68 static int roffout(void *, int);
69 static int roffblkin(void *, int, int *, char **);
70 static int roffblkout(void *, int);
71 static int roffspecial(void *, int, int *, char **, char **);
72
73 static void mbuf_mode(struct md_xml *, enum md_ns);
74 static int mbuf_newline(struct md_xml *);
75 static int xml_indent(struct md_xml *);
76 static int mbuf_data(struct md_xml *, int, char *);
77 static int xml_nputstring(struct md_xml *,
78 const char *, size_t);
79 static int xml_puts(struct md_xml *, const char *);
80 static int xml_nputs(struct md_xml *,
81 const char *, size_t);
82 static int xml_begintag(struct md_xml *, const char *,
83 enum md_ns, int *, char **);
84 static int xml_endtag(struct md_xml *,
85 const char *, enum md_ns);
86
87 #ifdef __linux__ /* FIXME: remove */
88 static size_t strlcat(char *, const char *, size_t);
89 static size_t strlcpy(char *, const char *, size_t);
90 #endif
91
92
93 static void
94 mbuf_mode(struct md_xml *p, enum md_ns ns)
95 {
96 p->flags &= ~MD_OVERRIDE_ONE;
97 p->last = ns;
98 }
99
100
101 static int
102 xml_begintag(struct md_xml *p, const char *name, enum md_ns ns,
103 int *argc, char **argv)
104 {
105 char buf[64];
106 ssize_t sz;
107 size_t res;
108
109 switch (ns) {
110 case (MD_NS_BLOCK):
111 res = strlcpy(buf, "block:", sizeof(buf));
112 assert(res < sizeof(buf));
113 break;
114 case (MD_NS_INLINE):
115 res = strlcpy(buf, "inline:", sizeof(buf));
116 assert(res < sizeof(buf));
117 break;
118 default:
119 *buf = 0;
120 break;
121 }
122
123 res = strlcat(buf, name, sizeof(buf));
124 assert(res < sizeof(buf));
125
126 if (-1 == (sz = ml_begintag(p->mbuf, buf, argc, argv)))
127 return(0);
128
129 p->pos += sz;
130 return(1);
131 }
132
133
134 static int
135 xml_endtag(struct md_xml *p, const char *name, enum md_ns ns)
136 {
137 char buf[64];
138 ssize_t sz;
139 size_t res;
140
141 switch (ns) {
142 case (MD_NS_BLOCK):
143 res = strlcpy(buf, "block:", sizeof(buf));
144 assert(res < sizeof(buf));
145 break;
146 case (MD_NS_INLINE):
147 res = strlcpy(buf, "inline:", sizeof(buf));
148 assert(res < sizeof(buf));
149 break;
150 default:
151 *buf = 0;
152 break;
153 }
154
155 res = strlcat(buf, name, sizeof(buf));
156 assert(res < sizeof(buf));
157
158 if (-1 == (sz = ml_endtag(p->mbuf, buf)))
159 return(0);
160
161 p->pos += sz;
162 return(1);
163 }
164
165
166 static int
167 xml_nputstring(struct md_xml *p, const char *buf, size_t sz)
168 {
169 ssize_t res;
170
171 if (-1 == (res = ml_nputstring(p->mbuf, buf, sz)))
172 return(0);
173 p->pos += res;
174 return(1);
175 }
176
177
178 static int
179 xml_nputs(struct md_xml *p, const char *buf, size_t sz)
180 {
181 ssize_t res;
182
183 if (-1 == (res = ml_nputs(p->mbuf, buf, sz)))
184 return(0);
185 p->pos += res;
186 return(1);
187 }
188
189
/*
 * Write the whole NUL-terminated string `buf' through xml_nputs().
 * Returns 1 on success, 0 on failure.
 */
static int
xml_puts(struct md_xml *p, const char *buf)
{
	size_t		 len;

	len = strlen(buf);
	return xml_nputs(p, buf, len);
}
196
197
198 static int
199 xml_indent(struct md_xml *p)
200 {
201 ssize_t res;
202
203 if (-1 == (res = ml_indent(p->mbuf, p->indent)))
204 return(0);
205 p->pos += res;
206 return(1);
207 }
208
209
210 static int
211 mbuf_newline(struct md_xml *p)
212 {
213
214 if ( ! md_buf_putchar(p->mbuf, '\n'))
215 return(0);
216
217 p->pos = 0;
218 return(1);
219 }
220
221
222 static int
223 mbuf_data(struct md_xml *p, int space, char *buf)
224 {
225 size_t sz;
226 char *bufp;
227
228 assert(p->mbuf);
229 assert(0 != p->indent);
230
231 if (MD_OVERRIDE_ONE & p->flags || MD_OVERRIDE_ALL & p->flags)
232 space = 0;
233
234 if (MD_LITERAL & p->flags)
235 return(xml_nputstring(p, buf, sizeof(buf)));
236
237 while (*buf) {
238 while (*buf && isspace(*buf))
239 buf++;
240
241 if (0 == *buf)
242 break;
243
244 bufp = buf;
245 while (*buf && ! isspace(*buf))
246 buf++;
247
248 if (0 != *buf)
249 *buf++ = 0;
250
251 sz = strlen(bufp);
252
253 if (0 == p->pos) {
254 if ( ! xml_indent(p))
255 return(0);
256 if ( ! xml_nputstring(p, bufp, sz))
257 return(0);
258 if (p->indent * MAXINDENT + sz >= COLUMNS)
259 if ( ! mbuf_newline(p))
260 return(0);
261 if ( ! (MD_OVERRIDE_ALL & p->flags))
262 space = 1;
263 continue;
264 }
265
266 if (space && sz + p->pos >= COLUMNS) {
267 if ( ! mbuf_newline(p))
268 return(0);
269 if ( ! xml_indent(p))
270 return(0);
271 } else if (space) {
272 if ( ! xml_nputs(p, " ", 1))
273 return(0);
274 }
275
276 if ( ! xml_nputstring(p, bufp, sz))
277 return(0);
278
279 if ( ! (MD_OVERRIDE_ALL & p->flags))
280 space = 1;
281 }
282
283 return(1);
284 }
285
286
287 int
288 md_line_xml(void *arg, char *buf)
289 {
290 struct md_xml *p;
291
292 p = (struct md_xml *)arg;
293 return(roff_engine(p->tree, buf));
294 }
295
296
297 int
298 md_exit_xml(void *data, int flush)
299 {
300 int c;
301 struct md_xml *p;
302
303 p = (struct md_xml *)data;
304 c = roff_free(p->tree, flush);
305 free(p);
306
307 return(c);
308 }
309
310
311 void *
312 md_init_xml(const struct md_args *args,
313 struct md_mbuf *mbuf, const struct md_rbuf *rbuf)
314 {
315 struct roffcb cb;
316 struct md_xml *p;
317
318 cb.roffhead = roffhead;
319 cb.rofftail = rofftail;
320 cb.roffin = roffin;
321 cb.roffout = roffout;
322 cb.roffblkin = roffblkin;
323 cb.roffblkout = roffblkout;
324 cb.roffspecial = roffspecial;
325 cb.roffmsg = roffmsg;
326 cb.roffdata = roffdata;
327
328 if (NULL == (p = calloc(1, sizeof(struct md_xml))))
329 err(1, "malloc");
330
331 p->args = args;
332 p->mbuf = mbuf;
333 p->rbuf = rbuf;
334
335 assert(mbuf);
336
337 if (NULL == (p->tree = roff_alloc(&cb, p))) {
338 free(p);
339 return(NULL);
340 }
341
342 return(p);
343 }
344
345
346 /* ARGSUSED */
347 static int
348 roffhead(void *arg)
349 {
350 struct md_xml *p;
351
352 assert(arg);
353 p = (struct md_xml *)arg;
354
355 if (-1 == xml_puts(p, "<?xml version=\"1.0\" "
356 "encoding=\"UTF-8\"?>\n"))
357 return(0);
358 if (-1 == xml_puts(p, "<mdoc xmlns:block=\"block\" "
359 "xmlns:special=\"special\" "
360 "xmlns:inline=\"inline\">"))
361 return(0);
362
363 p->indent++;
364 mbuf_mode(p, MD_BLKIN);
365 return(mbuf_newline(p));
366 }
367
368
369 static int
370 rofftail(void *arg)
371 {
372 struct md_xml *p;
373
374 assert(arg);
375 p = (struct md_xml *)arg;
376
377 if (0 != p->pos && ! mbuf_newline(p))
378 return(0);
379
380 mbuf_mode(p, MD_BLKOUT);
381 if ( ! xml_endtag(p, "mdoc", MD_NS_DEFAULT))
382 return(0);
383 return(mbuf_newline(p));
384 }
385
386
387 /* ARGSUSED */
388 static int
389 roffspecial(void *arg, int tok, int *argc, char **argv, char **more)
390 {
391 struct md_xml *p;
392
393 assert(arg);
394 p = (struct md_xml *)arg;
395
396 /* FIXME: this is completely ad hoc. */
397
398 switch (tok) {
399 case (ROFF_Ns):
400 p->flags |= MD_OVERRIDE_ONE;
401 break;
402 case (ROFF_Sm):
403 assert(*more);
404 if (0 == strcmp(*more, "on"))
405 p->flags |= MD_OVERRIDE_ALL;
406 else
407 p->flags &= ~MD_OVERRIDE_ALL;
408 break;
409 default:
410 break;
411 }
412
413 return(1);
414 }
415
416
417 static int
418 roffblkin(void *arg, int tok, int *argc, char **argv)
419 {
420 struct md_xml *p;
421
422 assert(arg);
423 p = (struct md_xml *)arg;
424
425 if (0 != p->pos) {
426 if ( ! mbuf_newline(p))
427 return(0);
428 if ( ! xml_indent(p))
429 return(0);
430 } else if ( ! xml_indent(p))
431 return(0);
432
433 /* FIXME: xml won't like standards args (e.g., p1003.1-90). */
434
435 p->indent++;
436 mbuf_mode(p, MD_BLKIN);
437
438 if ( ! xml_begintag(p, toknames[tok], MD_NS_BLOCK,
439 argc, argv))
440 return(0);
441 return(mbuf_newline(p));
442 }
443
444
445 static int
446 roffblkout(void *arg, int tok)
447 {
448 struct md_xml *p;
449
450 assert(arg);
451 p = (struct md_xml *)arg;
452
453 p->indent--;
454
455 if (0 != p->pos) {
456 if ( ! mbuf_newline(p))
457 return(0);
458 if ( ! xml_indent(p))
459 return(0);
460 } else if ( ! xml_indent(p))
461 return(0);
462
463 mbuf_mode(p, MD_BLKOUT);
464 if ( ! xml_endtag(p, toknames[tok], MD_NS_BLOCK))
465 return(0);
466 return(mbuf_newline(p));
467 }
468
469
470 static int
471 roffin(void *arg, int tok, int *argc, char **argv)
472 {
473 struct md_xml *p;
474
475 assert(arg);
476 p = (struct md_xml *)arg;
477
478 if ( ! (MD_OVERRIDE_ONE & p->flags) &&
479 ! (MD_OVERRIDE_ALL & p->flags) &&
480 p->pos + 11 > COLUMNS)
481 if ( ! mbuf_newline(p))
482 return(0);
483
484 if (0 != p->pos && (MD_TEXT == p->last || MD_OUT == p->last)
485 && ! (MD_OVERRIDE_ONE & p->flags)
486 && ! (MD_OVERRIDE_ALL & p->flags))
487 if ( ! xml_nputs(p, " ", 1))
488 return(0);
489
490 if (0 == p->pos && ! xml_indent(p))
491 return(0);
492
493 mbuf_mode(p, MD_IN);
494 return(xml_begintag(p, toknames[tok],
495 MD_NS_INLINE, argc, argv));
496 }
497
498
499 static int
500 roffout(void *arg, int tok)
501 {
502 struct md_xml *p;
503
504 assert(arg);
505 p = (struct md_xml *)arg;
506
507 if (0 == p->pos && ! xml_indent(p))
508 return(0);
509
510 mbuf_mode(p, MD_OUT);
511 return(xml_endtag(p, toknames[tok], MD_NS_INLINE));
512 }
513
514
515 static void
516 roffmsg(void *arg, enum roffmsg lvl,
517 const char *buf, const char *pos, char *msg)
518 {
519 char *level;
520 struct md_xml *p;
521
522 assert(arg);
523 p = (struct md_xml *)arg;
524
525 switch (lvl) {
526 case (ROFF_WARN):
527 if ( ! (MD_WARN_ALL & p->args->warnings))
528 return;
529 level = "warning";
530 break;
531 case (ROFF_ERROR):
532 level = "error";
533 break;
534 default:
535 abort();
536 }
537
538 if (pos)
539 (void)fprintf(stderr, "%s:%zu: %s: %s (column %zu)\n",
540 p->rbuf->name, p->rbuf->line, level,
541 msg, pos - buf);
542 else
543 (void)fprintf(stderr, "%s: %s: %s\n",
544 p->rbuf->name, level, msg);
545
546 }
547
548
549 static int
550 roffdata(void *arg, int space, char *buf)
551 {
552 struct md_xml *p;
553
554 assert(arg);
555 p = (struct md_xml *)arg;
556 if ( ! mbuf_data(p, space, buf))
557 return(0);
558
559 mbuf_mode(p, MD_TEXT);
560 return(1);
561 }
562
563
564 #ifdef __linux /* FIXME: remove. */
565 /* $OpenBSD: strlcat.c,v 1.13 2005/08/08 08:05:37 espie Exp $ */
566
567 /*
568 * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
569 *
570 * Permission to use, copy, modify, and distribute this software for any
571 * purpose with or without fee is hereby granted, provided that the
572 * above copyright notice and this permission notice appear in all
573 * copies.
574 *
575 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
576 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
577 * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
578 * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
579 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
580 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
581 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
582 * PERFORMANCE OF THIS SOFTWARE.
583 */
/*
 * Append `src' to the string in `dst', where `siz' is the full size of
 * the `dst' buffer.  At most siz - 1 bytes end up in `dst' and the
 * result is NUL-terminated, unless no NUL is found within the first
 * `siz' bytes of `dst', in which case `dst' is left untouched.
 *
 * Returns the length of the string it tried to create: the initial
 * length of `dst' (capped at `siz') plus the length of `src'.  A
 * return value >= siz means the result was truncated.
 */
static size_t
strlcat(char *dst, const char *src, size_t siz)
{
	size_t		 dlen, slen, room, i;

	/* Length of dst, without looking beyond the buffer. */
	dlen = 0;
	while (dlen < siz && dst[dlen] != '\0')
		dlen++;

	slen = strlen(src);

	/* No terminator within siz bytes: nothing can be appended. */
	if (dlen == siz)
		return dlen + slen;

	/* Bytes available for src, keeping one for the NUL. */
	room = siz - dlen - 1;

	for (i = 0; i < slen && i < room; i++)
		dst[dlen + i] = src[i];
	dst[dlen + i] = '\0';

	return dlen + slen;	/* count does not include NUL */
}
612
613
/*
 * Copy `src' into the buffer `dst' of size `siz'.  At most siz - 1
 * bytes are copied and the result is always NUL-terminated, unless
 * `siz' is 0, in which case `dst' is not touched at all.
 *
 * Returns the length of `src'; a return value >= siz means the copy
 * was truncated.
 */
static size_t
strlcpy(char *dst, const char *src, size_t siz)
{
	size_t		 copied, total;

	copied = 0;

	/* Copy as many bytes as will fit, then terminate. */
	if (siz != 0) {
		while (copied < siz - 1 && src[copied] != '\0') {
			dst[copied] = src[copied];
			copied++;
		}
		dst[copied] = '\0';
	}

	/* Traverse the rest of src to find its full length. */
	total = copied;
	while (src[total] != '\0')
		total++;

	return total;	/* count does not include NUL */
}
639 #endif /*__linux__*/