]> git.cameronkatri.com Git - mandoc.git/blob - libmdocml.c
Initial foray into roff-parsing.
[mandoc.git] / libmdocml.c
1 /* $Id: libmdocml.c,v 1.4 2008/11/23 11:05:25 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the
7 * above copyright notice and this permission notice appear in all
8 * copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12 * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13 * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
16 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 * PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include <assert.h>
20 #include <fcntl.h>
21 #include <err.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <unistd.h>
26
27 #include "libmdocml.h"
28
/*
 * Capacity, in bytes, of the per-line scratch buffer used while
 * splitting input into lines; longer lines are rejected.
 */
#define	BUFFER_LINE	 BUFSIZ
30
/*
 * Read-side buffer: an input descriptor, the storage that read(2)
 * fills, and the number of the input line currently being assembled.
 */
struct	md_rbuf {
	int		 fd;	/* descriptor data is read from */
	char		*name;	/* label printed in diagnostics */
	char		*buf;	/* storage filled by md_buf_fill() */
	size_t		 bufsz;	/* capacity of buf, in bytes */
	size_t		 line;	/* current line; md_run() starts it at 1 */
};
38
/*
 * Write-side buffer: an output descriptor, the storage that collects
 * pending output, and how much of that storage is currently in use.
 */
struct	md_mbuf {
	int		 fd;	/* descriptor data is written to */
	char		*name;	/* label printed in diagnostics */
	char		*buf;	/* storage holding not-yet-written bytes */
	size_t		 bufsz;	/* capacity of buf, in bytes */
	size_t		 pos;	/* bytes pending in buf (next free offset) */
};
46
/*
 * Callback signatures for an output type.
 *
 * md_line: handed one input line (not NUL-terminated, newline already
 * removed) and its length; returns non-zero to continue, zero to stop
 * the run with an error.
 *
 * md_init / md_exit: same shape as the html4-strict prologue and
 * epilogue writers below; non-zero means success.
 */
typedef	int	(*md_line)(const struct md_args *, struct md_mbuf *,
			const struct md_rbuf *, const char *, size_t);
typedef	int	(*md_init)(const struct md_args *, struct md_mbuf *);
typedef	int	(*md_exit)(const struct md_args *, struct md_mbuf *);
52
/* Pass-through line handler. */
static	int	 md_line_dummy(const struct md_args *args,
			struct md_mbuf *out, const struct md_rbuf *in,
			const char *buf, size_t sz);

/* HTML 4 (strict) line handler, prologue and epilogue. */
static	int	 md_line_html4_strict(const struct md_args *args,
			struct md_mbuf *out, const struct md_rbuf *in,
			const char *buf, size_t sz);
static	int	 md_init_html4_strict(const struct md_args *args,
			struct md_mbuf *out);
static	int	 md_exit_html4_strict(const struct md_args *args,
			struct md_mbuf *out);

/* Main loop and its tear-down. */
static	int	 md_run_enter(const struct md_args *args,
			struct md_mbuf *mbuf, struct md_rbuf *rbuf);
static	int	 md_run_leave(const struct md_args *args,
			struct md_mbuf *mbuf, struct md_rbuf *rbuf, int c);

/* Buffered input and output primitives. */
static	ssize_t	 md_buf_fill(struct md_rbuf *in);
static	int	 md_buf_flush(struct md_mbuf *buf);
static	int	 md_buf_putchar(struct md_mbuf *buf, char c);
static	int	 md_buf_putstring(struct md_mbuf *buf, const char *p);
static	int	 md_buf_puts(struct md_mbuf *buf,
			const char *p, size_t sz);
80
81
82 static ssize_t
83 md_buf_fill(struct md_rbuf *in)
84 {
85 ssize_t ssz;
86
87 assert(in);
88 assert(in->buf);
89 assert(in->bufsz > 0);
90 assert(in->name);
91
92 if (-1 == (ssz = read(in->fd, in->buf, in->bufsz)))
93 warn("%s", in->name);
94
95 return(ssz);
96 }
97
98
99 static int
100 md_buf_flush(struct md_mbuf *buf)
101 {
102 ssize_t sz;
103
104 assert(buf);
105 assert(buf->buf);
106 assert(buf->name);
107
108 if (0 == buf->pos)
109 return(1);
110
111 sz = write(buf->fd, buf->buf, buf->pos);
112
113 if (-1 == sz) {
114 warn("%s", buf->name);
115 return(0);
116 } else if ((size_t)sz != buf->pos) {
117 warnx("%s: short write", buf->name);
118 return(0);
119 }
120
121 buf->pos = 0;
122 return(1);
123 }
124
125
/*
 * Append the single byte c to the output buffer.
 * Returns the result of md_buf_puts(): 1 on success, 0 on failure.
 */
static int
md_buf_putchar(struct md_mbuf *buf, char c)
{
	const char	*one = &c;

	return md_buf_puts(buf, one, 1);
}
131
132
/*
 * Append the NUL-terminated string p (terminator excluded) to the
 * output buffer.
 * Returns the result of md_buf_puts(): 1 on success, 0 on failure.
 */
static int
md_buf_putstring(struct md_mbuf *buf, const char *p)
{
	size_t		 len;

	len = strlen(p);
	return md_buf_puts(buf, p, len);
}
138
139
140 static int
141 md_buf_puts(struct md_mbuf *buf, const char *p, size_t sz)
142 {
143 size_t ssz;
144
145 assert(p);
146 assert(buf);
147 assert(buf->buf);
148
149 /* LINTED */
150 while (buf->pos + sz > buf->bufsz) {
151 ssz = buf->bufsz - buf->pos;
152 (void)memcpy(/* LINTED */
153 buf->buf + buf->pos, p, ssz);
154 p += (long)ssz;
155 sz -= ssz;
156 buf->pos += ssz;
157
158 if ( ! md_buf_flush(buf))
159 return(0);
160 }
161
162 (void)memcpy(/* LINTED */
163 buf->buf + buf->pos, p, sz);
164 buf->pos += sz;
165 return(1);
166 }
167
168
169 static int
170 md_run_leave(const struct md_args *args,
171 struct md_mbuf *mbuf, struct md_rbuf *rbuf, int c)
172 {
173 assert(args);
174 assert(mbuf);
175 assert(rbuf);
176
177 /* Run exiters. */
178 switch (args->type) {
179 case (MD_HTML4_STRICT):
180 if ( ! md_exit_html4_strict(args, mbuf))
181 return(-1);
182 break;
183 case (MD_DUMMY):
184 break;
185 default:
186 abort();
187 }
188
189 /* Make final flush of buffer. */
190 if ( ! md_buf_flush(mbuf))
191 return(-1);
192
193 return(c);
194 }
195
196
197 static int
198 md_run_enter(const struct md_args *args,
199 struct md_mbuf *mbuf, struct md_rbuf *rbuf)
200 {
201 ssize_t sz, i;
202 char line[BUFFER_LINE];
203 size_t pos;
204 md_line fp;
205
206 assert(args);
207 assert(mbuf);
208 assert(rbuf);
209
210 /* Function ptrs to line-parsers. */
211 switch (args->type) {
212 case (MD_HTML4_STRICT):
213 fp = md_line_html4_strict;
214 break;
215 case (MD_DUMMY):
216 fp = md_line_dummy;
217 break;
218 default:
219 abort();
220 }
221
222 /* LINTED */
223 for (pos = 0; ; ) {
224 if (-1 == (sz = md_buf_fill(rbuf)))
225 return(-1);
226 else if (0 == sz)
227 break;
228
229 for (i = 0; i < sz; i++) {
230 if ('\n' == rbuf->buf[i]) {
231 if ( ! (*fp)(args, mbuf, rbuf, line, pos))
232 return(-1);
233 rbuf->line++;
234 pos = 0;
235 continue;
236 }
237
238 if (pos < BUFFER_LINE) {
239 /* LINTED */
240 line[pos++] = rbuf->buf[i];
241 continue;
242 }
243
244 warnx("%s: line %zu too long",
245 rbuf->name, rbuf->line);
246 return(-1);
247 }
248 }
249
250 if (0 != pos && ! (*fp)(args, mbuf, rbuf, line, pos))
251 return(-1);
252
253 return(md_run_leave(args, mbuf, rbuf, 0));
254 }
255
256
257 int
258 md_run(const struct md_args *args,
259 const struct md_buf *out, const struct md_buf *in)
260 {
261 struct md_mbuf mbuf;
262 struct md_rbuf rbuf;
263
264 assert(args);
265 assert(in);
266 assert(out);
267
268 (void)memcpy(&mbuf, out, sizeof(struct md_buf));
269 (void)memcpy(&rbuf, in, sizeof(struct md_buf));
270
271 mbuf.pos = 0;
272 rbuf.line = 1;
273
274 /* Run initialisers. */
275 switch (args->type) {
276 case (MD_HTML4_STRICT):
277 if ( ! md_init_html4_strict(args, &mbuf))
278 return(-1);
279 break;
280 case (MD_DUMMY):
281 break;
282 default:
283 abort();
284 }
285
286 /* Go into mainline. */
287 return(md_run_enter(args, &mbuf, &rbuf));
288 }
289
290
/*
 * Pass-through line handler: copy the sz bytes of the line to the
 * output and put back the newline that line splitting removed.
 *
 * Returns 1 on success, 0 if either write into the buffer fails.
 */
static int
md_line_dummy(const struct md_args *args, struct md_mbuf *out,
		const struct md_rbuf *in, const char *buf, size_t sz)
{

	assert(buf);
	assert(out);
	assert(in);
	assert(args);

	if (md_buf_puts(out, buf, sz) && md_buf_putchar(out, '\n'))
		return 1;

	return 0;
}
308
309
/*
 * Write the HTML epilogue, closing the <pre>, <body> and <html>
 * elements.
 *
 * Returns 1 on success, 0 if the text could not be buffered.
 */
static int
md_exit_html4_strict(const struct md_args *args, struct md_mbuf *out)
{
	static const char	 epilogue[] =
	    " </pre>\n"
	    " </body>\n"
	    "</html>\n";

	assert(out);
	assert(args);

	return md_buf_putstring(out, epilogue) ? 1 : 0;
}
327
328
/*
 * Write the HTML prologue: the <html> and <head> elements with a fixed
 * title, followed by the opening <body> and <pre> tags.
 *
 * Returns 1 on success, 0 if the text could not be buffered.
 */
static int
md_init_html4_strict(const struct md_args *args, struct md_mbuf *out)
{
	static const char	 prologue[] =
	    "<html>\n"
	    " <head>\n"
	    " <title>Manual Page</title>\n"
	    " </head>\n"
	    " <body>\n"
	    " <pre>\n";

	assert(out);
	assert(args);

	return md_buf_putstring(out, prologue) ? 1 : 0;
}
349
350
/*
 * One roff macro: its two-letter name and a bit-mask of MD_* flags.
 *
 * The name occupies exactly two bytes and is NOT NUL-terminated, so it
 * must be compared with memcmp()/strncmp() over 2 bytes and never
 * passed to functions expecting a C string.
 *
 * NOTE(review): nothing in this file reads the flags yet; what each
 * bit means is suggested only by its name.
 */
struct	md_roff_macro {
	char		 name[2];
	int		 flags;
#define	MD_PARSED	(1 << 0)
#define	MD_CALLABLE	(1 << 1)
#define	MD_TITLE	(1 << 2)
};

/*
 * Table of macros known so far.  The declaration previously had no
 * identifier, which does not compile; it is now named, file-local and
 * read-only.
 */
static	const struct md_roff_macro md_roff_macros[] = {
	{ "Dd", MD_TITLE },
	{ "Dt", MD_TITLE },
	{ "Os", MD_TITLE },
	{ "Sh", MD_PARSED },
};
365
366
/*
 * Placeholder for roff macro-line processing.  For now it only checks
 * its arguments; nothing is parsed and nothing is written.
 *
 * out: output buffer; in: input buffer; buf/sz: the line and its
 * length, which must be at least one byte.
 *
 * Returns 1, the success value used by the line handlers in this
 * file.  (The function used to reach its closing brace without
 * returning anything, leaving the result undefined for any caller
 * that looked at it.)
 */
static int
md_roff(struct md_mbuf *out, const struct md_rbuf *in,
		const char *buf, size_t sz)
{

	assert(out);
	assert(in);
	assert(buf);
	assert(sz >= 1);

	return(1);
}
377
378
379 static int
380 md_line_html4_strict(const struct md_args *args, struct md_mbuf *out,
381 const struct md_rbuf *in, const char *buf, size_t sz)
382 {
383
384 assert(args);
385 assert(in);
386
387 if (0 == sz) {
388 warnx("%s: blank line (line %zu)", in->name, in->line);
389 return(0);
390 }
391
392 if ('.' == *buf) {
393 return(1);
394 }
395
396 return(md_buf_puts(out, buf, sz));
397 }