]> git.cameronkatri.com Git - mandoc.git/blob - html4_strict.c
Segmentation into html and dummy parsers.
[mandoc.git] / html4_strict.c
1 /* $Id: html4_strict.c,v 1.1 2008/11/23 16:53:18 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the
7 * above copyright notice and this permission notice appear in all
8 * copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12 * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13 * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
16 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 * PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include <assert.h>
20 #include <ctype.h>
21 #include <err.h>
22 #include <stdlib.h>
23 #include <stdio.h>
24 #include <string.h>
25 #include <time.h>
26
27 #include "libmdocml.h"
28 #include "private.h"
29
/*
 * Direction in which a token callback is being invoked: ROFF_ENTER
 * when the token is encountered on an input line, ROFF_EXIT when the
 * scope it opened is being closed.
 */
enum	roffd {
	ROFF_ENTER = 0,
	ROFF_EXIT = 1
};
34
/*
 * Token classification.  ROFF_LAYOUT tokens take part in the scope
 * closing performed by roffparse(); ROFF_NONE tokens do not.
 */
enum	rofftype {
	ROFF_NONE = 0,
	ROFF_LAYOUT = 1
};
39
struct	rofftree;

/*
 * Common parameter list shared by every token callback:
 *   arg   - run-time arguments handed to the parser entry points
 *   out   - output buffer (the exit path passes NULL after an error)
 *   in    - input buffer descriptor (name and line used in messages)
 *   buf   - the current input line, or NULL when type is ROFF_EXIT
 *   sz    - length of buf, or 0 when type is ROFF_EXIT
 *   pos   - offset into buf past the token, or 0 when type is ROFF_EXIT
 *   type  - whether the token is being entered or exited
 *   tree  - mutable parse state
 */
#define	ROFFCALL_ARGS						\
	const struct md_args *arg,				\
	struct md_mbuf *out,					\
	const struct md_rbuf *in,				\
	const char *buf, size_t sz,				\
	size_t pos, enum roffd type,				\
	struct rofftree *tree

/* Token callback; the callers in this file treat zero as failure. */
typedef	int (*roffcall)(ROFFCALL_ARGS);
49
/* Per-token callbacks, one for each entry of the token table. */
static	int	 roff_Dd(ROFFCALL_ARGS);
static	int	 roff_Dt(ROFFCALL_ARGS);
static	int	 roff_Os(ROFFCALL_ARGS);
static	int	 roff_Sh(ROFFCALL_ARGS);
54
55 struct rofftok {
56 char id;
57 #define ROFF___ 0
58 #define ROFF_Dd 1
59 #define ROFF_Dt 2
60 #define ROFF_Os 3
61 #define ROFF_Sh 4
62 #define ROFF_Max 5
63 char name[2];
64 roffcall cb;
65 enum rofftype type;
66 int flags;
67 #define ROFF_NESTED (1 << 0)
68 };
69
70 static const struct rofftok tokens[ROFF_Max] = {
71 { ROFF___, "\\\"", NULL, ROFF_NONE, 0 },
72 { ROFF_Dd, "Dd", roff_Dd, ROFF_NONE, 0 },
73 { ROFF_Dt, "Dt", roff_Dt, ROFF_NONE, 0 },
74 { ROFF_Os, "Os", roff_Os, ROFF_LAYOUT, 0 },
75 { ROFF_Sh, "Sh", roff_Sh, ROFF_LAYOUT, 0 },
76 };
77
/*
 * One open scope.  Nodes are linked from the innermost scope outward
 * through the parent pointer.
 */
struct	roffnode {
	int		  tok;		/* id of the token that opened it */
	struct roffnode	 *parent;	/* enclosing scope, or NULL */
	/* TODO: line number at acquisition. */
};
83
/* Bits recorded in rofftree.state while the prelude is being read. */
#define	ROFF_PRELUDE	 (1 << 0)
#define	ROFF_PRELUDE_Os	 (1 << 1)
#define	ROFF_PRELUDE_Dt	 (1 << 2)
#define	ROFF_PRELUDE_Dd	 (1 << 3)

/*
 * Parser state carried between calls through the opaque data pointer
 * of the md_*_html4_strict entry points.
 */
struct	rofftree {
	struct roffnode	 *last;		/* innermost open scope, or NULL */

	time_t		  date;		/* set when Dd is processed */
	char		  title[256];
	char		  section[256];
	char		  volume[256];

	int		  state;	/* ROFF_PRELUDE_xx bit mask */
};
98
/* Internal helpers, defined further down in this file. */
static	int	 rofffind(const char *name);
static	int	 roffparse(const struct md_args *args,
			struct md_mbuf *out,
			const struct md_rbuf *in,
			const char *buf, size_t sz,
			struct rofftree *tree);
static	int	 textparse(struct md_mbuf *out,
			const struct md_rbuf *in,
			const char *buf, size_t sz,
			const struct rofftree *tree);
109
110
111 int
112 md_exit_html4_strict(const struct md_args *args, struct md_mbuf *out,
113 const struct md_rbuf *in, void *data)
114 {
115 struct rofftree *tree;
116 int error;
117
118 assert(args);
119 assert(data);
120 tree = (struct rofftree *)data;
121 error = 0;
122
123 while (tree->last)
124 if ( ! (*tokens[tree->last->tok].cb)
125 (args, error ? NULL : out, in, NULL,
126 0, 0, ROFF_EXIT, tree))
127 error = 1;
128
129 free(tree);
130 return(error ? 0 : 1);
131 }
132
133
134 int
135 md_init_html4_strict(const struct md_args *args, struct md_mbuf *out,
136 const struct md_rbuf *in, void **data)
137 {
138 struct rofftree *tree;
139
140 assert(args);
141 assert(in);
142 assert(out);
143 assert(data);
144
145 /* TODO: write HTML-DTD header. */
146
147 if (NULL == (tree = calloc(1, sizeof(struct rofftree)))) {
148 warn("malloc");
149 return(0);
150 }
151
152 *data = tree;
153 return(1);
154 }
155
156
157 int
158 md_line_html4_strict(const struct md_args *args, struct md_mbuf *out,
159 const struct md_rbuf *in, const char *buf,
160 size_t sz, void *data)
161 {
162 struct rofftree *tree;
163
164 assert(args);
165 assert(in);
166 assert(data);
167
168 tree = (struct rofftree *)data;
169
170 if (0 == sz) {
171 warnx("%s: blank line (line %zu)", in->name, in->line);
172 return(0);
173 } else if ('.' != *buf)
174 return(textparse(out, in, buf, sz, tree));
175
176 return(roffparse(args, out, in, buf, sz, tree));
177 }
178
179
180 static int
181 textparse(struct md_mbuf *out, const struct md_rbuf *in,
182 const char *buf, size_t sz,
183 const struct rofftree *tree)
184 {
185
186 assert(tree);
187 assert(out);
188 assert(in);
189 assert(buf);
190 assert(sz > 0);
191
192 if (NULL == tree->last) {
193 warnx("%s: unexpected text (line %zu)",
194 in->name, in->line);
195 return(0);
196 } else if (NULL == tree->last->parent) {
197 warnx("%s: disallowed text (line %zu)",
198 in->name, in->line);
199 return(0);
200 }
201
202 if ( ! md_buf_puts(out, buf, sz))
203 return(0);
204 return(md_buf_putstring(out, " "));
205 }
206
207
208 static int
209 roffparse(const struct md_args *args, struct md_mbuf *out,
210 const struct md_rbuf *in, const char *buf,
211 size_t sz, struct rofftree *tree)
212 {
213 int tokid, t;
214 size_t pos;
215 struct roffnode *node;
216
217 assert(args);
218 assert(out);
219 assert(in);
220 assert(buf);
221 assert(sz > 0);
222 assert(tree);
223
224 /*
225 * Extract the token identifier from the buffer. If there's no
226 * callback for the token (comment, etc.) then exit immediately.
227 * We don't do any error handling (yet), so if the token doesn't
228 * exist, die.
229 */
230
231 if (3 > sz) {
232 warnx("%s: malformed input (line %zu, col 1)",
233 in->name, in->line);
234 return(0);
235 } else if (ROFF_Max == (tokid = rofffind(buf + 1))) {
236 warnx("%s: unknown token `%c%c' (line %zu, col 1)",
237 in->name, *(buf + 1),
238 *(buf + 2), in->line);
239 return(0);
240 } else if (NULL == tokens[tokid].cb)
241 return(1); /* Skip token. */
242
243 pos = 3;
244
245 /*
246 * If this is a non-nestable layout token and we're below a
247 * token of the same type, then recurse upward to the token,
248 * closing out the interim scopes.
249 *
250 * If there's a nested token on the chain, then raise an error
251 * as nested tokens have corresponding "ending" tokens and we're
252 * breaking their scope.
253 */
254
255 node = NULL;
256
257 if (ROFF_LAYOUT == tokens[tokid].type &&
258 ! (ROFF_NESTED & tokens[tokid].flags)) {
259 for (node = tree->last; node; node = node->parent) {
260 if (node->tok == tokid)
261 break;
262
263 /* Don't break nested scope. */
264
265 if ( ! (ROFF_NESTED & tokens[node->tok].flags))
266 continue;
267 warnx("%s: scope of %s broken by %s "
268 "(line %zu, col %zu)",
269 in->name, tokens[tokid].name,
270 tokens[node->tok].name,
271 in->line, pos);
272 return(0);
273 }
274 }
275 if (node) {
276 assert(ROFF_LAYOUT == tokens[tokid].type);
277 assert( ! (ROFF_NESTED & tokens[tokid].flags));
278 assert(node->tok == tokid);
279
280 /* Clear up to last scoped token. */
281
282 do {
283 t = tree->last->tok;
284 if ( ! (*tokens[tree->last->tok].cb)
285 (args, out, in, NULL,
286 0, 0, ROFF_EXIT, tree))
287 return(0);
288 } while (t != tokid);
289 }
290
291 /* Proceed with actual token processing. */
292
293 return((*tokens[tokid].cb)(args, out, in, buf, sz,
294 pos, ROFF_ENTER, tree));
295 }
296
297
298 static int
299 rofffind(const char *name)
300 {
301 size_t i;
302
303 assert(name);
304 /* FIXME: use a table, this is slow but ok for now. */
305 for (i = 0; i < ROFF_Max; i++)
306 if (0 == strncmp(name, tokens[i].name, 2))
307 return(i);
308
309 return(ROFF_Max);
310 }
311
312
313 /* ARGUSED */
314 static int
315 roff_Dd(ROFFCALL_ARGS)
316 {
317
318 assert(in);
319 assert(tree);
320 assert(arg);
321 assert(out);
322 assert(buf);
323 assert(sz > 0);
324 assert(pos > 0);
325 assert(type == ROFF_ENTER);
326
327 if (tree->last) {
328 warnx("%s: superfluous prelude (line %zu, col %zu)",
329 in->name, in->line, pos);
330 return(0);
331 }
332
333 if (0 != tree->state) {
334 warnx("%s: bad manual prelude (line %zu, col %zu)",
335 in->name, in->line, pos);
336 return(1);
337 }
338
339 /* TODO: parse date from buffer. */
340
341 tree->date = time(NULL);
342 tree->state |= ROFF_PRELUDE_Dd;
343 return(1);
344 }
345
346
347 static int
348 roff_Dt(ROFFCALL_ARGS)
349 {
350
351 assert(in);
352 assert(tree);
353 assert(arg);
354 assert(out);
355 assert(buf);
356 assert(sz > 0);
357 assert(pos > 0);
358 assert(type == ROFF_ENTER);
359
360 if (tree->last) {
361 warnx("%s: superfluous prelude (line %zu, col %zu)",
362 in->name, in->line, pos);
363 return(0);
364 }
365
366 if ( ! (ROFF_PRELUDE_Dd & tree->state) ||
367 (ROFF_PRELUDE_Os & tree->state) ||
368 (ROFF_PRELUDE_Dt & tree->state)) {
369 warnx("%s: bad manual prelude (line %zu, col %zu)",
370 in->name, in->line, pos);
371 return(1);
372 }
373
374 /* TODO: parse titles from buffer. */
375
376 tree->state |= ROFF_PRELUDE_Dt;
377 return(1);
378 }
379
380
381 static int
382 roff_Os(ROFFCALL_ARGS)
383 {
384 struct roffnode *node;
385
386 assert(arg);
387 assert(tree);
388 assert(in);
389
390 if (ROFF_EXIT == type) {
391 assert(tree->last);
392 assert(tree->last->tok == ROFF_Os);
393
394 /* TODO: flush out ML footer. */
395
396 node = tree->last;
397 tree->last = node->parent;
398 free(node);
399
400 return(1);
401 }
402
403 assert(out);
404 assert(buf);
405 assert(sz > 0);
406 assert(pos > 0);
407
408 if (tree->last) {
409 warnx("%s: superfluous prelude (line %zu, col %zu)",
410 in->name, in->line, pos);
411 return(0);
412 }
413
414 if ((ROFF_PRELUDE_Os & tree->state) ||
415 ! (ROFF_PRELUDE_Dt & tree->state) ||
416 ! (ROFF_PRELUDE_Dd & tree->state)) {
417 warnx("%s: bad manual prelude (line %zu, col %zu)",
418 in->name, in->line, pos);
419 return(1);
420 }
421
422 node = malloc(sizeof(struct roffnode));
423 if (NULL == node) {
424 warn("malloc");
425 return(0);
426 }
427 node->tok = ROFF_Os;
428 node->parent = NULL;
429
430 tree->state |= ROFF_PRELUDE_Os;
431 tree->last = node;
432
433 return(1);
434 }
435
436
437 static int
438 roff_Sh(ROFFCALL_ARGS)
439 {
440
441 assert(arg);
442 /*assert(out);*/(void)out;
443 assert(in);
444 /*assert(buf);*/(void)buf;
445 (void)sz;
446 (void)pos;
447 (void)type;
448 assert(tree);
449 return(1);
450 }
451