]> git.cameronkatri.com Git - mandoc.git/blob - argv.c
In-progress migration to ARGV separation.
[mandoc.git] / argv.c
1 /* $Id: argv.c,v 1.22 2009/01/20 20:56:21 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the
7 * above copyright notice and this permission notice appear in all
8 * copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12 * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13 * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
16 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 * PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include <assert.h>
20 #include <ctype.h>
21 #include <err.h>
22 #include <stdlib.h>
23 #include <stdio.h>
24 #include <string.h>
25
26 #include "private.h"
27
28 /*
29 * Parse arguments and parameters of macros. Arguments follow the
30 * syntax of `-arg [val [valN...]]', while parameters are free-form text
31 * following arguments (if any). This file must correctly handle the
32 * strange punctuation rules dictated by groff.
33 */
34
35 /* FIXME: .It called with -column and quoted arguments. */
36
/* File-local helpers; all of them are defined later in this file. */
static	int	 lookup(int tok, const char *argv);
static	int	 parse(struct mdoc *mdoc, int line,
			struct mdoc_arg *v, int *pos, char *buf);
static	int	 parse_single(struct mdoc *mdoc, int line,
			struct mdoc_arg *v, int *pos, char *buf);
static	int	 parse_multi(struct mdoc *mdoc, int line,
			struct mdoc_arg *v, int *pos, char *buf);
static	int	 postparse(struct mdoc *mdoc, int line,
			const struct mdoc_arg *v, int pos);

/*
 * Per-macro parameter scanning flags (bit mask, see mdoc_argflags).
 */
/* A leading double quote opens a quoted parameter. */
#define	ARGS_QUOTED	(1 << 0)
/* A trailing run of space-separated delimiters yields ARGS_PUNCT. */
#define	ARGS_DELIM	(1 << 1)
/* Parameters are separated by a tab or by `Ta' instead of spaces. */
#define	ARGS_TABSEP	(1 << 2)
50
51 static int mdoc_argflags[MDOC_MAX] = {
52 0, /* \" */
53 0, /* Dd */
54 0, /* Dt */
55 0, /* Os */
56 0, /* Sh */
57 0, /* Ss */
58 ARGS_DELIM, /* Pp */
59 ARGS_DELIM, /* D1 */
60 ARGS_DELIM, /* Dl */
61 0, /* Bd */
62 0, /* Ed */
63 0, /* Bl */
64 0, /* El */
65 ARGS_DELIM, /* It */
66 ARGS_DELIM, /* Ad */
67 ARGS_DELIM, /* An */
68 ARGS_DELIM, /* Ar */
69 ARGS_QUOTED, /* Cd */
70 ARGS_DELIM, /* Cm */
71 ARGS_DELIM, /* Dv */
72 ARGS_DELIM, /* Er */
73 ARGS_DELIM, /* Ev */
74 0, /* Ex */
75 ARGS_DELIM | ARGS_QUOTED, /* Fa */
76 0, /* Fd */
77 ARGS_DELIM, /* Fl */
78 ARGS_DELIM | ARGS_QUOTED, /* Fn */
79 ARGS_DELIM | ARGS_QUOTED, /* Ft */
80 ARGS_DELIM, /* Ic */
81 0, /* In */
82 ARGS_DELIM, /* Li */
83 0, /* Nd */
84 ARGS_DELIM, /* Nm */
85 ARGS_DELIM, /* Op */
86 0, /* Ot */
87 ARGS_DELIM, /* Pa */
88 0, /* Rv */
89 ARGS_DELIM, /* St */
90 ARGS_DELIM, /* Va */
91 ARGS_DELIM, /* Vt */
92 ARGS_DELIM, /* Xr */
93 ARGS_QUOTED, /* %A */
94 ARGS_QUOTED, /* %B */
95 ARGS_QUOTED, /* %D */
96 ARGS_QUOTED, /* %I */
97 ARGS_QUOTED, /* %J */
98 ARGS_QUOTED, /* %N */
99 ARGS_QUOTED, /* %O */
100 ARGS_QUOTED, /* %P */
101 ARGS_QUOTED, /* %R */
102 ARGS_QUOTED, /* %T */
103 ARGS_QUOTED, /* %V */
104 ARGS_DELIM, /* Ac */
105 0, /* Ao */
106 ARGS_DELIM, /* Aq */
107 ARGS_DELIM, /* At */
108 ARGS_DELIM, /* Bc */
109 0, /* Bf */
110 0, /* Bo */
111 ARGS_DELIM, /* Bq */
112 ARGS_DELIM, /* Bsx */
113 ARGS_DELIM, /* Bx */
114 0, /* Db */
115 ARGS_DELIM, /* Dc */
116 0, /* Do */
117 ARGS_DELIM, /* Dq */
118 ARGS_DELIM, /* Ec */
119 0, /* Ef */
120 ARGS_DELIM, /* Em */
121 0, /* Eo */
122 ARGS_DELIM, /* Fx */
123 ARGS_DELIM, /* Ms */
124 ARGS_DELIM, /* No */
125 ARGS_DELIM, /* Ns */
126 ARGS_DELIM, /* Nx */
127 ARGS_DELIM, /* Ox */
128 ARGS_DELIM, /* Pc */
129 ARGS_DELIM, /* Pf */
130 0, /* Po */
131 ARGS_DELIM, /* Pq */
132 ARGS_DELIM, /* Qc */
133 ARGS_DELIM, /* Ql */
134 0, /* Qo */
135 ARGS_DELIM, /* Qq */
136 0, /* Re */
137 0, /* Rs */
138 ARGS_DELIM, /* Sc */
139 0, /* So */
140 ARGS_DELIM, /* Sq */
141 0, /* Sm */
142 ARGS_DELIM, /* Sx */
143 ARGS_DELIM, /* Sy */
144 ARGS_DELIM, /* Tn */
145 ARGS_DELIM, /* Ux */
146 ARGS_DELIM, /* Xc */
147 0, /* Xo */
148 0, /* Fo */
149 0, /* Fc */
150 0, /* Oo */
151 ARGS_DELIM, /* Oc */
152 0, /* Bk */
153 0, /* Ek */
154 0, /* Bt */
155 0, /* Hf */
156 0, /* Fr */
157 0, /* Ud */
158 };
159
160
161 int
162 mdoc_args(struct mdoc *mdoc, int line,
163 int *pos, char *buf, int tok, char **v)
164 {
165 int i, fl;
166 struct mdoc_node *n;
167
168 fl = 0 == tok ? 0 : mdoc_argflags[tok];
169 if (MDOC_It == tok) {
170 n = mdoc->last->parent;
171 /* FIXME: scan for ARGS_TABSEP. */
172
173 }
174
175 if (0 == buf[*pos])
176 return(ARGS_EOLN);
177
178 if ('\"' == buf[*pos] && ! (fl & ARGS_QUOTED))
179 if ( ! mdoc_pwarn(mdoc, line, *pos, WARN_SYNTAX, "unexpected quoted parameter"))
180 return(ARGS_ERROR);
181
182 if ('-' == buf[*pos])
183 if ( ! mdoc_pwarn(mdoc, line, *pos, WARN_SYNTAX, "argument-like parameter"))
184 return(ARGS_ERROR);
185
186 if ((fl & ARGS_DELIM) && mdoc_iscdelim(buf[*pos])) {
187 /*
188 * If ARGS_DELIM, return ARGS_PUNCT if only space-separated
189 * punctuation remains.
190 */
191 for (i = *pos; buf[i]; ) {
192 if ( ! mdoc_iscdelim(buf[i]))
193 break;
194 i++;
195 if (0 == buf[i] || ! isspace((int)buf[i]))
196 break;
197 i++;
198 while (buf[i] && isspace((int)buf[i]))
199 i++;
200 }
201 if (0 == buf[i]) {
202 *v = &buf[*pos];
203 return(ARGS_PUNCT);
204 }
205 }
206
207 /* Parse routine for non-quoted string. */
208
209 assert(*pos > 0);
210 if ('\"' != buf[*pos] || ! (ARGS_QUOTED & fl)) {
211 *v = &buf[*pos];
212
213 /* FIXME: UGLY tab-sep processing. */
214
215 if (ARGS_TABSEP & fl)
216 while (buf[*pos]) {
217 if ('\t' == buf[*pos])
218 break;
219 if ('T' == buf[*pos]) {
220 (*pos)++;
221 if (0 == buf[*pos])
222 break;
223 if ('a' == buf[*pos]) {
224 buf[*pos - 1] = 0;
225 break;
226 }
227 }
228 (*pos)++;
229 }
230 else {
231 while (buf[*pos]) {
232 if (isspace((int)buf[*pos]))
233 if ('\\' != buf[*pos - 1])
234 break;
235 (*pos)++;
236 }
237 }
238
239 if (0 == buf[*pos])
240 return(ARGS_WORD);
241
242 buf[(*pos)++] = 0;
243
244 if (0 == buf[*pos])
245 return(ARGS_WORD);
246
247 if ( ! (ARGS_TABSEP & fl))
248 while (buf[*pos] && isspace((int)buf[*pos]))
249 (*pos)++;
250
251 if (buf[*pos])
252 return(ARGS_WORD);
253
254 if ( ! mdoc_pwarn(mdoc, line, *pos, WARN_COMPAT, "whitespace at end-of-line"))
255 return(ARGS_ERROR);
256
257 return(ARGS_WORD);
258 }
259
260 /*
261 * If we're a quoted string (and quoted strings are allowed),
262 * then parse ahead to the next quote. If none's found, it's an
263 * error. After, parse to the next word.
264 */
265
266 *v = &buf[++(*pos)];
267
268 while (buf[*pos] && '\"' != buf[*pos])
269 (*pos)++;
270
271 if (0 == buf[*pos]) {
272 (void)mdoc_perr(mdoc, line, *pos, "unterminated quoted parameter");
273 return(ARGS_ERROR);
274 }
275
276 buf[(*pos)++] = 0;
277 if (0 == buf[*pos])
278 return(ARGS_QWORD);
279
280 while (buf[*pos] && isspace((int)buf[*pos]))
281 (*pos)++;
282
283 if (buf[*pos])
284 return(ARGS_QWORD);
285
286 if ( ! mdoc_pwarn(mdoc, line, *pos, WARN_COMPAT, "whitespace at end-of-line"))
287 return(ARGS_ERROR);
288
289 return(ARGS_QWORD);
290 }
291
292
293 static int
294 lookup(int tok, const char *argv)
295 {
296
297 switch (tok) {
298 case (MDOC_An):
299 if (xstrcmp(argv, "split"))
300 return(MDOC_Split);
301 else if (xstrcmp(argv, "nosplit"))
302 return(MDOC_Nosplit);
303 break;
304
305 case (MDOC_Bd):
306 if (xstrcmp(argv, "ragged"))
307 return(MDOC_Ragged);
308 else if (xstrcmp(argv, "unfilled"))
309 return(MDOC_Unfilled);
310 else if (xstrcmp(argv, "filled"))
311 return(MDOC_Filled);
312 else if (xstrcmp(argv, "literal"))
313 return(MDOC_Literal);
314 else if (xstrcmp(argv, "file"))
315 return(MDOC_File);
316 else if (xstrcmp(argv, "offset"))
317 return(MDOC_Offset);
318 break;
319
320 case (MDOC_Bf):
321 if (xstrcmp(argv, "emphasis"))
322 return(MDOC_Emphasis);
323 else if (xstrcmp(argv, "literal"))
324 return(MDOC_Literal);
325 else if (xstrcmp(argv, "symbolic"))
326 return(MDOC_Symbolic);
327 break;
328
329 case (MDOC_Bk):
330 if (xstrcmp(argv, "words"))
331 return(MDOC_Words);
332 break;
333
334 case (MDOC_Bl):
335 if (xstrcmp(argv, "bullet"))
336 return(MDOC_Bullet);
337 else if (xstrcmp(argv, "dash"))
338 return(MDOC_Dash);
339 else if (xstrcmp(argv, "hyphen"))
340 return(MDOC_Hyphen);
341 else if (xstrcmp(argv, "item"))
342 return(MDOC_Item);
343 else if (xstrcmp(argv, "enum"))
344 return(MDOC_Enum);
345 else if (xstrcmp(argv, "tag"))
346 return(MDOC_Tag);
347 else if (xstrcmp(argv, "diag"))
348 return(MDOC_Diag);
349 else if (xstrcmp(argv, "hang"))
350 return(MDOC_Hang);
351 else if (xstrcmp(argv, "ohang"))
352 return(MDOC_Ohang);
353 else if (xstrcmp(argv, "inset"))
354 return(MDOC_Inset);
355 else if (xstrcmp(argv, "column"))
356 return(MDOC_Column);
357 else if (xstrcmp(argv, "width"))
358 return(MDOC_Width);
359 else if (xstrcmp(argv, "offset"))
360 return(MDOC_Offset);
361 else if (xstrcmp(argv, "compact"))
362 return(MDOC_Compact);
363 break;
364
365 case (MDOC_Rv):
366 /* FALLTHROUGH */
367 case (MDOC_Ex):
368 if (xstrcmp(argv, "std"))
369 return(MDOC_Std);
370 break;
371
372 case (MDOC_St):
373 if (xstrcmp(argv, "p1003.1-88"))
374 return(MDOC_p1003_1_88);
375 else if (xstrcmp(argv, "p1003.1-90"))
376 return(MDOC_p1003_1_90);
377 else if (xstrcmp(argv, "p1003.1-96"))
378 return(MDOC_p1003_1_96);
379 else if (xstrcmp(argv, "p1003.1-2001"))
380 return(MDOC_p1003_1_2001);
381 else if (xstrcmp(argv, "p1003.1-2004"))
382 return(MDOC_p1003_1_2004);
383 else if (xstrcmp(argv, "p1003.1"))
384 return(MDOC_p1003_1);
385 else if (xstrcmp(argv, "p1003.1b"))
386 return(MDOC_p1003_1b);
387 else if (xstrcmp(argv, "p1003.1b-93"))
388 return(MDOC_p1003_1b_93);
389 else if (xstrcmp(argv, "p1003.1c-95"))
390 return(MDOC_p1003_1c_95);
391 else if (xstrcmp(argv, "p1003.1g-2000"))
392 return(MDOC_p1003_1g_2000);
393 else if (xstrcmp(argv, "p1003.2-92"))
394 return(MDOC_p1003_2_92);
395 else if (xstrcmp(argv, "p1003.2-95"))
396 return(MDOC_p1387_2_95);
397 else if (xstrcmp(argv, "p1003.2"))
398 return(MDOC_p1003_2);
399 else if (xstrcmp(argv, "p1387.2-95"))
400 return(MDOC_p1387_2);
401 else if (xstrcmp(argv, "isoC-90"))
402 return(MDOC_isoC_90);
403 else if (xstrcmp(argv, "isoC-amd1"))
404 return(MDOC_isoC_amd1);
405 else if (xstrcmp(argv, "isoC-tcor1"))
406 return(MDOC_isoC_tcor1);
407 else if (xstrcmp(argv, "isoC-tcor2"))
408 return(MDOC_isoC_tcor2);
409 else if (xstrcmp(argv, "isoC-99"))
410 return(MDOC_isoC_99);
411 else if (xstrcmp(argv, "ansiC"))
412 return(MDOC_ansiC);
413 else if (xstrcmp(argv, "ansiC-89"))
414 return(MDOC_ansiC_89);
415 else if (xstrcmp(argv, "ansiC-99"))
416 return(MDOC_ansiC_99);
417 else if (xstrcmp(argv, "ieee754"))
418 return(MDOC_ieee754);
419 else if (xstrcmp(argv, "iso8802-3"))
420 return(MDOC_iso8802_3);
421 else if (xstrcmp(argv, "xpg3"))
422 return(MDOC_xpg3);
423 else if (xstrcmp(argv, "xpg4"))
424 return(MDOC_xpg4);
425 else if (xstrcmp(argv, "xpg4.2"))
426 return(MDOC_xpg4_2);
427 else if (xstrcmp(argv, "xpg4.3"))
428 return(MDOC_xpg4_3);
429 else if (xstrcmp(argv, "xbd5"))
430 return(MDOC_xbd5);
431 else if (xstrcmp(argv, "xcu5"))
432 return(MDOC_xcu5);
433 else if (xstrcmp(argv, "xsh5"))
434 return(MDOC_xsh5);
435 else if (xstrcmp(argv, "xns5"))
436 return(MDOC_xns5);
437 else if (xstrcmp(argv, "xns5.2d2.0"))
438 return(MDOC_xns5_2d2_0);
439 else if (xstrcmp(argv, "xcurses4.2"))
440 return(MDOC_xcurses4_2);
441 else if (xstrcmp(argv, "susv2"))
442 return(MDOC_susv2);
443 else if (xstrcmp(argv, "susv3"))
444 return(MDOC_susv3);
445 else if (xstrcmp(argv, "svid4"))
446 return(MDOC_svid4);
447 break;
448
449 default:
450 break;
451 }
452
453 return(MDOC_ARG_MAX);
454 }
455
456
457 static int
458 postparse(struct mdoc *mdoc, int line, const struct mdoc_arg *v, int pos)
459 {
460
461 switch (v->arg) {
462 case (MDOC_Offset):
463 assert(v->value);
464 assert(v->value[0]);
465 if (xstrcmp(v->value[0], "left"))
466 break;
467 if (xstrcmp(v->value[0], "right"))
468 break;
469 if (xstrcmp(v->value[0], "center"))
470 break;
471 if (xstrcmp(v->value[0], "indent"))
472 break;
473 if (xstrcmp(v->value[0], "indent-two"))
474 break;
475 return(mdoc_perr(mdoc, line, pos, "invalid offset value"));
476 default:
477 break;
478 }
479
480 return(1);
481 }
482
483
484 static int
485 parse_multi(struct mdoc *mdoc, int line,
486 struct mdoc_arg *v, int *pos, char *buf)
487 {
488 int c, ppos;
489 char *p;
490
491 v->sz = 0;
492 v->value = xcalloc(MDOC_LINEARG_MAX, sizeof(char *));
493
494 ppos = *pos;
495
496 for (v->sz = 0; v->sz < MDOC_LINEARG_MAX; v->sz++) {
497 if ('-' == buf[*pos])
498 break;
499 c = mdoc_args(mdoc, line, pos, buf, ARGS_QUOTED, &p);
500 if (ARGS_ERROR == c) {
501 free(v->value);
502 return(0);
503 } else if (ARGS_EOLN == c)
504 break;
505 v->value[v->sz] = p;
506 }
507
508 if (0 < v->sz && v->sz < MDOC_LINEARG_MAX)
509 return(1);
510
511 free(v->value);
512 return(mdoc_perr(mdoc, line, ppos, 0 == v->sz ?
513 "argument requires a value" :
514 "too many values to argument"));
515 }
516
517
518 static int
519 parse_single(struct mdoc *mdoc, int line,
520 struct mdoc_arg *v, int *pos, char *buf)
521 {
522 int c, ppos;
523 char *p;
524
525 ppos = *pos;
526
527 c = mdoc_args(mdoc, line, pos, buf, ARGS_QUOTED, &p);
528 if (ARGS_ERROR == c)
529 return(0);
530 if (ARGS_EOLN == c)
531 return(mdoc_perr(mdoc, line, ppos, "argument requires a value"));
532
533 v->sz = 1;
534 v->value = xcalloc(1, sizeof(char *));
535 v->value[0] = p;
536 return(1);
537 }
538
539
540 static int
541 parse(struct mdoc *mdoc, int line,
542 struct mdoc_arg *v, int *pos, char *buf)
543 {
544
545 v->sz = 0;
546 v->value = NULL;
547
548 switch (v->arg) {
549 case(MDOC_Std):
550 /* FALLTHROUGH */
551 case(MDOC_Width):
552 /* FALLTHROUGH */
553 case(MDOC_Offset):
554 return(parse_single(mdoc, line, v, pos, buf));
555 case(MDOC_Column):
556 return(parse_multi(mdoc, line, v, pos, buf));
557 default:
558 break;
559 }
560
561 return(1);
562 }
563
564
565 int
566 mdoc_argv(struct mdoc *mdoc, int line, int tok,
567 struct mdoc_arg *v, int *pos, char *buf)
568 {
569 int i, ppos;
570 char *argv;
571
572 (void)memset(v, 0, sizeof(struct mdoc_arg));
573
574 if (0 == buf[*pos])
575 return(ARGV_EOLN);
576
577 assert( ! isspace((int)buf[*pos]));
578
579 if ('-' != buf[*pos])
580 return(ARGV_WORD);
581
582 i = *pos;
583 argv = &buf[++(*pos)];
584
585 v->line = line;
586 v->pos = *pos;
587
588 assert(*pos > 0);
589 while (buf[*pos]) {
590 if (isspace((int)buf[*pos]))
591 if ('\\' != buf[*pos - 1])
592 break;
593 (*pos)++;
594 }
595
596 if (buf[*pos])
597 buf[(*pos)++] = 0;
598
599 if (MDOC_ARG_MAX == (v->arg = lookup(tok, argv))) {
600 if ( ! mdoc_pwarn(mdoc, line, i, WARN_SYNTAX, "argument-like parameter"))
601 return(ARGV_ERROR);
602 return(ARGV_WORD);
603 }
604
605 while (buf[*pos] && isspace((int)buf[*pos]))
606 (*pos)++;
607
608 /* FIXME: whitespace if no value. */
609
610 ppos = *pos;
611 if ( ! parse(mdoc, line, v, pos, buf))
612 return(ARGV_ERROR);
613 if ( ! postparse(mdoc, line, v, ppos))
614 return(ARGV_ERROR);
615
616 return(ARGV_ARG);
617 }
618
619
620 void
621 mdoc_argv_free(int sz, struct mdoc_arg *arg)
622 {
623 int i;
624
625 for (i = 0; i < sz; i++) {
626 if (0 == arg[i].sz) {
627 assert(NULL == arg[i].value);
628 continue;
629 }
630 assert(arg[i].value);
631 free(arg[i].value);
632 }
633 }
634