]>
git.cameronkatri.com Git - mandoc.git/blob - argv.c
3621c6732d24a70c1cddc596427131e3e5c007d7
1 /* $Id: argv.c,v 1.37 2009/03/05 13:12:12 kristaps Exp $ */
3 * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the
7 * above copyright notice and this permission notice appear in all
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12 * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13 * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
16 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 * PERFORMANCE OF THIS SOFTWARE.
29 * Routines to parse arguments of macros. Arguments follow the syntax
30 * of `-arg [val [valN...]]'. Arguments come in all types: quoted
31 * arguments, multiple arguments per value, no-value arguments, etc.
34 #define ARGS_QUOTED (1 << 0)
35 #define ARGS_DELIM (1 << 1)
36 #define ARGS_TABSEP (1 << 2)
38 #define ARGV_NONE (1 << 0)
39 #define ARGV_SINGLE (1 << 1)
40 #define ARGV_MULTI (1 << 2)
41 #define ARGV_OPT_SINGLE (1 << 3)
43 static int argv_a2arg(int, const char *);
44 static int args(struct mdoc
*, int, int *,
45 char *, int, char **);
46 static int argv(struct mdoc
*, int, int,
47 struct mdoc_arg
*, int *, char *);
48 static int argv_single(struct mdoc
*, int,
49 struct mdoc_arg
*, int *, char *);
50 static int argv_opt_single(struct mdoc
*, int,
51 struct mdoc_arg
*, int *, char *);
52 static int argv_multi(struct mdoc
*, int,
53 struct mdoc_arg
*, int *, char *);
54 static int pwarn(struct mdoc
*, int, int, int);
55 static int perr(struct mdoc
*, int, int, int);
57 /* Warning messages. */
70 /* Per-argument flags. */
72 static int mdoc_argvflags
[MDOC_ARG_MAX
] = {
73 ARGV_NONE
, /* MDOC_Split */
74 ARGV_NONE
, /* MDOC_Nosplit */
75 ARGV_NONE
, /* MDOC_Ragged */
76 ARGV_NONE
, /* MDOC_Unfilled */
77 ARGV_NONE
, /* MDOC_Literal */
78 ARGV_NONE
, /* MDOC_File */
79 ARGV_SINGLE
, /* MDOC_Offset */
80 ARGV_NONE
, /* MDOC_Bullet */
81 ARGV_NONE
, /* MDOC_Dash */
82 ARGV_NONE
, /* MDOC_Hyphen */
83 ARGV_NONE
, /* MDOC_Item */
84 ARGV_NONE
, /* MDOC_Enum */
85 ARGV_NONE
, /* MDOC_Tag */
86 ARGV_NONE
, /* MDOC_Diag */
87 ARGV_NONE
, /* MDOC_Hang */
88 ARGV_NONE
, /* MDOC_Ohang */
89 ARGV_NONE
, /* MDOC_Inset */
90 ARGV_MULTI
, /* MDOC_Column */
91 ARGV_SINGLE
, /* MDOC_Width */
92 ARGV_NONE
, /* MDOC_Compact */
93 ARGV_SINGLE
, /* MDOC_Std */
94 ARGV_NONE
, /* MDOC_p1003_1_88 */
95 ARGV_NONE
, /* MDOC_p1003_1_90 */
96 ARGV_NONE
, /* MDOC_p1003_1_96 */
97 ARGV_NONE
, /* MDOC_p1003_1_2001 */
98 ARGV_NONE
, /* MDOC_p1003_1_2004 */
99 ARGV_NONE
, /* MDOC_p1003_1 */
100 ARGV_NONE
, /* MDOC_p1003_1b */
101 ARGV_NONE
, /* MDOC_p1003_1b_93 */
102 ARGV_NONE
, /* MDOC_p1003_1c_95 */
103 ARGV_NONE
, /* MDOC_p1003_1g_2000 */
104 ARGV_NONE
, /* MDOC_p1003_2_92 */
105 ARGV_NONE
, /* MDOC_p1387_2_95 */
106 ARGV_NONE
, /* MDOC_p1003_2 */
107 ARGV_NONE
, /* MDOC_p1387_2 */
108 ARGV_NONE
, /* MDOC_isoC_90 */
109 ARGV_NONE
, /* MDOC_isoC_amd1 */
110 ARGV_NONE
, /* MDOC_isoC_tcor1 */
111 ARGV_NONE
, /* MDOC_isoC_tcor2 */
112 ARGV_NONE
, /* MDOC_isoC_99 */
113 ARGV_NONE
, /* MDOC_ansiC */
114 ARGV_NONE
, /* MDOC_ansiC_89 */
115 ARGV_NONE
, /* MDOC_ansiC_99 */
116 ARGV_NONE
, /* MDOC_ieee754 */
117 ARGV_NONE
, /* MDOC_iso8802_3 */
118 ARGV_NONE
, /* MDOC_xpg3 */
119 ARGV_NONE
, /* MDOC_xpg4 */
120 ARGV_NONE
, /* MDOC_xpg4_2 */
121 ARGV_NONE
, /* MDOC_xpg4_3 */
122 ARGV_NONE
, /* MDOC_xbd5 */
123 ARGV_NONE
, /* MDOC_xcu5 */
124 ARGV_NONE
, /* MDOC_xsh5 */
125 ARGV_NONE
, /* MDOC_xns5 */
126 ARGV_NONE
, /* MDOC_xns5_2d2_0 */
127 ARGV_NONE
, /* MDOC_xcurses4_2 */
128 ARGV_NONE
, /* MDOC_susv2 */
129 ARGV_NONE
, /* MDOC_susv3 */
130 ARGV_NONE
, /* MDOC_svid4 */
131 ARGV_NONE
, /* MDOC_Filled */
132 ARGV_NONE
, /* MDOC_Words */
133 ARGV_NONE
, /* MDOC_Emphasis */
134 ARGV_NONE
/* MDOC_Symbolic */
137 static int mdoc_argflags
[MDOC_MAX
] = {
155 ARGS_QUOTED
, /* Cd */
161 ARGS_DELIM
| ARGS_QUOTED
, /* Fa */
164 ARGS_DELIM
| ARGS_QUOTED
, /* Fn */
165 ARGS_DELIM
| ARGS_QUOTED
, /* Ft */
179 ARGS_QUOTED
, /* %A */
180 ARGS_QUOTED
, /* %B */
181 ARGS_QUOTED
, /* %D */
182 ARGS_QUOTED
, /* %I */
183 ARGS_QUOTED
, /* %J */
184 ARGS_QUOTED
, /* %N */
185 ARGS_QUOTED
, /* %O */
186 ARGS_QUOTED
, /* %P */
187 ARGS_QUOTED
, /* %R */
188 ARGS_QUOTED
, /* %T */
189 ARGS_QUOTED
, /* %V */
198 ARGS_DELIM
, /* Bsx */
248 perr(struct mdoc
*mdoc
, int line
, int pos
, int code
)
254 c
= mdoc_perr(mdoc
, line
, pos
,
255 "unterminated quoted parameter");
258 c
= mdoc_perr(mdoc
, line
, pos
,
259 "argument requires a value");
262 c
= mdoc_perr(mdoc
, line
, pos
,
263 "too many values for argument");
274 pwarn(struct mdoc
*mdoc
, int line
, int pos
, int code
)
280 c
= mdoc_pwarn(mdoc
, line
, pos
, WARN_SYNTAX
,
281 "unexpected quoted parameter");
284 c
= mdoc_pwarn(mdoc
, line
, pos
, WARN_SYNTAX
,
285 "argument-like parameter");
288 c
= mdoc_pwarn(mdoc
, line
, pos
, WARN_SYNTAX
,
289 "last list column is empty");
292 c
= mdoc_pwarn(mdoc
, line
, pos
, WARN_COMPAT
,
293 "trailing whitespace");
304 mdoc_args(struct mdoc
*mdoc
, int line
,
305 int *pos
, char *buf
, int tok
, char **v
)
310 fl
= (0 == tok
) ? 0 : mdoc_argflags
[tok
];
313 * Override per-macro argument flags with context-specific ones.
314 * As of now, this is only valid for `It' depending on its list
320 for (n
= mdoc
->last
; n
; n
= n
->parent
)
321 if (MDOC_BLOCK
== n
->type
&& MDOC_Bl
== n
->tok
)
325 c
= (int)n
->data
.block
.argc
;
329 * Using `Bl -column' adds ARGS_TABSEP to the arguments
330 * and invalidates ARGS_DELIM. Using `Bl -diag' allows
331 * for quoted arguments.
335 for (i
= 0; i
< c
; i
++) {
336 switch (n
->data
.block
.argv
[i
].arg
) {
354 /* Continue parsing the arguments themselves... */
356 return(args(mdoc
, line
, pos
, buf
, fl
, v
));
361 args(struct mdoc
*mdoc
, int line
,
362 int *pos
, char *buf
, int fl
, char **v
)
372 if ('\"' == buf
[*pos
] && ! (fl
& ARGS_QUOTED
))
373 if ( ! pwarn(mdoc
, line
, *pos
, WQUOTPARM
))
376 if ('-' == buf
[*pos
])
377 if ( ! pwarn(mdoc
, line
, *pos
, WARGVPARM
))
381 * If the first character is a delimiter and we're to look for
382 * delimited strings, then pass down the buffer seeing if it
383 * follows the pattern of [[::delim::][ ]+]+.
386 if ((fl
& ARGS_DELIM
) && mdoc_iscdelim(buf
[*pos
])) {
387 for (i
= *pos
; buf
[i
]; ) {
388 if ( ! mdoc_iscdelim(buf
[i
]))
391 /* There must be at least one space... */
392 if (0 == buf
[i
] || ! isspace((u_char
)buf
[i
]))
395 while (buf
[i
] && isspace((u_char
)buf
[i
]))
404 /* First parse non-quoted strings. */
406 if ('\"' != buf
[*pos
] || ! (ARGS_QUOTED
& fl
)) {
410 * Thar be dragons here! If we're tab-separated, search
411 * ahead for either a tab or the `Ta' macro. If a tab
412 * is detected, it mustn't be escaped; if a `Ta' is
413 * detected, it must be space-buffered before and after.
414 * If either of these hold true, then prune out the
415 * extra spaces and call it an argument.
418 if (ARGS_TABSEP
& fl
) {
419 /* Scan ahead to unescaped tab. */
421 for (p
= *v
; ; p
++) {
422 if (NULL
== (p
= strchr(p
, '\t')))
426 if ('\\' != *(p
- 1))
430 /* Scan ahead to unescaped `Ta'. */
432 for (pp
= *v
; ; pp
++) {
433 if (NULL
== (pp
= strstr(pp
, "Ta")))
435 if (pp
> *v
&& ' ' != *(pp
- 1))
437 if (' ' == *(pp
+ 2) || 0 == *(pp
+ 2))
441 /* Choose delimiter tab/Ta. */
444 p
= (p
< pp
? p
: pp
);
448 /* Strip delimiter's preceding whitespace. */
452 while (pp
> *v
&& ' ' == *pp
)
454 if (pp
== *v
&& ' ' == *pp
)
460 /* ...in- and proceding whitespace. */
462 if (p
&& ('\t' != *p
)) {
473 *pos
+= (int)(p
- *v
);
477 if ( ! pwarn(mdoc
, line
, *pos
, WCOLEMPTY
))
479 if (p
&& 0 == *p
&& p
> *v
&& ' ' == *(p
- 1))
480 if ( ! pwarn(mdoc
, line
, *pos
, WTAILWS
))
486 /* Configure the eoln case, too. */
491 if (p
> *v
&& ' ' == *(p
- 1))
492 if ( ! pwarn(mdoc
, line
, *pos
, WTAILWS
))
494 *pos
+= (int)(p
- *v
);
499 /* Do non-tabsep look-ahead here. */
501 if ( ! (ARGS_TABSEP
& fl
))
503 if (isspace((u_char
)buf
[*pos
]))
504 if ('\\' != buf
[*pos
- 1])
517 if ( ! (ARGS_TABSEP
& fl
))
518 while (buf
[*pos
] && isspace((u_char
)buf
[*pos
]))
524 if ( ! pwarn(mdoc
, line
, *pos
, WTAILWS
))
531 * If we're a quoted string (and quoted strings are allowed),
532 * then parse ahead to the next quote. If none's found, it's an
533 * error. After, parse to the next word.
538 while (buf
[*pos
] && '\"' != buf
[*pos
])
541 if (0 == buf
[*pos
]) {
542 (void)perr(mdoc
, line
, *pos
, EQUOTTERM
);
550 while (buf
[*pos
] && isspace((u_char
)buf
[*pos
]))
556 if ( ! pwarn(mdoc
, line
, *pos
, WTAILWS
))
564 argv_a2arg(int tok
, const char *argv
)
568 * Parse an argument identifier from its text. XXX - this
569 * should really be table-driven to clarify the code.
571 * If you add an argument to the list, make sure that you
572 * register it here with its one or more macros!
577 if (xstrcmp(argv
, "split"))
579 else if (xstrcmp(argv
, "nosplit"))
580 return(MDOC_Nosplit
);
584 if (xstrcmp(argv
, "ragged"))
586 else if (xstrcmp(argv
, "unfilled"))
587 return(MDOC_Unfilled
);
588 else if (xstrcmp(argv
, "filled"))
590 else if (xstrcmp(argv
, "literal"))
591 return(MDOC_Literal
);
592 else if (xstrcmp(argv
, "file"))
594 else if (xstrcmp(argv
, "offset"))
599 if (xstrcmp(argv
, "emphasis"))
600 return(MDOC_Emphasis
);
601 else if (xstrcmp(argv
, "literal"))
602 return(MDOC_Literal
);
603 else if (xstrcmp(argv
, "symbolic"))
604 return(MDOC_Symbolic
);
608 if (xstrcmp(argv
, "words"))
613 if (xstrcmp(argv
, "bullet"))
615 else if (xstrcmp(argv
, "dash"))
617 else if (xstrcmp(argv
, "hyphen"))
619 else if (xstrcmp(argv
, "item"))
621 else if (xstrcmp(argv
, "enum"))
623 else if (xstrcmp(argv
, "tag"))
625 else if (xstrcmp(argv
, "diag"))
627 else if (xstrcmp(argv
, "hang"))
629 else if (xstrcmp(argv
, "ohang"))
631 else if (xstrcmp(argv
, "inset"))
633 else if (xstrcmp(argv
, "column"))
635 else if (xstrcmp(argv
, "width"))
637 else if (xstrcmp(argv
, "offset"))
639 else if (xstrcmp(argv
, "compact"))
640 return(MDOC_Compact
);
646 if (xstrcmp(argv
, "std"))
651 if (xstrcmp(argv
, "p1003.1-88"))
652 return(MDOC_p1003_1_88
);
653 else if (xstrcmp(argv
, "p1003.1-90"))
654 return(MDOC_p1003_1_90
);
655 else if (xstrcmp(argv
, "p1003.1-96"))
656 return(MDOC_p1003_1_96
);
657 else if (xstrcmp(argv
, "p1003.1-2001"))
658 return(MDOC_p1003_1_2001
);
659 else if (xstrcmp(argv
, "p1003.1-2004"))
660 return(MDOC_p1003_1_2004
);
661 else if (xstrcmp(argv
, "p1003.1"))
662 return(MDOC_p1003_1
);
663 else if (xstrcmp(argv
, "p1003.1b"))
664 return(MDOC_p1003_1b
);
665 else if (xstrcmp(argv
, "p1003.1b-93"))
666 return(MDOC_p1003_1b_93
);
667 else if (xstrcmp(argv
, "p1003.1c-95"))
668 return(MDOC_p1003_1c_95
);
669 else if (xstrcmp(argv
, "p1003.1g-2000"))
670 return(MDOC_p1003_1g_2000
);
671 else if (xstrcmp(argv
, "p1003.2-92"))
672 return(MDOC_p1003_2_92
);
673 else if (xstrcmp(argv
, "p1003.2-95"))
674 return(MDOC_p1387_2_95
);
675 else if (xstrcmp(argv
, "p1003.2"))
676 return(MDOC_p1003_2
);
677 else if (xstrcmp(argv
, "p1387.2-95"))
678 return(MDOC_p1387_2
);
679 else if (xstrcmp(argv
, "isoC-90"))
680 return(MDOC_isoC_90
);
681 else if (xstrcmp(argv
, "isoC-amd1"))
682 return(MDOC_isoC_amd1
);
683 else if (xstrcmp(argv
, "isoC-tcor1"))
684 return(MDOC_isoC_tcor1
);
685 else if (xstrcmp(argv
, "isoC-tcor2"))
686 return(MDOC_isoC_tcor2
);
687 else if (xstrcmp(argv
, "isoC-99"))
688 return(MDOC_isoC_99
);
689 else if (xstrcmp(argv
, "ansiC"))
691 else if (xstrcmp(argv
, "ansiC-89"))
692 return(MDOC_ansiC_89
);
693 else if (xstrcmp(argv
, "ansiC-99"))
694 return(MDOC_ansiC_99
);
695 else if (xstrcmp(argv
, "ieee754"))
696 return(MDOC_ieee754
);
697 else if (xstrcmp(argv
, "iso8802-3"))
698 return(MDOC_iso8802_3
);
699 else if (xstrcmp(argv
, "xpg3"))
701 else if (xstrcmp(argv
, "xpg4"))
703 else if (xstrcmp(argv
, "xpg4.2"))
705 else if (xstrcmp(argv
, "xpg4.3"))
707 else if (xstrcmp(argv
, "xbd5"))
709 else if (xstrcmp(argv
, "xcu5"))
711 else if (xstrcmp(argv
, "xsh5"))
713 else if (xstrcmp(argv
, "xns5"))
715 else if (xstrcmp(argv
, "xns5.2d2.0"))
716 return(MDOC_xns5_2d2_0
);
717 else if (xstrcmp(argv
, "xcurses4.2"))
718 return(MDOC_xcurses4_2
);
719 else if (xstrcmp(argv
, "susv2"))
721 else if (xstrcmp(argv
, "susv3"))
723 else if (xstrcmp(argv
, "svid4"))
731 return(MDOC_ARG_MAX
);
736 argv_multi(struct mdoc
*mdoc
, int line
,
737 struct mdoc_arg
*v
, int *pos
, char *buf
)
743 v
->value
= xcalloc(MDOC_LINEARG_MAX
, sizeof(char *));
747 for (v
->sz
= 0; v
->sz
< MDOC_LINEARG_MAX
; v
->sz
++) {
748 if ('-' == buf
[*pos
])
750 c
= args(mdoc
, line
, pos
, buf
, ARGS_QUOTED
, &p
);
751 if (ARGS_ERROR
== c
) {
754 } else if (ARGS_EOLN
== c
)
756 v
->value
[(int)v
->sz
] = p
;
759 if (0 < v
->sz
&& v
->sz
< MDOC_LINEARG_MAX
)
764 return(perr(mdoc
, line
, ppos
, EARGVAL
));
766 return(perr(mdoc
, line
, ppos
, EARGMANY
));
771 argv_opt_single(struct mdoc
*mdoc
, int line
,
772 struct mdoc_arg
*v
, int *pos
, char *buf
)
779 if ('-' == buf
[*pos
])
782 c
= args(mdoc
, line
, pos
, buf
, ARGS_QUOTED
, &p
);
789 v
->value
= xcalloc(1, sizeof(char *));
796 * Parse a single, mandatory value from the stream.
799 argv_single(struct mdoc
*mdoc
, int line
,
800 struct mdoc_arg
*v
, int *pos
, char *buf
)
807 c
= args(mdoc
, line
, pos
, buf
, ARGS_QUOTED
, &p
);
811 return(perr(mdoc
, line
, ppos
, EARGVAL
));
814 v
->value
= xcalloc(1, sizeof(char *));
821 * Determine rules for parsing arguments. Arguments can either accept
822 * no parameters, an optional single parameter, one parameter, or
823 * multiple parameters.
826 argv(struct mdoc
*mdoc
, int tok
, int line
,
827 struct mdoc_arg
*v
, int *pos
, char *buf
)
833 fl
= mdoc_argvflags
[v
->arg
];
836 * Override the default per-argument value.
841 fl
= ARGV_OPT_SINGLE
;
849 return(argv_single(mdoc
, line
, v
, pos
, buf
));
851 return(argv_multi(mdoc
, line
, v
, pos
, buf
));
852 case (ARGV_OPT_SINGLE
):
853 return(argv_opt_single(mdoc
, line
, v
, pos
, buf
));
864 * Parse an argument from line text. This comes in the form of -key
865 * [value0...], which may either have a single mandatory value, at least
866 * one mandatory value, an optional single value, or no value.
869 mdoc_argv(struct mdoc
*mdoc
, int line
, int tok
,
870 struct mdoc_arg
*v
, int *pos
, char *buf
)
875 (void)memset(v
, 0, sizeof(struct mdoc_arg
));
880 assert( ! isspace((u_char
)buf
[*pos
]));
882 if ('-' != buf
[*pos
])
895 if (isspace((u_char
)buf
[*pos
]))
896 if ('\\' != buf
[*pos
- 1])
905 * We now parse out the per-macro arguments. XXX - this can be
906 * made much cleaner using per-argument tables. See argv_a2arg
910 if (MDOC_ARG_MAX
== (v
->arg
= argv_a2arg(tok
, p
))) {
911 if ( ! pwarn(mdoc
, line
, i
, WARGVPARM
))
916 while (buf
[*pos
] && isspace((u_char
)buf
[*pos
]))
919 /* FIXME: whitespace if no value. */
921 if ( ! argv(mdoc
, tok
, line
, v
, pos
, buf
))
929 mdoc_argv_free(int sz
, struct mdoc_arg
*arg
)
933 for (i
= 0; i
< sz
; i
++) {
934 if (0 == arg
[i
].sz
) {
935 assert(NULL
== arg
[i
].value
);
938 assert(arg
[i
].value
);