]>
git.cameronkatri.com Git - mandoc.git/blob - roff_escape.c
8145a9dd39634721185f394f21b0929b3b18db39
3 * Copyright (c) 2011, 2012, 2013, 2014, 2015, 2017, 2018, 2020, 2022
4 * Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19 * Parser for roff(7) escape sequences.
20 * To be used by all mandoc(1) parsers and formatters.
33 * Traditional escape sequence interpreter for general use
34 * including in high-level formatters. This function does not issue
35 * diagnostics and is not usable for expansion in the roff(7) parser.
36 * It is documented in the mandoc_escape(3) manual page.
39 mandoc_escape(const char **rendarg
, const char **rarg
, int *rargl
)
41 int iarg
, iendarg
, iend
;
44 rval
= roff_escape(--*rendarg
, 0, 0,
45 NULL
, NULL
, &iarg
, &iendarg
, &iend
);
46 assert(rval
!= ESCAPE_EXPAND
);
48 *rarg
= *rendarg
+ iarg
;
50 *rargl
= iendarg
- iarg
;
56 * Full-featured escape sequence parser.
57 * If it encounters a nested escape sequence that requires expansion
58 * by the parser and re-parsing, the positions of that inner escape
59 * sequence are returned in *resc ... *rend.
60 * Otherwise, *resc is set to aesc and the positions of the escape
61 * sequence starting at aesc are returned.
62 * Diagnostic messages are generated if and only if resc != NULL,
63 * that is, if and only if called by roff_expand().
66 roff_escape(const char *buf
, const int ln
, const int aesc
,
67 int *resc
, int *rnam
, int *rarg
, int *rendarg
, int *rend
)
69 int iesc
; /* index of leading escape char */
70 int inam
; /* index of escape name */
71 int iarg
; /* index beginning the argument */
72 int iendarg
; /* index right after the argument */
73 int iend
; /* index right after the sequence */
74 int sesc
, snam
, sarg
, sendarg
, send
; /* for sub-escape */
75 int maxl
; /* expected length of the argument */
76 int argl
; /* actual length of the argument */
77 int c
, i
; /* for \[char...] parsing */
78 int valid_A
; /* for \A parsing */
79 enum mandoc_esc rval
; /* return value */
80 enum mandocerr err
; /* diagnostic code */
81 char term
; /* byte terminating the argument */
84 * Treat "\E" just like "\";
85 * it only makes a difference in copy mode.
91 } while (buf
[inam
] == 'E');
94 * Sort the following cases first by syntax category,
95 * then by escape sequence type, and finally by ASCII code.
98 iarg
= iendarg
= iend
= inam
+ 1;
104 /* Escape sequences taking no arguments at all. */
109 rval
= ESCAPE_UNSUPP
;
125 rval
= ESCAPE_IGNORE
;
149 rval
= ESCAPE_SPECIAL
;
155 rval
= ESCAPE_NOSPACE
;
158 rval
= ESCAPE_SKIPCHAR
;
161 /* Standard argument format. */
168 rval
= ESCAPE_EXPAND
;
176 rval
= ESCAPE_IGNORE
;
180 rval
= ESCAPE_SPECIAL
;
181 iendarg
= iend
= --iarg
;
187 /* Quoted arguments */
192 rval
= ESCAPE_EXPAND
;
205 rval
= ESCAPE_IGNORE
;
209 if (buf
[iarg
] != '\'') {
213 rval
= ESCAPE_SPECIAL
;
217 rval
= ESCAPE_NUMBERED
;
229 rval
= ESCAPE_OVERSTRIKE
;
233 /* Sizes support both forms, with additional peculiarities. */
236 rval
= ESCAPE_IGNORE
;
237 if (buf
[iarg
] == '+' || buf
[iarg
] == '-'||
238 buf
[iarg
] == ASCII_HYPH
)
256 if (buf
[iarg
- 1] == 's' &&
257 isdigit((unsigned char)buf
[iarg
+ 1])) {
266 iendarg
= iend
= iarg
;
269 /* Decide how to end the argument. */
271 if ((term
== '\b' || (term
== '\0' && maxl
== INT_MAX
)) &&
272 buf
[iarg
] == buf
[iesc
] && roff_escape(buf
, ln
, iendarg
,
273 &sesc
, &snam
, &sarg
, &sendarg
, &send
) == ESCAPE_EXPAND
)
277 if ((buf
[inam
] == 'N' && isdigit((unsigned char)buf
[iarg
])) ||
278 (buf
[inam
] == 'h' && strchr(" %&()*+-./0123456789:<=>",
279 buf
[iarg
]) != NULL
)) {
280 iendarg
= iend
= iarg
+ 1;
285 } else if (term
== '\0' && maxl
== INT_MAX
) {
286 if (buf
[inam
] == 'n' && (buf
[iarg
] == '+' || buf
[iarg
] == '-'))
294 if (buf
[++iarg
] == ' ') {
295 iendarg
= iend
= iarg
+ 1;
307 /* Advance to the end of the argument. */
312 if (buf
[iendarg
] == '\0') {
313 /* Ignore an incomplete argument except for \w. */
314 if (buf
[inam
] != 'w')
316 if (rval
== ESCAPE_EXPAND
)
317 err
= MANDOCERR_ESC_BAD
;
322 if (buf
[iendarg
] == term
) {
326 if (buf
[inam
] == 'N' &&
327 isdigit((unsigned char)buf
[iendarg
]) == 0) {
331 if (buf
[iendarg
] == buf
[iesc
]) {
332 switch (roff_escape(buf
, ln
, iendarg
,
333 &sesc
, &snam
, &sarg
, &sendarg
, &send
)) {
342 iendarg
= iend
= send
;
344 if (buf
[iendarg
] == ' ' || buf
[iendarg
] == '\t')
352 /* Post-process depending on the content of the argument. */
354 argl
= iendarg
- iarg
;
357 if (resc
== NULL
&& argl
== 2 &&
358 buf
[iarg
] == '.' && buf
[iarg
+ 1] == 'T')
359 rval
= ESCAPE_DEVICE
;
368 rval
= ESCAPE_UNSUPP
;
374 rval
= argl
== 1 ? ESCAPE_IGNORE
: ESCAPE_ERROR
;
377 rval
= buf
[iarg
- 1] == '[' ? ESCAPE_UNSUPP
:
391 rval
= mandoc_font(buf
+ iarg
, argl
);
397 * The file chars.c only provides one common list of
398 * character names, but \[-] == \- is the only one of
399 * the characters with one-byte names that allows
400 * enclosing the name in brackets.
403 if (term
!= '\0' && argl
== 1 && buf
[iarg
] != '-') {
408 /* Treat \[char...] as an alias for \N'...'. */
410 if (buf
[iarg
] == 'c') {
411 if (argl
< 6 || argl
> 7 ||
412 strncmp(buf
+ iarg
, "char", 4) != 0 ||
413 (int)strspn(buf
+ iarg
+ 4, "0123456789")
417 for (i
= iarg
; i
< iendarg
; i
++)
418 c
= 10 * c
+ (buf
[i
] - '0');
419 if (c
< 0x21 || (c
> 0x7e && c
< 0xa0) || c
> 0xff)
422 rval
= ESCAPE_NUMBERED
;
427 * Unicode escapes are defined in groff as \[u0000]
428 * to \[u10FFFF], where the contained value must be
429 * a valid Unicode codepoint. Here, however, only
430 * check the length and range.
433 if (buf
[iarg
] != 'u' || argl
< 5 || argl
> 7)
436 (buf
[iarg
+ 1] != '1' || buf
[iarg
+ 2] != '0'))
438 if (argl
== 6 && buf
[iarg
+ 1] == '0')
440 if (argl
== 5 && buf
[iarg
+ 1] == 'D' &&
441 strchr("89ABCDEF", buf
[iarg
+ 2]) != NULL
)
443 if ((int)strspn(buf
+ iarg
+ 1, "0123456789ABCDEFabcdef")
445 rval
= ESCAPE_UNICODE
;
458 rval
= ESCAPE_EXPAND
;
473 * Diagnostic messages are only issued when called
474 * from the parser, not when called from the formatters.
480 err
= MANDOCERR_ESC_BAD
;
483 err
= MANDOCERR_ESC_UNSUPP
;
486 if (buf
[inam
] != '\\' && buf
[inam
] != '.')
487 err
= MANDOCERR_ESC_UNDEF
;
490 if (mchars_spec2cp(buf
+ iarg
, argl
) < 0)
491 err
= MANDOCERR_ESC_BAD
;
496 if (err
!= MANDOCERR_OK
)
497 mandoc_msg(err
, ln
, iesc
, "%.*s", iend
- iesc
, buf
+ iesc
);