]>
git.cameronkatri.com Git - mandoc.git/blob - roff_escape.c
3 * Copyright (c) 2011, 2012, 2013, 2014, 2015, 2017, 2018, 2020, 2022
4 * Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19 * Parser for roff(7) escape sequences.
20 * To be used by all mandoc(1) parsers and formatters.
33 * Traditional escape sequence interpreter for general use
34 * including in high-level formatters. This function does not issue
35 * diagnostics and is not usable for expansion in the roff(7) parser.
36 * It is documented in the mandoc_escape(3) manual page.
39 mandoc_escape(const char **rendarg
, const char **rarg
, int *rargl
)
41 int iarg
, iendarg
, iend
;
44 rval
= roff_escape(--*rendarg
, 0, 0,
45 NULL
, NULL
, &iarg
, &iendarg
, &iend
);
46 assert(rval
!= ESCAPE_EXPAND
);
48 *rarg
= *rendarg
+ iarg
;
50 *rargl
= iendarg
- iarg
;
56 * Full-featured escape sequence parser.
57 * If it encounters a nested escape sequence that requires expansion
58 * by the parser and re-parsing, the positions of that inner escape
59 * sequence are returned in *resc ... *rend.
60 * Otherwise, *resc is set to aesc and the positions of the escape
61 * sequence starting at aesc are returned.
62 * Diagnostic messages are generated if and only if resc != NULL,
63 * that is, if and only if called by roff_expand().
66 roff_escape(const char *buf
, const int ln
, const int aesc
,
67 int *resc
, int *rnam
, int *rarg
, int *rendarg
, int *rend
)
69 int iesc
; /* index of leading escape char */
70 int inam
; /* index of escape name */
71 int iarg
; /* index beginning the argument */
72 int iendarg
; /* index right after the argument */
73 int iend
; /* index right after the sequence */
74 int sesc
, snam
, sarg
, sendarg
, send
; /* for sub-escape */
75 int maxl
; /* expected length of the argument */
76 int argl
; /* actual length of the argument */
77 int c
, i
; /* for \[char...] parsing */
78 int valid_A
; /* for \A parsing */
79 enum mandoc_esc rval
; /* return value */
80 enum mandocerr err
; /* diagnostic code */
81 char term
; /* byte terminating the argument */
84 * Treat "\E" just like "\";
85 * it only makes a difference in copy mode.
91 } while (buf
[inam
] == 'E');
94 * Sort the following cases first by syntax category,
95 * then by escape sequence type, and finally by ASCII code.
98 iarg
= iendarg
= iend
= inam
+ 1;
103 /* Escape sequences taking no arguments at all. */
108 rval
= ESCAPE_UNSUPP
;
124 rval
= ESCAPE_IGNORE
;
148 rval
= ESCAPE_SPECIAL
;
154 rval
= ESCAPE_NOSPACE
;
157 rval
= ESCAPE_SKIPCHAR
;
160 /* Standard argument format. */
167 rval
= ESCAPE_EXPAND
;
175 rval
= ESCAPE_IGNORE
;
179 rval
= ESCAPE_SPECIAL
;
180 iendarg
= iend
= --iarg
;
186 /* Quoted arguments */
191 rval
= ESCAPE_EXPAND
;
204 rval
= ESCAPE_IGNORE
;
208 if (buf
[iarg
] != '\'') {
212 rval
= ESCAPE_SPECIAL
;
216 rval
= ESCAPE_NUMBERED
;
228 rval
= ESCAPE_OVERSTRIKE
;
232 /* Sizes support both forms, with additional peculiarities. */
235 rval
= ESCAPE_IGNORE
;
236 if (buf
[iarg
] == '+' || buf
[iarg
] == '-'||
237 buf
[iarg
] == ASCII_HYPH
)
255 if (buf
[iarg
- 1] == 's' &&
256 isdigit((unsigned char)buf
[iarg
+ 1])) {
265 iendarg
= iend
= iarg
;
268 /* Decide how to end the argument. */
270 if ((term
== '\b' || (term
== '\0' && maxl
== INT_MAX
)) &&
271 buf
[iarg
] == buf
[iesc
] && roff_escape(buf
, ln
, iendarg
,
272 &sesc
, &snam
, &sarg
, &sendarg
, &send
) == ESCAPE_EXPAND
)
276 if ((buf
[inam
] == 'N' && isdigit((unsigned char)buf
[iarg
])) ||
277 (buf
[inam
] == 'h' && strchr(" %&()*+-./0123456789:<=>",
278 buf
[iarg
]) != NULL
)) {
279 iendarg
= iend
= iarg
+ 1;
284 } else if (term
== '\0' && maxl
== INT_MAX
) {
285 if (buf
[inam
] == 'n' && (buf
[iarg
] == '+' || buf
[iarg
] == '-'))
293 if (buf
[++iarg
] == ' ') {
294 iendarg
= iend
= iarg
+ 1;
306 /* Advance to the end of the argument. */
311 if (buf
[iendarg
] == '\0') {
312 /* Ignore an incomplete argument except for \w. */
313 if (buf
[inam
] != 'w')
317 if (buf
[iendarg
] == term
) {
321 if (buf
[inam
] == 'N' &&
322 isdigit((unsigned char)buf
[iendarg
]) == 0) {
326 if (buf
[iendarg
] == buf
[iesc
]) {
327 switch (roff_escape(buf
, ln
, iendarg
,
328 &sesc
, &snam
, &sarg
, &sendarg
, &send
)) {
337 iendarg
= iend
= send
;
339 if (buf
[iendarg
] == ' ' || buf
[iendarg
] == '\t')
346 if (resc
!= NULL
&& ((maxl
!= INT_MAX
&& maxl
!= 0) ||
347 (term
!= '\0' && buf
[iendarg
] != term
)))
348 mandoc_msg(MANDOCERR_ESC_BAD
, ln
, iesc
, "%s", buf
+ iesc
);
350 /* Post-process depending on the content of the argument. */
352 argl
= iendarg
- iarg
;
355 if (resc
== NULL
&& argl
== 2 &&
356 buf
[iarg
] == '.' && buf
[iarg
+ 1] == 'T')
357 rval
= ESCAPE_DEVICE
;
366 rval
= ESCAPE_UNSUPP
;
372 rval
= argl
== 1 ? ESCAPE_IGNORE
: ESCAPE_ERROR
;
375 rval
= buf
[iarg
- 1] == '[' ? ESCAPE_UNSUPP
:
389 rval
= mandoc_font(buf
+ iarg
, argl
);
395 * The file chars.c only provides one common list of
396 * character names, but \[-] == \- is the only one of
397 * the characters with one-byte names that allows
398 * enclosing the name in brackets.
401 if (term
!= '\0' && argl
== 1 && buf
[iarg
] != '-') {
406 /* Treat \[char...] as an alias for \N'...'. */
408 if (buf
[iarg
] == 'c') {
409 if (argl
< 6 || argl
> 7 ||
410 strncmp(buf
+ iarg
, "char", 4) != 0 ||
411 (int)strspn(buf
+ iarg
+ 4, "0123456789")
415 for (i
= iarg
; i
< iendarg
; i
++)
416 c
= 10 * c
+ (buf
[i
] - '0');
417 if (c
< 0x21 || (c
> 0x7e && c
< 0xa0) || c
> 0xff)
420 rval
= ESCAPE_NUMBERED
;
425 * Unicode escapes are defined in groff as \[u0000]
426 * to \[u10FFFF], where the contained value must be
427 * a valid Unicode codepoint. Here, however, only
428 * check the length and range.
431 if (buf
[iarg
] != 'u' || argl
< 5 || argl
> 7)
434 (buf
[iarg
+ 1] != '1' || buf
[iarg
+ 2] != '0'))
436 if (argl
== 6 && buf
[iarg
+ 1] == '0')
438 if (argl
== 5 && buf
[iarg
+ 1] == 'D' &&
439 strchr("89ABCDEF", buf
[iarg
+ 2]) != NULL
)
441 if ((int)strspn(buf
+ iarg
+ 1, "0123456789ABCDEFabcdef")
443 rval
= ESCAPE_UNICODE
;
456 rval
= ESCAPE_EXPAND
;
471 * Diagnostic messages are only issued when called
472 * from the parser, not when called from the formatters.
478 err
= MANDOCERR_ESC_BAD
;
481 err
= MANDOCERR_ESC_UNSUPP
;
484 if (buf
[inam
] == '\\')
486 err
= MANDOCERR_ESC_UNDEF
;
489 if (mchars_spec2cp(buf
+ iarg
, argl
) >= 0)
491 err
= MANDOCERR_ESC_BAD
;
496 mandoc_msg(err
, ln
, iesc
, "%.*s", iend
- iesc
, buf
+ iesc
);