]>
git.cameronkatri.com Git - mandoc.git/blob - roff_escape.c
3fdcf8d251305701bb89178253a84d9283e687b4
1 /* $Id: roff_escape.c,v 1.12 2022/06/06 19:23:13 schwarze Exp $ */
3 * Copyright (c) 2011, 2012, 2013, 2014, 2015, 2017, 2018, 2020, 2022
4 * Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19 * Parser for roff(7) escape sequences.
20 * To be used by all mandoc(1) parsers and formatters.
33 * Traditional escape sequence interpreter for general use
34 * including in high-level formatters. This function does not issue
35 * diagnostics and is not usable for expansion in the roff(7) parser.
36 * It is documented in the mandoc_escape(3) manual page.
39 mandoc_escape(const char **rendarg
, const char **rarg
, int *rargl
)
41 int iarg
, iendarg
, iend
;
44 rval
= roff_escape(--*rendarg
, 0, 0,
45 NULL
, NULL
, &iarg
, &iendarg
, &iend
);
46 assert(rval
!= ESCAPE_EXPAND
);
48 *rarg
= *rendarg
+ iarg
;
50 *rargl
= iendarg
- iarg
;
56 * Full-featured escape sequence parser.
57 * If it encounters a nested escape sequence that requires expansion
58 * by the parser and re-parsing, the positions of that inner escape
59 * sequence are returned in *resc ... *rend.
60 * Otherwise, *resc is set to aesc and the positions of the escape
61 * sequence starting at aesc are returned.
62 * Diagnostic messages are generated if and only if resc != NULL,
63 * that is, if and only if called by roff_expand().
66 roff_escape(const char *buf
, const int ln
, const int aesc
,
67 int *resc
, int *rnam
, int *rarg
, int *rendarg
, int *rend
)
69 int iesc
; /* index of leading escape char */
70 int inam
; /* index of escape name */
71 int iarg
; /* index beginning the argument */
72 int iendarg
; /* index right after the argument */
73 int iend
; /* index right after the sequence */
74 int sesc
, snam
, sarg
, sendarg
, send
; /* for sub-escape */
75 int maxl
; /* expected length of the argument */
76 int argl
; /* actual length of the argument */
77 int c
, i
; /* for \[char...] parsing */
78 int valid_A
; /* for \A parsing */
79 enum mandoc_esc rval
; /* return value */
80 enum mandocerr err
; /* diagnostic code */
81 char term
; /* byte terminating the argument */
84 * Treat "\E" just like "\";
85 * it only makes a difference in copy mode.
91 } while (buf
[inam
] == 'E');
94 * Sort the following cases first by syntax category,
95 * then by escape sequence type, and finally by ASCII code.
98 iarg
= iendarg
= iend
= inam
+ 1;
104 /* Escape sequences taking no arguments at all. */
109 rval
= ESCAPE_UNSUPP
;
125 rval
= ESCAPE_IGNORE
;
149 rval
= ESCAPE_SPECIAL
;
155 rval
= ESCAPE_NOSPACE
;
158 rval
= ESCAPE_SKIPCHAR
;
161 /* Standard argument format. */
168 rval
= ESCAPE_EXPAND
;
176 rval
= ESCAPE_IGNORE
;
180 rval
= ESCAPE_SPECIAL
;
181 iendarg
= iend
= --iarg
;
187 /* Quoted arguments */
192 rval
= ESCAPE_EXPAND
;
205 rval
= ESCAPE_IGNORE
;
209 rval
= ESCAPE_SPECIAL
;
213 rval
= ESCAPE_NUMBERED
;
225 rval
= ESCAPE_OVERSTRIKE
;
229 /* Sizes support both forms, with additional peculiarities. */
232 rval
= ESCAPE_IGNORE
;
233 if (buf
[iarg
] == '+' || buf
[iarg
] == '-'||
234 buf
[iarg
] == ASCII_HYPH
)
252 if (buf
[iarg
- 1] == 's' &&
253 isdigit((unsigned char)buf
[iarg
+ 1])) {
262 iendarg
= iend
= iarg
;
265 /* Decide how to end the argument. */
267 if ((term
== '\b' || (term
== '\0' && maxl
== INT_MAX
)) &&
268 buf
[iarg
] == buf
[iesc
] && roff_escape(buf
, ln
, iendarg
,
269 &sesc
, &snam
, &sarg
, &sendarg
, &send
) == ESCAPE_EXPAND
)
273 if (strchr("BDHLRSvxNhl", buf
[inam
]) != NULL
&&
274 strchr(" %&()*+-./0123456789:<=>", buf
[iarg
]) != NULL
) {
275 if (rval
!= ESCAPE_EXPAND
)
277 if (buf
[inam
] != 'D') {
278 iendarg
= iend
= iarg
+ 1;
283 } else if (term
== '\0' && maxl
== INT_MAX
) {
284 if (buf
[inam
] == 'n' && (buf
[iarg
] == '+' || buf
[iarg
] == '-'))
292 if (buf
[++iarg
] == ' ') {
293 iendarg
= iend
= iarg
+ 1;
305 /* Advance to the end of the argument. */
310 if (buf
[iendarg
] == '\0') {
311 err
= MANDOCERR_ESC_INCOMPLETE
;
312 if (rval
!= ESCAPE_EXPAND
)
314 /* Ignore an incomplete argument except for \w. */
315 if (buf
[inam
] != 'w')
319 if (buf
[iendarg
] == term
) {
323 if (buf
[inam
] == 'N' &&
324 isdigit((unsigned char)buf
[iendarg
]) == 0) {
328 if (buf
[iendarg
] == buf
[iesc
]) {
329 switch (roff_escape(buf
, ln
, iendarg
,
330 &sesc
, &snam
, &sarg
, &sendarg
, &send
)) {
339 iendarg
= iend
= send
;
341 if (buf
[iendarg
] == ' ' || buf
[iendarg
] == '\t')
349 /* Post-process depending on the content of the argument. */
351 argl
= iendarg
- iarg
;
354 if (resc
== NULL
&& argl
== 2 &&
355 buf
[iarg
] == '.' && buf
[iarg
+ 1] == 'T')
356 rval
= ESCAPE_DEVICE
;
365 rval
= ESCAPE_UNSUPP
;
371 rval
= argl
== 1 ? ESCAPE_IGNORE
: ESCAPE_ERROR
;
374 rval
= buf
[iarg
- 1] == '[' ? ESCAPE_UNSUPP
:
388 rval
= mandoc_font(buf
+ iarg
, argl
);
393 err
= MANDOCERR_ESC_BADCHAR
;
399 * The file chars.c only provides one common list of
400 * character names, but \[-] == \- is the only one of
401 * the characters with one-byte names that allows
402 * enclosing the name in brackets.
405 if (term
!= '\0' && argl
== 1 && buf
[iarg
] != '-') {
406 err
= MANDOCERR_ESC_BADCHAR
;
411 /* Treat \[char...] as an alias for \N'...'. */
413 if (buf
[iarg
] == 'c') {
414 if (argl
< 6 || argl
> 7 ||
415 strncmp(buf
+ iarg
, "char", 4) != 0 ||
416 (int)strspn(buf
+ iarg
+ 4, "0123456789")
420 for (i
= iarg
; i
< iendarg
; i
++)
421 c
= 10 * c
+ (buf
[i
] - '0');
422 if (c
< 0x21 || (c
> 0x7e && c
< 0xa0) || c
> 0xff) {
423 err
= MANDOCERR_ESC_BADCHAR
;
427 rval
= ESCAPE_NUMBERED
;
432 * Unicode escapes are defined in groff as \[u0000]
433 * to \[u10FFFF], where the contained value must be
434 * a valid Unicode codepoint. Here, however, only
435 * check the length and range.
438 if (buf
[iarg
] != 'u' || argl
< 5 || argl
> 7)
441 (buf
[iarg
+ 1] != '1' || buf
[iarg
+ 2] != '0')) {
442 err
= MANDOCERR_ESC_BADCHAR
;
445 if (argl
== 6 && buf
[iarg
+ 1] == '0') {
446 err
= MANDOCERR_ESC_BADCHAR
;
449 if (argl
== 5 && buf
[iarg
+ 1] == 'D' &&
450 strchr("89ABCDEF", buf
[iarg
+ 2]) != NULL
) {
451 err
= MANDOCERR_ESC_BADCHAR
;
454 if ((int)strspn(buf
+ iarg
+ 1, "0123456789ABCDEFabcdef")
456 rval
= ESCAPE_UNICODE
;
469 rval
= ESCAPE_EXPAND
;
484 * Diagnostic messages are only issued when called
485 * from the parser, not when called from the formatters.
491 if (err
== MANDOCERR_OK
)
492 err
= MANDOCERR_ESC_BAD
;
495 err
= MANDOCERR_ESC_UNSUPP
;
498 if (buf
[inam
] != '\\' && buf
[inam
] != '.')
499 err
= MANDOCERR_ESC_UNDEF
;
502 if (mchars_spec2cp(buf
+ iarg
, argl
) >= 0)
504 else if (err
== MANDOCERR_OK
)
505 err
= MANDOCERR_ESC_UNKCHAR
;
510 if (err
!= MANDOCERR_OK
)
511 mandoc_msg(err
, ln
, iesc
, "%.*s", iend
- iesc
, buf
+ iesc
);