]>
git.cameronkatri.com Git - mandoc.git/blob - roff_escape.c
1 /* $Id: roff_escape.c,v 1.14 2022/06/08 13:23:57 schwarze Exp $ */
3 * Copyright (c) 2011, 2012, 2013, 2014, 2015, 2017, 2018, 2020, 2022
4 * Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19 * Parser for roff(7) escape sequences.
20 * To be used by all mandoc(1) parsers and formatters.
33 * Traditional escape sequence interpreter for general use
34 * including in high-level formatters. This function does not issue
35 * diagnostics and is not usable for expansion in the roff(7) parser.
36 * It is documented in the mandoc_escape(3) manual page.
39 mandoc_escape(const char **rendarg
, const char **rarg
, int *rargl
)
41 int iarg
, iendarg
, iend
;
44 rval
= roff_escape(--*rendarg
, 0, 0,
45 NULL
, NULL
, &iarg
, &iendarg
, &iend
);
46 assert(rval
!= ESCAPE_EXPAND
);
48 *rarg
= *rendarg
+ iarg
;
50 *rargl
= iendarg
- iarg
;
56 * Full-featured escape sequence parser.
57 * If it encounters a nested escape sequence that requires expansion
58 * by the parser and re-parsing, the positions of that inner escape
59 * sequence are returned in *resc ... *rend.
60 * Otherwise, *resc is set to aesc and the positions of the escape
61 * sequence starting at aesc are returned.
62 * Diagnostic messages are generated if and only if ln != 0,
63 * that is, if and only if called by roff_expand().
66 roff_escape(const char *buf
, const int ln
, const int aesc
,
67 int *resc
, int *rnam
, int *rarg
, int *rendarg
, int *rend
)
69 int iesc
; /* index of leading escape char */
70 int inam
; /* index of escape name */
71 int iarg
; /* index beginning the argument */
72 int iendarg
; /* index right after the argument */
73 int iend
; /* index right after the sequence */
74 int sesc
, snam
, sarg
, sendarg
, send
; /* for sub-escape */
75 int escterm
; /* whether term is escaped */
76 int maxl
; /* expected length of the argument */
77 int argl
; /* actual length of the argument */
78 int c
, i
; /* for \[char...] parsing */
79 int valid_A
; /* for \A parsing */
80 enum mandoc_esc rval
; /* return value */
81 enum mandoc_esc stype
; /* for sub-escape */
82 enum mandocerr err
; /* diagnostic code */
83 char term
; /* byte terminating the argument */
86 * Treat "\E" just like "\";
87 * it only makes a difference in copy mode.
93 } while (buf
[inam
] == 'E');
96 * Sort the following cases first by syntax category,
97 * then by escape sequence type, and finally by ASCII code.
100 iarg
= iendarg
= iend
= inam
+ 1;
106 /* Escape sequences taking no arguments at all. */
111 rval
= ESCAPE_UNSUPP
;
127 rval
= ESCAPE_IGNORE
;
151 rval
= ESCAPE_SPECIAL
;
157 rval
= ESCAPE_NOSPACE
;
160 rval
= ESCAPE_SKIPCHAR
;
163 /* Standard argument format. */
170 rval
= ESCAPE_EXPAND
;
178 rval
= ESCAPE_IGNORE
;
182 rval
= ESCAPE_SPECIAL
;
183 iendarg
= iend
= --iarg
;
189 /* Quoted arguments */
194 rval
= ESCAPE_EXPAND
;
207 rval
= ESCAPE_IGNORE
;
211 rval
= ESCAPE_SPECIAL
;
215 rval
= ESCAPE_NUMBERED
;
227 rval
= ESCAPE_OVERSTRIKE
;
231 /* Sizes support both forms, with additional peculiarities. */
234 rval
= ESCAPE_IGNORE
;
235 if (buf
[iarg
] == '+' || buf
[iarg
] == '-'||
236 buf
[iarg
] == ASCII_HYPH
)
254 if (buf
[iarg
- 1] == 's' &&
255 isdigit((unsigned char)buf
[iarg
+ 1])) {
264 iendarg
= iend
= iarg
;
267 /* Decide how to end the argument. */
270 stype
= ESCAPE_EXPAND
;
271 if ((term
== '\b' || (term
== '\0' && maxl
== INT_MAX
)) &&
272 buf
[iarg
] == buf
[iesc
]) {
273 stype
= roff_escape(buf
, ln
, iendarg
,
274 &sesc
, &snam
, &sarg
, &sendarg
, &send
);
275 if (stype
== ESCAPE_EXPAND
)
280 if (stype
== ESCAPE_UNDEF
)
282 if (stype
!= ESCAPE_EXPAND
&& stype
!= ESCAPE_UNDEF
) {
283 if (strchr("BHLRSNhlvx", buf
[inam
]) != NULL
&&
284 strchr(" ,.0DLOXYZ^abdhlortuvx|~",
285 buf
[snam
]) != NULL
) {
286 err
= MANDOCERR_ESC_DELIM
;
288 iarg
= iendarg
= sesc
;
294 } else if (strchr("BDHLRSvxNhl", buf
[inam
]) != NULL
&&
295 strchr(" %&()*+-./0123456789:<=>", buf
[iarg
]) != NULL
) {
296 err
= MANDOCERR_ESC_DELIM
;
297 if (rval
!= ESCAPE_EXPAND
)
299 if (buf
[inam
] != 'D') {
300 iendarg
= iend
= iarg
+ 1;
306 } else if (term
== '\0' && maxl
== INT_MAX
) {
307 if (buf
[inam
] == 'n' && (buf
[iarg
] == '+' || buf
[iarg
] == '-'))
315 if (buf
[++iarg
] == ' ') {
316 iendarg
= iend
= iarg
+ 1;
317 err
= MANDOCERR_ESC_ARG
;
329 /* Advance to the end of the argument. */
334 if (buf
[iendarg
] == '\0') {
335 err
= MANDOCERR_ESC_INCOMPLETE
;
336 if (rval
!= ESCAPE_EXPAND
&&
337 rval
!= ESCAPE_OVERSTRIKE
)
339 /* Usually, ignore an incomplete argument. */
340 if (strchr("Aow", buf
[inam
]) == NULL
)
344 if (escterm
== 0 && buf
[iendarg
] == term
) {
348 if (buf
[iendarg
] == buf
[iesc
]) {
349 stype
= roff_escape(buf
, ln
, iendarg
,
350 &sesc
, &snam
, &sarg
, &sendarg
, &send
);
351 if (stype
== ESCAPE_EXPAND
)
355 (buf
[snam
] == term
|| buf
[inam
] == 'N'))
357 if (stype
!= ESCAPE_UNDEF
)
360 } else if (buf
[inam
] == 'N' &&
361 isdigit((unsigned char)buf
[iendarg
]) == 0) {
365 if (buf
[iendarg
] == ' ' || buf
[iendarg
] == '\t')
373 /* Post-process depending on the content of the argument. */
375 argl
= iendarg
- iarg
;
378 if (resc
== NULL
&& argl
== 2 &&
379 buf
[iarg
] == '.' && buf
[iarg
+ 1] == 'T')
380 rval
= ESCAPE_DEVICE
;
389 rval
= ESCAPE_UNSUPP
;
396 rval
= ESCAPE_IGNORE
;
398 err
= MANDOCERR_ESC_ARG
;
403 if (buf
[iarg
- 1] == '[')
404 rval
= ESCAPE_UNSUPP
;
406 err
= MANDOCERR_ESC_ARG
;
411 err
= MANDOCERR_ESC_ARG
;
422 rval
= mandoc_font(buf
+ iarg
, argl
);
423 if (rval
== ESCAPE_ERROR
)
424 err
= MANDOCERR_ESC_ARG
;
429 err
= MANDOCERR_ESC_BADCHAR
;
435 * The file chars.c only provides one common list of
436 * character names, but \[-] == \- is the only one of
437 * the characters with one-byte names that allows
438 * enclosing the name in brackets.
441 if (term
!= '\0' && argl
== 1 && buf
[iarg
] != '-') {
442 err
= MANDOCERR_ESC_BADCHAR
;
447 /* Treat \[char...] as an alias for \N'...'. */
449 if (buf
[iarg
] == 'c') {
450 if (argl
< 6 || argl
> 7 ||
451 strncmp(buf
+ iarg
, "char", 4) != 0 ||
452 (int)strspn(buf
+ iarg
+ 4, "0123456789")
456 for (i
= iarg
; i
< iendarg
; i
++)
457 c
= 10 * c
+ (buf
[i
] - '0');
458 if (c
< 0x21 || (c
> 0x7e && c
< 0xa0) || c
> 0xff) {
459 err
= MANDOCERR_ESC_BADCHAR
;
463 rval
= ESCAPE_NUMBERED
;
468 * Unicode escapes are defined in groff as \[u0000]
469 * to \[u10FFFF], where the contained value must be
470 * a valid Unicode codepoint. Here, however, only
471 * check the length and range.
474 if (buf
[iarg
] != 'u' || argl
< 5 || argl
> 7)
477 (buf
[iarg
+ 1] != '1' || buf
[iarg
+ 2] != '0')) {
478 err
= MANDOCERR_ESC_BADCHAR
;
481 if (argl
== 6 && buf
[iarg
+ 1] == '0') {
482 err
= MANDOCERR_ESC_BADCHAR
;
485 if (argl
== 5 && buf
[iarg
+ 1] == 'D' &&
486 strchr("89ABCDEF", buf
[iarg
+ 2]) != NULL
) {
487 err
= MANDOCERR_ESC_BADCHAR
;
490 if ((int)strspn(buf
+ iarg
+ 1, "0123456789ABCDEFabcdef")
492 rval
= ESCAPE_UNICODE
;
505 rval
= ESCAPE_EXPAND
;
522 * Diagnostic messages are only issued when called
523 * from the parser, not when called from the formatters.
528 err
= MANDOCERR_ESC_UNSUPP
;
531 if (buf
[inam
] != '\\' && buf
[inam
] != '.')
532 err
= MANDOCERR_ESC_UNDEF
;
535 if (mchars_spec2cp(buf
+ iarg
, argl
) >= 0)
537 else if (err
== MANDOCERR_OK
)
538 err
= MANDOCERR_ESC_UNKCHAR
;
543 if (err
!= MANDOCERR_OK
)
544 mandoc_msg(err
, ln
, iesc
, "%.*s", iend
- iesc
, buf
+ iesc
);