From 5fd83771641d15c418f747bd343ba6738d3875f7 Mon Sep 17 00:00:00 2001 From: Cameron Katri Date: Sun, 9 May 2021 14:20:58 -0400 Subject: Import macOS userland adv_cmds-176 basic_cmds-55 bootstrap_cmds-116.100.1 developer_cmds-66 diskdev_cmds-667.40.1 doc_cmds-53.60.1 file_cmds-321.40.3 mail_cmds-35 misc_cmds-34 network_cmds-606.40.1 patch_cmds-17 remote_cmds-63 shell_cmds-216.60.1 system_cmds-880.60.2 text_cmds-106 --- developer_cmds/indent/lexi.c | 608 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 608 insertions(+) create mode 100644 developer_cmds/indent/lexi.c (limited to 'developer_cmds/indent/lexi.c') diff --git a/developer_cmds/indent/lexi.c b/developer_cmds/indent/lexi.c new file mode 100644 index 0000000..fbee368 --- /dev/null +++ b/developer_cmds/indent/lexi.c @@ -0,0 +1,608 @@ +/* + * Copyright (c) 1985 Sun Microsystems, Inc. + * Copyright (c) 1980, 1993 + * The Regents of the University of California. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if 0 +#ifndef lint +static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93"; +#endif /* not lint */ +#endif +#include +__FBSDID("$FreeBSD: src/usr.bin/indent/lexi.c,v 1.21 2010/04/15 21:41:07 avg Exp $"); + +/* + * Here we have the token scanner for indent. It scans off one token and puts + * it in the global variable "token". It returns a code, indicating the type + * of token scanned. + */ + +#include +#include +#include +#include +#include +#include "indent_globs.h" +#include "indent_codes.h" +#include "indent.h" + +#define alphanum 1 +#define opchar 3 + +struct templ { + const char *rwd; + int rwcode; +}; + +struct templ specials[1000] = +{ + {"switch", 1}, + {"case", 2}, + {"break", 0}, + {"struct", 3}, + {"union", 3}, + {"enum", 3}, + {"default", 2}, + {"int", 4}, + {"char", 4}, + {"float", 4}, + {"double", 4}, + {"long", 4}, + {"short", 4}, + {"typdef", 4}, + {"unsigned", 4}, + {"register", 4}, + {"static", 4}, + {"global", 4}, + {"extern", 4}, + {"void", 4}, + {"const", 4}, + {"volatile", 4}, + {"goto", 0}, + {"return", 0}, + {"if", 5}, + {"while", 5}, + {"for", 5}, + {"else", 6}, + {"do", 6}, + {"sizeof", 7}, + {0, 0} +}; + +char chartype[128] = +{ /* this is used to facilitate the decision of + * what type (alphanumeric, operator) each + * character is */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 0, 0, 1, 3, 3, 0, + 0, 0, 3, 3, 0, 3, 0, 3, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 0, 0, 3, 3, 3, 3, + 0, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 0, 0, 0, 3, 1, + 0, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 0, 3, 0, 3, 0 +}; + +int +lexi(void) +{ + int unary_delim; /* this is set to 1 if the current token + * forces a following operator to be unary */ + static int last_code; /* the last token type returned */ + static int l_struct; /* set to 1 if the last token was 'struct' */ + int code; /* internal code to be returned */ + char qchar; /* the delimiter character for a string */ + + e_token = s_token; /* point to start of place to save token */ + unary_delim = false; + ps.col_1 = ps.last_nl; /* tell world that this token started in + * column 1 iff the last thing scanned was nl */ + ps.last_nl = false; + + while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ + ps.col_1 = false; /* leading blanks imply token is not in column + * 1 */ + if (++buf_ptr >= buf_end) + fill_buffer(); + } + + /* Scan an alphanumeric token */ + if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { + /* + * we have a character or number + */ + const char *j; /* used for searching thru list of + * + * reserved words */ + struct templ *p; + + if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { + int seendot = 0, + seenexp = 0, + seensfx = 0; + if (*buf_ptr == '0' && + (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) { + *e_token++ = *buf_ptr++; + *e_token++ = *buf_ptr++; + while (isxdigit(*buf_ptr)) { + CHECK_SIZE_TOKEN; + *e_token++ = *buf_ptr++; + } + } + else + while (1) { + if (*buf_ptr == '.') { + if (seendot) + break; + else + seendot++; + } + CHECK_SIZE_TOKEN; + *e_token++ = *buf_ptr++; + if (!isdigit(*buf_ptr) && *buf_ptr != '.') { + if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) + break; + else { + seenexp++; + seendot++; + CHECK_SIZE_TOKEN; + *e_token++ = *buf_ptr++; + if (*buf_ptr == '+' || *buf_ptr == '-') + *e_token++ = *buf_ptr++; + } + } + } + while (1) { + if (!(seensfx & 1) && + (*buf_ptr == 'U' || *buf_ptr == 'u')) { + CHECK_SIZE_TOKEN; + *e_token++ = *buf_ptr++; + seensfx |= 1; + continue; + } + if (!(seensfx & 2) && + (*buf_ptr == 'L' || *buf_ptr == 'l')) { + CHECK_SIZE_TOKEN; + if (buf_ptr[1] == buf_ptr[0]) + *e_token++ = *buf_ptr++; + *e_token++ = *buf_ptr++; + seensfx |= 2; + continue; + } + break; + } + } + else + while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) { + /* fill_buffer() terminates buffer with newline */ + if (*buf_ptr == BACKSLASH) { + if (*(buf_ptr + 1) == '\n') { + buf_ptr += 2; + if (buf_ptr >= buf_end) + fill_buffer(); + } else + break; + } + CHECK_SIZE_TOKEN; + /* copy it over */ + *e_token++ = *buf_ptr++; + if (buf_ptr >= buf_end) + fill_buffer(); + } + *e_token++ = '\0'; + while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ + if (++buf_ptr >= buf_end) + fill_buffer(); + } + ps.its_a_keyword = false; + ps.sizeof_keyword = false; + if (l_struct && !ps.p_l_follow) { + /* if last token was 'struct' and we're not + * in parentheses, then this token + * should be treated as a declaration */ + l_struct = false; + last_code = ident; + ps.last_u_d = true; + return (decl); + } + ps.last_u_d = l_struct; /* Operator after identifier is binary + * unless last token was 'struct' */ + l_struct = false; + last_code = ident; /* Remember that this is the code we will + * return */ + + if (auto_typedefs) { + const char *q = s_token; + size_t q_len = strlen(q); + /* Check if we have an "_t" in the end */ + if (q_len > 2 && + (strcmp(q + q_len - 2, "_t") == 0)) { + ps.its_a_keyword = true; + ps.last_u_d = true; + goto found_auto_typedef; + } + } + + /* + * This loop will check if the token is a keyword. + */ + for (p = specials; (j = p->rwd) != 0; p++) { + const char *q = s_token; /* point at scanned token */ + if (*j++ != *q++ || *j++ != *q++) + continue; /* This test depends on the fact that + * identifiers are always at least 1 character + * long (ie. the first two bytes of the + * identifier are always meaningful) */ + if (q[-1] == 0) + break; /* If its a one-character identifier */ + while (*q++ == *j) + if (*j++ == 0) + goto found_keyword; /* I wish that C had a multi-level + * break... */ + } + if (p->rwd) { /* we have a keyword */ + found_keyword: + ps.its_a_keyword = true; + ps.last_u_d = true; + switch (p->rwcode) { + case 1: /* it is a switch */ + return (swstmt); + case 2: /* a case or default */ + return (casestmt); + + case 3: /* a "struct" */ + /* + * Next time around, we will want to know that we have had a + * 'struct' + */ + l_struct = true; + /* FALLTHROUGH */ + + case 4: /* one of the declaration keywords */ + found_auto_typedef: + if (ps.p_l_follow) { + ps.cast_mask |= (1 << ps.p_l_follow) & ~ps.sizeof_mask; + break; /* inside parens: cast, param list or sizeof */ + } + last_code = decl; + return (decl); + + case 5: /* if, while, for */ + return (sp_paren); + + case 6: /* do, else */ + return (sp_nparen); + + case 7: + ps.sizeof_keyword = true; + default: /* all others are treated like any other + * identifier */ + return (ident); + } /* end of switch */ + } /* end of if (found_it) */ + if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) { + char *tp = buf_ptr; + while (tp < buf_end) + if (*tp++ == ')' && (*tp == ';' || *tp == ',')) + goto not_proc; + strncpy(ps.procname, token, sizeof ps.procname - 1); + ps.in_parameter_declaration = 1; + rparen_count = 1; + not_proc:; + } + /* + * The following hack attempts to guess whether or not the current + * token is in fact a declaration keyword -- one that has been + * typedefd + */ + if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_') + && !ps.p_l_follow + && !ps.block_init + && (ps.last_token == rparen || ps.last_token == semicolon || + ps.last_token == decl || + ps.last_token == lbrace || ps.last_token == rbrace)) { + ps.its_a_keyword = true; + ps.last_u_d = true; + last_code = decl; + return decl; + } + if (last_code == decl) /* if this is a declared variable, then + * following sign is unary */ + ps.last_u_d = true; /* will make "int a -1" work */ + last_code = ident; + return (ident); /* the ident is not in the list */ + } /* end of procesing for alpanum character */ + + /* Scan a non-alphanumeric token */ + + *e_token++ = *buf_ptr; /* if it is only a one-character token, it is + * moved here */ + *e_token = '\0'; + if (++buf_ptr >= buf_end) + fill_buffer(); + + switch (*token) { + case '\n': + unary_delim = ps.last_u_d; + ps.last_nl = true; /* remember that we just had a newline */ + code = (had_eof ? 0 : newline); + + /* + * if data has been exhausted, the newline is a dummy, and we should + * return code to stop + */ + break; + + case '\'': /* start of quoted character */ + case '"': /* start of string */ + qchar = *token; + if (troff) { + e_token[-1] = '`'; + if (qchar == '"') + *e_token++ = '`'; + e_token = chfont(&bodyf, &stringf, e_token); + } + do { /* copy the string */ + while (1) { /* move one character or [/] */ + if (*buf_ptr == '\n') { + diag2(1, "Unterminated literal"); + goto stop_lit; + } + CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, + * since CHECK_SIZE guarantees that there + * are at least 5 entries left */ + *e_token = *buf_ptr++; + if (buf_ptr >= buf_end) + fill_buffer(); + if (*e_token == BACKSLASH) { /* if escape, copy extra char */ + if (*buf_ptr == '\n') /* check for escaped newline */ + ++line_no; + if (troff) { + *++e_token = BACKSLASH; + if (*buf_ptr == BACKSLASH) + *++e_token = BACKSLASH; + } + *++e_token = *buf_ptr++; + ++e_token; /* we must increment this again because we + * copied two chars */ + if (buf_ptr >= buf_end) + fill_buffer(); + } + else + break; /* we copied one character */ + } /* end of while (1) */ + } while (*e_token++ != qchar); + if (troff) { + e_token = chfont(&stringf, &bodyf, e_token - 1); + if (qchar == '"') + *e_token++ = '\''; + } +stop_lit: + code = ident; + break; + + case ('('): + case ('['): + unary_delim = true; + code = lparen; + break; + + case (')'): + case (']'): + code = rparen; + break; + + case '#': + unary_delim = ps.last_u_d; + code = preesc; + break; + + case '?': + unary_delim = true; + code = question; + break; + + case (':'): + code = colon; + unary_delim = true; + break; + + case (';'): + unary_delim = true; + code = semicolon; + break; + + case ('{'): + unary_delim = true; + + /* + * if (ps.in_or_st) ps.block_init = 1; + */ + /* ? code = ps.block_init ? lparen : lbrace; */ + code = lbrace; + break; + + case ('}'): + unary_delim = true; + /* ? code = ps.block_init ? rparen : rbrace; */ + code = rbrace; + break; + + case 014: /* a form feed */ + unary_delim = ps.last_u_d; + ps.last_nl = true; /* remember this so we can set 'ps.col_1' + * right */ + code = form_feed; + break; + + case (','): + unary_delim = true; + code = comma; + break; + + case '.': + unary_delim = false; + code = period; + break; + + case '-': + case '+': /* check for -, +, --, ++ */ + code = (ps.last_u_d ? unary_op : binary_op); + unary_delim = true; + + if (*buf_ptr == token[0]) { + /* check for doubled character */ + *e_token++ = *buf_ptr++; + /* buffer overflow will be checked at end of loop */ + if (last_code == ident || last_code == rparen) { + code = (ps.last_u_d ? unary_op : postop); + /* check for following ++ or -- */ + unary_delim = false; + } + } + else if (*buf_ptr == '=') + /* check for operator += */ + *e_token++ = *buf_ptr++; + else if (*buf_ptr == '>') { + /* check for operator -> */ + *e_token++ = *buf_ptr++; + if (!pointer_as_binop) { + unary_delim = false; + code = unary_op; + ps.want_blank = false; + } + } + break; /* buffer overflow will be checked at end of + * switch */ + + case '=': + if (ps.in_or_st) + ps.block_init = 1; +#ifdef undef + if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ + e_token[-1] = *buf_ptr++; + if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) + *e_token++ = *buf_ptr++; + *e_token++ = '='; /* Flip =+ to += */ + *e_token = 0; + } +#else + if (*buf_ptr == '=') {/* == */ + *e_token++ = '='; /* Flip =+ to += */ + buf_ptr++; + *e_token = 0; + } +#endif + code = binary_op; + unary_delim = true; + break; + /* can drop thru!!! */ + + case '>': + case '<': + case '!': /* ops like <, <<, <=, !=, etc */ + if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { + *e_token++ = *buf_ptr; + if (++buf_ptr >= buf_end) + fill_buffer(); + } + if (*buf_ptr == '=') + *e_token++ = *buf_ptr++; + code = (ps.last_u_d ? unary_op : binary_op); + unary_delim = true; + break; + + default: + if (token[0] == '/' && *buf_ptr == '*') { + /* it is start of comment */ + *e_token++ = '*'; + + if (++buf_ptr >= buf_end) + fill_buffer(); + + code = comment; + unary_delim = ps.last_u_d; + break; + } + while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { + /* + * handle ||, &&, etc, and also things as in int *****i + */ + *e_token++ = *buf_ptr; + if (++buf_ptr >= buf_end) + fill_buffer(); + } + code = (ps.last_u_d ? unary_op : binary_op); + unary_delim = true; + + + } /* end of switch */ + if (code != newline) { + l_struct = false; + last_code = code; + } + if (buf_ptr >= buf_end) /* check for input buffer empty */ + fill_buffer(); + ps.last_u_d = unary_delim; + *e_token = '\0'; /* null terminate the token */ + return (code); +} + +/* + * Add the given keyword to the keyword table, using val as the keyword type + */ +void +addkey(char *key, int val) +{ + struct templ *p = specials; + while (p->rwd) + if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0) + return; + else + p++; + if (p >= specials + sizeof specials / sizeof specials[0]) + return; /* For now, table overflows are silently + * ignored */ + p->rwd = key; + p->rwcode = val; + p[1].rwd = 0; + p[1].rwcode = 0; +} -- cgit v1.2.3-56-ge451