diff options
Diffstat (limited to 'developer_cmds/ctags/C.c')
-rw-r--r-- | developer_cmds/ctags/C.c | 555 |
1 files changed, 555 insertions, 0 deletions
diff --git a/developer_cmds/ctags/C.c b/developer_cmds/ctags/C.c new file mode 100644 index 0000000..c99d617 --- /dev/null +++ b/developer_cmds/ctags/C.c @@ -0,0 +1,555 @@ +/* $NetBSD: C.c,v 1.5 1998/07/24 07:30:08 ross Exp $ */ + +/* + * Copyright (c) 1987, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +#ifndef lint +#if 0 +static char sccsid[] = "@(#)C.c 8.4 (Berkeley) 4/2/94"; +#else +__RCSID("$NetBSD: C.c,v 1.5 1998/07/24 07:30:08 ross Exp $"); +#endif +#endif /* not lint */ + +#include <limits.h> +#include <stdio.h> +#include <string.h> + +#include "ctags.h" + +static int func_entry __P((void)); +static void hash_entry __P((void)); +static void skip_string __P((int)); +static int str_entry __P((int)); + +/* + * c_entries -- + * read .c and .h files and call appropriate routines + */ +void +c_entries() +{ + int c; /* current character */ + int level; /* brace level */ + int token; /* if reading a token */ + int t_def; /* if reading a typedef */ + int t_level; /* typedef's brace level */ + char *sp; /* buffer pointer */ + char tok[MAXTOKEN]; /* token buffer */ + + lineftell = ftell(inf); + sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1; + while (GETC(!=, EOF)) { + switch (c) { + /* + * Here's where it DOESN'T handle: { + * foo(a) + * { + * #ifdef notdef + * } + * #endif + * if (a) + * puts("hello, world"); + * } + */ + case '{': + ++level; + goto endtok; + case '}': + /* + * if level goes below zero, try and fix + * it, even though we've already messed up + */ + if (--level < 0) + level = 0; + goto endtok; + + case '\n': + SETLINE; + /* + * the above 3 cases are similar in that they + * are special characters that also end tokens. + */ + endtok: if (sp > tok) { + *sp = EOS; + token = YES; + sp = tok; + } + else + token = NO; + continue; + + /* + * We ignore quoted strings and character constants + * completely. + */ + case '"': + case '\'': + (void)skip_string(c); + break; + + /* + * comments can be fun; note the state is unchanged after + * return, in case we found: + * "foo() XX comment XX { int bar; }" + */ + case '/': + if (GETC(==, '*')) { + skip_comment(); + continue; + } + (void)ungetc(c, inf); + c = '/'; + goto storec; + + /* hash marks flag #define's. */ + case '#': + if (sp == tok) { + hash_entry(); + break; + } + goto storec; + + /* + * if we have a current token, parenthesis on + * level zero indicates a function. + */ + case '(': + if (!level && token) { + int curline; + + if (sp != tok) + *sp = EOS; + /* + * grab the line immediately, we may + * already be wrong, for example, + * foo\n + * (arg1, + */ + ct_getline(); + curline = lineno; + if (func_entry()) { + ++level; + pfnote(tok, curline); + } + break; + } + goto storec; + + /* + * semi-colons indicate the end of a typedef; if we find a + * typedef we search for the next semi-colon of the same + * level as the typedef. Ignoring "structs", they are + * tricky, since you can find: + * + * "typedef long time_t;" + * "typedef unsigned int u_int;" + * "typedef unsigned int u_int [10];" + * + * If looking at a typedef, we save a copy of the last token + * found. Then, when we find the ';' we take the current + * token if it starts with a valid token name, else we take + * the one we saved. There's probably some reasonable + * alternative to this... + */ + case ';': + if (t_def && level == t_level) { + t_def = NO; + ct_getline(); + if (sp != tok) + *sp = EOS; + pfnote(tok, lineno); + break; + } + goto storec; + + /* + * store characters until one that can't be part of a token + * comes along; check the current token against certain + * reserved words. + */ + default: + storec: if (!intoken(c)) { + if (sp == tok) + break; + *sp = EOS; + if (tflag) { + /* no typedefs inside typedefs */ + if (!t_def && + !memcmp(tok, "typedef",8)) { + t_def = YES; + t_level = level; + break; + } + /* catch "typedef struct" */ + if ((!t_def || t_level < level) + && (!memcmp(tok, "struct", 7) + || !memcmp(tok, "union", 6) + || !memcmp(tok, "enum", 5))) { + /* + * get line immediately; + * may change before '{' + */ + ct_getline(); + if (str_entry(c)) + ++level; + break; + /* } */ + } + } + sp = tok; + } + else if (sp != tok || begtoken(c)) { + *sp++ = c; + token = YES; + } + continue; + } + + sp = tok; + token = NO; + } +} + +/* + * func_entry -- + * handle a function reference + */ +static int +func_entry() +{ + int c; /* current character */ + int level = 0; /* for matching '()' */ + static char attribute[] = "__attribute__"; + char maybe_attribute[sizeof attribute + 1], + *anext; + + /* + * Find the end of the assumed function declaration. + * Note that ANSI C functions can have type definitions so keep + * track of the parentheses nesting level. + */ + while (GETC(!=, EOF)) { + switch (c) { + case '\'': + case '"': + /* skip strings and character constants */ + skip_string(c); + break; + case '/': + /* skip comments */ + if (GETC(==, '*')) + skip_comment(); + break; + case '(': + level++; + break; + case ')': + if (level == 0) + goto fnd; + level--; + break; + case '\n': + SETLINE; + } + } + return (NO); +fnd: + /* + * we assume that the character after a function's right paren + * is a token character if it's a function and a non-token + * character if it's a declaration. Comments don't count... + */ + for (anext = maybe_attribute;;) { + while (GETC(!=, EOF) && iswhite(c)) + if (c == '\n') + SETLINE; + if (c == EOF) + return NO; + /* + * Recognize the gnu __attribute__ extension, which would + * otherwise make the heuristic test DTWT + */ + if (anext == maybe_attribute) { + if (intoken(c)) { + *anext++ = c; + continue; + } + } else { + if (intoken(c)) { + if (anext - maybe_attribute + < (int)(sizeof attribute - 1)) + *anext++ = c; + else break; + continue; + } else { + *anext++ = '\0'; + if (strcmp(maybe_attribute, attribute) == 0) { + (void)ungetc(c, inf); + return NO; + } + break; + } + } + if (intoken(c) || c == '{') + break; + if (c == '/' && GETC(==, '*')) + skip_comment(); + else { /* don't ever "read" '/' */ + (void)ungetc(c, inf); + return (NO); + } + } + if (c != '{') + (void)skip_key('{'); + return (YES); +} + +/* + * hash_entry -- + * handle a line starting with a '#' + */ +static void +hash_entry() +{ + int c; /* character read */ + int curline; /* line started on */ + char *sp; /* buffer pointer */ + char tok[MAXTOKEN]; /* storage buffer */ + + curline = lineno; + for (sp = tok;;) { /* get next token */ + if (GETC(==, EOF)) + return; + if (iswhite(c)) + break; + *sp++ = c; + } + *sp = EOS; + if (memcmp(tok, "define", 6)) /* only interested in #define's */ + goto skip; + for (;;) { /* this doesn't handle "#define \n" */ + if (GETC(==, EOF)) + return; + if (!iswhite(c)) + break; + } + for (sp = tok;;) { /* get next token */ + *sp++ = c; + if (GETC(==, EOF)) + return; + /* + * this is where it DOESN'T handle + * "#define \n" + */ + if (!intoken(c)) + break; + } + *sp = EOS; + if (dflag || c == '(') { /* only want macros */ + ct_getline(); + pfnote(tok, curline); + } +skip: if (c == '\n') { /* get rid of rest of define */ + SETLINE + if (*(sp - 1) != '\\') + return; + } + (void)skip_key('\n'); +} + +/* + * str_entry -- + * handle a struct, union or enum entry + */ +static int +str_entry(c) + int c; /* current character */ +{ + int curline; /* line started on */ + char *sp; /* buffer pointer */ + char tok[LINE_MAX]; /* storage buffer */ + + curline = lineno; + while (iswhite(c)) + if (GETC(==, EOF)) + return (NO); + if (c == '{') /* it was "struct {" */ + return (YES); + for (sp = tok;;) { /* get next token */ + *sp++ = c; + if (GETC(==, EOF)) + return (NO); + if (!intoken(c)) + break; + } + + switch (c) { + case '{': /* it was "struct foo{" */ + --sp; + break; + case '\n': /* it was "struct foo\n" */ + SETLINE; + if(GETC(!=, '/')) + { + while (GETC(!=, EOF)) + if (!iswhite(c)) + break; + if (c != '{') { + (void)ungetc(c, inf); + return (NO); + } + break; + } + case '/': + /* skip comments */ + if (GETC(==, '*')) { + skip_comment(); + } + /*FALLTHROUGH*/ + default: /* probably "struct foo " */ + while (GETC(!=, EOF)) + if (!iswhite(c)) + break; + if (c != '{') { + (void)ungetc(c, inf); + return (NO); + } + } + *sp = EOS; + pfnote(tok, curline); + return (YES); +} + +/* + * skip_comment -- + * skip over comment + */ +void +skip_comment() +{ + int c; /* character read */ + int star; /* '*' flag */ + + for (star = 0; GETC(!=, EOF);) + switch(c) { + /* comments don't nest, nor can they be escaped. */ + case '*': + star = YES; + break; + case '/': + if (star) + return; + break; + case '\n': + SETLINE; + /*FALLTHROUGH*/ + default: + star = NO; + break; + } +} + +/* + * skip_string -- + * skip to the end of a string or character constant. + */ +void +skip_string(key) + int key; +{ + int c, + skip; + + for (skip = NO; GETC(!=, EOF); ) + switch (c) { + case '\\': /* a backslash escapes anything */ + skip = !skip; /* we toggle in case it's "\\" */ + break; + case '\n': + SETLINE; + /*FALLTHROUGH*/ + default: + if (c == key && !skip) + return; + skip = NO; + } +} + +/* + * skip_key -- + * skip to next char "key" + */ +int +skip_key(key) + int key; +{ + int c, + skip, + retval; + + for (skip = retval = NO; GETC(!=, EOF);) + switch(c) { + case '\\': /* a backslash escapes anything */ + skip = !skip; /* we toggle in case it's "\\" */ + break; + case ';': /* special case for yacc; if one */ + case '|': /* of these chars occurs, we may */ + retval = YES; /* have moved out of the rule */ + break; /* not used by C */ + case '\'': + case '"': + /* skip strings and character constants */ + skip_string(c); + break; + case '/': + /* skip comments */ + if (GETC(==, '*')) { + skip_comment(); + break; + } + (void)ungetc(c, inf); + c = '/'; + goto norm; + case '\n': + SETLINE; + /*FALLTHROUGH*/ + default: + norm: + if (c == key && !skip) + return (retval); + skip = NO; + } + return (retval); +} |