From 0fadf54b2f467105afe2720bd68c54e6cfc2cc2e Mon Sep 17 00:00:00 2001 From: Royce Mitchell III Date: Thu, 6 Nov 2003 05:55:26 +0000 Subject: [PATCH] lots of stuff - tracking multiple headers, include dependencies, preprocessor logic, etc svn path=/trunk/; revision=6542 --- reactos/apps/utils/sdkparse/.cvsignore | 3 +- reactos/apps/utils/sdkparse/Header.h | 26 ++ reactos/apps/utils/sdkparse/Symbol.h | 18 ++ reactos/apps/utils/sdkparse/Type.h | 18 ++ reactos/apps/utils/sdkparse/iskeyword.cpp | 80 +++++ reactos/apps/utils/sdkparse/sdkparse.cpp | 377 ++++++++++++++++++---- reactos/apps/utils/sdkparse/sdkparse.dsp | 12 + reactos/apps/utils/sdkparse/skip_ws.cpp | 5 + reactos/apps/utils/sdkparse/skip_ws.h | 2 + reactos/apps/utils/sdkparse/tokenize.cpp | 21 +- 10 files changed, 503 insertions(+), 59 deletions(-) create mode 100644 reactos/apps/utils/sdkparse/Header.h create mode 100644 reactos/apps/utils/sdkparse/Symbol.h create mode 100644 reactos/apps/utils/sdkparse/Type.h diff --git a/reactos/apps/utils/sdkparse/.cvsignore b/reactos/apps/utils/sdkparse/.cvsignore index f694c1a069b..0cbdeb36635 100644 --- a/reactos/apps/utils/sdkparse/.cvsignore +++ b/reactos/apps/utils/sdkparse/.cvsignore @@ -1,3 +1,4 @@ *.ncb *.plg -Debug \ No newline at end of file +Debug +*.opt \ No newline at end of file diff --git a/reactos/apps/utils/sdkparse/Header.h b/reactos/apps/utils/sdkparse/Header.h new file mode 100644 index 00000000000..9c14ef5d2c4 --- /dev/null +++ b/reactos/apps/utils/sdkparse/Header.h @@ -0,0 +1,26 @@ +// Header.h + +#ifndef HEADER_H +#define HEADER_H + +#include "Symbol.h" + +class Header +{ +public: + std::string filename; + std::vector includes, libc_includes, pragmas; + std::vector symbols; + bool done, externc; + + std::vector ifs, ifspreproc; + + Header ( const std::string& filename_ ) + : filename(filename_) + { + done = false; + externc = false; + } +}; + +#endif//HEADER_H diff --git a/reactos/apps/utils/sdkparse/Symbol.h b/reactos/apps/utils/sdkparse/Symbol.h new file mode 100644 index 00000000000..9af0378ea0b --- /dev/null +++ b/reactos/apps/utils/sdkparse/Symbol.h @@ -0,0 +1,18 @@ +// Symbol.h + +#ifndef SYMBOL_H +#define SYMBOL_H + +#include "Type.h" + +class Symbol +{ +public: + Type type; + std::vector names; + std::vector dependencies; + std::vector ifs; + std::string definition; +}; + +#endif//SYMBOL_H diff --git a/reactos/apps/utils/sdkparse/Type.h b/reactos/apps/utils/sdkparse/Type.h new file mode 100644 index 00000000000..db42f33a535 --- /dev/null +++ b/reactos/apps/utils/sdkparse/Type.h @@ -0,0 +1,18 @@ +// Type.h + +#ifndef TYPE_H +#define TYPE_H + +typedef enum +{ + T_UNKNOWN = -1, + T_TIDENT, + T_MACRO, + T_DEFINE, + T_VARIABLE, + T_FUNCTION, + T_FUNCTION_PTR, + T_STRUCT +} Type; + +#endif//TYPE_H diff --git a/reactos/apps/utils/sdkparse/iskeyword.cpp b/reactos/apps/utils/sdkparse/iskeyword.cpp index a84abde8cbc..6649d22fb46 100644 --- a/reactos/apps/utils/sdkparse/iskeyword.cpp +++ b/reactos/apps/utils/sdkparse/iskeyword.cpp @@ -15,42 +15,122 @@ bool iskeyword ( const string& ident ) #define I(s) if ( ident == #s ) return true; switch ( ident[0] ) { + case '_': + I(__cdecl); + I(__declspec); + I(__except); + I(__fastcall); + I(__finally); + I(__inline); + I(__int8); + I(__int16); + I(__int32); + I(__int64); + I(__leave); + I(__stdcall); + I(__try); + break; case 'b': I(bool); + I(break); break; case 'c': + I(case); + I(catch); I(char); + I(class); I(const); + I(const_cast); + I(continue); break; case 'd': + I(default); + I(delete); + I(dllexport); + I(dllimport); I(do); I(double); + I(dynamic_cast); + break; + case 'e': + I(else); + I(enum); + I(explicit); + I(extern); break; case 'f': I(false); I(float); I(for); + I(friend); + break; + case 'g': + I(goto); break; case 'i': I(if); + I(inline); I(int); break; case 'l': I(long); break; + case 'm': + I(mutable); + break; + case 'n': + I(naked); + I(namespace); + I(new); + I(noreturn); + break; + case 'o': + I(operator); + break; + case 'p': + I(private); + I(protected); + I(public); + break; case 'r': + I(register); + I(reinterpret_cast); I(return); break; case 's': I(short); + I(signed); + I(sizeof); + I(static); + I(static_cast); I(struct); I(switch); break; case 't': + I(template); + I(this); + I(thread); + I(throw); I(true); + I(try); I(typedef); + I(typeid); + I(typename); + break; + case 'u': + I(union); + I(unsigned); + I(using); + I(uuid); + I(__uuidof); + break; + case 'v': + I(virtual); + I(void); + I(volatile); break; case 'w': + I(wmain); I(while); break; } diff --git a/reactos/apps/utils/sdkparse/sdkparse.cpp b/reactos/apps/utils/sdkparse/sdkparse.cpp index 56216438335..450d870a1b6 100644 --- a/reactos/apps/utils/sdkparse/sdkparse.cpp +++ b/reactos/apps/utils/sdkparse/sdkparse.cpp @@ -4,10 +4,15 @@ #pragma warning ( disable : 4786 ) #endif//_MSC_VER +#define WIN32_LEAN_AND_MEAN +#include + #include #include #include +#include "EnumFilesImpl.h" + #include "assert.h" #include "File.h" #include "binary2cstr.h" @@ -15,31 +20,28 @@ #include "tokenize.h" #include "skip_ws.h" #include "iskeyword.h" +#include "Type.h" +#include "Header.h" using std::string; using std::vector; -typedef enum -{ - T_UNKNOWN = -1, - T_MACRO, - T_DEFINE, - T_VARIABLE, - T_FUNCTION, - T_FUNCTION_PTR, - T_STRUCT -} Type; +vector headers; bool import_file ( const char* filename ); -char* findend ( char* p ); +char* findend ( char* p, bool& externc ); Type identify ( const vector& tokens, int off = 0 ); Type process ( const string& element, vector& names, bool& isTypedef, vector& dependencies ); +void process_preprocessor ( const char* filename, Header& h, const string& element ); +void process_c ( Header& h, const string& element ); int parse_type ( Type t, const vector& tokens, int off, vector& names, vector& dependencies ); +int parse_tident ( const vector& tokens, int off, vector& names, vector& dependencies ); int parse_variable ( const vector& tokens, int off, vector& names, vector& dependencies ); int parse_struct ( const vector& tokens, int off, vector& names, vector& dependencies ); int parse_function ( const vector& tokens, int off, vector& names, vector& dependencies ); int parse_function_ptr ( const vector& tokens, int off, vector& names, vector& dependencies ); +/* #ifndef ASSERT #define ASSERT(x) \ do \ @@ -52,19 +54,50 @@ do \ } \ }while(0) #endif//ASSERT +*/ + +bool is_libc_include ( const string& inc ) +{ + string s ( inc ); + strlwr ( &s[0] ); + if ( s == "limits.h" ) + return true; + if ( s == "stdarg.h" ) + return true; + if ( s == "basetsd.h" ) + return true; + return false; +} + +BOOL FileEnumProc ( PWIN32_FIND_DATA pwfd, const char* filename, long lParam ) +{ + if ( !is_libc_include ( filename ) ) + import_file ( filename ); + return TRUE; +} void main() { - import_file ( "test.h" ); + EnumFilesInDirectory ( "c:/cvs/reactos/include", "*.h", FileEnumProc, 0, TRUE, FALSE ); } bool import_file ( const char* filename ) { + int i; + + for ( i = 0; i < headers.size(); i++ ) + { + if ( headers[i]->filename == filename ) + return true; + } + + printf ( "%s\n", filename ); + string s; if ( !File::LoadIntoString ( s, filename ) ) { printf ( "Couldn't load \"%s\" for input.\n", filename ); - return false; + exit(0); } // strip comments from the file... @@ -76,6 +109,9 @@ bool import_file ( const char* filename ) File::SaveFromString ( s, no_comments ); }*/ + Header* h = new Header ( filename ); + headers.push_back ( h ); + char* p = &s[0]; while ( p ) { @@ -86,52 +122,227 @@ bool import_file ( const char* filename ) // check for pre-processor command if ( *p == '#' ) { - p = strchr ( p, '\n' ); - if ( p ) - p++; + char* end = strchr ( p, '\n' ); + if ( !end ) + end = p + strlen(p); + string element ( p, end-p ); + + process_preprocessor ( filename, *h, element ); + + p = end; + } + else if ( *p == '}' && h->externc ) + { + p++; + p = skip_ws ( p ); + + if ( *p == ';' ) p++; } else { - char* end = findend ( p ); - if ( !end ) - end = p + strlen(p); - else if ( *end ) - end++; - string element ( p, end-p ); + bool externc = false; + char* end = findend ( p, externc ); + if ( externc ) + h->externc = true; + else + { + if ( !end ) + end = p + strlen(p); + else if ( *end ) + end++; + string element ( p, end-p ); + + process_c ( *h, element ); + } p = end; - - printf ( "\"%s\"\n\n", binary2cstr(element).c_str() ); - - vector names, dependencies; - bool isTypedef; - Type t = process ( element, names, isTypedef, dependencies ); - - printf ( "names: " ); - if ( names.size() ) - { - printf ( "%s", names[0].c_str() ); - for ( int i = 1; i < names.size(); i++ ) - printf ( ", %s", names[i].c_str() ); - } - else - printf ( "(none)" ); - printf ( "\n\n" ); - - printf ( "dependencies: " ); - if ( dependencies.size() ) - { - printf ( "%s", dependencies[0].c_str() ); - for ( int i = 1; i < dependencies.size(); i++ ) - printf ( ", %s", dependencies[i].c_str() ); - } - else - printf ( "(none)" ); - printf ( "\n\n" ); } } + h->done = true; return true; } +string get_hdrguardtext ( const char* filename ) +{ + string s ( filename ); + char* p = &s[0]; + char* p2; + while ( (p2 = strchr(p, '\\')) ) + *p2 = '/'; + while ( (p2 = strchr(p,'/')) ) + p = p2 + 1; + char* end = strchr ( p, '.' ); + ASSERT(end); + while ( (p2 = strchr(end+1,'.')) ) + end = p2; + string hdrguardtext ( p, end-p ); + strupr ( &hdrguardtext[0] ); + return hdrguardtext; +} + +void process_preprocessor ( const char* filename, Header& h, const string& element ) +{ + string hdrguardtext ( get_hdrguardtext ( filename ) ); + + const char* p = &element[0]; + ASSERT ( *p == '#' ); + p++; + p = skip_ws ( p ); + const char* end = p; + while ( iscsym(*end) ) + end++; + string preproc ( p, end-p ); + p = end+1; + p = skip_ws ( p ); + + if ( preproc == "include" ) + { + ASSERT ( *p == '<' || *p == '\"' ); + p++; + p = skip_ws ( p ); + const char* end = strpbrk ( p, ">\"" ); + if ( !end ) + end = p + strlen(p); + while ( end > p && isspace(end[-1]) ) + end--; + string include_filename ( p, end-p ); + if ( is_libc_include ( include_filename ) ) + h.libc_includes.push_back ( include_filename ); + else + { + bool loaded = false; + for ( int i = 0; i < headers.size(); i++ ) + { + if ( headers[i]->filename == include_filename ) + { + if ( !headers[i]->done ) + { + printf ( "circular dependency between '%s' and '%s'\n", filename, include_filename.c_str() ); + exit ( -1 ); + } + loaded = true; + } + } + if ( !loaded ) + import_file ( include_filename.c_str() ); + h.includes.push_back ( include_filename ); + } + } + else if ( preproc == "define" ) + { + size_t len = element.size(); + if ( strstr ( element.c_str(), hdrguardtext.c_str() ) + && element[len-2] == '_' + && element[len-1] == 'H' ) + { + // header include guard... ignore! + return; + } + Symbol *s = new Symbol; + s->type = T_DEFINE; + + p += 6; + p = skip_ws ( p ); + + const char* end = p; + while ( iscsym(*end) ) + end++; + + s->names.push_back ( string(p,end-p) ); + + s->definition = element; + + h.symbols.push_back ( s ); + } + else if ( preproc == "if" || preproc == "ifdef" || preproc == "ifndef" ) + { + size_t len = element.size(); + // check for header include guard... + if ( strstr ( element.c_str(), hdrguardtext.c_str() ) + && element[len-2] == '_' + && element[len-1] == 'H' ) + h.ifs.push_back ( string("") ); + else + h.ifs.push_back ( element ); + h.ifspreproc.push_back ( preproc ); + } + else if ( preproc == "endif" ) + { + h.ifs.pop_back(); + h.ifspreproc.pop_back(); + } + else if ( preproc == "else" ) + { + string& oldpre = h.ifspreproc.back(); + ASSERT ( oldpre != "else" ); + if ( oldpre == "ifdef" ) + h.ifs.back() = "ifndef"; + else if ( oldpre == "ifndef" ) + h.ifs.back() = "ifdef"; + else if ( oldpre == "if" ) + h.ifs.back() = string("!(") + h.ifs.back() + ")"; + else + { + printf ( "unrecognized preproc '%s'\n", oldpre.c_str() ); + ASSERT(0); + return; + } + oldpre = "else"; + } + else if ( preproc == "include_next" ) + { + // we can safely ignore this command... + } + else if ( preproc == "pragma" ) + { + h.pragmas.push_back ( element ); + } + else + { + printf ( "process_preprocessor() choked on '%s'\n", preproc.c_str() ); + } +} + +void process_c ( Header& h, const string& element ) +{ + //printf ( "\"%s\"\n\n", binary2cstr(element).c_str() ); + + bool isTypedef; + + Symbol *s = new Symbol; + s->definition = element; + s->type = process ( element, s->names, isTypedef, s->dependencies ); + + for ( int i = 0; i < h.ifs.size(); i++ ) + { + if ( h.ifs[i].size() ) + s->ifs.push_back ( h.ifs[i] ); + } + + /*printf ( "names: " ); + if ( s->names.size() ) + { + printf ( "%s", s->names[0].c_str() ); + for ( int i = 1; i < s->names.size(); i++ ) + printf ( ", %s", s->names[i].c_str() ); + } + else + printf ( "(none)" ); + printf ( "\n\n" ); + + printf ( "dependencies: " ); + if ( s->dependencies.size() ) + { + printf ( "%s", s->dependencies[0].c_str() ); + for ( int i = 1; i < s->dependencies.size(); i++ ) + printf ( ", %s", s->dependencies[i].c_str() ); + } + else + printf ( "(none)" ); + printf ( "\n\n" );*/ + + h.symbols.push_back ( s ); +} + char* skipsemi ( char* p ) { if ( *p != '{' ) // } @@ -157,8 +368,32 @@ char* skipsemi ( char* p ) } } -char* findend ( char* p ) +char* findend ( char* p, bool& externc ) { + // special-case for 'extern "C"' + if ( !strncmp ( p, "extern", 6 ) ) + { + char* p2 = p + 6; + p2 = skip_ws ( p2 ); + if ( !strncmp ( p2, "\"C\"", 3 ) ) + { + p2 += 3; + p2 = skip_ws ( p2 ); + if ( *p2 == '{' ) + { + externc = true; + return p2+1; + } + } + } + // special-case for 'typedef_tident' + if ( !strncmp ( p, "typedef_tident", 14 ) ) + { + char* end = strchr ( p, ')' ); + ASSERT(end); + return end; + } + externc = false; for ( ;; ) { char* end = strchr ( p, ';' ); @@ -173,13 +408,25 @@ char* findend ( char* p ) Type identify ( const vector& tokens, int off ) { + if ( tokens[0] == "typedef_tident" ) + return T_TIDENT; int parens = 0; for ( int i = off; i < tokens.size(); i++ ) { if ( tokens[i] == "(" ) parens++; - else if ( tokens[i] == "struct" && !parens ) - return T_STRUCT; + else if ( (tokens[i] == "struct" || tokens[i] == "union") && !parens ) + { + for ( int j = i + 1; j < tokens.size(); j++ ) + { + if ( tokens[j] == "{" ) + return T_STRUCT; + else if ( tokens[j] == ";" ) + break; + } + } + else if ( tokens[i] == ";" ) + break; } if ( parens > 1 ) return T_FUNCTION_PTR; @@ -217,6 +464,8 @@ int parse_type ( Type t, const vector& tokens, int off, vector& { switch ( t ) { + case T_TIDENT: + return parse_tident ( tokens, off, names, dependencies ); case T_VARIABLE: return parse_variable ( tokens, off, names, dependencies ); case T_STRUCT: @@ -259,6 +508,15 @@ void depend ( const string& ident, vector& dependencies ) dependencies.push_back ( ident ); } +int parse_tident ( const vector& tokens, int off, vector& names, vector& dependencies ) +{ + ASSERT ( tokens[off] == "typedef_tident" ); + ASSERT ( tokens[off+1] == "(" && tokens[off+3] == ")" ); + names.push_back ( tokens[off+2] ); + dependencies.push_back ( "typedef_tident" ); + return off + 4; +} + int parse_variable ( const vector& tokens, int off, vector& names, vector& dependencies ) { // NOTE - Test with bitfields, I think this code will actually handle them properly... @@ -276,11 +534,14 @@ int parse_struct ( const vector& tokens, int off, vector& names, { int done = tokens.size(); - while ( off < done && tokens[off] != "struct" ) + //if ( tokens[off+1] == "_LARGE_INTEGER" ) + // _CrtDbgBreak(); + + while ( off < done && tokens[off] != "struct" && tokens[off] != "union" ) depend ( tokens[off++], dependencies ); - ASSERT ( tokens[off] == "struct" ); - if ( tokens[off] != "struct" ) + ASSERT ( tokens[off] == "struct" || tokens[off] == "union" ); + if ( tokens[off] != "struct" && tokens[off] != "union" ) return off; off++; @@ -292,11 +553,13 @@ int parse_struct ( const vector& tokens, int off, vector& names, // skip through body of struct - noting any dependencies int indent = 1; - while ( tokens[off] != "}" ) + //if ( off >= done ) _CrtDbgBreak(); + while ( off < done && tokens[off] != "}" ) { vector fauxnames; Type t = identify ( tokens, off ); off = parse_type ( t, tokens, off, fauxnames, dependencies ); + //if ( off >= done ) _CrtDbgBreak(); } // process any trailing dependencies/names... diff --git a/reactos/apps/utils/sdkparse/sdkparse.dsp b/reactos/apps/utils/sdkparse/sdkparse.dsp index 73d3faa209a..728d566b683 100644 --- a/reactos/apps/utils/sdkparse/sdkparse.dsp +++ b/reactos/apps/utils/sdkparse/sdkparse.dsp @@ -118,6 +118,10 @@ SOURCE=.\FixLFN.h # End Source File # Begin Source File +SOURCE=.\Header.h +# End Source File +# Begin Source File + SOURCE=.\iskeyword.cpp # End Source File # Begin Source File @@ -150,7 +154,15 @@ SOURCE=.\strip_comments.h # End Source File # Begin Source File +SOURCE=.\Symbol.h +# End Source File +# Begin Source File + SOURCE=.\tokenize.cpp # End Source File +# Begin Source File + +SOURCE=.\Type.h +# End Source File # End Target # End Project diff --git a/reactos/apps/utils/sdkparse/skip_ws.cpp b/reactos/apps/utils/sdkparse/skip_ws.cpp index d2b185271d7..ab9ad0f10a9 100644 --- a/reactos/apps/utils/sdkparse/skip_ws.cpp +++ b/reactos/apps/utils/sdkparse/skip_ws.cpp @@ -10,3 +10,8 @@ char* skip_ws ( char* p ) { return p + strspn ( p, ws ); } + +const char* skip_ws ( const char* p ) +{ + return p + strspn ( p, ws ); +} diff --git a/reactos/apps/utils/sdkparse/skip_ws.h b/reactos/apps/utils/sdkparse/skip_ws.h index fe0004167c4..a0cb47cb175 100644 --- a/reactos/apps/utils/sdkparse/skip_ws.h +++ b/reactos/apps/utils/sdkparse/skip_ws.h @@ -5,4 +5,6 @@ char* skip_ws ( char* ); +const char* skip_ws ( const char* ); + #endif//SKIP_WS_H diff --git a/reactos/apps/utils/sdkparse/tokenize.cpp b/reactos/apps/utils/sdkparse/tokenize.cpp index fed9f7413e8..89c8beb49ec 100644 --- a/reactos/apps/utils/sdkparse/tokenize.cpp +++ b/reactos/apps/utils/sdkparse/tokenize.cpp @@ -6,6 +6,7 @@ #include #include +#include #include "assert.h" #include "tokenize.h" @@ -122,6 +123,19 @@ void tokenize ( const string& text, vector& tokens ) tokens.push_back ( ";" ); p++; break; + case '=': + switch ( p[1] ) + { + case '=': + tokens.push_back ( string ( p, 2 ) ); + p += 2; + break; + default: + tokens.push_back ( "=" ); + p++; + break; + } + break; case ':': switch ( p[1] ) { @@ -189,8 +203,13 @@ void tokenize ( const string& text, vector& tokens ) break; } break; + case '#': + while ( *p != '\n' ) + p++; + break; default: - printf ( "choked on '%c' in tokenize()\n", *p ); + printf ( "choked on '%c' in tokenize() - press any key to continue\n", *p ); + getch(); p++; break; }